Mirror of https://github.com/router-for-me/CLIProxyAPI.git
Synced 2026-02-07 23:10:51 +08:00
Compare commits
18 Commits
| Author | SHA1 | Date |
|---|---|---|
| | c8b33a8cc3 | |
| | b9d1e70ac2 | |
| | fdf5720217 | |
| | f40bd0cd51 | |
| | e33676bb87 | |
| | 2a663d5cba | |
| | 750b930679 | |
| | 3902fd7501 | |
| | 4fc3d5e935 | |
| | 2d2f4572a7 | |
| | 8f4c46f38d | |
| | b6ba51bc2a | |
| | 6a66d32d37 | |
| | 8d15723195 | |
| | 47b9503112 | |
| | 3b9253c2be | |
| | d241359153 | |
| | f4d4249ba5 | |
128  docker-build.sh
@@ -5,9 +5,115 @@
 # This script automates the process of building and running the Docker container
 # with version information dynamically injected at build time.
 
 # Exit immediately if a command exits with a non-zero status.
+# Hidden feature: Preserve usage statistics across rebuilds
+# Usage: ./docker-build.sh --with-usage
+# First run prompts for management API key, saved to temp/stats/.api_secret
+
 set -euo pipefail
 
+STATS_DIR="temp/stats"
+STATS_FILE="${STATS_DIR}/.usage_backup.json"
+SECRET_FILE="${STATS_DIR}/.api_secret"
+WITH_USAGE=false
+
+get_port() {
+    if [[ -f "config.yaml" ]]; then
+        grep -E "^port:" config.yaml | sed -E 's/^port: *["'"'"']?([0-9]+)["'"'"']?.*$/\1/'
+    else
+        echo "8317"
+    fi
+}
+
+export_stats_api_secret() {
+    if [[ -f "${SECRET_FILE}" ]]; then
+        API_SECRET=$(cat "${SECRET_FILE}")
+    else
+        if [[ ! -d "${STATS_DIR}" ]]; then
+            mkdir -p "${STATS_DIR}"
+        fi
+        echo "First time using --with-usage. Management API key required."
+        read -r -p "Enter management key: " -s API_SECRET
+        echo
+        echo "${API_SECRET}" > "${SECRET_FILE}"
+        chmod 600 "${SECRET_FILE}"
+    fi
+}
+
+check_container_running() {
+    local port
+    port=$(get_port)
+
+    if ! curl -s -o /dev/null -w "%{http_code}" "http://localhost:${port}/" | grep -q "200"; then
+        echo "Error: cli-proxy-api service is not responding at localhost:${port}"
+        echo "Please start the container first or use without --with-usage flag."
+        exit 1
+    fi
+}
+
+export_stats() {
+    local port
+    port=$(get_port)
+
+    if [[ ! -d "${STATS_DIR}" ]]; then
+        mkdir -p "${STATS_DIR}"
+    fi
+    check_container_running
+    echo "Exporting usage statistics..."
+    EXPORT_RESPONSE=$(curl -s -w "\n%{http_code}" -H "X-Management-Key: ${API_SECRET}" \
+        "http://localhost:${port}/v0/management/usage/export")
+    HTTP_CODE=$(echo "${EXPORT_RESPONSE}" | tail -n1)
+    RESPONSE_BODY=$(echo "${EXPORT_RESPONSE}" | sed '$d')
+
+    if [[ "${HTTP_CODE}" != "200" ]]; then
+        echo "Export failed (HTTP ${HTTP_CODE}): ${RESPONSE_BODY}"
+        exit 1
+    fi
+
+    echo "${RESPONSE_BODY}" > "${STATS_FILE}"
+    echo "Statistics exported to ${STATS_FILE}"
+}
+
+import_stats() {
+    local port
+    port=$(get_port)
+
+    echo "Importing usage statistics..."
+    IMPORT_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST \
+        -H "X-Management-Key: ${API_SECRET}" \
+        -H "Content-Type: application/json" \
+        -d @"${STATS_FILE}" \
+        "http://localhost:${port}/v0/management/usage/import")
+    IMPORT_CODE=$(echo "${IMPORT_RESPONSE}" | tail -n1)
+    IMPORT_BODY=$(echo "${IMPORT_RESPONSE}" | sed '$d')
+
+    if [[ "${IMPORT_CODE}" == "200" ]]; then
+        echo "Statistics imported successfully"
+    else
+        echo "Import failed (HTTP ${IMPORT_CODE}): ${IMPORT_BODY}"
+    fi
+
+    rm -f "${STATS_FILE}"
+}
+
+wait_for_service() {
+    local port
+    port=$(get_port)
+
+    echo "Waiting for service to be ready..."
+    for i in {1..30}; do
+        if curl -s -o /dev/null -w "%{http_code}" "http://localhost:${port}/" | grep -q "200"; then
+            break
+        fi
+        sleep 1
+    done
+    sleep 2
+}
+
+if [[ "${1:-}" == "--with-usage" ]]; then
+    WITH_USAGE=true
+    export_stats_api_secret
+fi
+
 # --- Step 1: Choose Environment ---
 echo "Please select an option:"
 echo "1) Run using Pre-built Image (Recommended)"
@@ -18,7 +124,14 @@ read -r -p "Enter choice [1-2]: " choice
 case "$choice" in
     1)
         echo "--- Running with Pre-built Image ---"
+        if [[ "${WITH_USAGE}" == "true" ]]; then
+            export_stats
+        fi
         docker compose up -d --remove-orphans --no-build
+        if [[ "${WITH_USAGE}" == "true" ]]; then
+            wait_for_service
+            import_stats
+        fi
         echo "Services are starting from remote image."
         echo "Run 'docker compose logs -f' to see the logs."
         ;;
@@ -38,7 +151,11 @@ case "$choice" in
 
         # Build and start the services with a local-only image tag
         export CLI_PROXY_IMAGE="cli-proxy-api:local"
 
+        if [[ "${WITH_USAGE}" == "true" ]]; then
+            export_stats
+        fi
+
         echo "Building the Docker image..."
         docker compose build \
             --build-arg VERSION="${VERSION}" \
@@ -48,6 +165,11 @@ case "$choice" in
         echo "Starting the services..."
         docker compose up -d --remove-orphans --pull never
 
+        if [[ "${WITH_USAGE}" == "true" ]]; then
+            wait_for_service
+            import_stats
+        fi
+
         echo "Build complete. Services are starting."
         echo "Run 'docker compose logs -f' to see the logs."
         ;;
@@ -55,4 +177,4 @@ case "$choice" in
         echo "Invalid choice. Please enter 1 or 2."
         exit 1
         ;;
 esac
@@ -740,7 +740,7 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
 		{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
 		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
-		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
+		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
+		{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
 	}
 	models := make([]*ModelInfo, 0, len(entries))
@@ -625,6 +625,131 @@ func (r *ModelRegistry) GetAvailableModels(handlerType string) []map[string]any
 	return models
 }
 
+// GetAvailableModelsByProvider returns models available for the given provider identifier.
+// Parameters:
+//   - provider: Provider identifier (e.g., "codex", "gemini", "antigravity")
+//
+// Returns:
+//   - []*ModelInfo: List of available models for the provider
+func (r *ModelRegistry) GetAvailableModelsByProvider(provider string) []*ModelInfo {
+	provider = strings.ToLower(strings.TrimSpace(provider))
+	if provider == "" {
+		return nil
+	}
+
+	r.mutex.RLock()
+	defer r.mutex.RUnlock()
+
+	type providerModel struct {
+		count int
+		info  *ModelInfo
+	}
+
+	providerModels := make(map[string]*providerModel)
+
+	for clientID, clientProvider := range r.clientProviders {
+		if clientProvider != provider {
+			continue
+		}
+		modelIDs := r.clientModels[clientID]
+		if len(modelIDs) == 0 {
+			continue
+		}
+		clientInfos := r.clientModelInfos[clientID]
+		for _, modelID := range modelIDs {
+			modelID = strings.TrimSpace(modelID)
+			if modelID == "" {
+				continue
+			}
+			entry := providerModels[modelID]
+			if entry == nil {
+				entry = &providerModel{}
+				providerModels[modelID] = entry
+			}
+			entry.count++
+			if entry.info == nil {
+				if clientInfos != nil {
+					if info := clientInfos[modelID]; info != nil {
+						entry.info = info
+					}
+				}
+				if entry.info == nil {
+					if reg, ok := r.models[modelID]; ok && reg != nil && reg.Info != nil {
+						entry.info = reg.Info
+					}
+				}
+			}
+		}
+	}
+
+	if len(providerModels) == 0 {
+		return nil
+	}
+
+	quotaExpiredDuration := 5 * time.Minute
+	now := time.Now()
+	result := make([]*ModelInfo, 0, len(providerModels))
+
+	for modelID, entry := range providerModels {
+		if entry == nil || entry.count <= 0 {
+			continue
+		}
+		registration, ok := r.models[modelID]
+
+		expiredClients := 0
+		cooldownSuspended := 0
+		otherSuspended := 0
+		if ok && registration != nil {
+			if registration.QuotaExceededClients != nil {
+				for clientID, quotaTime := range registration.QuotaExceededClients {
+					if clientID == "" {
+						continue
+					}
+					if p, okProvider := r.clientProviders[clientID]; !okProvider || p != provider {
+						continue
+					}
+					if quotaTime != nil && now.Sub(*quotaTime) < quotaExpiredDuration {
+						expiredClients++
+					}
+				}
+			}
+			if registration.SuspendedClients != nil {
+				for clientID, reason := range registration.SuspendedClients {
+					if clientID == "" {
+						continue
+					}
+					if p, okProvider := r.clientProviders[clientID]; !okProvider || p != provider {
+						continue
+					}
+					if strings.EqualFold(reason, "quota") {
+						cooldownSuspended++
+						continue
+					}
+					otherSuspended++
+				}
+			}
+		}
+
+		availableClients := entry.count
+		effectiveClients := availableClients - expiredClients - otherSuspended
+		if effectiveClients < 0 {
+			effectiveClients = 0
+		}
+
+		if effectiveClients > 0 || (availableClients > 0 && (expiredClients > 0 || cooldownSuspended > 0) && otherSuspended == 0) {
+			if entry.info != nil {
+				result = append(result, entry.info)
+				continue
+			}
+			if ok && registration != nil && registration.Info != nil {
+				result = append(result, registration.Info)
+			}
+		}
+	}
+
+	return result
+}
+
 // GetModelCount returns the number of available clients for a specific model
 // Parameters:
 //   - modelID: The model ID to check
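The availability rule the new GetAvailableModelsByProvider applies per model can be condensed as follows. This is an illustrative sketch, not code from the repository: inputs are simplified to plain counts and the function name is hypothetical.

```go
package main

import "fmt"

// modelAvailable mirrors the predicate in the diff above: a model stays listed
// while at least one client is effective, or while all of its unavailability
// is temporary (quota cooldowns) rather than hard suspension.
func modelAvailable(total, quotaExpired, cooldownSuspended, otherSuspended int) bool {
	effective := total - quotaExpired - otherSuspended
	if effective < 0 {
		effective = 0
	}
	return effective > 0 ||
		(total > 0 && (quotaExpired > 0 || cooldownSuspended > 0) && otherSuspended == 0)
}

func main() {
	fmt.Println(modelAvailable(2, 2, 0, 0)) // true: only quota cooldowns, clients will recover
	fmt.Println(modelAvailable(3, 1, 0, 1)) // true: one client still effective
	fmt.Println(modelAvailable(1, 0, 0, 1)) // false: hard-suspended with no spare client
}
```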
@@ -323,6 +323,11 @@ type translatedPayload struct {
 func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) {
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, stream)
 	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
 	payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model)
 	payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload)
@@ -331,7 +336,7 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	payload = util.NormalizeGeminiThinkingBudget(req.Model, payload, true)
 	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
 	payload = fixGeminiImageAspectRatio(req.Model, payload)
-	payload = applyPayloadConfig(e.cfg, req.Model, payload)
+	payload = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", payload, originalTranslated)
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType")
 	payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema")
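The same five-line preamble recurs in every executor below: clone the caller's original request when the middleware preserved one (opts.OriginalRequest), translate it separately, and pass that translation to applyPayloadConfigWithRoot so defaults are judged against user intent rather than translator output. A minimal standalone sketch of the selection step; only bytes.Clone and the fallback logic are taken from the diff:

```go
package main

import (
	"bytes"
	"fmt"
)

// resolveOriginal picks the payload that represents caller intent: the raw
// original request when one was preserved upstream, else the request body.
func resolveOriginal(payload, originalRequest []byte) []byte {
	original := bytes.Clone(payload)
	if len(originalRequest) > 0 {
		original = bytes.Clone(originalRequest)
	}
	return original
}

func main() {
	payload := []byte(`{"model":"a","temperature":1}`) // possibly rewritten by translation
	fmt.Println(string(resolveOriginal(payload, nil))) // no preserved original: falls back to payload
	fmt.Println(string(resolveOriginal(payload, []byte(`{"model":"b"}`)))) // preserved original wins
}
```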
@@ -94,13 +94,18 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 
 	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
 	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
 	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
+	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
 
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -189,13 +194,18 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
 	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
 	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
 	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated, true)
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
+	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
 
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -525,13 +535,18 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
 	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
 	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
 	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
+	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
 
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -57,6 +57,11 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, stream)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream)
 	body, _ = sjson.SetBytes(body, "model", model)
 	// Inject thinking config based on model metadata for thinking variants
@@ -65,7 +70,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if !strings.HasPrefix(model, "claude-3-5-haiku") {
 		body = checkSystemInstructions(body)
 	}
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 
 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)
@@ -167,12 +172,17 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
 		model = override
 	}
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
 	body, _ = sjson.SetBytes(body, "model", model)
 	// Inject thinking config based on model metadata for thinking variants
 	body = e.injectThinkingConfig(model, req.Metadata, body)
 	body = checkSystemInstructions(body)
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 
 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)
@@ -56,13 +56,18 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
 	body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
 	body = NormalizeThinkingConfig(body, model, false)
 	if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
 		return resp, errValidate
 	}
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", model)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
@@ -156,6 +161,11 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
 
 	body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
@@ -163,7 +173,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
 		return nil, errValidate
 	}
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.SetBytes(body, "model", model)
@@ -77,6 +77,11 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
 	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
@@ -84,7 +89,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
-	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
+	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
 
 	action := "generateContent"
 	if req.Metadata != nil {
@@ -216,6 +221,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
 	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
@@ -223,7 +233,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
-	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
+	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
 
 	projectID := resolveGeminiProjectID(auth)
@@ -85,13 +85,18 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	// Official Gemini API via API key or OAuth bearer
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
 	body = ApplyThinkingMetadata(body, req.Metadata, model)
 	body = util.ApplyDefaultThinkingIfNeeded(model, body)
 	body = util.NormalizeGeminiThinkingBudget(model, body)
 	body = util.StripThinkingConfigIfUnsupported(model, body)
 	body = fixGeminiImageAspectRatio(model, body)
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", model)
 
 	action := "generateContent"
@@ -183,13 +188,18 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
 	body = ApplyThinkingMetadata(body, req.Metadata, model)
 	body = util.ApplyDefaultThinkingIfNeeded(model, body)
 	body = util.NormalizeGeminiThinkingBudget(model, body)
 	body = util.StripThinkingConfigIfUnsupported(model, body)
 	body = fixGeminiImageAspectRatio(model, body)
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", model)
 
 	baseURL := resolveGeminiBaseURL(auth)
@@ -122,6 +122,11 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
@@ -134,7 +139,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
-	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", req.Model)
 
 	action := "generateContent"
@@ -225,6 +230,11 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
 	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
 		if budgetOverride != nil {
@@ -237,7 +247,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	body = util.NormalizeGeminiThinkingBudget(model, body)
 	body = util.StripThinkingConfigIfUnsupported(model, body)
 	body = fixGeminiImageAspectRatio(model, body)
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", model)
 
 	action := "generateContent"
@@ -324,6 +334,11 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
@@ -336,7 +351,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
-	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", req.Model)
 
 	baseURL := vertexBaseURL(location)
@@ -444,6 +459,11 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
 	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
 		if budgetOverride != nil {
@@ -456,7 +476,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	body = util.NormalizeGeminiThinkingBudget(model, body)
 	body = util.StripThinkingConfigIfUnsupported(model, body)
 	body = fixGeminiImageAspectRatio(model, body)
-	body = applyPayloadConfig(e.cfg, model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "model", model)
 
 	// For API key auth, use simpler URL format without project/location
@@ -56,6 +56,11 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	body, _ = sjson.SetBytes(body, "model", req.Model)
@@ -65,7 +70,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	}
 	body = applyIFlowThinkingConfig(body)
 	body = preserveReasoningContentInMessages(body)
-	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 
 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -145,6 +150,11 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
 	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
@@ -160,7 +170,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
 		body = ensureToolsArray(body)
 	}
-	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 
 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -441,21 +451,18 @@ func ensureToolsArray(body []byte) []byte {
 	return updated
 }
 
-// preserveReasoningContentInMessages ensures reasoning_content from assistant messages in the
-// conversation history is preserved when sending to iFlow models that support thinking.
-// This is critical for multi-turn conversations where the model needs to see its previous
-// reasoning to maintain coherent thought chains across tool calls and conversation turns.
+// preserveReasoningContentInMessages checks if reasoning_content from assistant messages
+// is preserved in conversation history for iFlow models that support thinking.
+// This is helpful for multi-turn conversations where the model may benefit from seeing
+// its previous reasoning to maintain coherent thought chains.
 //
-// For GLM-4.7 and MiniMax-M2.1, the full assistant response (including reasoning) must be
-// appended back into message history before the next call.
+// For GLM-4.6/4.7 and MiniMax M2/M2.1, it is recommended to include the full assistant
+// response (including reasoning_content) in message history for better context continuity.
 func preserveReasoningContentInMessages(body []byte) []byte {
 	model := strings.ToLower(gjson.GetBytes(body, "model").String())
 
 	// Only apply to models that support thinking with history preservation
-	needsPreservation := strings.HasPrefix(model, "glm-4.7") ||
-		strings.HasPrefix(model, "glm-4-7") ||
-		strings.HasPrefix(model, "minimax-m2.1") ||
-		strings.HasPrefix(model, "minimax-m2-1")
+	needsPreservation := strings.HasPrefix(model, "glm-4") || strings.HasPrefix(model, "minimax-m2")
 
 	if !needsPreservation {
 		return body
@@ -493,45 +500,35 @@ func preserveReasoningContentInMessages(body []byte) []byte {
 // This should be called after NormalizeThinkingConfig has processed the payload.
 //
 // Model-specific handling:
-//   - GLM-4.7: Uses extra_body={"thinking": {"type": "enabled"}, "clear_thinking": false}
-//   - MiniMax-M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
-//   - Other iFlow models: Uses chat_template_kwargs.enable_thinking (boolean)
+//   - GLM-4.6/4.7: Uses chat_template_kwargs.enable_thinking (boolean) and chat_template_kwargs.clear_thinking=false
+//   - MiniMax M2/M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
 func applyIFlowThinkingConfig(body []byte) []byte {
 	effort := gjson.GetBytes(body, "reasoning_effort")
-	model := strings.ToLower(gjson.GetBytes(body, "model").String())
-
-	// Check if thinking should be enabled
-	val := ""
-	if effort.Exists() {
-		val = strings.ToLower(strings.TrimSpace(effort.String()))
-	}
-	enableThinking := effort.Exists() && val != "none" && val != ""
+	if !effort.Exists() {
+		return body
+	}
 
+	model := strings.ToLower(gjson.GetBytes(body, "model").String())
+	val := strings.ToLower(strings.TrimSpace(effort.String()))
+	enableThinking := val != "none" && val != ""
 
 	// Remove reasoning_effort as we'll convert to model-specific format
-	if effort.Exists() {
-		body, _ = sjson.DeleteBytes(body, "reasoning_effort")
-	}
+	body, _ = sjson.DeleteBytes(body, "reasoning_effort")
 	body, _ = sjson.DeleteBytes(body, "thinking")
 
-	// GLM-4.7: Use extra_body with thinking config and clear_thinking: false
-	if strings.HasPrefix(model, "glm-4.7") || strings.HasPrefix(model, "glm-4-7") {
-		if enableThinking {
-			body, _ = sjson.SetBytes(body, "extra_body.thinking.type", "enabled")
-			body, _ = sjson.SetBytes(body, "extra_body.clear_thinking", false)
-		}
-		return body
-	}
-
-	// MiniMax-M2.1: Use reasoning_split=true for interleaved thinking
-	if strings.HasPrefix(model, "minimax-m2.1") || strings.HasPrefix(model, "minimax-m2-1") {
-		if enableThinking {
-			body, _ = sjson.SetBytes(body, "reasoning_split", true)
-		}
-		return body
-	}
-
-	// Other iFlow models (including GLM-4.6): Use chat_template_kwargs.enable_thinking
-	if effort.Exists() {
-		body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
-	}
+	// GLM-4.6/4.7: Use chat_template_kwargs
+	if strings.HasPrefix(model, "glm-4") {
+		body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
+		if enableThinking {
+			body, _ = sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false)
+		}
+		return body
+	}
+
+	// MiniMax M2/M2.1: Use reasoning_split
+	if strings.HasPrefix(model, "minimax-m2") {
+		body, _ = sjson.SetBytes(body, "reasoning_split", enableThinking)
+		return body
+	}
 
 	return body
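Condensed, the reworked applyIFlowThinkingConfig maps a request-level reasoning_effort onto per-family fields. The sketch below restates that mapping as a pure function (hypothetical name and return shape; the field paths are the ones from the diff, while the real code mutates the body with sjson and also deletes reasoning_effort and thinking):

```go
package main

import (
	"fmt"
	"strings"
)

// iflowThinkingFields returns the JSON fields applyIFlowThinkingConfig would
// set for a given model and reasoning_effort value.
func iflowThinkingFields(model, effort string) map[string]any {
	if effort == "" {
		return nil // no reasoning_effort present: body is left untouched
	}
	val := strings.ToLower(strings.TrimSpace(effort))
	enable := val != "none" && val != ""
	model = strings.ToLower(model)
	switch {
	case strings.HasPrefix(model, "glm-4"):
		fields := map[string]any{"chat_template_kwargs.enable_thinking": enable}
		if enable {
			fields["chat_template_kwargs.clear_thinking"] = false
		}
		return fields
	case strings.HasPrefix(model, "minimax-m2"):
		return map[string]any{"reasoning_split": enable}
	}
	return map[string]any{} // other models: the effort field is simply dropped
}

func main() {
	fmt.Println(iflowThinkingFields("GLM-4.7", "high"))      // enable_thinking=true, clear_thinking=false
	fmt.Println(iflowThinkingFields("MiniMax-M2.1", "none")) // reasoning_split=false
}
```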
@@ -53,12 +53,17 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	// Translate inbound request to OpenAI format
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, opts.Stream)
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream)
 	modelOverride := e.resolveUpstreamModel(req.Model, auth)
 	if modelOverride != "" {
 		translated = e.overrideModel(translated, modelOverride)
 	}
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
+	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated)
 	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
 	translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
 	translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
@@ -145,12 +150,17 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 	}
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	modelOverride := e.resolveUpstreamModel(req.Model, auth)
 	if modelOverride != "" {
 		translated = e.overrideModel(translated, modelOverride)
 	}
-	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
+	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated)
 	allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
 	translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
 	translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
@@ -104,17 +104,11 @@ func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model
 	return payload
 }
 
-// applyPayloadConfig applies payload default and override rules from configuration
-// to the given JSON payload for the specified model.
-// Defaults only fill missing fields, while overrides always overwrite existing values.
-func applyPayloadConfig(cfg *config.Config, model string, payload []byte) []byte {
-	return applyPayloadConfigWithRoot(cfg, model, "", "", payload)
-}
-
 // applyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter
 // paths as relative to the provided root path (for example, "request" for Gemini CLI)
-// and restricts matches to the given protocol when supplied.
-func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload []byte) []byte {
+// and restricts matches to the given protocol when supplied. Defaults are checked
+// against the original payload when provided.
+func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload, original []byte) []byte {
 	if cfg == nil || len(payload) == 0 {
 		return payload
 	}
@@ -127,6 +121,11 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 		return payload
 	}
 	out := payload
+	source := original
+	if len(source) == 0 {
+		source = payload
+	}
+	appliedDefaults := make(map[string]struct{})
 	// Apply default rules: first write wins per field across all matching rules.
 	for i := range rules.Default {
 		rule := &rules.Default[i]
@@ -138,7 +137,10 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 		if fullPath == "" {
 			continue
 		}
-		if gjson.GetBytes(out, fullPath).Exists() {
+		if gjson.GetBytes(source, fullPath).Exists() {
+			continue
+		}
+		if _, ok := appliedDefaults[fullPath]; ok {
 			continue
 		}
 		updated, errSet := sjson.SetBytes(out, fullPath, value)
@@ -146,6 +148,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 			continue
 		}
 		out = updated
+		appliedDefaults[fullPath] = struct{}{}
 	}
 }
 // Apply override rules: last write wins per field across all matching rules.
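The behavioral change in applyPayloadConfigWithRoot is easiest to see in isolation: a default must only fill a field the user never sent, so existence is now checked against the original payload rather than the (possibly translator-populated) working payload. A minimal sketch of one default application, using the same gjson/sjson libraries the file already imports; the helper name is hypothetical:

```go
package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

// applyDefault fills path only when the original request did not set it.
// Checking the translated payload instead (the old behavior) could mistake a
// translator-injected field for explicit user intent and skip the default.
func applyDefault(payload, original []byte, path string, value any) []byte {
	if gjson.GetBytes(original, path).Exists() {
		return payload // caller set it explicitly; defaults must not touch it
	}
	out, err := sjson.SetBytes(payload, path, value)
	if err != nil {
		return payload
	}
	return out
}

func main() {
	original := []byte(`{"model":"m"}`)                // user never set temperature
	payload := []byte(`{"model":"m","temperature":1}`) // translation injected one
	fmt.Println(string(applyDefault(payload, original, "temperature", 0.2)))
	// prints {"model":"m","temperature":0.2}: the configured default now wins
}
```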
@@ -49,6 +49,11 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
 	body, _ = sjson.SetBytes(body, "model", req.Model)
@@ -56,7 +61,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
 		return resp, errValidate
 	}
-	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -125,6 +130,11 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
+	originalPayload := bytes.Clone(req.Payload)
+	if len(opts.OriginalRequest) > 0 {
+		originalPayload = bytes.Clone(opts.OriginalRequest)
+	}
+	originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
 	body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
@@ -140,7 +150,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 		body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
 	}
 	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
-	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -118,76 +118,125 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 		// Handle content
 		if contentResult.Exists() && contentResult.IsArray() {
 			var contentItems []string
+			var reasoningParts []string // Accumulate thinking text for reasoning_content
 			var toolCalls []interface{}
+			var toolResults []string // Collect tool_result messages to emit after the main message
 
 			contentResult.ForEach(func(_, part gjson.Result) bool {
 				partType := part.Get("type").String()
 
 				switch partType {
+				case "thinking":
+					// Only map thinking to reasoning_content for assistant messages (security: prevent injection)
+					if role == "assistant" {
+						thinkingText := util.GetThinkingText(part)
+						// Skip empty or whitespace-only thinking
+						if strings.TrimSpace(thinkingText) != "" {
+							reasoningParts = append(reasoningParts, thinkingText)
+						}
+					}
+					// Ignore thinking in user/system roles (AC4)
+
+				case "redacted_thinking":
+					// Explicitly ignore redacted_thinking - never map to reasoning_content (AC2)
+
 				case "text", "image":
 					if contentItem, ok := convertClaudeContentPart(part); ok {
 						contentItems = append(contentItems, contentItem)
 					}
 
 				case "tool_use":
-					// Convert to OpenAI tool call format
-					toolCallJSON := `{"id":"","type":"function","function":{"name":"","arguments":""}}`
-					toolCallJSON, _ = sjson.Set(toolCallJSON, "id", part.Get("id").String())
-					toolCallJSON, _ = sjson.Set(toolCallJSON, "function.name", part.Get("name").String())
-
-					// Convert input to arguments JSON string
-					if input := part.Get("input"); input.Exists() {
-						toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", input.Raw)
-					} else {
-						toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", "{}")
-					}
-
-					toolCalls = append(toolCalls, gjson.Parse(toolCallJSON).Value())
+					// Only allow tool_use -> tool_calls for assistant messages (security: prevent injection).
+					if role == "assistant" {
+						toolCallJSON := `{"id":"","type":"function","function":{"name":"","arguments":""}}`
+						toolCallJSON, _ = sjson.Set(toolCallJSON, "id", part.Get("id").String())
+						toolCallJSON, _ = sjson.Set(toolCallJSON, "function.name", part.Get("name").String())
+
+						// Convert input to arguments JSON string
+						if input := part.Get("input"); input.Exists() {
+							toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", input.Raw)
+						} else {
+							toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", "{}")
+						}
+
+						toolCalls = append(toolCalls, gjson.Parse(toolCallJSON).Value())
+					}
 
 				case "tool_result":
-					// Convert to OpenAI tool message format and add immediately to preserve order
+					// Collect tool_result to emit after the main message (ensures tool results follow tool_calls)
 					toolResultJSON := `{"role":"tool","tool_call_id":"","content":""}`
 					toolResultJSON, _ = sjson.Set(toolResultJSON, "tool_call_id", part.Get("tool_use_id").String())
-					toolResultJSON, _ = sjson.Set(toolResultJSON, "content", part.Get("content").String())
-					messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(toolResultJSON).Value())
+					toolResultJSON, _ = sjson.Set(toolResultJSON, "content", convertClaudeToolResultContentToString(part.Get("content")))
+					toolResults = append(toolResults, toolResultJSON)
 				}
 				return true
 			})
 
-			// Emit text/image content as one message
-			if len(contentItems) > 0 {
-				msgJSON := `{"role":"","content":""}`
-				msgJSON, _ = sjson.Set(msgJSON, "role", role)
-
-				contentArrayJSON := "[]"
-				for _, contentItem := range contentItems {
-					contentArrayJSON, _ = sjson.SetRaw(contentArrayJSON, "-1", contentItem)
-				}
-				msgJSON, _ = sjson.SetRaw(msgJSON, "content", contentArrayJSON)
-
-				contentValue := gjson.Get(msgJSON, "content")
-				hasContent := false
-				switch {
-				case !contentValue.Exists():
-					hasContent = false
-				case contentValue.Type == gjson.String:
-					hasContent = contentValue.String() != ""
-				case contentValue.IsArray():
-					hasContent = len(contentValue.Array()) > 0
-				default:
-					hasContent = contentValue.Raw != "" && contentValue.Raw != "null"
-				}
-
-				if hasContent {
-					messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(msgJSON).Value())
-				}
-			}
-
-			// Emit tool calls in a separate assistant message
-			if role == "assistant" && len(toolCalls) > 0 {
-				toolCallMsgJSON := `{"role":"assistant","tool_calls":[]}`
-				toolCallMsgJSON, _ = sjson.Set(toolCallMsgJSON, "tool_calls", toolCalls)
-				messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(toolCallMsgJSON).Value())
-			}
+			// Build reasoning content string
+			reasoningContent := ""
+			if len(reasoningParts) > 0 {
+				reasoningContent = strings.Join(reasoningParts, "\n\n")
+			}
+
+			hasContent := len(contentItems) > 0
+			hasReasoning := reasoningContent != ""
+			hasToolCalls := len(toolCalls) > 0
+			hasToolResults := len(toolResults) > 0
+
+			// OpenAI requires: tool messages MUST immediately follow the assistant message with tool_calls.
+			// Therefore, we emit tool_result messages FIRST (they respond to the previous assistant's tool_calls),
+			// then emit the current message's content.
+			for _, toolResultJSON := range toolResults {
+				messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(toolResultJSON).Value())
+			}
+
+			// For assistant messages: emit a single unified message with content, tool_calls, and reasoning_content
+			// This avoids splitting into multiple assistant messages which breaks OpenAI tool-call adjacency
+			if role == "assistant" {
+				if hasContent || hasReasoning || hasToolCalls {
+					msgJSON := `{"role":"assistant"}`
+
+					// Add content (as array if we have items, empty string if reasoning-only)
+					if hasContent {
+						contentArrayJSON := "[]"
+						for _, contentItem := range contentItems {
+							contentArrayJSON, _ = sjson.SetRaw(contentArrayJSON, "-1", contentItem)
+						}
+						msgJSON, _ = sjson.SetRaw(msgJSON, "content", contentArrayJSON)
+					} else {
+						// Ensure content field exists for OpenAI compatibility
+						msgJSON, _ = sjson.Set(msgJSON, "content", "")
+					}
+
+					// Add reasoning_content if present
+					if hasReasoning {
+						msgJSON, _ = sjson.Set(msgJSON, "reasoning_content", reasoningContent)
+					}
+
+					// Add tool_calls if present (in same message as content)
+					if hasToolCalls {
+						msgJSON, _ = sjson.Set(msgJSON, "tool_calls", toolCalls)
+					}
+
+					messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(msgJSON).Value())
+				}
+			} else {
+				// For non-assistant roles: emit content message if we have content
+				// If the message only contains tool_results (no text/image), we still processed them above
+				if hasContent {
+					msgJSON := `{"role":""}`
+					msgJSON, _ = sjson.Set(msgJSON, "role", role)
+
+					contentArrayJSON := "[]"
+					for _, contentItem := range contentItems {
+						contentArrayJSON, _ = sjson.SetRaw(contentArrayJSON, "-1", contentItem)
+					}
+					msgJSON, _ = sjson.SetRaw(msgJSON, "content", contentArrayJSON)
+
+					messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(msgJSON).Value())
+				} else if hasToolResults && !hasContent {
+					// tool_results already emitted above, no additional user message needed
+				}
+			}
 
 		} else if contentResult.Exists() && contentResult.Type == gjson.String {
@@ -307,3 +356,43 @@ func convertClaudeContentPart(part gjson.Result) (string, bool) {
 		return "", false
 	}
 }
+
+func convertClaudeToolResultContentToString(content gjson.Result) string {
+	if !content.Exists() {
+		return ""
+	}
+
+	if content.Type == gjson.String {
+		return content.String()
+	}
+
+	if content.IsArray() {
+		var parts []string
+		content.ForEach(func(_, item gjson.Result) bool {
+			switch {
+			case item.Type == gjson.String:
+				parts = append(parts, item.String())
+			case item.IsObject() && item.Get("text").Exists() && item.Get("text").Type == gjson.String:
+				parts = append(parts, item.Get("text").String())
+			default:
+				parts = append(parts, item.Raw)
+			}
+			return true
+		})
+
+		joined := strings.Join(parts, "\n\n")
+		if strings.TrimSpace(joined) != "" {
+			return joined
+		}
+		return content.Raw
+	}
+
+	if content.IsObject() {
+		if text := content.Get("text"); text.Exists() && text.Type == gjson.String {
+			return text.String()
+		}
+		return content.Raw
+	}
+
+	return content.Raw
+}
500  internal/translator/openai/claude/openai_claude_request_test.go  Normal file
@@ -0,0 +1,500 @@
|
||||
package claude

import (
    "testing"

    "github.com/tidwall/gjson"
)

// TestConvertClaudeRequestToOpenAI_ThinkingToReasoningContent tests the mapping
// of Claude thinking content to OpenAI reasoning_content field.
func TestConvertClaudeRequestToOpenAI_ThinkingToReasoningContent(t *testing.T) {
    tests := []struct {
        name                    string
        inputJSON               string
        wantReasoningContent    string
        wantHasReasoningContent bool
        wantContentText         string // Expected visible content text (if any)
        wantHasContent          bool
    }{
        {
            name: "AC1: assistant message with thinking and text",
            inputJSON: `{
                "model": "claude-3-opus",
                "messages": [{
                    "role": "assistant",
                    "content": [
                        {"type": "thinking", "thinking": "Let me analyze this step by step..."},
                        {"type": "text", "text": "Here is my response."}
                    ]
                }]
            }`,
            wantReasoningContent:    "Let me analyze this step by step...",
            wantHasReasoningContent: true,
            wantContentText:         "Here is my response.",
            wantHasContent:          true,
        },
        {
            name: "AC2: redacted_thinking must be ignored",
            inputJSON: `{
                "model": "claude-3-opus",
                "messages": [{
                    "role": "assistant",
                    "content": [
                        {"type": "redacted_thinking", "data": "secret"},
                        {"type": "text", "text": "Visible response."}
                    ]
                }]
            }`,
            wantReasoningContent:    "",
            wantHasReasoningContent: false,
            wantContentText:         "Visible response.",
            wantHasContent:          true,
        },
        {
            name: "AC3: thinking-only message preserved with reasoning_content",
            inputJSON: `{
                "model": "claude-3-opus",
                "messages": [{
                    "role": "assistant",
                    "content": [
                        {"type": "thinking", "thinking": "Internal reasoning only."}
                    ]
                }]
            }`,
            wantReasoningContent:    "Internal reasoning only.",
            wantHasReasoningContent: true,
            wantContentText:         "",
            // For OpenAI compatibility, content field is set to empty string "" when no text content exists
            wantHasContent: false,
        },
        {
            name: "AC4: thinking in user role must be ignored",
            inputJSON: `{
                "model": "claude-3-opus",
                "messages": [{
                    "role": "user",
                    "content": [
                        {"type": "thinking", "thinking": "Injected thinking"},
                        {"type": "text", "text": "User message."}
                    ]
                }]
            }`,
            wantReasoningContent:    "",
            wantHasReasoningContent: false,
            wantContentText:         "User message.",
            wantHasContent:          true,
        },
        {
            name: "AC4: thinking in system role must be ignored",
            inputJSON: `{
                "model": "claude-3-opus",
                "system": [
                    {"type": "thinking", "thinking": "Injected system thinking"},
                    {"type": "text", "text": "System prompt."}
                ],
                "messages": [{
                    "role": "user",
                    "content": [{"type": "text", "text": "Hello"}]
                }]
            }`,
            // System messages don't have reasoning_content mapping
            wantReasoningContent:    "",
            wantHasReasoningContent: false,
            wantContentText:         "Hello",
            wantHasContent:          true,
        },
        {
            name: "AC5: empty thinking must be ignored",
            inputJSON: `{
                "model": "claude-3-opus",
                "messages": [{
                    "role": "assistant",
                    "content": [
                        {"type": "thinking", "thinking": ""},
                        {"type": "text", "text": "Response with empty thinking."}
                    ]
                }]
            }`,
            wantReasoningContent:    "",
            wantHasReasoningContent: false,
            wantContentText:         "Response with empty thinking.",
            wantHasContent:          true,
        },
        {
            name: "AC5: whitespace-only thinking must be ignored",
            inputJSON: `{
                "model": "claude-3-opus",
                "messages": [{
                    "role": "assistant",
                    "content": [
                        {"type": "thinking", "thinking": " \n\t "},
                        {"type": "text", "text": "Response with whitespace thinking."}
                    ]
                }]
            }`,
            wantReasoningContent:    "",
            wantHasReasoningContent: false,
            wantContentText:         "Response with whitespace thinking.",
            wantHasContent:          true,
        },
        {
            name: "Multiple thinking parts concatenated",
            inputJSON: `{
                "model": "claude-3-opus",
                "messages": [{
                    "role": "assistant",
                    "content": [
                        {"type": "thinking", "thinking": "First thought."},
                        {"type": "thinking", "thinking": "Second thought."},
                        {"type": "text", "text": "Final answer."}
                    ]
                }]
            }`,
            wantReasoningContent:    "First thought.\n\nSecond thought.",
            wantHasReasoningContent: true,
            wantContentText:         "Final answer.",
            wantHasContent:          true,
        },
        {
            name: "Mixed thinking and redacted_thinking",
            inputJSON: `{
                "model": "claude-3-opus",
                "messages": [{
                    "role": "assistant",
                    "content": [
                        {"type": "thinking", "thinking": "Visible thought."},
                        {"type": "redacted_thinking", "data": "hidden"},
                        {"type": "text", "text": "Answer."}
                    ]
                }]
            }`,
            wantReasoningContent:    "Visible thought.",
            wantHasReasoningContent: true,
            wantContentText:         "Answer.",
            wantHasContent:          true,
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            result := ConvertClaudeRequestToOpenAI("test-model", []byte(tt.inputJSON), false)
            resultJSON := gjson.ParseBytes(result)

            // Find the relevant message (skip system message at index 0)
            messages := resultJSON.Get("messages").Array()
            if len(messages) < 2 {
                if tt.wantHasReasoningContent || tt.wantHasContent {
                    t.Fatalf("Expected at least 2 messages (system + user/assistant), got %d", len(messages))
                }
                return
            }

            // Check the last non-system message
            var targetMsg gjson.Result
            for i := len(messages) - 1; i >= 0; i-- {
                if messages[i].Get("role").String() != "system" {
                    targetMsg = messages[i]
                    break
                }
            }

            // Check reasoning_content
            gotReasoningContent := targetMsg.Get("reasoning_content").String()
            gotHasReasoningContent := targetMsg.Get("reasoning_content").Exists()

            if gotHasReasoningContent != tt.wantHasReasoningContent {
                t.Errorf("reasoning_content existence = %v, want %v", gotHasReasoningContent, tt.wantHasReasoningContent)
            }

            if gotReasoningContent != tt.wantReasoningContent {
                t.Errorf("reasoning_content = %q, want %q", gotReasoningContent, tt.wantReasoningContent)
            }

            // Check content
            content := targetMsg.Get("content")
            // content has meaningful content if it's a non-empty array, or a non-empty string
            var gotHasContent bool
            switch {
            case content.IsArray():
                gotHasContent = len(content.Array()) > 0
            case content.Type == gjson.String:
                gotHasContent = content.String() != ""
            default:
                gotHasContent = false
            }

            if gotHasContent != tt.wantHasContent {
                t.Errorf("content existence = %v, want %v", gotHasContent, tt.wantHasContent)
            }

            if tt.wantHasContent && tt.wantContentText != "" {
                // Find text content
                var foundText string
                content.ForEach(func(_, v gjson.Result) bool {
                    if v.Get("type").String() == "text" {
                        foundText = v.Get("text").String()
                        return false
                    }
                    return true
                })
                if foundText != tt.wantContentText {
                    t.Errorf("content text = %q, want %q", foundText, tt.wantContentText)
                }
            }
        })
    }
}

// TestConvertClaudeRequestToOpenAI_ThinkingOnlyMessagePreserved tests AC3:
// that a message with only thinking content is preserved (not dropped).
func TestConvertClaudeRequestToOpenAI_ThinkingOnlyMessagePreserved(t *testing.T) {
    inputJSON := `{
        "model": "claude-3-opus",
        "messages": [
            {
                "role": "user",
                "content": [{"type": "text", "text": "What is 2+2?"}]
            },
            {
                "role": "assistant",
                "content": [{"type": "thinking", "thinking": "Let me calculate: 2+2=4"}]
            },
            {
                "role": "user",
                "content": [{"type": "text", "text": "Thanks"}]
            }
        ]
    }`

    result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
    resultJSON := gjson.ParseBytes(result)

    messages := resultJSON.Get("messages").Array()

    // Should have: system (auto-added) + user + assistant (thinking-only) + user = 4 messages
    if len(messages) != 4 {
        t.Fatalf("Expected 4 messages, got %d. Messages: %v", len(messages), resultJSON.Get("messages").Raw)
    }

    // Check the assistant message (index 2) has reasoning_content
    assistantMsg := messages[2]
    if assistantMsg.Get("role").String() != "assistant" {
        t.Errorf("Expected message[2] to be assistant, got %s", assistantMsg.Get("role").String())
    }

    if !assistantMsg.Get("reasoning_content").Exists() {
        t.Error("Expected assistant message to have reasoning_content")
    }

    if assistantMsg.Get("reasoning_content").String() != "Let me calculate: 2+2=4" {
        t.Errorf("Unexpected reasoning_content: %s", assistantMsg.Get("reasoning_content").String())
    }
}

func TestConvertClaudeRequestToOpenAI_ToolResultOrderAndContent(t *testing.T) {
    inputJSON := `{
        "model": "claude-3-opus",
        "messages": [
            {
                "role": "assistant",
                "content": [
                    {"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}}
                ]
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "before"},
                    {"type": "tool_result", "tool_use_id": "call_1", "content": [{"type":"text","text":"tool ok"}]},
                    {"type": "text", "text": "after"}
                ]
            }
        ]
    }`

    result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
    resultJSON := gjson.ParseBytes(result)
    messages := resultJSON.Get("messages").Array()

    // OpenAI requires: tool messages MUST immediately follow assistant(tool_calls).
    // Correct order: system + assistant(tool_calls) + tool(result) + user(before+after)
    if len(messages) != 4 {
        t.Fatalf("Expected 4 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
    }

    if messages[0].Get("role").String() != "system" {
        t.Fatalf("Expected messages[0] to be system, got %s", messages[0].Get("role").String())
    }

    if messages[1].Get("role").String() != "assistant" || !messages[1].Get("tool_calls").Exists() {
        t.Fatalf("Expected messages[1] to be assistant tool_calls, got %s: %s", messages[1].Get("role").String(), messages[1].Raw)
    }

    // tool message MUST immediately follow assistant(tool_calls) per OpenAI spec
    if messages[2].Get("role").String() != "tool" {
        t.Fatalf("Expected messages[2] to be tool (must follow tool_calls), got %s", messages[2].Get("role").String())
    }
    if got := messages[2].Get("tool_call_id").String(); got != "call_1" {
        t.Fatalf("Expected tool_call_id %q, got %q", "call_1", got)
    }
    if got := messages[2].Get("content").String(); got != "tool ok" {
        t.Fatalf("Expected tool content %q, got %q", "tool ok", got)
    }

    // User message comes after tool message
    if messages[3].Get("role").String() != "user" {
        t.Fatalf("Expected messages[3] to be user, got %s", messages[3].Get("role").String())
    }
    // User message should contain both "before" and "after" text
    if got := messages[3].Get("content.0.text").String(); got != "before" {
        t.Fatalf("Expected user text[0] %q, got %q", "before", got)
    }
    if got := messages[3].Get("content.1.text").String(); got != "after" {
        t.Fatalf("Expected user text[1] %q, got %q", "after", got)
    }
}

func TestConvertClaudeRequestToOpenAI_ToolResultObjectContent(t *testing.T) {
    inputJSON := `{
        "model": "claude-3-opus",
        "messages": [
            {
                "role": "assistant",
                "content": [
                    {"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}}
                ]
            },
            {
                "role": "user",
                "content": [
                    {"type": "tool_result", "tool_use_id": "call_1", "content": {"foo": "bar"}}
                ]
            }
        ]
    }`

    result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
    resultJSON := gjson.ParseBytes(result)
    messages := resultJSON.Get("messages").Array()

    // system + assistant(tool_calls) + tool(result)
    if len(messages) != 3 {
        t.Fatalf("Expected 3 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
    }

    if messages[2].Get("role").String() != "tool" {
        t.Fatalf("Expected messages[2] to be tool, got %s", messages[2].Get("role").String())
    }

    toolContent := messages[2].Get("content").String()
    parsed := gjson.Parse(toolContent)
    if parsed.Get("foo").String() != "bar" {
        t.Fatalf("Expected tool content JSON foo=bar, got %q", toolContent)
    }
}

func TestConvertClaudeRequestToOpenAI_AssistantTextToolUseTextOrder(t *testing.T) {
    inputJSON := `{
        "model": "claude-3-opus",
        "messages": [
            {
                "role": "assistant",
                "content": [
                    {"type": "text", "text": "pre"},
                    {"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}},
                    {"type": "text", "text": "post"}
                ]
            }
        ]
    }`

    result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
    resultJSON := gjson.ParseBytes(result)
    messages := resultJSON.Get("messages").Array()

    // New behavior: content + tool_calls unified in single assistant message
    // Expect: system + assistant(content[pre,post] + tool_calls)
    if len(messages) != 2 {
        t.Fatalf("Expected 2 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
    }

    if messages[0].Get("role").String() != "system" {
        t.Fatalf("Expected messages[0] to be system, got %s", messages[0].Get("role").String())
    }

    assistantMsg := messages[1]
    if assistantMsg.Get("role").String() != "assistant" {
        t.Fatalf("Expected messages[1] to be assistant, got %s", assistantMsg.Get("role").String())
    }

    // Should have both content and tool_calls in same message
    if !assistantMsg.Get("tool_calls").Exists() {
        t.Fatalf("Expected assistant message to have tool_calls")
    }
    if got := assistantMsg.Get("tool_calls.0.id").String(); got != "call_1" {
        t.Fatalf("Expected tool_call id %q, got %q", "call_1", got)
    }
    if got := assistantMsg.Get("tool_calls.0.function.name").String(); got != "do_work" {
        t.Fatalf("Expected tool_call name %q, got %q", "do_work", got)
    }

    // Content should have both pre and post text
    if got := assistantMsg.Get("content.0.text").String(); got != "pre" {
        t.Fatalf("Expected content[0] text %q, got %q", "pre", got)
    }
    if got := assistantMsg.Get("content.1.text").String(); got != "post" {
        t.Fatalf("Expected content[1] text %q, got %q", "post", got)
    }
}

func TestConvertClaudeRequestToOpenAI_AssistantThinkingToolUseThinkingSplit(t *testing.T) {
    inputJSON := `{
        "model": "claude-3-opus",
        "messages": [
            {
                "role": "assistant",
                "content": [
                    {"type": "thinking", "thinking": "t1"},
                    {"type": "text", "text": "pre"},
                    {"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}},
                    {"type": "thinking", "thinking": "t2"},
                    {"type": "text", "text": "post"}
                ]
            }
        ]
    }`

    result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
    resultJSON := gjson.ParseBytes(result)
    messages := resultJSON.Get("messages").Array()

    // New behavior: all content, thinking, and tool_calls unified in single assistant message
    // Expect: system + assistant(content[pre,post] + tool_calls + reasoning_content[t1+t2])
    if len(messages) != 2 {
        t.Fatalf("Expected 2 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
    }

    assistantMsg := messages[1]
    if assistantMsg.Get("role").String() != "assistant" {
        t.Fatalf("Expected messages[1] to be assistant, got %s", assistantMsg.Get("role").String())
    }

    // Should have content with both pre and post
    if got := assistantMsg.Get("content.0.text").String(); got != "pre" {
        t.Fatalf("Expected content[0] text %q, got %q", "pre", got)
    }
    if got := assistantMsg.Get("content.1.text").String(); got != "post" {
        t.Fatalf("Expected content[1] text %q, got %q", "post", got)
    }

    // Should have tool_calls
    if !assistantMsg.Get("tool_calls").Exists() {
        t.Fatalf("Expected assistant message to have tool_calls")
    }

    // Should have combined reasoning_content from both thinking blocks
    if got := assistantMsg.Get("reasoning_content").String(); got != "t1\n\nt2" {
        t.Fatalf("Expected reasoning_content %q, got %q", "t1\n\nt2", got)
    }
}

@@ -480,15 +480,15 @@ func collectOpenAIReasoningTexts(node gjson.Result) []string {

 	switch node.Type {
 	case gjson.String:
-		if text := strings.TrimSpace(node.String()); text != "" {
+		if text := node.String(); text != "" {
 			texts = append(texts, text)
 		}
 	case gjson.JSON:
 		if text := node.Get("text"); text.Exists() {
-			if trimmed := strings.TrimSpace(text.String()); trimmed != "" {
-				texts = append(texts, trimmed)
+			if textStr := text.String(); textStr != "" {
+				texts = append(texts, textStr)
 			}
-		} else if raw := strings.TrimSpace(node.Raw); raw != "" && !strings.HasPrefix(raw, "{") && !strings.HasPrefix(raw, "[") {
+		} else if raw := node.Raw; raw != "" && !strings.HasPrefix(raw, "{") && !strings.HasPrefix(raw, "[") {
 			texts = append(texts, raw)
 		}
 	}

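Editor's note: the hunk above stops trimming whitespace while collecting reasoning texts. A small sketch of the resulting behavior; the input literal and demo function name are invented for illustration.

// Hypothetical demo: surrounding whitespace in a reasoning string is now
// preserved rather than trimmed before collection.
func sketchCollectReasoning() {
    texts := collectOpenAIReasoningTexts(gjson.Parse(`"  step one  "`))
    fmt.Printf("%q\n", texts) // ["  step one  "]; the old code yielded ["step one"]
}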

@@ -390,6 +390,11 @@ func addEmptySchemaPlaceholder(jsonStr string) string {

 	// If schema has properties but none are required, add a minimal placeholder.
 	if propsVal.IsObject() && !hasRequiredProperties {
+		// DO NOT add placeholder if it's a top-level schema (parentPath is empty)
+		// or if we've already added a placeholder reason above.
+		if parentPath == "" {
+			continue
+		}
 		placeholderPath := joinPath(propsPath, "_")
 		if !gjson.Get(jsonStr, placeholderPath).Exists() {
 			jsonStr, _ = sjson.Set(jsonStr, placeholderPath+".type", "boolean")

@@ -127,8 +127,10 @@ func TestCleanJSONSchemaForAntigravity_AnyOfFlattening_SmartSelection(t *testing
 			"type": "object",
 			"description": "Accepts: null | object",
 			"properties": {
+				"_": { "type": "boolean" },
 				"kind": { "type": "string" }
 			}
 		},
+		"required": ["_"]
 	}
 }
}`
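
Editor's note: a before/after sketch of the placeholder rule above; the input schema and wrapper function are invented, and the expected output shape is taken from the test hunk just shown.

// Hypothetical input: a nested object schema whose properties have no
// required entries. Per the hunk, the top-level schema is skipped
// (parentPath == ""), while the nested "cfg" object gains the boolean "_"
// placeholder seen in the test above:
//   "properties": {"_": {"type": "boolean"}, ...}, "required": ["_"]
func sketchPlaceholder() {
    in := `{"type":"object","properties":{"cfg":{"type":"object","properties":{"kind":{"type":"string"}}}}}`
    out := addEmptySchemaPlaceholder(in)
    _ = out
}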

@@ -251,9 +251,14 @@ func ThinkingBudgetToGemini3Level(model string, budget int) (string, bool) {

 // modelsWithDefaultThinking lists models that should have thinking enabled by default
 // when no explicit thinkingConfig is provided.
+// Note: Gemini 3 models are NOT included here because per Google's official documentation:
+//   - thinkingLevel defaults to "high" (dynamic thinking)
+//   - includeThoughts defaults to false
+//
+// We should not override these API defaults; let users explicitly configure if needed.
 var modelsWithDefaultThinking = map[string]bool{
-	"gemini-3-pro-preview":       true,
-	"gemini-3-pro-image-preview": true,
+	// "gemini-3-pro-preview": true,
+	// "gemini-3-pro-image-preview": true,
+	// "gemini-3-flash-preview": true,
 }
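
Editor's note: a hypothetical consumer illustrating the intent of the comment above; the helper name and the hasExplicitConfig flag are invented for the sketch.

// Only models listed in modelsWithDefaultThinking get a default thinkingConfig
// injected; Gemini 3 models fall through to the documented API defaults
// (thinkingLevel "high", includeThoughts false).
func shouldInjectDefaultThinking(model string, hasExplicitConfig bool) bool {
    return !hasExplicitConfig && modelsWithDefaultThinking[model]
}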

56	internal/util/sanitize_test.go	Normal file
@@ -0,0 +1,56 @@
package util

import (
    "testing"
)

func TestSanitizeFunctionName(t *testing.T) {
    tests := []struct {
        name     string
        input    string
        expected string
    }{
        {"Normal", "valid_name", "valid_name"},
        {"With Dots", "name.with.dots", "name.with.dots"},
        {"With Colons", "name:with:colons", "name:with:colons"},
        {"With Dashes", "name-with-dashes", "name-with-dashes"},
        {"Mixed Allowed", "name.with_dots:colons-dashes", "name.with_dots:colons-dashes"},
        {"Invalid Characters", "name!with@invalid#chars", "name_with_invalid_chars"},
        {"Spaces", "name with spaces", "name_with_spaces"},
        {"Non-ASCII", "name_with_你好_chars", "name_with____chars"},
        {"Starts with digit", "123name", "_123name"},
        {"Starts with dot", ".name", "_.name"},
        {"Starts with colon", ":name", "_:name"},
        {"Starts with dash", "-name", "_-name"},
        {"Starts with invalid char", "!name", "_name"},
        {"Exactly 64 chars", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charact", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charact"},
        {"Too long (65 chars)", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charactX", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charact"},
        {"Very long", "this_is_a_very_long_name_that_exceeds_the_sixty_four_character_limit_for_function_names", "this_is_a_very_long_name_that_exceeds_the_sixty_four_character_l"},
        {"Starts with digit (64 chars total)", "1234567890123456789012345678901234567890123456789012345678901234", "_123456789012345678901234567890123456789012345678901234567890123"},
        {"Starts with invalid char (64 chars total)", "!234567890123456789012345678901234567890123456789012345678901234", "_234567890123456789012345678901234567890123456789012345678901234"},
        {"Empty", "", ""},
        {"Single character invalid", "@", "_"},
        {"Single character valid", "a", "a"},
        {"Single character digit", "1", "_1"},
        {"Single character underscore", "_", "_"},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            got := SanitizeFunctionName(tt.input)
            if got != tt.expected {
                t.Errorf("SanitizeFunctionName(%q) = %v, want %v", tt.input, got, tt.expected)
            }
            // Verify Gemini compliance
            if len(got) > 64 {
                t.Errorf("SanitizeFunctionName(%q) result too long: %d", tt.input, len(got))
            }
            if len(got) > 0 {
                first := got[0]
                if !((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_') {
                    t.Errorf("SanitizeFunctionName(%q) result starts with invalid char: %c", tt.input, first)
                }
            }
        })
    }
}

@@ -8,12 +8,52 @@ import (
 	"io/fs"
 	"os"
 	"path/filepath"
+	"regexp"
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	log "github.com/sirupsen/logrus"
 )

+var functionNameSanitizer = regexp.MustCompile(`[^a-zA-Z0-9_.:-]`)
+
+// SanitizeFunctionName ensures a function name matches the requirements for Gemini/Vertex AI.
+// It replaces invalid characters with underscores, ensures it starts with a letter or underscore,
+// and truncates it to 64 characters if necessary.
+// Regex Rule: [^a-zA-Z0-9_.:-] replaced with _.
+func SanitizeFunctionName(name string) string {
+	if name == "" {
+		return ""
+	}
+
+	// Replace invalid characters with underscore.
+	sanitized := functionNameSanitizer.ReplaceAllString(name, "_")
+
+	// Ensure the result starts with a letter or an underscore.
+	if len(sanitized) > 0 {
+		first := sanitized[0]
+		if !((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_') {
+			// The name starts with a character that is allowed elsewhere but not
+			// at the beginning (digit, dot, colon, dash), so prepend an underscore.
+			// To stay within the 64-character limit while prepending, truncate first.
+			if len(sanitized) >= 64 {
+				sanitized = sanitized[:63]
+			}
+			sanitized = "_" + sanitized
+		}
+	} else {
+		sanitized = "_"
+	}
+
+	// Truncate to 64 characters.
+	if len(sanitized) > 64 {
+		sanitized = sanitized[:64]
+	}
+	return sanitized
+}
+
 // SetLogLevel configures the logrus log level based on the configuration.
 // It sets the log level to DebugLevel if debug mode is enabled, otherwise to InfoLevel.
 func SetLogLevel(cfg *config.Config) {
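
Editor's note: a short, package-internal usage sketch of SanitizeFunctionName; the demo function and fmt import are assumptions, while the expected outputs come directly from the test table above.

func sketchSanitize() {
    fmt.Println(SanitizeFunctionName("name!with@invalid#chars")) // name_with_invalid_chars
    fmt.Println(SanitizeFunctionName("123name"))                 // _123name
    fmt.Println(SanitizeFunctionName(".name"))                   // _.name
}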

@@ -13,6 +13,7 @@ type ModelRegistry interface {
 	ClearModelQuotaExceeded(clientID, modelID string)
 	ClientSupportsModel(clientID, modelID string) bool
 	GetAvailableModels(handlerType string) []map[string]any
+	GetAvailableModelsByProvider(provider string) []*ModelInfo
 }
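
Editor's note: a minimal caller sketch for the newly added registry method; the function name is invented, and ModelInfo's fields are not shown in this hunk.

// listModelsFor returns the registry's models for a single provider, using
// the method added in this change.
func listModelsFor(reg ModelRegistry, provider string) []*ModelInfo {
    return reg.GetAvailableModelsByProvider(provider)
}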

// GlobalModelRegistry returns the shared registry instance.