diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index cf8e216e..fffb50c4 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -393,7 +393,10 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c } originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream) payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream) - payload, _ = thinking.ApplyThinking(payload, req.Model, "gemini") + payload, err := thinking.ApplyThinking(payload, req.Model, "gemini") + if err != nil { + return nil, translatedPayload{}, err + } payload = fixGeminiImageAspectRatio(baseModel, payload) payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated) payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens") diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 4f704c05..94417ba0 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -136,7 +136,10 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity") + translated, err = thinking.ApplyThinking(translated, req.Model, "antigravity") + if err != nil { + return resp, err + } // Preserve Claude special handling (use baseModel for registry lookups) translated = normalizeAntigravityThinking(baseModel, translated, isClaude) @@ -254,7 +257,10 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity") + translated, err = thinking.ApplyThinking(translated, req.Model, "antigravity") + if err != nil { + return resp, err + } // Preserve Claude special handling (use baseModel for registry lookups) translated = normalizeAntigravityThinking(baseModel, translated, true) @@ -620,7 +626,10 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity") + translated, err = thinking.ApplyThinking(translated, req.Model, "antigravity") + if err != nil { + return nil, err + } // Preserve Claude special handling (use baseModel for registry lookups) translated = normalizeAntigravityThinking(baseModel, translated, isClaude) @@ -801,7 +810,10 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut // Prepare payload once (doesn't depend on baseURL) payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - payload, _ = thinking.ApplyThinking(payload, req.Model, "antigravity") + payload, err := thinking.ApplyThinking(payload, req.Model, "antigravity") + if err != nil { + return cliproxyexecutor.Response{}, err + } // Preserve Claude special handling (use baseModel for registry lookups) payload = normalizeAntigravityThinking(baseModel, payload, isClaude) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 9f2a5b22..636fefad 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -106,7 +106,10 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream) body, _ = sjson.SetBytes(body, "model", baseModel) - body, _ = thinking.ApplyThinking(body, req.Model, "claude") + body, err = thinking.ApplyThinking(body, req.Model, "claude") + if err != nil { + return resp, err + } if !strings.HasPrefix(baseModel, "claude-3-5-haiku") { body = checkSystemInstructions(body) @@ -236,7 +239,10 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) body, _ = sjson.SetBytes(body, "model", baseModel) - body, _ = thinking.ApplyThinking(body, req.Model, "claude") + body, err = thinking.ApplyThinking(body, req.Model, "claude") + if err != nil { + return nil, err + } body = checkSystemInstructions(body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 9e553e3c..273987ea 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -96,7 +96,10 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re body = sdktranslator.TranslateRequest(from, to, baseModel, body, false) body = misc.StripCodexUserAgent(body) - body, _ = thinking.ApplyThinking(body, req.Model, "codex") + body, err = thinking.ApplyThinking(body, req.Model, "codex") + if err != nil { + return resp, err + } body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) body, _ = sjson.SetBytes(body, "model", baseModel) @@ -201,7 +204,10 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au body = sdktranslator.TranslateRequest(from, to, baseModel, body, true) body = misc.StripCodexUserAgent(body) - body, _ = thinking.ApplyThinking(body, req.Model, "codex") + body, err = thinking.ApplyThinking(body, req.Model, "codex") + if err != nil { + return nil, err + } body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) body, _ = sjson.DeleteBytes(body, "previous_response_id") @@ -302,7 +308,10 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth body = sdktranslator.TranslateRequest(from, to, baseModel, body, false) body = misc.StripCodexUserAgent(body) - body, _ = thinking.ApplyThinking(body, req.Model, "codex") + body, err := thinking.ApplyThinking(body, req.Model, "codex") + if err != nil { + return cliproxyexecutor.Response{}, err + } body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 3d08b830..add01cb3 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -123,7 +123,10 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - basePayload, _ = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli") + basePayload, err = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli") + if err != nil { + return resp, err + } basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload) basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated) @@ -269,7 +272,10 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - basePayload, _ = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli") + basePayload, err = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli") + if err != nil { + return nil, err + } basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload) basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated) @@ -473,7 +479,10 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. for range models { payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - payload, _ = thinking.ApplyThinking(payload, req.Model, "gemini-cli") + payload, err = thinking.ApplyThinking(payload, req.Model, "gemini-cli") + if err != nil { + return cliproxyexecutor.Response{}, err + } payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index fd6ec22e..4cc5d945 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -120,7 +120,10 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + body, err = thinking.ApplyThinking(body, req.Model, "gemini") + if err != nil { + return resp, err + } body = fixGeminiImageAspectRatio(baseModel, body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -219,7 +222,10 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + body, err = thinking.ApplyThinking(body, req.Model, "gemini") + if err != nil { + return nil, err + } body = fixGeminiImageAspectRatio(baseModel, body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -332,7 +338,10 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("gemini") translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini") + translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, "gemini") + if err != nil { + return cliproxyexecutor.Response{}, err + } translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) respCtx := context.WithValue(ctx, "alt", opts.Alt) diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index 18d9f8d6..8a412b47 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -170,7 +170,10 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + body, err = thinking.ApplyThinking(body, req.Model, "gemini") + if err != nil { + return resp, err + } body = fixGeminiImageAspectRatio(baseModel, body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -269,7 +272,10 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + body, err = thinking.ApplyThinking(body, req.Model, "gemini") + if err != nil { + return resp, err + } body = fixGeminiImageAspectRatio(baseModel, body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -369,7 +375,10 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + body, err = thinking.ApplyThinking(body, req.Model, "gemini") + if err != nil { + return nil, err + } body = fixGeminiImageAspectRatio(baseModel, body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -485,7 +494,10 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + body, err = thinking.ApplyThinking(body, req.Model, "gemini") + if err != nil { + return nil, err + } body = fixGeminiImageAspectRatio(baseModel, body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -593,7 +605,10 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini") + translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, "gemini") + if err != nil { + return cliproxyexecutor.Response{}, err + } translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel) @@ -674,7 +689,10 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini") + translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, "gemini") + if err != nil { + return cliproxyexecutor.Response{}, err + } translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel) diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index cc158250..6ce4221c 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -92,7 +92,10 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) body, _ = sjson.SetBytes(body, "model", baseModel) - body, _ = thinking.ApplyThinking(body, req.Model, "iflow") + body, err = thinking.ApplyThinking(body, req.Model, "iflow") + if err != nil { + return resp, err + } body = preserveReasoningContentInMessages(body) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -187,7 +190,10 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) body, _ = sjson.SetBytes(body, "model", baseModel) - body, _ = thinking.ApplyThinking(body, req.Model, "iflow") + body, err = thinking.ApplyThinking(body, req.Model, "iflow") + if err != nil { + return nil, err + } body = preserveReasoningContentInMessages(body) // Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour. diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 22e8b4c8..6ae9103f 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -92,7 +92,10 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream) translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated) - translated, _ = thinking.ApplyThinking(translated, req.Model, "openai") + translated, err = thinking.ApplyThinking(translated, req.Model, "openai") + if err != nil { + return resp, err + } url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) @@ -184,7 +187,10 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated) - translated, _ = thinking.ApplyThinking(translated, req.Model, "openai") + translated, err = thinking.ApplyThinking(translated, req.Model, "openai") + if err != nil { + return nil, err + } url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) @@ -291,7 +297,10 @@ func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyau modelForCounting := baseModel - translated, _ = thinking.ApplyThinking(translated, req.Model, "openai") + translated, err := thinking.ApplyThinking(translated, req.Model, "openai") + if err != nil { + return cliproxyexecutor.Response{}, err + } enc, err := tokenizerForModel(modelForCounting) if err != nil { diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index f7162893..ff35c935 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -86,7 +86,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) body, _ = sjson.SetBytes(body, "model", baseModel) - body, _ = thinking.ApplyThinking(body, req.Model, "openai") + body, err = thinking.ApplyThinking(body, req.Model, "openai") + if err != nil { + return resp, err + } body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) @@ -169,7 +172,10 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) body, _ = sjson.SetBytes(body, "model", baseModel) - body, _ = thinking.ApplyThinking(body, req.Model, "openai") + body, err = thinking.ApplyThinking(body, req.Model, "openai") + if err != nil { + return nil, err + } toolsResult := gjson.GetBytes(body, "tools") // I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response. diff --git a/internal/thinking/errors.go b/internal/thinking/errors.go index 56f82c68..1cf9ccd0 100644 --- a/internal/thinking/errors.go +++ b/internal/thinking/errors.go @@ -1,6 +1,8 @@ // Package thinking provides unified thinking configuration processing logic. package thinking +import "net/http" + // ErrorCode represents the type of thinking configuration error. type ErrorCode string @@ -69,3 +71,8 @@ func NewThinkingErrorWithModel(code ErrorCode, message, model string) *ThinkingE Model: model, } } + +// StatusCode implements a portable status code interface for HTTP handlers. +func (e *ThinkingError) StatusCode() int { + return http.StatusBadRequest +} diff --git a/internal/thinking/provider/geminicli/apply.go b/internal/thinking/provider/geminicli/apply.go index 1d5585ba..a4607107 100644 --- a/internal/thinking/provider/geminicli/apply.go +++ b/internal/thinking/provider/geminicli/apply.go @@ -55,6 +55,9 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo * if config.Mode == thinking.ModeAuto { return a.applyBudgetFormat(body, config) } + if config.Mode == thinking.ModeBudget { + return a.applyBudgetFormat(body, config) + } // For non-auto modes, choose format based on model capabilities support := modelInfo.Thinking