From 32d3809f8c2e09f57c7b04369e215e4902187272 Mon Sep 17 00:00:00 2001 From: auroraflux <14947763+auroraflux@users.noreply.github.com> Date: Sun, 30 Nov 2025 00:29:50 -0800 Subject: [PATCH] feat(util): add -reasoning suffix support for Gemini models Adds support for the `-reasoning` model name suffix, which enables thinking/reasoning mode with dynamic budget. This allows clients to request reasoning-enabled inference using model names like `gemini-2.5-flash-reasoning` without explicit configuration. The suffix is normalized to the base model (e.g., gemini-2.5-flash) with thinkingBudget=-1 (dynamic) and include_thoughts=true. Follows the existing pattern established by -nothinking and -thinking-N suffixes. --- .../runtime/executor/antigravity_executor.go | 19 +++++++++++++++++++ internal/util/gemini_thinking.go | 9 +++++++++ 2 files changed, 28 insertions(+) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 13373537..bcc64310 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -17,6 +17,7 @@ import ( "github.com/google/uuid" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" @@ -58,6 +59,20 @@ func (e *AntigravityExecutor) Identifier() string { return antigravityAuthType } // PrepareRequest implements ProviderExecutor. func (e *AntigravityExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error { return nil } +// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N). 
+// It trusts user intent when suffix is used, even if registry doesn't have Thinking metadata. +func applyThinkingMetadata(translated []byte, metadata map[string]any, model string) []byte { + budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata) + if !ok { + return translated + } + if budgetOverride != nil && util.ModelSupportsThinking(model) { + norm := util.NormalizeThinkingBudget(model, *budgetOverride) + budgetOverride = &norm + } + return util.ApplyGeminiCLIThinkingConfig(translated, budgetOverride, includeOverride) +} + // Execute handles non-streaming requests via the antigravity generate endpoint. func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth) @@ -75,6 +90,8 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("antigravity") translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + translated = applyThinkingMetadata(translated, req.Metadata, req.Model) + baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -166,6 +183,8 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya to := sdktranslator.FromString("antigravity") translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + translated = applyThinkingMetadata(translated, req.Metadata, req.Model) + baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go index d7481621..14077fa0 100644 --- a/internal/util/gemini_thinking.go +++ b/internal/util/gemini_thinking.go @@ -34,6 +34,15 @@ func 
ParseGeminiThinkingSuffix(model string) (string, *int, *bool, bool) { return base, &budgetValue, &include, true } + // Handle "-reasoning" suffix: enables thinking with dynamic budget (-1) + // Maps: gemini-2.5-flash-reasoning -> gemini-2.5-flash with thinkingBudget=-1 + if strings.HasSuffix(lower, "-reasoning") { + base := model[:len(model)-len("-reasoning")] + budgetValue := -1 // Dynamic budget + include := true + return base, &budgetValue, &include, true + } + idx := strings.LastIndex(lower, "-thinking-") if idx == -1 { return model, nil, nil, false