diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 64e78199..c82c2b67 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -943,18 +943,6 @@ func GetQwenModels() []*ModelInfo { } } -// GetAntigravityThinkingConfig returns the Thinking configuration for antigravity models. -// Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup. -func GetAntigravityThinkingConfig() map[string]*ThinkingSupport { - return map[string]*ThinkingSupport{ - "gemini-2.5-flash": {Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, - "gemini-2.5-flash-lite": {Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, - "gemini-3-pro-preview": {Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, - "gemini-claude-sonnet-4-5-thinking": {Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, - "gemini-claude-opus-4-5-thinking": {Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, - } -} - // GetIFlowModels returns supported models for iFlow OAuth accounts. func GetIFlowModels() []*ModelInfo { entries := []struct { @@ -997,3 +985,22 @@ func GetIFlowModels() []*ModelInfo { } return models } + +// AntigravityModelConfig captures static antigravity model overrides, including +// Thinking budget limits and provider max completion tokens. +type AntigravityModelConfig struct { + Thinking *ThinkingSupport + MaxCompletionTokens int +} + +// GetAntigravityModelConfig returns static configuration for antigravity models. +// Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup. +func GetAntigravityModelConfig() map[string]*AntigravityModelConfig { + return map[string]*AntigravityModelConfig{ + "gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}}, + "gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}}, + "gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}}, + "gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000}, + "gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000}, + } +} diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index ed9207f0..b2aa231f 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -77,6 +77,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) + translated = normalizeAntigravityThinking(req.Model, translated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -170,6 +171,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) + translated = normalizeAntigravityThinking(req.Model, translated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -366,7 +368,7 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c } now := time.Now().Unix() - thinkingConfig := registry.GetAntigravityThinkingConfig() + modelConfig := registry.GetAntigravityModelConfig() models := make([]*registry.ModelInfo, 0, len(result.Map())) for originalName := range result.Map() { aliasName := modelName2Alias(originalName) @@ -383,8 +385,13 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c Type: antigravityAuthType, } // Look up Thinking support from static config using alias name - if thinking, ok := thinkingConfig[aliasName]; ok { - modelInfo.Thinking = thinking + if cfg, ok := modelConfig[aliasName]; ok { + if cfg.Thinking != nil { + modelInfo.Thinking = cfg.Thinking + } + if cfg.MaxCompletionTokens > 0 { + modelInfo.MaxCompletionTokens = cfg.MaxCompletionTokens + } } models = append(models, modelInfo) } @@ -804,3 +811,53 @@ func alias2ModelName(modelName string) string { return modelName } } + +// normalizeAntigravityThinking clamps or removes thinking config based on model support. +// For Claude models, it additionally ensures thinking budget < max_tokens. +func normalizeAntigravityThinking(model string, payload []byte) []byte { + payload = util.StripThinkingConfigIfUnsupported(model, payload) + if !util.ModelSupportsThinking(model) { + return payload + } + budget := gjson.GetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget") + if !budget.Exists() { + return payload + } + raw := int(budget.Int()) + normalized := util.NormalizeThinkingBudget(model, raw) + + isClaude := strings.Contains(strings.ToLower(model), "claude") + if isClaude { + effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload) + if effectiveMax > 0 && normalized >= effectiveMax { + normalized = effectiveMax - 1 + if normalized < 1 { + normalized = 1 + } + } + if setDefaultMax { + if res, errSet := sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax); errSet == nil { + payload = res + } + } + } + + updated, err := sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget", normalized) + if err != nil { + return payload + } + return updated +} + +// antigravityEffectiveMaxTokens returns the max tokens to cap thinking: +// prefer request-provided maxOutputTokens; otherwise fall back to model default. +// The boolean indicates whether the value came from the model default (and thus should be written back). +func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromModel bool) { + if maxTok := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens"); maxTok.Exists() && maxTok.Int() > 0 { + return int(maxTok.Int()), false + } + if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.MaxCompletionTokens > 0 { + return modelInfo.MaxCompletionTokens, true + } + return 0, false +} diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index 82e71758..1c90a803 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -111,7 +111,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) } - // Temperature/top_p/top_k + // Temperature/top_p/top_k/max_tokens if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number { out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num) } @@ -121,6 +121,9 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ if tkr := gjson.GetBytes(rawJSON, "top_k"); tkr.Exists() && tkr.Type == gjson.Number { out, _ = sjson.SetBytes(out, "request.generationConfig.topK", tkr.Num) } + if maxTok := gjson.GetBytes(rawJSON, "max_tokens"); maxTok.Exists() && maxTok.Type == gjson.Number { + out, _ = sjson.SetBytes(out, "request.generationConfig.maxOutputTokens", maxTok.Num) + } // Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities // e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]