mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-03 04:50:52 +08:00
feat(antigravity): enforce thinking budget limits for Claude models
This commit is contained in:
@@ -943,18 +943,6 @@ func GetQwenModels() []*ModelInfo {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetAntigravityThinkingConfig returns the Thinking configuration for antigravity models.
|
|
||||||
// Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup.
|
|
||||||
func GetAntigravityThinkingConfig() map[string]*ThinkingSupport {
|
|
||||||
return map[string]*ThinkingSupport{
|
|
||||||
"gemini-2.5-flash": {Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
|
||||||
"gemini-2.5-flash-lite": {Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
|
|
||||||
"gemini-3-pro-preview": {Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
|
|
||||||
"gemini-claude-sonnet-4-5-thinking": {Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true},
|
|
||||||
"gemini-claude-opus-4-5-thinking": {Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetIFlowModels returns supported models for iFlow OAuth accounts.
|
// GetIFlowModels returns supported models for iFlow OAuth accounts.
|
||||||
func GetIFlowModels() []*ModelInfo {
|
func GetIFlowModels() []*ModelInfo {
|
||||||
entries := []struct {
|
entries := []struct {
|
||||||
@@ -997,3 +985,22 @@ func GetIFlowModels() []*ModelInfo {
|
|||||||
}
|
}
|
||||||
return models
|
return models
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AntigravityModelConfig captures static antigravity model overrides, including
|
||||||
|
// Thinking budget limits and provider max completion tokens.
|
||||||
|
type AntigravityModelConfig struct {
|
||||||
|
Thinking *ThinkingSupport
|
||||||
|
MaxCompletionTokens int
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAntigravityModelConfig returns static configuration for antigravity models.
|
||||||
|
// Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup.
|
||||||
|
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
|
||||||
|
return map[string]*AntigravityModelConfig{
|
||||||
|
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
||||||
|
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
||||||
|
"gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
|
||||||
|
"gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||||
|
"gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -77,6 +77,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
|
|||||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
|
|
||||||
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||||
|
translated = normalizeAntigravityThinking(req.Model, translated)
|
||||||
|
|
||||||
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
||||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||||
@@ -170,6 +171,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
|
|||||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
|
|
||||||
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||||
|
translated = normalizeAntigravityThinking(req.Model, translated)
|
||||||
|
|
||||||
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
||||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||||
@@ -366,7 +368,7 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
|
|||||||
}
|
}
|
||||||
|
|
||||||
now := time.Now().Unix()
|
now := time.Now().Unix()
|
||||||
thinkingConfig := registry.GetAntigravityThinkingConfig()
|
modelConfig := registry.GetAntigravityModelConfig()
|
||||||
models := make([]*registry.ModelInfo, 0, len(result.Map()))
|
models := make([]*registry.ModelInfo, 0, len(result.Map()))
|
||||||
for originalName := range result.Map() {
|
for originalName := range result.Map() {
|
||||||
aliasName := modelName2Alias(originalName)
|
aliasName := modelName2Alias(originalName)
|
||||||
@@ -383,8 +385,13 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
|
|||||||
Type: antigravityAuthType,
|
Type: antigravityAuthType,
|
||||||
}
|
}
|
||||||
// Look up Thinking support from static config using alias name
|
// Look up Thinking support from static config using alias name
|
||||||
if thinking, ok := thinkingConfig[aliasName]; ok {
|
if cfg, ok := modelConfig[aliasName]; ok {
|
||||||
modelInfo.Thinking = thinking
|
if cfg.Thinking != nil {
|
||||||
|
modelInfo.Thinking = cfg.Thinking
|
||||||
|
}
|
||||||
|
if cfg.MaxCompletionTokens > 0 {
|
||||||
|
modelInfo.MaxCompletionTokens = cfg.MaxCompletionTokens
|
||||||
|
}
|
||||||
}
|
}
|
||||||
models = append(models, modelInfo)
|
models = append(models, modelInfo)
|
||||||
}
|
}
|
||||||
@@ -804,3 +811,53 @@ func alias2ModelName(modelName string) string {
|
|||||||
return modelName
|
return modelName
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// normalizeAntigravityThinking clamps or removes thinking config based on model support.
|
||||||
|
// For Claude models, it additionally ensures thinking budget < max_tokens.
|
||||||
|
func normalizeAntigravityThinking(model string, payload []byte) []byte {
|
||||||
|
payload = util.StripThinkingConfigIfUnsupported(model, payload)
|
||||||
|
if !util.ModelSupportsThinking(model) {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
budget := gjson.GetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget")
|
||||||
|
if !budget.Exists() {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
raw := int(budget.Int())
|
||||||
|
normalized := util.NormalizeThinkingBudget(model, raw)
|
||||||
|
|
||||||
|
isClaude := strings.Contains(strings.ToLower(model), "claude")
|
||||||
|
if isClaude {
|
||||||
|
effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload)
|
||||||
|
if effectiveMax > 0 && normalized >= effectiveMax {
|
||||||
|
normalized = effectiveMax - 1
|
||||||
|
if normalized < 1 {
|
||||||
|
normalized = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if setDefaultMax {
|
||||||
|
if res, errSet := sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax); errSet == nil {
|
||||||
|
payload = res
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
updated, err := sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
|
||||||
|
if err != nil {
|
||||||
|
return payload
|
||||||
|
}
|
||||||
|
return updated
|
||||||
|
}
|
||||||
|
|
||||||
|
// antigravityEffectiveMaxTokens returns the max tokens to cap thinking:
|
||||||
|
// prefer request-provided maxOutputTokens; otherwise fall back to model default.
|
||||||
|
// The boolean indicates whether the value came from the model default (and thus should be written back).
|
||||||
|
func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromModel bool) {
|
||||||
|
if maxTok := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens"); maxTok.Exists() && maxTok.Int() > 0 {
|
||||||
|
return int(maxTok.Int()), false
|
||||||
|
}
|
||||||
|
if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.MaxCompletionTokens > 0 {
|
||||||
|
return modelInfo.MaxCompletionTokens, true
|
||||||
|
}
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
|||||||
@@ -111,7 +111,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
|||||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Temperature/top_p/top_k
|
// Temperature/top_p/top_k/max_tokens
|
||||||
if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
|
if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
|
||||||
out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
|
out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
|
||||||
}
|
}
|
||||||
@@ -121,6 +121,9 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
|||||||
if tkr := gjson.GetBytes(rawJSON, "top_k"); tkr.Exists() && tkr.Type == gjson.Number {
|
if tkr := gjson.GetBytes(rawJSON, "top_k"); tkr.Exists() && tkr.Type == gjson.Number {
|
||||||
out, _ = sjson.SetBytes(out, "request.generationConfig.topK", tkr.Num)
|
out, _ = sjson.SetBytes(out, "request.generationConfig.topK", tkr.Num)
|
||||||
}
|
}
|
||||||
|
if maxTok := gjson.GetBytes(rawJSON, "max_tokens"); maxTok.Exists() && maxTok.Type == gjson.Number {
|
||||||
|
out, _ = sjson.SetBytes(out, "request.generationConfig.maxOutputTokens", maxTok.Num)
|
||||||
|
}
|
||||||
|
|
||||||
// Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities
|
// Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities
|
||||||
// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]
|
// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]
|
||||||
|
|||||||
Reference in New Issue
Block a user