Compare commits

..

24 Commits

Author SHA1 Message Date
Luis Pater
423ce97665 feat(util): implement dynamic thinking suffix normalization and refactor budget resolution logic
- Added support for parsing and normalizing dynamic thinking model suffixes.
- Centralized budget resolution across executors and payload helpers.
- Retired legacy Gemini-specific thinking handlers in favor of unified logic.
- Updated executors to use metadata-based thinking configuration.
- Added `ResolveOriginalModel` utility for resolving normalized upstream models using request metadata.
- Updated executors (Gemini, Codex, iFlow, OpenAI, Qwen) to incorporate upstream model resolution and substitute model values in payloads and request URLs.
- Ensured fallbacks handle cases with missing or malformed metadata to derive models robustly.
- Refactored upstream model resolution to dynamically incorporate metadata for selecting and normalizing models.
- Improved handling of thinking configurations and model overrides in executors.
- Removed hardcoded thinking model entries and migrated logic to metadata-based resolution.
- Updated payload mutations to always include the resolved model.
2025-12-11 03:10:50 +08:00
Luis Pater
e717939edb Fixed: #478
feat(antigravity): add support for inline image data in client responses
2025-12-10 23:55:53 +08:00
Luis Pater
94d61c7b2b fix(logging): update response aggregation logic to include all attempts 2025-12-10 16:53:48 +08:00
Luis Pater
1249b07eb8 feat(responses): add unique identifiers for responses, function calls, and tool uses 2025-12-10 16:02:54 +08:00
Luis Pater
6b37f33d31 feat(antigravity): add unique identifier for tool use blocks in response 2025-12-10 15:27:57 +08:00
Luis Pater
f25f419e5a fix(antigravity): remove references to autopush endpoint and update fallback logic 2025-12-10 00:13:20 +08:00
Luis Pater
b7e382008f Merge pull request #465 from router-for-me/think
Move thinking budget normalization from translators to executor
2025-12-09 21:10:33 +08:00
hkfires
70d6b95097 feat(amp): add /news.rss proxy route 2025-12-09 21:05:06 +08:00
hkfires
9b202b6c1c fix(executor): centralize default thinking config 2025-12-09 21:05:06 +08:00
hkfires
6a66b6801a feat(executor): enforce minimum thinking budget for antigravity models 2025-12-09 21:05:06 +08:00
hkfires
5b6d201408 refactor(translator): remove thinking budget normalization across all translators 2025-12-09 21:05:06 +08:00
hkfires
5ec9b5e5a9 feat(executor): normalize thinking budget across all Gemini executors 2025-12-09 21:05:06 +08:00
Luis Pater
5db3b58717 Merge pull request #470 from router-for-me/agry
fix(gemini): normalize model listing output
2025-12-09 21:00:29 +08:00
hkfires
347769b3e3 fix(openai-compat): use model id for auth model display 2025-12-09 18:09:14 +08:00
hkfires
3cfe7008a2 fix(registry): update gpt 5.1 model names 2025-12-09 17:55:21 +08:00
hkfires
da23ddb061 fix(gemini): normalize model listing output 2025-12-09 17:34:15 +08:00
Luis Pater
39b6b3b289 Fixed: #463
fix(antigravity): remove `$ref` and `$defs` from JSON during key deletion
2025-12-09 17:32:17 +08:00
Luis Pater
c600519fa4 refactor(logging): replace log.Fatalf with log.Errorf and add error handling paths 2025-12-09 17:16:30 +08:00
hkfires
e5312fb5a2 feat(antigravity): support canonical names for antigravity models 2025-12-09 16:54:13 +08:00
Luis Pater
92df0cada9 Merge pull request #461 from router-for-me/aistudio
feat(aistudio): normalize thinking budget in request translation
2025-12-09 08:41:46 +08:00
hkfires
96b55acff8 feat(aistudio): normalize thinking budget in request translation 2025-12-09 08:27:44 +08:00
Luis Pater
bb45fee1cf Merge remote-tracking branch 'origin/dev' into dev 2025-12-08 23:28:22 +08:00
Luis Pater
af00304b0c fix(antigravity): remove exclusiveMaximum from JSON during key deletion 2025-12-08 23:28:01 +08:00
vuonglv(Andy)
5c3a013cd1 feat(config): add configurable host binding for server (#454)
* feat(config): add configurable host binding for server
2025-12-08 23:16:39 +08:00
44 changed files with 922 additions and 807 deletions

View File

@@ -139,7 +139,8 @@ func main() {
wd, err := os.Getwd()
if err != nil {
log.Fatalf("failed to get working directory: %v", err)
log.Errorf("failed to get working directory: %v", err)
return
}
// Load environment variables from .env if present.
@@ -233,13 +234,15 @@ func main() {
})
cancel()
if err != nil {
log.Fatalf("failed to initialize postgres token store: %v", err)
log.Errorf("failed to initialize postgres token store: %v", err)
return
}
examplePath := filepath.Join(wd, "config.example.yaml")
ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
if errBootstrap := pgStoreInst.Bootstrap(ctx, examplePath); errBootstrap != nil {
cancel()
log.Fatalf("failed to bootstrap postgres-backed config: %v", errBootstrap)
log.Errorf("failed to bootstrap postgres-backed config: %v", errBootstrap)
return
}
cancel()
configFilePath = pgStoreInst.ConfigPath()
@@ -262,7 +265,8 @@ func main() {
if strings.Contains(resolvedEndpoint, "://") {
parsed, errParse := url.Parse(resolvedEndpoint)
if errParse != nil {
log.Fatalf("failed to parse object store endpoint %q: %v", objectStoreEndpoint, errParse)
log.Errorf("failed to parse object store endpoint %q: %v", objectStoreEndpoint, errParse)
return
}
switch strings.ToLower(parsed.Scheme) {
case "http":
@@ -270,10 +274,12 @@ func main() {
case "https":
useSSL = true
default:
log.Fatalf("unsupported object store scheme %q (only http and https are allowed)", parsed.Scheme)
log.Errorf("unsupported object store scheme %q (only http and https are allowed)", parsed.Scheme)
return
}
if parsed.Host == "" {
log.Fatalf("object store endpoint %q is missing host information", objectStoreEndpoint)
log.Errorf("object store endpoint %q is missing host information", objectStoreEndpoint)
return
}
resolvedEndpoint = parsed.Host
if parsed.Path != "" && parsed.Path != "/" {
@@ -292,13 +298,15 @@ func main() {
}
objectStoreInst, err = store.NewObjectTokenStore(objCfg)
if err != nil {
log.Fatalf("failed to initialize object token store: %v", err)
log.Errorf("failed to initialize object token store: %v", err)
return
}
examplePath := filepath.Join(wd, "config.example.yaml")
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
if errBootstrap := objectStoreInst.Bootstrap(ctx, examplePath); errBootstrap != nil {
cancel()
log.Fatalf("failed to bootstrap object-backed config: %v", errBootstrap)
log.Errorf("failed to bootstrap object-backed config: %v", errBootstrap)
return
}
cancel()
configFilePath = objectStoreInst.ConfigPath()
@@ -323,7 +331,8 @@ func main() {
gitStoreInst = store.NewGitTokenStore(gitStoreRemoteURL, gitStoreUser, gitStorePassword)
gitStoreInst.SetBaseDir(authDir)
if errRepo := gitStoreInst.EnsureRepository(); errRepo != nil {
log.Fatalf("failed to prepare git token store: %v", errRepo)
log.Errorf("failed to prepare git token store: %v", errRepo)
return
}
configFilePath = gitStoreInst.ConfigPath()
if configFilePath == "" {
@@ -332,17 +341,21 @@ func main() {
if _, statErr := os.Stat(configFilePath); errors.Is(statErr, fs.ErrNotExist) {
examplePath := filepath.Join(wd, "config.example.yaml")
if _, errExample := os.Stat(examplePath); errExample != nil {
log.Fatalf("failed to find template config file: %v", errExample)
log.Errorf("failed to find template config file: %v", errExample)
return
}
if errCopy := misc.CopyConfigTemplate(examplePath, configFilePath); errCopy != nil {
log.Fatalf("failed to bootstrap git-backed config: %v", errCopy)
log.Errorf("failed to bootstrap git-backed config: %v", errCopy)
return
}
if errCommit := gitStoreInst.PersistConfig(context.Background()); errCommit != nil {
log.Fatalf("failed to commit initial git-backed config: %v", errCommit)
log.Errorf("failed to commit initial git-backed config: %v", errCommit)
return
}
log.Infof("git-backed config initialized from template: %s", configFilePath)
} else if statErr != nil {
log.Fatalf("failed to inspect git-backed config: %v", statErr)
log.Errorf("failed to inspect git-backed config: %v", statErr)
return
}
cfg, err = config.LoadConfigOptional(configFilePath, isCloudDeploy)
if err == nil {
@@ -355,13 +368,15 @@ func main() {
} else {
wd, err = os.Getwd()
if err != nil {
log.Fatalf("failed to get working directory: %v", err)
log.Errorf("failed to get working directory: %v", err)
return
}
configFilePath = filepath.Join(wd, "config.yaml")
cfg, err = config.LoadConfigOptional(configFilePath, isCloudDeploy)
}
if err != nil {
log.Fatalf("failed to load config: %v", err)
log.Errorf("failed to load config: %v", err)
return
}
if cfg == nil {
cfg = &config.Config{}
@@ -391,7 +406,8 @@ func main() {
coreauth.SetQuotaCooldownDisabled(cfg.DisableCooling)
if err = logging.ConfigureLogOutput(cfg.LoggingToFile); err != nil {
log.Fatalf("failed to configure log output: %v", err)
log.Errorf("failed to configure log output: %v", err)
return
}
log.Infof("CLIProxyAPI Version: %s, Commit: %s, BuiltAt: %s", buildinfo.Version, buildinfo.Commit, buildinfo.BuildDate)
@@ -400,7 +416,8 @@ func main() {
util.SetLogLevel(cfg)
if resolvedAuthDir, errResolveAuthDir := util.ResolveAuthDir(cfg.AuthDir); errResolveAuthDir != nil {
log.Fatalf("failed to resolve auth directory: %v", errResolveAuthDir)
log.Errorf("failed to resolve auth directory: %v", errResolveAuthDir)
return
} else {
cfg.AuthDir = resolvedAuthDir
}

View File

@@ -1,3 +1,7 @@
# Server host/interface to bind to. Default is empty ("") to bind all interfaces (IPv4 + IPv6).
# Use "127.0.0.1" or "localhost" to restrict access to local machine only.
host: ""
# Server port
port: 8317

View File

@@ -713,14 +713,16 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
// Generate PKCE codes
pkceCodes, err := claude.GeneratePKCECodes()
if err != nil {
log.Fatalf("Failed to generate PKCE codes: %v", err)
log.Errorf("Failed to generate PKCE codes: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate PKCE codes"})
return
}
// Generate random state parameter
state, err := misc.GenerateRandomState()
if err != nil {
log.Fatalf("Failed to generate state parameter: %v", err)
log.Errorf("Failed to generate state parameter: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate state parameter"})
return
}
@@ -730,7 +732,8 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
// Generate authorization URL (then override redirect_uri to reuse server port)
authURL, state, err := anthropicAuth.GenerateAuthURL(state, pkceCodes)
if err != nil {
log.Fatalf("Failed to generate authorization URL: %v", err)
log.Errorf("Failed to generate authorization URL: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate authorization url"})
return
}
@@ -872,7 +875,7 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
}
savedPath, errSave := h.saveTokenRecord(ctx, record)
if errSave != nil {
log.Fatalf("Failed to save authentication tokens: %v", errSave)
log.Errorf("Failed to save authentication tokens: %v", errSave)
oauthStatus[state] = "Failed to save authentication tokens"
return
}
@@ -1045,7 +1048,7 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
gemAuth := geminiAuth.NewGeminiAuth()
gemClient, errGetClient := gemAuth.GetAuthenticatedClient(ctx, &ts, h.cfg, true)
if errGetClient != nil {
log.Fatalf("failed to get authenticated client: %v", errGetClient)
log.Errorf("failed to get authenticated client: %v", errGetClient)
oauthStatus[state] = "Failed to get authenticated client"
return
}
@@ -1110,7 +1113,7 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
}
savedPath, errSave := h.saveTokenRecord(ctx, record)
if errSave != nil {
log.Fatalf("Failed to save token to file: %v", errSave)
log.Errorf("Failed to save token to file: %v", errSave)
oauthStatus[state] = "Failed to save token to file"
return
}
@@ -1131,14 +1134,16 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
// Generate PKCE codes
pkceCodes, err := codex.GeneratePKCECodes()
if err != nil {
log.Fatalf("Failed to generate PKCE codes: %v", err)
log.Errorf("Failed to generate PKCE codes: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate PKCE codes"})
return
}
// Generate random state parameter
state, err := misc.GenerateRandomState()
if err != nil {
log.Fatalf("Failed to generate state parameter: %v", err)
log.Errorf("Failed to generate state parameter: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate state parameter"})
return
}
@@ -1148,7 +1153,8 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
// Generate authorization URL
authURL, err := openaiAuth.GenerateAuthURL(state, pkceCodes)
if err != nil {
log.Fatalf("Failed to generate authorization URL: %v", err)
log.Errorf("Failed to generate authorization URL: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate authorization url"})
return
}
@@ -1283,7 +1289,7 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
savedPath, errSave := h.saveTokenRecord(ctx, record)
if errSave != nil {
oauthStatus[state] = "Failed to save authentication tokens"
log.Fatalf("Failed to save authentication tokens: %v", errSave)
log.Errorf("Failed to save authentication tokens: %v", errSave)
return
}
fmt.Printf("Authentication successful! Token saved to %s\n", savedPath)
@@ -1318,7 +1324,8 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
state, errState := misc.GenerateRandomState()
if errState != nil {
log.Fatalf("Failed to generate state parameter: %v", errState)
log.Errorf("Failed to generate state parameter: %v", errState)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate state parameter"})
return
}
@@ -1514,7 +1521,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
}
savedPath, errSave := h.saveTokenRecord(ctx, record)
if errSave != nil {
log.Fatalf("Failed to save token to file: %v", errSave)
log.Errorf("Failed to save token to file: %v", errSave)
oauthStatus[state] = "Failed to save token to file"
return
}
@@ -1543,7 +1550,8 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
// Generate authorization URL
deviceFlow, err := qwenAuth.InitiateDeviceFlow(ctx)
if err != nil {
log.Fatalf("Failed to generate authorization URL: %v", err)
log.Errorf("Failed to generate authorization URL: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate authorization url"})
return
}
authURL := deviceFlow.VerificationURIComplete
@@ -1570,7 +1578,7 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
}
savedPath, errSave := h.saveTokenRecord(ctx, record)
if errSave != nil {
log.Fatalf("Failed to save authentication tokens: %v", errSave)
log.Errorf("Failed to save authentication tokens: %v", errSave)
oauthStatus[state] = "Failed to save authentication tokens"
return
}
@@ -1674,7 +1682,7 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
savedPath, errSave := h.saveTokenRecord(ctx, record)
if errSave != nil {
oauthStatus[state] = "Failed to save authentication tokens"
log.Fatalf("Failed to save authentication tokens: %v", errSave)
log.Errorf("Failed to save authentication tokens: %v", errSave)
return
}
@@ -2103,6 +2111,7 @@ func checkCloudAPIIsEnabled(ctx context.Context, httpClient *http.Client, projec
continue
}
}
_ = resp.Body.Close()
return false, fmt.Errorf("project activation required: %s", errMessage)
}
return true, nil

View File

@@ -133,8 +133,8 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
return
}
// Normalize model (handles Gemini thinking suffixes)
normalizedModel, _ := util.NormalizeGeminiThinkingModel(modelName)
// Normalize model (handles dynamic thinking suffixes)
normalizedModel, _ := util.NormalizeThinkingModel(modelName)
// Track resolved model for logging (may change if mapping is applied)
resolvedModel := normalizedModel

View File

@@ -156,6 +156,7 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
rootMiddleware := []gin.HandlerFunc{m.managementAvailabilityMiddleware(), noCORSMiddleware(), m.localhostOnlyMiddleware()}
engine.GET("/threads/*path", append(rootMiddleware, proxyHandler)...)
engine.GET("/threads.rss", append(rootMiddleware, proxyHandler)...)
engine.GET("/news.rss", append(rootMiddleware, proxyHandler)...)
// Root-level auth routes for CLI login flow
// Amp uses multiple auth routes: /auth/cli-login, /auth/callback, /auth/sign-in, /auth/logout

View File

@@ -300,7 +300,7 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
// Create HTTP server
s.server = &http.Server{
Addr: fmt.Sprintf(":%d", cfg.Port),
Addr: fmt.Sprintf("%s:%d", cfg.Host, cfg.Port),
Handler: engine,
}

View File

@@ -76,7 +76,8 @@ func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiToken
auth := &proxy.Auth{User: username, Password: password}
dialer, errSOCKS5 := proxy.SOCKS5("tcp", proxyURL.Host, auth, proxy.Direct)
if errSOCKS5 != nil {
log.Fatalf("create SOCKS5 dialer failed: %v", errSOCKS5)
log.Errorf("create SOCKS5 dialer failed: %v", errSOCKS5)
return nil, fmt.Errorf("create SOCKS5 dialer failed: %w", errSOCKS5)
}
transport = &http.Transport{
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
@@ -238,7 +239,11 @@ func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config,
// Start the server in a goroutine.
go func() {
if err := server.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) {
log.Fatalf("ListenAndServe(): %v", err)
log.Errorf("ListenAndServe(): %v", err)
select {
case errChan <- err:
default:
}
}
}()

View File

@@ -65,20 +65,20 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
authenticator := sdkAuth.NewGeminiAuthenticator()
record, errLogin := authenticator.Login(ctx, cfg, loginOpts)
if errLogin != nil {
log.Fatalf("Gemini authentication failed: %v", errLogin)
log.Errorf("Gemini authentication failed: %v", errLogin)
return
}
storage, okStorage := record.Storage.(*gemini.GeminiTokenStorage)
if !okStorage || storage == nil {
log.Fatal("Gemini authentication failed: unsupported token storage")
log.Error("Gemini authentication failed: unsupported token storage")
return
}
geminiAuth := gemini.NewGeminiAuth()
httpClient, errClient := geminiAuth.GetAuthenticatedClient(ctx, storage, cfg, options.NoBrowser)
if errClient != nil {
log.Fatalf("Gemini authentication failed: %v", errClient)
log.Errorf("Gemini authentication failed: %v", errClient)
return
}
@@ -86,7 +86,7 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
projects, errProjects := fetchGCPProjects(ctx, httpClient)
if errProjects != nil {
log.Fatalf("Failed to get project list: %v", errProjects)
log.Errorf("Failed to get project list: %v", errProjects)
return
}
@@ -98,11 +98,11 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
selectedProjectID := promptForProjectSelection(projects, strings.TrimSpace(projectID), promptFn)
projectSelections, errSelection := resolveProjectSelections(selectedProjectID, projects)
if errSelection != nil {
log.Fatalf("Invalid project selection: %v", errSelection)
log.Errorf("Invalid project selection: %v", errSelection)
return
}
if len(projectSelections) == 0 {
log.Fatal("No project selected; aborting login.")
log.Error("No project selected; aborting login.")
return
}
@@ -116,7 +116,7 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
showProjectSelectionHelp(storage.Email, projects)
return
}
log.Fatalf("Failed to complete user setup: %v", errSetup)
log.Errorf("Failed to complete user setup: %v", errSetup)
return
}
finalID := strings.TrimSpace(storage.ProjectID)
@@ -133,11 +133,11 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
for _, pid := range activatedProjects {
isChecked, errCheck := checkCloudAPIIsEnabled(ctx, httpClient, pid)
if errCheck != nil {
log.Fatalf("Failed to check if Cloud AI API is enabled for %s: %v", pid, errCheck)
log.Errorf("Failed to check if Cloud AI API is enabled for %s: %v", pid, errCheck)
return
}
if !isChecked {
log.Fatalf("Failed to check if Cloud AI API is enabled for project %s. If you encounter an error message, please create an issue.", pid)
log.Errorf("Failed to check if Cloud AI API is enabled for project %s. If you encounter an error message, please create an issue.", pid)
return
}
}
@@ -153,7 +153,7 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
savedPath, errSave := store.Save(ctx, record)
if errSave != nil {
log.Fatalf("Failed to save token to file: %v", errSave)
log.Errorf("Failed to save token to file: %v", errSave)
return
}
@@ -555,6 +555,7 @@ func checkCloudAPIIsEnabled(ctx context.Context, httpClient *http.Client, projec
continue
}
}
_ = resp.Body.Close()
return false, fmt.Errorf("project activation required: %s", errMessage)
}
return true, nil

View File

@@ -45,12 +45,13 @@ func StartService(cfg *config.Config, configPath string, localPassword string) {
service, err := builder.Build()
if err != nil {
log.Fatalf("failed to build proxy service: %v", err)
log.Errorf("failed to build proxy service: %v", err)
return
}
err = service.Run(runCtx)
if err != nil && !errors.Is(err, context.Canceled) {
log.Fatalf("proxy service exited with error: %v", err)
log.Errorf("proxy service exited with error: %v", err)
}
}

View File

@@ -29,30 +29,30 @@ func DoVertexImport(cfg *config.Config, keyPath string) {
}
rawPath := strings.TrimSpace(keyPath)
if rawPath == "" {
log.Fatalf("vertex-import: missing service account key path")
log.Errorf("vertex-import: missing service account key path")
return
}
data, errRead := os.ReadFile(rawPath)
if errRead != nil {
log.Fatalf("vertex-import: read file failed: %v", errRead)
log.Errorf("vertex-import: read file failed: %v", errRead)
return
}
var sa map[string]any
if errUnmarshal := json.Unmarshal(data, &sa); errUnmarshal != nil {
log.Fatalf("vertex-import: invalid service account json: %v", errUnmarshal)
log.Errorf("vertex-import: invalid service account json: %v", errUnmarshal)
return
}
// Validate and normalize private_key before saving
normalizedSA, errFix := vertex.NormalizeServiceAccountMap(sa)
if errFix != nil {
log.Fatalf("vertex-import: %v", errFix)
log.Errorf("vertex-import: %v", errFix)
return
}
sa = normalizedSA
email, _ := sa["client_email"].(string)
projectID, _ := sa["project_id"].(string)
if strings.TrimSpace(projectID) == "" {
log.Fatalf("vertex-import: project_id missing in service account json")
log.Errorf("vertex-import: project_id missing in service account json")
return
}
if strings.TrimSpace(email) == "" {
@@ -92,7 +92,7 @@ func DoVertexImport(cfg *config.Config, keyPath string) {
}
path, errSave := store.Save(context.Background(), record)
if errSave != nil {
log.Fatalf("vertex-import: save credential failed: %v", errSave)
log.Errorf("vertex-import: save credential failed: %v", errSave)
return
}
fmt.Printf("Vertex credentials imported: %s\n", path)

View File

@@ -20,6 +20,9 @@ import (
// Config represents the application's configuration, loaded from a YAML file.
type Config struct {
config.SDKConfig `yaml:",inline"`
// Host is the network host/interface on which the API server will bind.
// Default is empty ("") to bind all interfaces (IPv4 + IPv6). Use "127.0.0.1" or "localhost" for local-only access.
Host string `yaml:"host" json:"-"`
// Port is the network port on which the API server will listen.
Port int `yaml:"port" json:"-"`
@@ -320,6 +323,7 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
// Unmarshal the YAML data into the Config struct.
var cfg Config
// Set defaults before unmarshal so that absent keys keep defaults.
cfg.Host = "" // Default empty: binds to all interfaces (IPv4 + IPv6)
cfg.LoggingToFile = false
cfg.UsageStatisticsEnabled = false
cfg.DisableCooling = false

View File

@@ -26,60 +26,6 @@ func GetClaudeModels() []*ModelInfo {
DisplayName: "Claude 4.5 Sonnet",
ContextLength: 200000,
MaxCompletionTokens: 64000,
},
{
ID: "claude-sonnet-4-5-thinking",
Object: "model",
Created: 1759104000, // 2025-09-29
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.5 Sonnet Thinking",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "claude-opus-4-5-thinking",
Object: "model",
Created: 1761955200, // 2025-11-01
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.5 Opus Thinking",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "claude-opus-4-5-thinking-low",
Object: "model",
Created: 1761955200, // 2025-11-01
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.5 Opus Thinking Low",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "claude-opus-4-5-thinking-medium",
Object: "model",
Created: 1761955200, // 2025-11-01
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.5 Opus Thinking Medium",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "claude-opus-4-5-thinking-high",
Object: "model",
Created: 1761955200, // 2025-11-01
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.5 Opus Thinking High",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
},
{
@@ -92,6 +38,7 @@ func GetClaudeModels() []*ModelInfo {
Description: "Premium model combining maximum intelligence with practical performance",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "claude-opus-4-1-20250805",
@@ -530,58 +477,6 @@ func GetOpenAIModels() []*ModelInfo {
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5-minimal",
Object: "model",
Created: 1754524800,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-08-07",
DisplayName: "GPT 5 Minimal",
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5-low",
Object: "model",
Created: 1754524800,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-08-07",
DisplayName: "GPT 5 Low",
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5-medium",
Object: "model",
Created: 1754524800,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-08-07",
DisplayName: "GPT 5 Medium",
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5-high",
Object: "model",
Created: 1754524800,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-08-07",
DisplayName: "GPT 5 High",
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5-codex",
Object: "model",
@@ -595,45 +490,6 @@ func GetOpenAIModels() []*ModelInfo {
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5-codex-low",
Object: "model",
Created: 1757894400,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-09-15",
DisplayName: "GPT 5 Codex Low",
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5-codex-medium",
Object: "model",
Created: 1757894400,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-09-15",
DisplayName: "GPT 5 Codex Medium",
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5-codex-high",
Object: "model",
Created: 1757894400,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-09-15",
DisplayName: "GPT 5 Codex High",
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5-codex-mini",
Object: "model",
@@ -647,32 +503,6 @@ func GetOpenAIModels() []*ModelInfo {
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5-codex-mini-medium",
Object: "model",
Created: 1762473600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-11-07",
DisplayName: "GPT 5 Codex Mini Medium",
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5-codex-mini-high",
Object: "model",
Created: 1762473600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-11-07",
DisplayName: "GPT 5 Codex Mini High",
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1",
Object: "model",
@@ -686,58 +516,6 @@ func GetOpenAIModels() []*ModelInfo {
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-none",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5 Low",
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-low",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5 Low",
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-medium",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5 Medium",
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-high",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5 High",
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-codex",
Object: "model",
@@ -745,47 +523,8 @@ func GetOpenAIModels() []*ModelInfo {
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5 Codex",
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-codex-low",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5 Codex Low",
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-codex-medium",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5 Codex Medium",
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-codex-high",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5 Codex High",
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
DisplayName: "GPT 5.1 Codex",
Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
@@ -797,39 +536,12 @@ func GetOpenAIModels() []*ModelInfo {
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5 Codex Mini",
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
DisplayName: "GPT 5.1 Codex Mini",
Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-codex-mini-medium",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5 Codex Mini Medium",
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-codex-mini-high",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5 Codex Mini High",
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-codex-max",
Object: "model",
@@ -837,60 +549,8 @@ func GetOpenAIModels() []*ModelInfo {
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-max",
DisplayName: "GPT 5 Codex Max",
Description: "Stable version of GPT 5 Codex Max",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-codex-max-low",
Object: "model",
Created: 1763424000,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-max",
DisplayName: "GPT 5 Codex Max Low",
Description: "Stable version of GPT 5 Codex Max Low",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-codex-max-medium",
Object: "model",
Created: 1763424000,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-max",
DisplayName: "GPT 5 Codex Max Medium",
Description: "Stable version of GPT 5 Codex Max Medium",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-codex-max-high",
Object: "model",
Created: 1763424000,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-max",
DisplayName: "GPT 5 Codex Max High",
Description: "Stable version of GPT 5 Codex Max High",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
},
{
ID: "gpt-5.1-codex-max-xhigh",
Object: "model",
Created: 1763424000,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-max",
DisplayName: "GPT 5 Codex Max XHigh",
Description: "Stable version of GPT 5 Codex Max XHigh",
DisplayName: "GPT 5.1 Codex Max",
Description: "Stable version of GPT 5.1 Codex Max",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
@@ -991,16 +651,19 @@ func GetIFlowModels() []*ModelInfo {
type AntigravityModelConfig struct {
Thinking *ThinkingSupport
MaxCompletionTokens int
Name string
}
// GetAntigravityModelConfig returns static configuration for antigravity models.
// Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup.
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
return map[string]*AntigravityModelConfig{
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
"gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
"gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
"gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
"gemini-2.5-computer-use-preview-10-2025": {Name: "models/gemini-2.5-computer-use-preview-10-2025"},
"gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-preview"},
"gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-image-preview"},
"gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
"gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
}
}

View File

@@ -309,7 +309,9 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
to := sdktranslator.FromString("gemini")
payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
payload = applyThinkingMetadata(payload, req.Metadata, req.Model)
payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
payload = util.ConvertThinkingLevelToBudget(payload)
payload = util.NormalizeGeminiThinkingBudget(req.Model, payload)
payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
payload = fixGeminiImageAspectRatio(req.Model, payload)
payload = applyPayloadConfig(e.cfg, req.Model, payload)

View File

@@ -27,18 +27,18 @@ import (
)
const (
antigravityBaseURLDaily = "https://daily-cloudcode-pa.sandbox.googleapis.com"
antigravityBaseURLAutopush = "https://autopush-cloudcode-pa.sandbox.googleapis.com"
antigravityBaseURLProd = "https://cloudcode-pa.googleapis.com"
antigravityStreamPath = "/v1internal:streamGenerateContent"
antigravityGeneratePath = "/v1internal:generateContent"
antigravityModelsPath = "/v1internal:fetchAvailableModels"
antigravityClientID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
antigravityClientSecret = "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"
defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
antigravityAuthType = "antigravity"
refreshSkew = 3000 * time.Second
streamScannerBuffer int = 20_971_520
antigravityBaseURLDaily = "https://daily-cloudcode-pa.sandbox.googleapis.com"
// antigravityBaseURLAutopush = "https://autopush-cloudcode-pa.sandbox.googleapis.com"
antigravityBaseURLProd = "https://cloudcode-pa.googleapis.com"
antigravityStreamPath = "/v1internal:streamGenerateContent"
antigravityGeneratePath = "/v1internal:generateContent"
antigravityModelsPath = "/v1internal:fetchAvailableModels"
antigravityClientID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
antigravityClientSecret = "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"
defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
antigravityAuthType = "antigravity"
refreshSkew = 3000 * time.Second
streamScannerBuffer int = 20_971_520
)
var randSource = rand.New(rand.NewSource(time.Now().UnixNano()))
@@ -77,6 +77,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
translated = normalizeAntigravityThinking(req.Model, translated)
baseURLs := antigravityBaseURLFallbackOrder(auth)
@@ -171,6 +172,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
translated = normalizeAntigravityThinking(req.Model, translated)
baseURLs := antigravityBaseURLFallbackOrder(auth)
@@ -373,9 +375,14 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
for originalName := range result.Map() {
aliasName := modelName2Alias(originalName)
if aliasName != "" {
cfg := modelConfig[aliasName]
modelName := aliasName
if cfg != nil && cfg.Name != "" {
modelName = cfg.Name
}
modelInfo := &registry.ModelInfo{
ID: aliasName,
Name: aliasName,
Name: modelName,
Description: aliasName,
DisplayName: aliasName,
Version: aliasName,
@@ -385,7 +392,7 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
Type: antigravityAuthType,
}
// Look up Thinking support from static config using alias name
if cfg, ok := modelConfig[aliasName]; ok {
if cfg != nil {
if cfg.Thinking != nil {
modelInfo.Thinking = cfg.Thinking
}
@@ -536,6 +543,9 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
strJSON = util.DeleteKey(strJSON, "minLength")
strJSON = util.DeleteKey(strJSON, "maxLength")
strJSON = util.DeleteKey(strJSON, "exclusiveMinimum")
strJSON = util.DeleteKey(strJSON, "exclusiveMaximum")
strJSON = util.DeleteKey(strJSON, "$ref")
strJSON = util.DeleteKey(strJSON, "$defs")
paths = make([]string, 0)
util.Walk(gjson.Parse(strJSON), "", "anyOf", &paths)
@@ -651,7 +661,7 @@ func buildBaseURL(auth *cliproxyauth.Auth) string {
if baseURLs := antigravityBaseURLFallbackOrder(auth); len(baseURLs) > 0 {
return baseURLs[0]
}
return antigravityBaseURLAutopush
return antigravityBaseURLDaily
}
func resolveHost(base string) string {
@@ -687,7 +697,7 @@ func antigravityBaseURLFallbackOrder(auth *cliproxyauth.Auth) []string {
}
return []string{
antigravityBaseURLDaily,
antigravityBaseURLAutopush,
// antigravityBaseURLAutopush,
antigravityBaseURLProd,
}
}
@@ -831,9 +841,12 @@ func normalizeAntigravityThinking(model string, payload []byte) []byte {
effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload)
if effectiveMax > 0 && normalized >= effectiveMax {
normalized = effectiveMax - 1
if normalized < 1 {
normalized = 1
}
}
minBudget := antigravityMinThinkingBudget(model)
if minBudget > 0 && normalized >= 0 && normalized < minBudget {
// Budget is below minimum, remove thinking config entirely
payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig")
return payload
}
if setDefaultMax {
if res, errSet := sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax); errSet == nil {
@@ -861,3 +874,12 @@ func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromM
}
return 0, false
}
// antigravityMinThinkingBudget returns the minimum thinking budget for a model.
// Falls back to -1 if no model info is found.
func antigravityMinThinkingBudget(model string) int {
if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.Thinking != nil {
return modelInfo.Thinking.Min
}
return -1
}

View File

@@ -54,15 +54,22 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
// Use streaming translation to preserve function calling, except for claude.
stream := from != to
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
modelForUpstream := req.Model
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
body, _ = sjson.SetBytes(body, "model", modelOverride)
modelForUpstream = modelOverride
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel == "" {
upstreamModel = req.Model
}
// Inject thinking config based on model suffix for thinking variants
body = e.injectThinkingConfig(req.Model, body)
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
upstreamModel = modelOverride
} else if !strings.EqualFold(upstreamModel, req.Model) {
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
upstreamModel = modelOverride
}
}
body, _ = sjson.SetBytes(body, "model", upstreamModel)
// Inject thinking config based on model metadata for thinking variants
body = e.injectThinkingConfig(req.Model, req.Metadata, body)
if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
body = checkSystemInstructions(body)
}
body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -161,11 +168,20 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
from := opts.SourceFormat
to := sdktranslator.FromString("claude")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
body, _ = sjson.SetBytes(body, "model", modelOverride)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel == "" {
upstreamModel = req.Model
}
// Inject thinking config based on model suffix for thinking variants
body = e.injectThinkingConfig(req.Model, body)
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
upstreamModel = modelOverride
} else if !strings.EqualFold(upstreamModel, req.Model) {
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
upstreamModel = modelOverride
}
}
body, _ = sjson.SetBytes(body, "model", upstreamModel)
// Inject thinking config based on model metadata for thinking variants
body = e.injectThinkingConfig(req.Model, req.Metadata, body)
body = checkSystemInstructions(body)
body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -295,13 +311,20 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
// Use streaming translation to preserve function calling, except for claude.
stream := from != to
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
modelForUpstream := req.Model
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
body, _ = sjson.SetBytes(body, "model", modelOverride)
modelForUpstream = modelOverride
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
if upstreamModel == "" {
upstreamModel = req.Model
}
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
upstreamModel = modelOverride
} else if !strings.EqualFold(upstreamModel, req.Model) {
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
upstreamModel = modelOverride
}
}
body, _ = sjson.SetBytes(body, "model", upstreamModel)
if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
body = checkSystemInstructions(body)
}
@@ -427,25 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
return betas, body
}
// injectThinkingConfig adds thinking configuration based on model name suffix
func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []byte {
// injectThinkingConfig adds thinking configuration based on metadata or legacy suffixes.
func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
// Only inject if thinking config is not already present
if gjson.GetBytes(body, "thinking").Exists() {
return body
}
var budgetTokens int
switch {
case strings.HasSuffix(modelName, "-thinking-low"):
budgetTokens = 1024
case strings.HasSuffix(modelName, "-thinking-medium"):
budgetTokens = 8192
case strings.HasSuffix(modelName, "-thinking-high"):
budgetTokens = 24576
case strings.HasSuffix(modelName, "-thinking"):
// Default thinking without suffix uses medium budget
budgetTokens = 8192
default:
budgetTokens, ok := resolveClaudeThinkingBudget(modelName, metadata)
if !ok || budgetTokens <= 0 {
return body
}
@@ -454,6 +467,44 @@ func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []b
return body
}
func resolveClaudeThinkingBudget(modelName string, metadata map[string]any) (int, bool) {
budget, include, effort, matched := util.ThinkingFromMetadata(metadata)
if matched {
if include != nil && !*include {
return 0, false
}
if budget != nil {
normalized := util.NormalizeThinkingBudget(modelName, *budget)
if normalized > 0 {
return normalized, true
}
return 0, false
}
if effort != nil {
if derived, ok := util.ThinkingEffortToBudget(modelName, *effort); ok && derived > 0 {
return derived, true
}
}
}
return claudeBudgetFromSuffix(modelName)
}
func claudeBudgetFromSuffix(modelName string) (int, bool) {
lower := strings.ToLower(strings.TrimSpace(modelName))
switch {
case strings.HasSuffix(lower, "-thinking-low"):
return 1024, true
case strings.HasSuffix(lower, "-thinking-medium"):
return 8192, true
case strings.HasSuffix(lower, "-thinking-high"):
return 24576, true
case strings.HasSuffix(lower, "-thinking"):
return 8192, true
default:
return 0, false
}
}
// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
// Anthropic API requires this constraint; violating it returns a 400 error.
// This function should be called after all thinking configuration is finalized.
@@ -491,35 +542,45 @@ func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
}
func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
if alias == "" {
trimmed := strings.TrimSpace(alias)
if trimmed == "" {
return ""
}
// Hardcoded mappings for thinking models to actual Claude model names
switch alias {
case "claude-opus-4-5-thinking", "claude-opus-4-5-thinking-low", "claude-opus-4-5-thinking-medium", "claude-opus-4-5-thinking-high":
return "claude-opus-4-5-20251101"
case "claude-sonnet-4-5-thinking":
return "claude-sonnet-4-5-20250929"
}
entry := e.resolveClaudeConfig(auth)
if entry == nil {
return ""
}
normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
// Candidate names to match against configured aliases/names.
candidates := []string{strings.TrimSpace(normalizedModel)}
if !strings.EqualFold(normalizedModel, trimmed) {
candidates = append(candidates, trimmed)
}
if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
candidates = append(candidates, original)
}
for i := range entry.Models {
model := entry.Models[i]
name := strings.TrimSpace(model.Name)
modelAlias := strings.TrimSpace(model.Alias)
if modelAlias != "" {
if strings.EqualFold(modelAlias, alias) {
for _, candidate := range candidates {
if candidate == "" {
continue
}
if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
if name != "" {
return name
}
return alias
return candidate
}
if name != "" && strings.EqualFold(name, candidate) {
return name
}
continue
}
if name != "" && strings.EqualFold(name, alias) {
return name
}
}
return ""

View File

@@ -49,14 +49,14 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
from := opts.SourceFormat
to := sdktranslator.FromString("codex")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = e.setReasoningEffortByAlias(req.Model, body)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
body = applyPayloadConfig(e.cfg, req.Model, body)
body, _ = sjson.SetBytes(body, "model", upstreamModel)
body, _ = sjson.SetBytes(body, "stream", true)
body, _ = sjson.DeleteBytes(body, "previous_response_id")
@@ -142,13 +142,16 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
from := opts.SourceFormat
to := sdktranslator.FromString("codex")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = e.setReasoningEffortByAlias(req.Model, body)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
body = applyPayloadConfig(e.cfg, req.Model, body)
body, _ = sjson.DeleteBytes(body, "previous_response_id")
body, _ = sjson.SetBytes(body, "model", upstreamModel)
url := strings.TrimSuffix(baseURL, "/") + "/responses"
httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -235,14 +238,16 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
}
func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
from := opts.SourceFormat
to := sdktranslator.FromString("codex")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
modelForCounting := req.Model
body = e.setReasoningEffortByAlias(req.Model, body)
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
body, _ = sjson.SetBytes(body, "model", upstreamModel)
body, _ = sjson.DeleteBytes(body, "previous_response_id")
body, _ = sjson.SetBytes(body, "stream", false)
@@ -261,83 +266,6 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
}
func (e *CodexExecutor) setReasoningEffortByAlias(modelName string, payload []byte) []byte {
if util.InArray([]string{"gpt-5", "gpt-5-minimal", "gpt-5-low", "gpt-5-medium", "gpt-5-high"}, modelName) {
payload, _ = sjson.SetBytes(payload, "model", "gpt-5")
switch modelName {
case "gpt-5-minimal":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "minimal")
case "gpt-5-low":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
case "gpt-5-medium":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
case "gpt-5-high":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
}
} else if util.InArray([]string{"gpt-5-codex", "gpt-5-codex-low", "gpt-5-codex-medium", "gpt-5-codex-high"}, modelName) {
payload, _ = sjson.SetBytes(payload, "model", "gpt-5-codex")
switch modelName {
case "gpt-5-codex-low":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
case "gpt-5-codex-medium":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
case "gpt-5-codex-high":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
}
} else if util.InArray([]string{"gpt-5-codex-mini", "gpt-5-codex-mini-medium", "gpt-5-codex-mini-high"}, modelName) {
payload, _ = sjson.SetBytes(payload, "model", "gpt-5-codex-mini")
switch modelName {
case "gpt-5-codex-mini-medium":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
case "gpt-5-codex-mini-high":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
}
} else if util.InArray([]string{"gpt-5.1", "gpt-5.1-none", "gpt-5.1-low", "gpt-5.1-medium", "gpt-5.1-high"}, modelName) {
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1")
switch modelName {
case "gpt-5.1-none":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "none")
case "gpt-5.1-low":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
case "gpt-5.1-medium":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
case "gpt-5.1-high":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
}
} else if util.InArray([]string{"gpt-5.1-codex", "gpt-5.1-codex-low", "gpt-5.1-codex-medium", "gpt-5.1-codex-high"}, modelName) {
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex")
switch modelName {
case "gpt-5.1-codex-low":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
case "gpt-5.1-codex-medium":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
case "gpt-5.1-codex-high":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
}
} else if util.InArray([]string{"gpt-5.1-codex-mini", "gpt-5.1-codex-mini-medium", "gpt-5.1-codex-mini-high"}, modelName) {
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-mini")
switch modelName {
case "gpt-5.1-codex-mini-medium":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
case "gpt-5.1-codex-mini-high":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
}
} else if util.InArray([]string{"gpt-5.1-codex-max", "gpt-5.1-codex-max-low", "gpt-5.1-codex-max-medium", "gpt-5.1-codex-max-high", "gpt-5.1-codex-max-xhigh"}, modelName) {
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-max")
switch modelName {
case "gpt-5.1-codex-max-low":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
case "gpt-5.1-codex-max-medium":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
case "gpt-5.1-codex-max-high":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
case "gpt-5.1-codex-max-xhigh":
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "xhigh")
}
}
return payload
}
func tokenizerForCodexModel(model string) (tokenizer.Codec, error) {
sanitized := strings.ToLower(strings.TrimSpace(model))
switch {

View File

@@ -64,6 +64,8 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
to := sdktranslator.FromString("gemini-cli")
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
@@ -199,6 +201,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
to := sdktranslator.FromString("gemini-cli")
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)

View File

@@ -75,14 +75,19 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
// Official Gemini API via API key or OAuth bearer
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = applyThinkingMetadata(body, req.Metadata, req.Model)
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body)
body = applyPayloadConfig(e.cfg, req.Model, body)
body, _ = sjson.SetBytes(body, "model", upstreamModel)
action := "generateContent"
if req.Metadata != nil {
@@ -91,7 +96,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
}
}
baseURL := resolveGeminiBaseURL(auth)
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, action)
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, action)
if opts.Alt != "" && action != "countTokens" {
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
}
@@ -165,16 +170,21 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = applyThinkingMetadata(body, req.Metadata, req.Model)
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body)
body = applyPayloadConfig(e.cfg, req.Model, body)
body, _ = sjson.SetBytes(body, "model", upstreamModel)
baseURL := resolveGeminiBaseURL(auth)
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, "streamGenerateContent")
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, "streamGenerateContent")
if opts.Alt == "" {
url = url + "?alt=sse"
} else {

View File

@@ -105,10 +105,12 @@ func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyau
// countTokensWithServiceAccount handles token counting using service account credentials.
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
@@ -117,13 +119,14 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
}
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
respCtx := context.WithValue(ctx, "alt", opts.Alt)
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
baseURL := vertexBaseURL(location)
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens")
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "countTokens")
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
if errNewReq != nil {
@@ -191,10 +194,12 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
// countTokensWithAPIKey handles token counting using API key credentials.
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
@@ -203,6 +208,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
}
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
respCtx := context.WithValue(ctx, "alt", opts.Alt)
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
@@ -286,19 +292,24 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
}
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body)
body = applyPayloadConfig(e.cfg, req.Model, body)
body, _ = sjson.SetBytes(body, "model", upstreamModel)
action := "generateContent"
if req.Metadata != nil {
@@ -307,7 +318,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
}
}
baseURL := vertexBaseURL(location)
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action)
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, action)
if opts.Alt != "" && action != "countTokens" {
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
}
@@ -381,19 +392,24 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
}
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body)
body = applyPayloadConfig(e.cfg, req.Model, body)
body, _ = sjson.SetBytes(body, "model", upstreamModel)
action := "generateContent"
if req.Metadata != nil {
@@ -406,7 +422,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
if baseURL == "" {
baseURL = "https://generativelanguage.googleapis.com"
}
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, action)
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, action)
if opts.Alt != "" && action != "countTokens" {
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
}
@@ -477,22 +493,27 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
}
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body)
body = applyPayloadConfig(e.cfg, req.Model, body)
body, _ = sjson.SetBytes(body, "model", upstreamModel)
baseURL := vertexBaseURL(location)
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent")
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "streamGenerateContent")
if opts.Alt == "" {
url = url + "?alt=sse"
} else {
@@ -589,25 +610,30 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
}
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body)
body = applyPayloadConfig(e.cfg, req.Model, body)
body, _ = sjson.SetBytes(body, "model", upstreamModel)
// For API key auth, use simpler URL format without project/location
if baseURL == "" {
baseURL = "https://generativelanguage.googleapis.com"
}
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "streamGenerateContent")
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, "streamGenerateContent")
if opts.Alt == "" {
url = url + "?alt=sse"
} else {

View File

@@ -57,6 +57,10 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
body = applyPayloadConfig(e.cfg, req.Model, body)
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -139,6 +143,10 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
toolsResult := gjson.GetBytes(body, "tools")
if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {

View File

@@ -157,7 +157,7 @@ func appendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byt
if ginCtx == nil {
return
}
_, attempt := ensureAttempt(ginCtx)
attempts, attempt := ensureAttempt(ginCtx)
ensureResponseIntro(attempt)
if !attempt.headersWritten {
@@ -175,6 +175,8 @@ func appendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byt
}
attempt.response.WriteString(string(data))
attempt.bodyHasContent = true
updateAggregatedResponse(ginCtx, attempts)
}
func ginContextFrom(ctx context.Context) *gin.Context {

View File

@@ -58,6 +58,10 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
translated = e.overrideModel(translated, modelOverride)
}
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
}
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -143,6 +147,10 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
translated = e.overrideModel(translated, modelOverride)
}
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
}
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))

View File

@@ -12,8 +12,8 @@ import (
// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
// for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata)
if !ok {
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
if !ok || (budgetOverride == nil && includeOverride == nil) {
return payload
}
if !util.ModelSupportsThinking(model) {
@@ -29,17 +29,60 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata)
if !ok {
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
if !ok || (budgetOverride == nil && includeOverride == nil) {
return payload
}
if budgetOverride != nil && util.ModelSupportsThinking(model) {
if !util.ModelSupportsThinking(model) {
return payload
}
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
budgetOverride = &norm
}
return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
}
// applyReasoningEffortMetadata applies reasoning effort overrides (reasoning.effort) when present in metadata.
// It avoids overwriting an existing reasoning.effort field and only applies to models that support thinking.
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model string) []byte {
if len(metadata) == 0 {
return payload
}
if !util.ModelSupportsThinking(model) {
return payload
}
if gjson.GetBytes(payload, "reasoning.effort").Exists() {
return payload
}
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
if updated, err := sjson.SetBytes(payload, "reasoning.effort", effort); err == nil {
return updated
}
}
return payload
}
// applyReasoningEffortMetadataChatCompletions applies reasoning_effort (OpenAI chat completions field)
// when present in metadata. It avoids overwriting an existing reasoning_effort field.
func applyReasoningEffortMetadataChatCompletions(payload []byte, metadata map[string]any, model string) []byte {
if len(metadata) == 0 {
return payload
}
if !util.ModelSupportsThinking(model) {
return payload
}
if gjson.GetBytes(payload, "reasoning_effort").Exists() {
return payload
}
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
if updated, err := sjson.SetBytes(payload, "reasoning_effort", effort); err == nil {
return updated
}
}
return payload
}
// applyPayloadConfig applies payload default and override rules from configuration
// to the given JSON payload for the specified model.
// Defaults only fill missing fields, while overrides always overwrite existing values.

View File

@@ -12,6 +12,7 @@ import (
qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -50,6 +51,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
body = applyPayloadConfig(e.cfg, req.Model, body)
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
@@ -121,6 +126,10 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
to := sdktranslator.FromString("openai")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
body, _ = sjson.SetBytes(body, "model", upstreamModel)
}
toolsResult := gjson.GetBytes(body, "tools")
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
// This will have no real consequences. It's just to scare Qwen3.

View File

@@ -123,6 +123,15 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
functionResponse := client.FunctionResponse{ID: toolCallID, Name: funcName, Response: map[string]interface{}{"result": responseData}}
clientContent.Parts = append(clientContent.Parts, client.Part{FunctionResponse: &functionResponse})
}
} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "image" {
sourceResult := contentResult.Get("source")
if sourceResult.Get("type").String() == "base64" {
inlineData := &client.InlineData{
MimeType: sourceResult.Get("media_type").String(),
Data: sourceResult.Get("data").String(),
}
clientContent.Parts = append(clientContent.Parts, client.Part{InlineData: inlineData})
}
}
}
contents = append(contents, clientContent)
@@ -180,7 +189,6 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
if t.Get("type").String() == "enabled" {
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
budget := int(b.Int())
budget = util.NormalizeThinkingBudget(modelName, budget)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}

View File

@@ -12,6 +12,7 @@ import (
"encoding/json"
"fmt"
"strings"
"sync/atomic"
"time"
"github.com/tidwall/gjson"
@@ -36,6 +37,9 @@ type Params struct {
HasToolUse bool // Indicates if tool use was observed in the stream
}
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
var toolUseIDCounter uint64
// ConvertAntigravityResponseToClaude performs sophisticated streaming response format conversion.
// This function implements a complex state machine that translates backend client responses
// into Claude Code-compatible Server-Sent Events (SSE) format. It manages different response types
@@ -216,7 +220,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
// Create the tool use block with unique ID and function details
data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, params.ResponseIndex)
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
data, _ = sjson.Set(data, "content_block.name", fcName)
output = output + fmt.Sprintf("data: %s\n\n\n", data)

View File

@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "low":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "medium":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "high":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
default:
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
var budget int
if v := tc.Get("thinkingBudget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
budget = int(v.Int())
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
setBudget = true
} else if v := tc.Get("thinking_budget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
budget = int(v.Int())
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
setBudget = true
}
@@ -82,7 +82,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget && normalized != 0 {
} else if setBudget && budget != 0 {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
}
@@ -94,7 +94,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
if t.Get("type").String() == "enabled" {
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
budget := util.NormalizeThinkingBudget(modelName, int(b.Int()))
budget := int(b.Int())
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
@@ -102,15 +102,6 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
}
}
// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
// This matches the official Gemini CLI behavior which always sends:
// { thinkingBudget: -1, includeThoughts: true }
// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
// Temperature/top_p/top_k/max_tokens
if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)

View File

@@ -11,6 +11,7 @@ import (
"encoding/json"
"fmt"
"strings"
"sync/atomic"
"time"
. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
@@ -24,6 +25,9 @@ type convertCliResponseToOpenAIChatParams struct {
FunctionIndex int
}
// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
var functionCallIDCounter uint64
// ConvertAntigravityResponseToOpenAI translates a single chunk of a streaming response from the
// Gemini CLI API format to the OpenAI Chat Completions streaming format.
// It processes various Gemini CLI event types and transforms them into OpenAI-compatible JSON responses.
@@ -146,7 +150,7 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq
functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
fcName := functionCallResult.Get("name").String()
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {

View File

@@ -165,7 +165,6 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
if t.Get("type").String() == "enabled" {
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
budget := int(b.Int())
budget = util.NormalizeThinkingBudget(modelName, budget)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}

View File

@@ -12,6 +12,7 @@ import (
"encoding/json"
"fmt"
"strings"
"sync/atomic"
"time"
"github.com/tidwall/gjson"
@@ -27,6 +28,9 @@ type Params struct {
ResponseIndex int // Index counter for content blocks in the streaming response
}
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
var toolUseIDCounter uint64
// ConvertGeminiCLIResponseToClaude performs sophisticated streaming response format conversion.
// This function implements a complex state machine that translates backend client responses
// into Claude Code-compatible Server-Sent Events (SSE) format. It manages different response types
@@ -197,7 +201,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
// Create the tool use block with unique ID and function details
data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, (*param).(*Params).ResponseIndex)
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
data, _ = sjson.Set(data, "content_block.name", fcName)
output = output + fmt.Sprintf("data: %s\n\n\n", data)

View File

@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "low":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "medium":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "high":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
default:
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
var budget int
if v := tc.Get("thinkingBudget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
budget = int(v.Int())
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
setBudget = true
} else if v := tc.Get("thinking_budget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
budget = int(v.Int())
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
setBudget = true
}
@@ -82,21 +82,12 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget && normalized != 0 {
} else if setBudget && budget != 0 {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
// This matches the official Gemini CLI behavior which always sends:
// { thinkingBudget: -1, includeThoughts: true }
// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
// Temperature/top_p/top_k
if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)

View File

@@ -11,6 +11,7 @@ import (
"encoding/json"
"fmt"
"strings"
"sync/atomic"
"time"
. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
@@ -24,6 +25,9 @@ type convertCliResponseToOpenAIChatParams struct {
FunctionIndex int
}
// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
var functionCallIDCounter uint64
// ConvertCliResponseToOpenAI translates a single chunk of a streaming response from the
// Gemini CLI API format to the OpenAI Chat Completions streaming format.
// It processes various Gemini CLI event types and transforms them into OpenAI-compatible JSON responses.
@@ -146,7 +150,7 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
fcName := functionCallResult.Get("name").String()
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {

View File

@@ -158,7 +158,6 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
if t.Get("type").String() == "enabled" {
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
budget := int(b.Int())
budget = util.NormalizeThinkingBudget(modelName, budget)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
}

View File

@@ -12,6 +12,7 @@ import (
"encoding/json"
"fmt"
"strings"
"sync/atomic"
"time"
"github.com/tidwall/gjson"
@@ -26,6 +27,9 @@ type Params struct {
ResponseIndex int
}
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
var toolUseIDCounter uint64
// ConvertGeminiResponseToClaude performs sophisticated streaming response format conversion.
// This function implements a complex state machine that translates backend client responses
// into Claude-compatible Server-Sent Events (SSE) format. It manages different response types
@@ -197,7 +201,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
// Create the tool use block with unique ID and function details
data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, (*param).(*Params).ResponseIndex)
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
data, _ = sjson.Set(data, "content_block.name", fcName)
output = output + fmt.Sprintf("data: %s\n\n\n", data)

View File

@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "low":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "medium":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "high":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 32768)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
default:
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
var budget int
if v := tc.Get("thinkingBudget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
budget = int(v.Int())
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
setBudget = true
} else if v := tc.Get("thinking_budget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
budget = int(v.Int())
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
setBudget = true
}
@@ -82,7 +82,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget && normalized != 0 {
} else if setBudget && budget != 0 {
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}

View File

@@ -11,6 +11,7 @@ import (
"encoding/json"
"fmt"
"strings"
"sync/atomic"
"time"
"github.com/tidwall/gjson"
@@ -23,6 +24,9 @@ type convertGeminiResponseToOpenAIChatParams struct {
FunctionIndex int
}
// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
var functionCallIDCounter uint64
// ConvertGeminiResponseToOpenAI translates a single chunk of a streaming response from the
// Gemini API format to the OpenAI Chat Completions streaming format.
// It processes various Gemini event types and transforms them into OpenAI-compatible JSON responses.
@@ -148,7 +152,7 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
fcName := functionCallResult.Get("name").String()
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
@@ -281,7 +285,7 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
}
functionCallItemTemplate := `{"id": "","type": "function","function": {"name": "","arguments": ""}}`
fcName := functionCallResult.Get("name").String()
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", fcName)
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", fcArgsResult.Raw)

View File

@@ -400,16 +400,16 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "minimal":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "low":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "medium":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "high":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 32768)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
default:
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -421,32 +421,22 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
var budget int
if v := tc.Get("thinking_budget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
budget = int(v.Int())
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
setBudget = true
}
if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget {
if normalized != 0 {
if budget != 0 {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
}
// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
// This matches the official Gemini CLI behavior which always sends:
// { thinkingBudget: -1, includeThoughts: true }
// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
if !gjson.Get(out, "generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
// log.Debugf("Applied default thinkingConfig for gemini-3-pro-preview (matches Gemini CLI): thinkingBudget=-1, include_thoughts=true")
}
result := []byte(out)
result = common.AttachDefaultSafetySettings(result, "safetySettings")
return result

View File

@@ -5,6 +5,7 @@ import (
"context"
"fmt"
"strings"
"sync/atomic"
"time"
"github.com/tidwall/gjson"
@@ -37,6 +38,12 @@ type geminiToResponsesState struct {
FuncCallIDs map[int]string
}
// responseIDCounter provides a process-wide unique counter for synthesized response identifiers.
var responseIDCounter uint64
// funcCallIDCounter provides a process-wide unique counter for function call identifiers.
var funcCallIDCounter uint64
func emitEvent(event string, payload string) string {
return fmt.Sprintf("event: %s\ndata: %s", event, payload)
}
@@ -205,7 +212,7 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
st.FuncArgsBuf[idx] = &strings.Builder{}
}
if st.FuncCallIDs[idx] == "" {
st.FuncCallIDs[idx] = fmt.Sprintf("call_%d", time.Now().UnixNano())
st.FuncCallIDs[idx] = fmt.Sprintf("call_%d_%d", time.Now().UnixNano(), atomic.AddUint64(&funcCallIDCounter, 1))
}
st.FuncNames[idx] = name
@@ -464,7 +471,7 @@ func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string
// id: prefer provider responseId, otherwise synthesize
id := root.Get("responseId").String()
if id == "" {
id = fmt.Sprintf("resp_%x", time.Now().UnixNano())
id = fmt.Sprintf("resp_%x_%d", time.Now().UnixNano(), atomic.AddUint64(&responseIDCounter, 1))
}
// Normalize to response-style id (prefix resp_ if missing)
if !strings.HasPrefix(id, "resp_") {
@@ -575,7 +582,7 @@ func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string
if fc := p.Get("functionCall"); fc.Exists() {
name := fc.Get("name").String()
args := fc.Get("args")
callID := fmt.Sprintf("call_%x", time.Now().UnixNano())
callID := fmt.Sprintf("call_%x_%d", time.Now().UnixNano(), atomic.AddUint64(&funcCallIDCounter, 1))
outputs = append(outputs, map[string]interface{}{
"id": fmt.Sprintf("fc_%s", callID),
"type": "function_call",

View File

@@ -5,6 +5,7 @@ import (
"context"
"fmt"
"strings"
"sync/atomic"
"time"
"github.com/tidwall/gjson"
@@ -41,6 +42,9 @@ type oaiToResponsesState struct {
UsageSeen bool
}
// responseIDCounter provides a process-wide unique counter for synthesized response identifiers.
var responseIDCounter uint64
func emitRespEvent(event string, payload string) string {
return fmt.Sprintf("event: %s\ndata: %s", event, payload)
}
@@ -590,7 +594,7 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(_ context.Co
// id: use provider id if present, otherwise synthesize
id := root.Get("id").String()
if id == "" {
id = fmt.Sprintf("resp_%x", time.Now().UnixNano())
id = fmt.Sprintf("resp_%x_%d", time.Now().UnixNano(), atomic.AddUint64(&responseIDCounter, 1))
}
resp, _ = sjson.Set(resp, "id", id)

View File

@@ -1,8 +1,6 @@
package util
import (
"encoding/json"
"strconv"
"strings"
"github.com/tidwall/gjson"
@@ -15,80 +13,6 @@ const (
GeminiOriginalModelMetadataKey = "gemini_original_model"
)
func ParseGeminiThinkingSuffix(model string) (string, *int, *bool, bool) {
if model == "" {
return model, nil, nil, false
}
lower := strings.ToLower(model)
if !strings.HasPrefix(lower, "gemini-") {
return model, nil, nil, false
}
if strings.HasSuffix(lower, "-nothinking") {
base := model[:len(model)-len("-nothinking")]
budgetValue := 0
if strings.HasPrefix(lower, "gemini-2.5-pro") {
budgetValue = 128
}
include := false
return base, &budgetValue, &include, true
}
// Handle "-reasoning" suffix: enables thinking with dynamic budget (-1)
// Maps: gemini-2.5-flash-reasoning -> gemini-2.5-flash with thinkingBudget=-1
if strings.HasSuffix(lower, "-reasoning") {
base := model[:len(model)-len("-reasoning")]
budgetValue := -1 // Dynamic budget
include := true
return base, &budgetValue, &include, true
}
idx := strings.LastIndex(lower, "-thinking-")
if idx == -1 {
return model, nil, nil, false
}
digits := model[idx+len("-thinking-"):]
if digits == "" {
return model, nil, nil, false
}
end := len(digits)
for i := 0; i < len(digits); i++ {
if digits[i] < '0' || digits[i] > '9' {
end = i
break
}
}
if end == 0 {
return model, nil, nil, false
}
valueStr := digits[:end]
value, err := strconv.Atoi(valueStr)
if err != nil {
return model, nil, nil, false
}
base := model[:idx]
budgetValue := value
return base, &budgetValue, nil, true
}
func NormalizeGeminiThinkingModel(modelName string) (string, map[string]any) {
baseModel, budget, include, matched := ParseGeminiThinkingSuffix(modelName)
if !matched {
return baseModel, nil
}
metadata := map[string]any{
GeminiOriginalModelMetadataKey: modelName,
}
if budget != nil {
metadata[GeminiThinkingBudgetMetadataKey] = *budget
}
if include != nil {
metadata[GeminiIncludeThoughtsMetadataKey] = *include
}
return baseModel, metadata
}
func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
if budget == nil && includeThoughts == nil {
return body
@@ -133,78 +57,45 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
return updated
}
func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
if len(metadata) == 0 {
return nil, nil, false
// modelsWithDefaultThinking lists models that should have thinking enabled by default
// when no explicit thinkingConfig is provided.
var modelsWithDefaultThinking = map[string]bool{
"gemini-3-pro-preview": true,
}
// ModelHasDefaultThinking returns true if the model should have thinking enabled by default.
func ModelHasDefaultThinking(model string) bool {
return modelsWithDefaultThinking[model]
}
// ApplyDefaultThinkingIfNeeded injects default thinkingConfig for models that require it.
// For standard Gemini API format (generationConfig.thinkingConfig path).
// Returns the modified body if thinkingConfig was added, otherwise returns the original.
func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
if !ModelHasDefaultThinking(model) {
return body
}
var (
budgetPtr *int
includePtr *bool
matched bool
)
if rawBudget, ok := metadata[GeminiThinkingBudgetMetadataKey]; ok {
switch v := rawBudget.(type) {
case int:
budget := v
budgetPtr = &budget
matched = true
case int32:
budget := int(v)
budgetPtr = &budget
matched = true
case int64:
budget := int(v)
budgetPtr = &budget
matched = true
case float64:
budget := int(v)
budgetPtr = &budget
matched = true
case json.Number:
if val, err := v.Int64(); err == nil {
budget := int(val)
budgetPtr = &budget
matched = true
}
}
if gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() {
return body
}
if rawInclude, ok := metadata[GeminiIncludeThoughtsMetadataKey]; ok {
switch v := rawInclude.(type) {
case bool:
include := v
includePtr = &include
matched = true
case string:
if parsed, err := strconv.ParseBool(v); err == nil {
include := parsed
includePtr = &include
matched = true
}
case json.Number:
if val, err := v.Int64(); err == nil {
include := val != 0
includePtr = &include
matched = true
}
case int:
include := v != 0
includePtr = &include
matched = true
case int32:
include := v != 0
includePtr = &include
matched = true
case int64:
include := v != 0
includePtr = &include
matched = true
case float64:
include := v != 0
includePtr = &include
matched = true
}
updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", -1)
updated, _ = sjson.SetBytes(updated, "generationConfig.thinkingConfig.include_thoughts", true)
return updated
}
// ApplyDefaultThinkingIfNeededCLI injects default thinkingConfig for models that require it.
// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
// Returns the modified body if thinkingConfig was added, otherwise returns the original.
func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
if !ModelHasDefaultThinking(model) {
return body
}
return budgetPtr, includePtr, matched
if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() {
return body
}
updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
updated, _ = sjson.SetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts", true)
return updated
}
// StripThinkingConfigIfUnsupported removes thinkingConfig from the request body
@@ -223,6 +114,32 @@ func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
return updated
}
// NormalizeGeminiThinkingBudget normalizes the thinkingBudget value in a standard Gemini
// request body (generationConfig.thinkingConfig.thinkingBudget path).
func NormalizeGeminiThinkingBudget(model string, body []byte) []byte {
const budgetPath = "generationConfig.thinkingConfig.thinkingBudget"
budget := gjson.GetBytes(body, budgetPath)
if !budget.Exists() {
return body
}
normalized := NormalizeThinkingBudget(model, int(budget.Int()))
updated, _ := sjson.SetBytes(body, budgetPath, normalized)
return updated
}
// NormalizeGeminiCLIThinkingBudget normalizes the thinkingBudget value in a Gemini CLI
// request body (request.generationConfig.thinkingConfig.thinkingBudget path).
func NormalizeGeminiCLIThinkingBudget(model string, body []byte) []byte {
const budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget"
budget := gjson.GetBytes(body, budgetPath)
if !budget.Exists() {
return body
}
normalized := NormalizeThinkingBudget(model, int(budget.Int()))
updated, _ := sjson.SetBytes(body, budgetPath, normalized)
return updated
}
// ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel"
// and converts it to "thinkingBudget".
// "high" -> 32768

View File

@@ -0,0 +1,327 @@
package util
import (
"encoding/json"
"strconv"
"strings"
)
const (
ThinkingBudgetMetadataKey = "thinking_budget"
ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts"
ReasoningEffortMetadataKey = "reasoning_effort"
ThinkingOriginalModelMetadataKey = "thinking_original_model"
)
// NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
// the normalized base model with extracted metadata. Supported patterns:
// - "-thinking-<number>" extracts a numeric budget
// - "-thinking-<level>" extracts a reasoning effort level (minimal/low/medium/high/xhigh/auto/none)
// - "-thinking" maps to a default reasoning effort of "medium"
// - "-reasoning" maps to dynamic budget (-1) and include_thoughts=true
// - "-nothinking" maps to budget=0 and include_thoughts=false
func NormalizeThinkingModel(modelName string) (string, map[string]any) {
if modelName == "" {
return modelName, nil
}
lower := strings.ToLower(modelName)
baseModel := modelName
var (
budgetOverride *int
includeThoughts *bool
reasoningEffort *string
matched bool
)
switch {
case strings.HasSuffix(lower, "-nothinking"):
baseModel = modelName[:len(modelName)-len("-nothinking")]
budget := 0
include := false
budgetOverride = &budget
includeThoughts = &include
matched = true
case strings.HasSuffix(lower, "-reasoning"):
baseModel = modelName[:len(modelName)-len("-reasoning")]
budget := -1
include := true
budgetOverride = &budget
includeThoughts = &include
matched = true
default:
if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 {
value := modelName[idx+len("-thinking-"):]
if value != "" {
if parsed, ok := parseIntPrefix(value); ok {
baseModel = modelName[:idx]
budgetOverride = &parsed
matched = true
} else if effort, okEffort := normalizeReasoningEffort(value); okEffort {
baseModel = modelName[:idx]
reasoningEffort = &effort
matched = true
}
}
} else if strings.HasSuffix(lower, "-thinking") {
baseModel = modelName[:len(modelName)-len("-thinking")]
effort := "medium"
reasoningEffort = &effort
matched = true
}
}
if !matched {
return baseModel, nil
}
metadata := map[string]any{
ThinkingOriginalModelMetadataKey: modelName,
}
if budgetOverride != nil {
metadata[ThinkingBudgetMetadataKey] = *budgetOverride
}
if includeThoughts != nil {
metadata[ThinkingIncludeThoughtsMetadataKey] = *includeThoughts
}
if reasoningEffort != nil {
metadata[ReasoningEffortMetadataKey] = *reasoningEffort
}
return baseModel, metadata
}
// ThinkingFromMetadata extracts thinking overrides from metadata produced by NormalizeThinkingModel.
// It accepts both the new generic keys and legacy Gemini-specific keys.
func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) {
if len(metadata) == 0 {
return nil, nil, nil, false
}
var (
budgetPtr *int
includePtr *bool
effortPtr *string
matched bool
)
readBudget := func(key string) {
if budgetPtr != nil {
return
}
if raw, ok := metadata[key]; ok {
if v, okNumber := parseNumberToInt(raw); okNumber {
budget := v
budgetPtr = &budget
matched = true
}
}
}
readInclude := func(key string) {
if includePtr != nil {
return
}
if raw, ok := metadata[key]; ok {
switch v := raw.(type) {
case bool:
val := v
includePtr = &val
matched = true
case *bool:
if v != nil {
val := *v
includePtr = &val
matched = true
}
}
}
}
readEffort := func(key string) {
if effortPtr != nil {
return
}
if raw, ok := metadata[key]; ok {
if val, okStr := raw.(string); okStr && strings.TrimSpace(val) != "" {
normalized := strings.ToLower(strings.TrimSpace(val))
effortPtr = &normalized
matched = true
}
}
}
readBudget(ThinkingBudgetMetadataKey)
readBudget(GeminiThinkingBudgetMetadataKey)
readInclude(ThinkingIncludeThoughtsMetadataKey)
readInclude(GeminiIncludeThoughtsMetadataKey)
readEffort(ReasoningEffortMetadataKey)
readEffort("reasoning.effort")
return budgetPtr, includePtr, effortPtr, matched
}
// ResolveThinkingConfigFromMetadata derives thinking budget/include overrides,
// converting reasoning effort strings into budgets when possible.
func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*int, *bool, bool) {
budget, include, effort, matched := ThinkingFromMetadata(metadata)
if !matched {
return nil, nil, false
}
if budget == nil && effort != nil {
if derived, ok := ThinkingEffortToBudget(model, *effort); ok {
budget = &derived
}
}
return budget, include, budget != nil || include != nil || effort != nil
}
// ReasoningEffortFromMetadata resolves a reasoning effort string from metadata,
// inferring "auto" and "none" when budgets request dynamic or disabled thinking.
func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
budget, include, effort, matched := ThinkingFromMetadata(metadata)
if !matched {
return "", false
}
if effort != nil && *effort != "" {
return *effort, true
}
if budget != nil {
switch *budget {
case -1:
return "auto", true
case 0:
return "none", true
}
}
if include != nil && !*include {
return "none", true
}
return "", true
}
// ThinkingEffortToBudget maps reasoning effort levels to approximate budgets,
// clamping the result to the model's supported range.
func ThinkingEffortToBudget(model, effort string) (int, bool) {
if effort == "" {
return 0, false
}
switch strings.ToLower(effort) {
case "none":
return 0, true
case "auto":
return NormalizeThinkingBudget(model, -1), true
case "minimal":
return NormalizeThinkingBudget(model, 512), true
case "low":
return NormalizeThinkingBudget(model, 1024), true
case "medium":
return NormalizeThinkingBudget(model, 8192), true
case "high":
return NormalizeThinkingBudget(model, 24576), true
case "xhigh":
return NormalizeThinkingBudget(model, 32768), true
default:
return 0, false
}
}
// ResolveOriginalModel returns the original model name stored in metadata (if present),
// otherwise falls back to the provided model.
func ResolveOriginalModel(model string, metadata map[string]any) string {
normalize := func(name string) string {
if name == "" {
return ""
}
if base, _ := NormalizeThinkingModel(name); base != "" {
return base
}
return strings.TrimSpace(name)
}
if metadata != nil {
if v, ok := metadata[ThinkingOriginalModelMetadataKey]; ok {
if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
if base := normalize(s); base != "" {
return base
}
}
}
if v, ok := metadata[GeminiOriginalModelMetadataKey]; ok {
if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
if base := normalize(s); base != "" {
return base
}
}
}
}
// Fallback: try to re-normalize the model name when metadata was dropped.
if base := normalize(model); base != "" {
return base
}
return model
}
func parseIntPrefix(value string) (int, bool) {
if value == "" {
return 0, false
}
digits := strings.TrimLeft(value, "-")
if digits == "" {
return 0, false
}
end := len(digits)
for i := 0; i < len(digits); i++ {
if digits[i] < '0' || digits[i] > '9' {
end = i
break
}
}
if end == 0 {
return 0, false
}
val, err := strconv.Atoi(digits[:end])
if err != nil {
return 0, false
}
return val, true
}
func parseNumberToInt(raw any) (int, bool) {
switch v := raw.(type) {
case int:
return v, true
case int32:
return int(v), true
case int64:
return int(v), true
case float64:
return int(v), true
case json.Number:
if val, err := v.Int64(); err == nil {
return int(val), true
}
case string:
if strings.TrimSpace(v) == "" {
return 0, false
}
if parsed, err := strconv.Atoi(strings.TrimSpace(v)); err == nil {
return parsed, true
}
}
return 0, false
}
func normalizeReasoningEffort(value string) (string, bool) {
if value == "" {
return "", false
}
effort := strings.ToLower(strings.TrimSpace(value))
switch effort {
case "minimal", "low", "medium", "high", "xhigh", "auto", "none":
return effort, true
default:
return "", false
}
}

View File

@@ -48,8 +48,24 @@ func (h *GeminiAPIHandler) Models() []map[string]any {
// GeminiModels handles the Gemini models listing endpoint.
// It returns a JSON response containing available Gemini models and their specifications.
func (h *GeminiAPIHandler) GeminiModels(c *gin.Context) {
rawModels := h.Models()
normalizedModels := make([]map[string]any, 0, len(rawModels))
defaultMethods := []string{"generateContent"}
for _, model := range rawModels {
normalizedModel := make(map[string]any, len(model))
for k, v := range model {
normalizedModel[k] = v
}
if name, ok := normalizedModel["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") {
normalizedModel["name"] = "models/" + name
}
if _, ok := normalizedModel["supportedGenerationMethods"]; !ok {
normalizedModel["supportedGenerationMethods"] = defaultMethods
}
normalizedModels = append(normalizedModels, normalizedModel)
}
c.JSON(http.StatusOK, gin.H{
"models": h.Models(),
"models": normalizedModels,
})
}

View File

@@ -323,18 +323,32 @@ func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string
providerName, extractedModelName, isDynamic := h.parseDynamicModel(resolvedModelName)
// First, normalize the model name to handle suffixes like "-thinking-128"
// This needs to happen before determining the provider for non-dynamic models.
normalizedModel, metadata = normalizeModelMetadata(resolvedModelName)
targetModelName := resolvedModelName
if isDynamic {
targetModelName = extractedModelName
}
// Normalize the model name to handle dynamic thinking suffixes before determining the provider.
normalizedModel, metadata = normalizeModelMetadata(targetModelName)
if isDynamic {
providers = []string{providerName}
// For dynamic models, the extractedModelName is already normalized by parseDynamicModel
// so we use it as the final normalizedModel.
normalizedModel = extractedModelName
} else {
// For non-dynamic models, use the normalizedModel to get the provider name.
providers = util.GetProviderName(normalizedModel)
if len(providers) == 0 && metadata != nil {
if originalRaw, ok := metadata[util.ThinkingOriginalModelMetadataKey]; ok {
if originalModel, okStr := originalRaw.(string); okStr {
originalModel = strings.TrimSpace(originalModel)
if originalModel != "" && !strings.EqualFold(originalModel, normalizedModel) {
if altProviders := util.GetProviderName(originalModel); len(altProviders) > 0 {
providers = altProviders
normalizedModel = originalModel
}
}
}
}
}
}
if len(providers) == 0 {
@@ -382,7 +396,7 @@ func cloneBytes(src []byte) []byte {
}
func normalizeModelMetadata(modelName string) (string, map[string]any) {
return util.NormalizeGeminiThinkingModel(modelName)
return util.NormalizeThinkingModel(modelName)
}
func cloneMetadata(src map[string]any) map[string]any {

View File

@@ -498,7 +498,7 @@ func (s *Service) Run(ctx context.Context) error {
}()
time.Sleep(100 * time.Millisecond)
fmt.Printf("API server started successfully on: %d\n", s.cfg.Port)
fmt.Printf("API server started successfully on: %s:%d\n", s.cfg.Host, s.cfg.Port)
if s.hooks.OnAfterStart != nil {
s.hooks.OnAfterStart(s)
@@ -779,7 +779,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
Created: time.Now().Unix(),
OwnedBy: compat.Name,
Type: "openai-compatibility",
DisplayName: m.Name,
DisplayName: modelID,
})
}
// Register and return