feat(responses): add unique identifiers for responses, function calls, and tool uses

feat(antigravity): add unique identifier for tool use blocks in response
fix(antigravity): remove references to autopush endpoint and update fallback logic
2026-02-06 14:30:53 +08:00 · 2025-12-10 16:02:54 +08:00 · 2025-12-10 15:27:57 +08:00 · 2025-12-10 00:13:20 +08:00 · 2025-12-09 21:10:33 +08:00 · 2025-12-09 21:05:06 +08:00
34 changed files with 360 additions and 188 deletions
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -139,7 +139,8 @@ func main() {

 	wd, err := os.Getwd()
 	if err != nil {
-		log.Fatalf("failed to get working directory: %v", err)
+		log.Errorf("failed to get working directory: %v", err)
+		return
 	}

 	// Load environment variables from .env if present.
@@ -233,13 +234,15 @@ func main() {
 		})
 		cancel()
 		if err != nil {
-			log.Fatalf("failed to initialize postgres token store: %v", err)
+			log.Errorf("failed to initialize postgres token store: %v", err)
+			return
 		}
 		examplePath := filepath.Join(wd, "config.example.yaml")
 		ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
 		if errBootstrap := pgStoreInst.Bootstrap(ctx, examplePath); errBootstrap != nil {
 			cancel()
-			log.Fatalf("failed to bootstrap postgres-backed config: %v", errBootstrap)
+			log.Errorf("failed to bootstrap postgres-backed config: %v", errBootstrap)
+			return
 		}
 		cancel()
 		configFilePath = pgStoreInst.ConfigPath()
@@ -262,7 +265,8 @@ func main() {
 		if strings.Contains(resolvedEndpoint, "://") {
 			parsed, errParse := url.Parse(resolvedEndpoint)
 			if errParse != nil {
-				log.Fatalf("failed to parse object store endpoint %q: %v", objectStoreEndpoint, errParse)
+				log.Errorf("failed to parse object store endpoint %q: %v", objectStoreEndpoint, errParse)
+				return
 			}
 			switch strings.ToLower(parsed.Scheme) {
 			case "http":
@@ -270,10 +274,12 @@ func main() {
 			case "https":
 				useSSL = true
 			default:
-				log.Fatalf("unsupported object store scheme %q (only http and https are allowed)", parsed.Scheme)
+				log.Errorf("unsupported object store scheme %q (only http and https are allowed)", parsed.Scheme)
+				return
 			}
 			if parsed.Host == "" {
-				log.Fatalf("object store endpoint %q is missing host information", objectStoreEndpoint)
+				log.Errorf("object store endpoint %q is missing host information", objectStoreEndpoint)
+				return
 			}
 			resolvedEndpoint = parsed.Host
 			if parsed.Path != "" && parsed.Path != "/" {
@@ -292,13 +298,15 @@ func main() {
 		}
 		objectStoreInst, err = store.NewObjectTokenStore(objCfg)
 		if err != nil {
-			log.Fatalf("failed to initialize object token store: %v", err)
+			log.Errorf("failed to initialize object token store: %v", err)
+			return
 		}
 		examplePath := filepath.Join(wd, "config.example.yaml")
 		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
 		if errBootstrap := objectStoreInst.Bootstrap(ctx, examplePath); errBootstrap != nil {
 			cancel()
-			log.Fatalf("failed to bootstrap object-backed config: %v", errBootstrap)
+			log.Errorf("failed to bootstrap object-backed config: %v", errBootstrap)
+			return
 		}
 		cancel()
 		configFilePath = objectStoreInst.ConfigPath()
@@ -323,7 +331,8 @@ func main() {
 		gitStoreInst = store.NewGitTokenStore(gitStoreRemoteURL, gitStoreUser, gitStorePassword)
 		gitStoreInst.SetBaseDir(authDir)
 		if errRepo := gitStoreInst.EnsureRepository(); errRepo != nil {
-			log.Fatalf("failed to prepare git token store: %v", errRepo)
+			log.Errorf("failed to prepare git token store: %v", errRepo)
+			return
 		}
 		configFilePath = gitStoreInst.ConfigPath()
 		if configFilePath == "" {
@@ -332,17 +341,21 @@ func main() {
 		if _, statErr := os.Stat(configFilePath); errors.Is(statErr, fs.ErrNotExist) {
 			examplePath := filepath.Join(wd, "config.example.yaml")
 			if _, errExample := os.Stat(examplePath); errExample != nil {
-				log.Fatalf("failed to find template config file: %v", errExample)
+				log.Errorf("failed to find template config file: %v", errExample)
+				return
 			}
 			if errCopy := misc.CopyConfigTemplate(examplePath, configFilePath); errCopy != nil {
-				log.Fatalf("failed to bootstrap git-backed config: %v", errCopy)
+				log.Errorf("failed to bootstrap git-backed config: %v", errCopy)
+				return
 			}
 			if errCommit := gitStoreInst.PersistConfig(context.Background()); errCommit != nil {
-				log.Fatalf("failed to commit initial git-backed config: %v", errCommit)
+				log.Errorf("failed to commit initial git-backed config: %v", errCommit)
+				return
 			}
 			log.Infof("git-backed config initialized from template: %s", configFilePath)
 		} else if statErr != nil {
-			log.Fatalf("failed to inspect git-backed config: %v", statErr)
+			log.Errorf("failed to inspect git-backed config: %v", statErr)
+			return
 		}
 		cfg, err = config.LoadConfigOptional(configFilePath, isCloudDeploy)
 		if err == nil {
@@ -355,13 +368,15 @@ func main() {
 	} else {
 		wd, err = os.Getwd()
 		if err != nil {
-			log.Fatalf("failed to get working directory: %v", err)
+			log.Errorf("failed to get working directory: %v", err)
+			return
 		}
 		configFilePath = filepath.Join(wd, "config.yaml")
 		cfg, err = config.LoadConfigOptional(configFilePath, isCloudDeploy)
 	}
 	if err != nil {
-		log.Fatalf("failed to load config: %v", err)
+		log.Errorf("failed to load config: %v", err)
+		return
 	}
 	if cfg == nil {
 		cfg = &config.Config{}
@@ -391,7 +406,8 @@ func main() {
 	coreauth.SetQuotaCooldownDisabled(cfg.DisableCooling)

 	if err = logging.ConfigureLogOutput(cfg.LoggingToFile); err != nil {
-		log.Fatalf("failed to configure log output: %v", err)
+		log.Errorf("failed to configure log output: %v", err)
+		return
 	}

 	log.Infof("CLIProxyAPI Version: %s, Commit: %s, BuiltAt: %s", buildinfo.Version, buildinfo.Commit, buildinfo.BuildDate)
@@ -400,7 +416,8 @@ func main() {
 	util.SetLogLevel(cfg)

 	if resolvedAuthDir, errResolveAuthDir := util.ResolveAuthDir(cfg.AuthDir); errResolveAuthDir != nil {
-		log.Fatalf("failed to resolve auth directory: %v", errResolveAuthDir)
+		log.Errorf("failed to resolve auth directory: %v", errResolveAuthDir)
+		return
 	} else {
 		cfg.AuthDir = resolvedAuthDir
 	}
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -1,3 +1,7 @@
+# Server host/interface to bind to. Default is empty ("") to bind all interfaces (IPv4 + IPv6).
+# Use "127.0.0.1" or "localhost" to restrict access to local machine only.
+host: ""
+
 # Server port
 port: 8317

--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -713,14 +713,16 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 	// Generate PKCE codes
 	pkceCodes, err := claude.GeneratePKCECodes()
 	if err != nil {
-		log.Fatalf("Failed to generate PKCE codes: %v", err)
+		log.Errorf("Failed to generate PKCE codes: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate PKCE codes"})
 		return
 	}

 	// Generate random state parameter
 	state, err := misc.GenerateRandomState()
 	if err != nil {
-		log.Fatalf("Failed to generate state parameter: %v", err)
+		log.Errorf("Failed to generate state parameter: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate state parameter"})
 		return
 	}

@@ -730,7 +732,8 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 	// Generate authorization URL (then override redirect_uri to reuse server port)
 	authURL, state, err := anthropicAuth.GenerateAuthURL(state, pkceCodes)
 	if err != nil {
-		log.Fatalf("Failed to generate authorization URL: %v", err)
+		log.Errorf("Failed to generate authorization URL: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate authorization url"})
 		return
 	}

@@ -872,7 +875,7 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 		}
 		savedPath, errSave := h.saveTokenRecord(ctx, record)
 		if errSave != nil {
-			log.Fatalf("Failed to save authentication tokens: %v", errSave)
+			log.Errorf("Failed to save authentication tokens: %v", errSave)
 			oauthStatus[state] = "Failed to save authentication tokens"
 			return
 		}
@@ -1045,7 +1048,7 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		gemAuth := geminiAuth.NewGeminiAuth()
 		gemClient, errGetClient := gemAuth.GetAuthenticatedClient(ctx, &ts, h.cfg, true)
 		if errGetClient != nil {
-			log.Fatalf("failed to get authenticated client: %v", errGetClient)
+			log.Errorf("failed to get authenticated client: %v", errGetClient)
 			oauthStatus[state] = "Failed to get authenticated client"
 			return
 		}
@@ -1110,7 +1113,7 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		}
 		savedPath, errSave := h.saveTokenRecord(ctx, record)
 		if errSave != nil {
-			log.Fatalf("Failed to save token to file: %v", errSave)
+			log.Errorf("Failed to save token to file: %v", errSave)
 			oauthStatus[state] = "Failed to save token to file"
 			return
 		}
@@ -1131,14 +1134,16 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 	// Generate PKCE codes
 	pkceCodes, err := codex.GeneratePKCECodes()
 	if err != nil {
-		log.Fatalf("Failed to generate PKCE codes: %v", err)
+		log.Errorf("Failed to generate PKCE codes: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate PKCE codes"})
 		return
 	}

 	// Generate random state parameter
 	state, err := misc.GenerateRandomState()
 	if err != nil {
-		log.Fatalf("Failed to generate state parameter: %v", err)
+		log.Errorf("Failed to generate state parameter: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate state parameter"})
 		return
 	}

@@ -1148,7 +1153,8 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 	// Generate authorization URL
 	authURL, err := openaiAuth.GenerateAuthURL(state, pkceCodes)
 	if err != nil {
-		log.Fatalf("Failed to generate authorization URL: %v", err)
+		log.Errorf("Failed to generate authorization URL: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate authorization url"})
 		return
 	}

@@ -1283,7 +1289,7 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 		savedPath, errSave := h.saveTokenRecord(ctx, record)
 		if errSave != nil {
 			oauthStatus[state] = "Failed to save authentication tokens"
-			log.Fatalf("Failed to save authentication tokens: %v", errSave)
+			log.Errorf("Failed to save authentication tokens: %v", errSave)
 			return
 		}
 		fmt.Printf("Authentication successful! Token saved to %s\n", savedPath)
@@ -1318,7 +1324,8 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {

 	state, errState := misc.GenerateRandomState()
 	if errState != nil {
-		log.Fatalf("Failed to generate state parameter: %v", errState)
+		log.Errorf("Failed to generate state parameter: %v", errState)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate state parameter"})
 		return
 	}

@@ -1514,7 +1521,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 		}
 		savedPath, errSave := h.saveTokenRecord(ctx, record)
 		if errSave != nil {
-			log.Fatalf("Failed to save token to file: %v", errSave)
+			log.Errorf("Failed to save token to file: %v", errSave)
 			oauthStatus[state] = "Failed to save token to file"
 			return
 		}
@@ -1543,7 +1550,8 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
 	// Generate authorization URL
 	deviceFlow, err := qwenAuth.InitiateDeviceFlow(ctx)
 	if err != nil {
-		log.Fatalf("Failed to generate authorization URL: %v", err)
+		log.Errorf("Failed to generate authorization URL: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate authorization url"})
 		return
 	}
 	authURL := deviceFlow.VerificationURIComplete
@@ -1570,7 +1578,7 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
 		}
 		savedPath, errSave := h.saveTokenRecord(ctx, record)
 		if errSave != nil {
-			log.Fatalf("Failed to save authentication tokens: %v", errSave)
+			log.Errorf("Failed to save authentication tokens: %v", errSave)
 			oauthStatus[state] = "Failed to save authentication tokens"
 			return
 		}
@@ -1674,7 +1682,7 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
 		savedPath, errSave := h.saveTokenRecord(ctx, record)
 		if errSave != nil {
 			oauthStatus[state] = "Failed to save authentication tokens"
-			log.Fatalf("Failed to save authentication tokens: %v", errSave)
+			log.Errorf("Failed to save authentication tokens: %v", errSave)
 			return
 		}

@@ -2103,6 +2111,7 @@ func checkCloudAPIIsEnabled(ctx context.Context, httpClient *http.Client, projec
 				continue
 			}
 		}
+		_ = resp.Body.Close()
 		return false, fmt.Errorf("project activation required: %s", errMessage)
 	}
 	return true, nil
--- a/internal/api/modules/amp/routes.go
+++ b/internal/api/modules/amp/routes.go
@@ -156,6 +156,7 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
 	rootMiddleware := []gin.HandlerFunc{m.managementAvailabilityMiddleware(), noCORSMiddleware(), m.localhostOnlyMiddleware()}
 	engine.GET("/threads/*path", append(rootMiddleware, proxyHandler)...)
 	engine.GET("/threads.rss", append(rootMiddleware, proxyHandler)...)
+	engine.GET("/news.rss", append(rootMiddleware, proxyHandler)...)

 	// Root-level auth routes for CLI login flow
 	// Amp uses multiple auth routes: /auth/cli-login, /auth/callback, /auth/sign-in, /auth/logout
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -300,7 +300,7 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk

 	// Create HTTP server
 	s.server = &http.Server{
-		Addr:    fmt.Sprintf(":%d", cfg.Port),
+		Addr:    fmt.Sprintf("%s:%d", cfg.Host, cfg.Port),
 		Handler: engine,
 	}

--- a/internal/auth/gemini/gemini_auth.go
+++ b/internal/auth/gemini/gemini_auth.go
@@ -76,7 +76,8 @@ func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiToken
 			auth := &proxy.Auth{User: username, Password: password}
 			dialer, errSOCKS5 := proxy.SOCKS5("tcp", proxyURL.Host, auth, proxy.Direct)
 			if errSOCKS5 != nil {
-				log.Fatalf("create SOCKS5 dialer failed: %v", errSOCKS5)
+				log.Errorf("create SOCKS5 dialer failed: %v", errSOCKS5)
+				return nil, fmt.Errorf("create SOCKS5 dialer failed: %w", errSOCKS5)
 			}
 			transport = &http.Transport{
 				DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
@@ -238,7 +239,11 @@ func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config,
 	// Start the server in a goroutine.
 	go func() {
 		if err := server.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) {
-			log.Fatalf("ListenAndServe(): %v", err)
+			log.Errorf("ListenAndServe(): %v", err)
+			select {
+			case errChan <- err:
+			default:
+			}
 		}
 	}()

--- a/internal/cmd/login.go
+++ b/internal/cmd/login.go
@@ -65,20 +65,20 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
 	authenticator := sdkAuth.NewGeminiAuthenticator()
 	record, errLogin := authenticator.Login(ctx, cfg, loginOpts)
 	if errLogin != nil {
-		log.Fatalf("Gemini authentication failed: %v", errLogin)
+		log.Errorf("Gemini authentication failed: %v", errLogin)
 		return
 	}

 	storage, okStorage := record.Storage.(*gemini.GeminiTokenStorage)
 	if !okStorage || storage == nil {
-		log.Fatal("Gemini authentication failed: unsupported token storage")
+		log.Error("Gemini authentication failed: unsupported token storage")
 		return
 	}

 	geminiAuth := gemini.NewGeminiAuth()
 	httpClient, errClient := geminiAuth.GetAuthenticatedClient(ctx, storage, cfg, options.NoBrowser)
 	if errClient != nil {
-		log.Fatalf("Gemini authentication failed: %v", errClient)
+		log.Errorf("Gemini authentication failed: %v", errClient)
 		return
 	}

@@ -86,7 +86,7 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {

 	projects, errProjects := fetchGCPProjects(ctx, httpClient)
 	if errProjects != nil {
-		log.Fatalf("Failed to get project list: %v", errProjects)
+		log.Errorf("Failed to get project list: %v", errProjects)
 		return
 	}

@@ -98,11 +98,11 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
 	selectedProjectID := promptForProjectSelection(projects, strings.TrimSpace(projectID), promptFn)
 	projectSelections, errSelection := resolveProjectSelections(selectedProjectID, projects)
 	if errSelection != nil {
-		log.Fatalf("Invalid project selection: %v", errSelection)
+		log.Errorf("Invalid project selection: %v", errSelection)
 		return
 	}
 	if len(projectSelections) == 0 {
-		log.Fatal("No project selected; aborting login.")
+		log.Error("No project selected; aborting login.")
 		return
 	}

@@ -116,7 +116,7 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
 				showProjectSelectionHelp(storage.Email, projects)
 				return
 			}
-			log.Fatalf("Failed to complete user setup: %v", errSetup)
+			log.Errorf("Failed to complete user setup: %v", errSetup)
 			return
 		}
 		finalID := strings.TrimSpace(storage.ProjectID)
@@ -133,11 +133,11 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
 		for _, pid := range activatedProjects {
 			isChecked, errCheck := checkCloudAPIIsEnabled(ctx, httpClient, pid)
 			if errCheck != nil {
-				log.Fatalf("Failed to check if Cloud AI API is enabled for %s: %v", pid, errCheck)
+				log.Errorf("Failed to check if Cloud AI API is enabled for %s: %v", pid, errCheck)
 				return
 			}
 			if !isChecked {
-				log.Fatalf("Failed to check if Cloud AI API is enabled for project %s. If you encounter an error message, please create an issue.", pid)
+				log.Errorf("Failed to check if Cloud AI API is enabled for project %s. If you encounter an error message, please create an issue.", pid)
 				return
 			}
 		}
@@ -153,7 +153,7 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {

 	savedPath, errSave := store.Save(ctx, record)
 	if errSave != nil {
-		log.Fatalf("Failed to save token to file: %v", errSave)
+		log.Errorf("Failed to save token to file: %v", errSave)
 		return
 	}

@@ -555,6 +555,7 @@ func checkCloudAPIIsEnabled(ctx context.Context, httpClient *http.Client, projec
 				continue
 			}
 		}
+		_ = resp.Body.Close()
 		return false, fmt.Errorf("project activation required: %s", errMessage)
 	}
 	return true, nil
--- a/internal/cmd/run.go
+++ b/internal/cmd/run.go
@@ -45,12 +45,13 @@ func StartService(cfg *config.Config, configPath string, localPassword string) {

 	service, err := builder.Build()
 	if err != nil {
-		log.Fatalf("failed to build proxy service: %v", err)
+		log.Errorf("failed to build proxy service: %v", err)
+		return
 	}

 	err = service.Run(runCtx)
 	if err != nil && !errors.Is(err, context.Canceled) {
-		log.Fatalf("proxy service exited with error: %v", err)
+		log.Errorf("proxy service exited with error: %v", err)
 	}
 }

--- a/internal/cmd/vertex_import.go
+++ b/internal/cmd/vertex_import.go
@@ -29,30 +29,30 @@ func DoVertexImport(cfg *config.Config, keyPath string) {
 	}
 	rawPath := strings.TrimSpace(keyPath)
 	if rawPath == "" {
-		log.Fatalf("vertex-import: missing service account key path")
+		log.Errorf("vertex-import: missing service account key path")
 		return
 	}
 	data, errRead := os.ReadFile(rawPath)
 	if errRead != nil {
-		log.Fatalf("vertex-import: read file failed: %v", errRead)
+		log.Errorf("vertex-import: read file failed: %v", errRead)
 		return
 	}
 	var sa map[string]any
 	if errUnmarshal := json.Unmarshal(data, &sa); errUnmarshal != nil {
-		log.Fatalf("vertex-import: invalid service account json: %v", errUnmarshal)
+		log.Errorf("vertex-import: invalid service account json: %v", errUnmarshal)
 		return
 	}
 	// Validate and normalize private_key before saving
 	normalizedSA, errFix := vertex.NormalizeServiceAccountMap(sa)
 	if errFix != nil {
-		log.Fatalf("vertex-import: %v", errFix)
+		log.Errorf("vertex-import: %v", errFix)
 		return
 	}
 	sa = normalizedSA
 	email, _ := sa["client_email"].(string)
 	projectID, _ := sa["project_id"].(string)
 	if strings.TrimSpace(projectID) == "" {
-		log.Fatalf("vertex-import: project_id missing in service account json")
+		log.Errorf("vertex-import: project_id missing in service account json")
 		return
 	}
 	if strings.TrimSpace(email) == "" {
@@ -92,7 +92,7 @@ func DoVertexImport(cfg *config.Config, keyPath string) {
 	}
 	path, errSave := store.Save(context.Background(), record)
 	if errSave != nil {
-		log.Fatalf("vertex-import: save credential failed: %v", errSave)
+		log.Errorf("vertex-import: save credential failed: %v", errSave)
 		return
 	}
 	fmt.Printf("Vertex credentials imported: %s\n", path)
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -20,6 +20,9 @@ import (
 // Config represents the application's configuration, loaded from a YAML file.
 type Config struct {
 	config.SDKConfig `yaml:",inline"`
+	// Host is the network host/interface on which the API server will bind.
+	// Default is empty ("") to bind all interfaces (IPv4 + IPv6). Use "127.0.0.1" or "localhost" for local-only access.
+	Host string `yaml:"host" json:"-"`
 	// Port is the network port on which the API server will listen.
 	Port int `yaml:"port" json:"-"`

@@ -320,6 +323,7 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	// Unmarshal the YAML data into the Config struct.
 	var cfg Config
 	// Set defaults before unmarshal so that absent keys keep defaults.
+	cfg.Host = "" // Default empty: binds to all interfaces (IPv4 + IPv6)
 	cfg.LoggingToFile = false
 	cfg.UsageStatisticsEnabled = false
 	cfg.DisableCooling = false
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -693,8 +693,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Low",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
+			DisplayName:         "GPT 5.1 Nothink",
+			Description:         "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -719,8 +719,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Medium",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
+			DisplayName:         "GPT 5.1 Medium",
+			Description:         "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -732,8 +732,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 High",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
+			DisplayName:         "GPT 5.1 High",
+			Description:         "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -745,8 +745,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Codex",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
+			DisplayName:         "GPT 5.1 Codex",
+			Description:         "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -758,8 +758,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Codex Low",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
+			DisplayName:         "GPT 5.1 Codex Low",
+			Description:         "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -771,8 +771,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Codex Medium",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
+			DisplayName:         "GPT 5.1 Codex Medium",
+			Description:         "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -784,8 +784,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Codex High",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
+			DisplayName:         "GPT 5.1 Codex High",
+			Description:         "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -797,8 +797,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Codex Mini",
-			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
+			DisplayName:         "GPT 5.1 Codex Mini",
+			Description:         "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -810,8 +810,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Codex Mini Medium",
-			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
+			DisplayName:         "GPT 5.1 Codex Mini Medium",
+			Description:         "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -823,8 +823,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Codex Mini High",
-			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
+			DisplayName:         "GPT 5.1 Codex Mini High",
+			Description:         "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -837,8 +837,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5 Codex Max",
-			Description:         "Stable version of GPT 5 Codex Max",
+			DisplayName:         "GPT 5.1 Codex Max",
+			Description:         "Stable version of GPT 5.1 Codex Max",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -850,8 +850,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5 Codex Max Low",
-			Description:         "Stable version of GPT 5 Codex Max Low",
+			DisplayName:         "GPT 5.1 Codex Max Low",
+			Description:         "Stable version of GPT 5.1 Codex Max Low",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -863,8 +863,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5 Codex Max Medium",
-			Description:         "Stable version of GPT 5 Codex Max Medium",
+			DisplayName:         "GPT 5.1 Codex Max Medium",
+			Description:         "Stable version of GPT 5.1 Codex Max Medium",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -876,8 +876,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5 Codex Max High",
-			Description:         "Stable version of GPT 5 Codex Max High",
+			DisplayName:         "GPT 5.1 Codex Max High",
+			Description:         "Stable version of GPT 5.1 Codex Max High",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -889,8 +889,8 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5 Codex Max XHigh",
-			Description:         "Stable version of GPT 5 Codex Max XHigh",
+			DisplayName:         "GPT 5.1 Codex Max XHigh",
+			Description:         "Stable version of GPT 5.1 Codex Max XHigh",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
@@ -991,16 +991,19 @@ func GetIFlowModels() []*ModelInfo {
 type AntigravityModelConfig struct {
 	Thinking            *ThinkingSupport
 	MaxCompletionTokens int
+	Name                string
 }

 // GetAntigravityModelConfig returns static configuration for antigravity models.
 // Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup.
 func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
 	return map[string]*AntigravityModelConfig{
-		"gemini-2.5-flash":                  {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
-		"gemini-2.5-flash-lite":             {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
-		"gemini-3-pro-preview":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
-		"gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"gemini-claude-opus-4-5-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"gemini-2.5-flash":                        {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
+		"gemini-2.5-flash-lite":                   {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
+		"gemini-2.5-computer-use-preview-10-2025": {Name: "models/gemini-2.5-computer-use-preview-10-2025"},
+		"gemini-3-pro-preview":                    {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-preview"},
+		"gemini-3-pro-image-preview":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-image-preview"},
+		"gemini-claude-sonnet-4-5-thinking":       {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"gemini-claude-opus-4-5-thinking":         {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
 	}
 }
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -309,7 +309,9 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	to := sdktranslator.FromString("gemini")
 	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
 	payload = applyThinkingMetadata(payload, req.Metadata, req.Model)
+	payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
 	payload = util.ConvertThinkingLevelToBudget(payload)
+	payload = util.NormalizeGeminiThinkingBudget(req.Model, payload)
 	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
 	payload = fixGeminiImageAspectRatio(req.Model, payload)
 	payload = applyPayloadConfig(e.cfg, req.Model, payload)
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -27,18 +27,18 @@ import (
 )

 const (
-	antigravityBaseURLDaily        = "https://daily-cloudcode-pa.sandbox.googleapis.com"
-	antigravityBaseURLAutopush     = "https://autopush-cloudcode-pa.sandbox.googleapis.com"
-	antigravityBaseURLProd         = "https://cloudcode-pa.googleapis.com"
-	antigravityStreamPath          = "/v1internal:streamGenerateContent"
-	antigravityGeneratePath        = "/v1internal:generateContent"
-	antigravityModelsPath          = "/v1internal:fetchAvailableModels"
-	antigravityClientID            = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
-	antigravityClientSecret        = "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"
-	defaultAntigravityAgent        = "antigravity/1.11.5 windows/amd64"
-	antigravityAuthType            = "antigravity"
-	refreshSkew                    = 3000 * time.Second
-	streamScannerBuffer        int = 20_971_520
+	antigravityBaseURLDaily = "https://daily-cloudcode-pa.sandbox.googleapis.com"
+	// antigravityBaseURLAutopush     = "https://autopush-cloudcode-pa.sandbox.googleapis.com"
+	antigravityBaseURLProd      = "https://cloudcode-pa.googleapis.com"
+	antigravityStreamPath       = "/v1internal:streamGenerateContent"
+	antigravityGeneratePath     = "/v1internal:generateContent"
+	antigravityModelsPath       = "/v1internal:fetchAvailableModels"
+	antigravityClientID         = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
+	antigravityClientSecret     = "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"
+	defaultAntigravityAgent     = "antigravity/1.11.5 windows/amd64"
+	antigravityAuthType         = "antigravity"
+	refreshSkew                 = 3000 * time.Second
+	streamScannerBuffer     int = 20_971_520
 )

 var randSource = rand.New(rand.NewSource(time.Now().UnixNano()))
@@ -77,6 +77,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)

 	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
@@ -171,6 +172,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

 	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
@@ -373,9 +375,14 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 		for originalName := range result.Map() {
 			aliasName := modelName2Alias(originalName)
 			if aliasName != "" {
+				cfg := modelConfig[aliasName]
+				modelName := aliasName
+				if cfg != nil && cfg.Name != "" {
+					modelName = cfg.Name
+				}
 				modelInfo := &registry.ModelInfo{
 					ID:          aliasName,
-					Name:        aliasName,
+					Name:        modelName,
 					Description: aliasName,
 					DisplayName: aliasName,
 					Version:     aliasName,
@@ -385,7 +392,7 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 					Type:        antigravityAuthType,
 				}
 				// Look up Thinking support from static config using alias name
-				if cfg, ok := modelConfig[aliasName]; ok {
+				if cfg != nil {
 					if cfg.Thinking != nil {
 						modelInfo.Thinking = cfg.Thinking
 					}
@@ -536,6 +543,9 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 		strJSON = util.DeleteKey(strJSON, "minLength")
 		strJSON = util.DeleteKey(strJSON, "maxLength")
 		strJSON = util.DeleteKey(strJSON, "exclusiveMinimum")
+		strJSON = util.DeleteKey(strJSON, "exclusiveMaximum")
+		strJSON = util.DeleteKey(strJSON, "$ref")
+		strJSON = util.DeleteKey(strJSON, "$defs")

 		paths = make([]string, 0)
 		util.Walk(gjson.Parse(strJSON), "", "anyOf", &paths)
@@ -651,7 +661,7 @@ func buildBaseURL(auth *cliproxyauth.Auth) string {
 	if baseURLs := antigravityBaseURLFallbackOrder(auth); len(baseURLs) > 0 {
 		return baseURLs[0]
 	}
-	return antigravityBaseURLAutopush
+	return antigravityBaseURLDaily
 }

 func resolveHost(base string) string {
@@ -687,7 +697,7 @@ func antigravityBaseURLFallbackOrder(auth *cliproxyauth.Auth) []string {
 	}
 	return []string{
 		antigravityBaseURLDaily,
-		antigravityBaseURLAutopush,
+		// antigravityBaseURLAutopush,
 		antigravityBaseURLProd,
 	}
 }
@@ -831,9 +841,12 @@ func normalizeAntigravityThinking(model string, payload []byte) []byte {
 		effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload)
 		if effectiveMax > 0 && normalized >= effectiveMax {
 			normalized = effectiveMax - 1
-			if normalized < 1 {
-				normalized = 1
-			}
+		}
+		minBudget := antigravityMinThinkingBudget(model)
+		if minBudget > 0 && normalized >= 0 && normalized < minBudget {
+			// Budget is below minimum, remove thinking config entirely
+			payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig")
+			return payload
 		}
 		if setDefaultMax {
 			if res, errSet := sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax); errSet == nil {
@@ -861,3 +874,12 @@ func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromM
 	}
 	return 0, false
 }
+
+// antigravityMinThinkingBudget returns the minimum thinking budget for a model.
+// Falls back to -1 if no model info is found.
+func antigravityMinThinkingBudget(model string) int {
+	if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.Thinking != nil {
+		return modelInfo.Thinking.Min
+	}
+	return -1
+}
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -64,6 +64,8 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	to := sdktranslator.FromString("gemini-cli")
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
+	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
 	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
@@ -199,6 +201,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	to := sdktranslator.FromString("gemini-cli")
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
+	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
 	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -80,6 +80,8 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = applyThinkingMetadata(body, req.Metadata, req.Model)
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -169,6 +171,8 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	body = applyThinkingMetadata(body, req.Metadata, req.Model)
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -296,6 +296,8 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -391,6 +393,8 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -487,6 +491,8 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -599,6 +605,8 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -180,7 +180,6 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
-				budget = util.NormalizeThinkingBudget(modelName, budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
--- a/internal/translator/antigravity/claude/antigravity_claude_response.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_response.go
@@ -12,6 +12,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	"github.com/tidwall/gjson"
@@ -36,6 +37,9 @@ type Params struct {
 	HasToolUse           bool   // Indicates if tool use was observed in the stream
 }

+// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
+var toolUseIDCounter uint64
+
 // ConvertAntigravityResponseToClaude performs sophisticated streaming response format conversion.
 // This function implements a complex state machine that translates backend client responses
 // into Claude Code-compatible Server-Sent Events (SSE) format. It manages different response types
@@ -216,7 +220,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq

 				// Create the tool use block with unique ID and function details
 				data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, params.ResponseIndex)
-				data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
+				data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
 				data, _ = sjson.Set(data, "content_block.name", fcName)
 				output = output + fmt.Sprintf("data: %s\n\n\n", data)

--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
-			var normalized int
+			var budget int

 			if v := tc.Get("thinkingBudget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			} else if v := tc.Get("thinking_budget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}

@@ -82,7 +82,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget && normalized != 0 {
+			} else if setBudget && budget != 0 {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
 		}
@@ -94,7 +94,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
 			if t.Get("type").String() == "enabled" {
 				if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
-					budget := util.NormalizeThinkingBudget(modelName, int(b.Int()))
+					budget := int(b.Int())
 					out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 					out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 				}
@@ -102,15 +102,6 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		}
 	}

-	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
-	// This matches the official Gemini CLI behavior which always sends:
-	// { thinkingBudget: -1, includeThoughts: true }
-	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
-	if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
-		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-	}
-
 	// Temperature/top_p/top_k/max_tokens
 	if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
 		out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
@@ -11,6 +11,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
@@ -24,6 +25,9 @@ type convertCliResponseToOpenAIChatParams struct {
 	FunctionIndex int
 }

+// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
+var functionCallIDCounter uint64
+
 // ConvertAntigravityResponseToOpenAI translates a single chunk of a streaming response from the
 // Gemini CLI API format to the OpenAI Chat Completions streaming format.
 // It processes various Gemini CLI event types and transforms them into OpenAI-compatible JSON responses.
@@ -146,7 +150,7 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq

 				functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
 				fcName := functionCallResult.Get("name").String()
-				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
+				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
 				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
 				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
 				if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
@@ -165,7 +165,6 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
-				budget = util.NormalizeThinkingBudget(modelName, budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_response.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_response.go
@@ -12,6 +12,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	"github.com/tidwall/gjson"
@@ -27,6 +28,9 @@ type Params struct {
 	ResponseIndex    int  // Index counter for content blocks in the streaming response
 }

+// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
+var toolUseIDCounter uint64
+
 // ConvertGeminiCLIResponseToClaude performs sophisticated streaming response format conversion.
 // This function implements a complex state machine that translates backend client responses
 // into Claude Code-compatible Server-Sent Events (SSE) format. It manages different response types
@@ -197,7 +201,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque

 				// Create the tool use block with unique ID and function details
 				data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, (*param).(*Params).ResponseIndex)
-				data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
+				data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
 				data, _ = sjson.Set(data, "content_block.name", fcName)
 				output = output + fmt.Sprintf("data: %s\n\n\n", data)

--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
-			var normalized int
+			var budget int

 			if v := tc.Get("thinkingBudget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			} else if v := tc.Get("thinking_budget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}

@@ -82,21 +82,12 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget && normalized != 0 {
+			} else if setBudget && budget != 0 {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
 		}
 	}

-	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
-	// This matches the official Gemini CLI behavior which always sends:
-	// { thinkingBudget: -1, includeThoughts: true }
-	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
-	if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
-		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-	}
-
 	// Temperature/top_p/top_k
 	if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
 		out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
@@ -11,6 +11,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
@@ -24,6 +25,9 @@ type convertCliResponseToOpenAIChatParams struct {
 	FunctionIndex int
 }

+// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
+var functionCallIDCounter uint64
+
 // ConvertCliResponseToOpenAI translates a single chunk of a streaming response from the
 // Gemini CLI API format to the OpenAI Chat Completions streaming format.
 // It processes various Gemini CLI event types and transforms them into OpenAI-compatible JSON responses.
@@ -146,7 +150,7 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ

 				functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
 				fcName := functionCallResult.Get("name").String()
-				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
+				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
 				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
 				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
 				if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
--- a/internal/translator/gemini/claude/gemini_claude_request.go
+++ b/internal/translator/gemini/claude/gemini_claude_request.go
@@ -158,7 +158,6 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
-				budget = util.NormalizeThinkingBudget(modelName, budget)
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 			}
--- a/internal/translator/gemini/claude/gemini_claude_response.go
+++ b/internal/translator/gemini/claude/gemini_claude_response.go
@@ -12,6 +12,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	"github.com/tidwall/gjson"
@@ -26,6 +27,9 @@ type Params struct {
 	ResponseIndex    int
 }

+// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
+var toolUseIDCounter uint64
+
 // ConvertGeminiResponseToClaude performs sophisticated streaming response format conversion.
 // This function implements a complex state machine that translates backend client responses
 // into Claude-compatible Server-Sent Events (SSE) format. It manages different response types
@@ -197,7 +201,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR

 				// Create the tool use block with unique ID and function details
 				data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, (*param).(*Params).ResponseIndex)
-				data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
+				data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
 				data, _ = sjson.Set(data, "content_block.name", fcName)
 				output = output + fmt.Sprintf("data: %s\n\n\n", data)

--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 32768)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
-			var normalized int
+			var budget int

 			if v := tc.Get("thinkingBudget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			} else if v := tc.Get("thinking_budget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}

@@ -82,7 +82,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget && normalized != 0 {
+			} else if setBudget && budget != 0 {
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 			}
 		}
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
@@ -11,6 +11,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	"github.com/tidwall/gjson"
@@ -23,6 +24,9 @@ type convertGeminiResponseToOpenAIChatParams struct {
 	FunctionIndex int
 }

+// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
+var functionCallIDCounter uint64
+
 // ConvertGeminiResponseToOpenAI translates a single chunk of a streaming response from the
 // Gemini API format to the OpenAI Chat Completions streaming format.
 // It processes various Gemini event types and transforms them into OpenAI-compatible JSON responses.
@@ -148,7 +152,7 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR

 				functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
 				fcName := functionCallResult.Get("name").String()
-				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
+				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
 				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
 				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
 				if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
@@ -281,7 +285,7 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
 				}
 				functionCallItemTemplate := `{"id": "","type": "function","function": {"name": "","arguments": ""}}`
 				fcName := functionCallResult.Get("name").String()
-				functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
+				functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
 				functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", fcName)
 				if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
 					functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", fcArgsResult.Raw)
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -400,16 +400,16 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "minimal":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096)
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 32768)
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -421,32 +421,22 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
-			var normalized int
+			var budget int
 			if v := tc.Get("thinking_budget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}
 			if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if setBudget {
-				if normalized != 0 {
+				if budget != 0 {
 					out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 				}
 			}
 		}
 	}

-	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
-	// This matches the official Gemini CLI behavior which always sends:
-	// { thinkingBudget: -1, includeThoughts: true }
-	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
-	if !gjson.Get(out, "generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		// log.Debugf("Applied default thinkingConfig for gemini-3-pro-preview (matches Gemini CLI): thinkingBudget=-1, include_thoughts=true")
-	}
-
 	result := []byte(out)
 	result = common.AttachDefaultSafetySettings(result, "safetySettings")
 	return result
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go
@@ -5,6 +5,7 @@ import (
 	"context"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	"github.com/tidwall/gjson"
@@ -37,6 +38,12 @@ type geminiToResponsesState struct {
 	FuncCallIDs map[int]string
 }

+// responseIDCounter provides a process-wide unique counter for synthesized response identifiers.
+var responseIDCounter uint64
+
+// funcCallIDCounter provides a process-wide unique counter for function call identifiers.
+var funcCallIDCounter uint64
+
 func emitEvent(event string, payload string) string {
 	return fmt.Sprintf("event: %s\ndata: %s", event, payload)
 }
@@ -205,7 +212,7 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 					st.FuncArgsBuf[idx] = &strings.Builder{}
 				}
 				if st.FuncCallIDs[idx] == "" {
-					st.FuncCallIDs[idx] = fmt.Sprintf("call_%d", time.Now().UnixNano())
+					st.FuncCallIDs[idx] = fmt.Sprintf("call_%d_%d", time.Now().UnixNano(), atomic.AddUint64(&funcCallIDCounter, 1))
 				}
 				st.FuncNames[idx] = name

@@ -464,7 +471,7 @@ func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string
 	// id: prefer provider responseId, otherwise synthesize
 	id := root.Get("responseId").String()
 	if id == "" {
-		id = fmt.Sprintf("resp_%x", time.Now().UnixNano())
+		id = fmt.Sprintf("resp_%x_%d", time.Now().UnixNano(), atomic.AddUint64(&responseIDCounter, 1))
 	}
 	// Normalize to response-style id (prefix resp_ if missing)
 	if !strings.HasPrefix(id, "resp_") {
@@ -575,7 +582,7 @@ func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string
 			if fc := p.Get("functionCall"); fc.Exists() {
 				name := fc.Get("name").String()
 				args := fc.Get("args")
-				callID := fmt.Sprintf("call_%x", time.Now().UnixNano())
+				callID := fmt.Sprintf("call_%x_%d", time.Now().UnixNano(), atomic.AddUint64(&funcCallIDCounter, 1))
 				outputs = append(outputs, map[string]interface{}{
 					"id":     fmt.Sprintf("fc_%s", callID),
 					"type":   "function_call",
--- a/internal/translator/openai/openai/responses/openai_openai-responses_response.go
+++ b/internal/translator/openai/openai/responses/openai_openai-responses_response.go
@@ -5,6 +5,7 @@ import (
 	"context"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	"github.com/tidwall/gjson"
@@ -41,6 +42,9 @@ type oaiToResponsesState struct {
 	UsageSeen        bool
 }

+// responseIDCounter provides a process-wide unique counter for synthesized response identifiers.
+var responseIDCounter uint64
+
 func emitRespEvent(event string, payload string) string {
 	return fmt.Sprintf("event: %s\ndata: %s", event, payload)
 }
@@ -590,7 +594,7 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(_ context.Co
 	// id: use provider id if present, otherwise synthesize
 	id := root.Get("id").String()
 	if id == "" {
-		id = fmt.Sprintf("resp_%x", time.Now().UnixNano())
+		id = fmt.Sprintf("resp_%x_%d", time.Now().UnixNano(), atomic.AddUint64(&responseIDCounter, 1))
 	}
 	resp, _ = sjson.Set(resp, "id", id)

--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -207,6 +207,47 @@ func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
 	return budgetPtr, includePtr, matched
 }

+// modelsWithDefaultThinking lists models that should have thinking enabled by default
+// when no explicit thinkingConfig is provided.
+var modelsWithDefaultThinking = map[string]bool{
+	"gemini-3-pro-preview": true,
+}
+
+// ModelHasDefaultThinking returns true if the model should have thinking enabled by default.
+func ModelHasDefaultThinking(model string) bool {
+	return modelsWithDefaultThinking[model]
+}
+
+// ApplyDefaultThinkingIfNeeded injects default thinkingConfig for models that require it.
+// For standard Gemini API format (generationConfig.thinkingConfig path).
+// Returns the modified body if thinkingConfig was added, otherwise returns the original.
+func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
+	if !ModelHasDefaultThinking(model) {
+		return body
+	}
+	if gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() {
+		return body
+	}
+	updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", -1)
+	updated, _ = sjson.SetBytes(updated, "generationConfig.thinkingConfig.include_thoughts", true)
+	return updated
+}
+
+// ApplyDefaultThinkingIfNeededCLI injects default thinkingConfig for models that require it.
+// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
+// Returns the modified body if thinkingConfig was added, otherwise returns the original.
+func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
+	if !ModelHasDefaultThinking(model) {
+		return body
+	}
+	if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() {
+		return body
+	}
+	updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
+	updated, _ = sjson.SetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts", true)
+	return updated
+}
+
 // StripThinkingConfigIfUnsupported removes thinkingConfig from the request body
 // when the target model does not advertise Thinking capability. It cleans both
 // standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net
@@ -223,6 +264,32 @@ func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
 	return updated
 }

+// NormalizeGeminiThinkingBudget normalizes the thinkingBudget value in a standard Gemini
+// request body (generationConfig.thinkingConfig.thinkingBudget path).
+func NormalizeGeminiThinkingBudget(model string, body []byte) []byte {
+	const budgetPath = "generationConfig.thinkingConfig.thinkingBudget"
+	budget := gjson.GetBytes(body, budgetPath)
+	if !budget.Exists() {
+		return body
+	}
+	normalized := NormalizeThinkingBudget(model, int(budget.Int()))
+	updated, _ := sjson.SetBytes(body, budgetPath, normalized)
+	return updated
+}
+
+// NormalizeGeminiCLIThinkingBudget normalizes the thinkingBudget value in a Gemini CLI
+// request body (request.generationConfig.thinkingConfig.thinkingBudget path).
+func NormalizeGeminiCLIThinkingBudget(model string, body []byte) []byte {
+	const budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget"
+	budget := gjson.GetBytes(body, budgetPath)
+	if !budget.Exists() {
+		return body
+	}
+	normalized := NormalizeThinkingBudget(model, int(budget.Int()))
+	updated, _ := sjson.SetBytes(body, budgetPath, normalized)
+	return updated
+}
+
 // ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel"
 // and converts it to "thinkingBudget".
 // "high" -> 32768
--- a/sdk/api/handlers/gemini/gemini_handlers.go
+++ b/sdk/api/handlers/gemini/gemini_handlers.go
@@ -48,8 +48,24 @@ func (h *GeminiAPIHandler) Models() []map[string]any {
 // GeminiModels handles the Gemini models listing endpoint.
 // It returns a JSON response containing available Gemini models and their specifications.
 func (h *GeminiAPIHandler) GeminiModels(c *gin.Context) {
+	rawModels := h.Models()
+	normalizedModels := make([]map[string]any, 0, len(rawModels))
+	defaultMethods := []string{"generateContent"}
+	for _, model := range rawModels {
+		normalizedModel := make(map[string]any, len(model))
+		for k, v := range model {
+			normalizedModel[k] = v
+		}
+		if name, ok := normalizedModel["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") {
+			normalizedModel["name"] = "models/" + name
+		}
+		if _, ok := normalizedModel["supportedGenerationMethods"]; !ok {
+			normalizedModel["supportedGenerationMethods"] = defaultMethods
+		}
+		normalizedModels = append(normalizedModels, normalizedModel)
+	}
 	c.JSON(http.StatusOK, gin.H{
-		"models": h.Models(),
+		"models": normalizedModels,
 	})
 }

--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -498,7 +498,7 @@ func (s *Service) Run(ctx context.Context) error {
 	}()

 	time.Sleep(100 * time.Millisecond)
-	fmt.Printf("API server started successfully on: %d\n", s.cfg.Port)
+	fmt.Printf("API server started successfully on: %s:%d\n", s.cfg.Host, s.cfg.Port)

 	if s.hooks.OnAfterStart != nil {
 		s.hooks.OnAfterStart(s)
@@ -779,7 +779,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
 							Created:     time.Now().Unix(),
 							OwnedBy:     compat.Name,
 							Type:        "openai-compatibility",
-							DisplayName: m.Name,
+							DisplayName: modelID,
 						})
 					}
 					// Register and return
Author	SHA1	Message	Date
Luis Pater	1249b07eb8	feat(responses): add unique identifiers for responses, function calls, and tool uses	2025-12-10 16:02:54 +08:00
Luis Pater	6b37f33d31	feat(antigravity): add unique identifier for tool use blocks in response	2025-12-10 15:27:57 +08:00
Luis Pater	f25f419e5a	fix(antigravity): remove references to `autopush` endpoint and update fallback logic	2025-12-10 00:13:20 +08:00
Luis Pater	b7e382008f	Merge pull request #465 from router-for-me/think Move thinking budget normalization from translators to executor	2025-12-09 21:10:33 +08:00
hkfires	70d6b95097	feat(amp): add /news.rss proxy route	2025-12-09 21:05:06 +08:00
hkfires	9b202b6c1c	fix(executor): centralize default thinking config	2025-12-09 21:05:06 +08:00
hkfires	6a66b6801a	feat(executor): enforce minimum thinking budget for antigravity models	2025-12-09 21:05:06 +08:00
hkfires	5b6d201408	refactor(translator): remove thinking budget normalization across all translators	2025-12-09 21:05:06 +08:00
hkfires	5ec9b5e5a9	feat(executor): normalize thinking budget across all Gemini executors	2025-12-09 21:05:06 +08:00
Luis Pater	5db3b58717	Merge pull request #470 from router-for-me/agry fix(gemini): normalize model listing output	2025-12-09 21:00:29 +08:00
hkfires	347769b3e3	fix(openai-compat): use model id for auth model display	2025-12-09 18:09:14 +08:00
hkfires	3cfe7008a2	fix(registry): update gpt 5.1 model names	2025-12-09 17:55:21 +08:00
hkfires	da23ddb061	fix(gemini): normalize model listing output	2025-12-09 17:34:15 +08:00
Luis Pater	39b6b3b289	Fixed: #463 fix(antigravity): remove `$ref` and `$defs` from JSON during key deletion	2025-12-09 17:32:17 +08:00
Luis Pater	c600519fa4	refactor(logging): replace log.Fatalf with log.Errorf and add error handling paths	2025-12-09 17:16:30 +08:00
hkfires	e5312fb5a2	feat(antigravity): support canonical names for antigravity models	2025-12-09 16:54:13 +08:00
Luis Pater	92df0cada9	Merge pull request #461 from router-for-me/aistudio feat(aistudio): normalize thinking budget in request translation	2025-12-09 08:41:46 +08:00
hkfires	96b55acff8	feat(aistudio): normalize thinking budget in request translation	2025-12-09 08:27:44 +08:00
Luis Pater	bb45fee1cf	Merge remote-tracking branch 'origin/dev' into dev	2025-12-08 23:28:22 +08:00
Luis Pater	af00304b0c	fix(antigravity): remove `exclusiveMaximum` from JSON during key deletion	2025-12-08 23:28:01 +08:00
vuonglv(Andy)	5c3a013cd1	feat(config): add configurable host binding for server (#454 ) * feat(config): add configurable host binding for server	2025-12-08 23:16:39 +08:00