mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-02 04:20:50 +08:00
Compare commits
77 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bb6312b4fc | ||
|
|
3c315551b0 | ||
|
|
27c9c5c4da | ||
|
|
fc9f6c974a | ||
|
|
a74ee3f319 | ||
|
|
564bcbaa54 | ||
|
|
88bdd25f06 | ||
|
|
e79f65fd8e | ||
|
|
2760989401 | ||
|
|
facfe7c518 | ||
|
|
6285459c08 | ||
|
|
21bbceca0c | ||
|
|
f6300c72b7 | ||
|
|
007572b58e | ||
|
|
3a81ab22fd | ||
|
|
519da2e042 | ||
|
|
169f4295d0 | ||
|
|
d06d0eab2f | ||
|
|
3ffd120ae9 | ||
|
|
a03d514095 | ||
|
|
1da03bfe15 | ||
|
|
423ce97665 | ||
|
|
e717939edb | ||
|
|
76c563d161 | ||
|
|
a89514951f | ||
|
|
94d61c7b2b | ||
|
|
1249b07eb8 | ||
|
|
6b37f33d31 | ||
|
|
f25f419e5a | ||
|
|
b7e382008f | ||
|
|
70d6b95097 | ||
|
|
9b202b6c1c | ||
|
|
6a66b6801a | ||
|
|
5b6d201408 | ||
|
|
5ec9b5e5a9 | ||
|
|
5db3b58717 | ||
|
|
347769b3e3 | ||
|
|
3cfe7008a2 | ||
|
|
da23ddb061 | ||
|
|
39b6b3b289 | ||
|
|
c600519fa4 | ||
|
|
e5312fb5a2 | ||
|
|
92df0cada9 | ||
|
|
96b55acff8 | ||
|
|
bb45fee1cf | ||
|
|
af00304b0c | ||
|
|
5c3a013cd1 | ||
|
|
6ad188921c | ||
|
|
15ed98d6a9 | ||
|
|
a283545b6b | ||
|
|
3efbd865a8 | ||
|
|
aee659fb66 | ||
|
|
5aa386d8b9 | ||
|
|
0adc0ee6aa | ||
|
|
92f13fc316 | ||
|
|
05cfa16e5f | ||
|
|
93a6e2d920 | ||
|
|
de77903915 | ||
|
|
56ed0d8d90 | ||
|
|
42e818ce05 | ||
|
|
2d4c54ba54 | ||
|
|
e9eb4db8bb | ||
|
|
d26ed069fa | ||
|
|
afcab5efda | ||
|
|
6cf1d8a947 | ||
|
|
a174d015f2 | ||
|
|
9c09128e00 | ||
|
|
549c0c2c5a | ||
|
|
f092801b61 | ||
|
|
1b638b3629 | ||
|
|
6f5f81753d | ||
|
|
76af454034 | ||
|
|
e54d2f6b2a | ||
|
|
bfc738b76a | ||
|
|
396899a530 | ||
|
|
edc654edf9 | ||
|
|
08586334af |
@@ -139,7 +139,8 @@ func main() {
|
||||
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
log.Fatalf("failed to get working directory: %v", err)
|
||||
log.Errorf("failed to get working directory: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Load environment variables from .env if present.
|
||||
@@ -233,13 +234,15 @@ func main() {
|
||||
})
|
||||
cancel()
|
||||
if err != nil {
|
||||
log.Fatalf("failed to initialize postgres token store: %v", err)
|
||||
log.Errorf("failed to initialize postgres token store: %v", err)
|
||||
return
|
||||
}
|
||||
examplePath := filepath.Join(wd, "config.example.yaml")
|
||||
ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
|
||||
if errBootstrap := pgStoreInst.Bootstrap(ctx, examplePath); errBootstrap != nil {
|
||||
cancel()
|
||||
log.Fatalf("failed to bootstrap postgres-backed config: %v", errBootstrap)
|
||||
log.Errorf("failed to bootstrap postgres-backed config: %v", errBootstrap)
|
||||
return
|
||||
}
|
||||
cancel()
|
||||
configFilePath = pgStoreInst.ConfigPath()
|
||||
@@ -262,7 +265,8 @@ func main() {
|
||||
if strings.Contains(resolvedEndpoint, "://") {
|
||||
parsed, errParse := url.Parse(resolvedEndpoint)
|
||||
if errParse != nil {
|
||||
log.Fatalf("failed to parse object store endpoint %q: %v", objectStoreEndpoint, errParse)
|
||||
log.Errorf("failed to parse object store endpoint %q: %v", objectStoreEndpoint, errParse)
|
||||
return
|
||||
}
|
||||
switch strings.ToLower(parsed.Scheme) {
|
||||
case "http":
|
||||
@@ -270,10 +274,12 @@ func main() {
|
||||
case "https":
|
||||
useSSL = true
|
||||
default:
|
||||
log.Fatalf("unsupported object store scheme %q (only http and https are allowed)", parsed.Scheme)
|
||||
log.Errorf("unsupported object store scheme %q (only http and https are allowed)", parsed.Scheme)
|
||||
return
|
||||
}
|
||||
if parsed.Host == "" {
|
||||
log.Fatalf("object store endpoint %q is missing host information", objectStoreEndpoint)
|
||||
log.Errorf("object store endpoint %q is missing host information", objectStoreEndpoint)
|
||||
return
|
||||
}
|
||||
resolvedEndpoint = parsed.Host
|
||||
if parsed.Path != "" && parsed.Path != "/" {
|
||||
@@ -292,13 +298,15 @@ func main() {
|
||||
}
|
||||
objectStoreInst, err = store.NewObjectTokenStore(objCfg)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to initialize object token store: %v", err)
|
||||
log.Errorf("failed to initialize object token store: %v", err)
|
||||
return
|
||||
}
|
||||
examplePath := filepath.Join(wd, "config.example.yaml")
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
if errBootstrap := objectStoreInst.Bootstrap(ctx, examplePath); errBootstrap != nil {
|
||||
cancel()
|
||||
log.Fatalf("failed to bootstrap object-backed config: %v", errBootstrap)
|
||||
log.Errorf("failed to bootstrap object-backed config: %v", errBootstrap)
|
||||
return
|
||||
}
|
||||
cancel()
|
||||
configFilePath = objectStoreInst.ConfigPath()
|
||||
@@ -323,7 +331,8 @@ func main() {
|
||||
gitStoreInst = store.NewGitTokenStore(gitStoreRemoteURL, gitStoreUser, gitStorePassword)
|
||||
gitStoreInst.SetBaseDir(authDir)
|
||||
if errRepo := gitStoreInst.EnsureRepository(); errRepo != nil {
|
||||
log.Fatalf("failed to prepare git token store: %v", errRepo)
|
||||
log.Errorf("failed to prepare git token store: %v", errRepo)
|
||||
return
|
||||
}
|
||||
configFilePath = gitStoreInst.ConfigPath()
|
||||
if configFilePath == "" {
|
||||
@@ -332,17 +341,21 @@ func main() {
|
||||
if _, statErr := os.Stat(configFilePath); errors.Is(statErr, fs.ErrNotExist) {
|
||||
examplePath := filepath.Join(wd, "config.example.yaml")
|
||||
if _, errExample := os.Stat(examplePath); errExample != nil {
|
||||
log.Fatalf("failed to find template config file: %v", errExample)
|
||||
log.Errorf("failed to find template config file: %v", errExample)
|
||||
return
|
||||
}
|
||||
if errCopy := misc.CopyConfigTemplate(examplePath, configFilePath); errCopy != nil {
|
||||
log.Fatalf("failed to bootstrap git-backed config: %v", errCopy)
|
||||
log.Errorf("failed to bootstrap git-backed config: %v", errCopy)
|
||||
return
|
||||
}
|
||||
if errCommit := gitStoreInst.PersistConfig(context.Background()); errCommit != nil {
|
||||
log.Fatalf("failed to commit initial git-backed config: %v", errCommit)
|
||||
log.Errorf("failed to commit initial git-backed config: %v", errCommit)
|
||||
return
|
||||
}
|
||||
log.Infof("git-backed config initialized from template: %s", configFilePath)
|
||||
} else if statErr != nil {
|
||||
log.Fatalf("failed to inspect git-backed config: %v", statErr)
|
||||
log.Errorf("failed to inspect git-backed config: %v", statErr)
|
||||
return
|
||||
}
|
||||
cfg, err = config.LoadConfigOptional(configFilePath, isCloudDeploy)
|
||||
if err == nil {
|
||||
@@ -355,13 +368,15 @@ func main() {
|
||||
} else {
|
||||
wd, err = os.Getwd()
|
||||
if err != nil {
|
||||
log.Fatalf("failed to get working directory: %v", err)
|
||||
log.Errorf("failed to get working directory: %v", err)
|
||||
return
|
||||
}
|
||||
configFilePath = filepath.Join(wd, "config.yaml")
|
||||
cfg, err = config.LoadConfigOptional(configFilePath, isCloudDeploy)
|
||||
}
|
||||
if err != nil {
|
||||
log.Fatalf("failed to load config: %v", err)
|
||||
log.Errorf("failed to load config: %v", err)
|
||||
return
|
||||
}
|
||||
if cfg == nil {
|
||||
cfg = &config.Config{}
|
||||
@@ -391,7 +406,8 @@ func main() {
|
||||
coreauth.SetQuotaCooldownDisabled(cfg.DisableCooling)
|
||||
|
||||
if err = logging.ConfigureLogOutput(cfg.LoggingToFile); err != nil {
|
||||
log.Fatalf("failed to configure log output: %v", err)
|
||||
log.Errorf("failed to configure log output: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
log.Infof("CLIProxyAPI Version: %s, Commit: %s, BuiltAt: %s", buildinfo.Version, buildinfo.Commit, buildinfo.BuildDate)
|
||||
@@ -400,7 +416,8 @@ func main() {
|
||||
util.SetLogLevel(cfg)
|
||||
|
||||
if resolvedAuthDir, errResolveAuthDir := util.ResolveAuthDir(cfg.AuthDir); errResolveAuthDir != nil {
|
||||
log.Fatalf("failed to resolve auth directory: %v", errResolveAuthDir)
|
||||
log.Errorf("failed to resolve auth directory: %v", errResolveAuthDir)
|
||||
return
|
||||
} else {
|
||||
cfg.AuthDir = resolvedAuthDir
|
||||
}
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
# Server host/interface to bind to. Default is empty ("") to bind all interfaces (IPv4 + IPv6).
|
||||
# Use "127.0.0.1" or "localhost" to restrict access to local machine only.
|
||||
host: ""
|
||||
|
||||
# Server port
|
||||
port: 8317
|
||||
|
||||
@@ -96,7 +100,7 @@ ws-auth: false
|
||||
# excluded-models:
|
||||
# - "claude-opus-4-5-20251101" # exclude specific models (exact match)
|
||||
# - "claude-3-*" # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
|
||||
# - "*-think" # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
|
||||
# - "*-thinking" # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
|
||||
# - "*haiku*" # wildcard matching substring (e.g. claude-3-5-haiku-20241022)
|
||||
|
||||
# OpenAI compatibility providers
|
||||
@@ -134,6 +138,8 @@ ws-auth: false
|
||||
# upstream-api-key: ""
|
||||
# # Restrict Amp management routes (/api/auth, /api/user, etc.) to localhost only (recommended)
|
||||
# restrict-management-to-localhost: true
|
||||
# # Force model mappings to run before checking local API keys (default: false)
|
||||
# force-model-mappings: false
|
||||
# # Amp Model Mappings
|
||||
# # Route unavailable Amp models to alternative models available in your local proxy.
|
||||
# # Useful when Amp CLI requests models you don't have access to (e.g., Claude Opus 4.5)
|
||||
|
||||
@@ -713,14 +713,16 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
|
||||
// Generate PKCE codes
|
||||
pkceCodes, err := claude.GeneratePKCECodes()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to generate PKCE codes: %v", err)
|
||||
log.Errorf("Failed to generate PKCE codes: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate PKCE codes"})
|
||||
return
|
||||
}
|
||||
|
||||
// Generate random state parameter
|
||||
state, err := misc.GenerateRandomState()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to generate state parameter: %v", err)
|
||||
log.Errorf("Failed to generate state parameter: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate state parameter"})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -730,7 +732,8 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
|
||||
// Generate authorization URL (then override redirect_uri to reuse server port)
|
||||
authURL, state, err := anthropicAuth.GenerateAuthURL(state, pkceCodes)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to generate authorization URL: %v", err)
|
||||
log.Errorf("Failed to generate authorization URL: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate authorization url"})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -872,7 +875,7 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
|
||||
}
|
||||
savedPath, errSave := h.saveTokenRecord(ctx, record)
|
||||
if errSave != nil {
|
||||
log.Fatalf("Failed to save authentication tokens: %v", errSave)
|
||||
log.Errorf("Failed to save authentication tokens: %v", errSave)
|
||||
oauthStatus[state] = "Failed to save authentication tokens"
|
||||
return
|
||||
}
|
||||
@@ -1045,7 +1048,7 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
|
||||
gemAuth := geminiAuth.NewGeminiAuth()
|
||||
gemClient, errGetClient := gemAuth.GetAuthenticatedClient(ctx, &ts, h.cfg, true)
|
||||
if errGetClient != nil {
|
||||
log.Fatalf("failed to get authenticated client: %v", errGetClient)
|
||||
log.Errorf("failed to get authenticated client: %v", errGetClient)
|
||||
oauthStatus[state] = "Failed to get authenticated client"
|
||||
return
|
||||
}
|
||||
@@ -1110,7 +1113,7 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
|
||||
}
|
||||
savedPath, errSave := h.saveTokenRecord(ctx, record)
|
||||
if errSave != nil {
|
||||
log.Fatalf("Failed to save token to file: %v", errSave)
|
||||
log.Errorf("Failed to save token to file: %v", errSave)
|
||||
oauthStatus[state] = "Failed to save token to file"
|
||||
return
|
||||
}
|
||||
@@ -1131,14 +1134,16 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
|
||||
// Generate PKCE codes
|
||||
pkceCodes, err := codex.GeneratePKCECodes()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to generate PKCE codes: %v", err)
|
||||
log.Errorf("Failed to generate PKCE codes: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate PKCE codes"})
|
||||
return
|
||||
}
|
||||
|
||||
// Generate random state parameter
|
||||
state, err := misc.GenerateRandomState()
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to generate state parameter: %v", err)
|
||||
log.Errorf("Failed to generate state parameter: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate state parameter"})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1148,7 +1153,8 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
|
||||
// Generate authorization URL
|
||||
authURL, err := openaiAuth.GenerateAuthURL(state, pkceCodes)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to generate authorization URL: %v", err)
|
||||
log.Errorf("Failed to generate authorization URL: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate authorization url"})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1283,7 +1289,7 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
|
||||
savedPath, errSave := h.saveTokenRecord(ctx, record)
|
||||
if errSave != nil {
|
||||
oauthStatus[state] = "Failed to save authentication tokens"
|
||||
log.Fatalf("Failed to save authentication tokens: %v", errSave)
|
||||
log.Errorf("Failed to save authentication tokens: %v", errSave)
|
||||
return
|
||||
}
|
||||
fmt.Printf("Authentication successful! Token saved to %s\n", savedPath)
|
||||
@@ -1318,7 +1324,8 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
|
||||
|
||||
state, errState := misc.GenerateRandomState()
|
||||
if errState != nil {
|
||||
log.Fatalf("Failed to generate state parameter: %v", errState)
|
||||
log.Errorf("Failed to generate state parameter: %v", errState)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate state parameter"})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1514,7 +1521,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
|
||||
}
|
||||
savedPath, errSave := h.saveTokenRecord(ctx, record)
|
||||
if errSave != nil {
|
||||
log.Fatalf("Failed to save token to file: %v", errSave)
|
||||
log.Errorf("Failed to save token to file: %v", errSave)
|
||||
oauthStatus[state] = "Failed to save token to file"
|
||||
return
|
||||
}
|
||||
@@ -1543,7 +1550,8 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
|
||||
// Generate authorization URL
|
||||
deviceFlow, err := qwenAuth.InitiateDeviceFlow(ctx)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to generate authorization URL: %v", err)
|
||||
log.Errorf("Failed to generate authorization URL: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate authorization url"})
|
||||
return
|
||||
}
|
||||
authURL := deviceFlow.VerificationURIComplete
|
||||
@@ -1570,7 +1578,7 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
|
||||
}
|
||||
savedPath, errSave := h.saveTokenRecord(ctx, record)
|
||||
if errSave != nil {
|
||||
log.Fatalf("Failed to save authentication tokens: %v", errSave)
|
||||
log.Errorf("Failed to save authentication tokens: %v", errSave)
|
||||
oauthStatus[state] = "Failed to save authentication tokens"
|
||||
return
|
||||
}
|
||||
@@ -1674,7 +1682,7 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
|
||||
savedPath, errSave := h.saveTokenRecord(ctx, record)
|
||||
if errSave != nil {
|
||||
oauthStatus[state] = "Failed to save authentication tokens"
|
||||
log.Fatalf("Failed to save authentication tokens: %v", errSave)
|
||||
log.Errorf("Failed to save authentication tokens: %v", errSave)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -2103,6 +2111,7 @@ func checkCloudAPIIsEnabled(ctx context.Context, httpClient *http.Client, projec
|
||||
continue
|
||||
}
|
||||
}
|
||||
_ = resp.Body.Close()
|
||||
return false, fmt.Errorf("project activation required: %s", errMessage)
|
||||
}
|
||||
return true, nil
|
||||
|
||||
@@ -706,3 +706,155 @@ func normalizeClaudeKey(entry *config.ClaudeKey) {
|
||||
}
|
||||
entry.Models = normalized
|
||||
}
|
||||
|
||||
// GetAmpCode returns the complete ampcode configuration.
|
||||
func (h *Handler) GetAmpCode(c *gin.Context) {
|
||||
if h == nil || h.cfg == nil {
|
||||
c.JSON(200, gin.H{"ampcode": config.AmpCode{}})
|
||||
return
|
||||
}
|
||||
c.JSON(200, gin.H{"ampcode": h.cfg.AmpCode})
|
||||
}
|
||||
|
||||
// GetAmpUpstreamURL returns the ampcode upstream URL.
|
||||
func (h *Handler) GetAmpUpstreamURL(c *gin.Context) {
|
||||
if h == nil || h.cfg == nil {
|
||||
c.JSON(200, gin.H{"upstream-url": ""})
|
||||
return
|
||||
}
|
||||
c.JSON(200, gin.H{"upstream-url": h.cfg.AmpCode.UpstreamURL})
|
||||
}
|
||||
|
||||
// PutAmpUpstreamURL updates the ampcode upstream URL.
|
||||
func (h *Handler) PutAmpUpstreamURL(c *gin.Context) {
|
||||
h.updateStringField(c, func(v string) { h.cfg.AmpCode.UpstreamURL = strings.TrimSpace(v) })
|
||||
}
|
||||
|
||||
// DeleteAmpUpstreamURL clears the ampcode upstream URL.
|
||||
func (h *Handler) DeleteAmpUpstreamURL(c *gin.Context) {
|
||||
h.cfg.AmpCode.UpstreamURL = ""
|
||||
h.persist(c)
|
||||
}
|
||||
|
||||
// GetAmpUpstreamAPIKey returns the ampcode upstream API key.
|
||||
func (h *Handler) GetAmpUpstreamAPIKey(c *gin.Context) {
|
||||
if h == nil || h.cfg == nil {
|
||||
c.JSON(200, gin.H{"upstream-api-key": ""})
|
||||
return
|
||||
}
|
||||
c.JSON(200, gin.H{"upstream-api-key": h.cfg.AmpCode.UpstreamAPIKey})
|
||||
}
|
||||
|
||||
// PutAmpUpstreamAPIKey updates the ampcode upstream API key.
|
||||
func (h *Handler) PutAmpUpstreamAPIKey(c *gin.Context) {
|
||||
h.updateStringField(c, func(v string) { h.cfg.AmpCode.UpstreamAPIKey = strings.TrimSpace(v) })
|
||||
}
|
||||
|
||||
// DeleteAmpUpstreamAPIKey clears the ampcode upstream API key.
|
||||
func (h *Handler) DeleteAmpUpstreamAPIKey(c *gin.Context) {
|
||||
h.cfg.AmpCode.UpstreamAPIKey = ""
|
||||
h.persist(c)
|
||||
}
|
||||
|
||||
// GetAmpRestrictManagementToLocalhost returns the localhost restriction setting.
|
||||
func (h *Handler) GetAmpRestrictManagementToLocalhost(c *gin.Context) {
|
||||
if h == nil || h.cfg == nil {
|
||||
c.JSON(200, gin.H{"restrict-management-to-localhost": true})
|
||||
return
|
||||
}
|
||||
c.JSON(200, gin.H{"restrict-management-to-localhost": h.cfg.AmpCode.RestrictManagementToLocalhost})
|
||||
}
|
||||
|
||||
// PutAmpRestrictManagementToLocalhost updates the localhost restriction setting.
|
||||
func (h *Handler) PutAmpRestrictManagementToLocalhost(c *gin.Context) {
|
||||
h.updateBoolField(c, func(v bool) { h.cfg.AmpCode.RestrictManagementToLocalhost = v })
|
||||
}
|
||||
|
||||
// GetAmpModelMappings returns the ampcode model mappings.
|
||||
func (h *Handler) GetAmpModelMappings(c *gin.Context) {
|
||||
if h == nil || h.cfg == nil {
|
||||
c.JSON(200, gin.H{"model-mappings": []config.AmpModelMapping{}})
|
||||
return
|
||||
}
|
||||
c.JSON(200, gin.H{"model-mappings": h.cfg.AmpCode.ModelMappings})
|
||||
}
|
||||
|
||||
// PutAmpModelMappings replaces all ampcode model mappings.
|
||||
func (h *Handler) PutAmpModelMappings(c *gin.Context) {
|
||||
var body struct {
|
||||
Value []config.AmpModelMapping `json:"value"`
|
||||
}
|
||||
if err := c.ShouldBindJSON(&body); err != nil {
|
||||
c.JSON(400, gin.H{"error": "invalid body"})
|
||||
return
|
||||
}
|
||||
h.cfg.AmpCode.ModelMappings = body.Value
|
||||
h.persist(c)
|
||||
}
|
||||
|
||||
// PatchAmpModelMappings adds or updates model mappings.
|
||||
func (h *Handler) PatchAmpModelMappings(c *gin.Context) {
|
||||
var body struct {
|
||||
Value []config.AmpModelMapping `json:"value"`
|
||||
}
|
||||
if err := c.ShouldBindJSON(&body); err != nil {
|
||||
c.JSON(400, gin.H{"error": "invalid body"})
|
||||
return
|
||||
}
|
||||
|
||||
existing := make(map[string]int)
|
||||
for i, m := range h.cfg.AmpCode.ModelMappings {
|
||||
existing[strings.TrimSpace(m.From)] = i
|
||||
}
|
||||
|
||||
for _, newMapping := range body.Value {
|
||||
from := strings.TrimSpace(newMapping.From)
|
||||
if idx, ok := existing[from]; ok {
|
||||
h.cfg.AmpCode.ModelMappings[idx] = newMapping
|
||||
} else {
|
||||
h.cfg.AmpCode.ModelMappings = append(h.cfg.AmpCode.ModelMappings, newMapping)
|
||||
existing[from] = len(h.cfg.AmpCode.ModelMappings) - 1
|
||||
}
|
||||
}
|
||||
h.persist(c)
|
||||
}
|
||||
|
||||
// DeleteAmpModelMappings removes specified model mappings by "from" field.
|
||||
func (h *Handler) DeleteAmpModelMappings(c *gin.Context) {
|
||||
var body struct {
|
||||
Value []string `json:"value"`
|
||||
}
|
||||
if err := c.ShouldBindJSON(&body); err != nil || len(body.Value) == 0 {
|
||||
h.cfg.AmpCode.ModelMappings = nil
|
||||
h.persist(c)
|
||||
return
|
||||
}
|
||||
|
||||
toRemove := make(map[string]bool)
|
||||
for _, from := range body.Value {
|
||||
toRemove[strings.TrimSpace(from)] = true
|
||||
}
|
||||
|
||||
newMappings := make([]config.AmpModelMapping, 0, len(h.cfg.AmpCode.ModelMappings))
|
||||
for _, m := range h.cfg.AmpCode.ModelMappings {
|
||||
if !toRemove[strings.TrimSpace(m.From)] {
|
||||
newMappings = append(newMappings, m)
|
||||
}
|
||||
}
|
||||
h.cfg.AmpCode.ModelMappings = newMappings
|
||||
h.persist(c)
|
||||
}
|
||||
|
||||
// GetAmpForceModelMappings returns whether model mappings are forced.
|
||||
func (h *Handler) GetAmpForceModelMappings(c *gin.Context) {
|
||||
if h == nil || h.cfg == nil {
|
||||
c.JSON(200, gin.H{"force-model-mappings": false})
|
||||
return
|
||||
}
|
||||
c.JSON(200, gin.H{"force-model-mappings": h.cfg.AmpCode.ForceModelMappings})
|
||||
}
|
||||
|
||||
// PutAmpForceModelMappings updates the force model mappings setting.
|
||||
func (h *Handler) PutAmpForceModelMappings(c *gin.Context) {
|
||||
h.updateBoolField(c, func(v bool) { h.cfg.AmpCode.ForceModelMappings = v })
|
||||
}
|
||||
|
||||
@@ -240,16 +240,6 @@ func (h *Handler) updateBoolField(c *gin.Context, set func(bool)) {
|
||||
Value *bool `json:"value"`
|
||||
}
|
||||
if err := c.ShouldBindJSON(&body); err != nil || body.Value == nil {
|
||||
var m map[string]any
|
||||
if err2 := c.ShouldBindJSON(&m); err2 == nil {
|
||||
for _, v := range m {
|
||||
if b, ok := v.(bool); ok {
|
||||
set(b)
|
||||
h.persist(c)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid body"})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -232,7 +232,16 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
|
||||
w.streamDone = nil
|
||||
}
|
||||
|
||||
// Write API Request and Response to the streaming log before closing
|
||||
if w.streamWriter != nil {
|
||||
apiRequest := w.extractAPIRequest(c)
|
||||
if len(apiRequest) > 0 {
|
||||
_ = w.streamWriter.WriteAPIRequest(apiRequest)
|
||||
}
|
||||
apiResponse := w.extractAPIResponse(c)
|
||||
if len(apiResponse) > 0 {
|
||||
_ = w.streamWriter.WriteAPIResponse(apiResponse)
|
||||
}
|
||||
if err := w.streamWriter.Close(); err != nil {
|
||||
w.streamWriter = nil
|
||||
return err
|
||||
|
||||
@@ -100,6 +100,16 @@ func (m *AmpModule) Name() string {
|
||||
return "amp-routing"
|
||||
}
|
||||
|
||||
// forceModelMappings returns whether model mappings should take precedence over local API keys
|
||||
func (m *AmpModule) forceModelMappings() bool {
|
||||
m.configMu.RLock()
|
||||
defer m.configMu.RUnlock()
|
||||
if m.lastConfig == nil {
|
||||
return false
|
||||
}
|
||||
return m.lastConfig.ForceModelMappings
|
||||
}
|
||||
|
||||
// Register sets up Amp routes if configured.
|
||||
// This implements the RouteModuleV2 interface with Context.
|
||||
// Routes are registered only once via sync.Once for idempotent behavior.
|
||||
|
||||
@@ -28,6 +28,9 @@ const (
|
||||
RouteTypeNoProvider AmpRouteType = "NO_PROVIDER"
|
||||
)
|
||||
|
||||
// MappedModelContextKey is the Gin context key for passing mapped model names.
|
||||
const MappedModelContextKey = "mapped_model"
|
||||
|
||||
// logAmpRouting logs the routing decision for an Amp request with structured fields
|
||||
func logAmpRouting(routeType AmpRouteType, requestedModel, resolvedModel, provider, path string) {
|
||||
fields := log.Fields{
|
||||
@@ -74,23 +77,29 @@ func logAmpRouting(routeType AmpRouteType, requestedModel, resolvedModel, provid
|
||||
// FallbackHandler wraps a standard handler with fallback logic to ampcode.com
|
||||
// when the model's provider is not available in CLIProxyAPI
|
||||
type FallbackHandler struct {
|
||||
getProxy func() *httputil.ReverseProxy
|
||||
modelMapper ModelMapper
|
||||
getProxy func() *httputil.ReverseProxy
|
||||
modelMapper ModelMapper
|
||||
forceModelMappings func() bool
|
||||
}
|
||||
|
||||
// NewFallbackHandler creates a new fallback handler wrapper
|
||||
// The getProxy function allows lazy evaluation of the proxy (useful when proxy is created after routes)
|
||||
func NewFallbackHandler(getProxy func() *httputil.ReverseProxy) *FallbackHandler {
|
||||
return &FallbackHandler{
|
||||
getProxy: getProxy,
|
||||
getProxy: getProxy,
|
||||
forceModelMappings: func() bool { return false },
|
||||
}
|
||||
}
|
||||
|
||||
// NewFallbackHandlerWithMapper creates a new fallback handler with model mapping support
|
||||
func NewFallbackHandlerWithMapper(getProxy func() *httputil.ReverseProxy, mapper ModelMapper) *FallbackHandler {
|
||||
func NewFallbackHandlerWithMapper(getProxy func() *httputil.ReverseProxy, mapper ModelMapper, forceModelMappings func() bool) *FallbackHandler {
|
||||
if forceModelMappings == nil {
|
||||
forceModelMappings = func() bool { return false }
|
||||
}
|
||||
return &FallbackHandler{
|
||||
getProxy: getProxy,
|
||||
modelMapper: mapper,
|
||||
getProxy: getProxy,
|
||||
modelMapper: mapper,
|
||||
forceModelMappings: forceModelMappings,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,35 +133,68 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
|
||||
return
|
||||
}
|
||||
|
||||
// Normalize model (handles Gemini thinking suffixes)
|
||||
normalizedModel, _ := util.NormalizeGeminiThinkingModel(modelName)
|
||||
|
||||
// Check if we have providers for this model
|
||||
providers := util.GetProviderName(normalizedModel)
|
||||
// Normalize model (handles dynamic thinking suffixes)
|
||||
normalizedModel, _ := util.NormalizeThinkingModel(modelName)
|
||||
|
||||
// Track resolved model for logging (may change if mapping is applied)
|
||||
resolvedModel := normalizedModel
|
||||
usedMapping := false
|
||||
var providers []string
|
||||
|
||||
if len(providers) == 0 {
|
||||
// No providers configured - check if we have a model mapping
|
||||
// Check if model mappings should be forced ahead of local API keys
|
||||
forceMappings := fh.forceModelMappings != nil && fh.forceModelMappings()
|
||||
|
||||
if forceMappings {
|
||||
// FORCE MODE: Check model mappings FIRST (takes precedence over local API keys)
|
||||
// This allows users to route Amp requests to their preferred OAuth providers
|
||||
if fh.modelMapper != nil {
|
||||
if mappedModel := fh.modelMapper.MapModel(normalizedModel); mappedModel != "" {
|
||||
// Mapping found - rewrite the model in request body
|
||||
bodyBytes = rewriteModelInRequest(bodyBytes, mappedModel)
|
||||
c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
resolvedModel = mappedModel
|
||||
usedMapping = true
|
||||
|
||||
// Get providers for the mapped model
|
||||
providers = util.GetProviderName(mappedModel)
|
||||
|
||||
// Continue to handler with remapped model
|
||||
goto handleRequest
|
||||
// Mapping found - check if we have a provider for the mapped model
|
||||
mappedProviders := util.GetProviderName(mappedModel)
|
||||
if len(mappedProviders) > 0 {
|
||||
// Mapping found and provider available - rewrite the model in request body
|
||||
bodyBytes = rewriteModelInRequest(bodyBytes, mappedModel)
|
||||
c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
// Store mapped model in context for handlers that check it (like gemini bridge)
|
||||
c.Set(MappedModelContextKey, mappedModel)
|
||||
resolvedModel = mappedModel
|
||||
usedMapping = true
|
||||
providers = mappedProviders
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No mapping found - check if we have a proxy for fallback
|
||||
// If no mapping applied, check for local providers
|
||||
if !usedMapping {
|
||||
providers = util.GetProviderName(normalizedModel)
|
||||
}
|
||||
} else {
|
||||
// DEFAULT MODE: Check local providers first, then mappings as fallback
|
||||
providers = util.GetProviderName(normalizedModel)
|
||||
|
||||
if len(providers) == 0 {
|
||||
// No providers configured - check if we have a model mapping
|
||||
if fh.modelMapper != nil {
|
||||
if mappedModel := fh.modelMapper.MapModel(normalizedModel); mappedModel != "" {
|
||||
// Mapping found - check if we have a provider for the mapped model
|
||||
mappedProviders := util.GetProviderName(mappedModel)
|
||||
if len(mappedProviders) > 0 {
|
||||
// Mapping found and provider available - rewrite the model in request body
|
||||
bodyBytes = rewriteModelInRequest(bodyBytes, mappedModel)
|
||||
c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
// Store mapped model in context for handlers that check it (like gemini bridge)
|
||||
c.Set(MappedModelContextKey, mappedModel)
|
||||
resolvedModel = mappedModel
|
||||
usedMapping = true
|
||||
providers = mappedProviders
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no providers available, fallback to ampcode.com
|
||||
if len(providers) == 0 {
|
||||
proxy := fh.getProxy()
|
||||
if proxy != nil {
|
||||
// Log: Forwarding to ampcode.com (uses Amp credits)
|
||||
@@ -170,8 +212,6 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
|
||||
logAmpRouting(RouteTypeNoProvider, modelName, "", "", requestPath)
|
||||
}
|
||||
|
||||
handleRequest:
|
||||
|
||||
// Log the routing decision
|
||||
providerName := ""
|
||||
if len(providers) > 0 {
|
||||
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/gemini"
|
||||
)
|
||||
|
||||
// createGeminiBridgeHandler creates a handler that bridges AMP CLI's non-standard Gemini paths
|
||||
@@ -15,16 +14,31 @@ import (
|
||||
//
|
||||
// This extracts the model+method from the AMP path and sets it as the :action parameter
|
||||
// so the standard Gemini handler can process it.
|
||||
func createGeminiBridgeHandler(geminiHandler *gemini.GeminiAPIHandler) gin.HandlerFunc {
|
||||
//
|
||||
// The handler parameter should be a Gemini-compatible handler that expects the :action param.
|
||||
func createGeminiBridgeHandler(handler gin.HandlerFunc) gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
// Get the full path from the catch-all parameter
|
||||
path := c.Param("path")
|
||||
|
||||
// Extract model:method from AMP CLI path format
|
||||
// Example: /publishers/google/models/gemini-3-pro-preview:streamGenerateContent
|
||||
if idx := strings.Index(path, "/models/"); idx >= 0 {
|
||||
// Extract everything after "/models/"
|
||||
actionPart := path[idx+8:] // Skip "/models/"
|
||||
const modelsPrefix = "/models/"
|
||||
if idx := strings.Index(path, modelsPrefix); idx >= 0 {
|
||||
// Extract everything after modelsPrefix
|
||||
actionPart := path[idx+len(modelsPrefix):]
|
||||
|
||||
// Check if model was mapped by FallbackHandler
|
||||
if mappedModel, exists := c.Get(MappedModelContextKey); exists {
|
||||
if strModel, ok := mappedModel.(string); ok && strModel != "" {
|
||||
// Replace the model part in the action
|
||||
// actionPart is like "model-name:method"
|
||||
if colonIdx := strings.Index(actionPart, ":"); colonIdx > 0 {
|
||||
method := actionPart[colonIdx:] // ":method"
|
||||
actionPart = strModel + method
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set this as the :action parameter that the Gemini handler expects
|
||||
c.Params = append(c.Params, gin.Param{
|
||||
@@ -32,8 +46,8 @@ func createGeminiBridgeHandler(geminiHandler *gemini.GeminiAPIHandler) gin.Handl
|
||||
Value: actionPart,
|
||||
})
|
||||
|
||||
// Call the standard Gemini handler
|
||||
geminiHandler.GeminiHandler(c)
|
||||
// Call the handler
|
||||
handler(c)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
93
internal/api/modules/amp/gemini_bridge_test.go
Normal file
93
internal/api/modules/amp/gemini_bridge_test.go
Normal file
@@ -0,0 +1,93 @@
|
||||
package amp
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func TestCreateGeminiBridgeHandler_ActionParameterExtraction(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
path string
|
||||
mappedModel string // empty string means no mapping
|
||||
expectedAction string
|
||||
}{
|
||||
{
|
||||
name: "no_mapping_uses_url_model",
|
||||
path: "/publishers/google/models/gemini-pro:generateContent",
|
||||
mappedModel: "",
|
||||
expectedAction: "gemini-pro:generateContent",
|
||||
},
|
||||
{
|
||||
name: "mapped_model_replaces_url_model",
|
||||
path: "/publishers/google/models/gemini-exp:generateContent",
|
||||
mappedModel: "gemini-2.0-flash",
|
||||
expectedAction: "gemini-2.0-flash:generateContent",
|
||||
},
|
||||
{
|
||||
name: "mapping_preserves_method",
|
||||
path: "/publishers/google/models/gemini-2.5-preview:streamGenerateContent",
|
||||
mappedModel: "gemini-flash",
|
||||
expectedAction: "gemini-flash:streamGenerateContent",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
var capturedAction string
|
||||
|
||||
mockGeminiHandler := func(c *gin.Context) {
|
||||
capturedAction = c.Param("action")
|
||||
c.JSON(http.StatusOK, gin.H{"captured": capturedAction})
|
||||
}
|
||||
|
||||
// Use the actual createGeminiBridgeHandler function
|
||||
bridgeHandler := createGeminiBridgeHandler(mockGeminiHandler)
|
||||
|
||||
r := gin.New()
|
||||
if tt.mappedModel != "" {
|
||||
r.Use(func(c *gin.Context) {
|
||||
c.Set(MappedModelContextKey, tt.mappedModel)
|
||||
c.Next()
|
||||
})
|
||||
}
|
||||
r.POST("/api/provider/google/v1beta1/*path", bridgeHandler)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/provider/google/v1beta1"+tt.path, nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("Expected status 200, got %d", w.Code)
|
||||
}
|
||||
if capturedAction != tt.expectedAction {
|
||||
t.Errorf("Expected action '%s', got '%s'", tt.expectedAction, capturedAction)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateGeminiBridgeHandler_InvalidPath(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
mockHandler := func(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, gin.H{"ok": true})
|
||||
}
|
||||
bridgeHandler := createGeminiBridgeHandler(mockHandler)
|
||||
|
||||
r := gin.New()
|
||||
r.POST("/api/provider/google/v1beta1/*path", bridgeHandler)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/provider/google/v1beta1/invalid/path", nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("Expected status 400 for invalid path, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
@@ -9,7 +9,6 @@ import (
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/claude"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/gemini"
|
||||
@@ -157,6 +156,7 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
|
||||
rootMiddleware := []gin.HandlerFunc{m.managementAvailabilityMiddleware(), noCORSMiddleware(), m.localhostOnlyMiddleware()}
|
||||
engine.GET("/threads/*path", append(rootMiddleware, proxyHandler)...)
|
||||
engine.GET("/threads.rss", append(rootMiddleware, proxyHandler)...)
|
||||
engine.GET("/news.rss", append(rootMiddleware, proxyHandler)...)
|
||||
|
||||
// Root-level auth routes for CLI login flow
|
||||
// Amp uses multiple auth routes: /auth/cli-login, /auth/callback, /auth/sign-in, /auth/logout
|
||||
@@ -169,30 +169,22 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
|
||||
// We bridge these to our standard Gemini handler to enable local OAuth.
|
||||
// If no local OAuth is available, falls back to ampcode.com proxy.
|
||||
geminiHandlers := gemini.NewGeminiAPIHandler(baseHandler)
|
||||
geminiBridge := createGeminiBridgeHandler(geminiHandlers)
|
||||
geminiV1Beta1Fallback := NewFallbackHandler(func() *httputil.ReverseProxy {
|
||||
geminiBridge := createGeminiBridgeHandler(geminiHandlers.GeminiHandler)
|
||||
geminiV1Beta1Fallback := NewFallbackHandlerWithMapper(func() *httputil.ReverseProxy {
|
||||
return m.getProxy()
|
||||
})
|
||||
}, m.modelMapper, m.forceModelMappings)
|
||||
geminiV1Beta1Handler := geminiV1Beta1Fallback.WrapHandler(geminiBridge)
|
||||
|
||||
// Route POST model calls through Gemini bridge when a local provider exists, otherwise proxy.
|
||||
// Route POST model calls through Gemini bridge with FallbackHandler.
|
||||
// FallbackHandler checks provider -> mapping -> proxy fallback automatically.
|
||||
// All other methods (e.g., GET model listing) always proxy to upstream to preserve Amp CLI behavior.
|
||||
ampAPI.Any("/provider/google/v1beta1/*path", func(c *gin.Context) {
|
||||
if c.Request.Method == "POST" {
|
||||
// Attempt to extract the model name from the AMP-style path
|
||||
if path := c.Param("path"); strings.Contains(path, "/models/") {
|
||||
modelPart := path[strings.Index(path, "/models/")+len("/models/"):]
|
||||
if colonIdx := strings.Index(modelPart, ":"); colonIdx > 0 {
|
||||
modelPart = modelPart[:colonIdx]
|
||||
}
|
||||
if modelPart != "" {
|
||||
normalized, _ := util.NormalizeGeminiThinkingModel(modelPart)
|
||||
// Only handle locally when we have a provider; otherwise fall back to proxy
|
||||
if providers := util.GetProviderName(normalized); len(providers) > 0 {
|
||||
geminiV1Beta1Handler(c)
|
||||
return
|
||||
}
|
||||
}
|
||||
// POST with /models/ path -> use Gemini bridge with fallback handler
|
||||
// FallbackHandler will check provider/mapping and proxy if needed
|
||||
geminiV1Beta1Handler(c)
|
||||
return
|
||||
}
|
||||
}
|
||||
// Non-POST or no local provider available -> proxy upstream
|
||||
@@ -218,7 +210,7 @@ func (m *AmpModule) registerProviderAliases(engine *gin.Engine, baseHandler *han
|
||||
// Also includes model mapping support for routing unavailable models to alternatives
|
||||
fallbackHandler := NewFallbackHandlerWithMapper(func() *httputil.ReverseProxy {
|
||||
return m.getProxy()
|
||||
}, m.modelMapper)
|
||||
}, m.modelMapper, m.forceModelMappings)
|
||||
|
||||
// Provider-specific routes under /api/provider/:provider
|
||||
ampProviders := engine.Group("/api/provider")
|
||||
|
||||
@@ -300,7 +300,7 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
|
||||
|
||||
// Create HTTP server
|
||||
s.server = &http.Server{
|
||||
Addr: fmt.Sprintf(":%d", cfg.Port),
|
||||
Addr: fmt.Sprintf("%s:%d", cfg.Host, cfg.Port),
|
||||
Handler: engine,
|
||||
}
|
||||
|
||||
@@ -520,6 +520,26 @@ func (s *Server) registerManagementRoutes() {
|
||||
mgmt.PUT("/ws-auth", s.mgmt.PutWebsocketAuth)
|
||||
mgmt.PATCH("/ws-auth", s.mgmt.PutWebsocketAuth)
|
||||
|
||||
mgmt.GET("/ampcode", s.mgmt.GetAmpCode)
|
||||
mgmt.GET("/ampcode/upstream-url", s.mgmt.GetAmpUpstreamURL)
|
||||
mgmt.PUT("/ampcode/upstream-url", s.mgmt.PutAmpUpstreamURL)
|
||||
mgmt.PATCH("/ampcode/upstream-url", s.mgmt.PutAmpUpstreamURL)
|
||||
mgmt.DELETE("/ampcode/upstream-url", s.mgmt.DeleteAmpUpstreamURL)
|
||||
mgmt.GET("/ampcode/upstream-api-key", s.mgmt.GetAmpUpstreamAPIKey)
|
||||
mgmt.PUT("/ampcode/upstream-api-key", s.mgmt.PutAmpUpstreamAPIKey)
|
||||
mgmt.PATCH("/ampcode/upstream-api-key", s.mgmt.PutAmpUpstreamAPIKey)
|
||||
mgmt.DELETE("/ampcode/upstream-api-key", s.mgmt.DeleteAmpUpstreamAPIKey)
|
||||
mgmt.GET("/ampcode/restrict-management-to-localhost", s.mgmt.GetAmpRestrictManagementToLocalhost)
|
||||
mgmt.PUT("/ampcode/restrict-management-to-localhost", s.mgmt.PutAmpRestrictManagementToLocalhost)
|
||||
mgmt.PATCH("/ampcode/restrict-management-to-localhost", s.mgmt.PutAmpRestrictManagementToLocalhost)
|
||||
mgmt.GET("/ampcode/model-mappings", s.mgmt.GetAmpModelMappings)
|
||||
mgmt.PUT("/ampcode/model-mappings", s.mgmt.PutAmpModelMappings)
|
||||
mgmt.PATCH("/ampcode/model-mappings", s.mgmt.PatchAmpModelMappings)
|
||||
mgmt.DELETE("/ampcode/model-mappings", s.mgmt.DeleteAmpModelMappings)
|
||||
mgmt.GET("/ampcode/force-model-mappings", s.mgmt.GetAmpForceModelMappings)
|
||||
mgmt.PUT("/ampcode/force-model-mappings", s.mgmt.PutAmpForceModelMappings)
|
||||
mgmt.PATCH("/ampcode/force-model-mappings", s.mgmt.PutAmpForceModelMappings)
|
||||
|
||||
mgmt.GET("/request-retry", s.mgmt.GetRequestRetry)
|
||||
mgmt.PUT("/request-retry", s.mgmt.PutRequestRetry)
|
||||
mgmt.PATCH("/request-retry", s.mgmt.PutRequestRetry)
|
||||
|
||||
@@ -76,7 +76,8 @@ func (g *GeminiAuth) GetAuthenticatedClient(ctx context.Context, ts *GeminiToken
|
||||
auth := &proxy.Auth{User: username, Password: password}
|
||||
dialer, errSOCKS5 := proxy.SOCKS5("tcp", proxyURL.Host, auth, proxy.Direct)
|
||||
if errSOCKS5 != nil {
|
||||
log.Fatalf("create SOCKS5 dialer failed: %v", errSOCKS5)
|
||||
log.Errorf("create SOCKS5 dialer failed: %v", errSOCKS5)
|
||||
return nil, fmt.Errorf("create SOCKS5 dialer failed: %w", errSOCKS5)
|
||||
}
|
||||
transport = &http.Transport{
|
||||
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
|
||||
@@ -238,7 +239,11 @@ func (g *GeminiAuth) getTokenFromWeb(ctx context.Context, config *oauth2.Config,
|
||||
// Start the server in a goroutine.
|
||||
go func() {
|
||||
if err := server.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) {
|
||||
log.Fatalf("ListenAndServe(): %v", err)
|
||||
log.Errorf("ListenAndServe(): %v", err)
|
||||
select {
|
||||
case errChan <- err:
|
||||
default:
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
|
||||
@@ -309,17 +309,23 @@ func (ia *IFlowAuth) AuthenticateWithCookie(ctx context.Context, cookie string)
|
||||
return nil, fmt.Errorf("iflow cookie authentication: cookie is empty")
|
||||
}
|
||||
|
||||
// First, get initial API key information using GET request
|
||||
// First, get initial API key information using GET request to obtain the name
|
||||
keyInfo, err := ia.fetchAPIKeyInfo(ctx, cookie)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("iflow cookie authentication: fetch initial API key info failed: %w", err)
|
||||
}
|
||||
|
||||
// Convert to token data format
|
||||
// Refresh the API key using POST request
|
||||
refreshedKeyInfo, err := ia.RefreshAPIKey(ctx, cookie, keyInfo.Name)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("iflow cookie authentication: refresh API key failed: %w", err)
|
||||
}
|
||||
|
||||
// Convert to token data format using refreshed key
|
||||
data := &IFlowTokenData{
|
||||
APIKey: keyInfo.APIKey,
|
||||
Expire: keyInfo.ExpireTime,
|
||||
Email: keyInfo.Name,
|
||||
APIKey: refreshedKeyInfo.APIKey,
|
||||
Expire: refreshedKeyInfo.ExpireTime,
|
||||
Email: refreshedKeyInfo.Name,
|
||||
Cookie: cookie,
|
||||
}
|
||||
|
||||
|
||||
@@ -65,20 +65,20 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
|
||||
authenticator := sdkAuth.NewGeminiAuthenticator()
|
||||
record, errLogin := authenticator.Login(ctx, cfg, loginOpts)
|
||||
if errLogin != nil {
|
||||
log.Fatalf("Gemini authentication failed: %v", errLogin)
|
||||
log.Errorf("Gemini authentication failed: %v", errLogin)
|
||||
return
|
||||
}
|
||||
|
||||
storage, okStorage := record.Storage.(*gemini.GeminiTokenStorage)
|
||||
if !okStorage || storage == nil {
|
||||
log.Fatal("Gemini authentication failed: unsupported token storage")
|
||||
log.Error("Gemini authentication failed: unsupported token storage")
|
||||
return
|
||||
}
|
||||
|
||||
geminiAuth := gemini.NewGeminiAuth()
|
||||
httpClient, errClient := geminiAuth.GetAuthenticatedClient(ctx, storage, cfg, options.NoBrowser)
|
||||
if errClient != nil {
|
||||
log.Fatalf("Gemini authentication failed: %v", errClient)
|
||||
log.Errorf("Gemini authentication failed: %v", errClient)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -86,7 +86,7 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
|
||||
|
||||
projects, errProjects := fetchGCPProjects(ctx, httpClient)
|
||||
if errProjects != nil {
|
||||
log.Fatalf("Failed to get project list: %v", errProjects)
|
||||
log.Errorf("Failed to get project list: %v", errProjects)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -98,11 +98,11 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
|
||||
selectedProjectID := promptForProjectSelection(projects, strings.TrimSpace(projectID), promptFn)
|
||||
projectSelections, errSelection := resolveProjectSelections(selectedProjectID, projects)
|
||||
if errSelection != nil {
|
||||
log.Fatalf("Invalid project selection: %v", errSelection)
|
||||
log.Errorf("Invalid project selection: %v", errSelection)
|
||||
return
|
||||
}
|
||||
if len(projectSelections) == 0 {
|
||||
log.Fatal("No project selected; aborting login.")
|
||||
log.Error("No project selected; aborting login.")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -116,7 +116,7 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
|
||||
showProjectSelectionHelp(storage.Email, projects)
|
||||
return
|
||||
}
|
||||
log.Fatalf("Failed to complete user setup: %v", errSetup)
|
||||
log.Errorf("Failed to complete user setup: %v", errSetup)
|
||||
return
|
||||
}
|
||||
finalID := strings.TrimSpace(storage.ProjectID)
|
||||
@@ -133,11 +133,11 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
|
||||
for _, pid := range activatedProjects {
|
||||
isChecked, errCheck := checkCloudAPIIsEnabled(ctx, httpClient, pid)
|
||||
if errCheck != nil {
|
||||
log.Fatalf("Failed to check if Cloud AI API is enabled for %s: %v", pid, errCheck)
|
||||
log.Errorf("Failed to check if Cloud AI API is enabled for %s: %v", pid, errCheck)
|
||||
return
|
||||
}
|
||||
if !isChecked {
|
||||
log.Fatalf("Failed to check if Cloud AI API is enabled for project %s. If you encounter an error message, please create an issue.", pid)
|
||||
log.Errorf("Failed to check if Cloud AI API is enabled for project %s. If you encounter an error message, please create an issue.", pid)
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -153,7 +153,7 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
|
||||
|
||||
savedPath, errSave := store.Save(ctx, record)
|
||||
if errSave != nil {
|
||||
log.Fatalf("Failed to save token to file: %v", errSave)
|
||||
log.Errorf("Failed to save token to file: %v", errSave)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -555,6 +555,7 @@ func checkCloudAPIIsEnabled(ctx context.Context, httpClient *http.Client, projec
|
||||
continue
|
||||
}
|
||||
}
|
||||
_ = resp.Body.Close()
|
||||
return false, fmt.Errorf("project activation required: %s", errMessage)
|
||||
}
|
||||
return true, nil
|
||||
|
||||
@@ -45,12 +45,13 @@ func StartService(cfg *config.Config, configPath string, localPassword string) {
|
||||
|
||||
service, err := builder.Build()
|
||||
if err != nil {
|
||||
log.Fatalf("failed to build proxy service: %v", err)
|
||||
log.Errorf("failed to build proxy service: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = service.Run(runCtx)
|
||||
if err != nil && !errors.Is(err, context.Canceled) {
|
||||
log.Fatalf("proxy service exited with error: %v", err)
|
||||
log.Errorf("proxy service exited with error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -29,30 +29,30 @@ func DoVertexImport(cfg *config.Config, keyPath string) {
|
||||
}
|
||||
rawPath := strings.TrimSpace(keyPath)
|
||||
if rawPath == "" {
|
||||
log.Fatalf("vertex-import: missing service account key path")
|
||||
log.Errorf("vertex-import: missing service account key path")
|
||||
return
|
||||
}
|
||||
data, errRead := os.ReadFile(rawPath)
|
||||
if errRead != nil {
|
||||
log.Fatalf("vertex-import: read file failed: %v", errRead)
|
||||
log.Errorf("vertex-import: read file failed: %v", errRead)
|
||||
return
|
||||
}
|
||||
var sa map[string]any
|
||||
if errUnmarshal := json.Unmarshal(data, &sa); errUnmarshal != nil {
|
||||
log.Fatalf("vertex-import: invalid service account json: %v", errUnmarshal)
|
||||
log.Errorf("vertex-import: invalid service account json: %v", errUnmarshal)
|
||||
return
|
||||
}
|
||||
// Validate and normalize private_key before saving
|
||||
normalizedSA, errFix := vertex.NormalizeServiceAccountMap(sa)
|
||||
if errFix != nil {
|
||||
log.Fatalf("vertex-import: %v", errFix)
|
||||
log.Errorf("vertex-import: %v", errFix)
|
||||
return
|
||||
}
|
||||
sa = normalizedSA
|
||||
email, _ := sa["client_email"].(string)
|
||||
projectID, _ := sa["project_id"].(string)
|
||||
if strings.TrimSpace(projectID) == "" {
|
||||
log.Fatalf("vertex-import: project_id missing in service account json")
|
||||
log.Errorf("vertex-import: project_id missing in service account json")
|
||||
return
|
||||
}
|
||||
if strings.TrimSpace(email) == "" {
|
||||
@@ -92,7 +92,7 @@ func DoVertexImport(cfg *config.Config, keyPath string) {
|
||||
}
|
||||
path, errSave := store.Save(context.Background(), record)
|
||||
if errSave != nil {
|
||||
log.Fatalf("vertex-import: save credential failed: %v", errSave)
|
||||
log.Errorf("vertex-import: save credential failed: %v", errSave)
|
||||
return
|
||||
}
|
||||
fmt.Printf("Vertex credentials imported: %s\n", path)
|
||||
|
||||
@@ -20,6 +20,9 @@ import (
|
||||
// Config represents the application's configuration, loaded from a YAML file.
|
||||
type Config struct {
|
||||
config.SDKConfig `yaml:",inline"`
|
||||
// Host is the network host/interface on which the API server will bind.
|
||||
// Default is empty ("") to bind all interfaces (IPv4 + IPv6). Use "127.0.0.1" or "localhost" for local-only access.
|
||||
Host string `yaml:"host" json:"-"`
|
||||
// Port is the network port on which the API server will listen.
|
||||
Port int `yaml:"port" json:"-"`
|
||||
|
||||
@@ -143,6 +146,10 @@ type AmpCode struct {
|
||||
// When Amp requests a model that isn't available locally, these mappings
|
||||
// allow routing to an alternative model that IS available.
|
||||
ModelMappings []AmpModelMapping `yaml:"model-mappings" json:"model-mappings"`
|
||||
|
||||
// ForceModelMappings when true, model mappings take precedence over local API keys.
|
||||
// When false (default), local API keys are used first if available.
|
||||
ForceModelMappings bool `yaml:"force-model-mappings" json:"force-model-mappings"`
|
||||
}
|
||||
|
||||
// PayloadConfig defines default and override parameter rules applied to provider payloads.
|
||||
@@ -316,6 +323,7 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
|
||||
// Unmarshal the YAML data into the Config struct.
|
||||
var cfg Config
|
||||
// Set defaults before unmarshal so that absent keys keep defaults.
|
||||
cfg.Host = "" // Default empty: binds to all interfaces (IPv4 + IPv6)
|
||||
cfg.LoggingToFile = false
|
||||
cfg.UsageStatisticsEnabled = false
|
||||
cfg.DisableCooling = false
|
||||
|
||||
@@ -56,6 +56,8 @@ type Content struct {
|
||||
// Part represents a distinct piece of content within a message.
|
||||
// A part can be text, inline data (like an image), a function call, or a function response.
|
||||
type Part struct {
|
||||
Thought bool `json:"thought,omitempty"`
|
||||
|
||||
// Text contains plain text content.
|
||||
Text string `json:"text,omitempty"`
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
"github.com/klauspost/compress/zstd"
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/buildinfo"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
)
|
||||
@@ -83,6 +84,26 @@ type StreamingLogWriter interface {
|
||||
// - error: An error if writing fails, nil otherwise
|
||||
WriteStatus(status int, headers map[string][]string) error
|
||||
|
||||
// WriteAPIRequest writes the upstream API request details to the log.
|
||||
// This should be called before WriteStatus to maintain proper log ordering.
|
||||
//
|
||||
// Parameters:
|
||||
// - apiRequest: The API request data (typically includes URL, headers, body sent upstream)
|
||||
//
|
||||
// Returns:
|
||||
// - error: An error if writing fails, nil otherwise
|
||||
WriteAPIRequest(apiRequest []byte) error
|
||||
|
||||
// WriteAPIResponse writes the upstream API response details to the log.
|
||||
// This should be called after the streaming response is complete.
|
||||
//
|
||||
// Parameters:
|
||||
// - apiResponse: The API response data
|
||||
//
|
||||
// Returns:
|
||||
// - error: An error if writing fails, nil otherwise
|
||||
WriteAPIResponse(apiResponse []byte) error
|
||||
|
||||
// Close finalizes the log file and cleans up resources.
|
||||
//
|
||||
// Returns:
|
||||
@@ -247,10 +268,11 @@ func (l *FileRequestLogger) LogStreamingRequest(url, method string, headers map[
|
||||
|
||||
// Create streaming writer
|
||||
writer := &FileStreamingLogWriter{
|
||||
file: file,
|
||||
chunkChan: make(chan []byte, 100), // Buffered channel for async writes
|
||||
closeChan: make(chan struct{}),
|
||||
errorChan: make(chan error, 1),
|
||||
file: file,
|
||||
chunkChan: make(chan []byte, 100), // Buffered channel for async writes
|
||||
closeChan: make(chan struct{}),
|
||||
errorChan: make(chan error, 1),
|
||||
bufferedChunks: &bytes.Buffer{},
|
||||
}
|
||||
|
||||
// Start async writer goroutine
|
||||
@@ -603,6 +625,7 @@ func (l *FileRequestLogger) formatRequestInfo(url, method string, headers map[st
|
||||
var content strings.Builder
|
||||
|
||||
content.WriteString("=== REQUEST INFO ===\n")
|
||||
content.WriteString(fmt.Sprintf("Version: %s\n", buildinfo.Version))
|
||||
content.WriteString(fmt.Sprintf("URL: %s\n", url))
|
||||
content.WriteString(fmt.Sprintf("Method: %s\n", method))
|
||||
content.WriteString(fmt.Sprintf("Timestamp: %s\n", time.Now().Format(time.RFC3339Nano)))
|
||||
@@ -626,11 +649,12 @@ func (l *FileRequestLogger) formatRequestInfo(url, method string, headers map[st
|
||||
|
||||
// FileStreamingLogWriter implements StreamingLogWriter for file-based streaming logs.
|
||||
// It handles asynchronous writing of streaming response chunks to a file.
|
||||
// All data is buffered and written in the correct order when Close is called.
|
||||
type FileStreamingLogWriter struct {
|
||||
// file is the file where log data is written.
|
||||
file *os.File
|
||||
|
||||
// chunkChan is a channel for receiving response chunks to write.
|
||||
// chunkChan is a channel for receiving response chunks to buffer.
|
||||
chunkChan chan []byte
|
||||
|
||||
// closeChan is a channel for signaling when the writer is closed.
|
||||
@@ -639,8 +663,23 @@ type FileStreamingLogWriter struct {
|
||||
// errorChan is a channel for reporting errors during writing.
|
||||
errorChan chan error
|
||||
|
||||
// statusWritten indicates whether the response status has been written.
|
||||
// bufferedChunks stores the response chunks in order.
|
||||
bufferedChunks *bytes.Buffer
|
||||
|
||||
// responseStatus stores the HTTP status code.
|
||||
responseStatus int
|
||||
|
||||
// statusWritten indicates whether a non-zero status was recorded.
|
||||
statusWritten bool
|
||||
|
||||
// responseHeaders stores the response headers.
|
||||
responseHeaders map[string][]string
|
||||
|
||||
// apiRequest stores the upstream API request data.
|
||||
apiRequest []byte
|
||||
|
||||
// apiResponse stores the upstream API response data.
|
||||
apiResponse []byte
|
||||
}
|
||||
|
||||
// WriteChunkAsync writes a response chunk asynchronously (non-blocking).
|
||||
@@ -664,39 +703,65 @@ func (w *FileStreamingLogWriter) WriteChunkAsync(chunk []byte) {
|
||||
}
|
||||
}
|
||||
|
||||
// WriteStatus writes the response status and headers to the log.
|
||||
// WriteStatus buffers the response status and headers for later writing.
|
||||
//
|
||||
// Parameters:
|
||||
// - status: The response status code
|
||||
// - headers: The response headers
|
||||
//
|
||||
// Returns:
|
||||
// - error: An error if writing fails, nil otherwise
|
||||
// - error: Always returns nil (buffering cannot fail)
|
||||
func (w *FileStreamingLogWriter) WriteStatus(status int, headers map[string][]string) error {
|
||||
if w.file == nil || w.statusWritten {
|
||||
if status == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var content strings.Builder
|
||||
content.WriteString("========================================\n")
|
||||
content.WriteString("=== RESPONSE ===\n")
|
||||
content.WriteString(fmt.Sprintf("Status: %d\n", status))
|
||||
|
||||
for key, values := range headers {
|
||||
for _, value := range values {
|
||||
content.WriteString(fmt.Sprintf("%s: %s\n", key, value))
|
||||
w.responseStatus = status
|
||||
if headers != nil {
|
||||
w.responseHeaders = make(map[string][]string, len(headers))
|
||||
for key, values := range headers {
|
||||
headerValues := make([]string, len(values))
|
||||
copy(headerValues, values)
|
||||
w.responseHeaders[key] = headerValues
|
||||
}
|
||||
}
|
||||
content.WriteString("\n")
|
||||
w.statusWritten = true
|
||||
return nil
|
||||
}
|
||||
|
||||
_, err := w.file.WriteString(content.String())
|
||||
if err == nil {
|
||||
w.statusWritten = true
|
||||
// WriteAPIRequest buffers the upstream API request details for later writing.
|
||||
//
|
||||
// Parameters:
|
||||
// - apiRequest: The API request data (typically includes URL, headers, body sent upstream)
|
||||
//
|
||||
// Returns:
|
||||
// - error: Always returns nil (buffering cannot fail)
|
||||
func (w *FileStreamingLogWriter) WriteAPIRequest(apiRequest []byte) error {
|
||||
if len(apiRequest) == 0 {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
w.apiRequest = bytes.Clone(apiRequest)
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteAPIResponse buffers the upstream API response details for later writing.
|
||||
//
|
||||
// Parameters:
|
||||
// - apiResponse: The API response data
|
||||
//
|
||||
// Returns:
|
||||
// - error: Always returns nil (buffering cannot fail)
|
||||
func (w *FileStreamingLogWriter) WriteAPIResponse(apiResponse []byte) error {
|
||||
if len(apiResponse) == 0 {
|
||||
return nil
|
||||
}
|
||||
w.apiResponse = bytes.Clone(apiResponse)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close finalizes the log file and cleans up resources.
|
||||
// It writes all buffered data to the file in the correct order:
|
||||
// API REQUEST -> API RESPONSE -> RESPONSE (status, headers, body chunks)
|
||||
//
|
||||
// Returns:
|
||||
// - error: An error if closing fails, nil otherwise
|
||||
@@ -705,27 +770,84 @@ func (w *FileStreamingLogWriter) Close() error {
|
||||
close(w.chunkChan)
|
||||
}
|
||||
|
||||
// Wait for async writer to finish
|
||||
// Wait for async writer to finish buffering chunks
|
||||
if w.closeChan != nil {
|
||||
<-w.closeChan
|
||||
w.chunkChan = nil
|
||||
}
|
||||
|
||||
if w.file != nil {
|
||||
return w.file.Close()
|
||||
if w.file == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return nil
|
||||
// Write all content in the correct order
|
||||
var content strings.Builder
|
||||
|
||||
// 1. Write API REQUEST section
|
||||
if len(w.apiRequest) > 0 {
|
||||
if bytes.HasPrefix(w.apiRequest, []byte("=== API REQUEST")) {
|
||||
content.Write(w.apiRequest)
|
||||
if !bytes.HasSuffix(w.apiRequest, []byte("\n")) {
|
||||
content.WriteString("\n")
|
||||
}
|
||||
} else {
|
||||
content.WriteString("=== API REQUEST ===\n")
|
||||
content.Write(w.apiRequest)
|
||||
content.WriteString("\n")
|
||||
}
|
||||
content.WriteString("\n")
|
||||
}
|
||||
|
||||
// 2. Write API RESPONSE section
|
||||
if len(w.apiResponse) > 0 {
|
||||
if bytes.HasPrefix(w.apiResponse, []byte("=== API RESPONSE")) {
|
||||
content.Write(w.apiResponse)
|
||||
if !bytes.HasSuffix(w.apiResponse, []byte("\n")) {
|
||||
content.WriteString("\n")
|
||||
}
|
||||
} else {
|
||||
content.WriteString("=== API RESPONSE ===\n")
|
||||
content.Write(w.apiResponse)
|
||||
content.WriteString("\n")
|
||||
}
|
||||
content.WriteString("\n")
|
||||
}
|
||||
|
||||
// 3. Write RESPONSE section (status, headers, buffered chunks)
|
||||
content.WriteString("=== RESPONSE ===\n")
|
||||
if w.statusWritten {
|
||||
content.WriteString(fmt.Sprintf("Status: %d\n", w.responseStatus))
|
||||
}
|
||||
|
||||
for key, values := range w.responseHeaders {
|
||||
for _, value := range values {
|
||||
content.WriteString(fmt.Sprintf("%s: %s\n", key, value))
|
||||
}
|
||||
}
|
||||
content.WriteString("\n")
|
||||
|
||||
// Write buffered response body chunks
|
||||
if w.bufferedChunks != nil && w.bufferedChunks.Len() > 0 {
|
||||
content.Write(w.bufferedChunks.Bytes())
|
||||
}
|
||||
|
||||
// Write the complete content to file
|
||||
if _, err := w.file.WriteString(content.String()); err != nil {
|
||||
_ = w.file.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
return w.file.Close()
|
||||
}
|
||||
|
||||
// asyncWriter runs in a goroutine to handle async chunk writing.
|
||||
// It continuously reads chunks from the channel and writes them to the file.
|
||||
// asyncWriter runs in a goroutine to buffer chunks from the channel.
|
||||
// It continuously reads chunks from the channel and buffers them for later writing.
|
||||
func (w *FileStreamingLogWriter) asyncWriter() {
|
||||
defer close(w.closeChan)
|
||||
|
||||
for chunk := range w.chunkChan {
|
||||
if w.file != nil {
|
||||
_, _ = w.file.Write(chunk)
|
||||
if w.bufferedChunks != nil {
|
||||
w.bufferedChunks.Write(chunk)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -752,6 +874,28 @@ func (w *NoOpStreamingLogWriter) WriteStatus(_ int, _ map[string][]string) error
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteAPIRequest is a no-op implementation that does nothing and always returns nil.
|
||||
//
|
||||
// Parameters:
|
||||
// - apiRequest: The API request data (ignored)
|
||||
//
|
||||
// Returns:
|
||||
// - error: Always returns nil
|
||||
func (w *NoOpStreamingLogWriter) WriteAPIRequest(_ []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteAPIResponse is a no-op implementation that does nothing and always returns nil.
|
||||
//
|
||||
// Parameters:
|
||||
// - apiResponse: The API response data (ignored)
|
||||
//
|
||||
// Returns:
|
||||
// - error: Always returns nil
|
||||
func (w *NoOpStreamingLogWriter) WriteAPIResponse(_ []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close is a no-op implementation that does nothing and always returns nil.
|
||||
//
|
||||
// Returns:
|
||||
|
||||
@@ -16,6 +16,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 4.5 Haiku",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
// Thinking: not supported for Haiku models
|
||||
},
|
||||
{
|
||||
ID: "claude-sonnet-4-5-20250929",
|
||||
@@ -26,60 +27,6 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 4.5 Sonnet",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
},
|
||||
{
|
||||
ID: "claude-sonnet-4-5-thinking",
|
||||
Object: "model",
|
||||
Created: 1759104000, // 2025-09-29
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Sonnet Thinking",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-5-thinking",
|
||||
Object: "model",
|
||||
Created: 1761955200, // 2025-11-01
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Opus Thinking",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-5-thinking-low",
|
||||
Object: "model",
|
||||
Created: 1761955200, // 2025-11-01
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Opus Thinking Low",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-5-thinking-medium",
|
||||
Object: "model",
|
||||
Created: 1761955200, // 2025-11-01
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Opus Thinking Medium",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-5-thinking-high",
|
||||
Object: "model",
|
||||
Created: 1761955200, // 2025-11-01
|
||||
OwnedBy: "anthropic",
|
||||
Type: "claude",
|
||||
DisplayName: "Claude 4.5 Opus Thinking High",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
@@ -92,6 +39,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
Description: "Premium model combining maximum intelligence with practical performance",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-1-20250805",
|
||||
@@ -102,6 +50,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 4.1 Opus",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 32000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-opus-4-20250514",
|
||||
@@ -112,6 +61,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 4 Opus",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 32000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-sonnet-4-20250514",
|
||||
@@ -122,6 +72,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 4 Sonnet",
|
||||
ContextLength: 200000,
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-3-7-sonnet-20250219",
|
||||
@@ -132,6 +83,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 3.7 Sonnet",
|
||||
ContextLength: 128000,
|
||||
MaxCompletionTokens: 8192,
|
||||
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
|
||||
},
|
||||
{
|
||||
ID: "claude-3-5-haiku-20241022",
|
||||
@@ -142,6 +94,7 @@ func GetClaudeModels() []*ModelInfo {
|
||||
DisplayName: "Claude 3.5 Haiku",
|
||||
ContextLength: 128000,
|
||||
MaxCompletionTokens: 8192,
|
||||
// Thinking: not supported for Haiku models
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -529,58 +482,7 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-minimal",
|
||||
Object: "model",
|
||||
Created: 1754524800,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-08-07",
|
||||
DisplayName: "GPT 5 Minimal",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-low",
|
||||
Object: "model",
|
||||
Created: 1754524800,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-08-07",
|
||||
DisplayName: "GPT 5 Low",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-medium",
|
||||
Object: "model",
|
||||
Created: 1754524800,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-08-07",
|
||||
DisplayName: "GPT 5 Medium",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-high",
|
||||
Object: "model",
|
||||
Created: 1754524800,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-08-07",
|
||||
DisplayName: "GPT 5 High",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex",
|
||||
@@ -594,45 +496,7 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-low",
|
||||
Object: "model",
|
||||
Created: 1757894400,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-09-15",
|
||||
DisplayName: "GPT 5 Codex Low",
|
||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-medium",
|
||||
Object: "model",
|
||||
Created: 1757894400,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-09-15",
|
||||
DisplayName: "GPT 5 Codex Medium",
|
||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-high",
|
||||
Object: "model",
|
||||
Created: 1757894400,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-09-15",
|
||||
DisplayName: "GPT 5 Codex High",
|
||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-mini",
|
||||
@@ -646,32 +510,7 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-mini-medium",
|
||||
Object: "model",
|
||||
Created: 1762473600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-11-07",
|
||||
DisplayName: "GPT 5 Codex Mini Medium",
|
||||
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5-codex-mini-high",
|
||||
Object: "model",
|
||||
Created: 1762473600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5-2025-11-07",
|
||||
DisplayName: "GPT 5 Codex Mini High",
|
||||
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1",
|
||||
@@ -685,58 +524,7 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-none",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5 Low",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-low",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5 Low",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-medium",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5 Medium",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-high",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5 High",
|
||||
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex",
|
||||
@@ -745,50 +533,12 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5 Codex",
|
||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-low",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5 Codex Low",
|
||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-medium",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5 Codex Medium",
|
||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-high",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5 Codex High",
|
||||
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
|
||||
DisplayName: "GPT 5.1 Codex",
|
||||
Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-mini",
|
||||
@@ -797,39 +547,13 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5 Codex Mini",
|
||||
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
|
||||
DisplayName: "GPT 5.1 Codex Mini",
|
||||
Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-mini-medium",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5 Codex Mini Medium",
|
||||
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-mini-high",
|
||||
Object: "model",
|
||||
Created: 1762905600,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-2025-11-12",
|
||||
DisplayName: "GPT 5 Codex Mini High",
|
||||
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
|
||||
{
|
||||
ID: "gpt-5.1-codex-max",
|
||||
Object: "model",
|
||||
@@ -837,63 +561,12 @@ func GetOpenAIModels() []*ModelInfo {
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-max",
|
||||
DisplayName: "GPT 5 Codex Max",
|
||||
Description: "Stable version of GPT 5 Codex Max",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-max-low",
|
||||
Object: "model",
|
||||
Created: 1763424000,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-max",
|
||||
DisplayName: "GPT 5 Codex Max Low",
|
||||
Description: "Stable version of GPT 5 Codex Max Low",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-max-medium",
|
||||
Object: "model",
|
||||
Created: 1763424000,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-max",
|
||||
DisplayName: "GPT 5 Codex Max Medium",
|
||||
Description: "Stable version of GPT 5 Codex Max Medium",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-max-high",
|
||||
Object: "model",
|
||||
Created: 1763424000,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-max",
|
||||
DisplayName: "GPT 5 Codex Max High",
|
||||
Description: "Stable version of GPT 5 Codex Max High",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
},
|
||||
{
|
||||
ID: "gpt-5.1-codex-max-xhigh",
|
||||
Object: "model",
|
||||
Created: 1763424000,
|
||||
OwnedBy: "openai",
|
||||
Type: "openai",
|
||||
Version: "gpt-5.1-max",
|
||||
DisplayName: "GPT 5 Codex Max XHigh",
|
||||
Description: "Stable version of GPT 5 Codex Max XHigh",
|
||||
DisplayName: "GPT 5.1 Codex Max",
|
||||
Description: "Stable version of GPT 5.1 Codex Max",
|
||||
ContextLength: 400000,
|
||||
MaxCompletionTokens: 128000,
|
||||
SupportedParameters: []string{"tools"},
|
||||
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -944,13 +617,13 @@ func GetQwenModels() []*ModelInfo {
|
||||
}
|
||||
|
||||
// GetIFlowModels returns supported models for iFlow OAuth accounts.
|
||||
|
||||
func GetIFlowModels() []*ModelInfo {
|
||||
entries := []struct {
|
||||
ID string
|
||||
DisplayName string
|
||||
Description string
|
||||
Created int64
|
||||
Thinking *ThinkingSupport
|
||||
}{
|
||||
{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
|
||||
{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
|
||||
@@ -960,17 +633,17 @@ func GetIFlowModels() []*ModelInfo {
|
||||
{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
|
||||
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400},
|
||||
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
|
||||
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 general model", Created: 1762387200},
|
||||
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
||||
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000},
|
||||
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
|
||||
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
|
||||
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
|
||||
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
||||
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
|
||||
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
|
||||
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
|
||||
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
||||
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
||||
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
||||
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
|
||||
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
||||
}
|
||||
models := make([]*ModelInfo, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
@@ -982,7 +655,30 @@ func GetIFlowModels() []*ModelInfo {
|
||||
Type: "iflow",
|
||||
DisplayName: entry.DisplayName,
|
||||
Description: entry.Description,
|
||||
Thinking: entry.Thinking,
|
||||
})
|
||||
}
|
||||
return models
|
||||
}
|
||||
|
||||
// AntigravityModelConfig captures static antigravity model overrides, including
|
||||
// Thinking budget limits and provider max completion tokens.
|
||||
type AntigravityModelConfig struct {
|
||||
Thinking *ThinkingSupport
|
||||
MaxCompletionTokens int
|
||||
Name string
|
||||
}
|
||||
|
||||
// GetAntigravityModelConfig returns static configuration for antigravity models.
|
||||
// Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup.
|
||||
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
|
||||
return map[string]*AntigravityModelConfig{
|
||||
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
|
||||
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
|
||||
"gemini-2.5-computer-use-preview-10-2025": {Name: "models/gemini-2.5-computer-use-preview-10-2025"},
|
||||
"gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-preview"},
|
||||
"gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-image-preview"},
|
||||
"gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||
"gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -63,6 +63,9 @@ type ThinkingSupport struct {
|
||||
ZeroAllowed bool `json:"zero_allowed,omitempty"`
|
||||
// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
|
||||
DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
|
||||
// Levels defines discrete reasoning effort levels (e.g., "low", "medium", "high").
|
||||
// When set, the model uses level-based reasoning instead of token budgets.
|
||||
Levels []string `json:"levels,omitempty"`
|
||||
}
|
||||
|
||||
// ModelRegistration tracks a model's availability
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// Package executor provides runtime execution capabilities for various AI service providers.
|
||||
// This file implements the AI Studio executor that routes requests through a websocket-backed
|
||||
// transport for the AI Studio provider.
|
||||
package executor
|
||||
|
||||
import (
|
||||
@@ -26,19 +29,28 @@ type AIStudioExecutor struct {
|
||||
cfg *config.Config
|
||||
}
|
||||
|
||||
// NewAIStudioExecutor constructs a websocket executor for the provider name.
|
||||
// NewAIStudioExecutor creates a new AI Studio executor instance.
|
||||
//
|
||||
// Parameters:
|
||||
// - cfg: The application configuration
|
||||
// - provider: The provider name
|
||||
// - relay: The websocket relay manager
|
||||
//
|
||||
// Returns:
|
||||
// - *AIStudioExecutor: A new AI Studio executor instance
|
||||
func NewAIStudioExecutor(cfg *config.Config, provider string, relay *wsrelay.Manager) *AIStudioExecutor {
|
||||
return &AIStudioExecutor{provider: strings.ToLower(provider), relay: relay, cfg: cfg}
|
||||
}
|
||||
|
||||
// Identifier returns the logical provider key for routing.
|
||||
// Identifier returns the executor identifier.
|
||||
func (e *AIStudioExecutor) Identifier() string { return "aistudio" }
|
||||
|
||||
// PrepareRequest is a no-op because websocket transport already injects headers.
|
||||
// PrepareRequest prepares the HTTP request for execution (no-op for AI Studio).
|
||||
func (e *AIStudioExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Execute performs a non-streaming request to the AI Studio API.
|
||||
func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
@@ -92,6 +104,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// ExecuteStream performs a streaming request to the AI Studio API.
|
||||
func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
@@ -239,6 +252,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
|
||||
return stream, nil
|
||||
}
|
||||
|
||||
// CountTokens counts tokens for the given request using the AI Studio API.
|
||||
func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
_, body, err := e.translateRequest(req, opts, false)
|
||||
if err != nil {
|
||||
@@ -293,8 +307,8 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
|
||||
return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
|
||||
}
|
||||
|
||||
func (e *AIStudioExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
_ = ctx
|
||||
// Refresh refreshes the authentication credentials (no-op for AI Studio).
|
||||
func (e *AIStudioExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
return auth, nil
|
||||
}
|
||||
|
||||
@@ -309,7 +323,9 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
|
||||
to := sdktranslator.FromString("gemini")
|
||||
payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
|
||||
payload = applyThinkingMetadata(payload, req.Metadata, req.Model)
|
||||
payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
|
||||
payload = util.ConvertThinkingLevelToBudget(payload)
|
||||
payload = util.NormalizeGeminiThinkingBudget(req.Model, payload)
|
||||
payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
|
||||
payload = fixGeminiImageAspectRatio(req.Model, payload)
|
||||
payload = applyPayloadConfig(e.cfg, req.Model, payload)
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// Package executor provides runtime execution capabilities for various AI service providers.
|
||||
// This file implements the Antigravity executor that proxies requests to the antigravity
|
||||
// upstream using OAuth credentials.
|
||||
package executor
|
||||
|
||||
import (
|
||||
@@ -27,18 +30,17 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
antigravityBaseURLDaily = "https://daily-cloudcode-pa.sandbox.googleapis.com"
|
||||
antigravityBaseURLAutopush = "https://autopush-cloudcode-pa.sandbox.googleapis.com"
|
||||
antigravityBaseURLProd = "https://cloudcode-pa.googleapis.com"
|
||||
antigravityStreamPath = "/v1internal:streamGenerateContent"
|
||||
antigravityGeneratePath = "/v1internal:generateContent"
|
||||
antigravityModelsPath = "/v1internal:fetchAvailableModels"
|
||||
antigravityClientID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
|
||||
antigravityClientSecret = "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"
|
||||
defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
|
||||
antigravityAuthType = "antigravity"
|
||||
refreshSkew = 3000 * time.Second
|
||||
streamScannerBuffer int = 20_971_520
|
||||
antigravityBaseURLDaily = "https://daily-cloudcode-pa.sandbox.googleapis.com"
|
||||
// antigravityBaseURLAutopush = "https://autopush-cloudcode-pa.sandbox.googleapis.com"
|
||||
antigravityBaseURLProd = "https://cloudcode-pa.googleapis.com"
|
||||
antigravityStreamPath = "/v1internal:streamGenerateContent"
|
||||
antigravityGeneratePath = "/v1internal:generateContent"
|
||||
antigravityModelsPath = "/v1internal:fetchAvailableModels"
|
||||
antigravityClientID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
|
||||
antigravityClientSecret = "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"
|
||||
defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
|
||||
antigravityAuthType = "antigravity"
|
||||
refreshSkew = 3000 * time.Second
|
||||
)
|
||||
|
||||
var randSource = rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
@@ -48,18 +50,24 @@ type AntigravityExecutor struct {
|
||||
cfg *config.Config
|
||||
}
|
||||
|
||||
// NewAntigravityExecutor constructs a new executor instance.
|
||||
// NewAntigravityExecutor creates a new Antigravity executor instance.
|
||||
//
|
||||
// Parameters:
|
||||
// - cfg: The application configuration
|
||||
//
|
||||
// Returns:
|
||||
// - *AntigravityExecutor: A new Antigravity executor instance
|
||||
func NewAntigravityExecutor(cfg *config.Config) *AntigravityExecutor {
|
||||
return &AntigravityExecutor{cfg: cfg}
|
||||
}
|
||||
|
||||
// Identifier implements ProviderExecutor.
|
||||
// Identifier returns the executor identifier.
|
||||
func (e *AntigravityExecutor) Identifier() string { return antigravityAuthType }
|
||||
|
||||
// PrepareRequest implements ProviderExecutor.
|
||||
// PrepareRequest prepares the HTTP request for execution (no-op for Antigravity).
|
||||
func (e *AntigravityExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error { return nil }
|
||||
|
||||
// Execute handles non-streaming requests via the antigravity generate endpoint.
|
||||
// Execute performs a non-streaming request to the Antigravity API.
|
||||
func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
|
||||
if errToken != nil {
|
||||
@@ -77,6 +85,8 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
|
||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
|
||||
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
|
||||
translated = normalizeAntigravityThinking(req.Model, translated)
|
||||
|
||||
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
@@ -150,7 +160,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// ExecuteStream handles streaming requests via the antigravity upstream.
|
||||
// ExecuteStream performs a streaming request to the Antigravity API.
|
||||
func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
ctx = context.WithValue(ctx, "alt", "")
|
||||
|
||||
@@ -170,6 +180,8 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
|
||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
|
||||
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
|
||||
translated = normalizeAntigravityThinking(req.Model, translated)
|
||||
|
||||
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
@@ -288,7 +300,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Refresh refreshes the OAuth token using the refresh token.
|
||||
// Refresh refreshes the authentication credentials using the refresh token.
|
||||
func (e *AntigravityExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
if auth == nil {
|
||||
return auth, nil
|
||||
@@ -300,7 +312,7 @@ func (e *AntigravityExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Au
|
||||
return updated, nil
|
||||
}
|
||||
|
||||
// CountTokens is not supported for the antigravity provider.
|
||||
// CountTokens counts tokens for the given request (not supported for Antigravity).
|
||||
func (e *AntigravityExecutor) CountTokens(context.Context, *cliproxyauth.Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported"}
|
||||
}
|
||||
@@ -366,28 +378,34 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
|
||||
}
|
||||
|
||||
now := time.Now().Unix()
|
||||
modelConfig := registry.GetAntigravityModelConfig()
|
||||
models := make([]*registry.ModelInfo, 0, len(result.Map()))
|
||||
for id := range result.Map() {
|
||||
id = modelName2Alias(id)
|
||||
if id != "" {
|
||||
for originalName := range result.Map() {
|
||||
aliasName := modelName2Alias(originalName)
|
||||
if aliasName != "" {
|
||||
cfg := modelConfig[aliasName]
|
||||
modelName := aliasName
|
||||
if cfg != nil && cfg.Name != "" {
|
||||
modelName = cfg.Name
|
||||
}
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
ID: id,
|
||||
Name: id,
|
||||
Description: id,
|
||||
DisplayName: id,
|
||||
Version: id,
|
||||
ID: aliasName,
|
||||
Name: modelName,
|
||||
Description: aliasName,
|
||||
DisplayName: aliasName,
|
||||
Version: aliasName,
|
||||
Object: "model",
|
||||
Created: now,
|
||||
OwnedBy: antigravityAuthType,
|
||||
Type: antigravityAuthType,
|
||||
}
|
||||
// Add Thinking support for thinking models
|
||||
if strings.HasSuffix(id, "-thinking") || strings.Contains(id, "-thinking-") {
|
||||
modelInfo.Thinking = ®istry.ThinkingSupport{
|
||||
Min: 1024,
|
||||
Max: 100000,
|
||||
ZeroAllowed: false,
|
||||
DynamicAllowed: true,
|
||||
// Look up Thinking support from static config using alias name
|
||||
if cfg != nil {
|
||||
if cfg.Thinking != nil {
|
||||
modelInfo.Thinking = cfg.Thinking
|
||||
}
|
||||
if cfg.MaxCompletionTokens > 0 {
|
||||
modelInfo.MaxCompletionTokens = cfg.MaxCompletionTokens
|
||||
}
|
||||
}
|
||||
models = append(models, modelInfo)
|
||||
@@ -533,6 +551,9 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
|
||||
strJSON = util.DeleteKey(strJSON, "minLength")
|
||||
strJSON = util.DeleteKey(strJSON, "maxLength")
|
||||
strJSON = util.DeleteKey(strJSON, "exclusiveMinimum")
|
||||
strJSON = util.DeleteKey(strJSON, "exclusiveMaximum")
|
||||
strJSON = util.DeleteKey(strJSON, "$ref")
|
||||
strJSON = util.DeleteKey(strJSON, "$defs")
|
||||
|
||||
paths = make([]string, 0)
|
||||
util.Walk(gjson.Parse(strJSON), "", "anyOf", &paths)
|
||||
@@ -648,7 +669,7 @@ func buildBaseURL(auth *cliproxyauth.Auth) string {
|
||||
if baseURLs := antigravityBaseURLFallbackOrder(auth); len(baseURLs) > 0 {
|
||||
return baseURLs[0]
|
||||
}
|
||||
return antigravityBaseURLAutopush
|
||||
return antigravityBaseURLDaily
|
||||
}
|
||||
|
||||
func resolveHost(base string) string {
|
||||
@@ -684,7 +705,7 @@ func antigravityBaseURLFallbackOrder(auth *cliproxyauth.Auth) []string {
|
||||
}
|
||||
return []string{
|
||||
antigravityBaseURLDaily,
|
||||
antigravityBaseURLAutopush,
|
||||
// antigravityBaseURLAutopush,
|
||||
antigravityBaseURLProd,
|
||||
}
|
||||
}
|
||||
@@ -808,3 +829,65 @@ func alias2ModelName(modelName string) string {
|
||||
return modelName
|
||||
}
|
||||
}
|
||||
|
||||
// normalizeAntigravityThinking clamps or removes thinking config based on model support.
|
||||
// For Claude models, it additionally ensures thinking budget < max_tokens.
|
||||
func normalizeAntigravityThinking(model string, payload []byte) []byte {
|
||||
payload = util.StripThinkingConfigIfUnsupported(model, payload)
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
return payload
|
||||
}
|
||||
budget := gjson.GetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget")
|
||||
if !budget.Exists() {
|
||||
return payload
|
||||
}
|
||||
raw := int(budget.Int())
|
||||
normalized := util.NormalizeThinkingBudget(model, raw)
|
||||
|
||||
isClaude := strings.Contains(strings.ToLower(model), "claude")
|
||||
if isClaude {
|
||||
effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload)
|
||||
if effectiveMax > 0 && normalized >= effectiveMax {
|
||||
normalized = effectiveMax - 1
|
||||
}
|
||||
minBudget := antigravityMinThinkingBudget(model)
|
||||
if minBudget > 0 && normalized >= 0 && normalized < minBudget {
|
||||
// Budget is below minimum, remove thinking config entirely
|
||||
payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig")
|
||||
return payload
|
||||
}
|
||||
if setDefaultMax {
|
||||
if res, errSet := sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax); errSet == nil {
|
||||
payload = res
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
updated, err := sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
|
||||
if err != nil {
|
||||
return payload
|
||||
}
|
||||
return updated
|
||||
}
|
||||
|
||||
// antigravityEffectiveMaxTokens returns the max tokens to cap thinking:
|
||||
// prefer request-provided maxOutputTokens; otherwise fall back to model default.
|
||||
// The boolean indicates whether the value came from the model default (and thus should be written back).
|
||||
func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromModel bool) {
|
||||
if maxTok := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens"); maxTok.Exists() && maxTok.Int() > 0 {
|
||||
return int(maxTok.Int()), false
|
||||
}
|
||||
if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.MaxCompletionTokens > 0 {
|
||||
return modelInfo.MaxCompletionTokens, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// antigravityMinThinkingBudget returns the minimum thinking budget for a model.
|
||||
// Falls back to -1 if no model info is found.
|
||||
func antigravityMinThinkingBudget(model string) int {
|
||||
if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.Thinking != nil {
|
||||
return modelInfo.Thinking.Min
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
@@ -54,15 +54,22 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
// Use streaming translation to preserve function calling, except for claude.
|
||||
stream := from != to
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
|
||||
modelForUpstream := req.Model
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", modelOverride)
|
||||
modelForUpstream = modelOverride
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
}
|
||||
// Inject thinking config based on model suffix for thinking variants
|
||||
body = e.injectThinkingConfig(req.Model, body)
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
}
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
// Inject thinking config based on model metadata for thinking variants
|
||||
body = e.injectThinkingConfig(req.Model, req.Metadata, body)
|
||||
|
||||
if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
|
||||
if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
|
||||
body = checkSystemInstructions(body)
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
@@ -161,11 +168,20 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("claude")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", modelOverride)
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
}
|
||||
// Inject thinking config based on model suffix for thinking variants
|
||||
body = e.injectThinkingConfig(req.Model, body)
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
}
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
// Inject thinking config based on model metadata for thinking variants
|
||||
body = e.injectThinkingConfig(req.Model, req.Metadata, body)
|
||||
body = checkSystemInstructions(body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
|
||||
@@ -238,7 +254,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
// If from == to (Claude → Claude), directly forward the SSE stream without translation
|
||||
if from == to {
|
||||
scanner := bufio.NewScanner(decodedBody)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
appendAPIResponseChunk(ctx, e.cfg, line)
|
||||
@@ -261,7 +277,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
|
||||
// For other formats, use translation
|
||||
scanner := bufio.NewScanner(decodedBody)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
@@ -295,13 +311,20 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
|
||||
// Use streaming translation to preserve function calling, except for claude.
|
||||
stream := from != to
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
|
||||
modelForUpstream := req.Model
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", modelOverride)
|
||||
modelForUpstream = modelOverride
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
}
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
}
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
|
||||
if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
|
||||
body = checkSystemInstructions(body)
|
||||
}
|
||||
|
||||
@@ -427,31 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
|
||||
return betas, body
|
||||
}
|
||||
|
||||
// injectThinkingConfig adds thinking configuration based on model name suffix
|
||||
func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []byte {
|
||||
// Only inject if thinking config is not already present
|
||||
if gjson.GetBytes(body, "thinking").Exists() {
|
||||
// injectThinkingConfig adds thinking configuration based on metadata using the unified flow.
|
||||
// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata
|
||||
// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini.
|
||||
func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
|
||||
budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata)
|
||||
if !ok {
|
||||
return body
|
||||
}
|
||||
|
||||
var budgetTokens int
|
||||
switch {
|
||||
case strings.HasSuffix(modelName, "-thinking-low"):
|
||||
budgetTokens = 1024
|
||||
case strings.HasSuffix(modelName, "-thinking-medium"):
|
||||
budgetTokens = 8192
|
||||
case strings.HasSuffix(modelName, "-thinking-high"):
|
||||
budgetTokens = 24576
|
||||
case strings.HasSuffix(modelName, "-thinking"):
|
||||
// Default thinking without suffix uses medium budget
|
||||
budgetTokens = 8192
|
||||
default:
|
||||
return body
|
||||
}
|
||||
|
||||
body, _ = sjson.SetBytes(body, "thinking.type", "enabled")
|
||||
body, _ = sjson.SetBytes(body, "thinking.budget_tokens", budgetTokens)
|
||||
return body
|
||||
return util.ApplyClaudeThinkingConfig(body, budget)
|
||||
}
|
||||
|
||||
// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
|
||||
@@ -491,35 +498,45 @@ func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
|
||||
}
|
||||
|
||||
func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
|
||||
if alias == "" {
|
||||
trimmed := strings.TrimSpace(alias)
|
||||
if trimmed == "" {
|
||||
return ""
|
||||
}
|
||||
// Hardcoded mappings for thinking models to actual Claude model names
|
||||
switch alias {
|
||||
case "claude-opus-4-5-thinking", "claude-opus-4-5-thinking-low", "claude-opus-4-5-thinking-medium", "claude-opus-4-5-thinking-high":
|
||||
return "claude-opus-4-5-20251101"
|
||||
case "claude-sonnet-4-5-thinking":
|
||||
return "claude-sonnet-4-5-20250929"
|
||||
}
|
||||
|
||||
entry := e.resolveClaudeConfig(auth)
|
||||
if entry == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
|
||||
|
||||
// Candidate names to match against configured aliases/names.
|
||||
candidates := []string{strings.TrimSpace(normalizedModel)}
|
||||
if !strings.EqualFold(normalizedModel, trimmed) {
|
||||
candidates = append(candidates, trimmed)
|
||||
}
|
||||
if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
|
||||
candidates = append(candidates, original)
|
||||
}
|
||||
|
||||
for i := range entry.Models {
|
||||
model := entry.Models[i]
|
||||
name := strings.TrimSpace(model.Name)
|
||||
modelAlias := strings.TrimSpace(model.Alias)
|
||||
if modelAlias != "" {
|
||||
if strings.EqualFold(modelAlias, alias) {
|
||||
|
||||
for _, candidate := range candidates {
|
||||
if candidate == "" {
|
||||
continue
|
||||
}
|
||||
if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
|
||||
if name != "" {
|
||||
return name
|
||||
}
|
||||
return alias
|
||||
return candidate
|
||||
}
|
||||
if name != "" && strings.EqualFold(name, candidate) {
|
||||
return name
|
||||
}
|
||||
continue
|
||||
}
|
||||
if name != "" && strings.EqualFold(name, alias) {
|
||||
return name
|
||||
}
|
||||
}
|
||||
return ""
|
||||
|
||||
@@ -49,14 +49,18 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("codex")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
|
||||
body = e.setReasoningEffortByAlias(req.Model, body)
|
||||
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
|
||||
body = normalizeThinkingConfig(body, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
return resp, errValidate
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body, _ = sjson.SetBytes(body, "stream", true)
|
||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||
|
||||
@@ -142,13 +146,20 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("codex")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
|
||||
body = e.setReasoningEffortByAlias(req.Model, body)
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
|
||||
body = normalizeThinkingConfig(body, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
return nil, errValidate
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/responses"
|
||||
httpReq, err := e.cacheHelper(ctx, from, url, req, body)
|
||||
@@ -205,7 +216,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
}
|
||||
}()
|
||||
scanner := bufio.NewScanner(httpResp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
@@ -235,14 +246,16 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
}
|
||||
|
||||
func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("codex")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
|
||||
modelForCounting := req.Model
|
||||
|
||||
body = e.setReasoningEffortByAlias(req.Model, body)
|
||||
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||
body, _ = sjson.SetBytes(body, "stream", false)
|
||||
|
||||
@@ -261,83 +274,6 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
|
||||
return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
|
||||
}
|
||||
|
||||
func (e *CodexExecutor) setReasoningEffortByAlias(modelName string, payload []byte) []byte {
|
||||
if util.InArray([]string{"gpt-5", "gpt-5-minimal", "gpt-5-low", "gpt-5-medium", "gpt-5-high"}, modelName) {
|
||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5")
|
||||
switch modelName {
|
||||
case "gpt-5-minimal":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "minimal")
|
||||
case "gpt-5-low":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
||||
case "gpt-5-medium":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
||||
case "gpt-5-high":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
||||
}
|
||||
} else if util.InArray([]string{"gpt-5-codex", "gpt-5-codex-low", "gpt-5-codex-medium", "gpt-5-codex-high"}, modelName) {
|
||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5-codex")
|
||||
switch modelName {
|
||||
case "gpt-5-codex-low":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
||||
case "gpt-5-codex-medium":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
||||
case "gpt-5-codex-high":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
||||
}
|
||||
} else if util.InArray([]string{"gpt-5-codex-mini", "gpt-5-codex-mini-medium", "gpt-5-codex-mini-high"}, modelName) {
|
||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5-codex-mini")
|
||||
switch modelName {
|
||||
case "gpt-5-codex-mini-medium":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
||||
case "gpt-5-codex-mini-high":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
||||
}
|
||||
} else if util.InArray([]string{"gpt-5.1", "gpt-5.1-none", "gpt-5.1-low", "gpt-5.1-medium", "gpt-5.1-high"}, modelName) {
|
||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1")
|
||||
switch modelName {
|
||||
case "gpt-5.1-none":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "none")
|
||||
case "gpt-5.1-low":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
||||
case "gpt-5.1-medium":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
||||
case "gpt-5.1-high":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
||||
}
|
||||
} else if util.InArray([]string{"gpt-5.1-codex", "gpt-5.1-codex-low", "gpt-5.1-codex-medium", "gpt-5.1-codex-high"}, modelName) {
|
||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex")
|
||||
switch modelName {
|
||||
case "gpt-5.1-codex-low":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
||||
case "gpt-5.1-codex-medium":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
||||
case "gpt-5.1-codex-high":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
||||
}
|
||||
} else if util.InArray([]string{"gpt-5.1-codex-mini", "gpt-5.1-codex-mini-medium", "gpt-5.1-codex-mini-high"}, modelName) {
|
||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-mini")
|
||||
switch modelName {
|
||||
case "gpt-5.1-codex-mini-medium":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
||||
case "gpt-5.1-codex-mini-high":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
||||
}
|
||||
} else if util.InArray([]string{"gpt-5.1-codex-max", "gpt-5.1-codex-max-low", "gpt-5.1-codex-max-medium", "gpt-5.1-codex-max-high", "gpt-5.1-codex-max-xhigh"}, modelName) {
|
||||
payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-max")
|
||||
switch modelName {
|
||||
case "gpt-5.1-codex-max-low":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
|
||||
case "gpt-5.1-codex-max-medium":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
|
||||
case "gpt-5.1-codex-max-high":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
|
||||
case "gpt-5.1-codex-max-xhigh":
|
||||
payload, _ = sjson.SetBytes(payload, "reasoning.effort", "xhigh")
|
||||
}
|
||||
}
|
||||
return payload
|
||||
}
|
||||
|
||||
func tokenizerForCodexModel(model string) (tokenizer.Codec, error) {
|
||||
sanitized := strings.ToLower(strings.TrimSpace(model))
|
||||
switch {
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// Package executor provides runtime execution capabilities for various AI service providers.
|
||||
// This file implements the Gemini CLI executor that talks to Cloud Code Assist endpoints
|
||||
// using OAuth credentials from auth metadata.
|
||||
package executor
|
||||
|
||||
import (
|
||||
@@ -29,11 +32,11 @@ import (
|
||||
const (
|
||||
codeAssistEndpoint = "https://cloudcode-pa.googleapis.com"
|
||||
codeAssistVersion = "v1internal"
|
||||
geminiOauthClientID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
|
||||
geminiOauthClientSecret = "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"
|
||||
geminiOAuthClientID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
|
||||
geminiOAuthClientSecret = "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"
|
||||
)
|
||||
|
||||
var geminiOauthScopes = []string{
|
||||
var geminiOAuthScopes = []string{
|
||||
"https://www.googleapis.com/auth/cloud-platform",
|
||||
"https://www.googleapis.com/auth/userinfo.email",
|
||||
"https://www.googleapis.com/auth/userinfo.profile",
|
||||
@@ -44,14 +47,24 @@ type GeminiCLIExecutor struct {
|
||||
cfg *config.Config
|
||||
}
|
||||
|
||||
// NewGeminiCLIExecutor creates a new Gemini CLI executor instance.
|
||||
//
|
||||
// Parameters:
|
||||
// - cfg: The application configuration
|
||||
//
|
||||
// Returns:
|
||||
// - *GeminiCLIExecutor: A new Gemini CLI executor instance
|
||||
func NewGeminiCLIExecutor(cfg *config.Config) *GeminiCLIExecutor {
|
||||
return &GeminiCLIExecutor{cfg: cfg}
|
||||
}
|
||||
|
||||
// Identifier returns the executor identifier.
|
||||
func (e *GeminiCLIExecutor) Identifier() string { return "gemini-cli" }
|
||||
|
||||
// PrepareRequest prepares the HTTP request for execution (no-op for Gemini CLI).
|
||||
func (e *GeminiCLIExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error { return nil }
|
||||
|
||||
// Execute performs a non-streaming request to the Gemini CLI API.
|
||||
func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
|
||||
if err != nil {
|
||||
@@ -64,6 +77,8 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
|
||||
to := sdktranslator.FromString("gemini-cli")
|
||||
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
|
||||
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
|
||||
basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
|
||||
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
|
||||
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
|
||||
basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
|
||||
@@ -187,6 +202,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// ExecuteStream performs a streaming request to the Gemini CLI API.
|
||||
func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
|
||||
if err != nil {
|
||||
@@ -199,6 +215,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
||||
to := sdktranslator.FromString("gemini-cli")
|
||||
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
|
||||
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
|
||||
basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
|
||||
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
|
||||
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
|
||||
basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
|
||||
@@ -305,7 +323,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
||||
}()
|
||||
if opts.Alt == "" {
|
||||
scanner := bufio.NewScanner(resp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, streamScannerBuffer)
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
@@ -367,6 +385,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// CountTokens counts tokens for the given request using the Gemini CLI API.
|
||||
func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth)
|
||||
if err != nil {
|
||||
@@ -467,9 +486,8 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
|
||||
return cliproxyexecutor.Response{}, newGeminiStatusErr(lastStatus, lastBody)
|
||||
}
|
||||
|
||||
func (e *GeminiCLIExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
log.Debugf("gemini cli executor: refresh called")
|
||||
_ = ctx
|
||||
// Refresh refreshes the authentication credentials (no-op for Gemini CLI).
|
||||
func (e *GeminiCLIExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
return auth, nil
|
||||
}
|
||||
|
||||
@@ -511,9 +529,9 @@ func prepareGeminiCLITokenSource(ctx context.Context, cfg *config.Config, auth *
|
||||
}
|
||||
|
||||
conf := &oauth2.Config{
|
||||
ClientID: geminiOauthClientID,
|
||||
ClientSecret: geminiOauthClientSecret,
|
||||
Scopes: geminiOauthScopes,
|
||||
ClientID: geminiOAuthClientID,
|
||||
ClientSecret: geminiOAuthClientSecret,
|
||||
Scopes: geminiOAuthScopes,
|
||||
Endpoint: google.Endpoint,
|
||||
}
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
@@ -21,8 +20,6 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
"golang.org/x/oauth2"
|
||||
"golang.org/x/oauth2/google"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -31,6 +28,9 @@ const (
|
||||
|
||||
// glAPIVersion is the API version used for Gemini requests.
|
||||
glAPIVersion = "v1beta"
|
||||
|
||||
// streamScannerBuffer is the buffer size for SSE stream scanning.
|
||||
streamScannerBuffer = 52_428_800
|
||||
)
|
||||
|
||||
// GeminiExecutor is a stateless executor for the official Gemini API using API keys.
|
||||
@@ -48,9 +48,11 @@ type GeminiExecutor struct {
|
||||
//
|
||||
// Returns:
|
||||
// - *GeminiExecutor: A new Gemini executor instance
|
||||
func NewGeminiExecutor(cfg *config.Config) *GeminiExecutor { return &GeminiExecutor{cfg: cfg} }
|
||||
func NewGeminiExecutor(cfg *config.Config) *GeminiExecutor {
|
||||
return &GeminiExecutor{cfg: cfg}
|
||||
}
|
||||
|
||||
// Identifier returns the executor identifier for Gemini.
|
||||
// Identifier returns the executor identifier.
|
||||
func (e *GeminiExecutor) Identifier() string { return "gemini" }
|
||||
|
||||
// PrepareRequest prepares the HTTP request for execution (no-op for Gemini).
|
||||
@@ -75,14 +77,19 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
// Official Gemini API via API key or OAuth bearer
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = applyThinkingMetadata(body, req.Metadata, req.Model)
|
||||
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
action := "generateContent"
|
||||
if req.Metadata != nil {
|
||||
@@ -91,7 +98,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
}
|
||||
}
|
||||
baseURL := resolveGeminiBaseURL(auth)
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, action)
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, action)
|
||||
if opts.Alt != "" && action != "countTokens" {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
}
|
||||
@@ -159,22 +166,28 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// ExecuteStream performs a streaming request to the Gemini API.
|
||||
func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
apiKey, bearer := geminiCreds(auth)
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
body = applyThinkingMetadata(body, req.Metadata, req.Model)
|
||||
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
baseURL := resolveGeminiBaseURL(auth)
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, "streamGenerateContent")
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, "streamGenerateContent")
|
||||
if opts.Alt == "" {
|
||||
url = url + "?alt=sse"
|
||||
} else {
|
||||
@@ -239,7 +252,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
}
|
||||
}()
|
||||
scanner := bufio.NewScanner(httpResp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, streamScannerBuffer)
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
@@ -270,6 +283,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
return stream, nil
|
||||
}
|
||||
|
||||
// CountTokens counts tokens for the given request using the Gemini API.
|
||||
func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
apiKey, bearer := geminiCreds(auth)
|
||||
|
||||
@@ -343,106 +357,8 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
|
||||
return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
|
||||
}
|
||||
|
||||
func (e *GeminiExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
log.Debugf("gemini executor: refresh called")
|
||||
// OAuth bearer token refresh for official Gemini API.
|
||||
if auth == nil {
|
||||
return nil, fmt.Errorf("gemini executor: auth is nil")
|
||||
}
|
||||
if auth.Metadata == nil {
|
||||
return auth, nil
|
||||
}
|
||||
// Token data is typically nested under "token" map in Gemini files.
|
||||
tokenMap, _ := auth.Metadata["token"].(map[string]any)
|
||||
var refreshToken, accessToken, clientID, clientSecret, tokenURI, expiryStr string
|
||||
if tokenMap != nil {
|
||||
if v, ok := tokenMap["refresh_token"].(string); ok {
|
||||
refreshToken = v
|
||||
}
|
||||
if v, ok := tokenMap["access_token"].(string); ok {
|
||||
accessToken = v
|
||||
}
|
||||
if v, ok := tokenMap["client_id"].(string); ok {
|
||||
clientID = v
|
||||
}
|
||||
if v, ok := tokenMap["client_secret"].(string); ok {
|
||||
clientSecret = v
|
||||
}
|
||||
if v, ok := tokenMap["token_uri"].(string); ok {
|
||||
tokenURI = v
|
||||
}
|
||||
if v, ok := tokenMap["expiry"].(string); ok {
|
||||
expiryStr = v
|
||||
}
|
||||
} else {
|
||||
// Fallback to top-level keys if present
|
||||
if v, ok := auth.Metadata["refresh_token"].(string); ok {
|
||||
refreshToken = v
|
||||
}
|
||||
if v, ok := auth.Metadata["access_token"].(string); ok {
|
||||
accessToken = v
|
||||
}
|
||||
if v, ok := auth.Metadata["client_id"].(string); ok {
|
||||
clientID = v
|
||||
}
|
||||
if v, ok := auth.Metadata["client_secret"].(string); ok {
|
||||
clientSecret = v
|
||||
}
|
||||
if v, ok := auth.Metadata["token_uri"].(string); ok {
|
||||
tokenURI = v
|
||||
}
|
||||
if v, ok := auth.Metadata["expiry"].(string); ok {
|
||||
expiryStr = v
|
||||
}
|
||||
}
|
||||
if refreshToken == "" {
|
||||
// Nothing to do for API key or cookie based entries
|
||||
return auth, nil
|
||||
}
|
||||
|
||||
// Prepare oauth2 config; default to Google endpoints
|
||||
endpoint := google.Endpoint
|
||||
if tokenURI != "" {
|
||||
endpoint.TokenURL = tokenURI
|
||||
}
|
||||
conf := &oauth2.Config{ClientID: clientID, ClientSecret: clientSecret, Endpoint: endpoint}
|
||||
|
||||
// Ensure proxy-aware HTTP client for token refresh
|
||||
httpClient := util.SetProxy(&e.cfg.SDKConfig, &http.Client{})
|
||||
ctx = context.WithValue(ctx, oauth2.HTTPClient, httpClient)
|
||||
|
||||
// Build base token
|
||||
tok := &oauth2.Token{AccessToken: accessToken, RefreshToken: refreshToken}
|
||||
if t, err := time.Parse(time.RFC3339, expiryStr); err == nil {
|
||||
tok.Expiry = t
|
||||
}
|
||||
newTok, err := conf.TokenSource(ctx, tok).Token()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Persist back to metadata; prefer nested token map if present
|
||||
if tokenMap == nil {
|
||||
tokenMap = make(map[string]any)
|
||||
}
|
||||
tokenMap["access_token"] = newTok.AccessToken
|
||||
tokenMap["refresh_token"] = newTok.RefreshToken
|
||||
tokenMap["expiry"] = newTok.Expiry.Format(time.RFC3339)
|
||||
if clientID != "" {
|
||||
tokenMap["client_id"] = clientID
|
||||
}
|
||||
if clientSecret != "" {
|
||||
tokenMap["client_secret"] = clientSecret
|
||||
}
|
||||
if tokenURI != "" {
|
||||
tokenMap["token_uri"] = tokenURI
|
||||
}
|
||||
auth.Metadata["token"] = tokenMap
|
||||
|
||||
// Also mirror top-level access_token for compatibility if previously present
|
||||
if _, ok := auth.Metadata["access_token"]; ok {
|
||||
auth.Metadata["access_token"] = newTok.AccessToken
|
||||
}
|
||||
// Refresh refreshes the authentication credentials (no-op for Gemini API key).
|
||||
func (e *GeminiExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
return auth, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Package executor contains provider executors. This file implements the Vertex AI
|
||||
// Gemini executor that talks to Google Vertex AI endpoints using service account
|
||||
// credentials imported by the CLI.
|
||||
// Package executor provides runtime execution capabilities for various AI service providers.
|
||||
// This file implements the Vertex AI Gemini executor that talks to Google Vertex AI
|
||||
// endpoints using service account credentials or API keys.
|
||||
package executor
|
||||
|
||||
import (
|
||||
@@ -36,20 +36,26 @@ type GeminiVertexExecutor struct {
|
||||
cfg *config.Config
|
||||
}
|
||||
|
||||
// NewGeminiVertexExecutor constructs the Vertex executor.
|
||||
// NewGeminiVertexExecutor creates a new Vertex AI Gemini executor instance.
|
||||
//
|
||||
// Parameters:
|
||||
// - cfg: The application configuration
|
||||
//
|
||||
// Returns:
|
||||
// - *GeminiVertexExecutor: A new Vertex AI Gemini executor instance
|
||||
func NewGeminiVertexExecutor(cfg *config.Config) *GeminiVertexExecutor {
|
||||
return &GeminiVertexExecutor{cfg: cfg}
|
||||
}
|
||||
|
||||
// Identifier returns provider key for manager routing.
|
||||
// Identifier returns the executor identifier.
|
||||
func (e *GeminiVertexExecutor) Identifier() string { return "vertex" }
|
||||
|
||||
// PrepareRequest is a no-op for Vertex.
|
||||
// PrepareRequest prepares the HTTP request for execution (no-op for Vertex).
|
||||
func (e *GeminiVertexExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Execute handles non-streaming requests.
|
||||
// Execute performs a non-streaming request to the Vertex AI API.
|
||||
func (e *GeminiVertexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
// Try API key authentication first
|
||||
apiKey, baseURL := vertexAPICreds(auth)
|
||||
@@ -67,7 +73,7 @@ func (e *GeminiVertexExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
|
||||
return e.executeWithAPIKey(ctx, auth, req, opts, apiKey, baseURL)
|
||||
}
|
||||
|
||||
// ExecuteStream handles SSE streaming for Vertex.
|
||||
// ExecuteStream performs a streaming request to the Vertex AI API.
|
||||
func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
|
||||
// Try API key authentication first
|
||||
apiKey, baseURL := vertexAPICreds(auth)
|
||||
@@ -85,7 +91,7 @@ func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
||||
return e.executeStreamWithAPIKey(ctx, auth, req, opts, apiKey, baseURL)
|
||||
}
|
||||
|
||||
// CountTokens calls Vertex countTokens endpoint.
|
||||
// CountTokens counts tokens for the given request using the Vertex AI API.
|
||||
func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
// Try API key authentication first
|
||||
apiKey, baseURL := vertexAPICreds(auth)
|
||||
@@ -103,179 +109,7 @@ func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyau
|
||||
return e.countTokensWithAPIKey(ctx, auth, req, opts, apiKey, baseURL)
|
||||
}
|
||||
|
||||
// countTokensWithServiceAccount handles token counting using service account credentials.
|
||||
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
|
||||
}
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
||||
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens")
|
||||
|
||||
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
||||
if errNewReq != nil {
|
||||
return cliproxyexecutor.Response{}, errNewReq
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
if token, errTok := vertexAccessToken(ctx, e.cfg, auth, saJSON); errTok == nil && token != "" {
|
||||
httpReq.Header.Set("Authorization", "Bearer "+token)
|
||||
} else if errTok != nil {
|
||||
log.Errorf("vertex executor: access token error: %v", errTok)
|
||||
return cliproxyexecutor.Response{}, statusErr{code: 500, msg: "internal server error"}
|
||||
}
|
||||
applyGeminiHeaders(httpReq, auth)
|
||||
|
||||
var authID, authLabel, authType, authValue string
|
||||
if auth != nil {
|
||||
authID = auth.ID
|
||||
authLabel = auth.Label
|
||||
authType, authValue = auth.AccountInfo()
|
||||
}
|
||||
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
|
||||
URL: url,
|
||||
Method: http.MethodPost,
|
||||
Headers: httpReq.Header.Clone(),
|
||||
Body: translatedReq,
|
||||
Provider: e.Identifier(),
|
||||
AuthID: authID,
|
||||
AuthLabel: authLabel,
|
||||
AuthType: authType,
|
||||
AuthValue: authValue,
|
||||
})
|
||||
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
httpResp, errDo := httpClient.Do(httpReq)
|
||||
if errDo != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errDo)
|
||||
return cliproxyexecutor.Response{}, errDo
|
||||
}
|
||||
defer func() {
|
||||
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||
log.Errorf("vertex executor: close response body error: %v", errClose)
|
||||
}
|
||||
}()
|
||||
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(httpResp.Body)
|
||||
appendAPIResponseChunk(ctx, e.cfg, b)
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
|
||||
}
|
||||
data, errRead := io.ReadAll(httpResp.Body)
|
||||
if errRead != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errRead)
|
||||
return cliproxyexecutor.Response{}, errRead
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, data)
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
|
||||
}
|
||||
count := gjson.GetBytes(data, "totalTokens").Int()
|
||||
out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
|
||||
return cliproxyexecutor.Response{Payload: []byte(out)}, nil
|
||||
}
|
||||
|
||||
// countTokensWithAPIKey handles token counting using API key credentials.
|
||||
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
|
||||
}
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
||||
|
||||
// For API key auth, use simpler URL format without project/location
|
||||
if baseURL == "" {
|
||||
baseURL = "https://generativelanguage.googleapis.com"
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "countTokens")
|
||||
|
||||
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
||||
if errNewReq != nil {
|
||||
return cliproxyexecutor.Response{}, errNewReq
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
if apiKey != "" {
|
||||
httpReq.Header.Set("x-goog-api-key", apiKey)
|
||||
}
|
||||
applyGeminiHeaders(httpReq, auth)
|
||||
|
||||
var authID, authLabel, authType, authValue string
|
||||
if auth != nil {
|
||||
authID = auth.ID
|
||||
authLabel = auth.Label
|
||||
authType, authValue = auth.AccountInfo()
|
||||
}
|
||||
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
|
||||
URL: url,
|
||||
Method: http.MethodPost,
|
||||
Headers: httpReq.Header.Clone(),
|
||||
Body: translatedReq,
|
||||
Provider: e.Identifier(),
|
||||
AuthID: authID,
|
||||
AuthLabel: authLabel,
|
||||
AuthType: authType,
|
||||
AuthValue: authValue,
|
||||
})
|
||||
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
httpResp, errDo := httpClient.Do(httpReq)
|
||||
if errDo != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errDo)
|
||||
return cliproxyexecutor.Response{}, errDo
|
||||
}
|
||||
defer func() {
|
||||
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||
log.Errorf("vertex executor: close response body error: %v", errClose)
|
||||
}
|
||||
}()
|
||||
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(httpResp.Body)
|
||||
appendAPIResponseChunk(ctx, e.cfg, b)
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
|
||||
}
|
||||
data, errRead := io.ReadAll(httpResp.Body)
|
||||
if errRead != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errRead)
|
||||
return cliproxyexecutor.Response{}, errRead
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, data)
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
|
||||
}
|
||||
count := gjson.GetBytes(data, "totalTokens").Int()
|
||||
out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
|
||||
return cliproxyexecutor.Response{Payload: []byte(out)}, nil
|
||||
}
|
||||
|
||||
// Refresh is a no-op for service account based credentials.
|
||||
// Refresh refreshes the authentication credentials (no-op for Vertex).
|
||||
func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||
return auth, nil
|
||||
}
|
||||
@@ -286,19 +120,24 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
|
||||
}
|
||||
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
action := "generateContent"
|
||||
if req.Metadata != nil {
|
||||
@@ -307,7 +146,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
|
||||
}
|
||||
}
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, action)
|
||||
if opts.Alt != "" && action != "countTokens" {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
}
|
||||
@@ -381,19 +220,24 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
|
||||
}
|
||||
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
action := "generateContent"
|
||||
if req.Metadata != nil {
|
||||
@@ -406,7 +250,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
|
||||
if baseURL == "" {
|
||||
baseURL = "https://generativelanguage.googleapis.com"
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, action)
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, action)
|
||||
if opts.Alt != "" && action != "countTokens" {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
}
|
||||
@@ -477,22 +321,27 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
|
||||
}
|
||||
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent")
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "streamGenerateContent")
|
||||
if opts.Alt == "" {
|
||||
url = url + "?alt=sse"
|
||||
} else {
|
||||
@@ -558,7 +407,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
|
||||
}
|
||||
}()
|
||||
scanner := bufio.NewScanner(httpResp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, streamScannerBuffer)
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
@@ -589,25 +438,30 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
|
||||
}
|
||||
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
|
||||
// For API key auth, use simpler URL format without project/location
|
||||
if baseURL == "" {
|
||||
baseURL = "https://generativelanguage.googleapis.com"
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "streamGenerateContent")
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, "streamGenerateContent")
|
||||
if opts.Alt == "" {
|
||||
url = url + "?alt=sse"
|
||||
} else {
|
||||
@@ -670,7 +524,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
|
||||
}
|
||||
}()
|
||||
scanner := bufio.NewScanner(httpResp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, streamScannerBuffer)
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
@@ -696,6 +550,184 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
|
||||
return stream, nil
|
||||
}
|
||||
|
||||
// countTokensWithServiceAccount counts tokens using service account credentials.
|
||||
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
|
||||
}
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
||||
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "countTokens")
|
||||
|
||||
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
||||
if errNewReq != nil {
|
||||
return cliproxyexecutor.Response{}, errNewReq
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
if token, errTok := vertexAccessToken(ctx, e.cfg, auth, saJSON); errTok == nil && token != "" {
|
||||
httpReq.Header.Set("Authorization", "Bearer "+token)
|
||||
} else if errTok != nil {
|
||||
log.Errorf("vertex executor: access token error: %v", errTok)
|
||||
return cliproxyexecutor.Response{}, statusErr{code: 500, msg: "internal server error"}
|
||||
}
|
||||
applyGeminiHeaders(httpReq, auth)
|
||||
|
||||
var authID, authLabel, authType, authValue string
|
||||
if auth != nil {
|
||||
authID = auth.ID
|
||||
authLabel = auth.Label
|
||||
authType, authValue = auth.AccountInfo()
|
||||
}
|
||||
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
|
||||
URL: url,
|
||||
Method: http.MethodPost,
|
||||
Headers: httpReq.Header.Clone(),
|
||||
Body: translatedReq,
|
||||
Provider: e.Identifier(),
|
||||
AuthID: authID,
|
||||
AuthLabel: authLabel,
|
||||
AuthType: authType,
|
||||
AuthValue: authValue,
|
||||
})
|
||||
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
httpResp, errDo := httpClient.Do(httpReq)
|
||||
if errDo != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errDo)
|
||||
return cliproxyexecutor.Response{}, errDo
|
||||
}
|
||||
defer func() {
|
||||
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||
log.Errorf("vertex executor: close response body error: %v", errClose)
|
||||
}
|
||||
}()
|
||||
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(httpResp.Body)
|
||||
appendAPIResponseChunk(ctx, e.cfg, b)
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
|
||||
}
|
||||
data, errRead := io.ReadAll(httpResp.Body)
|
||||
if errRead != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errRead)
|
||||
return cliproxyexecutor.Response{}, errRead
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, data)
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
|
||||
}
|
||||
count := gjson.GetBytes(data, "totalTokens").Int()
|
||||
out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
|
||||
return cliproxyexecutor.Response{Payload: []byte(out)}, nil
|
||||
}
|
||||
|
||||
// countTokensWithAPIKey handles token counting using API key credentials.
|
||||
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
|
||||
}
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
||||
|
||||
// For API key auth, use simpler URL format without project/location
|
||||
if baseURL == "" {
|
||||
baseURL = "https://generativelanguage.googleapis.com"
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "countTokens")
|
||||
|
||||
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
||||
if errNewReq != nil {
|
||||
return cliproxyexecutor.Response{}, errNewReq
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
if apiKey != "" {
|
||||
httpReq.Header.Set("x-goog-api-key", apiKey)
|
||||
}
|
||||
applyGeminiHeaders(httpReq, auth)
|
||||
|
||||
var authID, authLabel, authType, authValue string
|
||||
if auth != nil {
|
||||
authID = auth.ID
|
||||
authLabel = auth.Label
|
||||
authType, authValue = auth.AccountInfo()
|
||||
}
|
||||
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
|
||||
URL: url,
|
||||
Method: http.MethodPost,
|
||||
Headers: httpReq.Header.Clone(),
|
||||
Body: translatedReq,
|
||||
Provider: e.Identifier(),
|
||||
AuthID: authID,
|
||||
AuthLabel: authLabel,
|
||||
AuthType: authType,
|
||||
AuthValue: authValue,
|
||||
})
|
||||
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
httpResp, errDo := httpClient.Do(httpReq)
|
||||
if errDo != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errDo)
|
||||
return cliproxyexecutor.Response{}, errDo
|
||||
}
|
||||
defer func() {
|
||||
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||
log.Errorf("vertex executor: close response body error: %v", errClose)
|
||||
}
|
||||
}()
|
||||
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(httpResp.Body)
|
||||
appendAPIResponseChunk(ctx, e.cfg, b)
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)}
|
||||
}
|
||||
data, errRead := io.ReadAll(httpResp.Body)
|
||||
if errRead != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, errRead)
|
||||
return cliproxyexecutor.Response{}, errRead
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, data)
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
|
||||
return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)}
|
||||
}
|
||||
count := gjson.GetBytes(data, "totalTokens").Int()
|
||||
out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
|
||||
return cliproxyexecutor.Response{Payload: []byte(out)}, nil
|
||||
}
|
||||
|
||||
// vertexCreds extracts project, location and raw service account JSON from auth metadata.
|
||||
func vertexCreds(a *cliproxyauth.Auth) (projectID, location string, serviceAccountJSON []byte, err error) {
|
||||
if a == nil || a.Metadata == nil {
|
||||
|
||||
@@ -57,6 +57,15 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
body = normalizeThinkingConfig(body, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
return resp, errValidate
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
|
||||
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
|
||||
@@ -139,6 +148,15 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
body = normalizeThinkingConfig(body, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
return nil, errValidate
|
||||
}
|
||||
// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
|
||||
toolsResult := gjson.GetBytes(body, "tools")
|
||||
if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
|
||||
@@ -201,7 +219,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
}()
|
||||
|
||||
scanner := bufio.NewScanner(httpResp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
|
||||
@@ -58,6 +58,15 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
|
||||
translated = e.overrideModel(translated, modelOverride)
|
||||
}
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
||||
translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
|
||||
}
|
||||
translated = normalizeThinkingConfig(translated, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
|
||||
return resp, errValidate
|
||||
}
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
|
||||
@@ -143,6 +152,15 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
||||
translated = e.overrideModel(translated, modelOverride)
|
||||
}
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
||||
translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
|
||||
}
|
||||
translated = normalizeThinkingConfig(translated, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
|
||||
return nil, errValidate
|
||||
}
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
|
||||
@@ -206,7 +224,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
||||
}
|
||||
}()
|
||||
scanner := bufio.NewScanner(httpResp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
package executor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
@@ -9,11 +11,11 @@ import (
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
|
||||
// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
|
||||
// for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
|
||||
func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
|
||||
budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata)
|
||||
if !ok {
|
||||
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
|
||||
if !ok || (budgetOverride == nil && includeOverride == nil) {
|
||||
return payload
|
||||
}
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
@@ -26,20 +28,44 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string
|
||||
return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
|
||||
}
|
||||
|
||||
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
|
||||
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
|
||||
// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
|
||||
func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
|
||||
budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata)
|
||||
if !ok {
|
||||
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
|
||||
if !ok || (budgetOverride == nil && includeOverride == nil) {
|
||||
return payload
|
||||
}
|
||||
if budgetOverride != nil && util.ModelSupportsThinking(model) {
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
return payload
|
||||
}
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
|
||||
}
|
||||
|
||||
// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
|
||||
// Metadata values take precedence over any existing field when the model supports thinking, intentionally
|
||||
// overwriting caller-provided values to honor suffix/default metadata priority.
|
||||
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte {
|
||||
if len(metadata) == 0 {
|
||||
return payload
|
||||
}
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
return payload
|
||||
}
|
||||
if field == "" {
|
||||
return payload
|
||||
}
|
||||
if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
|
||||
if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
|
||||
return updated
|
||||
}
|
||||
}
|
||||
return payload
|
||||
}
|
||||
|
||||
// applyPayloadConfig applies payload default and override rules from configuration
|
||||
// to the given JSON payload for the specified model.
|
||||
// Defaults only fill missing fields, while overrides always overwrite existing values.
|
||||
@@ -189,3 +215,93 @@ func matchModelPattern(pattern, model string) bool {
|
||||
}
|
||||
return pi == len(pattern)
|
||||
}
|
||||
|
||||
// normalizeThinkingConfig normalizes thinking-related fields in the payload
|
||||
// based on model capabilities. For models without thinking support, it strips
|
||||
// reasoning fields. For models with level-based thinking, it validates and
|
||||
// normalizes the reasoning effort level.
|
||||
func normalizeThinkingConfig(payload []byte, model string) []byte {
|
||||
if len(payload) == 0 || model == "" {
|
||||
return payload
|
||||
}
|
||||
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
return stripThinkingFields(payload)
|
||||
}
|
||||
|
||||
if util.ModelUsesThinkingLevels(model) {
|
||||
return normalizeReasoningEffortLevel(payload, model)
|
||||
}
|
||||
|
||||
return payload
|
||||
}
|
||||
|
||||
// stripThinkingFields removes thinking-related fields from the payload for
|
||||
// models that do not support thinking.
|
||||
func stripThinkingFields(payload []byte) []byte {
|
||||
fieldsToRemove := []string{
|
||||
"reasoning",
|
||||
"reasoning_effort",
|
||||
"reasoning.effort",
|
||||
}
|
||||
out := payload
|
||||
for _, field := range fieldsToRemove {
|
||||
if gjson.GetBytes(out, field).Exists() {
|
||||
out, _ = sjson.DeleteBytes(out, field)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// normalizeReasoningEffortLevel validates and normalizes the reasoning_effort
|
||||
// or reasoning.effort field for level-based thinking models.
|
||||
func normalizeReasoningEffortLevel(payload []byte, model string) []byte {
|
||||
out := payload
|
||||
|
||||
if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
|
||||
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
|
||||
out, _ = sjson.SetBytes(out, "reasoning_effort", normalized)
|
||||
}
|
||||
}
|
||||
|
||||
if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() {
|
||||
if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
|
||||
out, _ = sjson.SetBytes(out, "reasoning.effort", normalized)
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// validateThinkingConfig checks for unsupported reasoning levels on level-based models.
|
||||
// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently
|
||||
// downgrading requests.
|
||||
func validateThinkingConfig(payload []byte, model string) error {
|
||||
if len(payload) == 0 || model == "" {
|
||||
return nil
|
||||
}
|
||||
if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) {
|
||||
return nil
|
||||
}
|
||||
|
||||
levels := util.GetModelThinkingLevels(model)
|
||||
checkField := func(path string) error {
|
||||
if effort := gjson.GetBytes(payload, path); effort.Exists() {
|
||||
if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok {
|
||||
return statusErr{
|
||||
code: http.StatusBadRequest,
|
||||
msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")),
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := checkField("reasoning_effort"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkField("reasoning.effort"); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
|
||||
qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||
@@ -50,6 +51,15 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
body = normalizeThinkingConfig(body, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
return resp, errValidate
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||
@@ -121,6 +131,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
|
||||
body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
body = normalizeThinkingConfig(body, upstreamModel)
|
||||
if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
return nil, errValidate
|
||||
}
|
||||
toolsResult := gjson.GetBytes(body, "tools")
|
||||
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
|
||||
// This will have no real consequences. It's just to scare Qwen3.
|
||||
@@ -181,7 +200,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
}
|
||||
}()
|
||||
scanner := bufio.NewScanner(httpResp.Body)
|
||||
scanner.Buffer(nil, 20_971_520)
|
||||
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||
var param any
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
|
||||
@@ -83,7 +83,15 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
for j := 0; j < len(contentResults); j++ {
|
||||
contentResult := contentResults[j]
|
||||
contentTypeResult := contentResult.Get("type")
|
||||
if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "text" {
|
||||
if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "thinking" {
|
||||
prompt := contentResult.Get("thinking").String()
|
||||
signatureResult := contentResult.Get("signature")
|
||||
signature := geminiCLIClaudeThoughtSignature
|
||||
if signatureResult.Exists() {
|
||||
signature = signatureResult.String()
|
||||
}
|
||||
clientContent.Parts = append(clientContent.Parts, client.Part{Text: prompt, Thought: true, ThoughtSignature: signature})
|
||||
} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "text" {
|
||||
prompt := contentResult.Get("text").String()
|
||||
clientContent.Parts = append(clientContent.Parts, client.Part{Text: prompt})
|
||||
} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "tool_use" {
|
||||
@@ -92,10 +100,16 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
functionID := contentResult.Get("id").String()
|
||||
var args map[string]any
|
||||
if err := json.Unmarshal([]byte(functionArgs), &args); err == nil {
|
||||
clientContent.Parts = append(clientContent.Parts, client.Part{
|
||||
FunctionCall: &client.FunctionCall{ID: functionID, Name: functionName, Args: args},
|
||||
ThoughtSignature: geminiCLIClaudeThoughtSignature,
|
||||
})
|
||||
if strings.Contains(modelName, "claude") {
|
||||
clientContent.Parts = append(clientContent.Parts, client.Part{
|
||||
FunctionCall: &client.FunctionCall{ID: functionID, Name: functionName, Args: args},
|
||||
})
|
||||
} else {
|
||||
clientContent.Parts = append(clientContent.Parts, client.Part{
|
||||
FunctionCall: &client.FunctionCall{ID: functionID, Name: functionName, Args: args},
|
||||
ThoughtSignature: geminiCLIClaudeThoughtSignature,
|
||||
})
|
||||
}
|
||||
}
|
||||
} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "tool_result" {
|
||||
toolCallID := contentResult.Get("tool_use_id").String()
|
||||
@@ -109,6 +123,15 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
functionResponse := client.FunctionResponse{ID: toolCallID, Name: funcName, Response: map[string]interface{}{"result": responseData}}
|
||||
clientContent.Parts = append(clientContent.Parts, client.Part{FunctionResponse: &functionResponse})
|
||||
}
|
||||
} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "image" {
|
||||
sourceResult := contentResult.Get("source")
|
||||
if sourceResult.Get("type").String() == "base64" {
|
||||
inlineData := &client.InlineData{
|
||||
MimeType: sourceResult.Get("media_type").String(),
|
||||
Data: sourceResult.Get("data").String(),
|
||||
}
|
||||
clientContent.Parts = append(clientContent.Parts, client.Part{InlineData: inlineData})
|
||||
}
|
||||
}
|
||||
}
|
||||
contents = append(contents, clientContent)
|
||||
@@ -166,7 +189,6 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
if t.Get("type").String() == "enabled" {
|
||||
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
|
||||
budget := int(b.Int())
|
||||
budget = util.NormalizeThinkingBudget(modelName, budget)
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -34,8 +35,12 @@ type Params struct {
|
||||
TotalTokenCount int64 // Cached total token count from usage metadata
|
||||
HasSentFinalEvents bool // Indicates if final content/message events have been sent
|
||||
HasToolUse bool // Indicates if tool use was observed in the stream
|
||||
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
|
||||
}
|
||||
|
||||
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
|
||||
var toolUseIDCounter uint64
|
||||
|
||||
// ConvertAntigravityResponseToClaude performs sophisticated streaming response format conversion.
|
||||
// This function implements a complex state machine that translates backend client responses
|
||||
// into Claude Code-compatible Server-Sent Events (SSE) format. It manages different response types
|
||||
@@ -65,11 +70,14 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
|
||||
if bytes.Equal(rawJSON, []byte("[DONE]")) {
|
||||
output := ""
|
||||
appendFinalEvents(params, &output, true)
|
||||
|
||||
return []string{
|
||||
output + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||
// Only send final events if we have actually output content
|
||||
if params.HasContent {
|
||||
appendFinalEvents(params, &output, true)
|
||||
return []string{
|
||||
output + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||
}
|
||||
}
|
||||
return []string{}
|
||||
}
|
||||
|
||||
output := ""
|
||||
@@ -111,11 +119,16 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
if partTextResult.Exists() {
|
||||
// Process thinking content (internal reasoning)
|
||||
if partResult.Get("thought").Bool() {
|
||||
// Continue existing thinking block if already in thinking state
|
||||
if params.ResponseType == 2 {
|
||||
if thoughtSignature := partResult.Get("thoughtSignature"); thoughtSignature.Exists() && thoughtSignature.String() != "" {
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", thoughtSignature.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
params.HasContent = true
|
||||
} else if params.ResponseType == 2 { // Continue existing thinking block if already in thinking state
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
params.HasContent = true
|
||||
} else {
|
||||
// Transition from another state to thinking
|
||||
// First, close any existing content block
|
||||
@@ -139,6 +152,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
params.ResponseType = 2 // Set state to thinking
|
||||
params.HasContent = true
|
||||
}
|
||||
} else {
|
||||
finishReasonResult := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason")
|
||||
@@ -149,6 +163,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
params.HasContent = true
|
||||
} else {
|
||||
// Transition from another state to text content
|
||||
// First, close any existing content block
|
||||
@@ -163,15 +178,17 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
output = output + "\n\n\n"
|
||||
params.ResponseIndex++
|
||||
}
|
||||
|
||||
// Start a new text content block
|
||||
output = output + "event: content_block_start\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_start","index":%d,"content_block":{"type":"text","text":""}}`, params.ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
params.ResponseType = 1 // Set state to content
|
||||
if partTextResult.String() != "" {
|
||||
// Start a new text content block
|
||||
output = output + "event: content_block_start\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_start","index":%d,"content_block":{"type":"text","text":""}}`, params.ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
params.ResponseType = 1 // Set state to content
|
||||
params.HasContent = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -212,7 +229,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
|
||||
// Create the tool use block with unique ID and function details
|
||||
data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, params.ResponseIndex)
|
||||
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
|
||||
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
|
||||
data, _ = sjson.Set(data, "content_block.name", fcName)
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
|
||||
@@ -222,6 +239,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
}
|
||||
params.ResponseType = 3
|
||||
params.HasContent = true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -261,6 +279,11 @@ func appendFinalEvents(params *Params, output *string, force bool) {
|
||||
return
|
||||
}
|
||||
|
||||
// Only send final events if we have actually output content
|
||||
if !params.HasContent {
|
||||
return
|
||||
}
|
||||
|
||||
if params.ResponseType != 0 {
|
||||
*output = *output + "event: content_block_stop\n"
|
||||
*output = *output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, params.ResponseIndex)
|
||||
|
||||
@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
case "low":
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
case "medium":
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
case "high":
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768)
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
default:
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
|
||||
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
|
||||
var setBudget bool
|
||||
var normalized int
|
||||
var budget int
|
||||
|
||||
if v := tc.Get("thinkingBudget"); v.Exists() {
|
||||
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
|
||||
budget = int(v.Int())
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
setBudget = true
|
||||
} else if v := tc.Get("thinking_budget"); v.Exists() {
|
||||
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
|
||||
budget = int(v.Int())
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
setBudget = true
|
||||
}
|
||||
|
||||
@@ -82,22 +82,27 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
|
||||
} else if v := tc.Get("include_thoughts"); v.Exists() {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
|
||||
} else if setBudget && normalized != 0 {
|
||||
} else if setBudget && budget != 0 {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
|
||||
// This matches the official Gemini CLI behavior which always sends:
|
||||
// { thinkingBudget: -1, includeThoughts: true }
|
||||
// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
|
||||
if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
// Claude/Anthropic API format: thinking.type == "enabled" with budget_tokens
|
||||
// This allows Claude Code and other Claude API clients to pass thinking configuration
|
||||
if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && util.ModelSupportsThinking(modelName) {
|
||||
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
|
||||
if t.Get("type").String() == "enabled" {
|
||||
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
|
||||
budget := int(b.Int())
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Temperature/top_p/top_k
|
||||
// Temperature/top_p/top_k/max_tokens
|
||||
if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
|
||||
}
|
||||
@@ -107,6 +112,9 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
if tkr := gjson.GetBytes(rawJSON, "top_k"); tkr.Exists() && tkr.Type == gjson.Number {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.topK", tkr.Num)
|
||||
}
|
||||
if maxTok := gjson.GetBytes(rawJSON, "max_tokens"); maxTok.Exists() && maxTok.Type == gjson.Number {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.maxOutputTokens", maxTok.Num)
|
||||
}
|
||||
|
||||
// Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities
|
||||
// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
|
||||
@@ -24,6 +25,9 @@ type convertCliResponseToOpenAIChatParams struct {
|
||||
FunctionIndex int
|
||||
}
|
||||
|
||||
// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
|
||||
var functionCallIDCounter uint64
|
||||
|
||||
// ConvertAntigravityResponseToOpenAI translates a single chunk of a streaming response from the
|
||||
// Gemini CLI API format to the OpenAI Chat Completions streaming format.
|
||||
// It processes various Gemini CLI event types and transforms them into OpenAI-compatible JSON responses.
|
||||
@@ -146,7 +150,7 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq
|
||||
|
||||
functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
|
||||
fcName := functionCallResult.Get("name").String()
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
|
||||
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
|
||||
|
||||
@@ -331,8 +331,8 @@ func ConvertClaudeResponseToGeminiNonStream(_ context.Context, modelName string,
|
||||
streamingEvents := make([][]byte, 0)
|
||||
|
||||
scanner := bufio.NewScanner(bytes.NewReader(rawJSON))
|
||||
buffer := make([]byte, 20_971_520)
|
||||
scanner.Buffer(buffer, 20_971_520)
|
||||
buffer := make([]byte, 52_428_800) // 50MB
|
||||
scanner.Buffer(buffer, 52_428_800)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
// log.Debug(string(line))
|
||||
|
||||
@@ -445,8 +445,8 @@ func ConvertClaudeResponseToOpenAIResponsesNonStream(_ context.Context, _ string
|
||||
// Use a simple scanner to iterate through raw bytes
|
||||
// Note: extremely large responses may require increasing the buffer
|
||||
scanner := bufio.NewScanner(bytes.NewReader(rawJSON))
|
||||
buf := make([]byte, 20_971_520)
|
||||
scanner.Buffer(buf, 20_971_520)
|
||||
buf := make([]byte, 52_428_800) // 50MB
|
||||
scanner.Buffer(buf, 52_428_800)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
if !bytes.HasPrefix(line, dataTag) {
|
||||
|
||||
@@ -165,7 +165,6 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
|
||||
if t.Get("type").String() == "enabled" {
|
||||
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
|
||||
budget := int(b.Int())
|
||||
budget = util.NormalizeThinkingBudget(modelName, budget)
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -25,8 +26,12 @@ type Params struct {
|
||||
HasFirstResponse bool // Indicates if the initial message_start event has been sent
|
||||
ResponseType int // Current response type: 0=none, 1=content, 2=thinking, 3=function
|
||||
ResponseIndex int // Index counter for content blocks in the streaming response
|
||||
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
|
||||
}
|
||||
|
||||
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
|
||||
var toolUseIDCounter uint64
|
||||
|
||||
// ConvertGeminiCLIResponseToClaude performs sophisticated streaming response format conversion.
|
||||
// This function implements a complex state machine that translates backend client responses
|
||||
// into Claude Code-compatible Server-Sent Events (SSE) format. It manages different response types
|
||||
@@ -53,9 +58,13 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
||||
}
|
||||
|
||||
if bytes.Equal(rawJSON, []byte("[DONE]")) {
|
||||
return []string{
|
||||
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||
// Only send message_stop if we have actually output content
|
||||
if (*param).(*Params).HasContent {
|
||||
return []string{
|
||||
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||
}
|
||||
}
|
||||
return []string{}
|
||||
}
|
||||
|
||||
// Track whether tools are being used in this response chunk
|
||||
@@ -104,6 +113,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).HasContent = true
|
||||
} else {
|
||||
// Transition from another state to thinking
|
||||
// First, close any existing content block
|
||||
@@ -127,6 +137,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).ResponseType = 2 // Set state to thinking
|
||||
(*param).(*Params).HasContent = true
|
||||
}
|
||||
} else {
|
||||
// Process regular text content (user-visible output)
|
||||
@@ -135,6 +146,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).HasContent = true
|
||||
} else {
|
||||
// Transition from another state to text content
|
||||
// First, close any existing content block
|
||||
@@ -158,6 +170,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).ResponseType = 1 // Set state to content
|
||||
(*param).(*Params).HasContent = true
|
||||
}
|
||||
}
|
||||
} else if functionCallResult.Exists() {
|
||||
@@ -197,7 +210,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
||||
|
||||
// Create the tool use block with unique ID and function details
|
||||
data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, (*param).(*Params).ResponseIndex)
|
||||
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
|
||||
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
|
||||
data, _ = sjson.Set(data, "content_block.name", fcName)
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
|
||||
@@ -207,6 +220,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
}
|
||||
(*param).(*Params).ResponseType = 3
|
||||
(*param).(*Params).HasContent = true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -215,28 +229,31 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
|
||||
// Process usage metadata and finish reason when present in the response
|
||||
if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
|
||||
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
|
||||
// Close the final content block
|
||||
output = output + "event: content_block_stop\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
// Only send final events if we have actually output content
|
||||
if (*param).(*Params).HasContent {
|
||||
// Close the final content block
|
||||
output = output + "event: content_block_stop\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
|
||||
// Send the final message delta with usage information and stop reason
|
||||
output = output + "event: message_delta\n"
|
||||
output = output + `data: `
|
||||
// Send the final message delta with usage information and stop reason
|
||||
output = output + "event: message_delta\n"
|
||||
output = output + `data: `
|
||||
|
||||
// Create the message delta template with appropriate stop reason
|
||||
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
// Set tool_use stop reason if tools were used in this response
|
||||
if usedTool {
|
||||
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
// Create the message delta template with appropriate stop reason
|
||||
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
// Set tool_use stop reason if tools were used in this response
|
||||
if usedTool {
|
||||
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
}
|
||||
|
||||
// Include thinking tokens in output token count if present
|
||||
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
|
||||
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
|
||||
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
|
||||
|
||||
output = output + template + "\n\n\n"
|
||||
}
|
||||
|
||||
// Include thinking tokens in output token count if present
|
||||
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
|
||||
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
|
||||
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
|
||||
|
||||
output = output + template + "\n\n\n"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
case "low":
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
case "medium":
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
case "high":
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768)
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
default:
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
|
||||
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
|
||||
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
|
||||
var setBudget bool
|
||||
var normalized int
|
||||
var budget int
|
||||
|
||||
if v := tc.Get("thinkingBudget"); v.Exists() {
|
||||
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
|
||||
budget = int(v.Int())
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
setBudget = true
|
||||
} else if v := tc.Get("thinking_budget"); v.Exists() {
|
||||
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
|
||||
budget = int(v.Int())
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
setBudget = true
|
||||
}
|
||||
|
||||
@@ -82,21 +82,12 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
|
||||
} else if v := tc.Get("include_thoughts"); v.Exists() {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
|
||||
} else if setBudget && normalized != 0 {
|
||||
} else if setBudget && budget != 0 {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
|
||||
// This matches the official Gemini CLI behavior which always sends:
|
||||
// { thinkingBudget: -1, includeThoughts: true }
|
||||
// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
|
||||
if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
}
|
||||
|
||||
// Temperature/top_p/top_k
|
||||
if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
|
||||
out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
|
||||
@@ -24,6 +25,9 @@ type convertCliResponseToOpenAIChatParams struct {
|
||||
FunctionIndex int
|
||||
}
|
||||
|
||||
// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
|
||||
var functionCallIDCounter uint64
|
||||
|
||||
// ConvertCliResponseToOpenAI translates a single chunk of a streaming response from the
|
||||
// Gemini CLI API format to the OpenAI Chat Completions streaming format.
|
||||
// It processes various Gemini CLI event types and transforms them into OpenAI-compatible JSON responses.
|
||||
@@ -146,7 +150,7 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
|
||||
|
||||
functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
|
||||
fcName := functionCallResult.Get("name").String()
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
|
||||
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
|
||||
|
||||
@@ -158,7 +158,6 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
if t.Get("type").String() == "enabled" {
|
||||
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
|
||||
budget := int(b.Int())
|
||||
budget = util.NormalizeThinkingBudget(modelName, budget)
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -24,8 +25,12 @@ type Params struct {
|
||||
HasFirstResponse bool
|
||||
ResponseType int
|
||||
ResponseIndex int
|
||||
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
|
||||
}
|
||||
|
||||
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
|
||||
var toolUseIDCounter uint64
|
||||
|
||||
// ConvertGeminiResponseToClaude performs sophisticated streaming response format conversion.
|
||||
// This function implements a complex state machine that translates backend client responses
|
||||
// into Claude-compatible Server-Sent Events (SSE) format. It manages different response types
|
||||
@@ -53,9 +58,13 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
}
|
||||
|
||||
if bytes.Equal(rawJSON, []byte("[DONE]")) {
|
||||
return []string{
|
||||
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||
// Only send message_stop if we have actually output content
|
||||
if (*param).(*Params).HasContent {
|
||||
return []string{
|
||||
"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
|
||||
}
|
||||
}
|
||||
return []string{}
|
||||
}
|
||||
|
||||
// Track whether tools are being used in this response chunk
|
||||
@@ -104,6 +113,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).HasContent = true
|
||||
} else {
|
||||
// Transition from another state to thinking
|
||||
// First, close any existing content block
|
||||
@@ -127,6 +137,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).ResponseType = 2 // Set state to thinking
|
||||
(*param).(*Params).HasContent = true
|
||||
}
|
||||
} else {
|
||||
// Process regular text content (user-visible output)
|
||||
@@ -135,6 +146,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
output = output + "event: content_block_delta\n"
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).HasContent = true
|
||||
} else {
|
||||
// Transition from another state to text content
|
||||
// First, close any existing content block
|
||||
@@ -158,6 +170,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
(*param).(*Params).ResponseType = 1 // Set state to content
|
||||
(*param).(*Params).HasContent = true
|
||||
}
|
||||
}
|
||||
} else if functionCallResult.Exists() {
|
||||
@@ -197,7 +210,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
|
||||
// Create the tool use block with unique ID and function details
|
||||
data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, (*param).(*Params).ResponseIndex)
|
||||
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
|
||||
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
|
||||
data, _ = sjson.Set(data, "content_block.name", fcName)
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
|
||||
@@ -207,6 +220,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
output = output + fmt.Sprintf("data: %s\n\n\n", data)
|
||||
}
|
||||
(*param).(*Params).ResponseType = 3
|
||||
(*param).(*Params).HasContent = true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -214,23 +228,26 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
|
||||
usageResult := gjson.GetBytes(rawJSON, "usageMetadata")
|
||||
if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
|
||||
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
|
||||
output = output + "event: content_block_stop\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
// Only send final events if we have actually output content
|
||||
if (*param).(*Params).HasContent {
|
||||
output = output + "event: content_block_stop\n"
|
||||
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
|
||||
output = output + "\n\n\n"
|
||||
|
||||
output = output + "event: message_delta\n"
|
||||
output = output + `data: `
|
||||
output = output + "event: message_delta\n"
|
||||
output = output + `data: `
|
||||
|
||||
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
if usedTool {
|
||||
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
if usedTool {
|
||||
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
|
||||
}
|
||||
|
||||
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
|
||||
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
|
||||
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
|
||||
|
||||
output = output + template + "\n\n\n"
|
||||
}
|
||||
|
||||
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
|
||||
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
|
||||
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
|
||||
|
||||
output = output + template + "\n\n\n"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
case "low":
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
case "medium":
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
case "high":
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 32768)
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
default:
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
|
||||
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
|
||||
var setBudget bool
|
||||
var normalized int
|
||||
var budget int
|
||||
|
||||
if v := tc.Get("thinkingBudget"); v.Exists() {
|
||||
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
|
||||
budget = int(v.Int())
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
setBudget = true
|
||||
} else if v := tc.Get("thinking_budget"); v.Exists() {
|
||||
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
|
||||
budget = int(v.Int())
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
setBudget = true
|
||||
}
|
||||
|
||||
@@ -82,7 +82,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
|
||||
} else if v := tc.Get("include_thoughts"); v.Exists() {
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
|
||||
} else if setBudget && normalized != 0 {
|
||||
} else if setBudget && budget != 0 {
|
||||
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -23,6 +24,9 @@ type convertGeminiResponseToOpenAIChatParams struct {
|
||||
FunctionIndex int
|
||||
}
|
||||
|
||||
// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
|
||||
var functionCallIDCounter uint64
|
||||
|
||||
// ConvertGeminiResponseToOpenAI translates a single chunk of a streaming response from the
|
||||
// Gemini API format to the OpenAI Chat Completions streaming format.
|
||||
// It processes various Gemini event types and transforms them into OpenAI-compatible JSON responses.
|
||||
@@ -148,7 +152,7 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
|
||||
|
||||
functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
|
||||
fcName := functionCallResult.Get("name").String()
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
|
||||
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
|
||||
@@ -281,7 +285,7 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
|
||||
}
|
||||
functionCallItemTemplate := `{"id": "","type": "function","function": {"name": "","arguments": ""}}`
|
||||
fcName := functionCallResult.Get("name").String()
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", fcName)
|
||||
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", fcArgsResult.Raw)
|
||||
|
||||
@@ -400,16 +400,16 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
case "minimal":
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
case "low":
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096))
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096)
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
case "medium":
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
case "high":
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 32768)
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
default:
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
@@ -421,32 +421,22 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
|
||||
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
|
||||
if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
|
||||
var setBudget bool
|
||||
var normalized int
|
||||
var budget int
|
||||
if v := tc.Get("thinking_budget"); v.Exists() {
|
||||
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
|
||||
budget = int(v.Int())
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
setBudget = true
|
||||
}
|
||||
if v := tc.Get("include_thoughts"); v.Exists() {
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
|
||||
} else if setBudget {
|
||||
if normalized != 0 {
|
||||
if budget != 0 {
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
|
||||
// This matches the official Gemini CLI behavior which always sends:
|
||||
// { thinkingBudget: -1, includeThoughts: true }
|
||||
// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
|
||||
if !gjson.Get(out, "generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
// log.Debugf("Applied default thinkingConfig for gemini-3-pro-preview (matches Gemini CLI): thinkingBudget=-1, include_thoughts=true")
|
||||
}
|
||||
|
||||
result := []byte(out)
|
||||
result = common.AttachDefaultSafetySettings(result, "safetySettings")
|
||||
return result
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -37,6 +38,12 @@ type geminiToResponsesState struct {
|
||||
FuncCallIDs map[int]string
|
||||
}
|
||||
|
||||
// responseIDCounter provides a process-wide unique counter for synthesized response identifiers.
|
||||
var responseIDCounter uint64
|
||||
|
||||
// funcCallIDCounter provides a process-wide unique counter for function call identifiers.
|
||||
var funcCallIDCounter uint64
|
||||
|
||||
func emitEvent(event string, payload string) string {
|
||||
return fmt.Sprintf("event: %s\ndata: %s", event, payload)
|
||||
}
|
||||
@@ -205,7 +212,7 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
|
||||
st.FuncArgsBuf[idx] = &strings.Builder{}
|
||||
}
|
||||
if st.FuncCallIDs[idx] == "" {
|
||||
st.FuncCallIDs[idx] = fmt.Sprintf("call_%d", time.Now().UnixNano())
|
||||
st.FuncCallIDs[idx] = fmt.Sprintf("call_%d_%d", time.Now().UnixNano(), atomic.AddUint64(&funcCallIDCounter, 1))
|
||||
}
|
||||
st.FuncNames[idx] = name
|
||||
|
||||
@@ -464,7 +471,7 @@ func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string
|
||||
// id: prefer provider responseId, otherwise synthesize
|
||||
id := root.Get("responseId").String()
|
||||
if id == "" {
|
||||
id = fmt.Sprintf("resp_%x", time.Now().UnixNano())
|
||||
id = fmt.Sprintf("resp_%x_%d", time.Now().UnixNano(), atomic.AddUint64(&responseIDCounter, 1))
|
||||
}
|
||||
// Normalize to response-style id (prefix resp_ if missing)
|
||||
if !strings.HasPrefix(id, "resp_") {
|
||||
@@ -575,7 +582,7 @@ func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string
|
||||
if fc := p.Get("functionCall"); fc.Exists() {
|
||||
name := fc.Get("name").String()
|
||||
args := fc.Get("args")
|
||||
callID := fmt.Sprintf("call_%x", time.Now().UnixNano())
|
||||
callID := fmt.Sprintf("call_%x_%d", time.Now().UnixNano(), atomic.AddUint64(&funcCallIDCounter, 1))
|
||||
outputs = append(outputs, map[string]interface{}{
|
||||
"id": fmt.Sprintf("fc_%s", callID),
|
||||
"type": "function_call",
|
||||
|
||||
@@ -8,6 +8,7 @@ package claude
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"strings"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
@@ -242,11 +243,12 @@ func convertClaudeContentPart(part gjson.Result) (string, bool) {
|
||||
|
||||
switch partType {
|
||||
case "text":
|
||||
if !part.Get("text").Exists() {
|
||||
text := part.Get("text").String()
|
||||
if strings.TrimSpace(text) == "" {
|
||||
return "", false
|
||||
}
|
||||
textContent := `{"type":"text","text":""}`
|
||||
textContent, _ = sjson.Set(textContent, "text", part.Get("text").String())
|
||||
textContent, _ = sjson.Set(textContent, "text", text)
|
||||
return textContent, true
|
||||
|
||||
case "image":
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -41,6 +42,9 @@ type oaiToResponsesState struct {
|
||||
UsageSeen bool
|
||||
}
|
||||
|
||||
// responseIDCounter provides a process-wide unique counter for synthesized response identifiers.
|
||||
var responseIDCounter uint64
|
||||
|
||||
func emitRespEvent(event string, payload string) string {
|
||||
return fmt.Sprintf("event: %s\ndata: %s", event, payload)
|
||||
}
|
||||
@@ -590,7 +594,7 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(_ context.Co
|
||||
// id: use provider id if present, otherwise synthesize
|
||||
id := root.Get("id").String()
|
||||
if id == "" {
|
||||
id = fmt.Sprintf("resp_%x", time.Now().UnixNano())
|
||||
id = fmt.Sprintf("resp_%x_%d", time.Now().UnixNano(), atomic.AddUint64(&responseIDCounter, 1))
|
||||
}
|
||||
resp, _ = sjson.Set(resp, "id", id)
|
||||
|
||||
|
||||
46
internal/util/claude_thinking.go
Normal file
46
internal/util/claude_thinking.go
Normal file
@@ -0,0 +1,46 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// ApplyClaudeThinkingConfig applies thinking configuration to a Claude API request payload.
|
||||
// It sets the thinking.type to "enabled" and thinking.budget_tokens to the specified budget.
|
||||
// If budget is nil or the payload already has thinking config, it returns the payload unchanged.
|
||||
func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
|
||||
if budget == nil {
|
||||
return body
|
||||
}
|
||||
if gjson.GetBytes(body, "thinking").Exists() {
|
||||
return body
|
||||
}
|
||||
if *budget <= 0 {
|
||||
return body
|
||||
}
|
||||
updated := body
|
||||
updated, _ = sjson.SetBytes(updated, "thinking.type", "enabled")
|
||||
updated, _ = sjson.SetBytes(updated, "thinking.budget_tokens", *budget)
|
||||
return updated
|
||||
}
|
||||
|
||||
// ResolveClaudeThinkingConfig resolves thinking configuration from metadata for Claude models.
|
||||
// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
|
||||
// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
|
||||
func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
|
||||
budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
|
||||
if !matched {
|
||||
return nil, false
|
||||
}
|
||||
if include != nil && !*include {
|
||||
return nil, true
|
||||
}
|
||||
if budget == nil {
|
||||
return nil, true
|
||||
}
|
||||
normalized := NormalizeThinkingBudget(modelName, *budget)
|
||||
if normalized <= 0 {
|
||||
return nil, true
|
||||
}
|
||||
return &normalized, true
|
||||
}
|
||||
@@ -1,8 +1,6 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -15,80 +13,6 @@ const (
|
||||
GeminiOriginalModelMetadataKey = "gemini_original_model"
|
||||
)
|
||||
|
||||
func ParseGeminiThinkingSuffix(model string) (string, *int, *bool, bool) {
|
||||
if model == "" {
|
||||
return model, nil, nil, false
|
||||
}
|
||||
lower := strings.ToLower(model)
|
||||
if !strings.HasPrefix(lower, "gemini-") {
|
||||
return model, nil, nil, false
|
||||
}
|
||||
|
||||
if strings.HasSuffix(lower, "-nothinking") {
|
||||
base := model[:len(model)-len("-nothinking")]
|
||||
budgetValue := 0
|
||||
if strings.HasPrefix(lower, "gemini-2.5-pro") {
|
||||
budgetValue = 128
|
||||
}
|
||||
include := false
|
||||
return base, &budgetValue, &include, true
|
||||
}
|
||||
|
||||
// Handle "-reasoning" suffix: enables thinking with dynamic budget (-1)
|
||||
// Maps: gemini-2.5-flash-reasoning -> gemini-2.5-flash with thinkingBudget=-1
|
||||
if strings.HasSuffix(lower, "-reasoning") {
|
||||
base := model[:len(model)-len("-reasoning")]
|
||||
budgetValue := -1 // Dynamic budget
|
||||
include := true
|
||||
return base, &budgetValue, &include, true
|
||||
}
|
||||
|
||||
idx := strings.LastIndex(lower, "-thinking-")
|
||||
if idx == -1 {
|
||||
return model, nil, nil, false
|
||||
}
|
||||
|
||||
digits := model[idx+len("-thinking-"):]
|
||||
if digits == "" {
|
||||
return model, nil, nil, false
|
||||
}
|
||||
end := len(digits)
|
||||
for i := 0; i < len(digits); i++ {
|
||||
if digits[i] < '0' || digits[i] > '9' {
|
||||
end = i
|
||||
break
|
||||
}
|
||||
}
|
||||
if end == 0 {
|
||||
return model, nil, nil, false
|
||||
}
|
||||
valueStr := digits[:end]
|
||||
value, err := strconv.Atoi(valueStr)
|
||||
if err != nil {
|
||||
return model, nil, nil, false
|
||||
}
|
||||
base := model[:idx]
|
||||
budgetValue := value
|
||||
return base, &budgetValue, nil, true
|
||||
}
|
||||
|
||||
func NormalizeGeminiThinkingModel(modelName string) (string, map[string]any) {
|
||||
baseModel, budget, include, matched := ParseGeminiThinkingSuffix(modelName)
|
||||
if !matched {
|
||||
return baseModel, nil
|
||||
}
|
||||
metadata := map[string]any{
|
||||
GeminiOriginalModelMetadataKey: modelName,
|
||||
}
|
||||
if budget != nil {
|
||||
metadata[GeminiThinkingBudgetMetadataKey] = *budget
|
||||
}
|
||||
if include != nil {
|
||||
metadata[GeminiIncludeThoughtsMetadataKey] = *include
|
||||
}
|
||||
return baseModel, metadata
|
||||
}
|
||||
|
||||
func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
|
||||
if budget == nil && includeThoughts == nil {
|
||||
return body
|
||||
@@ -133,78 +57,45 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
|
||||
return updated
|
||||
}
|
||||
|
||||
func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
|
||||
if len(metadata) == 0 {
|
||||
return nil, nil, false
|
||||
// modelsWithDefaultThinking lists models that should have thinking enabled by default
|
||||
// when no explicit thinkingConfig is provided.
|
||||
var modelsWithDefaultThinking = map[string]bool{
|
||||
"gemini-3-pro-preview": true,
|
||||
}
|
||||
|
||||
// ModelHasDefaultThinking returns true if the model should have thinking enabled by default.
|
||||
func ModelHasDefaultThinking(model string) bool {
|
||||
return modelsWithDefaultThinking[model]
|
||||
}
|
||||
|
||||
// ApplyDefaultThinkingIfNeeded injects default thinkingConfig for models that require it.
|
||||
// For standard Gemini API format (generationConfig.thinkingConfig path).
|
||||
// Returns the modified body if thinkingConfig was added, otherwise returns the original.
|
||||
func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
|
||||
if !ModelHasDefaultThinking(model) {
|
||||
return body
|
||||
}
|
||||
var (
|
||||
budgetPtr *int
|
||||
includePtr *bool
|
||||
matched bool
|
||||
)
|
||||
if rawBudget, ok := metadata[GeminiThinkingBudgetMetadataKey]; ok {
|
||||
switch v := rawBudget.(type) {
|
||||
case int:
|
||||
budget := v
|
||||
budgetPtr = &budget
|
||||
matched = true
|
||||
case int32:
|
||||
budget := int(v)
|
||||
budgetPtr = &budget
|
||||
matched = true
|
||||
case int64:
|
||||
budget := int(v)
|
||||
budgetPtr = &budget
|
||||
matched = true
|
||||
case float64:
|
||||
budget := int(v)
|
||||
budgetPtr = &budget
|
||||
matched = true
|
||||
case json.Number:
|
||||
if val, err := v.Int64(); err == nil {
|
||||
budget := int(val)
|
||||
budgetPtr = &budget
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
if gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() {
|
||||
return body
|
||||
}
|
||||
if rawInclude, ok := metadata[GeminiIncludeThoughtsMetadataKey]; ok {
|
||||
switch v := rawInclude.(type) {
|
||||
case bool:
|
||||
include := v
|
||||
includePtr = &include
|
||||
matched = true
|
||||
case string:
|
||||
if parsed, err := strconv.ParseBool(v); err == nil {
|
||||
include := parsed
|
||||
includePtr = &include
|
||||
matched = true
|
||||
}
|
||||
case json.Number:
|
||||
if val, err := v.Int64(); err == nil {
|
||||
include := val != 0
|
||||
includePtr = &include
|
||||
matched = true
|
||||
}
|
||||
case int:
|
||||
include := v != 0
|
||||
includePtr = &include
|
||||
matched = true
|
||||
case int32:
|
||||
include := v != 0
|
||||
includePtr = &include
|
||||
matched = true
|
||||
case int64:
|
||||
include := v != 0
|
||||
includePtr = &include
|
||||
matched = true
|
||||
case float64:
|
||||
include := v != 0
|
||||
includePtr = &include
|
||||
matched = true
|
||||
}
|
||||
updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
updated, _ = sjson.SetBytes(updated, "generationConfig.thinkingConfig.include_thoughts", true)
|
||||
return updated
|
||||
}
|
||||
|
||||
// ApplyDefaultThinkingIfNeededCLI injects default thinkingConfig for models that require it.
|
||||
// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
|
||||
// Returns the modified body if thinkingConfig was added, otherwise returns the original.
|
||||
func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
|
||||
if !ModelHasDefaultThinking(model) {
|
||||
return body
|
||||
}
|
||||
return budgetPtr, includePtr, matched
|
||||
if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() {
|
||||
return body
|
||||
}
|
||||
updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
updated, _ = sjson.SetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts", true)
|
||||
return updated
|
||||
}
|
||||
|
||||
// StripThinkingConfigIfUnsupported removes thinkingConfig from the request body
|
||||
@@ -223,6 +114,32 @@ func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
|
||||
return updated
|
||||
}
|
||||
|
||||
// NormalizeGeminiThinkingBudget normalizes the thinkingBudget value in a standard Gemini
|
||||
// request body (generationConfig.thinkingConfig.thinkingBudget path).
|
||||
func NormalizeGeminiThinkingBudget(model string, body []byte) []byte {
|
||||
const budgetPath = "generationConfig.thinkingConfig.thinkingBudget"
|
||||
budget := gjson.GetBytes(body, budgetPath)
|
||||
if !budget.Exists() {
|
||||
return body
|
||||
}
|
||||
normalized := NormalizeThinkingBudget(model, int(budget.Int()))
|
||||
updated, _ := sjson.SetBytes(body, budgetPath, normalized)
|
||||
return updated
|
||||
}
|
||||
|
||||
// NormalizeGeminiCLIThinkingBudget normalizes the thinkingBudget value in a Gemini CLI
|
||||
// request body (request.generationConfig.thinkingConfig.thinkingBudget path).
|
||||
func NormalizeGeminiCLIThinkingBudget(model string, body []byte) []byte {
|
||||
const budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget"
|
||||
budget := gjson.GetBytes(body, budgetPath)
|
||||
if !budget.Exists() {
|
||||
return body
|
||||
}
|
||||
normalized := NormalizeThinkingBudget(model, int(budget.Int()))
|
||||
updated, _ := sjson.SetBytes(body, budgetPath, normalized)
|
||||
return updated
|
||||
}
|
||||
|
||||
// ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel"
|
||||
// and converts it to "thinkingBudget".
|
||||
// "high" -> 32768
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
)
|
||||
|
||||
@@ -67,3 +69,39 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero
|
||||
}
|
||||
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
|
||||
}
|
||||
|
||||
// GetModelThinkingLevels returns the discrete reasoning effort levels for the model.
|
||||
// Returns nil if the model has no thinking support or no levels defined.
|
||||
func GetModelThinkingLevels(model string) []string {
|
||||
if model == "" {
|
||||
return nil
|
||||
}
|
||||
info := registry.GetGlobalRegistry().GetModelInfo(model)
|
||||
if info == nil || info.Thinking == nil {
|
||||
return nil
|
||||
}
|
||||
return info.Thinking.Levels
|
||||
}
|
||||
|
||||
// ModelUsesThinkingLevels reports whether the model uses discrete reasoning
|
||||
// effort levels instead of numeric budgets.
|
||||
func ModelUsesThinkingLevels(model string) bool {
|
||||
levels := GetModelThinkingLevels(model)
|
||||
return len(levels) > 0
|
||||
}
|
||||
|
||||
// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort
|
||||
// level for the given model. Returns false when the level is not supported.
|
||||
func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
|
||||
levels := GetModelThinkingLevels(model)
|
||||
if len(levels) == 0 {
|
||||
return "", false
|
||||
}
|
||||
loweredEffort := strings.ToLower(strings.TrimSpace(effort))
|
||||
for _, lvl := range levels {
|
||||
if strings.ToLower(lvl) == loweredEffort {
|
||||
return lvl, true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
313
internal/util/thinking_suffix.go
Normal file
313
internal/util/thinking_suffix.go
Normal file
@@ -0,0 +1,313 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
ThinkingBudgetMetadataKey = "thinking_budget"
|
||||
ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts"
|
||||
ReasoningEffortMetadataKey = "reasoning_effort"
|
||||
ThinkingOriginalModelMetadataKey = "thinking_original_model"
|
||||
)
|
||||
|
||||
// NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
|
||||
// the normalized base model with extracted metadata. Supported pattern:
|
||||
// - "(<value>)" where value can be:
|
||||
// - A numeric budget (e.g., "(8192)", "(16384)")
|
||||
// - A reasoning effort level (e.g., "(high)", "(medium)", "(low)")
|
||||
//
|
||||
// Examples:
|
||||
// - "claude-sonnet-4-5-20250929(16384)" → budget=16384
|
||||
// - "gpt-5.1(high)" → reasoning_effort="high"
|
||||
// - "gemini-2.5-pro(32768)" → budget=32768
|
||||
//
|
||||
// Note: Empty parentheses "()" are not supported and will be ignored.
|
||||
func NormalizeThinkingModel(modelName string) (string, map[string]any) {
|
||||
if modelName == "" {
|
||||
return modelName, nil
|
||||
}
|
||||
|
||||
baseModel := modelName
|
||||
|
||||
var (
|
||||
budgetOverride *int
|
||||
reasoningEffort *string
|
||||
matched bool
|
||||
)
|
||||
|
||||
// Match "(<value>)" pattern at the end of the model name
|
||||
if idx := strings.LastIndex(modelName, "("); idx != -1 {
|
||||
if !strings.HasSuffix(modelName, ")") {
|
||||
// Incomplete parenthesis, ignore
|
||||
return baseModel, nil
|
||||
}
|
||||
|
||||
value := modelName[idx+1 : len(modelName)-1] // Extract content between ( and )
|
||||
if value == "" {
|
||||
// Empty parentheses not supported
|
||||
return baseModel, nil
|
||||
}
|
||||
|
||||
candidateBase := modelName[:idx]
|
||||
|
||||
// Auto-detect: pure numeric → budget, string → reasoning effort level
|
||||
if parsed, ok := parseIntPrefix(value); ok {
|
||||
// Numeric value: treat as thinking budget
|
||||
baseModel = candidateBase
|
||||
budgetOverride = &parsed
|
||||
matched = true
|
||||
} else {
|
||||
// String value: treat as reasoning effort level
|
||||
baseModel = candidateBase
|
||||
raw := strings.ToLower(strings.TrimSpace(value))
|
||||
if raw != "" {
|
||||
reasoningEffort = &raw
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !matched {
|
||||
return baseModel, nil
|
||||
}
|
||||
|
||||
metadata := map[string]any{
|
||||
ThinkingOriginalModelMetadataKey: modelName,
|
||||
}
|
||||
if budgetOverride != nil {
|
||||
metadata[ThinkingBudgetMetadataKey] = *budgetOverride
|
||||
}
|
||||
if reasoningEffort != nil {
|
||||
metadata[ReasoningEffortMetadataKey] = *reasoningEffort
|
||||
}
|
||||
return baseModel, metadata
|
||||
}
|
||||
|
||||
// ThinkingFromMetadata extracts thinking overrides from metadata produced by NormalizeThinkingModel.
|
||||
// It accepts both the new generic keys and legacy Gemini-specific keys.
|
||||
func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) {
|
||||
if len(metadata) == 0 {
|
||||
return nil, nil, nil, false
|
||||
}
|
||||
|
||||
var (
|
||||
budgetPtr *int
|
||||
includePtr *bool
|
||||
effortPtr *string
|
||||
matched bool
|
||||
)
|
||||
|
||||
readBudget := func(key string) {
|
||||
if budgetPtr != nil {
|
||||
return
|
||||
}
|
||||
if raw, ok := metadata[key]; ok {
|
||||
if v, okNumber := parseNumberToInt(raw); okNumber {
|
||||
budget := v
|
||||
budgetPtr = &budget
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
readInclude := func(key string) {
|
||||
if includePtr != nil {
|
||||
return
|
||||
}
|
||||
if raw, ok := metadata[key]; ok {
|
||||
switch v := raw.(type) {
|
||||
case bool:
|
||||
val := v
|
||||
includePtr = &val
|
||||
matched = true
|
||||
case *bool:
|
||||
if v != nil {
|
||||
val := *v
|
||||
includePtr = &val
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
readEffort := func(key string) {
|
||||
if effortPtr != nil {
|
||||
return
|
||||
}
|
||||
if raw, ok := metadata[key]; ok {
|
||||
if val, okStr := raw.(string); okStr && strings.TrimSpace(val) != "" {
|
||||
normalized := strings.ToLower(strings.TrimSpace(val))
|
||||
effortPtr = &normalized
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
readBudget(ThinkingBudgetMetadataKey)
|
||||
readBudget(GeminiThinkingBudgetMetadataKey)
|
||||
readInclude(ThinkingIncludeThoughtsMetadataKey)
|
||||
readInclude(GeminiIncludeThoughtsMetadataKey)
|
||||
readEffort(ReasoningEffortMetadataKey)
|
||||
readEffort("reasoning.effort")
|
||||
|
||||
return budgetPtr, includePtr, effortPtr, matched
|
||||
}
|
||||
|
||||
// ResolveThinkingConfigFromMetadata derives thinking budget/include overrides,
|
||||
// converting reasoning effort strings into budgets when possible.
|
||||
func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*int, *bool, bool) {
|
||||
budget, include, effort, matched := ThinkingFromMetadata(metadata)
|
||||
if !matched {
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
if budget == nil && effort != nil {
|
||||
if derived, ok := ThinkingEffortToBudget(model, *effort); ok {
|
||||
budget = &derived
|
||||
}
|
||||
}
|
||||
return budget, include, budget != nil || include != nil || effort != nil
|
||||
}
|
||||
|
||||
// ReasoningEffortFromMetadata resolves a reasoning effort string from metadata,
|
||||
// inferring "auto" and "none" when budgets request dynamic or disabled thinking.
|
||||
func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
|
||||
budget, include, effort, matched := ThinkingFromMetadata(metadata)
|
||||
if !matched {
|
||||
return "", false
|
||||
}
|
||||
if effort != nil && *effort != "" {
|
||||
return strings.ToLower(strings.TrimSpace(*effort)), true
|
||||
}
|
||||
if budget != nil {
|
||||
switch *budget {
|
||||
case -1:
|
||||
return "auto", true
|
||||
case 0:
|
||||
return "none", true
|
||||
}
|
||||
}
|
||||
if include != nil && !*include {
|
||||
return "none", true
|
||||
}
|
||||
return "", true
|
||||
}
|
||||
|
||||
// ThinkingEffortToBudget maps reasoning effort levels to approximate budgets,
|
||||
// clamping the result to the model's supported range.
|
||||
func ThinkingEffortToBudget(model, effort string) (int, bool) {
|
||||
if effort == "" {
|
||||
return 0, false
|
||||
}
|
||||
normalized, ok := NormalizeReasoningEffortLevel(model, effort)
|
||||
if !ok {
|
||||
normalized = strings.ToLower(strings.TrimSpace(effort))
|
||||
}
|
||||
switch normalized {
|
||||
case "none":
|
||||
return 0, true
|
||||
case "auto":
|
||||
return NormalizeThinkingBudget(model, -1), true
|
||||
case "minimal":
|
||||
return NormalizeThinkingBudget(model, 512), true
|
||||
case "low":
|
||||
return NormalizeThinkingBudget(model, 1024), true
|
||||
case "medium":
|
||||
return NormalizeThinkingBudget(model, 8192), true
|
||||
case "high":
|
||||
return NormalizeThinkingBudget(model, 24576), true
|
||||
case "xhigh":
|
||||
return NormalizeThinkingBudget(model, 32768), true
|
||||
default:
|
||||
return 0, false
|
||||
}
|
||||
}
|
||||
|
||||
// ResolveOriginalModel returns the original model name stored in metadata (if present),
|
||||
// otherwise falls back to the provided model.
|
||||
func ResolveOriginalModel(model string, metadata map[string]any) string {
|
||||
normalize := func(name string) string {
|
||||
if name == "" {
|
||||
return ""
|
||||
}
|
||||
if base, _ := NormalizeThinkingModel(name); base != "" {
|
||||
return base
|
||||
}
|
||||
return strings.TrimSpace(name)
|
||||
}
|
||||
|
||||
if metadata != nil {
|
||||
if v, ok := metadata[ThinkingOriginalModelMetadataKey]; ok {
|
||||
if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
|
||||
if base := normalize(s); base != "" {
|
||||
return base
|
||||
}
|
||||
}
|
||||
}
|
||||
if v, ok := metadata[GeminiOriginalModelMetadataKey]; ok {
|
||||
if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
|
||||
if base := normalize(s); base != "" {
|
||||
return base
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Fallback: try to re-normalize the model name when metadata was dropped.
|
||||
if base := normalize(model); base != "" {
|
||||
return base
|
||||
}
|
||||
return model
|
||||
}
|
||||
|
||||
func parseIntPrefix(value string) (int, bool) {
|
||||
if value == "" {
|
||||
return 0, false
|
||||
}
|
||||
digits := strings.TrimLeft(value, "-")
|
||||
if digits == "" {
|
||||
return 0, false
|
||||
}
|
||||
end := len(digits)
|
||||
for i := 0; i < len(digits); i++ {
|
||||
if digits[i] < '0' || digits[i] > '9' {
|
||||
end = i
|
||||
break
|
||||
}
|
||||
}
|
||||
if end == 0 {
|
||||
return 0, false
|
||||
}
|
||||
val, err := strconv.Atoi(digits[:end])
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
return val, true
|
||||
}
|
||||
|
||||
func parseNumberToInt(raw any) (int, bool) {
|
||||
switch v := raw.(type) {
|
||||
case int:
|
||||
return v, true
|
||||
case int32:
|
||||
return int(v), true
|
||||
case int64:
|
||||
return int(v), true
|
||||
case float64:
|
||||
return int(v), true
|
||||
case json.Number:
|
||||
if val, err := v.Int64(); err == nil {
|
||||
return int(val), true
|
||||
}
|
||||
case string:
|
||||
if strings.TrimSpace(v) == "" {
|
||||
return 0, false
|
||||
}
|
||||
if parsed, err := strconv.Atoi(strings.TrimSpace(v)); err == nil {
|
||||
return parsed, true
|
||||
}
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
@@ -271,6 +271,11 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
|
||||
continue
|
||||
}
|
||||
if errMsg != nil {
|
||||
status := http.StatusInternalServerError
|
||||
if errMsg.StatusCode > 0 {
|
||||
status = errMsg.StatusCode
|
||||
}
|
||||
c.Status(status)
|
||||
// An error occurred: emit as a proper SSE error event
|
||||
errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
|
||||
_, _ = writer.WriteString("event: error\n")
|
||||
@@ -278,6 +283,7 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
|
||||
_, _ = writer.Write(errorBytes)
|
||||
_, _ = writer.WriteString("\n\n")
|
||||
_ = writer.Flush()
|
||||
flusher.Flush()
|
||||
}
|
||||
var execErr error
|
||||
if errMsg != nil {
|
||||
|
||||
@@ -48,8 +48,24 @@ func (h *GeminiAPIHandler) Models() []map[string]any {
|
||||
// GeminiModels handles the Gemini models listing endpoint.
|
||||
// It returns a JSON response containing available Gemini models and their specifications.
|
||||
func (h *GeminiAPIHandler) GeminiModels(c *gin.Context) {
|
||||
rawModels := h.Models()
|
||||
normalizedModels := make([]map[string]any, 0, len(rawModels))
|
||||
defaultMethods := []string{"generateContent"}
|
||||
for _, model := range rawModels {
|
||||
normalizedModel := make(map[string]any, len(model))
|
||||
for k, v := range model {
|
||||
normalizedModel[k] = v
|
||||
}
|
||||
if name, ok := normalizedModel["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") {
|
||||
normalizedModel["name"] = "models/" + name
|
||||
}
|
||||
if _, ok := normalizedModel["supportedGenerationMethods"]; !ok {
|
||||
normalizedModel["supportedGenerationMethods"] = defaultMethods
|
||||
}
|
||||
normalizedModels = append(normalizedModels, normalizedModel)
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"models": h.Models(),
|
||||
"models": normalizedModels,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -323,18 +323,32 @@ func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string
|
||||
|
||||
providerName, extractedModelName, isDynamic := h.parseDynamicModel(resolvedModelName)
|
||||
|
||||
// First, normalize the model name to handle suffixes like "-thinking-128"
|
||||
// This needs to happen before determining the provider for non-dynamic models.
|
||||
normalizedModel, metadata = normalizeModelMetadata(resolvedModelName)
|
||||
targetModelName := resolvedModelName
|
||||
if isDynamic {
|
||||
targetModelName = extractedModelName
|
||||
}
|
||||
|
||||
// Normalize the model name to handle dynamic thinking suffixes before determining the provider.
|
||||
normalizedModel, metadata = normalizeModelMetadata(targetModelName)
|
||||
|
||||
if isDynamic {
|
||||
providers = []string{providerName}
|
||||
// For dynamic models, the extractedModelName is already normalized by parseDynamicModel
|
||||
// so we use it as the final normalizedModel.
|
||||
normalizedModel = extractedModelName
|
||||
} else {
|
||||
// For non-dynamic models, use the normalizedModel to get the provider name.
|
||||
providers = util.GetProviderName(normalizedModel)
|
||||
if len(providers) == 0 && metadata != nil {
|
||||
if originalRaw, ok := metadata[util.ThinkingOriginalModelMetadataKey]; ok {
|
||||
if originalModel, okStr := originalRaw.(string); okStr {
|
||||
originalModel = strings.TrimSpace(originalModel)
|
||||
if originalModel != "" && !strings.EqualFold(originalModel, normalizedModel) {
|
||||
if altProviders := util.GetProviderName(originalModel); len(altProviders) > 0 {
|
||||
providers = altProviders
|
||||
normalizedModel = originalModel
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(providers) == 0 {
|
||||
@@ -382,7 +396,7 @@ func cloneBytes(src []byte) []byte {
|
||||
}
|
||||
|
||||
func normalizeModelMetadata(modelName string) (string, map[string]any) {
|
||||
return util.NormalizeGeminiThinkingModel(modelName)
|
||||
return util.NormalizeThinkingModel(modelName)
|
||||
}
|
||||
|
||||
func cloneMetadata(src map[string]any) map[string]any {
|
||||
|
||||
@@ -498,7 +498,7 @@ func (s *Service) Run(ctx context.Context) error {
|
||||
}()
|
||||
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
fmt.Printf("API server started successfully on: %d\n", s.cfg.Port)
|
||||
fmt.Printf("API server started successfully on: %s:%d\n", s.cfg.Host, s.cfg.Port)
|
||||
|
||||
if s.hooks.OnAfterStart != nil {
|
||||
s.hooks.OnAfterStart(s)
|
||||
@@ -779,7 +779,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
|
||||
Created: time.Now().Unix(),
|
||||
OwnedBy: compat.Name,
|
||||
Type: "openai-compatibility",
|
||||
DisplayName: m.Name,
|
||||
DisplayName: modelID,
|
||||
})
|
||||
}
|
||||
// Register and return
|
||||
|
||||
827
test/amp_management_test.go
Normal file
827
test/amp_management_test.go
Normal file
@@ -0,0 +1,827 @@
|
||||
package test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/api/handlers/management"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
)
|
||||
|
||||
func init() {
|
||||
gin.SetMode(gin.TestMode)
|
||||
}
|
||||
|
||||
// newAmpTestHandler creates a test handler with default ampcode configuration.
|
||||
func newAmpTestHandler(t *testing.T) (*management.Handler, string) {
|
||||
t.Helper()
|
||||
tmpDir := t.TempDir()
|
||||
configPath := filepath.Join(tmpDir, "config.yaml")
|
||||
|
||||
cfg := &config.Config{
|
||||
AmpCode: config.AmpCode{
|
||||
UpstreamURL: "https://example.com",
|
||||
UpstreamAPIKey: "test-api-key-12345",
|
||||
RestrictManagementToLocalhost: true,
|
||||
ForceModelMappings: false,
|
||||
ModelMappings: []config.AmpModelMapping{
|
||||
{From: "gpt-4", To: "gemini-pro"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
if err := os.WriteFile(configPath, []byte("port: 8080\n"), 0644); err != nil {
|
||||
t.Fatalf("failed to write config file: %v", err)
|
||||
}
|
||||
|
||||
h := management.NewHandler(cfg, configPath, nil)
|
||||
return h, configPath
|
||||
}
|
||||
|
||||
// setupAmpRouter creates a test router with all ampcode management endpoints.
|
||||
func setupAmpRouter(h *management.Handler) *gin.Engine {
|
||||
r := gin.New()
|
||||
mgmt := r.Group("/v0/management")
|
||||
{
|
||||
mgmt.GET("/ampcode", h.GetAmpCode)
|
||||
mgmt.GET("/ampcode/upstream-url", h.GetAmpUpstreamURL)
|
||||
mgmt.PUT("/ampcode/upstream-url", h.PutAmpUpstreamURL)
|
||||
mgmt.DELETE("/ampcode/upstream-url", h.DeleteAmpUpstreamURL)
|
||||
mgmt.GET("/ampcode/upstream-api-key", h.GetAmpUpstreamAPIKey)
|
||||
mgmt.PUT("/ampcode/upstream-api-key", h.PutAmpUpstreamAPIKey)
|
||||
mgmt.DELETE("/ampcode/upstream-api-key", h.DeleteAmpUpstreamAPIKey)
|
||||
mgmt.GET("/ampcode/restrict-management-to-localhost", h.GetAmpRestrictManagementToLocalhost)
|
||||
mgmt.PUT("/ampcode/restrict-management-to-localhost", h.PutAmpRestrictManagementToLocalhost)
|
||||
mgmt.GET("/ampcode/model-mappings", h.GetAmpModelMappings)
|
||||
mgmt.PUT("/ampcode/model-mappings", h.PutAmpModelMappings)
|
||||
mgmt.PATCH("/ampcode/model-mappings", h.PatchAmpModelMappings)
|
||||
mgmt.DELETE("/ampcode/model-mappings", h.DeleteAmpModelMappings)
|
||||
mgmt.GET("/ampcode/force-model-mappings", h.GetAmpForceModelMappings)
|
||||
mgmt.PUT("/ampcode/force-model-mappings", h.PutAmpForceModelMappings)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// TestGetAmpCode verifies GET /v0/management/ampcode returns full ampcode config.
|
||||
func TestGetAmpCode(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/v0/management/ampcode", nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
|
||||
var resp map[string]config.AmpCode
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal response: %v", err)
|
||||
}
|
||||
|
||||
ampcode := resp["ampcode"]
|
||||
if ampcode.UpstreamURL != "https://example.com" {
|
||||
t.Errorf("expected upstream-url %q, got %q", "https://example.com", ampcode.UpstreamURL)
|
||||
}
|
||||
if len(ampcode.ModelMappings) != 1 {
|
||||
t.Errorf("expected 1 model mapping, got %d", len(ampcode.ModelMappings))
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetAmpUpstreamURL verifies GET /v0/management/ampcode/upstream-url returns the upstream URL.
|
||||
func TestGetAmpUpstreamURL(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/upstream-url", nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
|
||||
var resp map[string]string
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal response: %v", err)
|
||||
}
|
||||
|
||||
if resp["upstream-url"] != "https://example.com" {
|
||||
t.Errorf("expected %q, got %q", "https://example.com", resp["upstream-url"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestPutAmpUpstreamURL verifies PUT /v0/management/ampcode/upstream-url updates the upstream URL.
|
||||
func TestPutAmpUpstreamURL(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{"value": "https://new-upstream.com"}`
|
||||
req := httptest.NewRequest(http.MethodPut, "/v0/management/ampcode/upstream-url", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d: %s", http.StatusOK, w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestDeleteAmpUpstreamURL verifies DELETE /v0/management/ampcode/upstream-url clears the upstream URL.
|
||||
func TestDeleteAmpUpstreamURL(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
req := httptest.NewRequest(http.MethodDelete, "/v0/management/ampcode/upstream-url", nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetAmpUpstreamAPIKey verifies GET /v0/management/ampcode/upstream-api-key returns the API key.
|
||||
func TestGetAmpUpstreamAPIKey(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/upstream-api-key", nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
|
||||
var resp map[string]any
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal response: %v", err)
|
||||
}
|
||||
|
||||
key := resp["upstream-api-key"].(string)
|
||||
if key != "test-api-key-12345" {
|
||||
t.Errorf("expected key %q, got %q", "test-api-key-12345", key)
|
||||
}
|
||||
}
|
||||
|
||||
// TestPutAmpUpstreamAPIKey verifies PUT /v0/management/ampcode/upstream-api-key updates the API key.
|
||||
func TestPutAmpUpstreamAPIKey(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{"value": "new-secret-key"}`
|
||||
req := httptest.NewRequest(http.MethodPut, "/v0/management/ampcode/upstream-api-key", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDeleteAmpUpstreamAPIKey verifies DELETE /v0/management/ampcode/upstream-api-key clears the API key.
|
||||
func TestDeleteAmpUpstreamAPIKey(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
req := httptest.NewRequest(http.MethodDelete, "/v0/management/ampcode/upstream-api-key", nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetAmpRestrictManagementToLocalhost verifies GET returns the localhost restriction setting.
|
||||
func TestGetAmpRestrictManagementToLocalhost(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/restrict-management-to-localhost", nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
|
||||
var resp map[string]bool
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal response: %v", err)
|
||||
}
|
||||
|
||||
if resp["restrict-management-to-localhost"] != true {
|
||||
t.Error("expected restrict-management-to-localhost to be true")
|
||||
}
|
||||
}
|
||||
|
||||
// TestPutAmpRestrictManagementToLocalhost verifies PUT updates the localhost restriction setting.
|
||||
func TestPutAmpRestrictManagementToLocalhost(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{"value": false}`
|
||||
req := httptest.NewRequest(http.MethodPut, "/v0/management/ampcode/restrict-management-to-localhost", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetAmpModelMappings verifies GET /v0/management/ampcode/model-mappings returns all mappings.
|
||||
func TestGetAmpModelMappings(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/model-mappings", nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
|
||||
var resp map[string][]config.AmpModelMapping
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal response: %v", err)
|
||||
}
|
||||
|
||||
mappings := resp["model-mappings"]
|
||||
if len(mappings) != 1 {
|
||||
t.Fatalf("expected 1 mapping, got %d", len(mappings))
|
||||
}
|
||||
if mappings[0].From != "gpt-4" || mappings[0].To != "gemini-pro" {
|
||||
t.Errorf("unexpected mapping: %+v", mappings[0])
|
||||
}
|
||||
}
|
||||
|
||||
// TestPutAmpModelMappings verifies PUT /v0/management/ampcode/model-mappings replaces all mappings.
|
||||
func TestPutAmpModelMappings(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{"value": [{"from": "claude-3", "to": "gpt-4o"}, {"from": "gemini", "to": "claude"}]}`
|
||||
req := httptest.NewRequest(http.MethodPut, "/v0/management/ampcode/model-mappings", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d: %s", http.StatusOK, w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestPatchAmpModelMappings verifies PATCH updates existing mappings and adds new ones.
|
||||
func TestPatchAmpModelMappings(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{"value": [{"from": "gpt-4", "to": "updated-model"}, {"from": "new-model", "to": "target"}]}`
|
||||
req := httptest.NewRequest(http.MethodPatch, "/v0/management/ampcode/model-mappings", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d: %s", http.StatusOK, w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestDeleteAmpModelMappings_Specific verifies DELETE removes specified mappings by "from" field.
|
||||
func TestDeleteAmpModelMappings_Specific(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{"value": ["gpt-4"]}`
|
||||
req := httptest.NewRequest(http.MethodDelete, "/v0/management/ampcode/model-mappings", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDeleteAmpModelMappings_All verifies DELETE with empty body removes all mappings.
|
||||
func TestDeleteAmpModelMappings_All(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
req := httptest.NewRequest(http.MethodDelete, "/v0/management/ampcode/model-mappings", nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetAmpForceModelMappings verifies GET returns the force-model-mappings setting.
|
||||
func TestGetAmpForceModelMappings(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/force-model-mappings", nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
|
||||
var resp map[string]bool
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal response: %v", err)
|
||||
}
|
||||
|
||||
if resp["force-model-mappings"] != false {
|
||||
t.Error("expected force-model-mappings to be false")
|
||||
}
|
||||
}
|
||||
|
||||
// TestPutAmpForceModelMappings verifies PUT updates the force-model-mappings setting.
|
||||
func TestPutAmpForceModelMappings(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{"value": true}`
|
||||
req := httptest.NewRequest(http.MethodPut, "/v0/management/ampcode/force-model-mappings", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestPutAmpModelMappings_VerifyState verifies PUT replaces mappings and state is persisted.
|
||||
func TestPutAmpModelMappings_VerifyState(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{"value": [{"from": "model-a", "to": "model-b"}, {"from": "model-c", "to": "model-d"}, {"from": "model-e", "to": "model-f"}]}`
|
||||
req := httptest.NewRequest(http.MethodPut, "/v0/management/ampcode/model-mappings", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("PUT failed: status %d, body: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/model-mappings", nil)
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
var resp map[string][]config.AmpModelMapping
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %v", err)
|
||||
}
|
||||
|
||||
mappings := resp["model-mappings"]
|
||||
if len(mappings) != 3 {
|
||||
t.Fatalf("expected 3 mappings, got %d", len(mappings))
|
||||
}
|
||||
|
||||
expected := map[string]string{"model-a": "model-b", "model-c": "model-d", "model-e": "model-f"}
|
||||
for _, m := range mappings {
|
||||
if expected[m.From] != m.To {
|
||||
t.Errorf("mapping %q -> expected %q, got %q", m.From, expected[m.From], m.To)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestPatchAmpModelMappings_VerifyState verifies PATCH merges mappings correctly.
|
||||
func TestPatchAmpModelMappings_VerifyState(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{"value": [{"from": "gpt-4", "to": "updated-target"}, {"from": "new-model", "to": "new-target"}]}`
|
||||
req := httptest.NewRequest(http.MethodPatch, "/v0/management/ampcode/model-mappings", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("PATCH failed: status %d", w.Code)
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/model-mappings", nil)
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
var resp map[string][]config.AmpModelMapping
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %v", err)
|
||||
}
|
||||
|
||||
mappings := resp["model-mappings"]
|
||||
if len(mappings) != 2 {
|
||||
t.Fatalf("expected 2 mappings (1 updated + 1 new), got %d", len(mappings))
|
||||
}
|
||||
|
||||
found := make(map[string]string)
|
||||
for _, m := range mappings {
|
||||
found[m.From] = m.To
|
||||
}
|
||||
|
||||
if found["gpt-4"] != "updated-target" {
|
||||
t.Errorf("gpt-4 should map to updated-target, got %q", found["gpt-4"])
|
||||
}
|
||||
if found["new-model"] != "new-target" {
|
||||
t.Errorf("new-model should map to new-target, got %q", found["new-model"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestDeleteAmpModelMappings_VerifyState verifies DELETE removes specific mappings and keeps others.
|
||||
func TestDeleteAmpModelMappings_VerifyState(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
putBody := `{"value": [{"from": "a", "to": "1"}, {"from": "b", "to": "2"}, {"from": "c", "to": "3"}]}`
|
||||
req := httptest.NewRequest(http.MethodPut, "/v0/management/ampcode/model-mappings", bytes.NewBufferString(putBody))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
delBody := `{"value": ["a", "c"]}`
|
||||
req = httptest.NewRequest(http.MethodDelete, "/v0/management/ampcode/model-mappings", bytes.NewBufferString(delBody))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("DELETE failed: status %d", w.Code)
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/model-mappings", nil)
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
var resp map[string][]config.AmpModelMapping
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %v", err)
|
||||
}
|
||||
|
||||
mappings := resp["model-mappings"]
|
||||
if len(mappings) != 1 {
|
||||
t.Fatalf("expected 1 mapping remaining, got %d", len(mappings))
|
||||
}
|
||||
if mappings[0].From != "b" || mappings[0].To != "2" {
|
||||
t.Errorf("expected b->2, got %s->%s", mappings[0].From, mappings[0].To)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDeleteAmpModelMappings_NonExistent verifies DELETE with non-existent mapping doesn't affect existing ones.
|
||||
func TestDeleteAmpModelMappings_NonExistent(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
delBody := `{"value": ["non-existent-model"]}`
|
||||
req := httptest.NewRequest(http.MethodDelete, "/v0/management/ampcode/model-mappings", bytes.NewBufferString(delBody))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/model-mappings", nil)
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
var resp map[string][]config.AmpModelMapping
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %v", err)
|
||||
}
|
||||
|
||||
if len(resp["model-mappings"]) != 1 {
|
||||
t.Errorf("original mapping should remain, got %d mappings", len(resp["model-mappings"]))
|
||||
}
|
||||
}
|
||||
|
||||
// TestPutAmpModelMappings_Empty verifies PUT with empty array clears all mappings.
|
||||
func TestPutAmpModelMappings_Empty(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{"value": []}`
|
||||
req := httptest.NewRequest(http.MethodPut, "/v0/management/ampcode/model-mappings", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/model-mappings", nil)
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
var resp map[string][]config.AmpModelMapping
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %v", err)
|
||||
}
|
||||
|
||||
if len(resp["model-mappings"]) != 0 {
|
||||
t.Errorf("expected 0 mappings, got %d", len(resp["model-mappings"]))
|
||||
}
|
||||
}
|
||||
|
||||
// TestPutAmpUpstreamURL_VerifyState verifies PUT updates upstream URL and persists state.
|
||||
func TestPutAmpUpstreamURL_VerifyState(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{"value": "https://new-api.example.com"}`
|
||||
req := httptest.NewRequest(http.MethodPut, "/v0/management/ampcode/upstream-url", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("PUT failed: status %d", w.Code)
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/upstream-url", nil)
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
var resp map[string]string
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %v", err)
|
||||
}
|
||||
|
||||
if resp["upstream-url"] != "https://new-api.example.com" {
|
||||
t.Errorf("expected %q, got %q", "https://new-api.example.com", resp["upstream-url"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestDeleteAmpUpstreamURL_VerifyState verifies DELETE clears upstream URL.
|
||||
func TestDeleteAmpUpstreamURL_VerifyState(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
req := httptest.NewRequest(http.MethodDelete, "/v0/management/ampcode/upstream-url", nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("DELETE failed: status %d", w.Code)
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/upstream-url", nil)
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
var resp map[string]string
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %v", err)
|
||||
}
|
||||
|
||||
if resp["upstream-url"] != "" {
|
||||
t.Errorf("expected empty string, got %q", resp["upstream-url"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestPutAmpUpstreamAPIKey_VerifyState verifies PUT updates API key and persists state.
|
||||
func TestPutAmpUpstreamAPIKey_VerifyState(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{"value": "new-secret-api-key-xyz"}`
|
||||
req := httptest.NewRequest(http.MethodPut, "/v0/management/ampcode/upstream-api-key", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("PUT failed: status %d", w.Code)
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/upstream-api-key", nil)
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
var resp map[string]string
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %v", err)
|
||||
}
|
||||
|
||||
if resp["upstream-api-key"] != "new-secret-api-key-xyz" {
|
||||
t.Errorf("expected %q, got %q", "new-secret-api-key-xyz", resp["upstream-api-key"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestDeleteAmpUpstreamAPIKey_VerifyState verifies DELETE clears API key.
|
||||
func TestDeleteAmpUpstreamAPIKey_VerifyState(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
req := httptest.NewRequest(http.MethodDelete, "/v0/management/ampcode/upstream-api-key", nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("DELETE failed: status %d", w.Code)
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/upstream-api-key", nil)
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
var resp map[string]string
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %v", err)
|
||||
}
|
||||
|
||||
if resp["upstream-api-key"] != "" {
|
||||
t.Errorf("expected empty string, got %q", resp["upstream-api-key"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestPutAmpRestrictManagementToLocalhost_VerifyState verifies PUT updates localhost restriction.
|
||||
func TestPutAmpRestrictManagementToLocalhost_VerifyState(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{"value": false}`
|
||||
req := httptest.NewRequest(http.MethodPut, "/v0/management/ampcode/restrict-management-to-localhost", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("PUT failed: status %d", w.Code)
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/restrict-management-to-localhost", nil)
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
var resp map[string]bool
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %v", err)
|
||||
}
|
||||
|
||||
if resp["restrict-management-to-localhost"] != false {
|
||||
t.Error("expected false after update")
|
||||
}
|
||||
}
|
||||
|
||||
// TestPutAmpForceModelMappings_VerifyState verifies PUT updates force-model-mappings setting.
|
||||
func TestPutAmpForceModelMappings_VerifyState(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{"value": true}`
|
||||
req := httptest.NewRequest(http.MethodPut, "/v0/management/ampcode/force-model-mappings", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("PUT failed: status %d", w.Code)
|
||||
}
|
||||
|
||||
req = httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/force-model-mappings", nil)
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
var resp map[string]bool
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %v", err)
|
||||
}
|
||||
|
||||
if resp["force-model-mappings"] != true {
|
||||
t.Error("expected true after update")
|
||||
}
|
||||
}
|
||||
|
||||
// TestPutBoolField_EmptyObject verifies PUT with empty object returns 400.
|
||||
func TestPutBoolField_EmptyObject(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
body := `{}`
|
||||
req := httptest.NewRequest(http.MethodPut, "/v0/management/ampcode/force-model-mappings", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected status %d for empty object, got %d", http.StatusBadRequest, w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestComplexMappingsWorkflow tests a full workflow: PUT, PATCH, DELETE, and GET.
|
||||
func TestComplexMappingsWorkflow(t *testing.T) {
|
||||
h, _ := newAmpTestHandler(t)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
putBody := `{"value": [{"from": "m1", "to": "t1"}, {"from": "m2", "to": "t2"}, {"from": "m3", "to": "t3"}, {"from": "m4", "to": "t4"}]}`
|
||||
req := httptest.NewRequest(http.MethodPut, "/v0/management/ampcode/model-mappings", bytes.NewBufferString(putBody))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
patchBody := `{"value": [{"from": "m2", "to": "t2-updated"}, {"from": "m5", "to": "t5"}]}`
|
||||
req = httptest.NewRequest(http.MethodPatch, "/v0/management/ampcode/model-mappings", bytes.NewBufferString(patchBody))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
delBody := `{"value": ["m1", "m3"]}`
|
||||
req = httptest.NewRequest(http.MethodDelete, "/v0/management/ampcode/model-mappings", bytes.NewBufferString(delBody))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
req = httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/model-mappings", nil)
|
||||
w = httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
var resp map[string][]config.AmpModelMapping
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %v", err)
|
||||
}
|
||||
|
||||
mappings := resp["model-mappings"]
|
||||
if len(mappings) != 3 {
|
||||
t.Fatalf("expected 3 mappings (m2, m4, m5), got %d", len(mappings))
|
||||
}
|
||||
|
||||
expected := map[string]string{"m2": "t2-updated", "m4": "t4", "m5": "t5"}
|
||||
found := make(map[string]string)
|
||||
for _, m := range mappings {
|
||||
found[m.From] = m.To
|
||||
}
|
||||
|
||||
for from, to := range expected {
|
||||
if found[from] != to {
|
||||
t.Errorf("mapping %s: expected %q, got %q", from, to, found[from])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestNilHandlerGetAmpCode verifies handler works with empty config.
|
||||
func TestNilHandlerGetAmpCode(t *testing.T) {
|
||||
cfg := &config.Config{}
|
||||
h := management.NewHandler(cfg, "", nil)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/v0/management/ampcode", nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestEmptyConfigGetAmpModelMappings verifies GET returns empty array for fresh config.
|
||||
func TestEmptyConfigGetAmpModelMappings(t *testing.T) {
|
||||
cfg := &config.Config{}
|
||||
tmpDir := t.TempDir()
|
||||
configPath := filepath.Join(tmpDir, "config.yaml")
|
||||
if err := os.WriteFile(configPath, []byte("port: 8080\n"), 0644); err != nil {
|
||||
t.Fatalf("failed to write config: %v", err)
|
||||
}
|
||||
|
||||
h := management.NewHandler(cfg, configPath, nil)
|
||||
r := setupAmpRouter(h)
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/v0/management/ampcode/model-mappings", nil)
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
|
||||
var resp map[string][]config.AmpModelMapping
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal: %v", err)
|
||||
}
|
||||
|
||||
if len(resp["model-mappings"]) != 0 {
|
||||
t.Errorf("expected 0 mappings, got %d", len(resp["model-mappings"]))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user