mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-06 06:20:51 +08:00
Compare commits
27 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
750b930679 | ||
|
|
3902fd7501 | ||
|
|
4fc3d5e935 | ||
|
|
2d2f4572a7 | ||
|
|
8f4c46f38d | ||
|
|
b6ba51bc2a | ||
|
|
6a66d32d37 | ||
|
|
8d15723195 | ||
|
|
736e0aae86 | ||
|
|
8bf3305b2b | ||
|
|
d00e3ea973 | ||
|
|
89db4e9481 | ||
|
|
e332419081 | ||
|
|
e998b1229a | ||
|
|
bbed134bd1 | ||
|
|
47b9503112 | ||
|
|
3b9253c2be | ||
|
|
d241359153 | ||
|
|
f4d4249ba5 | ||
|
|
cb56cb250e | ||
|
|
e0381a6ae0 | ||
|
|
2c01b2ef64 | ||
|
|
e947266743 | ||
|
|
c6b0e85b54 | ||
|
|
26efbed05c | ||
|
|
96340bf136 | ||
|
|
b055e00c1a |
@@ -24,10 +24,11 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
defaultManagementReleaseURL = "https://api.github.com/repos/router-for-me/Cli-Proxy-API-Management-Center/releases/latest"
|
||||
managementAssetName = "management.html"
|
||||
httpUserAgent = "CLIProxyAPI-management-updater"
|
||||
updateCheckInterval = 3 * time.Hour
|
||||
defaultManagementReleaseURL = "https://api.github.com/repos/router-for-me/Cli-Proxy-API-Management-Center/releases/latest"
|
||||
defaultManagementFallbackURL = "https://cpamc.router-for.me/"
|
||||
managementAssetName = "management.html"
|
||||
httpUserAgent = "CLIProxyAPI-management-updater"
|
||||
updateCheckInterval = 3 * time.Hour
|
||||
)
|
||||
|
||||
// ManagementFileName exposes the control panel asset filename.
|
||||
@@ -198,6 +199,16 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
|
||||
return
|
||||
}
|
||||
|
||||
localPath := filepath.Join(staticDir, managementAssetName)
|
||||
localFileMissing := false
|
||||
if _, errStat := os.Stat(localPath); errStat != nil {
|
||||
if errors.Is(errStat, os.ErrNotExist) {
|
||||
localFileMissing = true
|
||||
} else {
|
||||
log.WithError(errStat).Debug("failed to stat local management asset")
|
||||
}
|
||||
}
|
||||
|
||||
// Rate limiting: check only once every 3 hours
|
||||
lastUpdateCheckMu.Lock()
|
||||
now := time.Now()
|
||||
@@ -210,15 +221,14 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
|
||||
lastUpdateCheckTime = now
|
||||
lastUpdateCheckMu.Unlock()
|
||||
|
||||
if err := os.MkdirAll(staticDir, 0o755); err != nil {
|
||||
log.WithError(err).Warn("failed to prepare static directory for management asset")
|
||||
if errMkdirAll := os.MkdirAll(staticDir, 0o755); errMkdirAll != nil {
|
||||
log.WithError(errMkdirAll).Warn("failed to prepare static directory for management asset")
|
||||
return
|
||||
}
|
||||
|
||||
releaseURL := resolveReleaseURL(panelRepository)
|
||||
client := newHTTPClient(proxyURL)
|
||||
|
||||
localPath := filepath.Join(staticDir, managementAssetName)
|
||||
localHash, err := fileSHA256(localPath)
|
||||
if err != nil {
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
@@ -229,6 +239,13 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
|
||||
|
||||
asset, remoteHash, err := fetchLatestAsset(ctx, client, releaseURL)
|
||||
if err != nil {
|
||||
if localFileMissing {
|
||||
log.WithError(err).Warn("failed to fetch latest management release information, trying fallback page")
|
||||
if ensureFallbackManagementHTML(ctx, client, localPath) {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
log.WithError(err).Warn("failed to fetch latest management release information")
|
||||
return
|
||||
}
|
||||
@@ -240,6 +257,13 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
|
||||
|
||||
data, downloadedHash, err := downloadAsset(ctx, client, asset.BrowserDownloadURL)
|
||||
if err != nil {
|
||||
if localFileMissing {
|
||||
log.WithError(err).Warn("failed to download management asset, trying fallback page")
|
||||
if ensureFallbackManagementHTML(ctx, client, localPath) {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
log.WithError(err).Warn("failed to download management asset")
|
||||
return
|
||||
}
|
||||
@@ -256,6 +280,22 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
|
||||
log.Infof("management asset updated successfully (hash=%s)", downloadedHash)
|
||||
}
|
||||
|
||||
func ensureFallbackManagementHTML(ctx context.Context, client *http.Client, localPath string) bool {
|
||||
data, downloadedHash, err := downloadAsset(ctx, client, defaultManagementFallbackURL)
|
||||
if err != nil {
|
||||
log.WithError(err).Warn("failed to download fallback management control panel page")
|
||||
return false
|
||||
}
|
||||
|
||||
if err = atomicWriteFile(localPath, data); err != nil {
|
||||
log.WithError(err).Warn("failed to persist fallback management control panel page")
|
||||
return false
|
||||
}
|
||||
|
||||
log.Infof("management asset updated from fallback page successfully (hash=%s)", downloadedHash)
|
||||
return true
|
||||
}
|
||||
|
||||
func resolveReleaseURL(repo string) string {
|
||||
repo = strings.TrimSpace(repo)
|
||||
if repo == "" {
|
||||
|
||||
@@ -740,7 +740,7 @@ func GetIFlowModels() []*ModelInfo {
|
||||
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
|
||||
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
|
||||
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
|
||||
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
|
||||
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
|
||||
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
|
||||
}
|
||||
models := make([]*ModelInfo, 0, len(entries))
|
||||
@@ -773,7 +773,7 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
|
||||
return map[string]*AntigravityModelConfig{
|
||||
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
|
||||
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
|
||||
"gemini-2.5-computer-use-preview-10-2025": {Name: "models/gemini-2.5-computer-use-preview-10-2025"},
|
||||
"gemini-2.5-computer-use-preview-10-2025": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-2.5-computer-use-preview-10-2025"},
|
||||
"gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"},
|
||||
"gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"},
|
||||
"gemini-3-flash-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"},
|
||||
|
||||
@@ -625,6 +625,131 @@ func (r *ModelRegistry) GetAvailableModels(handlerType string) []map[string]any
|
||||
return models
|
||||
}
|
||||
|
||||
// GetAvailableModelsByProvider returns models available for the given provider identifier.
|
||||
// Parameters:
|
||||
// - provider: Provider identifier (e.g., "codex", "gemini", "antigravity")
|
||||
//
|
||||
// Returns:
|
||||
// - []*ModelInfo: List of available models for the provider
|
||||
func (r *ModelRegistry) GetAvailableModelsByProvider(provider string) []*ModelInfo {
|
||||
provider = strings.ToLower(strings.TrimSpace(provider))
|
||||
if provider == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
r.mutex.RLock()
|
||||
defer r.mutex.RUnlock()
|
||||
|
||||
type providerModel struct {
|
||||
count int
|
||||
info *ModelInfo
|
||||
}
|
||||
|
||||
providerModels := make(map[string]*providerModel)
|
||||
|
||||
for clientID, clientProvider := range r.clientProviders {
|
||||
if clientProvider != provider {
|
||||
continue
|
||||
}
|
||||
modelIDs := r.clientModels[clientID]
|
||||
if len(modelIDs) == 0 {
|
||||
continue
|
||||
}
|
||||
clientInfos := r.clientModelInfos[clientID]
|
||||
for _, modelID := range modelIDs {
|
||||
modelID = strings.TrimSpace(modelID)
|
||||
if modelID == "" {
|
||||
continue
|
||||
}
|
||||
entry := providerModels[modelID]
|
||||
if entry == nil {
|
||||
entry = &providerModel{}
|
||||
providerModels[modelID] = entry
|
||||
}
|
||||
entry.count++
|
||||
if entry.info == nil {
|
||||
if clientInfos != nil {
|
||||
if info := clientInfos[modelID]; info != nil {
|
||||
entry.info = info
|
||||
}
|
||||
}
|
||||
if entry.info == nil {
|
||||
if reg, ok := r.models[modelID]; ok && reg != nil && reg.Info != nil {
|
||||
entry.info = reg.Info
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(providerModels) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
quotaExpiredDuration := 5 * time.Minute
|
||||
now := time.Now()
|
||||
result := make([]*ModelInfo, 0, len(providerModels))
|
||||
|
||||
for modelID, entry := range providerModels {
|
||||
if entry == nil || entry.count <= 0 {
|
||||
continue
|
||||
}
|
||||
registration, ok := r.models[modelID]
|
||||
|
||||
expiredClients := 0
|
||||
cooldownSuspended := 0
|
||||
otherSuspended := 0
|
||||
if ok && registration != nil {
|
||||
if registration.QuotaExceededClients != nil {
|
||||
for clientID, quotaTime := range registration.QuotaExceededClients {
|
||||
if clientID == "" {
|
||||
continue
|
||||
}
|
||||
if p, okProvider := r.clientProviders[clientID]; !okProvider || p != provider {
|
||||
continue
|
||||
}
|
||||
if quotaTime != nil && now.Sub(*quotaTime) < quotaExpiredDuration {
|
||||
expiredClients++
|
||||
}
|
||||
}
|
||||
}
|
||||
if registration.SuspendedClients != nil {
|
||||
for clientID, reason := range registration.SuspendedClients {
|
||||
if clientID == "" {
|
||||
continue
|
||||
}
|
||||
if p, okProvider := r.clientProviders[clientID]; !okProvider || p != provider {
|
||||
continue
|
||||
}
|
||||
if strings.EqualFold(reason, "quota") {
|
||||
cooldownSuspended++
|
||||
continue
|
||||
}
|
||||
otherSuspended++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
availableClients := entry.count
|
||||
effectiveClients := availableClients - expiredClients - otherSuspended
|
||||
if effectiveClients < 0 {
|
||||
effectiveClients = 0
|
||||
}
|
||||
|
||||
if effectiveClients > 0 || (availableClients > 0 && (expiredClients > 0 || cooldownSuspended > 0) && otherSuspended == 0) {
|
||||
if entry.info != nil {
|
||||
result = append(result, entry.info)
|
||||
continue
|
||||
}
|
||||
if ok && registration != nil && registration.Info != nil {
|
||||
result = append(result, registration.Info)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// GetModelCount returns the number of available clients for a specific model
|
||||
// Parameters:
|
||||
// - modelID: The model ID to check
|
||||
|
||||
@@ -59,6 +59,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
|
||||
endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt)
|
||||
wsReq := &wsrelay.HTTPRequest{
|
||||
Method: http.MethodPost,
|
||||
@@ -113,6 +114,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt)
|
||||
wsReq := &wsrelay.HTTPRequest{
|
||||
Method: http.MethodPost,
|
||||
|
||||
@@ -76,11 +76,7 @@ func (e *AntigravityExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Au
|
||||
|
||||
// Execute performs a non-streaming request to the Antigravity API.
|
||||
func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
}
|
||||
isClaude := strings.Contains(strings.ToLower(upstreamModel), "claude")
|
||||
isClaude := strings.Contains(strings.ToLower(req.Model), "claude")
|
||||
if isClaude {
|
||||
return e.executeClaudeNonStream(ctx, auth, req, opts)
|
||||
}
|
||||
@@ -100,9 +96,9 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
|
||||
to := sdktranslator.FromString("antigravity")
|
||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
|
||||
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||
translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||
translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
|
||||
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
|
||||
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
|
||||
translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
|
||||
|
||||
@@ -114,7 +110,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
|
||||
var lastErr error
|
||||
|
||||
for idx, baseURL := range baseURLs {
|
||||
httpReq, errReq := e.buildRequest(ctx, auth, token, upstreamModel, translated, false, opts.Alt, baseURL)
|
||||
httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, false, opts.Alt, baseURL)
|
||||
if errReq != nil {
|
||||
err = errReq
|
||||
return resp, err
|
||||
@@ -195,14 +191,9 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
|
||||
to := sdktranslator.FromString("antigravity")
|
||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
}
|
||||
|
||||
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||
translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||
translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
|
||||
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
|
||||
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
|
||||
translated = normalizeAntigravityThinking(req.Model, translated, true)
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
|
||||
|
||||
@@ -214,7 +205,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
|
||||
var lastErr error
|
||||
|
||||
for idx, baseURL := range baseURLs {
|
||||
httpReq, errReq := e.buildRequest(ctx, auth, token, upstreamModel, translated, true, opts.Alt, baseURL)
|
||||
httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL)
|
||||
if errReq != nil {
|
||||
err = errReq
|
||||
return resp, err
|
||||
@@ -530,19 +521,15 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
isClaude := strings.Contains(strings.ToLower(req.Model), "claude")
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("antigravity")
|
||||
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
}
|
||||
isClaude := strings.Contains(strings.ToLower(upstreamModel), "claude")
|
||||
|
||||
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||
translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
|
||||
translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
|
||||
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
|
||||
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
|
||||
translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
|
||||
|
||||
@@ -554,7 +541,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
|
||||
var lastErr error
|
||||
|
||||
for idx, baseURL := range baseURLs {
|
||||
httpReq, errReq := e.buildRequest(ctx, auth, token, upstreamModel, translated, true, opts.Alt, baseURL)
|
||||
httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL)
|
||||
if errReq != nil {
|
||||
err = errReq
|
||||
return nil, err
|
||||
@@ -692,11 +679,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
|
||||
to := sdktranslator.FromString("antigravity")
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
}
|
||||
isClaude := strings.Contains(strings.ToLower(upstreamModel), "claude")
|
||||
isClaude := strings.Contains(strings.ToLower(req.Model), "claude")
|
||||
|
||||
baseURLs := antigravityBaseURLFallbackOrder(auth)
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
@@ -714,8 +697,8 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
|
||||
|
||||
for idx, baseURL := range baseURLs {
|
||||
payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model)
|
||||
payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, payload)
|
||||
payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
|
||||
payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, payload)
|
||||
payload = normalizeAntigravityThinking(req.Model, payload, isClaude)
|
||||
payload = deleteJSONField(payload, "project")
|
||||
payload = deleteJSONField(payload, "model")
|
||||
|
||||
@@ -49,36 +49,29 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
}
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("claude")
|
||||
// Use streaming translation to preserve function calling, except for claude.
|
||||
stream := from != to
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
}
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
}
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
// Inject thinking config based on model metadata for thinking variants
|
||||
body = e.injectThinkingConfig(req.Model, req.Metadata, body)
|
||||
body = e.injectThinkingConfig(model, req.Metadata, body)
|
||||
|
||||
if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
|
||||
if !strings.HasPrefix(model, "claude-3-5-haiku") {
|
||||
body = checkSystemInstructions(body)
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, model, body)
|
||||
|
||||
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
||||
body = disableThinkingIfToolChoiceForced(body)
|
||||
|
||||
// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
|
||||
body = ensureMaxTokensForThinking(req.Model, body)
|
||||
body = ensureMaxTokensForThinking(model, body)
|
||||
|
||||
// Extract betas from body and convert to header
|
||||
var extraBetas []string
|
||||
@@ -170,29 +163,22 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("claude")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
}
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
// Inject thinking config based on model metadata for thinking variants
|
||||
body = e.injectThinkingConfig(req.Model, req.Metadata, body)
|
||||
body = e.injectThinkingConfig(model, req.Metadata, body)
|
||||
body = checkSystemInstructions(body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, model, body)
|
||||
|
||||
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
||||
body = disableThinkingIfToolChoiceForced(body)
|
||||
|
||||
// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
|
||||
body = ensureMaxTokensForThinking(req.Model, body)
|
||||
body = ensureMaxTokensForThinking(model, body)
|
||||
|
||||
// Extract betas from body and convert to header
|
||||
var extraBetas []string
|
||||
@@ -316,21 +302,14 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
|
||||
to := sdktranslator.FromString("claude")
|
||||
// Use streaming translation to preserve function calling, except for claude.
|
||||
stream := from != to
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
}
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
|
||||
if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
|
||||
if !strings.HasPrefix(model, "claude-3-5-haiku") {
|
||||
body = checkSystemInstructions(body)
|
||||
}
|
||||
|
||||
|
||||
@@ -49,28 +49,21 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
}
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("codex")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
|
||||
body = NormalizeThinkingConfig(body, upstreamModel, false)
|
||||
if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
|
||||
body = NormalizeThinkingConfig(body, model, false)
|
||||
if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
|
||||
return resp, errValidate
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body = applyPayloadConfig(e.cfg, model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
body, _ = sjson.SetBytes(body, "stream", true)
|
||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||
|
||||
@@ -156,30 +149,23 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
}
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("codex")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
|
||||
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
|
||||
body = NormalizeThinkingConfig(body, upstreamModel, false)
|
||||
if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
|
||||
body = NormalizeThinkingConfig(body, model, false)
|
||||
if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
|
||||
return nil, errValidate
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, model, body)
|
||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
|
||||
url := strings.TrimSuffix(baseURL, "/") + "/responses"
|
||||
httpReq, err := e.cacheHelper(ctx, from, url, req, body)
|
||||
@@ -266,30 +252,21 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
}
|
||||
|
||||
func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel == "" {
|
||||
upstreamModel = req.Model
|
||||
}
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("codex")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
|
||||
|
||||
modelForCounting := upstreamModel
|
||||
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
body, _ = sjson.DeleteBytes(body, "previous_response_id")
|
||||
body, _ = sjson.SetBytes(body, "stream", false)
|
||||
|
||||
enc, err := tokenizerForCodexModel(modelForCounting)
|
||||
enc, err := tokenizerForCodexModel(model)
|
||||
if err != nil {
|
||||
return cliproxyexecutor.Response{}, fmt.Errorf("codex executor: tokenizer init failed: %w", err)
|
||||
}
|
||||
|
||||
@@ -78,9 +78,9 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini-cli")
|
||||
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
|
||||
basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
|
||||
basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
|
||||
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
|
||||
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
|
||||
basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
|
||||
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
|
||||
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
|
||||
@@ -217,9 +217,9 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini-cli")
|
||||
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
|
||||
basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
|
||||
basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
|
||||
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
|
||||
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
|
||||
basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
|
||||
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
|
||||
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
|
||||
@@ -318,7 +318,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
||||
|
||||
out := make(chan cliproxyexecutor.StreamChunk)
|
||||
stream = out
|
||||
go func(resp *http.Response, reqBody []byte, attempt string) {
|
||||
go func(resp *http.Response, reqBody []byte, attemptModel string) {
|
||||
defer close(out)
|
||||
defer func() {
|
||||
if errClose := resp.Body.Close(); errClose != nil {
|
||||
@@ -336,14 +336,14 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
||||
reporter.publish(ctx, detail)
|
||||
}
|
||||
if bytes.HasPrefix(line, dataTag) {
|
||||
segments := sdktranslator.TranslateStream(respCtx, to, from, attempt, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone(line), ¶m)
|
||||
segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone(line), ¶m)
|
||||
for i := range segments {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
segments := sdktranslator.TranslateStream(respCtx, to, from, attempt, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), ¶m)
|
||||
segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), ¶m)
|
||||
for i := range segments {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
|
||||
}
|
||||
@@ -365,12 +365,12 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
||||
appendAPIResponseChunk(ctx, e.cfg, data)
|
||||
reporter.publish(ctx, parseGeminiCLIUsage(data))
|
||||
var param any
|
||||
segments := sdktranslator.TranslateStream(respCtx, to, from, attempt, bytes.Clone(opts.OriginalRequest), reqBody, data, ¶m)
|
||||
segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, data, ¶m)
|
||||
for i := range segments {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
|
||||
}
|
||||
|
||||
segments = sdktranslator.TranslateStream(respCtx, to, from, attempt, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), ¶m)
|
||||
segments = sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), ¶m)
|
||||
for i := range segments {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
|
||||
}
|
||||
@@ -417,15 +417,17 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
|
||||
var lastStatus int
|
||||
var lastBody []byte
|
||||
|
||||
// The loop variable attemptModel is only used as the concrete model id sent to the upstream
|
||||
// Gemini CLI endpoint when iterating fallback variants.
|
||||
for _, attemptModel := range models {
|
||||
payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
|
||||
payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model)
|
||||
payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
|
||||
payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload)
|
||||
payload = deleteJSONField(payload, "project")
|
||||
payload = deleteJSONField(payload, "model")
|
||||
payload = deleteJSONField(payload, "request.safetySettings")
|
||||
payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
|
||||
payload = fixGeminiCLIImageAspectRatio(attemptModel, payload)
|
||||
payload = fixGeminiCLIImageAspectRatio(req.Model, payload)
|
||||
|
||||
tok, errTok := tokenSource.Token()
|
||||
if errTok != nil {
|
||||
|
||||
@@ -77,26 +77,22 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
|
||||
// Official Gemini API via API key or OAuth bearer
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = ApplyThinkingMetadata(body, req.Metadata, req.Model)
|
||||
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
|
||||
body = ApplyThinkingMetadata(body, req.Metadata, model)
|
||||
body = util.ApplyDefaultThinkingIfNeeded(model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(model, body)
|
||||
body = fixGeminiImageAspectRatio(model, body)
|
||||
body = applyPayloadConfig(e.cfg, model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
|
||||
action := "generateContent"
|
||||
if req.Metadata != nil {
|
||||
@@ -105,7 +101,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
}
|
||||
}
|
||||
baseURL := resolveGeminiBaseURL(auth)
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, action)
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, action)
|
||||
if opts.Alt != "" && action != "countTokens" {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
}
|
||||
@@ -180,28 +176,24 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
body = ApplyThinkingMetadata(body, req.Metadata, req.Model)
|
||||
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
|
||||
body = ApplyThinkingMetadata(body, req.Metadata, model)
|
||||
body = util.ApplyDefaultThinkingIfNeeded(model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(model, body)
|
||||
body = fixGeminiImageAspectRatio(model, body)
|
||||
body = applyPayloadConfig(e.cfg, model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
|
||||
baseURL := resolveGeminiBaseURL(auth)
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, "streamGenerateContent")
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "streamGenerateContent")
|
||||
if opts.Alt == "" {
|
||||
url = url + "?alt=sse"
|
||||
} else {
|
||||
@@ -301,29 +293,25 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||
apiKey, bearer := geminiCreds(auth)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
} else if !strings.EqualFold(upstreamModel, req.Model) {
|
||||
if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
|
||||
upstreamModel = modelOverride
|
||||
}
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
translatedReq = ApplyThinkingMetadata(translatedReq, req.Metadata, req.Model)
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
|
||||
translatedReq = ApplyThinkingMetadata(translatedReq, req.Metadata, model)
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(model, translatedReq)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", model)
|
||||
|
||||
baseURL := resolveGeminiBaseURL(auth)
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, "countTokens")
|
||||
url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "countTokens")
|
||||
|
||||
requestBody := bytes.NewReader(translatedReq)
|
||||
|
||||
|
||||
@@ -120,8 +120,6 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
@@ -137,7 +135,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body, _ = sjson.SetBytes(body, "model", req.Model)
|
||||
|
||||
action := "generateContent"
|
||||
if req.Metadata != nil {
|
||||
@@ -146,7 +144,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
|
||||
}
|
||||
}
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, action)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action)
|
||||
if opts.Alt != "" && action != "countTokens" {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
}
|
||||
@@ -220,24 +218,27 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
|
||||
}
|
||||
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body = util.ApplyDefaultThinkingIfNeeded(model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(model, body)
|
||||
body = fixGeminiImageAspectRatio(model, body)
|
||||
body = applyPayloadConfig(e.cfg, model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
|
||||
action := "generateContent"
|
||||
if req.Metadata != nil {
|
||||
@@ -250,7 +251,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
|
||||
if baseURL == "" {
|
||||
baseURL = "https://generativelanguage.googleapis.com"
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, action)
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, action)
|
||||
if opts.Alt != "" && action != "countTokens" {
|
||||
url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
|
||||
}
|
||||
@@ -321,8 +322,6 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
@@ -338,10 +337,10 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body, _ = sjson.SetBytes(body, "model", req.Model)
|
||||
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "streamGenerateContent")
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent")
|
||||
if opts.Alt == "" {
|
||||
url = url + "?alt=sse"
|
||||
} else {
|
||||
@@ -438,30 +437,33 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
|
||||
}
|
||||
body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
|
||||
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
body = util.ApplyDefaultThinkingIfNeeded(model, body)
|
||||
body = util.NormalizeGeminiThinkingBudget(model, body)
|
||||
body = util.StripThinkingConfigIfUnsupported(model, body)
|
||||
body = fixGeminiImageAspectRatio(model, body)
|
||||
body = applyPayloadConfig(e.cfg, model, body)
|
||||
body, _ = sjson.SetBytes(body, "model", model)
|
||||
|
||||
// For API key auth, use simpler URL format without project/location
|
||||
if baseURL == "" {
|
||||
baseURL = "https://generativelanguage.googleapis.com"
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, "streamGenerateContent")
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "streamGenerateContent")
|
||||
if opts.Alt == "" {
|
||||
url = url + "?alt=sse"
|
||||
} else {
|
||||
@@ -552,8 +554,6 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
|
||||
|
||||
// countTokensWithServiceAccount counts tokens using service account credentials.
|
||||
func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
@@ -566,14 +566,14 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
|
||||
}
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", req.Model)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")
|
||||
|
||||
baseURL := vertexBaseURL(location)
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "countTokens")
|
||||
url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens")
|
||||
|
||||
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
||||
if errNewReq != nil {
|
||||
@@ -641,21 +641,24 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
|
||||
|
||||
// countTokensWithAPIKey handles token counting using API key credentials.
|
||||
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
model := req.Model
|
||||
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
|
||||
model = override
|
||||
}
|
||||
|
||||
from := opts.SourceFormat
|
||||
to := sdktranslator.FromString("gemini")
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
|
||||
translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
|
||||
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
|
||||
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
|
||||
}
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
|
||||
translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq)
|
||||
translatedReq = fixGeminiImageAspectRatio(model, translatedReq)
|
||||
translatedReq, _ = sjson.SetBytes(translatedReq, "model", model)
|
||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||
@@ -665,7 +668,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
|
||||
if baseURL == "" {
|
||||
baseURL = "https://generativelanguage.googleapis.com"
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "countTokens")
|
||||
url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "countTokens")
|
||||
|
||||
httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
|
||||
if errNewReq != nil {
|
||||
@@ -808,3 +811,90 @@ func vertexAccessToken(ctx context.Context, cfg *config.Config, auth *cliproxyau
|
||||
}
|
||||
return tok.AccessToken, nil
|
||||
}
|
||||
|
||||
// resolveUpstreamModel resolves the upstream model name from vertex-api-key configuration.
|
||||
// It matches the requested model alias against configured models and returns the actual upstream name.
|
||||
func (e *GeminiVertexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
|
||||
trimmed := strings.TrimSpace(alias)
|
||||
if trimmed == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
entry := e.resolveVertexConfig(auth)
|
||||
if entry == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
|
||||
|
||||
// Candidate names to match against configured aliases/names.
|
||||
candidates := []string{strings.TrimSpace(normalizedModel)}
|
||||
if !strings.EqualFold(normalizedModel, trimmed) {
|
||||
candidates = append(candidates, trimmed)
|
||||
}
|
||||
if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
|
||||
candidates = append(candidates, original)
|
||||
}
|
||||
|
||||
for i := range entry.Models {
|
||||
model := entry.Models[i]
|
||||
name := strings.TrimSpace(model.Name)
|
||||
modelAlias := strings.TrimSpace(model.Alias)
|
||||
|
||||
for _, candidate := range candidates {
|
||||
if candidate == "" {
|
||||
continue
|
||||
}
|
||||
if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
|
||||
if name != "" {
|
||||
return name
|
||||
}
|
||||
return candidate
|
||||
}
|
||||
if name != "" && strings.EqualFold(name, candidate) {
|
||||
return name
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// resolveVertexConfig finds the matching vertex-api-key configuration entry for the given auth.
|
||||
func (e *GeminiVertexExecutor) resolveVertexConfig(auth *cliproxyauth.Auth) *config.VertexCompatKey {
|
||||
if auth == nil || e.cfg == nil {
|
||||
return nil
|
||||
}
|
||||
var attrKey, attrBase string
|
||||
if auth.Attributes != nil {
|
||||
attrKey = strings.TrimSpace(auth.Attributes["api_key"])
|
||||
attrBase = strings.TrimSpace(auth.Attributes["base_url"])
|
||||
}
|
||||
for i := range e.cfg.VertexCompatAPIKey {
|
||||
entry := &e.cfg.VertexCompatAPIKey[i]
|
||||
cfgKey := strings.TrimSpace(entry.APIKey)
|
||||
cfgBase := strings.TrimSpace(entry.BaseURL)
|
||||
if attrKey != "" && attrBase != "" {
|
||||
if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) {
|
||||
return entry
|
||||
}
|
||||
continue
|
||||
}
|
||||
if attrKey != "" && strings.EqualFold(cfgKey, attrKey) {
|
||||
if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
|
||||
return entry
|
||||
}
|
||||
}
|
||||
if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) {
|
||||
return entry
|
||||
}
|
||||
}
|
||||
if attrKey != "" {
|
||||
for i := range e.cfg.VertexCompatAPIKey {
|
||||
entry := &e.cfg.VertexCompatAPIKey[i]
|
||||
if strings.EqualFold(strings.TrimSpace(entry.APIKey), attrKey) {
|
||||
return entry
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -58,12 +58,9 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
body = NormalizeThinkingConfig(body, upstreamModel, false)
|
||||
if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
body, _ = sjson.SetBytes(body, "model", req.Model)
|
||||
body = NormalizeThinkingConfig(body, req.Model, false)
|
||||
if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
|
||||
return resp, errValidate
|
||||
}
|
||||
body = applyIFlowThinkingConfig(body)
|
||||
@@ -151,12 +148,9 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
body = NormalizeThinkingConfig(body, upstreamModel, false)
|
||||
if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
body, _ = sjson.SetBytes(body, "model", req.Model)
|
||||
body = NormalizeThinkingConfig(body, req.Model, false)
|
||||
if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
|
||||
return nil, errValidate
|
||||
}
|
||||
body = applyIFlowThinkingConfig(body)
|
||||
@@ -447,21 +441,18 @@ func ensureToolsArray(body []byte) []byte {
|
||||
return updated
|
||||
}
|
||||
|
||||
// preserveReasoningContentInMessages ensures reasoning_content from assistant messages in the
|
||||
// conversation history is preserved when sending to iFlow models that support thinking.
|
||||
// This is critical for multi-turn conversations where the model needs to see its previous
|
||||
// reasoning to maintain coherent thought chains across tool calls and conversation turns.
|
||||
// preserveReasoningContentInMessages checks if reasoning_content from assistant messages
|
||||
// is preserved in conversation history for iFlow models that support thinking.
|
||||
// This is helpful for multi-turn conversations where the model may benefit from seeing
|
||||
// its previous reasoning to maintain coherent thought chains.
|
||||
//
|
||||
// For GLM-4.7 and MiniMax-M2.1, the full assistant response (including reasoning) must be
|
||||
// appended back into message history before the next call.
|
||||
// For GLM-4.6/4.7 and MiniMax M2/M2.1, it is recommended to include the full assistant
|
||||
// response (including reasoning_content) in message history for better context continuity.
|
||||
func preserveReasoningContentInMessages(body []byte) []byte {
|
||||
model := strings.ToLower(gjson.GetBytes(body, "model").String())
|
||||
|
||||
// Only apply to models that support thinking with history preservation
|
||||
needsPreservation := strings.HasPrefix(model, "glm-4.7") ||
|
||||
strings.HasPrefix(model, "glm-4-7") ||
|
||||
strings.HasPrefix(model, "minimax-m2.1") ||
|
||||
strings.HasPrefix(model, "minimax-m2-1")
|
||||
needsPreservation := strings.HasPrefix(model, "glm-4") || strings.HasPrefix(model, "minimax-m2")
|
||||
|
||||
if !needsPreservation {
|
||||
return body
|
||||
@@ -499,45 +490,35 @@ func preserveReasoningContentInMessages(body []byte) []byte {
|
||||
// This should be called after NormalizeThinkingConfig has processed the payload.
|
||||
//
|
||||
// Model-specific handling:
|
||||
// - GLM-4.7: Uses extra_body={"thinking": {"type": "enabled"}, "clear_thinking": false}
|
||||
// - MiniMax-M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
|
||||
// - Other iFlow models: Uses chat_template_kwargs.enable_thinking (boolean)
|
||||
// - GLM-4.6/4.7: Uses chat_template_kwargs.enable_thinking (boolean) and chat_template_kwargs.clear_thinking=false
|
||||
// - MiniMax M2/M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
|
||||
func applyIFlowThinkingConfig(body []byte) []byte {
|
||||
effort := gjson.GetBytes(body, "reasoning_effort")
|
||||
model := strings.ToLower(gjson.GetBytes(body, "model").String())
|
||||
|
||||
// Check if thinking should be enabled
|
||||
val := ""
|
||||
if effort.Exists() {
|
||||
val = strings.ToLower(strings.TrimSpace(effort.String()))
|
||||
if !effort.Exists() {
|
||||
return body
|
||||
}
|
||||
enableThinking := effort.Exists() && val != "none" && val != ""
|
||||
|
||||
model := strings.ToLower(gjson.GetBytes(body, "model").String())
|
||||
val := strings.ToLower(strings.TrimSpace(effort.String()))
|
||||
enableThinking := val != "none" && val != ""
|
||||
|
||||
// Remove reasoning_effort as we'll convert to model-specific format
|
||||
if effort.Exists() {
|
||||
body, _ = sjson.DeleteBytes(body, "reasoning_effort")
|
||||
}
|
||||
body, _ = sjson.DeleteBytes(body, "reasoning_effort")
|
||||
body, _ = sjson.DeleteBytes(body, "thinking")
|
||||
|
||||
// GLM-4.7: Use extra_body with thinking config and clear_thinking: false
|
||||
if strings.HasPrefix(model, "glm-4.7") || strings.HasPrefix(model, "glm-4-7") {
|
||||
if enableThinking {
|
||||
body, _ = sjson.SetBytes(body, "extra_body.thinking.type", "enabled")
|
||||
body, _ = sjson.SetBytes(body, "extra_body.clear_thinking", false)
|
||||
}
|
||||
return body
|
||||
}
|
||||
|
||||
// MiniMax-M2.1: Use reasoning_split=true for interleaved thinking
|
||||
if strings.HasPrefix(model, "minimax-m2.1") || strings.HasPrefix(model, "minimax-m2-1") {
|
||||
if enableThinking {
|
||||
body, _ = sjson.SetBytes(body, "reasoning_split", true)
|
||||
}
|
||||
return body
|
||||
}
|
||||
|
||||
// Other iFlow models (including GLM-4.6): Use chat_template_kwargs.enable_thinking
|
||||
if effort.Exists() {
|
||||
// GLM-4.6/4.7: Use chat_template_kwargs
|
||||
if strings.HasPrefix(model, "glm-4") {
|
||||
body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
|
||||
if enableThinking {
|
||||
body, _ = sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false)
|
||||
}
|
||||
return body
|
||||
}
|
||||
|
||||
// MiniMax M2/M2.1: Use reasoning_split
|
||||
if strings.HasPrefix(model, "minimax-m2") {
|
||||
body, _ = sjson.SetBytes(body, "reasoning_split", enableThinking)
|
||||
return body
|
||||
}
|
||||
|
||||
return body
|
||||
|
||||
@@ -61,12 +61,8 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
||||
allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
|
||||
translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" && modelOverride == "" {
|
||||
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
|
||||
}
|
||||
translated = NormalizeThinkingConfig(translated, upstreamModel, allowCompat)
|
||||
if errValidate := ValidateThinkingConfig(translated, upstreamModel); errValidate != nil {
|
||||
translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
|
||||
if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil {
|
||||
return resp, errValidate
|
||||
}
|
||||
|
||||
@@ -157,12 +153,8 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
||||
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
|
||||
allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
|
||||
translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" && modelOverride == "" {
|
||||
translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
|
||||
}
|
||||
translated = NormalizeThinkingConfig(translated, upstreamModel, allowCompat)
|
||||
if errValidate := ValidateThinkingConfig(translated, upstreamModel); errValidate != nil {
|
||||
translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
|
||||
if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil {
|
||||
return nil, errValidate
|
||||
}
|
||||
|
||||
|
||||
@@ -14,32 +14,54 @@ import (
|
||||
// ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
|
||||
// for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
|
||||
func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
|
||||
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
|
||||
// Use the alias from metadata if available, as it's registered in the global registry
|
||||
// with thinking metadata; the upstream model name may not be registered.
|
||||
lookupModel := util.ResolveOriginalModel(model, metadata)
|
||||
|
||||
// Determine which model to use for thinking support check.
|
||||
// If the alias (lookupModel) is not in the registry, fall back to the upstream model.
|
||||
thinkingModel := lookupModel
|
||||
if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) {
|
||||
thinkingModel = model
|
||||
}
|
||||
|
||||
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata)
|
||||
if !ok || (budgetOverride == nil && includeOverride == nil) {
|
||||
return payload
|
||||
}
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
if !util.ModelSupportsThinking(thinkingModel) {
|
||||
return payload
|
||||
}
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
|
||||
norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
|
||||
}
|
||||
|
||||
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
|
||||
// ApplyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
|
||||
// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
|
||||
func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
|
||||
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
|
||||
func ApplyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
|
||||
// Use the alias from metadata if available, as it's registered in the global registry
|
||||
// with thinking metadata; the upstream model name may not be registered.
|
||||
lookupModel := util.ResolveOriginalModel(model, metadata)
|
||||
|
||||
// Determine which model to use for thinking support check.
|
||||
// If the alias (lookupModel) is not in the registry, fall back to the upstream model.
|
||||
thinkingModel := lookupModel
|
||||
if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) {
|
||||
thinkingModel = model
|
||||
}
|
||||
|
||||
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata)
|
||||
if !ok || (budgetOverride == nil && includeOverride == nil) {
|
||||
return payload
|
||||
}
|
||||
if !util.ModelSupportsThinking(model) {
|
||||
if !util.ModelSupportsThinking(thinkingModel) {
|
||||
return payload
|
||||
}
|
||||
if budgetOverride != nil {
|
||||
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
|
||||
norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride)
|
||||
budgetOverride = &norm
|
||||
}
|
||||
return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
|
||||
|
||||
@@ -12,7 +12,6 @@ import (
|
||||
|
||||
qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||
@@ -52,12 +51,9 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
to := sdktranslator.FromString("openai")
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
body = NormalizeThinkingConfig(body, upstreamModel, false)
|
||||
if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
body, _ = sjson.SetBytes(body, "model", req.Model)
|
||||
body = NormalizeThinkingConfig(body, req.Model, false)
|
||||
if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
|
||||
return resp, errValidate
|
||||
}
|
||||
body = applyPayloadConfig(e.cfg, req.Model, body)
|
||||
@@ -132,12 +128,9 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||
|
||||
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
|
||||
upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
|
||||
if upstreamModel != "" {
|
||||
body, _ = sjson.SetBytes(body, "model", upstreamModel)
|
||||
}
|
||||
body = NormalizeThinkingConfig(body, upstreamModel, false)
|
||||
if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil {
|
||||
body, _ = sjson.SetBytes(body, "model", req.Model)
|
||||
body = NormalizeThinkingConfig(body, req.Model, false)
|
||||
if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
|
||||
return nil, errValidate
|
||||
}
|
||||
toolsResult := gjson.GetBytes(body, "tools")
|
||||
|
||||
@@ -23,6 +23,7 @@ type geminiToResponsesState struct {
|
||||
MsgIndex int
|
||||
CurrentMsgID string
|
||||
TextBuf strings.Builder
|
||||
ItemTextBuf strings.Builder
|
||||
|
||||
// reasoning aggregation
|
||||
ReasoningOpened bool
|
||||
@@ -189,6 +190,8 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
|
||||
partAdded, _ = sjson.Set(partAdded, "item_id", st.CurrentMsgID)
|
||||
partAdded, _ = sjson.Set(partAdded, "output_index", st.MsgIndex)
|
||||
out = append(out, emitEvent("response.content_part.added", partAdded))
|
||||
st.ItemTextBuf.Reset()
|
||||
st.ItemTextBuf.WriteString(t.String())
|
||||
}
|
||||
st.TextBuf.WriteString(t.String())
|
||||
msg := `{"type":"response.output_text.delta","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"delta":"","logprobs":[]}`
|
||||
@@ -250,20 +253,24 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
|
||||
finalizeReasoning()
|
||||
// Close message output if opened
|
||||
if st.MsgOpened {
|
||||
fullText := st.ItemTextBuf.String()
|
||||
done := `{"type":"response.output_text.done","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"text":"","logprobs":[]}`
|
||||
done, _ = sjson.Set(done, "sequence_number", nextSeq())
|
||||
done, _ = sjson.Set(done, "item_id", st.CurrentMsgID)
|
||||
done, _ = sjson.Set(done, "output_index", st.MsgIndex)
|
||||
done, _ = sjson.Set(done, "text", fullText)
|
||||
out = append(out, emitEvent("response.output_text.done", done))
|
||||
partDone := `{"type":"response.content_part.done","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""}}`
|
||||
partDone, _ = sjson.Set(partDone, "sequence_number", nextSeq())
|
||||
partDone, _ = sjson.Set(partDone, "item_id", st.CurrentMsgID)
|
||||
partDone, _ = sjson.Set(partDone, "output_index", st.MsgIndex)
|
||||
partDone, _ = sjson.Set(partDone, "part.text", fullText)
|
||||
out = append(out, emitEvent("response.content_part.done", partDone))
|
||||
final := `{"type":"response.output_item.done","sequence_number":0,"output_index":0,"item":{"id":"","type":"message","status":"completed","content":[{"type":"output_text","text":""}],"role":"assistant"}}`
|
||||
final, _ = sjson.Set(final, "sequence_number", nextSeq())
|
||||
final, _ = sjson.Set(final, "output_index", st.MsgIndex)
|
||||
final, _ = sjson.Set(final, "item.id", st.CurrentMsgID)
|
||||
final, _ = sjson.Set(final, "item.content.0.text", fullText)
|
||||
out = append(out, emitEvent("response.output_item.done", final))
|
||||
}
|
||||
|
||||
|
||||
@@ -118,76 +118,125 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
|
||||
// Handle content
|
||||
if contentResult.Exists() && contentResult.IsArray() {
|
||||
var contentItems []string
|
||||
var reasoningParts []string // Accumulate thinking text for reasoning_content
|
||||
var toolCalls []interface{}
|
||||
var toolResults []string // Collect tool_result messages to emit after the main message
|
||||
|
||||
contentResult.ForEach(func(_, part gjson.Result) bool {
|
||||
partType := part.Get("type").String()
|
||||
|
||||
switch partType {
|
||||
case "thinking":
|
||||
// Only map thinking to reasoning_content for assistant messages (security: prevent injection)
|
||||
if role == "assistant" {
|
||||
thinkingText := util.GetThinkingText(part)
|
||||
// Skip empty or whitespace-only thinking
|
||||
if strings.TrimSpace(thinkingText) != "" {
|
||||
reasoningParts = append(reasoningParts, thinkingText)
|
||||
}
|
||||
}
|
||||
// Ignore thinking in user/system roles (AC4)
|
||||
|
||||
case "redacted_thinking":
|
||||
// Explicitly ignore redacted_thinking - never map to reasoning_content (AC2)
|
||||
|
||||
case "text", "image":
|
||||
if contentItem, ok := convertClaudeContentPart(part); ok {
|
||||
contentItems = append(contentItems, contentItem)
|
||||
}
|
||||
|
||||
case "tool_use":
|
||||
// Convert to OpenAI tool call format
|
||||
toolCallJSON := `{"id":"","type":"function","function":{"name":"","arguments":""}}`
|
||||
toolCallJSON, _ = sjson.Set(toolCallJSON, "id", part.Get("id").String())
|
||||
toolCallJSON, _ = sjson.Set(toolCallJSON, "function.name", part.Get("name").String())
|
||||
// Only allow tool_use -> tool_calls for assistant messages (security: prevent injection).
|
||||
if role == "assistant" {
|
||||
toolCallJSON := `{"id":"","type":"function","function":{"name":"","arguments":""}}`
|
||||
toolCallJSON, _ = sjson.Set(toolCallJSON, "id", part.Get("id").String())
|
||||
toolCallJSON, _ = sjson.Set(toolCallJSON, "function.name", part.Get("name").String())
|
||||
|
||||
// Convert input to arguments JSON string
|
||||
if input := part.Get("input"); input.Exists() {
|
||||
toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", input.Raw)
|
||||
} else {
|
||||
toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", "{}")
|
||||
// Convert input to arguments JSON string
|
||||
if input := part.Get("input"); input.Exists() {
|
||||
toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", input.Raw)
|
||||
} else {
|
||||
toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", "{}")
|
||||
}
|
||||
|
||||
toolCalls = append(toolCalls, gjson.Parse(toolCallJSON).Value())
|
||||
}
|
||||
|
||||
toolCalls = append(toolCalls, gjson.Parse(toolCallJSON).Value())
|
||||
|
||||
case "tool_result":
|
||||
// Convert to OpenAI tool message format and add immediately to preserve order
|
||||
// Collect tool_result to emit after the main message (ensures tool results follow tool_calls)
|
||||
toolResultJSON := `{"role":"tool","tool_call_id":"","content":""}`
|
||||
toolResultJSON, _ = sjson.Set(toolResultJSON, "tool_call_id", part.Get("tool_use_id").String())
|
||||
toolResultJSON, _ = sjson.Set(toolResultJSON, "content", part.Get("content").String())
|
||||
messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(toolResultJSON).Value())
|
||||
toolResultJSON, _ = sjson.Set(toolResultJSON, "content", convertClaudeToolResultContentToString(part.Get("content")))
|
||||
toolResults = append(toolResults, toolResultJSON)
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
// Emit text/image content as one message
|
||||
if len(contentItems) > 0 {
|
||||
msgJSON := `{"role":"","content":""}`
|
||||
msgJSON, _ = sjson.Set(msgJSON, "role", role)
|
||||
|
||||
contentArrayJSON := "[]"
|
||||
for _, contentItem := range contentItems {
|
||||
contentArrayJSON, _ = sjson.SetRaw(contentArrayJSON, "-1", contentItem)
|
||||
}
|
||||
msgJSON, _ = sjson.SetRaw(msgJSON, "content", contentArrayJSON)
|
||||
|
||||
contentValue := gjson.Get(msgJSON, "content")
|
||||
hasContent := false
|
||||
switch {
|
||||
case !contentValue.Exists():
|
||||
hasContent = false
|
||||
case contentValue.Type == gjson.String:
|
||||
hasContent = contentValue.String() != ""
|
||||
case contentValue.IsArray():
|
||||
hasContent = len(contentValue.Array()) > 0
|
||||
default:
|
||||
hasContent = contentValue.Raw != "" && contentValue.Raw != "null"
|
||||
}
|
||||
|
||||
if hasContent {
|
||||
messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(msgJSON).Value())
|
||||
}
|
||||
// Build reasoning content string
|
||||
reasoningContent := ""
|
||||
if len(reasoningParts) > 0 {
|
||||
reasoningContent = strings.Join(reasoningParts, "\n\n")
|
||||
}
|
||||
|
||||
// Emit tool calls in a separate assistant message
|
||||
if role == "assistant" && len(toolCalls) > 0 {
|
||||
toolCallMsgJSON := `{"role":"assistant","tool_calls":[]}`
|
||||
toolCallMsgJSON, _ = sjson.Set(toolCallMsgJSON, "tool_calls", toolCalls)
|
||||
messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(toolCallMsgJSON).Value())
|
||||
hasContent := len(contentItems) > 0
|
||||
hasReasoning := reasoningContent != ""
|
||||
hasToolCalls := len(toolCalls) > 0
|
||||
hasToolResults := len(toolResults) > 0
|
||||
|
||||
// OpenAI requires: tool messages MUST immediately follow the assistant message with tool_calls.
|
||||
// Therefore, we emit tool_result messages FIRST (they respond to the previous assistant's tool_calls),
|
||||
// then emit the current message's content.
|
||||
for _, toolResultJSON := range toolResults {
|
||||
messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(toolResultJSON).Value())
|
||||
}
|
||||
|
||||
// For assistant messages: emit a single unified message with content, tool_calls, and reasoning_content
|
||||
// This avoids splitting into multiple assistant messages which breaks OpenAI tool-call adjacency
|
||||
if role == "assistant" {
|
||||
if hasContent || hasReasoning || hasToolCalls {
|
||||
msgJSON := `{"role":"assistant"}`
|
||||
|
||||
// Add content (as array if we have items, empty string if reasoning-only)
|
||||
if hasContent {
|
||||
contentArrayJSON := "[]"
|
||||
for _, contentItem := range contentItems {
|
||||
contentArrayJSON, _ = sjson.SetRaw(contentArrayJSON, "-1", contentItem)
|
||||
}
|
||||
msgJSON, _ = sjson.SetRaw(msgJSON, "content", contentArrayJSON)
|
||||
} else {
|
||||
// Ensure content field exists for OpenAI compatibility
|
||||
msgJSON, _ = sjson.Set(msgJSON, "content", "")
|
||||
}
|
||||
|
||||
// Add reasoning_content if present
|
||||
if hasReasoning {
|
||||
msgJSON, _ = sjson.Set(msgJSON, "reasoning_content", reasoningContent)
|
||||
}
|
||||
|
||||
// Add tool_calls if present (in same message as content)
|
||||
if hasToolCalls {
|
||||
msgJSON, _ = sjson.Set(msgJSON, "tool_calls", toolCalls)
|
||||
}
|
||||
|
||||
messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(msgJSON).Value())
|
||||
}
|
||||
} else {
|
||||
// For non-assistant roles: emit content message if we have content
|
||||
// If the message only contains tool_results (no text/image), we still processed them above
|
||||
if hasContent {
|
||||
msgJSON := `{"role":""}`
|
||||
msgJSON, _ = sjson.Set(msgJSON, "role", role)
|
||||
|
||||
contentArrayJSON := "[]"
|
||||
for _, contentItem := range contentItems {
|
||||
contentArrayJSON, _ = sjson.SetRaw(contentArrayJSON, "-1", contentItem)
|
||||
}
|
||||
msgJSON, _ = sjson.SetRaw(msgJSON, "content", contentArrayJSON)
|
||||
|
||||
messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(msgJSON).Value())
|
||||
} else if hasToolResults && !hasContent {
|
||||
// tool_results already emitted above, no additional user message needed
|
||||
}
|
||||
}
|
||||
|
||||
} else if contentResult.Exists() && contentResult.Type == gjson.String {
|
||||
@@ -307,3 +356,43 @@ func convertClaudeContentPart(part gjson.Result) (string, bool) {
|
||||
return "", false
|
||||
}
|
||||
}
|
||||
|
||||
func convertClaudeToolResultContentToString(content gjson.Result) string {
|
||||
if !content.Exists() {
|
||||
return ""
|
||||
}
|
||||
|
||||
if content.Type == gjson.String {
|
||||
return content.String()
|
||||
}
|
||||
|
||||
if content.IsArray() {
|
||||
var parts []string
|
||||
content.ForEach(func(_, item gjson.Result) bool {
|
||||
switch {
|
||||
case item.Type == gjson.String:
|
||||
parts = append(parts, item.String())
|
||||
case item.IsObject() && item.Get("text").Exists() && item.Get("text").Type == gjson.String:
|
||||
parts = append(parts, item.Get("text").String())
|
||||
default:
|
||||
parts = append(parts, item.Raw)
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
joined := strings.Join(parts, "\n\n")
|
||||
if strings.TrimSpace(joined) != "" {
|
||||
return joined
|
||||
}
|
||||
return content.Raw
|
||||
}
|
||||
|
||||
if content.IsObject() {
|
||||
if text := content.Get("text"); text.Exists() && text.Type == gjson.String {
|
||||
return text.String()
|
||||
}
|
||||
return content.Raw
|
||||
}
|
||||
|
||||
return content.Raw
|
||||
}
|
||||
|
||||
500
internal/translator/openai/claude/openai_claude_request_test.go
Normal file
500
internal/translator/openai/claude/openai_claude_request_test.go
Normal file
@@ -0,0 +1,500 @@
|
||||
package claude
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// TestConvertClaudeRequestToOpenAI_ThinkingToReasoningContent tests the mapping
|
||||
// of Claude thinking content to OpenAI reasoning_content field.
|
||||
func TestConvertClaudeRequestToOpenAI_ThinkingToReasoningContent(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
inputJSON string
|
||||
wantReasoningContent string
|
||||
wantHasReasoningContent bool
|
||||
wantContentText string // Expected visible content text (if any)
|
||||
wantHasContent bool
|
||||
}{
|
||||
{
|
||||
name: "AC1: assistant message with thinking and text",
|
||||
inputJSON: `{
|
||||
"model": "claude-3-opus",
|
||||
"messages": [{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "thinking", "thinking": "Let me analyze this step by step..."},
|
||||
{"type": "text", "text": "Here is my response."}
|
||||
]
|
||||
}]
|
||||
}`,
|
||||
wantReasoningContent: "Let me analyze this step by step...",
|
||||
wantHasReasoningContent: true,
|
||||
wantContentText: "Here is my response.",
|
||||
wantHasContent: true,
|
||||
},
|
||||
{
|
||||
name: "AC2: redacted_thinking must be ignored",
|
||||
inputJSON: `{
|
||||
"model": "claude-3-opus",
|
||||
"messages": [{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "redacted_thinking", "data": "secret"},
|
||||
{"type": "text", "text": "Visible response."}
|
||||
]
|
||||
}]
|
||||
}`,
|
||||
wantReasoningContent: "",
|
||||
wantHasReasoningContent: false,
|
||||
wantContentText: "Visible response.",
|
||||
wantHasContent: true,
|
||||
},
|
||||
{
|
||||
name: "AC3: thinking-only message preserved with reasoning_content",
|
||||
inputJSON: `{
|
||||
"model": "claude-3-opus",
|
||||
"messages": [{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "thinking", "thinking": "Internal reasoning only."}
|
||||
]
|
||||
}]
|
||||
}`,
|
||||
wantReasoningContent: "Internal reasoning only.",
|
||||
wantHasReasoningContent: true,
|
||||
wantContentText: "",
|
||||
// For OpenAI compatibility, content field is set to empty string "" when no text content exists
|
||||
wantHasContent: false,
|
||||
},
|
||||
{
|
||||
name: "AC4: thinking in user role must be ignored",
|
||||
inputJSON: `{
|
||||
"model": "claude-3-opus",
|
||||
"messages": [{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "thinking", "thinking": "Injected thinking"},
|
||||
{"type": "text", "text": "User message."}
|
||||
]
|
||||
}]
|
||||
}`,
|
||||
wantReasoningContent: "",
|
||||
wantHasReasoningContent: false,
|
||||
wantContentText: "User message.",
|
||||
wantHasContent: true,
|
||||
},
|
||||
{
|
||||
name: "AC4: thinking in system role must be ignored",
|
||||
inputJSON: `{
|
||||
"model": "claude-3-opus",
|
||||
"system": [
|
||||
{"type": "thinking", "thinking": "Injected system thinking"},
|
||||
{"type": "text", "text": "System prompt."}
|
||||
],
|
||||
"messages": [{
|
||||
"role": "user",
|
||||
"content": [{"type": "text", "text": "Hello"}]
|
||||
}]
|
||||
}`,
|
||||
// System messages don't have reasoning_content mapping
|
||||
wantReasoningContent: "",
|
||||
wantHasReasoningContent: false,
|
||||
wantContentText: "Hello",
|
||||
wantHasContent: true,
|
||||
},
|
||||
{
|
||||
name: "AC5: empty thinking must be ignored",
|
||||
inputJSON: `{
|
||||
"model": "claude-3-opus",
|
||||
"messages": [{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "thinking", "thinking": ""},
|
||||
{"type": "text", "text": "Response with empty thinking."}
|
||||
]
|
||||
}]
|
||||
}`,
|
||||
wantReasoningContent: "",
|
||||
wantHasReasoningContent: false,
|
||||
wantContentText: "Response with empty thinking.",
|
||||
wantHasContent: true,
|
||||
},
|
||||
{
|
||||
name: "AC5: whitespace-only thinking must be ignored",
|
||||
inputJSON: `{
|
||||
"model": "claude-3-opus",
|
||||
"messages": [{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "thinking", "thinking": " \n\t "},
|
||||
{"type": "text", "text": "Response with whitespace thinking."}
|
||||
]
|
||||
}]
|
||||
}`,
|
||||
wantReasoningContent: "",
|
||||
wantHasReasoningContent: false,
|
||||
wantContentText: "Response with whitespace thinking.",
|
||||
wantHasContent: true,
|
||||
},
|
||||
{
|
||||
name: "Multiple thinking parts concatenated",
|
||||
inputJSON: `{
|
||||
"model": "claude-3-opus",
|
||||
"messages": [{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "thinking", "thinking": "First thought."},
|
||||
{"type": "thinking", "thinking": "Second thought."},
|
||||
{"type": "text", "text": "Final answer."}
|
||||
]
|
||||
}]
|
||||
}`,
|
||||
wantReasoningContent: "First thought.\n\nSecond thought.",
|
||||
wantHasReasoningContent: true,
|
||||
wantContentText: "Final answer.",
|
||||
wantHasContent: true,
|
||||
},
|
||||
{
|
||||
name: "Mixed thinking and redacted_thinking",
|
||||
inputJSON: `{
|
||||
"model": "claude-3-opus",
|
||||
"messages": [{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "thinking", "thinking": "Visible thought."},
|
||||
{"type": "redacted_thinking", "data": "hidden"},
|
||||
{"type": "text", "text": "Answer."}
|
||||
]
|
||||
}]
|
||||
}`,
|
||||
wantReasoningContent: "Visible thought.",
|
||||
wantHasReasoningContent: true,
|
||||
wantContentText: "Answer.",
|
||||
wantHasContent: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := ConvertClaudeRequestToOpenAI("test-model", []byte(tt.inputJSON), false)
|
||||
resultJSON := gjson.ParseBytes(result)
|
||||
|
||||
// Find the relevant message (skip system message at index 0)
|
||||
messages := resultJSON.Get("messages").Array()
|
||||
if len(messages) < 2 {
|
||||
if tt.wantHasReasoningContent || tt.wantHasContent {
|
||||
t.Fatalf("Expected at least 2 messages (system + user/assistant), got %d", len(messages))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Check the last non-system message
|
||||
var targetMsg gjson.Result
|
||||
for i := len(messages) - 1; i >= 0; i-- {
|
||||
if messages[i].Get("role").String() != "system" {
|
||||
targetMsg = messages[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Check reasoning_content
|
||||
gotReasoningContent := targetMsg.Get("reasoning_content").String()
|
||||
gotHasReasoningContent := targetMsg.Get("reasoning_content").Exists()
|
||||
|
||||
if gotHasReasoningContent != tt.wantHasReasoningContent {
|
||||
t.Errorf("reasoning_content existence = %v, want %v", gotHasReasoningContent, tt.wantHasReasoningContent)
|
||||
}
|
||||
|
||||
if gotReasoningContent != tt.wantReasoningContent {
|
||||
t.Errorf("reasoning_content = %q, want %q", gotReasoningContent, tt.wantReasoningContent)
|
||||
}
|
||||
|
||||
// Check content
|
||||
content := targetMsg.Get("content")
|
||||
// content has meaningful content if it's a non-empty array, or a non-empty string
|
||||
var gotHasContent bool
|
||||
switch {
|
||||
case content.IsArray():
|
||||
gotHasContent = len(content.Array()) > 0
|
||||
case content.Type == gjson.String:
|
||||
gotHasContent = content.String() != ""
|
||||
default:
|
||||
gotHasContent = false
|
||||
}
|
||||
|
||||
if gotHasContent != tt.wantHasContent {
|
||||
t.Errorf("content existence = %v, want %v", gotHasContent, tt.wantHasContent)
|
||||
}
|
||||
|
||||
if tt.wantHasContent && tt.wantContentText != "" {
|
||||
// Find text content
|
||||
var foundText string
|
||||
content.ForEach(func(_, v gjson.Result) bool {
|
||||
if v.Get("type").String() == "text" {
|
||||
foundText = v.Get("text").String()
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
if foundText != tt.wantContentText {
|
||||
t.Errorf("content text = %q, want %q", foundText, tt.wantContentText)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestConvertClaudeRequestToOpenAI_ThinkingOnlyMessagePreserved tests AC3:
|
||||
// that a message with only thinking content is preserved (not dropped).
|
||||
func TestConvertClaudeRequestToOpenAI_ThinkingOnlyMessagePreserved(t *testing.T) {
|
||||
inputJSON := `{
|
||||
"model": "claude-3-opus",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [{"type": "text", "text": "What is 2+2?"}]
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [{"type": "thinking", "thinking": "Let me calculate: 2+2=4"}]
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [{"type": "text", "text": "Thanks"}]
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
|
||||
resultJSON := gjson.ParseBytes(result)
|
||||
|
||||
messages := resultJSON.Get("messages").Array()
|
||||
|
||||
// Should have: system (auto-added) + user + assistant (thinking-only) + user = 4 messages
|
||||
if len(messages) != 4 {
|
||||
t.Fatalf("Expected 4 messages, got %d. Messages: %v", len(messages), resultJSON.Get("messages").Raw)
|
||||
}
|
||||
|
||||
// Check the assistant message (index 2) has reasoning_content
|
||||
assistantMsg := messages[2]
|
||||
if assistantMsg.Get("role").String() != "assistant" {
|
||||
t.Errorf("Expected message[2] to be assistant, got %s", assistantMsg.Get("role").String())
|
||||
}
|
||||
|
||||
if !assistantMsg.Get("reasoning_content").Exists() {
|
||||
t.Error("Expected assistant message to have reasoning_content")
|
||||
}
|
||||
|
||||
if assistantMsg.Get("reasoning_content").String() != "Let me calculate: 2+2=4" {
|
||||
t.Errorf("Unexpected reasoning_content: %s", assistantMsg.Get("reasoning_content").String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertClaudeRequestToOpenAI_ToolResultOrderAndContent(t *testing.T) {
|
||||
inputJSON := `{
|
||||
"model": "claude-3-opus",
|
||||
"messages": [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "before"},
|
||||
{"type": "tool_result", "tool_use_id": "call_1", "content": [{"type":"text","text":"tool ok"}]},
|
||||
{"type": "text", "text": "after"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
|
||||
resultJSON := gjson.ParseBytes(result)
|
||||
messages := resultJSON.Get("messages").Array()
|
||||
|
||||
// OpenAI requires: tool messages MUST immediately follow assistant(tool_calls).
|
||||
// Correct order: system + assistant(tool_calls) + tool(result) + user(before+after)
|
||||
if len(messages) != 4 {
|
||||
t.Fatalf("Expected 4 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
|
||||
}
|
||||
|
||||
if messages[0].Get("role").String() != "system" {
|
||||
t.Fatalf("Expected messages[0] to be system, got %s", messages[0].Get("role").String())
|
||||
}
|
||||
|
||||
if messages[1].Get("role").String() != "assistant" || !messages[1].Get("tool_calls").Exists() {
|
||||
t.Fatalf("Expected messages[1] to be assistant tool_calls, got %s: %s", messages[1].Get("role").String(), messages[1].Raw)
|
||||
}
|
||||
|
||||
// tool message MUST immediately follow assistant(tool_calls) per OpenAI spec
|
||||
if messages[2].Get("role").String() != "tool" {
|
||||
t.Fatalf("Expected messages[2] to be tool (must follow tool_calls), got %s", messages[2].Get("role").String())
|
||||
}
|
||||
if got := messages[2].Get("tool_call_id").String(); got != "call_1" {
|
||||
t.Fatalf("Expected tool_call_id %q, got %q", "call_1", got)
|
||||
}
|
||||
if got := messages[2].Get("content").String(); got != "tool ok" {
|
||||
t.Fatalf("Expected tool content %q, got %q", "tool ok", got)
|
||||
}
|
||||
|
||||
// User message comes after tool message
|
||||
if messages[3].Get("role").String() != "user" {
|
||||
t.Fatalf("Expected messages[3] to be user, got %s", messages[3].Get("role").String())
|
||||
}
|
||||
// User message should contain both "before" and "after" text
|
||||
if got := messages[3].Get("content.0.text").String(); got != "before" {
|
||||
t.Fatalf("Expected user text[0] %q, got %q", "before", got)
|
||||
}
|
||||
if got := messages[3].Get("content.1.text").String(); got != "after" {
|
||||
t.Fatalf("Expected user text[1] %q, got %q", "after", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertClaudeRequestToOpenAI_ToolResultObjectContent(t *testing.T) {
|
||||
inputJSON := `{
|
||||
"model": "claude-3-opus",
|
||||
"messages": [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "tool_result", "tool_use_id": "call_1", "content": {"foo": "bar"}}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
|
||||
resultJSON := gjson.ParseBytes(result)
|
||||
messages := resultJSON.Get("messages").Array()
|
||||
|
||||
// system + assistant(tool_calls) + tool(result)
|
||||
if len(messages) != 3 {
|
||||
t.Fatalf("Expected 3 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
|
||||
}
|
||||
|
||||
if messages[2].Get("role").String() != "tool" {
|
||||
t.Fatalf("Expected messages[2] to be tool, got %s", messages[2].Get("role").String())
|
||||
}
|
||||
|
||||
toolContent := messages[2].Get("content").String()
|
||||
parsed := gjson.Parse(toolContent)
|
||||
if parsed.Get("foo").String() != "bar" {
|
||||
t.Fatalf("Expected tool content JSON foo=bar, got %q", toolContent)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertClaudeRequestToOpenAI_AssistantTextToolUseTextOrder(t *testing.T) {
|
||||
inputJSON := `{
|
||||
"model": "claude-3-opus",
|
||||
"messages": [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "text", "text": "pre"},
|
||||
{"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}},
|
||||
{"type": "text", "text": "post"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
|
||||
resultJSON := gjson.ParseBytes(result)
|
||||
messages := resultJSON.Get("messages").Array()
|
||||
|
||||
// New behavior: content + tool_calls unified in single assistant message
|
||||
// Expect: system + assistant(content[pre,post] + tool_calls)
|
||||
if len(messages) != 2 {
|
||||
t.Fatalf("Expected 2 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
|
||||
}
|
||||
|
||||
if messages[0].Get("role").String() != "system" {
|
||||
t.Fatalf("Expected messages[0] to be system, got %s", messages[0].Get("role").String())
|
||||
}
|
||||
|
||||
assistantMsg := messages[1]
|
||||
if assistantMsg.Get("role").String() != "assistant" {
|
||||
t.Fatalf("Expected messages[1] to be assistant, got %s", assistantMsg.Get("role").String())
|
||||
}
|
||||
|
||||
// Should have both content and tool_calls in same message
|
||||
if !assistantMsg.Get("tool_calls").Exists() {
|
||||
t.Fatalf("Expected assistant message to have tool_calls")
|
||||
}
|
||||
if got := assistantMsg.Get("tool_calls.0.id").String(); got != "call_1" {
|
||||
t.Fatalf("Expected tool_call id %q, got %q", "call_1", got)
|
||||
}
|
||||
if got := assistantMsg.Get("tool_calls.0.function.name").String(); got != "do_work" {
|
||||
t.Fatalf("Expected tool_call name %q, got %q", "do_work", got)
|
||||
}
|
||||
|
||||
// Content should have both pre and post text
|
||||
if got := assistantMsg.Get("content.0.text").String(); got != "pre" {
|
||||
t.Fatalf("Expected content[0] text %q, got %q", "pre", got)
|
||||
}
|
||||
if got := assistantMsg.Get("content.1.text").String(); got != "post" {
|
||||
t.Fatalf("Expected content[1] text %q, got %q", "post", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertClaudeRequestToOpenAI_AssistantThinkingToolUseThinkingSplit(t *testing.T) {
|
||||
inputJSON := `{
|
||||
"model": "claude-3-opus",
|
||||
"messages": [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "thinking", "thinking": "t1"},
|
||||
{"type": "text", "text": "pre"},
|
||||
{"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}},
|
||||
{"type": "thinking", "thinking": "t2"},
|
||||
{"type": "text", "text": "post"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
|
||||
resultJSON := gjson.ParseBytes(result)
|
||||
messages := resultJSON.Get("messages").Array()
|
||||
|
||||
// New behavior: all content, thinking, and tool_calls unified in single assistant message
|
||||
// Expect: system + assistant(content[pre,post] + tool_calls + reasoning_content[t1+t2])
|
||||
if len(messages) != 2 {
|
||||
t.Fatalf("Expected 2 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
|
||||
}
|
||||
|
||||
assistantMsg := messages[1]
|
||||
if assistantMsg.Get("role").String() != "assistant" {
|
||||
t.Fatalf("Expected messages[1] to be assistant, got %s", assistantMsg.Get("role").String())
|
||||
}
|
||||
|
||||
// Should have content with both pre and post
|
||||
if got := assistantMsg.Get("content.0.text").String(); got != "pre" {
|
||||
t.Fatalf("Expected content[0] text %q, got %q", "pre", got)
|
||||
}
|
||||
if got := assistantMsg.Get("content.1.text").String(); got != "post" {
|
||||
t.Fatalf("Expected content[1] text %q, got %q", "post", got)
|
||||
}
|
||||
|
||||
// Should have tool_calls
|
||||
if !assistantMsg.Get("tool_calls").Exists() {
|
||||
t.Fatalf("Expected assistant message to have tool_calls")
|
||||
}
|
||||
|
||||
// Should have combined reasoning_content from both thinking blocks
|
||||
if got := assistantMsg.Get("reasoning_content").String(); got != "t1\n\nt2" {
|
||||
t.Fatalf("Expected reasoning_content %q, got %q", "t1\n\nt2", got)
|
||||
}
|
||||
}
|
||||
@@ -480,15 +480,15 @@ func collectOpenAIReasoningTexts(node gjson.Result) []string {
|
||||
|
||||
switch node.Type {
|
||||
case gjson.String:
|
||||
if text := strings.TrimSpace(node.String()); text != "" {
|
||||
if text := node.String(); text != "" {
|
||||
texts = append(texts, text)
|
||||
}
|
||||
case gjson.JSON:
|
||||
if text := node.Get("text"); text.Exists() {
|
||||
if trimmed := strings.TrimSpace(text.String()); trimmed != "" {
|
||||
texts = append(texts, trimmed)
|
||||
if textStr := text.String(); textStr != "" {
|
||||
texts = append(texts, textStr)
|
||||
}
|
||||
} else if raw := strings.TrimSpace(node.Raw); raw != "" && !strings.HasPrefix(raw, "{") && !strings.HasPrefix(raw, "[") {
|
||||
} else if raw := node.Raw; raw != "" && !strings.HasPrefix(raw, "{") && !strings.HasPrefix(raw, "[") {
|
||||
texts = append(texts, raw)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -390,6 +390,11 @@ func addEmptySchemaPlaceholder(jsonStr string) string {
|
||||
|
||||
// If schema has properties but none are required, add a minimal placeholder.
|
||||
if propsVal.IsObject() && !hasRequiredProperties {
|
||||
// DO NOT add placeholder if it's a top-level schema (parentPath is empty)
|
||||
// or if we've already added a placeholder reason above.
|
||||
if parentPath == "" {
|
||||
continue
|
||||
}
|
||||
placeholderPath := joinPath(propsPath, "_")
|
||||
if !gjson.Get(jsonStr, placeholderPath).Exists() {
|
||||
jsonStr, _ = sjson.Set(jsonStr, placeholderPath+".type", "boolean")
|
||||
|
||||
@@ -127,8 +127,10 @@ func TestCleanJSONSchemaForAntigravity_AnyOfFlattening_SmartSelection(t *testing
|
||||
"type": "object",
|
||||
"description": "Accepts: null | object",
|
||||
"properties": {
|
||||
"_": { "type": "boolean" },
|
||||
"kind": { "type": "string" }
|
||||
}
|
||||
},
|
||||
"required": ["_"]
|
||||
}
|
||||
}
|
||||
}`
|
||||
|
||||
@@ -288,37 +288,73 @@ func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
|
||||
|
||||
// ApplyGemini3ThinkingLevelFromMetadata applies thinkingLevel from metadata for Gemini 3 models.
|
||||
// For standard Gemini API format (generationConfig.thinkingConfig path).
|
||||
// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)).
|
||||
// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal))
|
||||
// or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel.
|
||||
func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte {
|
||||
if !IsGemini3Model(model) {
|
||||
// Use the alias from metadata if available for model type detection
|
||||
lookupModel := ResolveOriginalModel(model, metadata)
|
||||
if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) {
|
||||
return body
|
||||
}
|
||||
|
||||
// Determine which model to use for validation
|
||||
checkModel := model
|
||||
if IsGemini3Model(lookupModel) {
|
||||
checkModel = lookupModel
|
||||
}
|
||||
|
||||
// First try to get effort string from metadata
|
||||
effort, ok := ReasoningEffortFromMetadata(metadata)
|
||||
if !ok || effort == "" {
|
||||
return body
|
||||
if ok && effort != "" {
|
||||
if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid {
|
||||
return ApplyGeminiThinkingLevel(body, level, nil)
|
||||
}
|
||||
}
|
||||
// Validate and apply the thinkingLevel
|
||||
if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid {
|
||||
return ApplyGeminiThinkingLevel(body, level, nil)
|
||||
|
||||
// Fallback: check for numeric budget and convert to thinkingLevel
|
||||
budget, _, _, matched := ThinkingFromMetadata(metadata)
|
||||
if matched && budget != nil {
|
||||
if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid {
|
||||
return ApplyGeminiThinkingLevel(body, level, nil)
|
||||
}
|
||||
}
|
||||
|
||||
return body
|
||||
}
|
||||
|
||||
// ApplyGemini3ThinkingLevelFromMetadataCLI applies thinkingLevel from metadata for Gemini 3 models.
|
||||
// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
|
||||
// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)).
|
||||
// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal))
|
||||
// or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel.
|
||||
func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte {
|
||||
if !IsGemini3Model(model) {
|
||||
// Use the alias from metadata if available for model type detection
|
||||
lookupModel := ResolveOriginalModel(model, metadata)
|
||||
if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) {
|
||||
return body
|
||||
}
|
||||
|
||||
// Determine which model to use for validation
|
||||
checkModel := model
|
||||
if IsGemini3Model(lookupModel) {
|
||||
checkModel = lookupModel
|
||||
}
|
||||
|
||||
// First try to get effort string from metadata
|
||||
effort, ok := ReasoningEffortFromMetadata(metadata)
|
||||
if !ok || effort == "" {
|
||||
return body
|
||||
if ok && effort != "" {
|
||||
if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid {
|
||||
return ApplyGeminiCLIThinkingLevel(body, level, nil)
|
||||
}
|
||||
}
|
||||
// Validate and apply the thinkingLevel
|
||||
if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid {
|
||||
return ApplyGeminiCLIThinkingLevel(body, level, nil)
|
||||
|
||||
// Fallback: check for numeric budget and convert to thinkingLevel
|
||||
budget, _, _, matched := ThinkingFromMetadata(metadata)
|
||||
if matched && budget != nil {
|
||||
if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid {
|
||||
return ApplyGeminiCLIThinkingLevel(body, level, nil)
|
||||
}
|
||||
}
|
||||
|
||||
return body
|
||||
}
|
||||
|
||||
@@ -326,15 +362,17 @@ func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]
|
||||
// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
|
||||
// Returns the modified body if thinkingConfig was added, otherwise returns the original.
|
||||
// For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation.
|
||||
func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
|
||||
if !ModelHasDefaultThinking(model) {
|
||||
func ApplyDefaultThinkingIfNeededCLI(model string, metadata map[string]any, body []byte) []byte {
|
||||
// Use the alias from metadata if available for model property lookup
|
||||
lookupModel := ResolveOriginalModel(model, metadata)
|
||||
if !ModelHasDefaultThinking(lookupModel) && !ModelHasDefaultThinking(model) {
|
||||
return body
|
||||
}
|
||||
if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() {
|
||||
return body
|
||||
}
|
||||
// Gemini 3 models use thinkingLevel instead of thinkingBudget
|
||||
if IsGemini3Model(model) {
|
||||
if IsGemini3Model(lookupModel) || IsGemini3Model(model) {
|
||||
// Don't set a default - let the API use its dynamic default ("high")
|
||||
// Only set includeThoughts
|
||||
updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts", true)
|
||||
|
||||
56
internal/util/sanitize_test.go
Normal file
56
internal/util/sanitize_test.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSanitizeFunctionName(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"Normal", "valid_name", "valid_name"},
|
||||
{"With Dots", "name.with.dots", "name.with.dots"},
|
||||
{"With Colons", "name:with:colons", "name:with:colons"},
|
||||
{"With Dashes", "name-with-dashes", "name-with-dashes"},
|
||||
{"Mixed Allowed", "name.with_dots:colons-dashes", "name.with_dots:colons-dashes"},
|
||||
{"Invalid Characters", "name!with@invalid#chars", "name_with_invalid_chars"},
|
||||
{"Spaces", "name with spaces", "name_with_spaces"},
|
||||
{"Non-ASCII", "name_with_你好_chars", "name_with____chars"},
|
||||
{"Starts with digit", "123name", "_123name"},
|
||||
{"Starts with dot", ".name", "_.name"},
|
||||
{"Starts with colon", ":name", "_:name"},
|
||||
{"Starts with dash", "-name", "_-name"},
|
||||
{"Starts with invalid char", "!name", "_name"},
|
||||
{"Exactly 64 chars", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charact", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charact"},
|
||||
{"Too long (65 chars)", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charactX", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charact"},
|
||||
{"Very long", "this_is_a_very_long_name_that_exceeds_the_sixty_four_character_limit_for_function_names", "this_is_a_very_long_name_that_exceeds_the_sixty_four_character_l"},
|
||||
{"Starts with digit (64 chars total)", "1234567890123456789012345678901234567890123456789012345678901234", "_123456789012345678901234567890123456789012345678901234567890123"},
|
||||
{"Starts with invalid char (64 chars total)", "!234567890123456789012345678901234567890123456789012345678901234", "_234567890123456789012345678901234567890123456789012345678901234"},
|
||||
{"Empty", "", ""},
|
||||
{"Single character invalid", "@", "_"},
|
||||
{"Single character valid", "a", "a"},
|
||||
{"Single character digit", "1", "_1"},
|
||||
{"Single character underscore", "_", "_"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := SanitizeFunctionName(tt.input)
|
||||
if got != tt.expected {
|
||||
t.Errorf("SanitizeFunctionName(%q) = %v, want %v", tt.input, got, tt.expected)
|
||||
}
|
||||
// Verify Gemini compliance
|
||||
if len(got) > 64 {
|
||||
t.Errorf("SanitizeFunctionName(%q) result too long: %d", tt.input, len(got))
|
||||
}
|
||||
if len(got) > 0 {
|
||||
first := got[0]
|
||||
if !((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_') {
|
||||
t.Errorf("SanitizeFunctionName(%q) result starts with invalid char: %c", tt.input, first)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -12,9 +12,18 @@ func ModelSupportsThinking(model string) bool {
|
||||
if model == "" {
|
||||
return false
|
||||
}
|
||||
// First check the global dynamic registry
|
||||
if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil {
|
||||
return info.Thinking != nil
|
||||
}
|
||||
// Fallback: check static model definitions
|
||||
if info := registry.LookupStaticModelInfo(model); info != nil {
|
||||
return info.Thinking != nil
|
||||
}
|
||||
// Fallback: check Antigravity static config
|
||||
if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil {
|
||||
return cfg.Thinking != nil
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -63,11 +72,19 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero
|
||||
if model == "" {
|
||||
return false, 0, 0, false, false
|
||||
}
|
||||
info := registry.GetGlobalRegistry().GetModelInfo(model)
|
||||
if info == nil || info.Thinking == nil {
|
||||
return false, 0, 0, false, false
|
||||
// First check global dynamic registry
|
||||
if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil && info.Thinking != nil {
|
||||
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
|
||||
}
|
||||
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
|
||||
// Fallback: check static model definitions
|
||||
if info := registry.LookupStaticModelInfo(model); info != nil && info.Thinking != nil {
|
||||
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
|
||||
}
|
||||
// Fallback: check Antigravity static config
|
||||
if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil && cfg.Thinking != nil {
|
||||
return true, cfg.Thinking.Min, cfg.Thinking.Max, cfg.Thinking.ZeroAllowed, cfg.Thinking.DynamicAllowed
|
||||
}
|
||||
return false, 0, 0, false, false
|
||||
}
|
||||
|
||||
// GetModelThinkingLevels returns the discrete reasoning effort levels for the model.
|
||||
|
||||
@@ -8,12 +8,52 @@ import (
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var functionNameSanitizer = regexp.MustCompile(`[^a-zA-Z0-9_.:-]`)
|
||||
|
||||
// SanitizeFunctionName ensures a function name matches the requirements for Gemini/Vertex AI.
|
||||
// It replaces invalid characters with underscores, ensures it starts with a letter or underscore,
|
||||
// and truncates it to 64 characters if necessary.
|
||||
// Regex Rule: [^a-zA-Z0-9_.:-] replaced with _.
|
||||
func SanitizeFunctionName(name string) string {
|
||||
if name == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Replace invalid characters with underscore
|
||||
sanitized := functionNameSanitizer.ReplaceAllString(name, "_")
|
||||
|
||||
// Ensure it starts with a letter or underscore
|
||||
// Re-reading requirements: Must start with a letter or an underscore.
|
||||
if len(sanitized) > 0 {
|
||||
first := sanitized[0]
|
||||
if !((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_') {
|
||||
// If it starts with an allowed character but not allowed at the beginning (digit, dot, colon, dash),
|
||||
// we must prepend an underscore.
|
||||
|
||||
// To stay within the 64-character limit while prepending, we must truncate first.
|
||||
if len(sanitized) >= 64 {
|
||||
sanitized = sanitized[:63]
|
||||
}
|
||||
sanitized = "_" + sanitized
|
||||
}
|
||||
} else {
|
||||
sanitized = "_"
|
||||
}
|
||||
|
||||
// Truncate to 64 characters
|
||||
if len(sanitized) > 64 {
|
||||
sanitized = sanitized[:64]
|
||||
}
|
||||
return sanitized
|
||||
}
|
||||
|
||||
// SetLogLevel configures the logrus log level based on the configuration.
|
||||
// It sets the log level to DebugLevel if debug mode is enabled, otherwise to InfoLevel.
|
||||
func SetLogLevel(cfg *config.Config) {
|
||||
|
||||
@@ -90,6 +90,11 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
|
||||
if !equalStringMap(o.Headers, n.Headers) {
|
||||
changes = append(changes, fmt.Sprintf("gemini[%d].headers: updated", i))
|
||||
}
|
||||
oldModels := SummarizeGeminiModels(o.Models)
|
||||
newModels := SummarizeGeminiModels(n.Models)
|
||||
if oldModels.hash != newModels.hash {
|
||||
changes = append(changes, fmt.Sprintf("gemini[%d].models: updated (%d -> %d entries)", i, oldModels.count, newModels.count))
|
||||
}
|
||||
oldExcluded := SummarizeExcludedModels(o.ExcludedModels)
|
||||
newExcluded := SummarizeExcludedModels(n.ExcludedModels)
|
||||
if oldExcluded.hash != newExcluded.hash {
|
||||
@@ -120,6 +125,11 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
|
||||
if !equalStringMap(o.Headers, n.Headers) {
|
||||
changes = append(changes, fmt.Sprintf("claude[%d].headers: updated", i))
|
||||
}
|
||||
oldModels := SummarizeClaudeModels(o.Models)
|
||||
newModels := SummarizeClaudeModels(n.Models)
|
||||
if oldModels.hash != newModels.hash {
|
||||
changes = append(changes, fmt.Sprintf("claude[%d].models: updated (%d -> %d entries)", i, oldModels.count, newModels.count))
|
||||
}
|
||||
oldExcluded := SummarizeExcludedModels(o.ExcludedModels)
|
||||
newExcluded := SummarizeExcludedModels(n.ExcludedModels)
|
||||
if oldExcluded.hash != newExcluded.hash {
|
||||
@@ -150,6 +160,11 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
|
||||
if !equalStringMap(o.Headers, n.Headers) {
|
||||
changes = append(changes, fmt.Sprintf("codex[%d].headers: updated", i))
|
||||
}
|
||||
oldModels := SummarizeCodexModels(o.Models)
|
||||
newModels := SummarizeCodexModels(n.Models)
|
||||
if oldModels.hash != newModels.hash {
|
||||
changes = append(changes, fmt.Sprintf("codex[%d].models: updated (%d -> %d entries)", i, oldModels.count, newModels.count))
|
||||
}
|
||||
oldExcluded := SummarizeExcludedModels(o.ExcludedModels)
|
||||
newExcluded := SummarizeExcludedModels(n.ExcludedModels)
|
||||
if oldExcluded.hash != newExcluded.hash {
|
||||
@@ -194,6 +209,9 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
|
||||
if entries, _ := DiffOAuthExcludedModelChanges(oldCfg.OAuthExcludedModels, newCfg.OAuthExcludedModels); len(entries) > 0 {
|
||||
changes = append(changes, entries...)
|
||||
}
|
||||
if entries, _ := DiffOAuthModelMappingChanges(oldCfg.OAuthModelMappings, newCfg.OAuthModelMappings); len(entries) > 0 {
|
||||
changes = append(changes, entries...)
|
||||
}
|
||||
|
||||
// Remote management (never print the key)
|
||||
if oldCfg.RemoteManagement.AllowRemote != newCfg.RemoteManagement.AllowRemote {
|
||||
|
||||
@@ -71,6 +71,21 @@ func ComputeCodexModelsHash(models []config.CodexModel) string {
|
||||
return hashJoined(keys)
|
||||
}
|
||||
|
||||
// ComputeGeminiModelsHash returns a stable hash for Gemini model aliases.
|
||||
func ComputeGeminiModelsHash(models []config.GeminiModel) string {
|
||||
keys := normalizeModelPairs(func(out func(key string)) {
|
||||
for _, model := range models {
|
||||
name := strings.TrimSpace(model.Name)
|
||||
alias := strings.TrimSpace(model.Alias)
|
||||
if name == "" && alias == "" {
|
||||
continue
|
||||
}
|
||||
out(strings.ToLower(name) + "|" + strings.ToLower(alias))
|
||||
}
|
||||
})
|
||||
return hashJoined(keys)
|
||||
}
|
||||
|
||||
// ComputeExcludedModelsHash returns a normalized hash for excluded model lists.
|
||||
func ComputeExcludedModelsHash(excluded []string) string {
|
||||
if len(excluded) == 0 {
|
||||
|
||||
121
internal/watcher/diff/models_summary.go
Normal file
121
internal/watcher/diff/models_summary.go
Normal file
@@ -0,0 +1,121 @@
|
||||
package diff
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
)
|
||||
|
||||
type GeminiModelsSummary struct {
|
||||
hash string
|
||||
count int
|
||||
}
|
||||
|
||||
type ClaudeModelsSummary struct {
|
||||
hash string
|
||||
count int
|
||||
}
|
||||
|
||||
type CodexModelsSummary struct {
|
||||
hash string
|
||||
count int
|
||||
}
|
||||
|
||||
type VertexModelsSummary struct {
|
||||
hash string
|
||||
count int
|
||||
}
|
||||
|
||||
// SummarizeGeminiModels hashes Gemini model aliases for change detection.
|
||||
func SummarizeGeminiModels(models []config.GeminiModel) GeminiModelsSummary {
|
||||
if len(models) == 0 {
|
||||
return GeminiModelsSummary{}
|
||||
}
|
||||
keys := normalizeModelPairs(func(out func(key string)) {
|
||||
for _, model := range models {
|
||||
name := strings.TrimSpace(model.Name)
|
||||
alias := strings.TrimSpace(model.Alias)
|
||||
if name == "" && alias == "" {
|
||||
continue
|
||||
}
|
||||
out(strings.ToLower(name) + "|" + strings.ToLower(alias))
|
||||
}
|
||||
})
|
||||
return GeminiModelsSummary{
|
||||
hash: hashJoined(keys),
|
||||
count: len(keys),
|
||||
}
|
||||
}
|
||||
|
||||
// SummarizeClaudeModels hashes Claude model aliases for change detection.
|
||||
func SummarizeClaudeModels(models []config.ClaudeModel) ClaudeModelsSummary {
|
||||
if len(models) == 0 {
|
||||
return ClaudeModelsSummary{}
|
||||
}
|
||||
keys := normalizeModelPairs(func(out func(key string)) {
|
||||
for _, model := range models {
|
||||
name := strings.TrimSpace(model.Name)
|
||||
alias := strings.TrimSpace(model.Alias)
|
||||
if name == "" && alias == "" {
|
||||
continue
|
||||
}
|
||||
out(strings.ToLower(name) + "|" + strings.ToLower(alias))
|
||||
}
|
||||
})
|
||||
return ClaudeModelsSummary{
|
||||
hash: hashJoined(keys),
|
||||
count: len(keys),
|
||||
}
|
||||
}
|
||||
|
||||
// SummarizeCodexModels hashes Codex model aliases for change detection.
|
||||
func SummarizeCodexModels(models []config.CodexModel) CodexModelsSummary {
|
||||
if len(models) == 0 {
|
||||
return CodexModelsSummary{}
|
||||
}
|
||||
keys := normalizeModelPairs(func(out func(key string)) {
|
||||
for _, model := range models {
|
||||
name := strings.TrimSpace(model.Name)
|
||||
alias := strings.TrimSpace(model.Alias)
|
||||
if name == "" && alias == "" {
|
||||
continue
|
||||
}
|
||||
out(strings.ToLower(name) + "|" + strings.ToLower(alias))
|
||||
}
|
||||
})
|
||||
return CodexModelsSummary{
|
||||
hash: hashJoined(keys),
|
||||
count: len(keys),
|
||||
}
|
||||
}
|
||||
|
||||
// SummarizeVertexModels hashes Vertex-compatible model aliases for change detection.
|
||||
func SummarizeVertexModels(models []config.VertexCompatModel) VertexModelsSummary {
|
||||
if len(models) == 0 {
|
||||
return VertexModelsSummary{}
|
||||
}
|
||||
names := make([]string, 0, len(models))
|
||||
for _, model := range models {
|
||||
name := strings.TrimSpace(model.Name)
|
||||
alias := strings.TrimSpace(model.Alias)
|
||||
if name == "" && alias == "" {
|
||||
continue
|
||||
}
|
||||
if alias != "" {
|
||||
name = alias
|
||||
}
|
||||
names = append(names, name)
|
||||
}
|
||||
if len(names) == 0 {
|
||||
return VertexModelsSummary{}
|
||||
}
|
||||
sort.Strings(names)
|
||||
sum := sha256.Sum256([]byte(strings.Join(names, "|")))
|
||||
return VertexModelsSummary{
|
||||
hash: hex.EncodeToString(sum[:]),
|
||||
count: len(names),
|
||||
}
|
||||
}
|
||||
@@ -116,36 +116,3 @@ func SummarizeAmpModelMappings(mappings []config.AmpModelMapping) AmpModelMappin
|
||||
count: len(entries),
|
||||
}
|
||||
}
|
||||
|
||||
type VertexModelsSummary struct {
|
||||
hash string
|
||||
count int
|
||||
}
|
||||
|
||||
// SummarizeVertexModels hashes vertex-compatible models for change detection.
|
||||
func SummarizeVertexModels(models []config.VertexCompatModel) VertexModelsSummary {
|
||||
if len(models) == 0 {
|
||||
return VertexModelsSummary{}
|
||||
}
|
||||
names := make([]string, 0, len(models))
|
||||
for _, m := range models {
|
||||
name := strings.TrimSpace(m.Name)
|
||||
alias := strings.TrimSpace(m.Alias)
|
||||
if name == "" && alias == "" {
|
||||
continue
|
||||
}
|
||||
if alias != "" {
|
||||
name = alias
|
||||
}
|
||||
names = append(names, name)
|
||||
}
|
||||
if len(names) == 0 {
|
||||
return VertexModelsSummary{}
|
||||
}
|
||||
sort.Strings(names)
|
||||
sum := sha256.Sum256([]byte(strings.Join(names, "|")))
|
||||
return VertexModelsSummary{
|
||||
hash: hex.EncodeToString(sum[:]),
|
||||
count: len(names),
|
||||
}
|
||||
}
|
||||
|
||||
98
internal/watcher/diff/oauth_model_mappings.go
Normal file
98
internal/watcher/diff/oauth_model_mappings.go
Normal file
@@ -0,0 +1,98 @@
|
||||
package diff
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
)
|
||||
|
||||
type OAuthModelMappingsSummary struct {
|
||||
hash string
|
||||
count int
|
||||
}
|
||||
|
||||
// SummarizeOAuthModelMappings summarizes OAuth model mappings per channel.
|
||||
func SummarizeOAuthModelMappings(entries map[string][]config.ModelNameMapping) map[string]OAuthModelMappingsSummary {
|
||||
if len(entries) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make(map[string]OAuthModelMappingsSummary, len(entries))
|
||||
for k, v := range entries {
|
||||
key := strings.ToLower(strings.TrimSpace(k))
|
||||
if key == "" {
|
||||
continue
|
||||
}
|
||||
out[key] = summarizeOAuthModelMappingList(v)
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return nil
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// DiffOAuthModelMappingChanges compares OAuth model mappings maps.
|
||||
func DiffOAuthModelMappingChanges(oldMap, newMap map[string][]config.ModelNameMapping) ([]string, []string) {
|
||||
oldSummary := SummarizeOAuthModelMappings(oldMap)
|
||||
newSummary := SummarizeOAuthModelMappings(newMap)
|
||||
keys := make(map[string]struct{}, len(oldSummary)+len(newSummary))
|
||||
for k := range oldSummary {
|
||||
keys[k] = struct{}{}
|
||||
}
|
||||
for k := range newSummary {
|
||||
keys[k] = struct{}{}
|
||||
}
|
||||
changes := make([]string, 0, len(keys))
|
||||
affected := make([]string, 0, len(keys))
|
||||
for key := range keys {
|
||||
oldInfo, okOld := oldSummary[key]
|
||||
newInfo, okNew := newSummary[key]
|
||||
switch {
|
||||
case okOld && !okNew:
|
||||
changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: removed", key))
|
||||
affected = append(affected, key)
|
||||
case !okOld && okNew:
|
||||
changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: added (%d entries)", key, newInfo.count))
|
||||
affected = append(affected, key)
|
||||
case okOld && okNew && oldInfo.hash != newInfo.hash:
|
||||
changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: updated (%d -> %d entries)", key, oldInfo.count, newInfo.count))
|
||||
affected = append(affected, key)
|
||||
}
|
||||
}
|
||||
sort.Strings(changes)
|
||||
sort.Strings(affected)
|
||||
return changes, affected
|
||||
}
|
||||
|
||||
func summarizeOAuthModelMappingList(list []config.ModelNameMapping) OAuthModelMappingsSummary {
|
||||
if len(list) == 0 {
|
||||
return OAuthModelMappingsSummary{}
|
||||
}
|
||||
seen := make(map[string]struct{}, len(list))
|
||||
normalized := make([]string, 0, len(list))
|
||||
for _, mapping := range list {
|
||||
name := strings.ToLower(strings.TrimSpace(mapping.Name))
|
||||
alias := strings.ToLower(strings.TrimSpace(mapping.Alias))
|
||||
if name == "" || alias == "" {
|
||||
continue
|
||||
}
|
||||
key := name + "->" + alias
|
||||
if _, exists := seen[key]; exists {
|
||||
continue
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
normalized = append(normalized, key)
|
||||
}
|
||||
if len(normalized) == 0 {
|
||||
return OAuthModelMappingsSummary{}
|
||||
}
|
||||
sort.Strings(normalized)
|
||||
sum := sha256.Sum256([]byte(strings.Join(normalized, "|")))
|
||||
return OAuthModelMappingsSummary{
|
||||
hash: hex.EncodeToString(sum[:]),
|
||||
count: len(normalized),
|
||||
}
|
||||
}
|
||||
@@ -62,6 +62,9 @@ func (s *ConfigSynthesizer) synthesizeGeminiKeys(ctx *SynthesisContext) []*corea
|
||||
if base != "" {
|
||||
attrs["base_url"] = base
|
||||
}
|
||||
if hash := diff.ComputeGeminiModelsHash(entry.Models); hash != "" {
|
||||
attrs["models_hash"] = hash
|
||||
}
|
||||
addConfigHeadersToAttrs(entry.Headers, attrs)
|
||||
a := &coreauth.Auth{
|
||||
ID: id,
|
||||
|
||||
@@ -20,6 +20,7 @@ type ManagementTokenRequester interface {
|
||||
RequestQwenToken(*gin.Context)
|
||||
RequestIFlowToken(*gin.Context)
|
||||
RequestIFlowCookieToken(*gin.Context)
|
||||
GetAuthStatus(c *gin.Context)
|
||||
}
|
||||
|
||||
type managementTokenRequester struct {
|
||||
@@ -60,3 +61,7 @@ func (m *managementTokenRequester) RequestIFlowToken(c *gin.Context) {
|
||||
func (m *managementTokenRequester) RequestIFlowCookieToken(c *gin.Context) {
|
||||
m.handler.RequestIFlowCookieToken(c)
|
||||
}
|
||||
|
||||
func (m *managementTokenRequester) GetAuthStatus(c *gin.Context) {
|
||||
m.handler.GetAuthStatus(c)
|
||||
}
|
||||
|
||||
@@ -413,7 +413,7 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req
|
||||
}
|
||||
execReq := req
|
||||
execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth)
|
||||
execReq.Metadata = m.applyOAuthModelMappingMetadata(auth, execReq.Model, execReq.Metadata)
|
||||
execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata)
|
||||
resp, errExec := executor.Execute(execCtx, auth, execReq, opts)
|
||||
result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil}
|
||||
if errExec != nil {
|
||||
@@ -475,7 +475,7 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string,
|
||||
}
|
||||
execReq := req
|
||||
execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth)
|
||||
execReq.Metadata = m.applyOAuthModelMappingMetadata(auth, execReq.Model, execReq.Metadata)
|
||||
execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata)
|
||||
resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts)
|
||||
result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil}
|
||||
if errExec != nil {
|
||||
@@ -537,7 +537,7 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string
|
||||
}
|
||||
execReq := req
|
||||
execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth)
|
||||
execReq.Metadata = m.applyOAuthModelMappingMetadata(auth, execReq.Model, execReq.Metadata)
|
||||
execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata)
|
||||
chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts)
|
||||
if errStream != nil {
|
||||
rerr := &Error{Message: errStream.Error()}
|
||||
|
||||
@@ -65,17 +65,14 @@ func (m *Manager) SetOAuthModelMappings(mappings map[string][]internalconfig.Mod
|
||||
m.modelNameMappings.Store(table)
|
||||
}
|
||||
|
||||
func (m *Manager) applyOAuthModelMappingMetadata(auth *Auth, requestedModel string, metadata map[string]any) map[string]any {
|
||||
original := m.resolveOAuthUpstreamModel(auth, requestedModel)
|
||||
if original == "" {
|
||||
return metadata
|
||||
}
|
||||
if metadata != nil {
|
||||
if v, ok := metadata[util.ModelMappingOriginalModelMetadataKey]; ok {
|
||||
if s, okStr := v.(string); okStr && strings.EqualFold(s, original) {
|
||||
return metadata
|
||||
}
|
||||
}
|
||||
// applyOAuthModelMapping resolves the upstream model from OAuth model mappings
|
||||
// and returns the resolved model along with updated metadata. If a mapping exists,
|
||||
// the returned model is the upstream model and metadata contains the original
|
||||
// requested model for response translation.
|
||||
func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, metadata map[string]any) (string, map[string]any) {
|
||||
upstreamModel := m.resolveOAuthUpstreamModel(auth, requestedModel)
|
||||
if upstreamModel == "" {
|
||||
return requestedModel, metadata
|
||||
}
|
||||
out := make(map[string]any, 1)
|
||||
if len(metadata) > 0 {
|
||||
@@ -84,8 +81,10 @@ func (m *Manager) applyOAuthModelMappingMetadata(auth *Auth, requestedModel stri
|
||||
out[k] = v
|
||||
}
|
||||
}
|
||||
out[util.ModelMappingOriginalModelMetadataKey] = original
|
||||
return out
|
||||
// Store the requested alias (e.g., "gp") so downstream can use it to look up
|
||||
// model metadata from the global registry where it was registered under this alias.
|
||||
out[util.ModelMappingOriginalModelMetadataKey] = requestedModel
|
||||
return upstreamModel, out
|
||||
}
|
||||
|
||||
func (m *Manager) resolveOAuthUpstreamModel(auth *Auth, requestedModel string) string {
|
||||
|
||||
@@ -13,6 +13,7 @@ type ModelRegistry interface {
|
||||
ClearModelQuotaExceeded(clientID, modelID string)
|
||||
ClientSupportsModel(clientID, modelID string) bool
|
||||
GetAvailableModels(handlerType string) []map[string]any
|
||||
GetAvailableModelsByProvider(provider string) []*ModelInfo
|
||||
}
|
||||
|
||||
// GlobalModelRegistry returns the shared registry instance.
|
||||
|
||||
211
test/model_alias_thinking_suffix_test.go
Normal file
211
test/model_alias_thinking_suffix_test.go
Normal file
@@ -0,0 +1,211 @@
|
||||
package test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// TestModelAliasThinkingSuffix tests the 32 test cases defined in docs/thinking_suffix_test_cases.md
|
||||
// These tests verify the thinking suffix parsing and application logic across different providers.
|
||||
func TestModelAliasThinkingSuffix(t *testing.T) {
|
||||
tests := []struct {
|
||||
id int
|
||||
name string
|
||||
provider string
|
||||
requestModel string
|
||||
suffixType string
|
||||
expectedField string // "thinkingBudget", "thinkingLevel", "budget_tokens", "reasoning_effort", "enable_thinking"
|
||||
expectedValue any
|
||||
upstreamModel string // The upstream model after alias resolution
|
||||
isAlias bool
|
||||
}{
|
||||
// === 1. Antigravity Provider ===
|
||||
// 1.1 Budget-only models (Gemini 2.5)
|
||||
{1, "antigravity_original_numeric", "antigravity", "gemini-2.5-computer-use-preview-10-2025(1000)", "numeric", "thinkingBudget", 1000, "gemini-2.5-computer-use-preview-10-2025", false},
|
||||
{2, "antigravity_alias_numeric", "antigravity", "gp(1000)", "numeric", "thinkingBudget", 1000, "gemini-2.5-computer-use-preview-10-2025", true},
|
||||
// 1.2 Budget+Levels models (Gemini 3)
|
||||
{3, "antigravity_original_numeric_to_level", "antigravity", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
|
||||
{4, "antigravity_original_level", "antigravity", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
|
||||
{5, "antigravity_alias_numeric_to_level", "antigravity", "gf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
|
||||
{6, "antigravity_alias_level", "antigravity", "gf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
|
||||
|
||||
// === 2. Gemini CLI Provider ===
|
||||
// 2.1 Budget-only models
|
||||
{7, "gemini_cli_original_numeric", "gemini-cli", "gemini-2.5-pro(8192)", "numeric", "thinkingBudget", 8192, "gemini-2.5-pro", false},
|
||||
{8, "gemini_cli_alias_numeric", "gemini-cli", "g25p(8192)", "numeric", "thinkingBudget", 8192, "gemini-2.5-pro", true},
|
||||
// 2.2 Budget+Levels models
|
||||
{9, "gemini_cli_original_numeric_to_level", "gemini-cli", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
|
||||
{10, "gemini_cli_original_level", "gemini-cli", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
|
||||
{11, "gemini_cli_alias_numeric_to_level", "gemini-cli", "gf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
|
||||
{12, "gemini_cli_alias_level", "gemini-cli", "gf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
|
||||
|
||||
// === 3. Vertex Provider ===
|
||||
// 3.1 Budget-only models
|
||||
{13, "vertex_original_numeric", "vertex", "gemini-2.5-pro(16384)", "numeric", "thinkingBudget", 16384, "gemini-2.5-pro", false},
|
||||
{14, "vertex_alias_numeric", "vertex", "vg25p(16384)", "numeric", "thinkingBudget", 16384, "gemini-2.5-pro", true},
|
||||
// 3.2 Budget+Levels models
|
||||
{15, "vertex_original_numeric_to_level", "vertex", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
|
||||
{16, "vertex_original_level", "vertex", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
|
||||
{17, "vertex_alias_numeric_to_level", "vertex", "vgf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
|
||||
{18, "vertex_alias_level", "vertex", "vgf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
|
||||
|
||||
// === 4. AI Studio Provider ===
|
||||
// 4.1 Budget-only models
|
||||
{19, "aistudio_original_numeric", "aistudio", "gemini-2.5-pro(12000)", "numeric", "thinkingBudget", 12000, "gemini-2.5-pro", false},
|
||||
{20, "aistudio_alias_numeric", "aistudio", "ag25p(12000)", "numeric", "thinkingBudget", 12000, "gemini-2.5-pro", true},
|
||||
// 4.2 Budget+Levels models
|
||||
{21, "aistudio_original_numeric_to_level", "aistudio", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
|
||||
{22, "aistudio_original_level", "aistudio", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
|
||||
{23, "aistudio_alias_numeric_to_level", "aistudio", "agf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
|
||||
{24, "aistudio_alias_level", "aistudio", "agf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
|
||||
|
||||
// === 5. Claude Provider ===
|
||||
{25, "claude_original_numeric", "claude", "claude-sonnet-4-5-20250929(16384)", "numeric", "budget_tokens", 16384, "claude-sonnet-4-5-20250929", false},
|
||||
{26, "claude_alias_numeric", "claude", "cs45(16384)", "numeric", "budget_tokens", 16384, "claude-sonnet-4-5-20250929", true},
|
||||
|
||||
// === 6. Codex Provider ===
|
||||
{27, "codex_original_level", "codex", "gpt-5(high)", "level", "reasoning_effort", "high", "gpt-5", false},
|
||||
{28, "codex_alias_level", "codex", "g5(high)", "level", "reasoning_effort", "high", "gpt-5", true},
|
||||
|
||||
// === 7. Qwen Provider ===
|
||||
{29, "qwen_original_level", "qwen", "qwen3-coder-plus(high)", "level", "enable_thinking", true, "qwen3-coder-plus", false},
|
||||
{30, "qwen_alias_level", "qwen", "qcp(high)", "level", "enable_thinking", true, "qwen3-coder-plus", true},
|
||||
|
||||
// === 8. iFlow Provider ===
|
||||
{31, "iflow_original_level", "iflow", "glm-4.7(high)", "level", "reasoning_effort", "high", "glm-4.7", false},
|
||||
{32, "iflow_alias_level", "iflow", "glm(high)", "level", "reasoning_effort", "high", "glm-4.7", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Step 1: Parse model suffix (simulates SDK layer normalization)
|
||||
// For "gp(1000)" -> requestedModel="gp", metadata={thinking_budget: 1000}
|
||||
requestedModel, metadata := util.NormalizeThinkingModel(tt.requestModel)
|
||||
|
||||
// Verify suffix was parsed
|
||||
if metadata == nil && (tt.suffixType == "numeric" || tt.suffixType == "level") {
|
||||
t.Errorf("Case #%d: NormalizeThinkingModel(%q) metadata is nil", tt.id, tt.requestModel)
|
||||
return
|
||||
}
|
||||
|
||||
// Step 2: Simulate OAuth model mapping
|
||||
// Real flow: applyOAuthModelMapping stores requestedModel (the alias) in metadata
|
||||
if tt.isAlias {
|
||||
if metadata == nil {
|
||||
metadata = make(map[string]any)
|
||||
}
|
||||
metadata[util.ModelMappingOriginalModelMetadataKey] = requestedModel
|
||||
}
|
||||
|
||||
// Step 3: Verify metadata extraction
|
||||
switch tt.suffixType {
|
||||
case "numeric":
|
||||
budget, _, _, matched := util.ThinkingFromMetadata(metadata)
|
||||
if !matched {
|
||||
t.Errorf("Case #%d: ThinkingFromMetadata did not match", tt.id)
|
||||
return
|
||||
}
|
||||
if budget == nil {
|
||||
t.Errorf("Case #%d: expected budget in metadata", tt.id)
|
||||
return
|
||||
}
|
||||
// For thinkingBudget/budget_tokens, verify the parsed budget value
|
||||
if tt.expectedField == "thinkingBudget" || tt.expectedField == "budget_tokens" {
|
||||
expectedBudget := tt.expectedValue.(int)
|
||||
if *budget != expectedBudget {
|
||||
t.Errorf("Case #%d: budget = %d, want %d", tt.id, *budget, expectedBudget)
|
||||
}
|
||||
}
|
||||
// For thinkingLevel (Gemini 3), verify conversion from budget to level
|
||||
if tt.expectedField == "thinkingLevel" {
|
||||
level, ok := util.ThinkingBudgetToGemini3Level(tt.upstreamModel, *budget)
|
||||
if !ok {
|
||||
t.Errorf("Case #%d: ThinkingBudgetToGemini3Level failed", tt.id)
|
||||
return
|
||||
}
|
||||
expectedLevel := tt.expectedValue.(string)
|
||||
if level != expectedLevel {
|
||||
t.Errorf("Case #%d: converted level = %q, want %q", tt.id, level, expectedLevel)
|
||||
}
|
||||
}
|
||||
|
||||
case "level":
|
||||
_, _, effort, matched := util.ThinkingFromMetadata(metadata)
|
||||
if !matched {
|
||||
t.Errorf("Case #%d: ThinkingFromMetadata did not match", tt.id)
|
||||
return
|
||||
}
|
||||
if effort == nil {
|
||||
t.Errorf("Case #%d: expected effort in metadata", tt.id)
|
||||
return
|
||||
}
|
||||
if tt.expectedField == "thinkingLevel" || tt.expectedField == "reasoning_effort" {
|
||||
expectedEffort := tt.expectedValue.(string)
|
||||
if *effort != expectedEffort {
|
||||
t.Errorf("Case #%d: effort = %q, want %q", tt.id, *effort, expectedEffort)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 4: Test Gemini-specific thinkingLevel conversion for Gemini 3 models
|
||||
if tt.expectedField == "thinkingLevel" && util.IsGemini3Model(tt.upstreamModel) {
|
||||
body := []byte(`{"request":{"contents":[]}}`)
|
||||
|
||||
// Build metadata simulating real OAuth flow:
|
||||
// - requestedModel (alias like "gf") is stored in model_mapping_original_model
|
||||
// - upstreamModel is passed as the model parameter
|
||||
testMetadata := make(map[string]any)
|
||||
if tt.isAlias {
|
||||
// Real flow: applyOAuthModelMapping stores requestedModel (the alias)
|
||||
testMetadata[util.ModelMappingOriginalModelMetadataKey] = requestedModel
|
||||
}
|
||||
// Copy parsed metadata (thinking_budget, reasoning_effort, etc.)
|
||||
for k, v := range metadata {
|
||||
testMetadata[k] = v
|
||||
}
|
||||
|
||||
result := util.ApplyGemini3ThinkingLevelFromMetadataCLI(tt.upstreamModel, testMetadata, body)
|
||||
levelVal := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel")
|
||||
|
||||
expectedLevel := tt.expectedValue.(string)
|
||||
if !levelVal.Exists() {
|
||||
t.Errorf("Case #%d: expected thinkingLevel in result", tt.id)
|
||||
} else if levelVal.String() != expectedLevel {
|
||||
t.Errorf("Case #%d: thinkingLevel = %q, want %q", tt.id, levelVal.String(), expectedLevel)
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5: Test Gemini 2.5 thinkingBudget application using real ApplyThinkingMetadataCLI flow
|
||||
if tt.expectedField == "thinkingBudget" && util.IsGemini25Model(tt.upstreamModel) {
|
||||
body := []byte(`{"request":{"contents":[]}}`)
|
||||
|
||||
// Build metadata simulating real OAuth flow:
|
||||
// - requestedModel (alias like "gp") is stored in model_mapping_original_model
|
||||
// - upstreamModel is passed as the model parameter
|
||||
testMetadata := make(map[string]any)
|
||||
if tt.isAlias {
|
||||
// Real flow: applyOAuthModelMapping stores requestedModel (the alias)
|
||||
testMetadata[util.ModelMappingOriginalModelMetadataKey] = requestedModel
|
||||
}
|
||||
// Copy parsed metadata (thinking_budget, reasoning_effort, etc.)
|
||||
for k, v := range metadata {
|
||||
testMetadata[k] = v
|
||||
}
|
||||
|
||||
// Use the exported ApplyThinkingMetadataCLI which includes the fallback logic
|
||||
result := executor.ApplyThinkingMetadataCLI(body, testMetadata, tt.upstreamModel)
|
||||
budgetVal := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget")
|
||||
|
||||
expectedBudget := tt.expectedValue.(int)
|
||||
if !budgetVal.Exists() {
|
||||
t.Errorf("Case #%d: expected thinkingBudget in result", tt.id)
|
||||
} else if int(budgetVal.Int()) != expectedBudget {
|
||||
t.Errorf("Case #%d: thinkingBudget = %d, want %d", tt.id, int(budgetVal.Int()), expectedBudget)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user