Compare commits

..

20 Commits

Author SHA1 Message Date
Luis Pater
2a663d5cba feat(executor): enhance payload translation with original request context
Refactored `applyPayloadConfig` to `applyPayloadConfigWithRoot`, adding support for default rule validation against the original payload when available. Updated all executors to use `applyPayloadConfigWithRoot` and incorporate an optional original request payload for translations.
2026-01-02 00:03:26 +08:00
Luis Pater
750b930679 Merge pull request #823 from router-for-me/translator
feat(translator): enhance Claude-to-OpenAI conversion with thinking block and tool result handling
2026-01-01 20:16:10 +08:00
hkfires
3902fd7501 fix(iflow): remove thinking field from request body in thinking config handler 2026-01-01 19:40:28 +08:00
hkfires
4fc3d5e935 refactor(iflow): simplify thinking config handling for GLM and MiniMax models 2026-01-01 19:31:08 +08:00
hkfires
2d2f4572a7 fix(translator): remove unnecessary whitespace trimming in reasoning text collection 2026-01-01 12:39:09 +08:00
hkfires
8f4c46f38d fix(translator): emit tool_result messages before user content in Claude-to-OpenAI conversion 2026-01-01 11:11:43 +08:00
hkfires
b6ba51bc2a feat(translator): add thinking block and tool result handling for Claude-to-OpenAI conversion 2026-01-01 09:41:25 +08:00
Luis Pater
6a66d32d37 Merge pull request #803 from HsnSaboor/fix-invalid-function-names-sanitization-v2
feat(translator): resolve invalid function name errors by sanitizing Claude tool names
2026-01-01 01:15:50 +08:00
Luis Pater
8d15723195 feat(registry): add GetAvailableModelsByProvider method for retrieving models by provider 2025-12-31 23:37:46 +08:00
Chén Mù
736e0aae86 Merge pull request #814 from router-for-me/aistudio
Fix model alias thinking suffix
2025-12-31 03:08:05 -08:00
hkfires
8bf3305b2b fix(thinking): fallback to upstream model for thinking support when alias not in registry 2025-12-31 18:07:13 +08:00
hkfires
d00e3ea973 feat(thinking): add numeric budget to thinkingLevel conversion fallback 2025-12-31 17:14:47 +08:00
hkfires
89db4e9481 fix(thinking): use model alias for thinking config resolution in mapped models 2025-12-31 17:09:22 +08:00
hkfires
e332419081 feat(registry): add thinking support for gemini-2.5-computer-use-preview model 2025-12-31 17:09:22 +08:00
Luis Pater
e998b1229a feat(updater): add fallback URL and logic for missing management asset 2025-12-31 11:51:20 +08:00
Luis Pater
bbed134bd1 feat(api): add GetAuthStatus method to ManagementTokenRequester interface 2025-12-31 09:40:48 +08:00
Saboor Hassan
47b9503112 chore: revert changes to internal/translator to comply with path guard
This commit reverts all modifications within internal/translator. A separate issue
will be created for the maintenance team to integrate SanitizeFunctionName into
the translators.

Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
2025-12-31 02:19:26 +05:00
Saboor Hassan
3b9253c2be fix(translator): resolve invalid function name errors by sanitizing Claude tool names
This commit centralizes tool name sanitization in SanitizeFunctionName,
applying character compliance, starting character rules, and length limits.
It also fixes a regression in gemini_schema tests and preserves MCP-specific
shortening logic while ensuring compliance.

Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
2025-12-31 02:14:46 +05:00
Saboor Hassan
d241359153 fix(translator): address PR feedback for tool name sanitization
- Pre-compile sanitization regex for better performance.
- Optimize SanitizeFunctionName for conciseness and correctness.
- Handle 64-char edge cases by truncating before prepending underscore.
- Fix bug in Antigravity translator (incorrect join index).
- Refactor Gemini translators to avoid redundant sanitization calls.
- Add comprehensive unit tests including 64-char edge cases.

Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
2025-12-31 01:54:41 +05:00
Saboor Hassan
f4d4249ba5 feat(translator): sanitize tool/function names for upstream provider compatibility
Implemented SanitizeFunctionName utility to ensure Claude tool names meet
Gemini/Upstream strict naming conventions (alphanumeric, starts with letter/underscore, max 64 chars).
Applied sanitization to tool definitions and usage in all relevant translators.

Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
2025-12-31 01:41:07 +05:00
27 changed files with 1430 additions and 177 deletions

View File

@@ -24,10 +24,11 @@ import (
)
const (
defaultManagementReleaseURL = "https://api.github.com/repos/router-for-me/Cli-Proxy-API-Management-Center/releases/latest"
managementAssetName = "management.html"
httpUserAgent = "CLIProxyAPI-management-updater"
updateCheckInterval = 3 * time.Hour
defaultManagementReleaseURL = "https://api.github.com/repos/router-for-me/Cli-Proxy-API-Management-Center/releases/latest"
defaultManagementFallbackURL = "https://cpamc.router-for.me/"
managementAssetName = "management.html"
httpUserAgent = "CLIProxyAPI-management-updater"
updateCheckInterval = 3 * time.Hour
)
// ManagementFileName exposes the control panel asset filename.
@@ -198,6 +199,16 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
return
}
localPath := filepath.Join(staticDir, managementAssetName)
localFileMissing := false
if _, errStat := os.Stat(localPath); errStat != nil {
if errors.Is(errStat, os.ErrNotExist) {
localFileMissing = true
} else {
log.WithError(errStat).Debug("failed to stat local management asset")
}
}
// Rate limiting: check only once every 3 hours
lastUpdateCheckMu.Lock()
now := time.Now()
@@ -210,15 +221,14 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
lastUpdateCheckTime = now
lastUpdateCheckMu.Unlock()
if err := os.MkdirAll(staticDir, 0o755); err != nil {
log.WithError(err).Warn("failed to prepare static directory for management asset")
if errMkdirAll := os.MkdirAll(staticDir, 0o755); errMkdirAll != nil {
log.WithError(errMkdirAll).Warn("failed to prepare static directory for management asset")
return
}
releaseURL := resolveReleaseURL(panelRepository)
client := newHTTPClient(proxyURL)
localPath := filepath.Join(staticDir, managementAssetName)
localHash, err := fileSHA256(localPath)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
@@ -229,6 +239,13 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
asset, remoteHash, err := fetchLatestAsset(ctx, client, releaseURL)
if err != nil {
if localFileMissing {
log.WithError(err).Warn("failed to fetch latest management release information, trying fallback page")
if ensureFallbackManagementHTML(ctx, client, localPath) {
return
}
return
}
log.WithError(err).Warn("failed to fetch latest management release information")
return
}
@@ -240,6 +257,13 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
data, downloadedHash, err := downloadAsset(ctx, client, asset.BrowserDownloadURL)
if err != nil {
if localFileMissing {
log.WithError(err).Warn("failed to download management asset, trying fallback page")
if ensureFallbackManagementHTML(ctx, client, localPath) {
return
}
return
}
log.WithError(err).Warn("failed to download management asset")
return
}
@@ -256,6 +280,22 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
log.Infof("management asset updated successfully (hash=%s)", downloadedHash)
}
// ensureFallbackManagementHTML fetches the control panel page from
// defaultManagementFallbackURL and stores it at localPath via atomicWriteFile.
//
// It reports whether the page was both downloaded and persisted. A failure in
// either step is logged at warning level and yields false; no error is
// returned to the caller.
func ensureFallbackManagementHTML(ctx context.Context, client *http.Client, localPath string) bool {
	page, pageHash, errDownload := downloadAsset(ctx, client, defaultManagementFallbackURL)
	if errDownload != nil {
		log.WithError(errDownload).Warn("failed to download fallback management control panel page")
		return false
	}

	if errWrite := atomicWriteFile(localPath, page); errWrite != nil {
		log.WithError(errWrite).Warn("failed to persist fallback management control panel page")
		return false
	}

	log.Infof("management asset updated from fallback page successfully (hash=%s)", pageHash)
	return true
}
func resolveReleaseURL(repo string) string {
repo = strings.TrimSpace(repo)
if repo == "" {

View File

@@ -740,7 +740,7 @@ func GetIFlowModels() []*ModelInfo {
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
}
models := make([]*ModelInfo, 0, len(entries))
@@ -773,7 +773,7 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
return map[string]*AntigravityModelConfig{
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
"gemini-2.5-computer-use-preview-10-2025": {Name: "models/gemini-2.5-computer-use-preview-10-2025"},
"gemini-2.5-computer-use-preview-10-2025": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-2.5-computer-use-preview-10-2025"},
"gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"},
"gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"},
"gemini-3-flash-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"},

View File

@@ -625,6 +625,131 @@ func (r *ModelRegistry) GetAvailableModels(handlerType string) []map[string]any
return models
}
// GetAvailableModelsByProvider lists the models that clients registered under
// the given provider can currently offer.
//
// The provider argument is trimmed and lower-cased before it is compared with
// the provider recorded for each client. A model stays in the result while at
// least one of the provider's clients is neither within the 5-minute
// quota-exceeded window nor suspended for a non-quota reason. It also stays
// when every blocker is quota related (a recent quota hit or a "quota"
// suspension) and no client is suspended for another reason.
//
// Parameters:
//   - provider: Provider identifier (e.g., "codex", "gemini", "antigravity")
//
// Returns:
//   - []*ModelInfo: Models available for the provider, in unspecified order
//     (the result is built by iterating a map). nil when provider is blank or
//     no client of that provider has any model registered.
func (r *ModelRegistry) GetAvailableModelsByProvider(provider string) []*ModelInfo {
	provider = strings.ToLower(strings.TrimSpace(provider))
	if provider == "" {
		return nil
	}

	r.mutex.RLock()
	defer r.mutex.RUnlock()

	// tally tracks, per model ID, how many of the provider's clients list the
	// model and the first non-nil ModelInfo found for it.
	type tally struct {
		clients int
		info    *ModelInfo
	}

	tallies := make(map[string]*tally)
	for id, owner := range r.clientProviders {
		if owner != provider {
			continue
		}
		infos := r.clientModelInfos[id]
		for _, rawID := range r.clientModels[id] {
			modelID := strings.TrimSpace(rawID)
			if modelID == "" {
				continue
			}
			t, seen := tallies[modelID]
			if !seen {
				t = &tally{}
				tallies[modelID] = t
			}
			t.clients++
			if t.info != nil {
				continue
			}
			// Prefer the client's own model info; fall back to the
			// registry-wide registration when the client has none.
			if infos != nil {
				if info := infos[modelID]; info != nil {
					t.info = info
					continue
				}
			}
			if reg, ok := r.models[modelID]; ok && reg != nil && reg.Info != nil {
				t.info = reg.Info
			}
		}
	}
	if len(tallies) == 0 {
		return nil
	}

	const quotaWindow = 5 * time.Minute
	now := time.Now()
	available := make([]*ModelInfo, 0, len(tallies))
	for modelID, t := range tallies {
		if t == nil || t.clients <= 0 {
			continue
		}

		reg, found := r.models[modelID]
		hasReg := found && reg != nil

		var quotaBlocked, quotaSuspended, otherSuspended int
		if hasReg {
			// provider is non-empty here, so a client missing from
			// clientProviders (zero value "") never matches.
			for id, exceededAt := range reg.QuotaExceededClients {
				if id == "" || r.clientProviders[id] != provider {
					continue
				}
				if exceededAt != nil && now.Sub(*exceededAt) < quotaWindow {
					quotaBlocked++
				}
			}
			for id, reason := range reg.SuspendedClients {
				if id == "" || r.clientProviders[id] != provider {
					continue
				}
				if strings.EqualFold(reason, "quota") {
					quotaSuspended++
				} else {
					otherSuspended++
				}
			}
		}

		usable := t.clients - quotaBlocked - otherSuspended
		coolingDownOnly := (quotaBlocked > 0 || quotaSuspended > 0) && otherSuspended == 0
		if usable <= 0 && !coolingDownOnly {
			continue
		}

		switch {
		case t.info != nil:
			available = append(available, t.info)
		case hasReg && reg.Info != nil:
			available = append(available, reg.Info)
		}
	}

	return available
}
// GetModelCount returns the number of available clients for a specific model
// Parameters:
// - modelID: The model ID to check

View File

@@ -323,6 +323,11 @@ type translatedPayload struct {
func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) {
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, stream)
payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model)
payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload)
@@ -331,7 +336,7 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
payload = util.NormalizeGeminiThinkingBudget(req.Model, payload, true)
payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
payload = fixGeminiImageAspectRatio(req.Model, payload)
payload = applyPayloadConfig(e.cfg, req.Model, payload)
payload = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", payload, originalTranslated)
payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens")
payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType")
payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema")

View File

@@ -94,13 +94,18 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
from := opts.SourceFormat
to := sdktranslator.FromString("antigravity")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
baseURLs := antigravityBaseURLFallbackOrder(auth)
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -189,13 +194,18 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
from := opts.SourceFormat
to := sdktranslator.FromString("antigravity")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
translated = normalizeAntigravityThinking(req.Model, translated, true)
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
baseURLs := antigravityBaseURLFallbackOrder(auth)
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -525,13 +535,18 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
from := opts.SourceFormat
to := sdktranslator.FromString("antigravity")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated)
baseURLs := antigravityBaseURLFallbackOrder(auth)
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -697,8 +712,8 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
for idx, baseURL := range baseURLs {
payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model)
payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, payload)
payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, payload)
payload = normalizeAntigravityThinking(req.Model, payload, isClaude)
payload = deleteJSONField(payload, "project")
payload = deleteJSONField(payload, "model")

View File

@@ -57,6 +57,11 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
to := sdktranslator.FromString("claude")
// Use streaming translation to preserve function calling, except for claude.
stream := from != to
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, stream)
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream)
body, _ = sjson.SetBytes(body, "model", model)
// Inject thinking config based on model metadata for thinking variants
@@ -65,7 +70,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
if !strings.HasPrefix(model, "claude-3-5-haiku") {
body = checkSystemInstructions(body)
}
body = applyPayloadConfig(e.cfg, model, body)
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
body = disableThinkingIfToolChoiceForced(body)
@@ -167,12 +172,17 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
if override := e.resolveUpstreamModel(req.Model, auth); override != "" {
model = override
}
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
body, _ = sjson.SetBytes(body, "model", model)
// Inject thinking config based on model metadata for thinking variants
body = e.injectThinkingConfig(model, req.Metadata, body)
body = checkSystemInstructions(body)
body = applyPayloadConfig(e.cfg, model, body)
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
body = disableThinkingIfToolChoiceForced(body)

View File

@@ -56,13 +56,18 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
from := opts.SourceFormat
to := sdktranslator.FromString("codex")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
body = NormalizeThinkingConfig(body, model, false)
if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
return resp, errValidate
}
body = applyPayloadConfig(e.cfg, model, body)
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
body, _ = sjson.SetBytes(body, "model", model)
body, _ = sjson.SetBytes(body, "stream", true)
body, _ = sjson.DeleteBytes(body, "previous_response_id")
@@ -156,6 +161,11 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
from := opts.SourceFormat
to := sdktranslator.FromString("codex")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
@@ -163,7 +173,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
if errValidate := ValidateThinkingConfig(body, model); errValidate != nil {
return nil, errValidate
}
body = applyPayloadConfig(e.cfg, model, body)
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
body, _ = sjson.DeleteBytes(body, "previous_response_id")
body, _ = sjson.SetBytes(body, "model", model)

View File

@@ -77,14 +77,19 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
from := opts.SourceFormat
to := sdktranslator.FromString("gemini-cli")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
action := "generateContent"
if req.Metadata != nil {
@@ -216,14 +221,19 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
from := opts.SourceFormat
to := sdktranslator.FromString("gemini-cli")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated)
projectID := resolveGeminiProjectID(auth)
@@ -421,7 +431,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
// Gemini CLI endpoint when iterating fallback variants.
for _, attemptModel := range models {
payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model)
payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload)
payload = deleteJSONField(payload, "project")
payload = deleteJSONField(payload, "model")

View File

@@ -85,13 +85,18 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
// Official Gemini API via API key or OAuth bearer
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
body = ApplyThinkingMetadata(body, req.Metadata, model)
body = util.ApplyDefaultThinkingIfNeeded(model, body)
body = util.NormalizeGeminiThinkingBudget(model, body)
body = util.StripThinkingConfigIfUnsupported(model, body)
body = fixGeminiImageAspectRatio(model, body)
body = applyPayloadConfig(e.cfg, model, body)
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
body, _ = sjson.SetBytes(body, "model", model)
action := "generateContent"
@@ -183,13 +188,18 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
body = ApplyThinkingMetadata(body, req.Metadata, model)
body = util.ApplyDefaultThinkingIfNeeded(model, body)
body = util.NormalizeGeminiThinkingBudget(model, body)
body = util.StripThinkingConfigIfUnsupported(model, body)
body = fixGeminiImageAspectRatio(model, body)
body = applyPayloadConfig(e.cfg, model, body)
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
body, _ = sjson.SetBytes(body, "model", model)
baseURL := resolveGeminiBaseURL(auth)

View File

@@ -122,6 +122,11 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
@@ -134,7 +139,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body)
body = applyPayloadConfig(e.cfg, req.Model, body)
body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
body, _ = sjson.SetBytes(body, "model", req.Model)
action := "generateContent"
@@ -225,6 +230,11 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false)
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
if budgetOverride != nil {
@@ -237,7 +247,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
body = util.NormalizeGeminiThinkingBudget(model, body)
body = util.StripThinkingConfigIfUnsupported(model, body)
body = fixGeminiImageAspectRatio(model, body)
body = applyPayloadConfig(e.cfg, model, body)
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
body, _ = sjson.SetBytes(body, "model", model)
action := "generateContent"
@@ -324,6 +334,11 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
@@ -336,7 +351,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
body = util.NormalizeGeminiThinkingBudget(req.Model, body)
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body)
body = applyPayloadConfig(e.cfg, req.Model, body)
body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
body, _ = sjson.SetBytes(body, "model", req.Model)
baseURL := vertexBaseURL(location)
@@ -444,6 +459,11 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true)
body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true)
if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) {
if budgetOverride != nil {
@@ -456,7 +476,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
body = util.NormalizeGeminiThinkingBudget(model, body)
body = util.StripThinkingConfigIfUnsupported(model, body)
body = fixGeminiImageAspectRatio(model, body)
body = applyPayloadConfig(e.cfg, model, body)
body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
body, _ = sjson.SetBytes(body, "model", model)
// For API key auth, use simpler URL format without project/location

View File

@@ -56,6 +56,11 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
body, _ = sjson.SetBytes(body, "model", req.Model)
@@ -65,7 +70,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
}
body = applyIFlowThinkingConfig(body)
body = preserveReasoningContentInMessages(body)
body = applyPayloadConfig(e.cfg, req.Model, body)
body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -145,6 +150,11 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
@@ -160,7 +170,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
body = ensureToolsArray(body)
}
body = applyPayloadConfig(e.cfg, req.Model, body)
body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -441,21 +451,18 @@ func ensureToolsArray(body []byte) []byte {
return updated
}
// preserveReasoningContentInMessages ensures reasoning_content from assistant messages in the
// conversation history is preserved when sending to iFlow models that support thinking.
// This is critical for multi-turn conversations where the model needs to see its previous
// reasoning to maintain coherent thought chains across tool calls and conversation turns.
// preserveReasoningContentInMessages checks if reasoning_content from assistant messages
// is preserved in conversation history for iFlow models that support thinking.
// This is helpful for multi-turn conversations where the model may benefit from seeing
// its previous reasoning to maintain coherent thought chains.
//
// For GLM-4.7 and MiniMax-M2.1, the full assistant response (including reasoning) must be
// appended back into message history before the next call.
// For GLM-4.6/4.7 and MiniMax M2/M2.1, it is recommended to include the full assistant
// response (including reasoning_content) in message history for better context continuity.
func preserveReasoningContentInMessages(body []byte) []byte {
model := strings.ToLower(gjson.GetBytes(body, "model").String())
// Only apply to models that support thinking with history preservation
needsPreservation := strings.HasPrefix(model, "glm-4.7") ||
strings.HasPrefix(model, "glm-4-7") ||
strings.HasPrefix(model, "minimax-m2.1") ||
strings.HasPrefix(model, "minimax-m2-1")
needsPreservation := strings.HasPrefix(model, "glm-4") || strings.HasPrefix(model, "minimax-m2")
if !needsPreservation {
return body
@@ -493,45 +500,35 @@ func preserveReasoningContentInMessages(body []byte) []byte {
// This should be called after NormalizeThinkingConfig has processed the payload.
//
// Model-specific handling:
// - GLM-4.7: Uses extra_body={"thinking": {"type": "enabled"}, "clear_thinking": false}
// - MiniMax-M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
// - Other iFlow models: Uses chat_template_kwargs.enable_thinking (boolean)
// - GLM-4.6/4.7: Uses chat_template_kwargs.enable_thinking (boolean) and chat_template_kwargs.clear_thinking=false
// - MiniMax M2/M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation
func applyIFlowThinkingConfig(body []byte) []byte {
effort := gjson.GetBytes(body, "reasoning_effort")
model := strings.ToLower(gjson.GetBytes(body, "model").String())
// Check if thinking should be enabled
val := ""
if effort.Exists() {
val = strings.ToLower(strings.TrimSpace(effort.String()))
if !effort.Exists() {
return body
}
enableThinking := effort.Exists() && val != "none" && val != ""
model := strings.ToLower(gjson.GetBytes(body, "model").String())
val := strings.ToLower(strings.TrimSpace(effort.String()))
enableThinking := val != "none" && val != ""
// Remove reasoning_effort as we'll convert to model-specific format
if effort.Exists() {
body, _ = sjson.DeleteBytes(body, "reasoning_effort")
}
body, _ = sjson.DeleteBytes(body, "reasoning_effort")
body, _ = sjson.DeleteBytes(body, "thinking")
// GLM-4.7: Use extra_body with thinking config and clear_thinking: false
if strings.HasPrefix(model, "glm-4.7") || strings.HasPrefix(model, "glm-4-7") {
if enableThinking {
body, _ = sjson.SetBytes(body, "extra_body.thinking.type", "enabled")
body, _ = sjson.SetBytes(body, "extra_body.clear_thinking", false)
}
return body
}
// MiniMax-M2.1: Use reasoning_split=true for interleaved thinking
if strings.HasPrefix(model, "minimax-m2.1") || strings.HasPrefix(model, "minimax-m2-1") {
if enableThinking {
body, _ = sjson.SetBytes(body, "reasoning_split", true)
}
return body
}
// Other iFlow models (including GLM-4.6): Use chat_template_kwargs.enable_thinking
if effort.Exists() {
// GLM-4.6/4.7: Use chat_template_kwargs
if strings.HasPrefix(model, "glm-4") {
body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
if enableThinking {
body, _ = sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false)
}
return body
}
// MiniMax M2/M2.1: Use reasoning_split
if strings.HasPrefix(model, "minimax-m2") {
body, _ = sjson.SetBytes(body, "reasoning_split", enableThinking)
return body
}
return body

View File

@@ -53,12 +53,17 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
// Translate inbound request to OpenAI format
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, opts.Stream)
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream)
modelOverride := e.resolveUpstreamModel(req.Model, auth)
if modelOverride != "" {
translated = e.overrideModel(translated, modelOverride)
}
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated)
allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)
@@ -145,12 +150,17 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
}
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
modelOverride := e.resolveUpstreamModel(req.Model, auth)
if modelOverride != "" {
translated = e.overrideModel(translated, modelOverride)
}
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated)
allowCompat := e.allowCompatReasoningEffort(req.Model, auth)
translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat)
translated = NormalizeThinkingConfig(translated, req.Model, allowCompat)

View File

@@ -14,32 +14,54 @@ import (
// ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
// for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
// Use the alias from metadata if available, as it's registered in the global registry
// with thinking metadata; the upstream model name may not be registered.
lookupModel := util.ResolveOriginalModel(model, metadata)
// Determine which model to use for thinking support check.
// If the alias (lookupModel) is not in the registry, fall back to the upstream model.
thinkingModel := lookupModel
if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) {
thinkingModel = model
}
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata)
if !ok || (budgetOverride == nil && includeOverride == nil) {
return payload
}
if !util.ModelSupportsThinking(model) {
if !util.ModelSupportsThinking(thinkingModel) {
return payload
}
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride)
budgetOverride = &norm
}
return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
}
// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
// ApplyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
func ApplyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
// Use the alias from metadata if available, as it's registered in the global registry
// with thinking metadata; the upstream model name may not be registered.
lookupModel := util.ResolveOriginalModel(model, metadata)
// Determine which model to use for thinking support check.
// If the alias (lookupModel) is not in the registry, fall back to the upstream model.
thinkingModel := lookupModel
if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) {
thinkingModel = model
}
budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata)
if !ok || (budgetOverride == nil && includeOverride == nil) {
return payload
}
if !util.ModelSupportsThinking(model) {
if !util.ModelSupportsThinking(thinkingModel) {
return payload
}
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(model, *budgetOverride)
norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride)
budgetOverride = &norm
}
return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
@@ -82,17 +104,11 @@ func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model
return payload
}
// applyPayloadConfig applies payload default and override rules from configuration
// to the given JSON payload for the specified model.
// Defaults only fill missing fields, while overrides always overwrite existing values.
func applyPayloadConfig(cfg *config.Config, model string, payload []byte) []byte {
return applyPayloadConfigWithRoot(cfg, model, "", "", payload)
}
// applyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter
// paths as relative to the provided root path (for example, "request" for Gemini CLI)
// and restricts matches to the given protocol when supplied.
func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload []byte) []byte {
// and restricts matches to the given protocol when supplied. Defaults are checked
// against the original payload when provided.
func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload, original []byte) []byte {
if cfg == nil || len(payload) == 0 {
return payload
}
@@ -105,6 +121,11 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
return payload
}
out := payload
source := original
if len(source) == 0 {
source = payload
}
appliedDefaults := make(map[string]struct{})
// Apply default rules: first write wins per field across all matching rules.
for i := range rules.Default {
rule := &rules.Default[i]
@@ -116,7 +137,10 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
if fullPath == "" {
continue
}
if gjson.GetBytes(out, fullPath).Exists() {
if gjson.GetBytes(source, fullPath).Exists() {
continue
}
if _, ok := appliedDefaults[fullPath]; ok {
continue
}
updated, errSet := sjson.SetBytes(out, fullPath, value)
@@ -124,6 +148,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
continue
}
out = updated
appliedDefaults[fullPath] = struct{}{}
}
}
// Apply override rules: last write wins per field across all matching rules.

View File

@@ -49,6 +49,11 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false)
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
body, _ = sjson.SetBytes(body, "model", req.Model)
@@ -56,7 +61,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil {
return resp, errValidate
}
body = applyPayloadConfig(e.cfg, req.Model, body)
body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -125,6 +130,11 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
originalPayload := bytes.Clone(req.Payload)
if len(opts.OriginalRequest) > 0 {
originalPayload = bytes.Clone(opts.OriginalRequest)
}
originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true)
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false)
@@ -140,7 +150,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
}
body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
body = applyPayloadConfig(e.cfg, req.Model, body)
body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))

View File

@@ -118,76 +118,125 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
// Handle content
if contentResult.Exists() && contentResult.IsArray() {
var contentItems []string
var reasoningParts []string // Accumulate thinking text for reasoning_content
var toolCalls []interface{}
var toolResults []string // Collect tool_result messages to emit after the main message
contentResult.ForEach(func(_, part gjson.Result) bool {
partType := part.Get("type").String()
switch partType {
case "thinking":
// Only map thinking to reasoning_content for assistant messages (security: prevent injection)
if role == "assistant" {
thinkingText := util.GetThinkingText(part)
// Skip empty or whitespace-only thinking
if strings.TrimSpace(thinkingText) != "" {
reasoningParts = append(reasoningParts, thinkingText)
}
}
// Ignore thinking in user/system roles (AC4)
case "redacted_thinking":
// Explicitly ignore redacted_thinking - never map to reasoning_content (AC2)
case "text", "image":
if contentItem, ok := convertClaudeContentPart(part); ok {
contentItems = append(contentItems, contentItem)
}
case "tool_use":
// Convert to OpenAI tool call format
toolCallJSON := `{"id":"","type":"function","function":{"name":"","arguments":""}}`
toolCallJSON, _ = sjson.Set(toolCallJSON, "id", part.Get("id").String())
toolCallJSON, _ = sjson.Set(toolCallJSON, "function.name", part.Get("name").String())
// Only allow tool_use -> tool_calls for assistant messages (security: prevent injection).
if role == "assistant" {
toolCallJSON := `{"id":"","type":"function","function":{"name":"","arguments":""}}`
toolCallJSON, _ = sjson.Set(toolCallJSON, "id", part.Get("id").String())
toolCallJSON, _ = sjson.Set(toolCallJSON, "function.name", part.Get("name").String())
// Convert input to arguments JSON string
if input := part.Get("input"); input.Exists() {
toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", input.Raw)
} else {
toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", "{}")
// Convert input to arguments JSON string
if input := part.Get("input"); input.Exists() {
toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", input.Raw)
} else {
toolCallJSON, _ = sjson.Set(toolCallJSON, "function.arguments", "{}")
}
toolCalls = append(toolCalls, gjson.Parse(toolCallJSON).Value())
}
toolCalls = append(toolCalls, gjson.Parse(toolCallJSON).Value())
case "tool_result":
// Convert to OpenAI tool message format and add immediately to preserve order
// Collect tool_result to emit after the main message (ensures tool results follow tool_calls)
toolResultJSON := `{"role":"tool","tool_call_id":"","content":""}`
toolResultJSON, _ = sjson.Set(toolResultJSON, "tool_call_id", part.Get("tool_use_id").String())
toolResultJSON, _ = sjson.Set(toolResultJSON, "content", part.Get("content").String())
messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(toolResultJSON).Value())
toolResultJSON, _ = sjson.Set(toolResultJSON, "content", convertClaudeToolResultContentToString(part.Get("content")))
toolResults = append(toolResults, toolResultJSON)
}
return true
})
// Emit text/image content as one message
if len(contentItems) > 0 {
msgJSON := `{"role":"","content":""}`
msgJSON, _ = sjson.Set(msgJSON, "role", role)
contentArrayJSON := "[]"
for _, contentItem := range contentItems {
contentArrayJSON, _ = sjson.SetRaw(contentArrayJSON, "-1", contentItem)
}
msgJSON, _ = sjson.SetRaw(msgJSON, "content", contentArrayJSON)
contentValue := gjson.Get(msgJSON, "content")
hasContent := false
switch {
case !contentValue.Exists():
hasContent = false
case contentValue.Type == gjson.String:
hasContent = contentValue.String() != ""
case contentValue.IsArray():
hasContent = len(contentValue.Array()) > 0
default:
hasContent = contentValue.Raw != "" && contentValue.Raw != "null"
}
if hasContent {
messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(msgJSON).Value())
}
// Build reasoning content string
reasoningContent := ""
if len(reasoningParts) > 0 {
reasoningContent = strings.Join(reasoningParts, "\n\n")
}
// Emit tool calls in a separate assistant message
if role == "assistant" && len(toolCalls) > 0 {
toolCallMsgJSON := `{"role":"assistant","tool_calls":[]}`
toolCallMsgJSON, _ = sjson.Set(toolCallMsgJSON, "tool_calls", toolCalls)
messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(toolCallMsgJSON).Value())
hasContent := len(contentItems) > 0
hasReasoning := reasoningContent != ""
hasToolCalls := len(toolCalls) > 0
hasToolResults := len(toolResults) > 0
// OpenAI requires: tool messages MUST immediately follow the assistant message with tool_calls.
// Therefore, we emit tool_result messages FIRST (they respond to the previous assistant's tool_calls),
// then emit the current message's content.
for _, toolResultJSON := range toolResults {
messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(toolResultJSON).Value())
}
// For assistant messages: emit a single unified message with content, tool_calls, and reasoning_content
// This avoids splitting into multiple assistant messages which breaks OpenAI tool-call adjacency
if role == "assistant" {
if hasContent || hasReasoning || hasToolCalls {
msgJSON := `{"role":"assistant"}`
// Add content (as array if we have items, empty string if reasoning-only)
if hasContent {
contentArrayJSON := "[]"
for _, contentItem := range contentItems {
contentArrayJSON, _ = sjson.SetRaw(contentArrayJSON, "-1", contentItem)
}
msgJSON, _ = sjson.SetRaw(msgJSON, "content", contentArrayJSON)
} else {
// Ensure content field exists for OpenAI compatibility
msgJSON, _ = sjson.Set(msgJSON, "content", "")
}
// Add reasoning_content if present
if hasReasoning {
msgJSON, _ = sjson.Set(msgJSON, "reasoning_content", reasoningContent)
}
// Add tool_calls if present (in same message as content)
if hasToolCalls {
msgJSON, _ = sjson.Set(msgJSON, "tool_calls", toolCalls)
}
messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(msgJSON).Value())
}
} else {
// For non-assistant roles: emit content message if we have content
// If the message only contains tool_results (no text/image), we still processed them above
if hasContent {
msgJSON := `{"role":""}`
msgJSON, _ = sjson.Set(msgJSON, "role", role)
contentArrayJSON := "[]"
for _, contentItem := range contentItems {
contentArrayJSON, _ = sjson.SetRaw(contentArrayJSON, "-1", contentItem)
}
msgJSON, _ = sjson.SetRaw(msgJSON, "content", contentArrayJSON)
messagesJSON, _ = sjson.Set(messagesJSON, "-1", gjson.Parse(msgJSON).Value())
} else if hasToolResults && !hasContent {
// tool_results already emitted above, no additional user message needed
}
}
} else if contentResult.Exists() && contentResult.Type == gjson.String {
@@ -307,3 +356,43 @@ func convertClaudeContentPart(part gjson.Result) (string, bool) {
return "", false
}
}
// convertClaudeToolResultContentToString flattens the "content" value of a Claude
// tool_result part into a single string.
//
// Conversion rules, in order:
//   - a value that does not exist yields "";
//   - a JSON string yields its unquoted text;
//   - an array is flattened element by element (string elements as-is, objects with a
//     string "text" field by that text, everything else by its raw JSON) and the pieces
//     are joined with a blank line; when the joined text is blank the array's raw JSON
//     is returned instead;
//   - an object with a string "text" field yields that text;
//   - any other value yields its raw JSON.
func convertClaudeToolResultContentToString(content gjson.Result) string {
	switch {
	case !content.Exists():
		return ""

	case content.Type == gjson.String:
		return content.String()

	case content.IsArray():
		var segments []string
		for _, element := range content.Array() {
			switch {
			case element.Type == gjson.String:
				segments = append(segments, element.String())
			case element.IsObject():
				textField := element.Get("text")
				if textField.Exists() && textField.Type == gjson.String {
					segments = append(segments, textField.String())
				} else {
					segments = append(segments, element.Raw)
				}
			default:
				segments = append(segments, element.Raw)
			}
		}
		// Prefer the flattened text; a blank result falls through to the raw JSON below.
		if flattened := strings.Join(segments, "\n\n"); strings.TrimSpace(flattened) != "" {
			return flattened
		}

	case content.IsObject():
		if textField := content.Get("text"); textField.Exists() && textField.Type == gjson.String {
			return textField.String()
		}
	}

	// Shared fallback: hand back the JSON exactly as it appeared in the request.
	return content.Raw
}

View File

@@ -0,0 +1,500 @@
package claude
import (
"testing"
"github.com/tidwall/gjson"
)
// TestConvertClaudeRequestToOpenAI_ThinkingToReasoningContent is a table-driven test of how
// ConvertClaudeRequestToOpenAI maps Claude "thinking" content parts onto the
// reasoning_content field of the converted messages.
//
// Every case converts inputJSON with stream=false, picks the last message in the output
// whose role is not "system", and checks that message's reasoning_content and content.
// The AC1..AC5 prefixes in the case names are presumably acceptance-criteria labels from
// the originating spec — NOTE(review): confirm and link the spec if one exists.
func TestConvertClaudeRequestToOpenAI_ThinkingToReasoningContent(t *testing.T) {
tests := []struct {
name string // Subtest name passed to t.Run
inputJSON string // Claude-format request body handed to the converter
wantReasoningContent string // Expected reasoning_content value ("" when none is expected)
wantHasReasoningContent bool // Whether reasoning_content must exist on the checked message
wantContentText string // Expected visible content text (if any)
wantHasContent bool // Whether content must be a non-empty array or a non-empty string
}{
{
name: "AC1: assistant message with thinking and text",
inputJSON: `{
"model": "claude-3-opus",
"messages": [{
"role": "assistant",
"content": [
{"type": "thinking", "thinking": "Let me analyze this step by step..."},
{"type": "text", "text": "Here is my response."}
]
}]
}`,
wantReasoningContent: "Let me analyze this step by step...",
wantHasReasoningContent: true,
wantContentText: "Here is my response.",
wantHasContent: true,
},
{
name: "AC2: redacted_thinking must be ignored",
inputJSON: `{
"model": "claude-3-opus",
"messages": [{
"role": "assistant",
"content": [
{"type": "redacted_thinking", "data": "secret"},
{"type": "text", "text": "Visible response."}
]
}]
}`,
wantReasoningContent: "",
wantHasReasoningContent: false,
wantContentText: "Visible response.",
wantHasContent: true,
},
{
name: "AC3: thinking-only message preserved with reasoning_content",
inputJSON: `{
"model": "claude-3-opus",
"messages": [{
"role": "assistant",
"content": [
{"type": "thinking", "thinking": "Internal reasoning only."}
]
}]
}`,
wantReasoningContent: "Internal reasoning only.",
wantHasReasoningContent: true,
wantContentText: "",
// For OpenAI compatibility, content field is set to empty string "" when no text content exists.
// An empty string counts as "no content" in the check below, hence false here.
wantHasContent: false,
},
{
name: "AC4: thinking in user role must be ignored",
inputJSON: `{
"model": "claude-3-opus",
"messages": [{
"role": "user",
"content": [
{"type": "thinking", "thinking": "Injected thinking"},
{"type": "text", "text": "User message."}
]
}]
}`,
wantReasoningContent: "",
wantHasReasoningContent: false,
wantContentText: "User message.",
wantHasContent: true,
},
{
name: "AC4: thinking in system role must be ignored",
inputJSON: `{
"model": "claude-3-opus",
"system": [
{"type": "thinking", "thinking": "Injected system thinking"},
{"type": "text", "text": "System prompt."}
],
"messages": [{
"role": "user",
"content": [{"type": "text", "text": "Hello"}]
}]
}`,
// System messages don't have reasoning_content mapping.
// The assertions run against the last non-system message, i.e. the user "Hello" turn.
wantReasoningContent: "",
wantHasReasoningContent: false,
wantContentText: "Hello",
wantHasContent: true,
},
{
name: "AC5: empty thinking must be ignored",
inputJSON: `{
"model": "claude-3-opus",
"messages": [{
"role": "assistant",
"content": [
{"type": "thinking", "thinking": ""},
{"type": "text", "text": "Response with empty thinking."}
]
}]
}`,
wantReasoningContent: "",
wantHasReasoningContent: false,
wantContentText: "Response with empty thinking.",
wantHasContent: true,
},
{
name: "AC5: whitespace-only thinking must be ignored",
inputJSON: `{
"model": "claude-3-opus",
"messages": [{
"role": "assistant",
"content": [
{"type": "thinking", "thinking": " \n\t "},
{"type": "text", "text": "Response with whitespace thinking."}
]
}]
}`,
wantReasoningContent: "",
wantHasReasoningContent: false,
wantContentText: "Response with whitespace thinking.",
wantHasContent: true,
},
{
name: "Multiple thinking parts concatenated",
inputJSON: `{
"model": "claude-3-opus",
"messages": [{
"role": "assistant",
"content": [
{"type": "thinking", "thinking": "First thought."},
{"type": "thinking", "thinking": "Second thought."},
{"type": "text", "text": "Final answer."}
]
}]
}`,
// The two thinking parts are expected to be joined with a blank line between them.
wantReasoningContent: "First thought.\n\nSecond thought.",
wantHasReasoningContent: true,
wantContentText: "Final answer.",
wantHasContent: true,
},
{
name: "Mixed thinking and redacted_thinking",
inputJSON: `{
"model": "claude-3-opus",
"messages": [{
"role": "assistant",
"content": [
{"type": "thinking", "thinking": "Visible thought."},
{"type": "redacted_thinking", "data": "hidden"},
{"type": "text", "text": "Answer."}
]
}]
}`,
wantReasoningContent: "Visible thought.",
wantHasReasoningContent: true,
wantContentText: "Answer.",
wantHasContent: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := ConvertClaudeRequestToOpenAI("test-model", []byte(tt.inputJSON), false)
resultJSON := gjson.ParseBytes(result)
// Find the relevant message (skip system message at index 0)
messages := resultJSON.Get("messages").Array()
// Fewer than two messages means nothing beyond the expected system message was
// emitted; that is only acceptable when the case expects neither reasoning nor content.
if len(messages) < 2 {
if tt.wantHasReasoningContent || tt.wantHasContent {
t.Fatalf("Expected at least 2 messages (system + user/assistant), got %d", len(messages))
}
return
}
// Check the last non-system message: walk backwards and stop at the first
// message whose role is not "system".
var targetMsg gjson.Result
for i := len(messages) - 1; i >= 0; i-- {
if messages[i].Get("role").String() != "system" {
targetMsg = messages[i]
break
}
}
// Check reasoning_content: both its presence and its exact value are asserted,
// so an empty-but-present field is distinguished from an absent one.
gotReasoningContent := targetMsg.Get("reasoning_content").String()
gotHasReasoningContent := targetMsg.Get("reasoning_content").Exists()
if gotHasReasoningContent != tt.wantHasReasoningContent {
t.Errorf("reasoning_content existence = %v, want %v", gotHasReasoningContent, tt.wantHasReasoningContent)
}
if gotReasoningContent != tt.wantReasoningContent {
t.Errorf("reasoning_content = %q, want %q", gotReasoningContent, tt.wantReasoningContent)
}
// Check content
content := targetMsg.Get("content")
// content has meaningful content if it's a non-empty array, or a non-empty string
var gotHasContent bool
switch {
case content.IsArray():
gotHasContent = len(content.Array()) > 0
case content.Type == gjson.String:
gotHasContent = content.String() != ""
default:
gotHasContent = false
}
if gotHasContent != tt.wantHasContent {
t.Errorf("content existence = %v, want %v", gotHasContent, tt.wantHasContent)
}
if tt.wantHasContent && tt.wantContentText != "" {
// Find text content: only the first part with type "text" is compared;
// returning false from the callback ends the iteration once it is found.
var foundText string
content.ForEach(func(_, v gjson.Result) bool {
if v.Get("type").String() == "text" {
foundText = v.Get("text").String()
return false
}
return true
})
if foundText != tt.wantContentText {
t.Errorf("content text = %q, want %q", foundText, tt.wantContentText)
}
}
})
}
}
// TestConvertClaudeRequestToOpenAI_ThinkingOnlyMessagePreserved tests AC3:
// that a message with only thinking content is preserved (not dropped).
func TestConvertClaudeRequestToOpenAI_ThinkingOnlyMessagePreserved(t *testing.T) {
inputJSON := `{
"model": "claude-3-opus",
"messages": [
{
"role": "user",
"content": [{"type": "text", "text": "What is 2+2?"}]
},
{
"role": "assistant",
"content": [{"type": "thinking", "thinking": "Let me calculate: 2+2=4"}]
},
{
"role": "user",
"content": [{"type": "text", "text": "Thanks"}]
}
]
}`
result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
resultJSON := gjson.ParseBytes(result)
messages := resultJSON.Get("messages").Array()
// Should have: system (auto-added) + user + assistant (thinking-only) + user = 4 messages
if len(messages) != 4 {
t.Fatalf("Expected 4 messages, got %d. Messages: %v", len(messages), resultJSON.Get("messages").Raw)
}
// Check the assistant message (index 2) has reasoning_content
assistantMsg := messages[2]
if assistantMsg.Get("role").String() != "assistant" {
t.Errorf("Expected message[2] to be assistant, got %s", assistantMsg.Get("role").String())
}
if !assistantMsg.Get("reasoning_content").Exists() {
t.Error("Expected assistant message to have reasoning_content")
}
if assistantMsg.Get("reasoning_content").String() != "Let me calculate: 2+2=4" {
t.Errorf("Unexpected reasoning_content: %s", assistantMsg.Get("reasoning_content").String())
}
}
func TestConvertClaudeRequestToOpenAI_ToolResultOrderAndContent(t *testing.T) {
inputJSON := `{
"model": "claude-3-opus",
"messages": [
{
"role": "assistant",
"content": [
{"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}}
]
},
{
"role": "user",
"content": [
{"type": "text", "text": "before"},
{"type": "tool_result", "tool_use_id": "call_1", "content": [{"type":"text","text":"tool ok"}]},
{"type": "text", "text": "after"}
]
}
]
}`
result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
resultJSON := gjson.ParseBytes(result)
messages := resultJSON.Get("messages").Array()
// OpenAI requires: tool messages MUST immediately follow assistant(tool_calls).
// Correct order: system + assistant(tool_calls) + tool(result) + user(before+after)
if len(messages) != 4 {
t.Fatalf("Expected 4 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
}
if messages[0].Get("role").String() != "system" {
t.Fatalf("Expected messages[0] to be system, got %s", messages[0].Get("role").String())
}
if messages[1].Get("role").String() != "assistant" || !messages[1].Get("tool_calls").Exists() {
t.Fatalf("Expected messages[1] to be assistant tool_calls, got %s: %s", messages[1].Get("role").String(), messages[1].Raw)
}
// tool message MUST immediately follow assistant(tool_calls) per OpenAI spec
if messages[2].Get("role").String() != "tool" {
t.Fatalf("Expected messages[2] to be tool (must follow tool_calls), got %s", messages[2].Get("role").String())
}
if got := messages[2].Get("tool_call_id").String(); got != "call_1" {
t.Fatalf("Expected tool_call_id %q, got %q", "call_1", got)
}
if got := messages[2].Get("content").String(); got != "tool ok" {
t.Fatalf("Expected tool content %q, got %q", "tool ok", got)
}
// User message comes after tool message
if messages[3].Get("role").String() != "user" {
t.Fatalf("Expected messages[3] to be user, got %s", messages[3].Get("role").String())
}
// User message should contain both "before" and "after" text
if got := messages[3].Get("content.0.text").String(); got != "before" {
t.Fatalf("Expected user text[0] %q, got %q", "before", got)
}
if got := messages[3].Get("content.1.text").String(); got != "after" {
t.Fatalf("Expected user text[1] %q, got %q", "after", got)
}
}
func TestConvertClaudeRequestToOpenAI_ToolResultObjectContent(t *testing.T) {
inputJSON := `{
"model": "claude-3-opus",
"messages": [
{
"role": "assistant",
"content": [
{"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}}
]
},
{
"role": "user",
"content": [
{"type": "tool_result", "tool_use_id": "call_1", "content": {"foo": "bar"}}
]
}
]
}`
result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
resultJSON := gjson.ParseBytes(result)
messages := resultJSON.Get("messages").Array()
// system + assistant(tool_calls) + tool(result)
if len(messages) != 3 {
t.Fatalf("Expected 3 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
}
if messages[2].Get("role").String() != "tool" {
t.Fatalf("Expected messages[2] to be tool, got %s", messages[2].Get("role").String())
}
toolContent := messages[2].Get("content").String()
parsed := gjson.Parse(toolContent)
if parsed.Get("foo").String() != "bar" {
t.Fatalf("Expected tool content JSON foo=bar, got %q", toolContent)
}
}
func TestConvertClaudeRequestToOpenAI_AssistantTextToolUseTextOrder(t *testing.T) {
inputJSON := `{
"model": "claude-3-opus",
"messages": [
{
"role": "assistant",
"content": [
{"type": "text", "text": "pre"},
{"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}},
{"type": "text", "text": "post"}
]
}
]
}`
result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
resultJSON := gjson.ParseBytes(result)
messages := resultJSON.Get("messages").Array()
// New behavior: content + tool_calls unified in single assistant message
// Expect: system + assistant(content[pre,post] + tool_calls)
if len(messages) != 2 {
t.Fatalf("Expected 2 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
}
if messages[0].Get("role").String() != "system" {
t.Fatalf("Expected messages[0] to be system, got %s", messages[0].Get("role").String())
}
assistantMsg := messages[1]
if assistantMsg.Get("role").String() != "assistant" {
t.Fatalf("Expected messages[1] to be assistant, got %s", assistantMsg.Get("role").String())
}
// Should have both content and tool_calls in same message
if !assistantMsg.Get("tool_calls").Exists() {
t.Fatalf("Expected assistant message to have tool_calls")
}
if got := assistantMsg.Get("tool_calls.0.id").String(); got != "call_1" {
t.Fatalf("Expected tool_call id %q, got %q", "call_1", got)
}
if got := assistantMsg.Get("tool_calls.0.function.name").String(); got != "do_work" {
t.Fatalf("Expected tool_call name %q, got %q", "do_work", got)
}
// Content should have both pre and post text
if got := assistantMsg.Get("content.0.text").String(); got != "pre" {
t.Fatalf("Expected content[0] text %q, got %q", "pre", got)
}
if got := assistantMsg.Get("content.1.text").String(); got != "post" {
t.Fatalf("Expected content[1] text %q, got %q", "post", got)
}
}
func TestConvertClaudeRequestToOpenAI_AssistantThinkingToolUseThinkingSplit(t *testing.T) {
inputJSON := `{
"model": "claude-3-opus",
"messages": [
{
"role": "assistant",
"content": [
{"type": "thinking", "thinking": "t1"},
{"type": "text", "text": "pre"},
{"type": "tool_use", "id": "call_1", "name": "do_work", "input": {"a": 1}},
{"type": "thinking", "thinking": "t2"},
{"type": "text", "text": "post"}
]
}
]
}`
result := ConvertClaudeRequestToOpenAI("test-model", []byte(inputJSON), false)
resultJSON := gjson.ParseBytes(result)
messages := resultJSON.Get("messages").Array()
// New behavior: all content, thinking, and tool_calls unified in single assistant message
// Expect: system + assistant(content[pre,post] + tool_calls + reasoning_content[t1+t2])
if len(messages) != 2 {
t.Fatalf("Expected 2 messages, got %d. Messages: %s", len(messages), resultJSON.Get("messages").Raw)
}
assistantMsg := messages[1]
if assistantMsg.Get("role").String() != "assistant" {
t.Fatalf("Expected messages[1] to be assistant, got %s", assistantMsg.Get("role").String())
}
// Should have content with both pre and post
if got := assistantMsg.Get("content.0.text").String(); got != "pre" {
t.Fatalf("Expected content[0] text %q, got %q", "pre", got)
}
if got := assistantMsg.Get("content.1.text").String(); got != "post" {
t.Fatalf("Expected content[1] text %q, got %q", "post", got)
}
// Should have tool_calls
if !assistantMsg.Get("tool_calls").Exists() {
t.Fatalf("Expected assistant message to have tool_calls")
}
// Should have combined reasoning_content from both thinking blocks
if got := assistantMsg.Get("reasoning_content").String(); got != "t1\n\nt2" {
t.Fatalf("Expected reasoning_content %q, got %q", "t1\n\nt2", got)
}
}

View File

@@ -480,15 +480,15 @@ func collectOpenAIReasoningTexts(node gjson.Result) []string {
switch node.Type {
case gjson.String:
if text := strings.TrimSpace(node.String()); text != "" {
if text := node.String(); text != "" {
texts = append(texts, text)
}
case gjson.JSON:
if text := node.Get("text"); text.Exists() {
if trimmed := strings.TrimSpace(text.String()); trimmed != "" {
texts = append(texts, trimmed)
if textStr := text.String(); textStr != "" {
texts = append(texts, textStr)
}
} else if raw := strings.TrimSpace(node.Raw); raw != "" && !strings.HasPrefix(raw, "{") && !strings.HasPrefix(raw, "[") {
} else if raw := node.Raw; raw != "" && !strings.HasPrefix(raw, "{") && !strings.HasPrefix(raw, "[") {
texts = append(texts, raw)
}
}

View File

@@ -390,6 +390,11 @@ func addEmptySchemaPlaceholder(jsonStr string) string {
// If schema has properties but none are required, add a minimal placeholder.
if propsVal.IsObject() && !hasRequiredProperties {
// DO NOT add placeholder if it's a top-level schema (parentPath is empty)
// or if we've already added a placeholder reason above.
if parentPath == "" {
continue
}
placeholderPath := joinPath(propsPath, "_")
if !gjson.Get(jsonStr, placeholderPath).Exists() {
jsonStr, _ = sjson.Set(jsonStr, placeholderPath+".type", "boolean")

View File

@@ -127,8 +127,10 @@ func TestCleanJSONSchemaForAntigravity_AnyOfFlattening_SmartSelection(t *testing
"type": "object",
"description": "Accepts: null | object",
"properties": {
"_": { "type": "boolean" },
"kind": { "type": "string" }
}
},
"required": ["_"]
}
}
}`

View File

@@ -288,37 +288,73 @@ func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
// ApplyGemini3ThinkingLevelFromMetadata applies thinkingLevel from metadata for Gemini 3 models.
// For standard Gemini API format (generationConfig.thinkingConfig path).
// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)).
// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal))
// or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel.
func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte {
if !IsGemini3Model(model) {
// Use the alias from metadata if available for model type detection
lookupModel := ResolveOriginalModel(model, metadata)
if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) {
return body
}
// Determine which model to use for validation
checkModel := model
if IsGemini3Model(lookupModel) {
checkModel = lookupModel
}
// First try to get effort string from metadata
effort, ok := ReasoningEffortFromMetadata(metadata)
if !ok || effort == "" {
return body
if ok && effort != "" {
if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid {
return ApplyGeminiThinkingLevel(body, level, nil)
}
}
// Validate and apply the thinkingLevel
if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid {
return ApplyGeminiThinkingLevel(body, level, nil)
// Fallback: check for numeric budget and convert to thinkingLevel
budget, _, _, matched := ThinkingFromMetadata(metadata)
if matched && budget != nil {
if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid {
return ApplyGeminiThinkingLevel(body, level, nil)
}
}
return body
}
// ApplyGemini3ThinkingLevelFromMetadataCLI applies thinkingLevel from metadata for Gemini 3 models.
// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)).
// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal))
// or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel.
func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte {
if !IsGemini3Model(model) {
// Use the alias from metadata if available for model type detection
lookupModel := ResolveOriginalModel(model, metadata)
if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) {
return body
}
// Determine which model to use for validation
checkModel := model
if IsGemini3Model(lookupModel) {
checkModel = lookupModel
}
// First try to get effort string from metadata
effort, ok := ReasoningEffortFromMetadata(metadata)
if !ok || effort == "" {
return body
if ok && effort != "" {
if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid {
return ApplyGeminiCLIThinkingLevel(body, level, nil)
}
}
// Validate and apply the thinkingLevel
if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid {
return ApplyGeminiCLIThinkingLevel(body, level, nil)
// Fallback: check for numeric budget and convert to thinkingLevel
budget, _, _, matched := ThinkingFromMetadata(metadata)
if matched && budget != nil {
if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid {
return ApplyGeminiCLIThinkingLevel(body, level, nil)
}
}
return body
}
@@ -326,15 +362,17 @@ func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]
// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
// Returns the modified body if thinkingConfig was added, otherwise returns the original.
// For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation.
func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
if !ModelHasDefaultThinking(model) {
func ApplyDefaultThinkingIfNeededCLI(model string, metadata map[string]any, body []byte) []byte {
// Use the alias from metadata if available for model property lookup
lookupModel := ResolveOriginalModel(model, metadata)
if !ModelHasDefaultThinking(lookupModel) && !ModelHasDefaultThinking(model) {
return body
}
if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() {
return body
}
// Gemini 3 models use thinkingLevel instead of thinkingBudget
if IsGemini3Model(model) {
if IsGemini3Model(lookupModel) || IsGemini3Model(model) {
// Don't set a default - let the API use its dynamic default ("high")
// Only set includeThoughts
updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts", true)

View File

@@ -0,0 +1,56 @@
package util
import (
"testing"
)
// TestSanitizeFunctionName runs SanitizeFunctionName over a table of inputs and
// additionally asserts two invariants on every result: it is at most 64 bytes
// long and, when non-empty, begins with an ASCII letter or an underscore.
func TestSanitizeFunctionName(t *testing.T) {
	type sanitizeCase struct {
		label string
		in    string
		want  string
	}

	cases := []sanitizeCase{
		{"Normal", "valid_name", "valid_name"},
		{"With Dots", "name.with.dots", "name.with.dots"},
		{"With Colons", "name:with:colons", "name:with:colons"},
		{"With Dashes", "name-with-dashes", "name-with-dashes"},
		{"Mixed Allowed", "name.with_dots:colons-dashes", "name.with_dots:colons-dashes"},
		{"Invalid Characters", "name!with@invalid#chars", "name_with_invalid_chars"},
		{"Spaces", "name with spaces", "name_with_spaces"},
		{"Non-ASCII", "name_with_你好_chars", "name_with____chars"},
		{"Starts with digit", "123name", "_123name"},
		{"Starts with dot", ".name", "_.name"},
		{"Starts with colon", ":name", "_:name"},
		{"Starts with dash", "-name", "_-name"},
		{"Starts with invalid char", "!name", "_name"},
		{"Exactly 64 chars", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charact", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charact"},
		{"Too long (65 chars)", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charactX", "this_is_a_very_long_name_that_exactly_reaches_sixty_four_charact"},
		{"Very long", "this_is_a_very_long_name_that_exceeds_the_sixty_four_character_limit_for_function_names", "this_is_a_very_long_name_that_exceeds_the_sixty_four_character_l"},
		{"Starts with digit (64 chars total)", "1234567890123456789012345678901234567890123456789012345678901234", "_123456789012345678901234567890123456789012345678901234567890123"},
		{"Starts with invalid char (64 chars total)", "!234567890123456789012345678901234567890123456789012345678901234", "_234567890123456789012345678901234567890123456789012345678901234"},
		{"Empty", "", ""},
		{"Single character invalid", "@", "_"},
		{"Single character valid", "a", "a"},
		{"Single character digit", "1", "_1"},
		{"Single character underscore", "_", "_"},
	}

	// validLead reports whether c may open a sanitized name.
	validLead := func(c byte) bool {
		return c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
	}

	for _, tc := range cases {
		t.Run(tc.label, func(t *testing.T) {
			got := SanitizeFunctionName(tc.in)
			if got != tc.want {
				t.Errorf("SanitizeFunctionName(%q) = %v, want %v", tc.in, got, tc.want)
			}

			// Invariants checked independently of the expected value.
			if n := len(got); n > 64 {
				t.Errorf("SanitizeFunctionName(%q) result too long: %d", tc.in, n)
			}
			if got != "" && !validLead(got[0]) {
				t.Errorf("SanitizeFunctionName(%q) result starts with invalid char: %c", tc.in, got[0])
			}
		})
	}
}

View File

@@ -12,9 +12,18 @@ func ModelSupportsThinking(model string) bool {
if model == "" {
return false
}
// First check the global dynamic registry
if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil {
return info.Thinking != nil
}
// Fallback: check static model definitions
if info := registry.LookupStaticModelInfo(model); info != nil {
return info.Thinking != nil
}
// Fallback: check Antigravity static config
if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil {
return cfg.Thinking != nil
}
return false
}
@@ -63,11 +72,19 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero
if model == "" {
return false, 0, 0, false, false
}
info := registry.GetGlobalRegistry().GetModelInfo(model)
if info == nil || info.Thinking == nil {
return false, 0, 0, false, false
// First check global dynamic registry
if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil && info.Thinking != nil {
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
}
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
// Fallback: check static model definitions
if info := registry.LookupStaticModelInfo(model); info != nil && info.Thinking != nil {
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
}
// Fallback: check Antigravity static config
if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil && cfg.Thinking != nil {
return true, cfg.Thinking.Min, cfg.Thinking.Max, cfg.Thinking.ZeroAllowed, cfg.Thinking.DynamicAllowed
}
return false, 0, 0, false, false
}
// GetModelThinkingLevels returns the discrete reasoning effort levels for the model.

View File

@@ -8,12 +8,52 @@ import (
"io/fs"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
log "github.com/sirupsen/logrus"
)
// functionNameSanitizer matches every character that is not permitted anywhere
// in a function name: anything other than ASCII letters, digits, '_', '.', ':'
// and '-'.
var functionNameSanitizer = regexp.MustCompile(`[^a-zA-Z0-9_.:-]`)

// SanitizeFunctionName ensures a function name matches the requirements for Gemini/Vertex AI.
//
// It applies three rules:
//   - every character outside [a-zA-Z0-9_.:-] is replaced with '_';
//   - if the result does not start with an ASCII letter or '_', an '_' is
//     prepended (the name is shortened first so the prefix still fits);
//   - the result is truncated to at most 64 bytes.
//
// An empty name is returned unchanged.
func SanitizeFunctionName(name string) string {
	// maxLen is the longest name this function will return.
	const maxLen = 64

	if name == "" {
		return ""
	}

	// After this call the string is non-empty and pure ASCII: every rune that
	// is not in the allowed ASCII set (including each byte of invalid UTF-8)
	// has been replaced by a single '_'. Byte indexing and byte slicing below
	// therefore never split a multi-byte character.
	sanitized := functionNameSanitizer.ReplaceAllString(name, "_")

	first := sanitized[0]
	startsValid := first == '_' || (first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z')
	if !startsValid {
		// The name starts with a character that is allowed in general but not
		// in first position (digit, dot, colon, dash). Make room for the
		// underscore prefix before adding it so the limit is still respected.
		if len(sanitized) >= maxLen {
			sanitized = sanitized[:maxLen-1]
		}
		return "_" + sanitized
	}

	if len(sanitized) > maxLen {
		sanitized = sanitized[:maxLen]
	}
	return sanitized
}
// SetLogLevel configures the logrus log level based on the configuration.
// It sets the log level to DebugLevel if debug mode is enabled, otherwise to InfoLevel.
func SetLogLevel(cfg *config.Config) {

View File

@@ -20,6 +20,7 @@ type ManagementTokenRequester interface {
RequestQwenToken(*gin.Context)
RequestIFlowToken(*gin.Context)
RequestIFlowCookieToken(*gin.Context)
GetAuthStatus(c *gin.Context)
}
type managementTokenRequester struct {
@@ -60,3 +61,7 @@ func (m *managementTokenRequester) RequestIFlowToken(c *gin.Context) {
// RequestIFlowCookieToken forwards the request context to the wrapped
// handler's RequestIFlowCookieToken method.
func (m *managementTokenRequester) RequestIFlowCookieToken(c *gin.Context) {
m.handler.RequestIFlowCookieToken(c)
}
// GetAuthStatus forwards the request context to the wrapped handler's
// GetAuthStatus method.
func (m *managementTokenRequester) GetAuthStatus(c *gin.Context) {
m.handler.GetAuthStatus(c)
}

View File

@@ -81,7 +81,9 @@ func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, meta
out[k] = v
}
}
out[util.ModelMappingOriginalModelMetadataKey] = upstreamModel
// Store the requested alias (e.g., "gp") so downstream can use it to look up
// model metadata from the global registry where it was registered under this alias.
out[util.ModelMappingOriginalModelMetadataKey] = requestedModel
return upstreamModel, out
}

View File

@@ -13,6 +13,7 @@ type ModelRegistry interface {
ClearModelQuotaExceeded(clientID, modelID string)
ClientSupportsModel(clientID, modelID string) bool
GetAvailableModels(handlerType string) []map[string]any
GetAvailableModelsByProvider(provider string) []*ModelInfo
}
// GlobalModelRegistry returns the shared registry instance.

View File

@@ -0,0 +1,211 @@
package test
import (
"testing"
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
)
// TestModelAliasThinkingSuffix tests the 32 test cases defined in docs/thinking_suffix_test_cases.md
// These tests verify the thinking suffix parsing and application logic across different providers.
//
// Each case walks the same pipeline: parse the "(suffix)" off the requested
// model, optionally record the alias the way OAuth model mapping does, check
// what ThinkingFromMetadata extracts, and finally (for Gemini models) check the
// value written into the Gemini CLI request body.
func TestModelAliasThinkingSuffix(t *testing.T) {
	type suffixCase struct {
		id            int
		name          string
		provider      string
		requestModel  string
		suffixType    string
		expectedField string // "thinkingBudget", "thinkingLevel", "budget_tokens", "reasoning_effort", "enable_thinking"
		expectedValue any
		upstreamModel string // The upstream model after alias resolution
		isAlias       bool
	}

	cases := []suffixCase{
		// === 1. Antigravity Provider ===
		// 1.1 Budget-only models (Gemini 2.5)
		{1, "antigravity_original_numeric", "antigravity", "gemini-2.5-computer-use-preview-10-2025(1000)", "numeric", "thinkingBudget", 1000, "gemini-2.5-computer-use-preview-10-2025", false},
		{2, "antigravity_alias_numeric", "antigravity", "gp(1000)", "numeric", "thinkingBudget", 1000, "gemini-2.5-computer-use-preview-10-2025", true},
		// 1.2 Budget+Levels models (Gemini 3)
		{3, "antigravity_original_numeric_to_level", "antigravity", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
		{4, "antigravity_original_level", "antigravity", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
		{5, "antigravity_alias_numeric_to_level", "antigravity", "gf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
		{6, "antigravity_alias_level", "antigravity", "gf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
		// === 2. Gemini CLI Provider ===
		// 2.1 Budget-only models
		{7, "gemini_cli_original_numeric", "gemini-cli", "gemini-2.5-pro(8192)", "numeric", "thinkingBudget", 8192, "gemini-2.5-pro", false},
		{8, "gemini_cli_alias_numeric", "gemini-cli", "g25p(8192)", "numeric", "thinkingBudget", 8192, "gemini-2.5-pro", true},
		// 2.2 Budget+Levels models
		{9, "gemini_cli_original_numeric_to_level", "gemini-cli", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
		{10, "gemini_cli_original_level", "gemini-cli", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
		{11, "gemini_cli_alias_numeric_to_level", "gemini-cli", "gf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
		{12, "gemini_cli_alias_level", "gemini-cli", "gf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
		// === 3. Vertex Provider ===
		// 3.1 Budget-only models
		{13, "vertex_original_numeric", "vertex", "gemini-2.5-pro(16384)", "numeric", "thinkingBudget", 16384, "gemini-2.5-pro", false},
		{14, "vertex_alias_numeric", "vertex", "vg25p(16384)", "numeric", "thinkingBudget", 16384, "gemini-2.5-pro", true},
		// 3.2 Budget+Levels models
		{15, "vertex_original_numeric_to_level", "vertex", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
		{16, "vertex_original_level", "vertex", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
		{17, "vertex_alias_numeric_to_level", "vertex", "vgf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
		{18, "vertex_alias_level", "vertex", "vgf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
		// === 4. AI Studio Provider ===
		// 4.1 Budget-only models
		{19, "aistudio_original_numeric", "aistudio", "gemini-2.5-pro(12000)", "numeric", "thinkingBudget", 12000, "gemini-2.5-pro", false},
		{20, "aistudio_alias_numeric", "aistudio", "ag25p(12000)", "numeric", "thinkingBudget", 12000, "gemini-2.5-pro", true},
		// 4.2 Budget+Levels models
		{21, "aistudio_original_numeric_to_level", "aistudio", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
		{22, "aistudio_original_level", "aistudio", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
		{23, "aistudio_alias_numeric_to_level", "aistudio", "agf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
		{24, "aistudio_alias_level", "aistudio", "agf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
		// === 5. Claude Provider ===
		{25, "claude_original_numeric", "claude", "claude-sonnet-4-5-20250929(16384)", "numeric", "budget_tokens", 16384, "claude-sonnet-4-5-20250929", false},
		{26, "claude_alias_numeric", "claude", "cs45(16384)", "numeric", "budget_tokens", 16384, "claude-sonnet-4-5-20250929", true},
		// === 6. Codex Provider ===
		{27, "codex_original_level", "codex", "gpt-5(high)", "level", "reasoning_effort", "high", "gpt-5", false},
		{28, "codex_alias_level", "codex", "g5(high)", "level", "reasoning_effort", "high", "gpt-5", true},
		// === 7. Qwen Provider ===
		{29, "qwen_original_level", "qwen", "qwen3-coder-plus(high)", "level", "enable_thinking", true, "qwen3-coder-plus", false},
		{30, "qwen_alias_level", "qwen", "qcp(high)", "level", "enable_thinking", true, "qwen3-coder-plus", true},
		// === 8. iFlow Provider ===
		{31, "iflow_original_level", "iflow", "glm-4.7(high)", "level", "reasoning_effort", "high", "glm-4.7", false},
		{32, "iflow_alias_level", "iflow", "glm(high)", "level", "reasoning_effort", "high", "glm-4.7", true},
	}

	// cliBody is the minimal Gemini CLI request used for the apply steps.
	const cliBody = `{"request":{"contents":[]}}`

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Step 1: split the suffix off the requested model (simulates SDK
			// layer normalization), e.g. "gp(1000)" -> alias "gp" plus metadata.
			alias, meta := util.NormalizeThinkingModel(tc.requestModel)
			hasSuffix := tc.suffixType == "numeric" || tc.suffixType == "level"
			if meta == nil && hasSuffix {
				t.Fatalf("Case #%d: NormalizeThinkingModel(%q) metadata is nil", tc.id, tc.requestModel)
			}

			// Step 2: simulate OAuth model mapping, which records the requested
			// alias in the metadata.
			if tc.isAlias {
				if meta == nil {
					meta = make(map[string]any)
				}
				meta[util.ModelMappingOriginalModelMetadataKey] = alias
			}

			// cliMetadata builds a fresh metadata map for the apply steps: the
			// alias entry first (alias cases only), then every parsed entry.
			cliMetadata := func() map[string]any {
				out := make(map[string]any)
				if tc.isAlias {
					out[util.ModelMappingOriginalModelMetadataKey] = alias
				}
				for key, val := range meta {
					out[key] = val
				}
				return out
			}

			// Step 3: verify what can be extracted from the metadata.
			switch tc.suffixType {
			case "numeric":
				budget, _, _, matched := util.ThinkingFromMetadata(meta)
				if !matched {
					t.Fatalf("Case #%d: ThinkingFromMetadata did not match", tc.id)
				}
				if budget == nil {
					t.Fatalf("Case #%d: expected budget in metadata", tc.id)
				}

				switch tc.expectedField {
				case "thinkingBudget", "budget_tokens":
					// Budget-style fields carry the parsed number through unchanged.
					if want := tc.expectedValue.(int); *budget != want {
						t.Errorf("Case #%d: budget = %d, want %d", tc.id, *budget, want)
					}
				case "thinkingLevel":
					// Gemini 3 expects the numeric budget converted into a level.
					level, converted := util.ThinkingBudgetToGemini3Level(tc.upstreamModel, *budget)
					if !converted {
						t.Fatalf("Case #%d: ThinkingBudgetToGemini3Level failed", tc.id)
					}
					if want := tc.expectedValue.(string); level != want {
						t.Errorf("Case #%d: converted level = %q, want %q", tc.id, level, want)
					}
				}

			case "level":
				_, _, effort, matched := util.ThinkingFromMetadata(meta)
				if !matched {
					t.Fatalf("Case #%d: ThinkingFromMetadata did not match", tc.id)
				}
				if effort == nil {
					t.Fatalf("Case #%d: expected effort in metadata", tc.id)
				}

				switch tc.expectedField {
				case "thinkingLevel", "reasoning_effort":
					if want := tc.expectedValue.(string); *effort != want {
						t.Errorf("Case #%d: effort = %q, want %q", tc.id, *effort, want)
					}
				}
			}

			// Step 4: for Gemini 3 models, apply the level to a CLI request body
			// with the upstream model passed as the model parameter.
			if tc.expectedField == "thinkingLevel" && util.IsGemini3Model(tc.upstreamModel) {
				applied := util.ApplyGemini3ThinkingLevelFromMetadataCLI(tc.upstreamModel, cliMetadata(), []byte(cliBody))
				gotLevel := gjson.GetBytes(applied, "request.generationConfig.thinkingConfig.thinkingLevel")
				wantLevel := tc.expectedValue.(string)
				switch {
				case !gotLevel.Exists():
					t.Errorf("Case #%d: expected thinkingLevel in result", tc.id)
				case gotLevel.String() != wantLevel:
					t.Errorf("Case #%d: thinkingLevel = %q, want %q", tc.id, gotLevel.String(), wantLevel)
				}
			}

			// Step 5: for Gemini 2.5 models, apply the budget through the
			// exported ApplyThinkingMetadataCLI entry point.
			if tc.expectedField == "thinkingBudget" && util.IsGemini25Model(tc.upstreamModel) {
				applied := executor.ApplyThinkingMetadataCLI([]byte(cliBody), cliMetadata(), tc.upstreamModel)
				gotBudget := gjson.GetBytes(applied, "request.generationConfig.thinkingConfig.thinkingBudget")
				wantBudget := tc.expectedValue.(int)
				switch {
				case !gotBudget.Exists():
					t.Errorf("Case #%d: expected thinkingBudget in result", tc.id)
				case int(gotBudget.Int()) != wantBudget:
					t.Errorf("Case #%d: thinkingBudget = %d, want %d", tc.id, int(gotBudget.Int()), wantBudget)
				}
			}
		})
	}
}