Compare commits


40 Commits

Author SHA1 Message Date
Luis Pater
7ff3936efe fix(caching): ensure prompt-caching beta is always appended and add multi-turn cache control tests 2026-01-31 01:42:58 +08:00
Luis Pater
f36a5f5654 Merge pull request #1294 from Darley-Wey/fix/claude2gemini
fix: skip empty text parts and messages to avoid Gemini API error
2026-01-31 01:05:41 +08:00
Luis Pater
c1facdff67 Merge pull request #1295 from SchneeMart/feature/claude-caching
feat(caching): implement Claude prompt caching with multi-turn support
2026-01-31 01:04:19 +08:00
Luis Pater
4ee46bc9f2 Merge pull request #1311 from router-for-me/fix/gemini-schema
fix(gemini): Removes unsupported extension fields
2026-01-30 23:55:56 +08:00
Luis Pater
c3e94a8277 Merge pull request #1317 from yinkev/feat/gemini-tools-passthrough
feat(translator): add code_execution and url_context tool passthrough
2026-01-30 23:46:44 +08:00
Luis Pater
6b6d030ed3 feat(auth): add custom HTTP client with utls for Claude API authentication
Introduce a custom HTTP client that uses utls with a Firefox TLS fingerprint to bypass Cloudflare's TLS fingerprinting on Anthropic domains. Includes support for proxy configuration and improved HTTP/2 connection management.
2026-01-30 21:29:41 +08:00
kyinhub
538039f583 feat(translator): add code_execution and url_context tool passthrough
Add support for Gemini's code_execution and url_context tools in the
request translators, enabling:

- Agentic Vision: Image analysis with Python code execution for
  bounding boxes, annotations, and visual reasoning
- URL Context: Live web page content fetching and analysis

Tools are passed through using the same pattern as google_search:
- code_execution: {} -> codeExecution: {}
- url_context: {} -> urlContext: {}

Tested with Gemini 3 Flash Preview agentic vision successfully.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 21:14:52 -08:00
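The passthrough itself is a small rename in the request translator. A minimal sketch of the mapping, assuming a gjson/sjson-based translator like the rest of the codebase (the function name and plumbing here are illustrative, not the shipped code):

package translator

import (
    "github.com/tidwall/gjson"
    "github.com/tidwall/sjson"
)

// appendPassthroughTools copies empty-object passthrough tools from the client
// request into the upstream Gemini payload, renaming snake_case to camelCase.
// Illustrative sketch; the real translator handles many more tool types.
func appendPassthroughTools(clientTools gjson.Result, upstream []byte) ([]byte, error) {
    renames := map[string]string{
        "google_search":  "googleSearch",
        "code_execution": "codeExecution",
        "url_context":    "urlContext",
    }
    var err error
    for from, to := range renames {
        if clientTools.Get(from).Exists() {
            // e.g. {"code_execution": {}} -> {"codeExecution": {}}
            if upstream, err = sjson.SetRawBytes(upstream, "tools.-1."+to, []byte("{}")); err != nil {
                return nil, err
            }
        }
    }
    return upstream, nil
}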
이대희
ca796510e9 refactor(gemini): optimize removeExtensionFields with post-order traversal and DeleteBytes
Amp-Thread-ID: https://ampcode.com/threads/T-019c0d09-330d-7399-b794-652b94847df1
Co-authored-by: Amp <amp@ampcode.com>
2026-01-30 13:02:58 +09:00
이대희
d0d66cdcb7 fix(gemini): Removes unsupported extension fields
Removes x-* extension fields from JSON schemas to ensure compatibility with the Gemini API.

These fields, while valid in OpenAPI/JSON Schema, are not recognized by the Gemini API and can cause issues.
The change recursively walks the schema, identifies these extension fields, and removes them, except when they define properties.

Amp-Thread-ID: https://ampcode.com/threads/T-019c0cd1-9e59-722b-83f0-e0582aba6914
Co-authored-by: Amp <amp@ampcode.com>
2026-01-30 12:31:26 +09:00
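The core of the fix, as a minimal map-based sketch (the shipped version walks the raw JSON bytes and, after the follow-up refactor above, deletes in post-order with sjson's DeleteBytes; names here are illustrative):

package schema

import "strings"

// removeExtensionFields recursively strips "x-*" keys from a decoded JSON
// schema. Keys are kept when they sit directly inside a "properties" map,
// since there they name user-defined properties rather than OpenAPI extensions.
func removeExtensionFields(node any, inProperties bool) {
    switch v := node.(type) {
    case map[string]any:
        for key, child := range v {
            if !inProperties && strings.HasPrefix(key, "x-") {
                delete(v, key)
                continue
            }
            removeExtensionFields(child, key == "properties")
        }
    case []any:
        for _, child := range v {
            removeExtensionFields(child, false)
        }
    }
}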
Luis Pater
d7d54fa2cc feat(ci): add cleanup step for temporary Docker tags in workflow 2026-01-30 09:15:00 +08:00
Luis Pater
31649325f0 feat(ci): add multi-arch Docker builds and manifest creation to workflow 2026-01-30 07:26:36 +08:00
Martin Schneeweiss
3a43ecb19b feat(caching): implement Claude prompt caching with multi-turn support
- Add ensureCacheControl() to auto-inject cache breakpoints
- Cache tools (last tool), system (last element), and messages (2nd-to-last user turn)
- Add prompt-caching-2024-07-31 beta header
- Return original payload on sjson error to prevent corruption
- Include verification test for caching logic

Enables up to 90% cost reduction on cached tokens.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 22:59:33 +01:00
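A rough sketch of where those breakpoints land, using simplified struct shapes (the shipped ensureCacheControl edits the raw JSON payload with sjson and also marks the last tool; this sketch covers only the system and message breakpoints):

package caching

// Simplified shapes; the shipped code edits raw JSON instead.
type block struct {
    Type         string            `json:"type"`
    Text         string            `json:"text,omitempty"`
    CacheControl map[string]string `json:"cache_control,omitempty"`
}

type message struct {
    Role    string  `json:"role"`
    Content []block `json:"content"`
}

// ensureCacheControl injects {"type":"ephemeral"} breakpoints on the last
// system block and on the second-to-last user turn, so the shared prefix of a
// multi-turn conversation is cached across requests.
func ensureCacheControl(system []block, messages []message) {
    if n := len(system); n > 0 {
        system[n-1].CacheControl = map[string]string{"type": "ephemeral"}
    }
    var userIdx []int
    for i, m := range messages {
        if m.Role == "user" {
            userIdx = append(userIdx, i)
        }
    }
    if len(userIdx) >= 2 {
        m := &messages[userIdx[len(userIdx)-2]]
        if n := len(m.Content); n > 0 {
            m.Content[n-1].CacheControl = map[string]string{"type": "ephemeral"}
        }
    }
}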
Luis Pater
a709e5a12d fix(config): ensure empty mapping persists for oauth-model-alias deletions #1305 2026-01-30 04:17:56 +08:00
Luis Pater
f0ac77197b Merge pull request #1300 from sususu98/feat/log-api-response-timestamp
fix(logging): add API response timestamp and fix request timestamp timing
2026-01-30 03:27:17 +08:00
Luis Pater
da0bbf2a3f Merge pull request #1298 from sususu98/fix/restore-usageMetadata-in-gemini-translator
fix(translator): restore usageMetadata in Gemini responses from Antigravity
2026-01-30 02:59:41 +08:00
sususu98
295f34d7f0 fix(logging): capture streaming TTFB on first chunk and make timestamps required
- Add firstChunkTimestamp field to ResponseWriterWrapper for sync capture
- Capture TTFB in Write() and WriteString() before async channel send
- Add SetFirstChunkTimestamp() to StreamingLogWriter interface
- Make requestTimestamp/apiResponseTimestamp required in LogRequest()
- Remove timestamp capture from WriteAPIResponse() (now via setter)
- Fix Gemini handler to set API_RESPONSE_TIMESTAMP before writing response

This ensures accurate TTFB measurement for all streaming API formats
(OpenAI, Gemini, Claude) by capturing timestamp synchronously when
the first response chunk arrives, not when the stream finalizes.
2026-01-29 22:32:24 +08:00
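The essential pattern is that TTFB is stamped synchronously in the write path, before the chunk is handed to the async logging channel. A condensed, self-contained sketch (type and field names simplified from the wrapper shown in the diff below):

package logging

import "time"

type streamLogger struct {
    firstChunk time.Time
    chunks     chan []byte
}

// write captures TTFB synchronously on the first chunk, then hands a copy
// to the async logging channel without ever blocking the caller.
func (s *streamLogger) write(data []byte) {
    if s.firstChunk.IsZero() {
        s.firstChunk = time.Now() // TTFB: when the first response byte is written
    }
    select {
    case s.chunks <- append([]byte(nil), data...):
    default: // drop the chunk for logging rather than block the client
    }
}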
sususu98
c41ce77eea fix(logging): add API response timestamp and fix request timestamp timing
Previously:
- REQUEST INFO timestamp was captured at log write time (not request arrival)
- API RESPONSE had NO timestamp at all

This fix:
- Captures REQUEST INFO timestamp when request first arrives
- Adds API RESPONSE timestamp when upstream response arrives

Changes:
- Add Timestamp field to RequestInfo, set at middleware initialization
- Set API_RESPONSE_TIMESTAMP in appendAPIResponse() and gemini handler
- Pass timestamps through logging chain to writeNonStreamingLog()
- Add timestamp output to API RESPONSE section

This enables accurate measurement of backend response latency in error logs.
2026-01-29 22:22:18 +08:00
Luis Pater
4eb1e6093f feat(handlers): add test to verify no retries after partial stream response
Introduce `TestExecuteStreamWithAuthManager_DoesNotRetryAfterFirstByte` to validate that stream executions do not retry after receiving partial responses. Implement `payloadThenErrorStreamExecutor` for test coverage of this behavior.
2026-01-29 17:30:48 +08:00
Luis Pater
189a066807 Merge pull request #1296 from router-for-me/log
fix(api): update amp module only on config changes
2026-01-29 17:27:52 +08:00
hkfires
d0bada7a43 fix(config): prune oauth-model-alias when preserving config 2026-01-29 14:06:52 +08:00
sususu98
9dc0e6d08b fix(translator): restore usageMetadata in Gemini responses from Antigravity
When using Gemini API format with Antigravity backend, the executor
renames usageMetadata to cpaUsageMetadata in non-terminal chunks.
The Gemini translator was returning this internal field name directly
to clients instead of the standard usageMetadata field.

Add restoreUsageMetadata() to rename cpaUsageMetadata back to
usageMetadata before returning responses to clients.
2026-01-29 11:16:00 +08:00
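The restore step is a key rename on each chunk. A minimal sketch with gjson/sjson (the shipped restoreUsageMetadata may differ in detail):

package translator

import (
    "github.com/tidwall/gjson"
    "github.com/tidwall/sjson"
)

// restoreUsageMetadata renames the executor-internal cpaUsageMetadata field
// back to the standard usageMetadata before the chunk is sent to the client.
func restoreUsageMetadata(chunk []byte) []byte {
    usage := gjson.GetBytes(chunk, "cpaUsageMetadata")
    if !usage.Exists() {
        return chunk
    }
    out, err := sjson.SetRawBytes(chunk, "usageMetadata", []byte(usage.Raw))
    if err != nil {
        return chunk
    }
    if out, err = sjson.DeleteBytes(out, "cpaUsageMetadata"); err != nil {
        return chunk
    }
    return out
}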
hkfires
8510fc313e fix(api): update amp module only on config changes 2026-01-29 09:28:49 +08:00
Darley
2666708c30 fix: skip empty text parts and messages to avoid Gemini API error
When a Claude API client sends an assistant message with empty text content, such as:
{"role":"assistant","content":[{"type":"text","text":""}]}
the translator was creating an empty part object {} with no data field,
causing the Gemini API to return the error:
"required oneof field 'data' must have one initialized field"
This fix:
1. Skips empty text parts (text="") during translation
2. Skips entire messages when their parts array becomes empty
This ensures compatibility when clients send empty assistant messages
in their conversation history.
2026-01-29 04:13:07 +08:00
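A minimal sketch of the filtering over decoded structs (illustrative; the shipped translator works on raw JSON and handles non-text parts as well):

package translator

type part struct {
    Text string `json:"text,omitempty"`
}

type content struct {
    Role  string `json:"role"`
    Parts []part `json:"parts"`
}

// dropEmptyParts removes parts with empty text, then removes messages whose
// parts array became empty, so Gemini never sees a part without a data field.
func dropEmptyParts(msgs []content) []content {
    out := msgs[:0]
    for _, m := range msgs {
        kept := m.Parts[:0]
        for _, p := range m.Parts {
            if p.Text != "" {
                kept = append(kept, p)
            }
        }
        if len(kept) > 0 {
            m.Parts = kept
            out = append(out, m)
        }
    }
    return out
}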
Luis Pater
9e5b1d24e8 Merge pull request #1276 from router-for-me/thinking
feat(thinking): enable thinking toggle for qwen3 and deepseek models
2026-01-28 11:16:54 +08:00
Luis Pater
a7dae6ad52 Merge remote-tracking branch 'origin/dev' into dev 2026-01-28 10:59:00 +08:00
Luis Pater
e93e05ae25 refactor: consolidate channel send logic with context-safe handlers
Optimize channel operations by introducing reusable context-aware send functions (`send` and `sendErr`) across `wsrelay`, `handlers`, and `cliproxy`. Ensure graceful handling of canceled contexts during stream operations.
2026-01-28 10:58:35 +08:00
hkfires
c8c27325dc feat(thinking): enable thinking toggle for qwen3 and deepseek models
Fix #1245
2026-01-28 09:54:05 +08:00
hkfires
c3b6f3918c chore(git): stop ignoring .idea and data directories 2026-01-28 09:52:44 +08:00
Luis Pater
bbb55a8ab4 Merge pull request #1170 from BianBianY/main
feat: optimize enabling/disabling of auth files
2026-01-28 09:34:35 +08:00
Luis Pater
7583193c2a Merge pull request #1257 from router-for-me/model
feat(api): add management model definitions endpoint
2026-01-27 20:32:04 +08:00
hkfires
7cc3bd4ba0 chore(deps): mark golang.org/x/text as indirect 2026-01-27 19:19:52 +08:00
hkfires
88a0f095e8 chore(registry): disable gemini 2.5 flash image preview model 2026-01-27 18:33:13 +08:00
hkfires
c65f64dce0 chore(registry): comment out rev19-uic3-1p model config 2026-01-27 18:33:13 +08:00
hkfires
d18cd217e1 feat(api): add management model definitions endpoint 2026-01-27 18:33:12 +08:00
Luis Pater
ba4a1ab433 Merge pull request #1261 from Darley-Wey/fix/gemini_scheme
fix(gemini): force type to string for enum fields to fix Antigravity Gemini API error
2026-01-27 17:02:25 +08:00
Darley
decddb521e fix(gemini): force type to string for enum fields to fix Antigravity Gemini API error (Relates to #1260) 2026-01-27 11:14:08 +03:30
Luis Pater
70897247b2 feat(auth): add support for request_retry and disable_cooling overrides
Implement `request_retry` and `disable_cooling` metadata overrides for authentication management. Update retry and cooling logic accordingly across `Manager`, the Antigravity executor, and the file synthesizer. Add tests to validate the new behaviors.
2026-01-26 21:59:08 +08:00
Luis Pater
9c341f5aa5 feat(auth): add skip persistence context key for file watcher events
Introduce `WithSkipPersist` to disable persistence during Manager Update/Register calls, preventing write-back loops caused by redundant file writes. Add corresponding tests and integrate with existing file store and conductor logic.
2026-01-26 18:20:19 +08:00
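Context keys like this follow the standard Go pattern of an unexported key type plus a pair of helpers. A minimal sketch of what WithSkipPersist might look like (illustrative; only the exported name comes from the commit message):

package auth

import "context"

type skipPersistKey struct{}

// WithSkipPersist marks a context so that Manager Update/Register calls skip
// writing the auth file back to disk, preventing file-watcher write loops.
func WithSkipPersist(ctx context.Context) context.Context {
    return context.WithValue(ctx, skipPersistKey{}, true)
}

// skipPersist reports whether persistence was disabled on this context.
func skipPersist(ctx context.Context) bool {
    v, _ := ctx.Value(skipPersistKey{}).(bool)
    return v
}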
Yang Bian
f7bfa8a05c Merge branch 'upstream-main' 2026-01-24 16:28:08 +08:00
Yang Bian
c8620d1633 feat: optimize enabling/disabling of auth files 2026-01-23 18:03:09 +08:00
40 changed files with 2832 additions and 1019 deletions

View File

@@ -10,13 +10,11 @@ env:
DOCKERHUB_REPO: eceasy/cli-proxy-api
jobs:
docker:
docker_amd64:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
@@ -29,18 +27,113 @@ jobs:
echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
- name: Build and push
- name: Build and push (amd64)
uses: docker/build-push-action@v6
with:
context: .
platforms: |
linux/amd64
linux/arm64
platforms: linux/amd64
push: true
build-args: |
VERSION=${{ env.VERSION }}
COMMIT=${{ env.COMMIT }}
BUILD_DATE=${{ env.BUILD_DATE }}
tags: |
${{ env.DOCKERHUB_REPO }}:latest
${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}
${{ env.DOCKERHUB_REPO }}:latest-amd64
${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}-amd64
docker_arm64:
runs-on: ubuntu-24.04-arm
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Generate Build Metadata
run: |
echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
- name: Build and push (arm64)
uses: docker/build-push-action@v6
with:
context: .
platforms: linux/arm64
push: true
build-args: |
VERSION=${{ env.VERSION }}
COMMIT=${{ env.COMMIT }}
BUILD_DATE=${{ env.BUILD_DATE }}
tags: |
${{ env.DOCKERHUB_REPO }}:latest-arm64
${{ env.DOCKERHUB_REPO }}:${{ env.VERSION }}-arm64
docker_manifest:
runs-on: ubuntu-latest
needs:
- docker_amd64
- docker_arm64
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Generate Build Metadata
run: |
echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
- name: Create and push multi-arch manifests
run: |
docker buildx imagetools create \
--tag "${DOCKERHUB_REPO}:latest" \
"${DOCKERHUB_REPO}:latest-amd64" \
"${DOCKERHUB_REPO}:latest-arm64"
docker buildx imagetools create \
--tag "${DOCKERHUB_REPO}:${VERSION}" \
"${DOCKERHUB_REPO}:${VERSION}-amd64" \
"${DOCKERHUB_REPO}:${VERSION}-arm64"
- name: Cleanup temporary tags
continue-on-error: true
env:
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
run: |
set -euo pipefail
namespace="${DOCKERHUB_REPO%%/*}"
repo_name="${DOCKERHUB_REPO#*/}"
token="$(
curl -fsSL \
-H 'Content-Type: application/json' \
-d "{\"username\":\"${DOCKERHUB_USERNAME}\",\"password\":\"${DOCKERHUB_TOKEN}\"}" \
'https://hub.docker.com/v2/users/login/' \
| python3 -c 'import json,sys; print(json.load(sys.stdin)["token"])'
)"
delete_tag() {
local tag="$1"
local url="https://hub.docker.com/v2/repositories/${namespace}/${repo_name}/tags/${tag}/"
local http_code
http_code="$(curl -sS -o /dev/null -w "%{http_code}" -X DELETE -H "Authorization: JWT ${token}" "${url}" || true)"
if [ "${http_code}" = "204" ] || [ "${http_code}" = "404" ]; then
echo "Docker Hub tag removed (or missing): ${DOCKERHUB_REPO}:${tag} (HTTP ${http_code})"
return 0
fi
echo "Docker Hub tag delete failed: ${DOCKERHUB_REPO}:${tag} (HTTP ${http_code})"
return 0
}
delete_tag "latest-amd64"
delete_tag "latest-arm64"
delete_tag "${VERSION}-amd64"
delete_tag "${VERSION}-arm64"

go.mod
View File

@@ -13,6 +13,7 @@ require (
github.com/joho/godotenv v1.5.1
github.com/klauspost/compress v1.17.4
github.com/minio/minio-go/v7 v7.0.66
github.com/refraction-networking/utls v1.8.2
github.com/sirupsen/logrus v1.9.3
github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966
github.com/tidwall/gjson v1.18.0
@@ -21,7 +22,6 @@ require (
golang.org/x/crypto v0.45.0
golang.org/x/net v0.47.0
golang.org/x/oauth2 v0.30.0
golang.org/x/text v0.31.0
gopkg.in/natefinch/lumberjack.v2 v2.2.1
gopkg.in/yaml.v3 v3.0.1
)
@@ -71,6 +71,7 @@ require (
golang.org/x/arch v0.8.0 // indirect
golang.org/x/sync v0.18.0 // indirect
golang.org/x/sys v0.38.0 // indirect
golang.org/x/text v0.31.0 // indirect
google.golang.org/protobuf v1.34.1 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
)

go.sum
View File

@@ -118,6 +118,8 @@ github.com/pjbgf/sha1cd v0.5.0 h1:a+UkboSi1znleCDUNT3M5YxjOnN1fz2FhN48FlwCxs0=
github.com/pjbgf/sha1cd v0.5.0/go.mod h1:lhpGlyHLpQZoxMv8HcgXvZEhcGs0PG/vsZnEJ7H0iCM=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/refraction-networking/utls v1.8.2 h1:j4Q1gJj0xngdeH+Ox/qND11aEfhpgoEvV+S9iJ2IdQo=
github.com/refraction-networking/utls v1.8.2/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc=

View File

@@ -0,0 +1,33 @@
package management
import (
"net/http"
"strings"
"github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
)
// GetStaticModelDefinitions returns static model metadata for a given channel.
// Channel is provided via path param (:channel) or query param (?channel=...).
func (h *Handler) GetStaticModelDefinitions(c *gin.Context) {
channel := strings.TrimSpace(c.Param("channel"))
if channel == "" {
channel = strings.TrimSpace(c.Query("channel"))
}
if channel == "" {
c.JSON(http.StatusBadRequest, gin.H{"error": "channel is required"})
return
}
models := registry.GetStaticModelDefinitionsByChannel(channel)
if models == nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "unknown channel", "channel": channel})
return
}
c.JSON(http.StatusOK, gin.H{
"channel": strings.ToLower(strings.TrimSpace(channel)),
"models": models,
})
}

View File

@@ -8,6 +8,7 @@ import (
"io"
"net/http"
"strings"
"time"
"github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
@@ -103,6 +104,7 @@ func captureRequestInfo(c *gin.Context) (*RequestInfo, error) {
Headers: headers,
Body: body,
RequestID: logging.GetGinRequestID(c),
Timestamp: time.Now(),
}, nil
}

View File

@@ -7,6 +7,7 @@ import (
"bytes"
"net/http"
"strings"
"time"
"github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
@@ -20,22 +21,24 @@ type RequestInfo struct {
Headers map[string][]string // Headers contains the request headers.
Body []byte // Body is the raw request body.
RequestID string // RequestID is the unique identifier for the request.
Timestamp time.Time // Timestamp is when the request was received.
}
// ResponseWriterWrapper wraps the standard gin.ResponseWriter to intercept and log response data.
// It is designed to handle both standard and streaming responses, ensuring that logging operations do not block the client response.
type ResponseWriterWrapper struct {
gin.ResponseWriter
body *bytes.Buffer // body is a buffer to store the response body for non-streaming responses.
isStreaming bool // isStreaming indicates whether the response is a streaming type (e.g., text/event-stream).
streamWriter logging.StreamingLogWriter // streamWriter is a writer for handling streaming log entries.
chunkChannel chan []byte // chunkChannel is a channel for asynchronously passing response chunks to the logger.
streamDone chan struct{} // streamDone signals when the streaming goroutine completes.
logger logging.RequestLogger // logger is the instance of the request logger service.
requestInfo *RequestInfo // requestInfo holds the details of the original request.
statusCode int // statusCode stores the HTTP status code of the response.
headers map[string][]string // headers stores the response headers.
logOnErrorOnly bool // logOnErrorOnly enables logging only when an error response is detected.
body *bytes.Buffer // body is a buffer to store the response body for non-streaming responses.
isStreaming bool // isStreaming indicates whether the response is a streaming type (e.g., text/event-stream).
streamWriter logging.StreamingLogWriter // streamWriter is a writer for handling streaming log entries.
chunkChannel chan []byte // chunkChannel is a channel for asynchronously passing response chunks to the logger.
streamDone chan struct{} // streamDone signals when the streaming goroutine completes.
logger logging.RequestLogger // logger is the instance of the request logger service.
requestInfo *RequestInfo // requestInfo holds the details of the original request.
statusCode int // statusCode stores the HTTP status code of the response.
headers map[string][]string // headers stores the response headers.
logOnErrorOnly bool // logOnErrorOnly enables logging only when an error response is detected.
firstChunkTimestamp time.Time // firstChunkTimestamp captures TTFB for streaming responses.
}
// NewResponseWriterWrapper creates and initializes a new ResponseWriterWrapper.
@@ -73,6 +76,10 @@ func (w *ResponseWriterWrapper) Write(data []byte) (int, error) {
// THEN: Handle logging based on response type
if w.isStreaming && w.chunkChannel != nil {
// Capture TTFB on first chunk (synchronous, before async channel send)
if w.firstChunkTimestamp.IsZero() {
w.firstChunkTimestamp = time.Now()
}
// For streaming responses: Send to async logging channel (non-blocking)
select {
case w.chunkChannel <- append([]byte(nil), data...): // Non-blocking send with copy
@@ -117,6 +124,10 @@ func (w *ResponseWriterWrapper) WriteString(data string) (int, error) {
// THEN: Capture for logging
if w.isStreaming && w.chunkChannel != nil {
// Capture TTFB on first chunk (synchronous, before async channel send)
if w.firstChunkTimestamp.IsZero() {
w.firstChunkTimestamp = time.Now()
}
select {
case w.chunkChannel <- []byte(data):
default:
@@ -280,6 +291,8 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
w.streamDone = nil
}
w.streamWriter.SetFirstChunkTimestamp(w.firstChunkTimestamp)
// Write API Request and Response to the streaming log before closing
apiRequest := w.extractAPIRequest(c)
if len(apiRequest) > 0 {
@@ -297,7 +310,7 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
return nil
}
return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), slicesAPIResponseError, forceLog)
return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), w.extractAPIResponseTimestamp(c), slicesAPIResponseError, forceLog)
}
func (w *ResponseWriterWrapper) cloneHeaders() map[string][]string {
@@ -337,7 +350,18 @@ func (w *ResponseWriterWrapper) extractAPIResponse(c *gin.Context) []byte {
return data
}
func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
func (w *ResponseWriterWrapper) extractAPIResponseTimestamp(c *gin.Context) time.Time {
ts, isExist := c.Get("API_RESPONSE_TIMESTAMP")
if !isExist {
return time.Time{}
}
if t, ok := ts.(time.Time); ok {
return t
}
return time.Time{}
}
func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseTimestamp time.Time, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
if w.requestInfo == nil {
return nil
}
@@ -348,7 +372,7 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
}
if loggerWithOptions, ok := w.logger.(interface {
LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string) error
LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string, time.Time, time.Time) error
}); ok {
return loggerWithOptions.LogRequestWithOptions(
w.requestInfo.URL,
@@ -363,6 +387,8 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
apiResponseErrors,
forceLog,
w.requestInfo.RequestID,
w.requestInfo.Timestamp,
apiResponseTimestamp,
)
}
@@ -378,5 +404,7 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
apiResponseBody,
apiResponseErrors,
w.requestInfo.RequestID,
w.requestInfo.Timestamp,
apiResponseTimestamp,
)
}

View File

@@ -12,6 +12,7 @@ import (
"net/http"
"os"
"path/filepath"
"reflect"
"strings"
"sync"
"sync/atomic"
@@ -607,6 +608,7 @@ func (s *Server) registerManagementRoutes() {
mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels)
mgmt.GET("/model-definitions/:channel", s.mgmt.GetStaticModelDefinitions)
mgmt.GET("/auth-files/download", s.mgmt.DownloadAuthFile)
mgmt.POST("/auth-files", s.mgmt.UploadAuthFile)
mgmt.DELETE("/auth-files", s.mgmt.DeleteAuthFile)
@@ -989,14 +991,17 @@ func (s *Server) UpdateClients(cfg *config.Config) {
s.mgmt.SetAuthManager(s.handlers.AuthManager)
}
// Notify Amp module of config changes (for model mapping hot-reload)
if s.ampModule != nil {
log.Debugf("triggering amp module config update")
if err := s.ampModule.OnConfigUpdated(cfg); err != nil {
log.Errorf("failed to update Amp module config: %v", err)
// Notify Amp module only when Amp config has changed.
ampConfigChanged := oldCfg == nil || !reflect.DeepEqual(oldCfg.AmpCode, cfg.AmpCode)
if ampConfigChanged {
if s.ampModule != nil {
log.Debugf("triggering amp module config update")
if err := s.ampModule.OnConfigUpdated(cfg); err != nil {
log.Errorf("failed to update Amp module config: %v", err)
}
} else {
log.Warnf("amp module is nil, skipping config update")
}
} else {
log.Warnf("amp module is nil, skipping config update")
}
// Count client sources from configuration and auth store.

View File

@@ -14,7 +14,6 @@ import (
"time"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
log "github.com/sirupsen/logrus"
)
@@ -51,7 +50,8 @@ type ClaudeAuth struct {
}
// NewClaudeAuth creates a new Anthropic authentication service.
// It initializes the HTTP client with proxy settings from the configuration.
// It initializes the HTTP client with a custom TLS transport that uses Firefox
// fingerprint to bypass Cloudflare's TLS fingerprinting on Anthropic domains.
//
// Parameters:
// - cfg: The application configuration containing proxy settings
@@ -59,8 +59,10 @@ type ClaudeAuth struct {
// Returns:
// - *ClaudeAuth: A new Claude authentication service instance
func NewClaudeAuth(cfg *config.Config) *ClaudeAuth {
// Use custom HTTP client with Firefox TLS fingerprint to bypass
// Cloudflare's bot detection on Anthropic domains
return &ClaudeAuth{
httpClient: util.SetProxy(&cfg.SDKConfig, &http.Client{}),
httpClient: NewAnthropicHttpClient(&cfg.SDKConfig),
}
}

View File

@@ -0,0 +1,165 @@
// Package claude provides authentication functionality for Anthropic's Claude API.
// This file implements a custom HTTP transport using utls to bypass TLS fingerprinting.
package claude
import (
"net/http"
"net/url"
"strings"
"sync"
tls "github.com/refraction-networking/utls"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
log "github.com/sirupsen/logrus"
"golang.org/x/net/http2"
"golang.org/x/net/proxy"
)
// utlsRoundTripper implements http.RoundTripper using utls with Firefox fingerprint
// to bypass Cloudflare's TLS fingerprinting on Anthropic domains.
type utlsRoundTripper struct {
// mu protects the connections map and pending map
mu sync.Mutex
// connections caches HTTP/2 client connections per host
connections map[string]*http2.ClientConn
// pending tracks hosts that are currently being connected to (prevents race condition)
pending map[string]*sync.Cond
// dialer is used to create network connections, supporting proxies
dialer proxy.Dialer
}
// newUtlsRoundTripper creates a new utls-based round tripper with optional proxy support
func newUtlsRoundTripper(cfg *config.SDKConfig) *utlsRoundTripper {
var dialer proxy.Dialer = proxy.Direct
if cfg != nil && cfg.ProxyURL != "" {
proxyURL, err := url.Parse(cfg.ProxyURL)
if err != nil {
log.Errorf("failed to parse proxy URL %q: %v", cfg.ProxyURL, err)
} else {
pDialer, err := proxy.FromURL(proxyURL, proxy.Direct)
if err != nil {
log.Errorf("failed to create proxy dialer for %q: %v", cfg.ProxyURL, err)
} else {
dialer = pDialer
}
}
}
return &utlsRoundTripper{
connections: make(map[string]*http2.ClientConn),
pending: make(map[string]*sync.Cond),
dialer: dialer,
}
}
// getOrCreateConnection gets an existing connection or creates a new one.
// It uses a per-host locking mechanism to prevent multiple goroutines from
// creating connections to the same host simultaneously.
func (t *utlsRoundTripper) getOrCreateConnection(host, addr string) (*http2.ClientConn, error) {
t.mu.Lock()
// Check if connection exists and is usable
if h2Conn, ok := t.connections[host]; ok && h2Conn.CanTakeNewRequest() {
t.mu.Unlock()
return h2Conn, nil
}
// Check if another goroutine is already creating a connection
if cond, ok := t.pending[host]; ok {
// Wait for the other goroutine to finish
cond.Wait()
// Check if connection is now available
if h2Conn, ok := t.connections[host]; ok && h2Conn.CanTakeNewRequest() {
t.mu.Unlock()
return h2Conn, nil
}
// Connection still not available, we'll create one
}
// Mark this host as pending
cond := sync.NewCond(&t.mu)
t.pending[host] = cond
t.mu.Unlock()
// Create connection outside the lock
h2Conn, err := t.createConnection(host, addr)
t.mu.Lock()
defer t.mu.Unlock()
// Remove pending marker and wake up waiting goroutines
delete(t.pending, host)
cond.Broadcast()
if err != nil {
return nil, err
}
// Store the new connection
t.connections[host] = h2Conn
return h2Conn, nil
}
// createConnection creates a new HTTP/2 connection with Firefox TLS fingerprint
func (t *utlsRoundTripper) createConnection(host, addr string) (*http2.ClientConn, error) {
conn, err := t.dialer.Dial("tcp", addr)
if err != nil {
return nil, err
}
tlsConfig := &tls.Config{ServerName: host}
tlsConn := tls.UClient(conn, tlsConfig, tls.HelloFirefox_Auto)
if err := tlsConn.Handshake(); err != nil {
conn.Close()
return nil, err
}
tr := &http2.Transport{}
h2Conn, err := tr.NewClientConn(tlsConn)
if err != nil {
tlsConn.Close()
return nil, err
}
return h2Conn, nil
}
// RoundTrip implements http.RoundTripper
func (t *utlsRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
host := req.URL.Host
addr := host
if !strings.Contains(addr, ":") {
addr += ":443"
}
// Get hostname without port for TLS ServerName
hostname := req.URL.Hostname()
h2Conn, err := t.getOrCreateConnection(hostname, addr)
if err != nil {
return nil, err
}
resp, err := h2Conn.RoundTrip(req)
if err != nil {
// Connection failed, remove it from cache
t.mu.Lock()
if cached, ok := t.connections[hostname]; ok && cached == h2Conn {
delete(t.connections, hostname)
}
t.mu.Unlock()
return nil, err
}
return resp, nil
}
// NewAnthropicHttpClient creates an HTTP client that bypasses TLS fingerprinting
// for Anthropic domains by using utls with Firefox fingerprint.
// It accepts optional SDK configuration for proxy settings.
func NewAnthropicHttpClient(cfg *config.SDKConfig) *http.Client {
return &http.Client{
Transport: newUtlsRoundTripper(cfg),
}
}

View File

@@ -923,6 +923,7 @@ func SaveConfigPreserveComments(configFile string, cfg *Config) error {
removeLegacyGenerativeLanguageKeys(original.Content[0])
pruneMappingToGeneratedKeys(original.Content[0], generated.Content[0], "oauth-excluded-models")
pruneMappingToGeneratedKeys(original.Content[0], generated.Content[0], "oauth-model-alias")
// Merge generated into original in-place, preserving comments/order of existing nodes.
mergeMappingPreserve(original.Content[0], generated.Content[0])
@@ -1413,6 +1414,16 @@ func pruneMappingToGeneratedKeys(dstRoot, srcRoot *yaml.Node, key string) {
}
srcIdx := findMapKeyIndex(srcRoot, key)
if srcIdx < 0 {
// Keep an explicit empty mapping for oauth-model-alias when it was previously present.
//
// Rationale: LoadConfig runs MigrateOAuthModelAlias before unmarshalling. If the
// oauth-model-alias key is missing, migration will add the default antigravity aliases.
// When users delete the last channel from oauth-model-alias via the management API,
// we want that deletion to persist across hot reloads and restarts.
if key == "oauth-model-alias" {
dstRoot.Content[dstIdx+1] = &yaml.Node{Kind: yaml.MappingNode, Tag: "!!map"}
return
}
removeMapKey(dstRoot, key)
return
}

View File

@@ -44,10 +44,12 @@ type RequestLogger interface {
// - apiRequest: The API request data
// - apiResponse: The API response data
// - requestID: Optional request ID for log file naming
// - requestTimestamp: When the request was received
// - apiResponseTimestamp: When the API response was received
//
// Returns:
// - error: An error if logging fails, nil otherwise
LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string) error
LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error
// LogStreamingRequest initiates logging for a streaming request and returns a writer for chunks.
//
@@ -109,6 +111,12 @@ type StreamingLogWriter interface {
// - error: An error if writing fails, nil otherwise
WriteAPIResponse(apiResponse []byte) error
// SetFirstChunkTimestamp sets the TTFB timestamp captured when first chunk was received.
//
// Parameters:
// - timestamp: The time when first response chunk was received
SetFirstChunkTimestamp(timestamp time.Time)
// Close finalizes the log file and cleans up resources.
//
// Returns:
@@ -180,20 +188,22 @@ func (l *FileRequestLogger) SetEnabled(enabled bool) {
// - apiRequest: The API request data
// - apiResponse: The API response data
// - requestID: Optional request ID for log file naming
// - requestTimestamp: When the request was received
// - apiResponseTimestamp: When the API response was received
//
// Returns:
// - error: An error if logging fails, nil otherwise
func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string) error {
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false, requestID)
func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false, requestID, requestTimestamp, apiResponseTimestamp)
}
// LogRequestWithOptions logs a request with optional forced logging behavior.
// The force flag allows writing error logs even when regular request logging is disabled.
func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string) error {
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force, requestID)
func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force, requestID, requestTimestamp, apiResponseTimestamp)
}
func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string) error {
func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
if !l.enabled && !force {
return nil
}
@@ -247,6 +257,8 @@ func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[st
responseHeaders,
responseToWrite,
decompressErr,
requestTimestamp,
apiResponseTimestamp,
)
if errClose := logFile.Close(); errClose != nil {
log.WithError(errClose).Warn("failed to close request log file")
@@ -499,17 +511,22 @@ func (l *FileRequestLogger) writeNonStreamingLog(
responseHeaders map[string][]string,
response []byte,
decompressErr error,
requestTimestamp time.Time,
apiResponseTimestamp time.Time,
) error {
if errWrite := writeRequestInfoWithBody(w, url, method, requestHeaders, requestBody, requestBodyPath, time.Now()); errWrite != nil {
if requestTimestamp.IsZero() {
requestTimestamp = time.Now()
}
if errWrite := writeRequestInfoWithBody(w, url, method, requestHeaders, requestBody, requestBodyPath, requestTimestamp); errWrite != nil {
return errWrite
}
if errWrite := writeAPISection(w, "=== API REQUEST ===\n", "=== API REQUEST", apiRequest); errWrite != nil {
if errWrite := writeAPISection(w, "=== API REQUEST ===\n", "=== API REQUEST", apiRequest, time.Time{}); errWrite != nil {
return errWrite
}
if errWrite := writeAPIErrorResponses(w, apiResponseErrors); errWrite != nil {
return errWrite
}
if errWrite := writeAPISection(w, "=== API RESPONSE ===\n", "=== API RESPONSE", apiResponse); errWrite != nil {
if errWrite := writeAPISection(w, "=== API RESPONSE ===\n", "=== API RESPONSE", apiResponse, apiResponseTimestamp); errWrite != nil {
return errWrite
}
return writeResponseSection(w, statusCode, true, responseHeaders, bytes.NewReader(response), decompressErr, true)
@@ -583,7 +600,7 @@ func writeRequestInfoWithBody(
return nil
}
func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, payload []byte) error {
func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, payload []byte, timestamp time.Time) error {
if len(payload) == 0 {
return nil
}
@@ -601,6 +618,11 @@ func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, pa
if _, errWrite := io.WriteString(w, sectionHeader); errWrite != nil {
return errWrite
}
if !timestamp.IsZero() {
if _, errWrite := io.WriteString(w, fmt.Sprintf("Timestamp: %s\n", timestamp.Format(time.RFC3339Nano))); errWrite != nil {
return errWrite
}
}
if _, errWrite := w.Write(payload); errWrite != nil {
return errWrite
}
@@ -974,6 +996,9 @@ type FileStreamingLogWriter struct {
// apiResponse stores the upstream API response data.
apiResponse []byte
// apiResponseTimestamp captures when the API response was received.
apiResponseTimestamp time.Time
}
// WriteChunkAsync writes a response chunk asynchronously (non-blocking).
@@ -1053,6 +1078,12 @@ func (w *FileStreamingLogWriter) WriteAPIResponse(apiResponse []byte) error {
return nil
}
func (w *FileStreamingLogWriter) SetFirstChunkTimestamp(timestamp time.Time) {
if !timestamp.IsZero() {
w.apiResponseTimestamp = timestamp
}
}
// Close finalizes the log file and cleans up resources.
// It writes all buffered data to the file in the correct order:
// API REQUEST -> API RESPONSE -> RESPONSE (status, headers, body chunks)
@@ -1140,10 +1171,10 @@ func (w *FileStreamingLogWriter) writeFinalLog(logFile *os.File) error {
if errWrite := writeRequestInfoWithBody(logFile, w.url, w.method, w.requestHeaders, nil, w.requestBodyPath, w.timestamp); errWrite != nil {
return errWrite
}
if errWrite := writeAPISection(logFile, "=== API REQUEST ===\n", "=== API REQUEST", w.apiRequest); errWrite != nil {
if errWrite := writeAPISection(logFile, "=== API REQUEST ===\n", "=== API REQUEST", w.apiRequest, time.Time{}); errWrite != nil {
return errWrite
}
if errWrite := writeAPISection(logFile, "=== API RESPONSE ===\n", "=== API RESPONSE", w.apiResponse); errWrite != nil {
if errWrite := writeAPISection(logFile, "=== API RESPONSE ===\n", "=== API RESPONSE", w.apiResponse, w.apiResponseTimestamp); errWrite != nil {
return errWrite
}
@@ -1220,6 +1251,8 @@ func (w *NoOpStreamingLogWriter) WriteAPIResponse(_ []byte) error {
return nil
}
func (w *NoOpStreamingLogWriter) SetFirstChunkTimestamp(_ time.Time) {}
// Close is a no-op implementation that does nothing and always returns nil.
//
// Returns:

View File

@@ -1,848 +1,69 @@
// Package registry provides model definitions for various AI service providers.
// This file contains static model definitions that can be used by clients
// when registering their supported models.
// Package registry provides model definitions and lookup helpers for various AI providers.
// Static model metadata is stored in model_definitions_static_data.go.
package registry
// GetClaudeModels returns the standard Claude model definitions
func GetClaudeModels() []*ModelInfo {
return []*ModelInfo{
import (
"sort"
"strings"
)
{
ID: "claude-haiku-4-5-20251001",
Object: "model",
Created: 1759276800, // 2025-10-01
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.5 Haiku",
ContextLength: 200000,
MaxCompletionTokens: 64000,
// Thinking: not supported for Haiku models
},
{
ID: "claude-sonnet-4-5-20250929",
Object: "model",
Created: 1759104000, // 2025-09-29
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.5 Sonnet",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
},
{
ID: "claude-opus-4-5-20251101",
Object: "model",
Created: 1761955200, // 2025-11-01
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.5 Opus",
Description: "Premium model combining maximum intelligence with practical performance",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
},
{
ID: "claude-opus-4-1-20250805",
Object: "model",
Created: 1722945600, // 2025-08-05
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.1 Opus",
ContextLength: 200000,
MaxCompletionTokens: 32000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
},
{
ID: "claude-opus-4-20250514",
Object: "model",
Created: 1715644800, // 2025-05-14
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4 Opus",
ContextLength: 200000,
MaxCompletionTokens: 32000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
},
{
ID: "claude-sonnet-4-20250514",
Object: "model",
Created: 1715644800, // 2025-05-14
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4 Sonnet",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
},
{
ID: "claude-3-7-sonnet-20250219",
Object: "model",
Created: 1708300800, // 2025-02-19
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 3.7 Sonnet",
ContextLength: 128000,
MaxCompletionTokens: 8192,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
},
{
ID: "claude-3-5-haiku-20241022",
Object: "model",
Created: 1729555200, // 2024-10-22
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 3.5 Haiku",
ContextLength: 128000,
MaxCompletionTokens: 8192,
// Thinking: not supported for Haiku models
},
}
}
// GetGeminiModels returns the standard Gemini model definitions
func GetGeminiModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gemini-2.5-pro",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-pro",
Version: "2.5",
DisplayName: "Gemini 2.5 Pro",
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-lite",
Object: "model",
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-lite",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Lite",
Description: "Our smallest and most cost effective model, built for at scale usage.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-3-pro-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Preview",
Description: "Gemini 3 Pro Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
},
{
ID: "gemini-3-flash-preview",
Object: "model",
Created: 1765929600,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-flash-preview",
Version: "3.0",
DisplayName: "Gemini 3 Flash Preview",
Description: "Gemini 3 Flash Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
},
{
ID: "gemini-3-pro-image-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-image-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Image Preview",
Description: "Gemini 3 Pro Image Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
},
}
}
func GetGeminiVertexModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gemini-2.5-pro",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-pro",
Version: "2.5",
DisplayName: "Gemini 2.5 Pro",
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-lite",
Object: "model",
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-lite",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Lite",
Description: "Our smallest and most cost effective model, built for at scale usage.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-3-pro-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Preview",
Description: "Gemini 3 Pro Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
},
{
ID: "gemini-3-flash-preview",
Object: "model",
Created: 1765929600,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-flash-preview",
Version: "3.0",
DisplayName: "Gemini 3 Flash Preview",
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
},
{
ID: "gemini-3-pro-image-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-image-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Image Preview",
Description: "Gemini 3 Pro Image Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
},
// Imagen image generation models - use :predict action
{
ID: "imagen-4.0-generate-001",
Object: "model",
Created: 1750000000,
OwnedBy: "google",
Type: "gemini",
Name: "models/imagen-4.0-generate-001",
Version: "4.0",
DisplayName: "Imagen 4.0 Generate",
Description: "Imagen 4.0 image generation model",
SupportedGenerationMethods: []string{"predict"},
},
{
ID: "imagen-4.0-ultra-generate-001",
Object: "model",
Created: 1750000000,
OwnedBy: "google",
Type: "gemini",
Name: "models/imagen-4.0-ultra-generate-001",
Version: "4.0",
DisplayName: "Imagen 4.0 Ultra Generate",
Description: "Imagen 4.0 Ultra high-quality image generation model",
SupportedGenerationMethods: []string{"predict"},
},
{
ID: "imagen-3.0-generate-002",
Object: "model",
Created: 1740000000,
OwnedBy: "google",
Type: "gemini",
Name: "models/imagen-3.0-generate-002",
Version: "3.0",
DisplayName: "Imagen 3.0 Generate",
Description: "Imagen 3.0 image generation model",
SupportedGenerationMethods: []string{"predict"},
},
{
ID: "imagen-3.0-fast-generate-001",
Object: "model",
Created: 1740000000,
OwnedBy: "google",
Type: "gemini",
Name: "models/imagen-3.0-fast-generate-001",
Version: "3.0",
DisplayName: "Imagen 3.0 Fast Generate",
Description: "Imagen 3.0 fast image generation model",
SupportedGenerationMethods: []string{"predict"},
},
{
ID: "imagen-4.0-fast-generate-001",
Object: "model",
Created: 1750000000,
OwnedBy: "google",
Type: "gemini",
Name: "models/imagen-4.0-fast-generate-001",
Version: "4.0",
DisplayName: "Imagen 4.0 Fast Generate",
Description: "Imagen 4.0 fast image generation model",
SupportedGenerationMethods: []string{"predict"},
},
}
}
// GetGeminiCLIModels returns the standard Gemini model definitions
func GetGeminiCLIModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gemini-2.5-pro",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-pro",
Version: "2.5",
DisplayName: "Gemini 2.5 Pro",
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-lite",
Object: "model",
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-lite",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Lite",
Description: "Our smallest and most cost effective model, built for at scale usage.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-3-pro-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Preview",
Description: "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
},
{
ID: "gemini-3-flash-preview",
Object: "model",
Created: 1765929600,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-flash-preview",
Version: "3.0",
DisplayName: "Gemini 3 Flash Preview",
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
},
}
}
// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
func GetAIStudioModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gemini-2.5-pro",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-pro",
Version: "2.5",
DisplayName: "Gemini 2.5 Pro",
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-lite",
Object: "model",
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-lite",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Lite",
Description: "Our smallest and most cost effective model, built for at scale usage.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-3-pro-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Preview",
Description: "Gemini 3 Pro Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-3-flash-preview",
Object: "model",
Created: 1765929600,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-flash-preview",
Version: "3.0",
DisplayName: "Gemini 3 Flash Preview",
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-pro-latest",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-pro-latest",
Version: "2.5",
DisplayName: "Gemini Pro Latest",
Description: "Latest release of Gemini Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-flash-latest",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-flash-latest",
Version: "2.5",
DisplayName: "Gemini Flash Latest",
Description: "Latest release of Gemini Flash",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-flash-lite-latest",
Object: "model",
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-flash-lite-latest",
Version: "2.5",
DisplayName: "Gemini Flash-Lite Latest",
Description: "Latest release of Gemini Flash-Lite",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-image-preview",
Object: "model",
Created: 1756166400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-image-preview",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Image Preview",
Description: "State-of-the-art image generation and editing model.",
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// image models don't support thinkingConfig; leave Thinking nil
},
{
ID: "gemini-2.5-flash-image",
Object: "model",
Created: 1759363200,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-image",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Image",
Description: "State-of-the-art image generation and editing model.",
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// image models don't support thinkingConfig; leave Thinking nil
},
}
}
// GetOpenAIModels returns the standard OpenAI model definitions
func GetOpenAIModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gpt-5",
Object: "model",
Created: 1754524800,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-08-07",
DisplayName: "GPT 5",
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
},
{
ID: "gpt-5-codex",
Object: "model",
Created: 1757894400,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-09-15",
DisplayName: "GPT 5 Codex",
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5-codex-mini",
Object: "model",
Created: 1762473600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-11-07",
DisplayName: "GPT 5 Codex Mini",
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5.1",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5",
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
},
{
ID: "gpt-5.1-codex",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5.1 Codex",
Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5.1-codex-mini",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5.1 Codex Mini",
Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5.1-codex-max",
Object: "model",
Created: 1763424000,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-max",
DisplayName: "GPT 5.1 Codex Max",
Description: "Stable version of GPT 5.1 Codex Max",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
},
{
ID: "gpt-5.2",
Object: "model",
Created: 1765440000,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.2",
DisplayName: "GPT 5.2",
Description: "Stable version of GPT 5.2",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
},
{
ID: "gpt-5.2-codex",
Object: "model",
Created: 1765440000,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.2",
DisplayName: "GPT 5.2 Codex",
Description: "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
},
}
}
// GetQwenModels returns the standard Qwen model definitions
func GetQwenModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "qwen3-coder-plus",
Object: "model",
Created: 1753228800,
OwnedBy: "qwen",
Type: "qwen",
Version: "3.0",
DisplayName: "Qwen3 Coder Plus",
Description: "Advanced code generation and understanding model",
ContextLength: 32768,
MaxCompletionTokens: 8192,
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
},
{
ID: "qwen3-coder-flash",
Object: "model",
Created: 1753228800,
OwnedBy: "qwen",
Type: "qwen",
Version: "3.0",
DisplayName: "Qwen3 Coder Flash",
Description: "Fast code generation model",
ContextLength: 8192,
MaxCompletionTokens: 2048,
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
},
{
ID: "vision-model",
Object: "model",
Created: 1758672000,
OwnedBy: "qwen",
Type: "qwen",
Version: "3.0",
DisplayName: "Qwen3 Vision Model",
Description: "Vision model model",
ContextLength: 32768,
MaxCompletionTokens: 2048,
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
},
}
}
// iFlowThinkingSupport is a shared ThinkingSupport configuration for iFlow models
// that support thinking mode via chat_template_kwargs.enable_thinking (boolean toggle).
// Uses level-based configuration so standard normalization flows apply before conversion.
var iFlowThinkingSupport = &ThinkingSupport{
Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"},
}
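// Editorial sketch (assumption, not part of the upstream file): after the
// level normalization described above, an iFlow request would reduce the
// normalized level to the boolean chat_template_kwargs.enable_thinking toggle
// roughly as below; the helper name and the treatment of "auto" are illustrative.
func iFlowEnableThinking(level string) (enabled, ok bool) {
switch level {
case "none":
return false, true // thinking explicitly disabled
case "auto", "minimal", "low", "medium", "high", "xhigh":
return true, true // every positive effort level maps to the same boolean toggle
default:
return false, false // unknown level: leave the toggle unset
}
}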
// GetIFlowModels returns supported models for iFlow OAuth accounts.
func GetIFlowModels() []*ModelInfo {
entries := []struct {
ID string
DisplayName string
Description string
Created int64
Thinking *ThinkingSupport
}{
{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
{ID: "qwen3-max", DisplayName: "Qwen3-Max", Description: "Qwen3 flagship model", Created: 1758672000},
{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000},
{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400},
{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
}
models := make([]*ModelInfo, 0, len(entries))
for _, entry := range entries {
models = append(models, &ModelInfo{
ID: entry.ID,
Object: "model",
Created: entry.Created,
OwnedBy: "iflow",
Type: "iflow",
DisplayName: entry.DisplayName,
Description: entry.Description,
Thinking: entry.Thinking,
})
}
return models
}
// GetStaticModelDefinitionsByChannel returns static model definitions for a given channel/provider.
// It returns nil when the channel is unknown.
//
// Supported channels:
// - claude
// - gemini
// - vertex
// - gemini-cli
// - aistudio
// - codex
// - qwen
// - iflow
// - antigravity (returns static overrides only)
func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
key := strings.ToLower(strings.TrimSpace(channel))
switch key {
case "claude":
return GetClaudeModels()
case "gemini":
return GetGeminiModels()
case "vertex":
return GetGeminiVertexModels()
case "gemini-cli":
return GetGeminiCLIModels()
case "aistudio":
return GetAIStudioModels()
case "codex":
return GetOpenAIModels()
case "qwen":
return GetQwenModels()
case "iflow":
return GetIFlowModels()
case "antigravity":
cfg := GetAntigravityModelConfig()
if len(cfg) == 0 {
return nil
}
models := make([]*ModelInfo, 0, len(cfg))
for modelID, entry := range cfg {
if modelID == "" || entry == nil {
continue
}
models = append(models, &ModelInfo{
ID: modelID,
Object: "model",
OwnedBy: "antigravity",
Type: "antigravity",
Thinking: entry.Thinking,
MaxCompletionTokens: entry.MaxCompletionTokens,
})
}
sort.Slice(models, func(i, j int) bool {
return strings.ToLower(models[i].ID) < strings.ToLower(models[j].ID)
})
return models
default:
return nil
}
}
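// Usage (illustrative): channel lookup is case- and whitespace-insensitive,
// so " Claude " resolves the same as "claude"; unknown channels yield nil.
//
//	for _, m := range GetStaticModelDefinitionsByChannel(" Claude ") {
//		fmt.Println(m.ID, m.DisplayName)
//	}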
// AntigravityModelConfig captures static antigravity model overrides, including
// Thinking budget limits and provider max completion tokens.
type AntigravityModelConfig struct {
Thinking *ThinkingSupport
MaxCompletionTokens int
}
// GetAntigravityModelConfig returns static configuration for antigravity models.
// Keys use upstream model names returned by the Antigravity models endpoint.
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
return map[string]*AntigravityModelConfig{
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
"rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
"gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
"gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
"gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
"claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
"claude-sonnet-4-5": {MaxCompletionTokens: 64000},
"gpt-oss-120b-medium": {},
"tab_flash_lite_preview": {},
}
}

View File

@@ -0,0 +1,846 @@
// Package registry provides model definitions for various AI service providers.
// This file stores the static model metadata catalog.
package registry
// GetClaudeModels returns the standard Claude model definitions
func GetClaudeModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "claude-haiku-4-5-20251001",
Object: "model",
Created: 1759276800, // 2025-10-01
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.5 Haiku",
ContextLength: 200000,
MaxCompletionTokens: 64000,
// Thinking: not supported for Haiku models
},
{
ID: "claude-sonnet-4-5-20250929",
Object: "model",
Created: 1759104000, // 2025-09-29
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.5 Sonnet",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
},
{
ID: "claude-opus-4-5-20251101",
Object: "model",
Created: 1761955200, // 2025-11-01
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.5 Opus",
Description: "Premium model combining maximum intelligence with practical performance",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
},
{
ID: "claude-opus-4-1-20250805",
Object: "model",
Created: 1754352000, // 2025-08-05
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4.1 Opus",
ContextLength: 200000,
MaxCompletionTokens: 32000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
},
{
ID: "claude-opus-4-20250514",
Object: "model",
Created: 1747180800, // 2025-05-14
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4 Opus",
ContextLength: 200000,
MaxCompletionTokens: 32000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
},
{
ID: "claude-sonnet-4-20250514",
Object: "model",
Created: 1747180800, // 2025-05-14
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 4 Sonnet",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
},
{
ID: "claude-3-7-sonnet-20250219",
Object: "model",
Created: 1739923200, // 2025-02-19
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 3.7 Sonnet",
ContextLength: 128000,
MaxCompletionTokens: 8192,
Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false},
},
{
ID: "claude-3-5-haiku-20241022",
Object: "model",
Created: 1729555200, // 2024-10-22
OwnedBy: "anthropic",
Type: "claude",
DisplayName: "Claude 3.5 Haiku",
ContextLength: 128000,
MaxCompletionTokens: 8192,
// Thinking: not supported for Haiku models
},
}
}
// GetGeminiModels returns the standard Gemini model definitions
func GetGeminiModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gemini-2.5-pro",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-pro",
Version: "2.5",
DisplayName: "Gemini 2.5 Pro",
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-lite",
Object: "model",
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-lite",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Lite",
Description: "Our smallest and most cost effective model, built for at scale usage.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-3-pro-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Preview",
Description: "Gemini 3 Pro Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
},
{
ID: "gemini-3-flash-preview",
Object: "model",
Created: 1765929600,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-flash-preview",
Version: "3.0",
DisplayName: "Gemini 3 Flash Preview",
Description: "Gemini 3 Flash Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
},
{
ID: "gemini-3-pro-image-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-image-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Image Preview",
Description: "Gemini 3 Pro Image Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
},
}
}
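// GetGeminiVertexModels returns the Gemini and Imagen model definitions for Vertex AI integrations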
func GetGeminiVertexModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gemini-2.5-pro",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-pro",
Version: "2.5",
DisplayName: "Gemini 2.5 Pro",
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-lite",
Object: "model",
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-lite",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Lite",
Description: "Our smallest and most cost effective model, built for at scale usage.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-3-pro-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Preview",
Description: "Gemini 3 Pro Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
},
{
ID: "gemini-3-flash-preview",
Object: "model",
Created: 1765929600,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-flash-preview",
Version: "3.0",
DisplayName: "Gemini 3 Flash Preview",
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
},
{
ID: "gemini-3-pro-image-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-image-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Image Preview",
Description: "Gemini 3 Pro Image Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
},
// Imagen image generation models - use :predict action
{
ID: "imagen-4.0-generate-001",
Object: "model",
Created: 1750000000,
OwnedBy: "google",
Type: "gemini",
Name: "models/imagen-4.0-generate-001",
Version: "4.0",
DisplayName: "Imagen 4.0 Generate",
Description: "Imagen 4.0 image generation model",
SupportedGenerationMethods: []string{"predict"},
},
{
ID: "imagen-4.0-ultra-generate-001",
Object: "model",
Created: 1750000000,
OwnedBy: "google",
Type: "gemini",
Name: "models/imagen-4.0-ultra-generate-001",
Version: "4.0",
DisplayName: "Imagen 4.0 Ultra Generate",
Description: "Imagen 4.0 Ultra high-quality image generation model",
SupportedGenerationMethods: []string{"predict"},
},
{
ID: "imagen-3.0-generate-002",
Object: "model",
Created: 1740000000,
OwnedBy: "google",
Type: "gemini",
Name: "models/imagen-3.0-generate-002",
Version: "3.0",
DisplayName: "Imagen 3.0 Generate",
Description: "Imagen 3.0 image generation model",
SupportedGenerationMethods: []string{"predict"},
},
{
ID: "imagen-3.0-fast-generate-001",
Object: "model",
Created: 1740000000,
OwnedBy: "google",
Type: "gemini",
Name: "models/imagen-3.0-fast-generate-001",
Version: "3.0",
DisplayName: "Imagen 3.0 Fast Generate",
Description: "Imagen 3.0 fast image generation model",
SupportedGenerationMethods: []string{"predict"},
},
{
ID: "imagen-4.0-fast-generate-001",
Object: "model",
Created: 1750000000,
OwnedBy: "google",
Type: "gemini",
Name: "models/imagen-4.0-fast-generate-001",
Version: "4.0",
DisplayName: "Imagen 4.0 Fast Generate",
Description: "Imagen 4.0 fast image generation model",
SupportedGenerationMethods: []string{"predict"},
},
}
}
// GetGeminiCLIModels returns the standard Gemini model definitions
func GetGeminiCLIModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gemini-2.5-pro",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-pro",
Version: "2.5",
DisplayName: "Gemini 2.5 Pro",
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-lite",
Object: "model",
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-lite",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Lite",
Description: "Our smallest and most cost effective model, built for at scale usage.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-3-pro-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Preview",
Description: "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
},
{
ID: "gemini-3-flash-preview",
Object: "model",
Created: 1765929600,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-flash-preview",
Version: "3.0",
DisplayName: "Gemini 3 Flash Preview",
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
},
}
}
// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
func GetAIStudioModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gemini-2.5-pro",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-pro",
Version: "2.5",
DisplayName: "Gemini 2.5 Pro",
Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash",
Version: "001",
DisplayName: "Gemini 2.5 Flash",
Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-lite",
Object: "model",
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-lite",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Lite",
Description: "Our smallest and most cost effective model, built for at scale usage.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-3-pro-preview",
Object: "model",
Created: 1737158400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-pro-preview",
Version: "3.0",
DisplayName: "Gemini 3 Pro Preview",
Description: "Gemini 3 Pro Preview",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-3-flash-preview",
Object: "model",
Created: 1765929600,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3-flash-preview",
Version: "3.0",
DisplayName: "Gemini 3 Flash Preview",
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-pro-latest",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-pro-latest",
Version: "2.5",
DisplayName: "Gemini Pro Latest",
Description: "Latest release of Gemini Pro",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-flash-latest",
Object: "model",
Created: 1750118400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-flash-latest",
Version: "2.5",
DisplayName: "Gemini Flash Latest",
Description: "Latest release of Gemini Flash",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-flash-lite-latest",
Object: "model",
Created: 1753142400,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-flash-lite-latest",
Version: "2.5",
DisplayName: "Gemini Flash-Lite Latest",
Description: "Latest release of Gemini Flash-Lite",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
// {
// ID: "gemini-2.5-flash-image-preview",
// Object: "model",
// Created: 1756166400,
// OwnedBy: "google",
// Type: "gemini",
// Name: "models/gemini-2.5-flash-image-preview",
// Version: "2.5",
// DisplayName: "Gemini 2.5 Flash Image Preview",
// Description: "State-of-the-art image generation and editing model.",
// InputTokenLimit: 1048576,
// OutputTokenLimit: 8192,
// SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// // image models don't support thinkingConfig; leave Thinking nil
// },
{
ID: "gemini-2.5-flash-image",
Object: "model",
Created: 1759363200,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-image",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Image",
Description: "State-of-the-art image generation and editing model.",
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// image models don't support thinkingConfig; leave Thinking nil
},
}
}
// GetOpenAIModels returns the standard OpenAI model definitions
func GetOpenAIModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "gpt-5",
Object: "model",
Created: 1754524800,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-08-07",
DisplayName: "GPT 5",
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
},
{
ID: "gpt-5-codex",
Object: "model",
Created: 1757894400,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-09-15",
DisplayName: "GPT 5 Codex",
Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5-codex-mini",
Object: "model",
Created: 1762473600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5-2025-11-07",
DisplayName: "GPT 5 Codex Mini",
Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5.1",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5",
Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
},
{
ID: "gpt-5.1-codex",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5.1 Codex",
Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5.1-codex-mini",
Object: "model",
Created: 1762905600,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-2025-11-12",
DisplayName: "GPT 5.1 Codex Mini",
Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gpt-5.1-codex-max",
Object: "model",
Created: 1763424000,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.1-max",
DisplayName: "GPT 5.1 Codex Max",
Description: "Stable version of GPT 5.1 Codex Max",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
},
{
ID: "gpt-5.2",
Object: "model",
Created: 1765440000,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.2",
DisplayName: "GPT 5.2",
Description: "Stable version of GPT 5.2",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
},
{
ID: "gpt-5.2-codex",
Object: "model",
Created: 1765440000,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.2",
DisplayName: "GPT 5.2 Codex",
Description: "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.",
ContextLength: 400000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
},
}
}
// GetQwenModels returns the standard Qwen model definitions
func GetQwenModels() []*ModelInfo {
return []*ModelInfo{
{
ID: "qwen3-coder-plus",
Object: "model",
Created: 1753228800,
OwnedBy: "qwen",
Type: "qwen",
Version: "3.0",
DisplayName: "Qwen3 Coder Plus",
Description: "Advanced code generation and understanding model",
ContextLength: 32768,
MaxCompletionTokens: 8192,
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
},
{
ID: "qwen3-coder-flash",
Object: "model",
Created: 1753228800,
OwnedBy: "qwen",
Type: "qwen",
Version: "3.0",
DisplayName: "Qwen3 Coder Flash",
Description: "Fast code generation model",
ContextLength: 8192,
MaxCompletionTokens: 2048,
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
},
{
ID: "vision-model",
Object: "model",
Created: 1758672000,
OwnedBy: "qwen",
Type: "qwen",
Version: "3.0",
DisplayName: "Qwen3 Vision Model",
Description: "Vision model model",
ContextLength: 32768,
MaxCompletionTokens: 2048,
SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
},
}
}
// iFlowThinkingSupport is a shared ThinkingSupport configuration for iFlow models
// that support thinking mode via chat_template_kwargs.enable_thinking (boolean toggle).
// Uses level-based configuration so standard normalization flows apply before conversion.
var iFlowThinkingSupport = &ThinkingSupport{
Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"},
}
// GetIFlowModels returns supported models for iFlow OAuth accounts.
func GetIFlowModels() []*ModelInfo {
entries := []struct {
ID string
DisplayName string
Description string
Created int64
Thinking *ThinkingSupport
}{
{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
{ID: "qwen3-max", DisplayName: "Qwen3-Max", Description: "Qwen3 flagship model", Created: 1758672000},
{ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000},
{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400, Thinking: iFlowThinkingSupport},
{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000, Thinking: iFlowThinkingSupport},
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200, Thinking: iFlowThinkingSupport},
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
}
models := make([]*ModelInfo, 0, len(entries))
for _, entry := range entries {
models = append(models, &ModelInfo{
ID: entry.ID,
Object: "model",
Created: entry.Created,
OwnedBy: "iflow",
Type: "iflow",
DisplayName: entry.DisplayName,
Description: entry.Description,
Thinking: entry.Thinking,
})
}
return models
}
// AntigravityModelConfig captures static antigravity model overrides, including
// Thinking budget limits and provider max completion tokens.
type AntigravityModelConfig struct {
Thinking *ThinkingSupport
MaxCompletionTokens int
}
// GetAntigravityModelConfig returns static configuration for antigravity models.
// Keys use upstream model names returned by the Antigravity models endpoint.
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
return map[string]*AntigravityModelConfig{
// "rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
"gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
"gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
"gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
"claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
"claude-sonnet-4-5": {MaxCompletionTokens: 64000},
"gpt-oss-120b-medium": {},
"tab_flash_lite_preview": {},
}
}

View File

@@ -148,7 +148,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
baseURLs := antigravityBaseURLFallbackOrder(auth)
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
-attempts := antigravityRetryAttempts(e.cfg)
+attempts := antigravityRetryAttempts(auth, e.cfg)
attemptLoop:
for attempt := 0; attempt < attempts; attempt++ {
@@ -289,7 +289,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
baseURLs := antigravityBaseURLFallbackOrder(auth)
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
-attempts := antigravityRetryAttempts(e.cfg)
+attempts := antigravityRetryAttempts(auth, e.cfg)
attemptLoop:
for attempt := 0; attempt < attempts; attempt++ {
@@ -677,7 +677,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
baseURLs := antigravityBaseURLFallbackOrder(auth)
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
-attempts := antigravityRetryAttempts(e.cfg)
+attempts := antigravityRetryAttempts(auth, e.cfg)
attemptLoop:
for attempt := 0; attempt < attempts; attempt++ {
@@ -1447,11 +1447,16 @@ func resolveUserAgent(auth *cliproxyauth.Auth) string {
return defaultAntigravityAgent
}
-func antigravityRetryAttempts(cfg *config.Config) int {
-if cfg == nil {
-return 1
+func antigravityRetryAttempts(auth *cliproxyauth.Auth, cfg *config.Config) int {
+retry := 0
+if cfg != nil {
+retry = cfg.RequestRetry
}
+if auth != nil {
+if override, ok := auth.RequestRetryOverride(); ok {
+retry = override
+}
+}
-retry := cfg.RequestRetry
if retry < 0 {
retry = 0
}

View File

@@ -0,0 +1,258 @@
package executor
import (
"fmt"
"testing"
"github.com/tidwall/gjson"
)
func TestEnsureCacheControl(t *testing.T) {
// Test case 1: System prompt as string
t.Run("String System Prompt", func(t *testing.T) {
input := []byte(`{"model": "claude-3-5-sonnet", "system": "This is a long system prompt", "messages": []}`)
output := ensureCacheControl(input)
res := gjson.GetBytes(output, "system.0.cache_control.type")
if res.String() != "ephemeral" {
t.Errorf("cache_control not found in system string. Output: %s", string(output))
}
})
// Test case 2: System prompt as array
t.Run("Array System Prompt", func(t *testing.T) {
input := []byte(`{"model": "claude-3-5-sonnet", "system": [{"type": "text", "text": "Part 1"}, {"type": "text", "text": "Part 2"}], "messages": []}`)
output := ensureCacheControl(input)
// cache_control should only be on the LAST element
res0 := gjson.GetBytes(output, "system.0.cache_control")
res1 := gjson.GetBytes(output, "system.1.cache_control.type")
if res0.Exists() {
t.Errorf("cache_control should NOT be on the first element")
}
if res1.String() != "ephemeral" {
t.Errorf("cache_control not found on last system element. Output: %s", string(output))
}
})
// Test case 3: Tools are cached
t.Run("Tools Caching", func(t *testing.T) {
input := []byte(`{
"model": "claude-3-5-sonnet",
"tools": [
{"name": "tool1", "description": "First tool", "input_schema": {"type": "object"}},
{"name": "tool2", "description": "Second tool", "input_schema": {"type": "object"}}
],
"system": "System prompt",
"messages": []
}`)
output := ensureCacheControl(input)
// cache_control should only be on the LAST tool
tool0Cache := gjson.GetBytes(output, "tools.0.cache_control")
tool1Cache := gjson.GetBytes(output, "tools.1.cache_control.type")
if tool0Cache.Exists() {
t.Errorf("cache_control should NOT be on the first tool")
}
if tool1Cache.String() != "ephemeral" {
t.Errorf("cache_control not found on last tool. Output: %s", string(output))
}
// System should also have cache_control
systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
if systemCache.String() != "ephemeral" {
t.Errorf("cache_control not found in system. Output: %s", string(output))
}
})
// Test case 4: Tools and system are INDEPENDENT breakpoints
// Per Anthropic docs: Up to 4 breakpoints allowed, tools and system are cached separately
t.Run("Independent Cache Breakpoints", func(t *testing.T) {
input := []byte(`{
"model": "claude-3-5-sonnet",
"tools": [
{"name": "tool1", "description": "First tool", "input_schema": {"type": "object"}, "cache_control": {"type": "ephemeral"}}
],
"system": [{"type": "text", "text": "System"}],
"messages": []
}`)
output := ensureCacheControl(input)
// Tool already has cache_control - should not be changed
tool0Cache := gjson.GetBytes(output, "tools.0.cache_control.type")
if tool0Cache.String() != "ephemeral" {
t.Errorf("existing cache_control was incorrectly removed")
}
// System SHOULD get cache_control because it is an INDEPENDENT breakpoint
// Tools and system are separate cache levels in the hierarchy
systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
if systemCache.String() != "ephemeral" {
t.Errorf("system should have its own cache_control breakpoint (independent of tools)")
}
})
// Test case 5: Only tools, no system
t.Run("Only Tools No System", func(t *testing.T) {
input := []byte(`{
"model": "claude-3-5-sonnet",
"tools": [
{"name": "tool1", "description": "Tool", "input_schema": {"type": "object"}}
],
"messages": [{"role": "user", "content": "Hi"}]
}`)
output := ensureCacheControl(input)
toolCache := gjson.GetBytes(output, "tools.0.cache_control.type")
if toolCache.String() != "ephemeral" {
t.Errorf("cache_control not found on tool. Output: %s", string(output))
}
})
// Test case 6: Many tools (Claude Code scenario)
t.Run("Many Tools (Claude Code Scenario)", func(t *testing.T) {
// Simulate Claude Code with many tools
toolsJSON := `[`
for i := 0; i < 50; i++ {
if i > 0 {
toolsJSON += ","
}
toolsJSON += fmt.Sprintf(`{"name": "tool%d", "description": "Tool %d", "input_schema": {"type": "object"}}`, i, i)
}
toolsJSON += `]`
input := []byte(fmt.Sprintf(`{
"model": "claude-3-5-sonnet",
"tools": %s,
"system": [{"type": "text", "text": "You are Claude Code"}],
"messages": [{"role": "user", "content": "Hello"}]
}`, toolsJSON))
output := ensureCacheControl(input)
// Only the last tool (index 49) should have cache_control
for i := 0; i < 49; i++ {
path := fmt.Sprintf("tools.%d.cache_control", i)
if gjson.GetBytes(output, path).Exists() {
t.Errorf("tool %d should NOT have cache_control", i)
}
}
lastToolCache := gjson.GetBytes(output, "tools.49.cache_control.type")
if lastToolCache.String() != "ephemeral" {
t.Errorf("last tool (49) should have cache_control")
}
// System should also have cache_control
systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
if systemCache.String() != "ephemeral" {
t.Errorf("system should have cache_control")
}
t.Log("test passed: 50 tools - cache_control only on last tool")
})
// Test case 7: Empty tools array
t.Run("Empty Tools Array", func(t *testing.T) {
input := []byte(`{"model": "claude-3-5-sonnet", "tools": [], "system": "Test", "messages": []}`)
output := ensureCacheControl(input)
// System should still get cache_control
systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
if systemCache.String() != "ephemeral" {
t.Errorf("system should have cache_control even with empty tools array")
}
})
// Test case 8: Messages caching for multi-turn (second-to-last user)
t.Run("Messages Caching Second-To-Last User", func(t *testing.T) {
input := []byte(`{
"model": "claude-3-5-sonnet",
"messages": [
{"role": "user", "content": "First user"},
{"role": "assistant", "content": "Assistant reply"},
{"role": "user", "content": "Second user"},
{"role": "assistant", "content": "Assistant reply 2"},
{"role": "user", "content": "Third user"}
]
}`)
output := ensureCacheControl(input)
cacheType := gjson.GetBytes(output, "messages.2.content.0.cache_control.type")
if cacheType.String() != "ephemeral" {
t.Errorf("cache_control not found on second-to-last user turn. Output: %s", string(output))
}
lastUserCache := gjson.GetBytes(output, "messages.4.content.0.cache_control")
if lastUserCache.Exists() {
t.Errorf("last user turn should NOT have cache_control")
}
})
// Test case 9: Existing message cache_control should skip injection
t.Run("Messages Skip When Cache Control Exists", func(t *testing.T) {
input := []byte(`{
"model": "claude-3-5-sonnet",
"messages": [
{"role": "user", "content": [{"type": "text", "text": "First user"}]},
{"role": "assistant", "content": [{"type": "text", "text": "Assistant reply", "cache_control": {"type": "ephemeral"}}]},
{"role": "user", "content": [{"type": "text", "text": "Second user"}]}
]
}`)
output := ensureCacheControl(input)
userCache := gjson.GetBytes(output, "messages.0.content.0.cache_control")
if userCache.Exists() {
t.Errorf("cache_control should NOT be injected when a message already has cache_control")
}
existingCache := gjson.GetBytes(output, "messages.1.content.0.cache_control.type")
if existingCache.String() != "ephemeral" {
t.Errorf("existing cache_control should be preserved. Output: %s", string(output))
}
})
}
// TestCacheControlOrder verifies the breakpoint order tools -> system; the message
// breakpoint is intentionally absent here because it requires at least two user turns.
func TestCacheControlOrder(t *testing.T) {
input := []byte(`{
"model": "claude-sonnet-4",
"tools": [
{"name": "Read", "description": "Read file", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}}}},
{"name": "Write", "description": "Write file", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}}}
],
"system": [
{"type": "text", "text": "You are Claude Code, Anthropic's official CLI for Claude."},
{"type": "text", "text": "Additional instructions here..."}
],
"messages": [
{"role": "user", "content": "Hello"}
]
}`)
output := ensureCacheControl(input)
// 1. Last tool has cache_control
if gjson.GetBytes(output, "tools.1.cache_control.type").String() != "ephemeral" {
t.Error("last tool should have cache_control")
}
// 2. First tool has NO cache_control
if gjson.GetBytes(output, "tools.0.cache_control").Exists() {
t.Error("first tool should NOT have cache_control")
}
// 3. Last system element has cache_control
if gjson.GetBytes(output, "system.1.cache_control.type").String() != "ephemeral" {
t.Error("last system element should have cache_control")
}
// 4. First system element has NO cache_control
if gjson.GetBytes(output, "system.0.cache_control").Exists() {
t.Error("first system element should NOT have cache_control")
}
t.Log("cache order correct: tools -> system")
}
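// To run just the caching tests (illustrative invocation; adjust the package
// path to the repository layout):
//
//	go test -run 'TestEnsureCacheControl|TestCacheControlOrder' ./...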

View File

@@ -120,6 +120,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
body = disableThinkingIfToolChoiceForced(body)
+// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
+body = ensureCacheControl(body)
// Extract betas from body and convert to header
var extraBetas []string
extraBetas, body = extractAndRemoveBetas(body)
@@ -252,6 +255,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
body = disableThinkingIfToolChoiceForced(body)
+// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
+body = ensureCacheControl(body)
// Extract betas from body and convert to header
var extraBetas []string
extraBetas, body = extractAndRemoveBetas(body)
@@ -636,13 +642,17 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
ginHeaders = ginCtx.Request.Header
}
baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
promptCachingBeta := "prompt-caching-2024-07-31"
baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14," + promptCachingBeta
if val := strings.TrimSpace(ginHeaders.Get("Anthropic-Beta")); val != "" {
baseBetas = val
if !strings.Contains(val, "oauth") {
baseBetas += ",oauth-2025-04-20"
}
}
if !strings.Contains(baseBetas, promptCachingBeta) {
baseBetas += "," + promptCachingBeta
}
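// Illustrative (annotation, not part of the diff): a client header of
// "Anthropic-Beta: my-beta" yields "my-beta,oauth-2025-04-20,prompt-caching-2024-07-31";
// the caching beta is appended even when the client overrides the default list.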
// Merge extra betas from request body
if len(extraBetas) > 0 {
@@ -990,3 +1000,214 @@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A
return payload
}
// ensureCacheControl injects cache_control breakpoints into the payload for optimal prompt caching.
// According to Anthropic's documentation, cache prefixes are created in order: tools -> system -> messages.
// This function adds cache_control to:
// 1. The LAST tool in the tools array (caches all tool definitions)
// 2. The LAST element in the system array (caches system prompt)
// 3. The SECOND-TO-LAST user turn (caches conversation history for multi-turn)
//
// Up to 4 cache breakpoints are allowed per request. Tools, System, and Messages are INDEPENDENT breakpoints.
// This enables up to 90% cost reduction on cached tokens (cache read = 0.1x base price).
// See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
func ensureCacheControl(payload []byte) []byte {
// 1. Inject cache_control into the LAST tool (caches all tool definitions)
// Tools are cached first in the hierarchy, so this is the most important breakpoint.
payload = injectToolsCacheControl(payload)
// 2. Inject cache_control into the LAST system prompt element
// System is the second level in the cache hierarchy.
payload = injectSystemCacheControl(payload)
// 3. Inject cache_control into messages for multi-turn conversation caching
// This caches the conversation history up to the second-to-last user turn.
payload = injectMessagesCacheControl(payload)
return payload
}
// injectMessagesCacheControl adds cache_control to the second-to-last user turn for multi-turn caching.
// Per Anthropic docs: "Place cache_control on the second-to-last User message to let the model reuse the earlier cache."
// This enables caching of conversation history, which is especially beneficial for long multi-turn conversations.
// Only adds cache_control if:
// - There are at least 2 user turns in the conversation
// - No message content already has cache_control
func injectMessagesCacheControl(payload []byte) []byte {
messages := gjson.GetBytes(payload, "messages")
if !messages.Exists() || !messages.IsArray() {
return payload
}
// Check if ANY message content already has cache_control
hasCacheControlInMessages := false
messages.ForEach(func(_, msg gjson.Result) bool {
content := msg.Get("content")
if content.IsArray() {
content.ForEach(func(_, item gjson.Result) bool {
if item.Get("cache_control").Exists() {
hasCacheControlInMessages = true
return false
}
return true
})
}
return !hasCacheControlInMessages
})
if hasCacheControlInMessages {
return payload
}
// Find all user message indices
var userMsgIndices []int
messages.ForEach(func(index gjson.Result, msg gjson.Result) bool {
if msg.Get("role").String() == "user" {
userMsgIndices = append(userMsgIndices, int(index.Int()))
}
return true
})
// Need at least 2 user turns to cache the second-to-last
if len(userMsgIndices) < 2 {
return payload
}
// Get the second-to-last user message index
secondToLastUserIdx := userMsgIndices[len(userMsgIndices)-2]
// Get the content of this message
contentPath := fmt.Sprintf("messages.%d.content", secondToLastUserIdx)
content := gjson.GetBytes(payload, contentPath)
if content.IsArray() {
// Add cache_control to the last content block of this message
contentCount := int(content.Get("#").Int())
if contentCount > 0 {
cacheControlPath := fmt.Sprintf("messages.%d.content.%d.cache_control", secondToLastUserIdx, contentCount-1)
result, err := sjson.SetBytes(payload, cacheControlPath, map[string]string{"type": "ephemeral"})
if err != nil {
log.Warnf("failed to inject cache_control into messages: %v", err)
return payload
}
payload = result
}
} else if content.Type == gjson.String {
// Convert string content to array with cache_control
text := content.String()
newContent := []map[string]interface{}{
{
"type": "text",
"text": text,
"cache_control": map[string]string{
"type": "ephemeral",
},
},
}
result, err := sjson.SetBytes(payload, contentPath, newContent)
if err != nil {
log.Warnf("failed to inject cache_control into message string content: %v", err)
return payload
}
payload = result
}
return payload
}
// injectToolsCacheControl adds cache_control to the last tool in the tools array.
// Per Anthropic docs: "The cache_control parameter on the last tool definition caches all tool definitions."
// This only adds cache_control if NO tool in the array already has it.
func injectToolsCacheControl(payload []byte) []byte {
tools := gjson.GetBytes(payload, "tools")
if !tools.Exists() || !tools.IsArray() {
return payload
}
toolCount := int(tools.Get("#").Int())
if toolCount == 0 {
return payload
}
// Check if ANY tool already has cache_control - if so, don't modify tools
hasCacheControlInTools := false
tools.ForEach(func(_, tool gjson.Result) bool {
if tool.Get("cache_control").Exists() {
hasCacheControlInTools = true
return false
}
return true
})
if hasCacheControlInTools {
return payload
}
// Add cache_control to the last tool
lastToolPath := fmt.Sprintf("tools.%d.cache_control", toolCount-1)
result, err := sjson.SetBytes(payload, lastToolPath, map[string]string{"type": "ephemeral"})
if err != nil {
log.Warnf("failed to inject cache_control into tools array: %v", err)
return payload
}
return result
}
// injectSystemCacheControl adds cache_control to the last element in the system prompt.
// Converts string system prompts to array format if needed.
// This only adds cache_control if NO system element already has it.
func injectSystemCacheControl(payload []byte) []byte {
system := gjson.GetBytes(payload, "system")
if !system.Exists() {
return payload
}
if system.IsArray() {
count := int(system.Get("#").Int())
if count == 0 {
return payload
}
// Check if ANY system element already has cache_control
hasCacheControlInSystem := false
system.ForEach(func(_, item gjson.Result) bool {
if item.Get("cache_control").Exists() {
hasCacheControlInSystem = true
return false
}
return true
})
if hasCacheControlInSystem {
return payload
}
// Add cache_control to the last system element
lastSystemPath := fmt.Sprintf("system.%d.cache_control", count-1)
result, err := sjson.SetBytes(payload, lastSystemPath, map[string]string{"type": "ephemeral"})
if err != nil {
log.Warnf("failed to inject cache_control into system array: %v", err)
return payload
}
payload = result
} else if system.Type == gjson.String {
// Convert string system prompt to array with cache_control
// "system": "text" -> "system": [{"type": "text", "text": "text", "cache_control": {"type": "ephemeral"}}]
text := system.String()
newSystem := []map[string]interface{}{
{
"type": "text",
"text": text,
"cache_control": map[string]string{
"type": "ephemeral",
},
},
}
result, err := sjson.SetBytes(payload, "system", newSystem)
if err != nil {
log.Warnf("failed to inject cache_control into system string: %v", err)
return payload
}
payload = result
}
return payload
}
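A minimal standalone sketch of the "mark the last element" injection used by all three helpers above, with gjson/sjson as the only dependencies (the payload is a hypothetical example, not a real request):

package main

import (
    "fmt"

    "github.com/tidwall/gjson"
    "github.com/tidwall/sjson"
)

func main() {
    payload := []byte(`{"tools":[{"name":"Read"},{"name":"Write"}]}`)
    // Mirror injectToolsCacheControl: mark only the LAST tool so the whole
    // tools prefix becomes one cache breakpoint.
    last := int(gjson.GetBytes(payload, "tools.#").Int()) - 1
    path := fmt.Sprintf("tools.%d.cache_control", last)
    payload, _ = sjson.SetBytes(payload, path, map[string]string{"type": "ephemeral"})
    fmt.Println(string(payload))
    // {"tools":[{"name":"Read"},{"name":"Write","cache_control":{"type":"ephemeral"}}]}
}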

View File

@@ -1,7 +1,7 @@
// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax).
// Package iflow implements thinking configuration for iFlow models.
//
// iFlow models use boolean toggle semantics:
// - GLM models: chat_template_kwargs.enable_thinking (boolean)
// - Models using chat_template_kwargs.enable_thinking (boolean toggle)
// - MiniMax models: reasoning_split (boolean)
//
// Level values are converted to boolean: none=false, all others=true
@@ -20,6 +20,7 @@ import (
// Applier implements thinking.ProviderApplier for iFlow models.
//
// iFlow-specific behavior:
// - enable_thinking toggle models: enable_thinking boolean
// - GLM models: enable_thinking boolean + clear_thinking=false
// - MiniMax models: reasoning_split boolean
// - Level to boolean: none=false, others=true
@@ -61,8 +62,8 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *
return body, nil
}
if isGLMModel(modelInfo.ID) {
return applyGLM(body, config), nil
if isEnableThinkingModel(modelInfo.ID) {
return applyEnableThinking(body, config, isGLMModel(modelInfo.ID)), nil
}
if isMiniMaxModel(modelInfo.ID) {
@@ -97,7 +98,8 @@ func configToBoolean(config thinking.ThinkingConfig) bool {
}
}
// applyGLM applies thinking configuration for GLM models.
// applyEnableThinking applies thinking configuration for models that use
// chat_template_kwargs.enable_thinking format.
//
// Output format when enabled:
//
@@ -107,9 +109,8 @@ func configToBoolean(config thinking.ThinkingConfig) bool {
//
// {"chat_template_kwargs": {"enable_thinking": false}}
//
// Note: clear_thinking is only set when thinking is enabled, to preserve
// thinking output in the response.
func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {
// Note: clear_thinking is only set for GLM models when thinking is enabled.
func applyEnableThinking(body []byte, config thinking.ThinkingConfig, setClearThinking bool) []byte {
enableThinking := configToBoolean(config)
if len(body) == 0 || !gjson.ValidBytes(body) {
@@ -118,8 +119,11 @@ func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {
result, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
// clear_thinking is a GLM-only knob; strip it for other models.
result, _ = sjson.DeleteBytes(result, "chat_template_kwargs.clear_thinking")
// clear_thinking is only needed when thinking is enabled
if enableThinking {
if enableThinking && setClearThinking {
result, _ = sjson.SetBytes(result, "chat_template_kwargs.clear_thinking", false)
}
@@ -143,8 +147,21 @@ func applyMiniMax(body []byte, config thinking.ThinkingConfig) []byte {
return result
}
// isEnableThinkingModel determines if the model uses chat_template_kwargs.enable_thinking format.
func isEnableThinkingModel(modelID string) bool {
if isGLMModel(modelID) {
return true
}
id := strings.ToLower(modelID)
switch id {
case "qwen3-max-preview", "deepseek-v3.2", "deepseek-v3.1":
return true
default:
return false
}
}
// isGLMModel determines if the model is a GLM series model.
// GLM models use chat_template_kwargs.enable_thinking format.
func isGLMModel(modelID string) bool {
return strings.HasPrefix(strings.ToLower(modelID), "glm")
}
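A standalone sketch of the request bodies applyEnableThinking produces, assuming the GLM-only clear_thinking rule documented above (illustrative, not exhaustive):

package main

import (
    "fmt"

    "github.com/tidwall/sjson"
)

func main() {
    body := []byte(`{}`)
    body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", true)
    fmt.Println(string(body)) // {"chat_template_kwargs":{"enable_thinking":true}}
    // GLM models additionally pin clear_thinking=false so thinking output
    // is preserved in the response.
    glm, _ := sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false)
    fmt.Println(string(glm)) // {"chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false}}
}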

View File

@@ -155,10 +155,13 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
clientContentJSON, _ = sjson.SetRaw(clientContentJSON, "parts.-1", partJSON)
} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "text" {
prompt := contentResult.Get("text").String()
partJSON := `{}`
if prompt != "" {
partJSON, _ = sjson.Set(partJSON, "text", prompt)
// Skip empty text parts to avoid Gemini API error:
// "required oneof field 'data' must have one initialized field"
if prompt == "" {
continue
}
partJSON := `{}`
partJSON, _ = sjson.Set(partJSON, "text", prompt)
clientContentJSON, _ = sjson.SetRaw(clientContentJSON, "parts.-1", partJSON)
} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "tool_use" {
// NOTE: Do NOT inject dummy thinking blocks here.
@@ -285,6 +288,13 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
}
}
// Skip messages with empty parts array to avoid Gemini API error:
// "required oneof field 'data' must have one initialized field"
partsCheck := gjson.Get(clientContentJSON, "parts")
if !partsCheck.IsArray() || len(partsCheck.Array()) == 0 {
continue
}
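// Illustrative effect of the two guards (annotation, not part of the diff):
//   {"type":"text","text":""}          -> text part skipped, no empty {} emitted
//   a message whose parts end up as [] -> the whole message is skipped
// Either case would otherwise trip Gemini's "required oneof field 'data'" validation.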
contentsJSON, _ = sjson.SetRaw(contentsJSON, "-1", clientContentJSON)
hasContents = true
} else if contentsResult.Type == gjson.String {

View File

@@ -41,6 +41,7 @@ func ConvertAntigravityResponseToGemini(ctx context.Context, _ string, originalR
responseResult := gjson.GetBytes(rawJSON, "response")
if responseResult.Exists() {
chunk = []byte(responseResult.Raw)
chunk = restoreUsageMetadata(chunk)
}
} else {
chunkTemplate := "[]"
@@ -76,7 +77,8 @@ func ConvertAntigravityResponseToGemini(ctx context.Context, _ string, originalR
func ConvertAntigravityResponseToGeminiNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
responseResult := gjson.GetBytes(rawJSON, "response")
if responseResult.Exists() {
return responseResult.Raw
chunk := restoreUsageMetadata([]byte(responseResult.Raw))
return string(chunk)
}
return string(rawJSON)
}
@@ -84,3 +86,15 @@ func ConvertAntigravityResponseToGeminiNonStream(_ context.Context, _ string, or
func GeminiTokenCount(ctx context.Context, count int64) string {
return fmt.Sprintf(`{"totalTokens":%d,"promptTokensDetails":[{"modality":"TEXT","tokenCount":%d}]}`, count, count)
}
// restoreUsageMetadata renames cpaUsageMetadata back to usageMetadata.
// The executor renames usageMetadata to cpaUsageMetadata in non-terminal chunks
// to preserve usage data while hiding it from clients that don't expect it.
// When returning standard Gemini API format, we must restore the original name.
func restoreUsageMetadata(chunk []byte) []byte {
if cpaUsage := gjson.GetBytes(chunk, "cpaUsageMetadata"); cpaUsage.Exists() {
chunk, _ = sjson.SetRawBytes(chunk, "usageMetadata", []byte(cpaUsage.Raw))
chunk, _ = sjson.DeleteBytes(chunk, "cpaUsageMetadata")
}
return chunk
}

View File

@@ -0,0 +1,95 @@
package gemini
import (
"context"
"testing"
)
func TestRestoreUsageMetadata(t *testing.T) {
tests := []struct {
name string
input []byte
expected string
}{
{
name: "cpaUsageMetadata renamed to usageMetadata",
input: []byte(`{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100,"candidatesTokenCount":200}}`),
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100,"candidatesTokenCount":200}}`,
},
{
name: "no cpaUsageMetadata unchanged",
input: []byte(`{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`),
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
},
{
name: "empty input",
input: []byte(`{}`),
expected: `{}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := restoreUsageMetadata(tt.input)
if string(result) != tt.expected {
t.Errorf("restoreUsageMetadata() = %s, want %s", string(result), tt.expected)
}
})
}
}
func TestConvertAntigravityResponseToGeminiNonStream(t *testing.T) {
tests := []struct {
name string
input []byte
expected string
}{
{
name: "cpaUsageMetadata restored in response",
input: []byte(`{"response":{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100}}}`),
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
},
{
name: "usageMetadata preserved",
input: []byte(`{"response":{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}}`),
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := ConvertAntigravityResponseToGeminiNonStream(context.Background(), "", nil, nil, tt.input, nil)
if result != tt.expected {
t.Errorf("ConvertAntigravityResponseToGeminiNonStream() = %s, want %s", result, tt.expected)
}
})
}
}
func TestConvertAntigravityResponseToGeminiStream(t *testing.T) {
ctx := context.WithValue(context.Background(), "alt", "")
tests := []struct {
name string
input []byte
expected string
}{
{
name: "cpaUsageMetadata restored in streaming response",
input: []byte(`data: {"response":{"modelVersion":"gemini-3-pro","cpaUsageMetadata":{"promptTokenCount":100}}}`),
expected: `{"modelVersion":"gemini-3-pro","usageMetadata":{"promptTokenCount":100}}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
results := ConvertAntigravityResponseToGemini(ctx, "", nil, nil, tt.input, nil)
if len(results) != 1 {
t.Fatalf("expected 1 result, got %d", len(results))
}
if results[0] != tt.expected {
t.Errorf("ConvertAntigravityResponseToGemini() = %s, want %s", results[0], tt.expected)
}
})
}
}

View File

@@ -305,12 +305,14 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
}
}
// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch passthrough
// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch/codeExecution/urlContext passthrough
tools := gjson.GetBytes(rawJSON, "tools")
if tools.IsArray() && len(tools.Array()) > 0 {
functionToolNode := []byte(`{}`)
hasFunction := false
googleSearchNodes := make([][]byte, 0)
codeExecutionNodes := make([][]byte, 0)
urlContextNodes := make([][]byte, 0)
for _, t := range tools.Array() {
if t.Get("type").String() == "function" {
fn := t.Get("function")
@@ -370,8 +372,28 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
}
googleSearchNodes = append(googleSearchNodes, googleToolNode)
}
if ce := t.Get("code_execution"); ce.Exists() {
codeToolNode := []byte(`{}`)
var errSet error
codeToolNode, errSet = sjson.SetRawBytes(codeToolNode, "codeExecution", []byte(ce.Raw))
if errSet != nil {
log.Warnf("Failed to set codeExecution tool: %v", errSet)
continue
}
codeExecutionNodes = append(codeExecutionNodes, codeToolNode)
}
if uc := t.Get("url_context"); uc.Exists() {
urlToolNode := []byte(`{}`)
var errSet error
urlToolNode, errSet = sjson.SetRawBytes(urlToolNode, "urlContext", []byte(uc.Raw))
if errSet != nil {
log.Warnf("Failed to set urlContext tool: %v", errSet)
continue
}
urlContextNodes = append(urlContextNodes, urlToolNode)
}
}
if hasFunction || len(googleSearchNodes) > 0 {
if hasFunction || len(googleSearchNodes) > 0 || len(codeExecutionNodes) > 0 || len(urlContextNodes) > 0 {
toolsNode := []byte("[]")
if hasFunction {
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
@@ -379,6 +401,12 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
for _, googleNode := range googleSearchNodes {
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
}
for _, codeNode := range codeExecutionNodes {
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", codeNode)
}
for _, urlNode := range urlContextNodes {
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", urlNode)
}
out, _ = sjson.SetRawBytes(out, "request.tools", toolsNode)
}
}
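// Illustrative end-to-end mapping (annotation; shapes follow the code above):
//   request "tools": [{"type":"function","function":{...}}, {"google_search":{}},
//                     {"code_execution":{}}, {"url_context":{}}]
//   request.tools:   [{"functionDeclarations":[...]}, {"googleSearch":{}},
//                     {"codeExecution":{}}, {"urlContext":{}}]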

View File

@@ -283,12 +283,14 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
}
}
// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch passthrough
// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch/codeExecution/urlContext passthrough
tools := gjson.GetBytes(rawJSON, "tools")
if tools.IsArray() && len(tools.Array()) > 0 {
functionToolNode := []byte(`{}`)
hasFunction := false
googleSearchNodes := make([][]byte, 0)
codeExecutionNodes := make([][]byte, 0)
urlContextNodes := make([][]byte, 0)
for _, t := range tools.Array() {
if t.Get("type").String() == "function" {
fn := t.Get("function")
@@ -348,8 +350,28 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
}
googleSearchNodes = append(googleSearchNodes, googleToolNode)
}
if ce := t.Get("code_execution"); ce.Exists() {
codeToolNode := []byte(`{}`)
var errSet error
codeToolNode, errSet = sjson.SetRawBytes(codeToolNode, "codeExecution", []byte(ce.Raw))
if errSet != nil {
log.Warnf("Failed to set codeExecution tool: %v", errSet)
continue
}
codeExecutionNodes = append(codeExecutionNodes, codeToolNode)
}
if uc := t.Get("url_context"); uc.Exists() {
urlToolNode := []byte(`{}`)
var errSet error
urlToolNode, errSet = sjson.SetRawBytes(urlToolNode, "urlContext", []byte(uc.Raw))
if errSet != nil {
log.Warnf("Failed to set urlContext tool: %v", errSet)
continue
}
urlContextNodes = append(urlContextNodes, urlToolNode)
}
}
if hasFunction || len(googleSearchNodes) > 0 {
if hasFunction || len(googleSearchNodes) > 0 || len(codeExecutionNodes) > 0 || len(urlContextNodes) > 0 {
toolsNode := []byte("[]")
if hasFunction {
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
@@ -357,6 +379,12 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
for _, googleNode := range googleSearchNodes {
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
}
for _, codeNode := range codeExecutionNodes {
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", codeNode)
}
for _, urlNode := range urlContextNodes {
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", urlNode)
}
out, _ = sjson.SetRawBytes(out, "request.tools", toolsNode)
}
}

View File

@@ -289,12 +289,14 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
}
}
// tools -> tools[].functionDeclarations + tools[].googleSearch passthrough
// tools -> tools[].functionDeclarations + tools[].googleSearch/codeExecution/urlContext passthrough
tools := gjson.GetBytes(rawJSON, "tools")
if tools.IsArray() && len(tools.Array()) > 0 {
functionToolNode := []byte(`{}`)
hasFunction := false
googleSearchNodes := make([][]byte, 0)
codeExecutionNodes := make([][]byte, 0)
urlContextNodes := make([][]byte, 0)
for _, t := range tools.Array() {
if t.Get("type").String() == "function" {
fn := t.Get("function")
@@ -354,8 +356,28 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
}
googleSearchNodes = append(googleSearchNodes, googleToolNode)
}
if ce := t.Get("code_execution"); ce.Exists() {
codeToolNode := []byte(`{}`)
var errSet error
codeToolNode, errSet = sjson.SetRawBytes(codeToolNode, "codeExecution", []byte(ce.Raw))
if errSet != nil {
log.Warnf("Failed to set codeExecution tool: %v", errSet)
continue
}
codeExecutionNodes = append(codeExecutionNodes, codeToolNode)
}
if uc := t.Get("url_context"); uc.Exists() {
urlToolNode := []byte(`{}`)
var errSet error
urlToolNode, errSet = sjson.SetRawBytes(urlToolNode, "urlContext", []byte(uc.Raw))
if errSet != nil {
log.Warnf("Failed to set urlContext tool: %v", errSet)
continue
}
urlContextNodes = append(urlContextNodes, urlToolNode)
}
}
if hasFunction || len(googleSearchNodes) > 0 {
if hasFunction || len(googleSearchNodes) > 0 || len(codeExecutionNodes) > 0 || len(urlContextNodes) > 0 {
toolsNode := []byte("[]")
if hasFunction {
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
@@ -363,6 +385,12 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
for _, googleNode := range googleSearchNodes {
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
}
for _, codeNode := range codeExecutionNodes {
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", codeNode)
}
for _, urlNode := range urlContextNodes {
toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", urlNode)
}
out, _ = sjson.SetRawBytes(out, "tools", toolsNode)
}
}

View File

@@ -4,6 +4,7 @@ package util
import (
"fmt"
"sort"
"strconv"
"strings"
"github.com/tidwall/gjson"
@@ -175,7 +176,7 @@ func convertConstToEnum(jsonStr string) string {
return jsonStr
}
// convertEnumValuesToStrings ensures all enum values are strings.
// convertEnumValuesToStrings ensures all enum values are strings and the schema type is set to string.
// Gemini API requires enum values to be of type string, not numbers or booleans.
func convertEnumValuesToStrings(jsonStr string) string {
for _, p := range findPaths(jsonStr, "enum") {
@@ -185,19 +186,15 @@ func convertEnumValuesToStrings(jsonStr string) string {
}
var stringVals []string
needsConversion := false
for _, item := range arr.Array() {
// Check if any value is not a string
if item.Type != gjson.String {
needsConversion = true
}
stringVals = append(stringVals, item.String())
}
// Only update if we found non-string values
if needsConversion {
jsonStr, _ = sjson.Set(jsonStr, p, stringVals)
}
// Always update enum values to strings and set type to "string"
// This ensures compatibility with Antigravity Gemini which only allows enum for STRING type
jsonStr, _ = sjson.Set(jsonStr, p, stringVals)
parentPath := trimSuffix(p, ".enum")
jsonStr, _ = sjson.Set(jsonStr, joinPath(parentPath, "type"), "string")
}
return jsonStr
}
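// Illustrative before/after (annotation, not part of the diff):
//   in:  {"properties":{"n":{"type":"integer","enum":[1,2,3]}}}
//   out: {"properties":{"n":{"type":"string","enum":["1","2","3"]}}}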
@@ -435,9 +432,54 @@ func removeUnsupportedKeywords(jsonStr string) string {
jsonStr, _ = sjson.Delete(jsonStr, p)
}
}
// Remove x-* extension fields (e.g., x-google-enum-descriptions) that are not supported by Gemini API
jsonStr = removeExtensionFields(jsonStr)
return jsonStr
}
// removeExtensionFields removes all x-* extension fields from the JSON schema.
// These are OpenAPI/JSON Schema extension fields that Google APIs don't recognize.
func removeExtensionFields(jsonStr string) string {
var paths []string
walkForExtensions(gjson.Parse(jsonStr), "", &paths)
// walkForExtensions records each x-* key and skips that key's children, so the
// collected paths are distinct object members and deleting them in any order
// leaves the remaining paths valid. DeleteBytes keeps the deletions efficient.
b := []byte(jsonStr)
for _, p := range paths {
b, _ = sjson.DeleteBytes(b, p)
}
return string(b)
}
func walkForExtensions(value gjson.Result, path string, paths *[]string) {
if value.IsArray() {
arr := value.Array()
for i := len(arr) - 1; i >= 0; i-- {
walkForExtensions(arr[i], joinPath(path, strconv.Itoa(i)), paths)
}
return
}
if value.IsObject() {
value.ForEach(func(key, val gjson.Result) bool {
keyStr := key.String()
safeKey := escapeGJSONPathKey(keyStr)
childPath := joinPath(path, safeKey)
// If it's an extension field, we delete it and don't need to look at its children.
if strings.HasPrefix(keyStr, "x-") && !isPropertyDefinition(path) {
*paths = append(*paths, childPath)
return true
}
walkForExtensions(val, childPath, paths)
return true
})
}
}
func cleanupRequiredFields(jsonStr string) string {
for _, p := range findPaths(jsonStr, "required") {
parentPath := trimSuffix(p, ".required")

View File

@@ -869,3 +869,129 @@ func TestCleanJSONSchemaForAntigravity_BooleanEnumToString(t *testing.T) {
t.Errorf("Boolean enum values should be converted to string format, got: %s", result)
}
}
func TestRemoveExtensionFields(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
{
name: "removes x- fields at root",
input: `{
"type": "object",
"x-custom-meta": "value",
"properties": {
"foo": { "type": "string" }
}
}`,
expected: `{
"type": "object",
"properties": {
"foo": { "type": "string" }
}
}`,
},
{
name: "removes x- fields in nested properties",
input: `{
"type": "object",
"properties": {
"foo": {
"type": "string",
"x-internal-id": 123
}
}
}`,
expected: `{
"type": "object",
"properties": {
"foo": {
"type": "string"
}
}
}`,
},
{
name: "does NOT remove properties named x-",
input: `{
"type": "object",
"properties": {
"x-data": { "type": "string" },
"normal": { "type": "number", "x-meta": "remove" }
},
"required": ["x-data"]
}`,
expected: `{
"type": "object",
"properties": {
"x-data": { "type": "string" },
"normal": { "type": "number" }
},
"required": ["x-data"]
}`,
},
{
name: "does NOT remove $schema and other meta fields (as requested)",
input: `{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "test",
"type": "object",
"properties": {
"foo": { "type": "string" }
}
}`,
expected: `{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "test",
"type": "object",
"properties": {
"foo": { "type": "string" }
}
}`,
},
{
name: "handles properties named $schema",
input: `{
"type": "object",
"properties": {
"$schema": { "type": "string" }
}
}`,
expected: `{
"type": "object",
"properties": {
"$schema": { "type": "string" }
}
}`,
},
{
name: "handles escaping in paths",
input: `{
"type": "object",
"properties": {
"foo.bar": {
"type": "string",
"x-meta": "remove"
}
},
"x-root.meta": "remove"
}`,
expected: `{
"type": "object",
"properties": {
"foo.bar": {
"type": "string"
}
}
}`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
actual := removeExtensionFields(tt.input)
compareJSON(t, tt.expected, actual)
})
}
}

View File

@@ -86,12 +86,19 @@ func (s *FileSynthesizer) Synthesize(ctx *SynthesisContext) ([]*coreauth.Auth, e
}
}
disabled, _ := metadata["disabled"].(bool)
status := coreauth.StatusActive
if disabled {
status = coreauth.StatusDisabled
}
a := &coreauth.Auth{
ID: id,
Provider: provider,
Label: label,
Prefix: prefix,
Status: coreauth.StatusActive,
Status: status,
Disabled: disabled,
Attributes: map[string]string{
"source": full,
"path": full,
@@ -167,6 +174,16 @@ func SynthesizeGeminiVirtualAuths(primary *coreauth.Auth, metadata map[string]an
"virtual_parent_id": primary.ID,
"type": metadata["type"],
}
if v, ok := metadata["disable_cooling"]; ok {
metadataCopy["disable_cooling"] = v
} else if v, ok := metadata["disable-cooling"]; ok {
metadataCopy["disable_cooling"] = v
}
if v, ok := metadata["request_retry"]; ok {
metadataCopy["request_retry"] = v
} else if v, ok := metadata["request-retry"]; ok {
metadataCopy["request_retry"] = v
}
proxy := strings.TrimSpace(primary.ProxyURL)
if proxy != "" {
metadataCopy["proxy_url"] = proxy

View File

@@ -69,10 +69,12 @@ func TestFileSynthesizer_Synthesize_ValidAuthFile(t *testing.T) {
// Create a valid auth file
authData := map[string]any{
"type": "claude",
"email": "test@example.com",
"proxy_url": "http://proxy.local",
"prefix": "test-prefix",
"type": "claude",
"email": "test@example.com",
"proxy_url": "http://proxy.local",
"prefix": "test-prefix",
"disable_cooling": true,
"request_retry": 2,
}
data, _ := json.Marshal(authData)
err := os.WriteFile(filepath.Join(tempDir, "claude-auth.json"), data, 0644)
@@ -108,6 +110,12 @@ func TestFileSynthesizer_Synthesize_ValidAuthFile(t *testing.T) {
if auths[0].ProxyURL != "http://proxy.local" {
t.Errorf("expected proxy_url http://proxy.local, got %s", auths[0].ProxyURL)
}
if v, ok := auths[0].Metadata["disable_cooling"].(bool); !ok || !v {
t.Errorf("expected disable_cooling true, got %v", auths[0].Metadata["disable_cooling"])
}
if v, ok := auths[0].Metadata["request_retry"].(float64); !ok || int(v) != 2 {
t.Errorf("expected request_retry 2, got %v", auths[0].Metadata["request_retry"])
}
if auths[0].Status != coreauth.StatusActive {
t.Errorf("expected status active, got %s", auths[0].Status)
}
@@ -336,9 +344,11 @@ func TestSynthesizeGeminiVirtualAuths_MultiProject(t *testing.T) {
},
}
metadata := map[string]any{
"project_id": "project-a, project-b, project-c",
"email": "test@example.com",
"type": "gemini",
"project_id": "project-a, project-b, project-c",
"email": "test@example.com",
"type": "gemini",
"request_retry": 2,
"disable_cooling": true,
}
virtuals := SynthesizeGeminiVirtualAuths(primary, metadata, now)
@@ -376,6 +386,12 @@ func TestSynthesizeGeminiVirtualAuths_MultiProject(t *testing.T) {
if v.ProxyURL != "http://proxy.local" {
t.Errorf("expected proxy_url http://proxy.local, got %s", v.ProxyURL)
}
if vv, ok := v.Metadata["disable_cooling"].(bool); !ok || !vv {
t.Errorf("expected disable_cooling true, got %v", v.Metadata["disable_cooling"])
}
if vv, ok := v.Metadata["request_retry"].(int); !ok || vv != 2 {
t.Errorf("expected request_retry 2, got %v", v.Metadata["request_retry"])
}
if v.Attributes["runtime_only"] != "true" {
t.Error("expected runtime_only=true")
}

View File

@@ -124,32 +124,47 @@ func (m *Manager) Stream(ctx context.Context, provider string, req *HTTPRequest)
out := make(chan StreamEvent)
go func() {
defer close(out)
send := func(ev StreamEvent) bool {
if ctx == nil {
out <- ev
return true
}
select {
case <-ctx.Done():
return false
case out <- ev:
return true
}
}
for {
select {
case <-ctx.Done():
out <- StreamEvent{Err: ctx.Err()}
return
case msg, ok := <-respCh:
if !ok {
out <- StreamEvent{Err: errors.New("wsrelay: stream closed")}
_ = send(StreamEvent{Err: errors.New("wsrelay: stream closed")})
return
}
switch msg.Type {
case MessageTypeStreamStart:
resp := decodeResponse(msg.Payload)
out <- StreamEvent{Type: MessageTypeStreamStart, Status: resp.Status, Headers: resp.Headers}
if okSend := send(StreamEvent{Type: MessageTypeStreamStart, Status: resp.Status, Headers: resp.Headers}); !okSend {
return
}
case MessageTypeStreamChunk:
chunk := decodeChunk(msg.Payload)
out <- StreamEvent{Type: MessageTypeStreamChunk, Payload: chunk}
if okSend := send(StreamEvent{Type: MessageTypeStreamChunk, Payload: chunk}); !okSend {
return
}
case MessageTypeStreamEnd:
out <- StreamEvent{Type: MessageTypeStreamEnd}
_ = send(StreamEvent{Type: MessageTypeStreamEnd})
return
case MessageTypeError:
out <- StreamEvent{Type: MessageTypeError, Err: decodeError(msg.Payload)}
_ = send(StreamEvent{Type: MessageTypeError, Err: decodeError(msg.Payload)})
return
case MessageTypeHTTPResp:
resp := decodeResponse(msg.Payload)
out <- StreamEvent{Type: MessageTypeHTTPResp, Status: resp.Status, Headers: resp.Headers, Payload: resp.Body}
_ = send(StreamEvent{Type: MessageTypeHTTPResp, Status: resp.Status, Headers: resp.Headers, Payload: resp.Body})
return
default:
}
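A minimal standalone sketch of the cancellation-aware send pattern introduced above, showing why the producer goroutine can no longer block forever on an abandoned channel (all names are illustrative):

package main

import (
    "context"
    "fmt"
    "time"
)

func produce(ctx context.Context, out chan<- int) {
    defer close(out)
    send := func(v int) bool {
        select {
        case <-ctx.Done():
            return false // receiver is gone; stop instead of blocking forever
        case out <- v:
            return true
        }
    }
    for i := 0; ; i++ {
        if !send(i) {
            return
        }
    }
}

func main() {
    ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
    defer cancel()
    ch := make(chan int)
    go produce(ctx, ch)
    for v := range ch {
        _ = v // drain until the producer observes cancellation and closes
    }
    fmt.Println("producer exited cleanly")
}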

View File

@@ -124,6 +124,7 @@ func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) {
log.Errorf("Failed to read response body: %v", err)
return
}
c.Set("API_RESPONSE_TIMESTAMP", time.Now())
_, _ = c.Writer.Write(output)
c.Set("API_RESPONSE", output)
}

View File

@@ -361,6 +361,11 @@ func appendAPIResponse(c *gin.Context, data []byte) {
return
}
// Capture timestamp on first API response
if _, exists := c.Get("API_RESPONSE_TIMESTAMP"); !exists {
c.Set("API_RESPONSE_TIMESTAMP", time.Now())
}
if existing, exists := c.Get("API_RESPONSE"); exists {
if existingBytes, ok := existing.([]byte); ok && len(existingBytes) > 0 {
combined := make([]byte, 0, len(existingBytes)+len(data)+1)
@@ -506,6 +511,32 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
bootstrapRetries := 0
maxBootstrapRetries := StreamingBootstrapRetries(h.Cfg)
sendErr := func(msg *interfaces.ErrorMessage) bool {
if ctx == nil {
errChan <- msg
return true
}
select {
case <-ctx.Done():
return false
case errChan <- msg:
return true
}
}
sendData := func(chunk []byte) bool {
if ctx == nil {
dataChan <- chunk
return true
}
select {
case <-ctx.Done():
return false
case dataChan <- chunk:
return true
}
}
bootstrapEligible := func(err error) bool {
status := statusFromError(err)
if status == 0 {
@@ -565,12 +596,14 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
addon = hdr.Clone()
}
}
errChan <- &interfaces.ErrorMessage{StatusCode: status, Error: streamErr, Addon: addon}
_ = sendErr(&interfaces.ErrorMessage{StatusCode: status, Error: streamErr, Addon: addon})
return
}
if len(chunk.Payload) > 0 {
sentPayload = true
dataChan <- cloneBytes(chunk.Payload)
if okSendData := sendData(cloneBytes(chunk.Payload)); !okSendData {
return
}
}
}
}

View File

@@ -70,6 +70,58 @@ func (e *failOnceStreamExecutor) Calls() int {
return e.calls
}
type payloadThenErrorStreamExecutor struct {
mu sync.Mutex
calls int
}
func (e *payloadThenErrorStreamExecutor) Identifier() string { return "codex" }
func (e *payloadThenErrorStreamExecutor) Execute(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "Execute not implemented"}
}
func (e *payloadThenErrorStreamExecutor) ExecuteStream(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (<-chan coreexecutor.StreamChunk, error) {
e.mu.Lock()
e.calls++
e.mu.Unlock()
ch := make(chan coreexecutor.StreamChunk, 2)
ch <- coreexecutor.StreamChunk{Payload: []byte("partial")}
ch <- coreexecutor.StreamChunk{
Err: &coreauth.Error{
Code: "upstream_closed",
Message: "upstream closed",
Retryable: false,
HTTPStatus: http.StatusBadGateway,
},
}
close(ch)
return ch, nil
}
func (e *payloadThenErrorStreamExecutor) Refresh(ctx context.Context, auth *coreauth.Auth) (*coreauth.Auth, error) {
return auth, nil
}
func (e *payloadThenErrorStreamExecutor) CountTokens(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "CountTokens not implemented"}
}
func (e *payloadThenErrorStreamExecutor) HttpRequest(ctx context.Context, auth *coreauth.Auth, req *http.Request) (*http.Response, error) {
return nil, &coreauth.Error{
Code: "not_implemented",
Message: "HttpRequest not implemented",
HTTPStatus: http.StatusNotImplemented,
}
}
func (e *payloadThenErrorStreamExecutor) Calls() int {
e.mu.Lock()
defer e.mu.Unlock()
return e.calls
}
func TestExecuteStreamWithAuthManager_RetriesBeforeFirstByte(t *testing.T) {
executor := &failOnceStreamExecutor{}
manager := coreauth.NewManager(nil, nil, nil)
@@ -130,3 +182,73 @@ func TestExecuteStreamWithAuthManager_RetriesBeforeFirstByte(t *testing.T) {
t.Fatalf("expected 2 stream attempts, got %d", executor.Calls())
}
}
func TestExecuteStreamWithAuthManager_DoesNotRetryAfterFirstByte(t *testing.T) {
executor := &payloadThenErrorStreamExecutor{}
manager := coreauth.NewManager(nil, nil, nil)
manager.RegisterExecutor(executor)
auth1 := &coreauth.Auth{
ID: "auth1",
Provider: "codex",
Status: coreauth.StatusActive,
Metadata: map[string]any{"email": "test1@example.com"},
}
if _, err := manager.Register(context.Background(), auth1); err != nil {
t.Fatalf("manager.Register(auth1): %v", err)
}
auth2 := &coreauth.Auth{
ID: "auth2",
Provider: "codex",
Status: coreauth.StatusActive,
Metadata: map[string]any{"email": "test2@example.com"},
}
if _, err := manager.Register(context.Background(), auth2); err != nil {
t.Fatalf("manager.Register(auth2): %v", err)
}
registry.GetGlobalRegistry().RegisterClient(auth1.ID, auth1.Provider, []*registry.ModelInfo{{ID: "test-model"}})
registry.GetGlobalRegistry().RegisterClient(auth2.ID, auth2.Provider, []*registry.ModelInfo{{ID: "test-model"}})
t.Cleanup(func() {
registry.GetGlobalRegistry().UnregisterClient(auth1.ID)
registry.GetGlobalRegistry().UnregisterClient(auth2.ID)
})
handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{
Streaming: sdkconfig.StreamingConfig{
BootstrapRetries: 1,
},
}, manager)
dataChan, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai", "test-model", []byte(`{"model":"test-model"}`), "")
if dataChan == nil || errChan == nil {
t.Fatalf("expected non-nil channels")
}
var got []byte
for chunk := range dataChan {
got = append(got, chunk...)
}
var gotErr error
var gotStatus int
for msg := range errChan {
if msg != nil && msg.Error != nil {
gotErr = msg.Error
gotStatus = msg.StatusCode
}
}
if string(got) != "partial" {
t.Fatalf("expected payload partial, got %q", string(got))
}
if gotErr == nil {
t.Fatalf("expected terminal error, got nil")
}
if gotStatus != http.StatusBadGateway {
t.Fatalf("expected status %d, got %d", http.StatusBadGateway, gotStatus)
}
if executor.Calls() != 1 {
t.Fatalf("expected 1 stream attempt, got %d", executor.Calls())
}
}

View File

@@ -176,13 +176,16 @@ waitForCallback:
}
if result.State != state {
log.Errorf("State mismatch: expected %s, got %s", state, result.State)
return nil, claude.NewAuthenticationError(claude.ErrInvalidState, fmt.Errorf("state mismatch"))
}
log.Debug("Claude authorization code received; exchanging for tokens")
log.Debugf("Code: %s, State: %s", result.Code[:min(20, len(result.Code))], state)
authBundle, err := authSvc.ExchangeCodeForTokens(ctx, result.Code, state, pkceCodes)
if err != nil {
log.Errorf("Token exchange failed: %v", err)
return nil, claude.NewAuthenticationError(claude.ErrCodeExchangeFailed, err)
}

View File

@@ -68,14 +68,13 @@ func (s *FileTokenStore) Save(ctx context.Context, auth *cliproxyauth.Auth) (str
return "", err
}
case auth.Metadata != nil:
auth.Metadata["disabled"] = auth.Disabled
raw, errMarshal := json.Marshal(auth.Metadata)
if errMarshal != nil {
return "", fmt.Errorf("auth filestore: marshal metadata failed: %w", errMarshal)
}
if existing, errRead := os.ReadFile(path); errRead == nil {
// Use metadataEqualIgnoringTimestamps to skip writes when only timestamp fields change.
// This prevents the token refresh loop caused by timestamp/expired/expires_in changes.
if metadataEqualIgnoringTimestamps(existing, raw, auth.Provider) {
if jsonEqual(existing, raw) {
return path, nil
}
file, errOpen := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, 0o600)
@@ -216,12 +215,18 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
return nil, fmt.Errorf("stat file: %w", err)
}
id := s.idFor(path, baseDir)
disabled, _ := metadata["disabled"].(bool)
status := cliproxyauth.StatusActive
if disabled {
status = cliproxyauth.StatusDisabled
}
auth := &cliproxyauth.Auth{
ID: id,
Provider: provider,
FileName: id,
Label: s.labelFor(metadata),
Status: cliproxyauth.StatusActive,
Status: status,
Disabled: disabled,
Attributes: map[string]string{"path": path},
Metadata: metadata,
CreatedAt: info.ModTime(),
@@ -299,8 +304,7 @@ func (s *FileTokenStore) baseDirSnapshot() string {
return s.baseDir
}
// DEPRECATED: Use metadataEqualIgnoringTimestamps for comparing auth metadata.
// This function is kept for backward compatibility but can cause refresh loops.
// jsonEqual compares two JSON blobs by parsing them into Go objects and deep comparing.
func jsonEqual(a, b []byte) bool {
var objA any
var objB any
@@ -313,41 +317,6 @@ func jsonEqual(a, b []byte) bool {
return deepEqualJSON(objA, objB)
}
// metadataEqualIgnoringTimestamps compares two metadata JSON blobs,
// ignoring fields that change on every refresh but don't affect functionality.
// This prevents unnecessary file writes that would trigger watcher events and
// create refresh loops.
// The provider parameter controls whether access_token is ignored: providers like
// Google OAuth (gemini, gemini-cli) can re-fetch tokens when needed, while others
// like iFlow require the refreshed token to be persisted.
func metadataEqualIgnoringTimestamps(a, b []byte, provider string) bool {
var objA, objB map[string]any
if err := json.Unmarshal(a, &objA); err != nil {
return false
}
if err := json.Unmarshal(b, &objB); err != nil {
return false
}
// Fields to ignore: these change on every refresh but don't affect authentication logic.
// - timestamp, expired, expires_in, last_refresh: time-related fields that change on refresh
ignoredFields := []string{"timestamp", "expired", "expires_in", "last_refresh"}
// For providers that can re-fetch tokens when needed (e.g., Google OAuth),
// we ignore access_token to avoid unnecessary file writes.
switch provider {
case "gemini", "gemini-cli", "antigravity":
ignoredFields = append(ignoredFields, "access_token")
}
for _, field := range ignoredFields {
delete(objA, field)
delete(objB, field)
}
return deepEqualJSON(objA, objB)
}
func deepEqualJSON(a, b any) bool {
switch valA := a.(type) {
case map[string]any:

View File

@@ -61,6 +61,15 @@ func SetQuotaCooldownDisabled(disable bool) {
quotaCooldownDisabled.Store(disable)
}
func quotaCooldownDisabledForAuth(auth *Auth) bool {
if auth != nil {
if override, ok := auth.DisableCoolingOverride(); ok {
return override
}
}
return quotaCooldownDisabled.Load()
}
// Result captures execution outcome used to adjust auth state.
type Result struct {
// AuthID references the auth that produced this result.
@@ -468,20 +477,16 @@ func (m *Manager) Execute(ctx context.Context, providers []string, req cliproxye
return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
}
retryTimes, maxWait := m.retrySettings()
attempts := retryTimes + 1
if attempts < 1 {
attempts = 1
}
_, maxWait := m.retrySettings()
var lastErr error
for attempt := 0; attempt < attempts; attempt++ {
for attempt := 0; ; attempt++ {
resp, errExec := m.executeMixedOnce(ctx, normalized, req, opts)
if errExec == nil {
return resp, nil
}
lastErr = errExec
wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, normalized, req.Model, maxWait)
wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, normalized, req.Model, maxWait)
if !shouldRetry {
break
}
@@ -503,20 +508,16 @@ func (m *Manager) ExecuteCount(ctx context.Context, providers []string, req clip
return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
}
retryTimes, maxWait := m.retrySettings()
attempts := retryTimes + 1
if attempts < 1 {
attempts = 1
}
_, maxWait := m.retrySettings()
var lastErr error
for attempt := 0; attempt < attempts; attempt++ {
for attempt := 0; ; attempt++ {
resp, errExec := m.executeCountMixedOnce(ctx, normalized, req, opts)
if errExec == nil {
return resp, nil
}
lastErr = errExec
wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, normalized, req.Model, maxWait)
wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, normalized, req.Model, maxWait)
if !shouldRetry {
break
}
@@ -538,20 +539,16 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli
return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
}
retryTimes, maxWait := m.retrySettings()
attempts := retryTimes + 1
if attempts < 1 {
attempts = 1
}
_, maxWait := m.retrySettings()
var lastErr error
for attempt := 0; attempt < attempts; attempt++ {
for attempt := 0; ; attempt++ {
chunks, errStream := m.executeStreamMixedOnce(ctx, normalized, req, opts)
if errStream == nil {
return chunks, nil
}
lastErr = errStream
wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, attempts, normalized, req.Model, maxWait)
wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, normalized, req.Model, maxWait)
if !shouldRetry {
break
}
@@ -721,6 +718,7 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string
go func(streamCtx context.Context, streamAuth *Auth, streamProvider string, streamChunks <-chan cliproxyexecutor.StreamChunk) {
defer close(out)
var failed bool
forward := true
for chunk := range streamChunks {
if chunk.Err != nil && !failed {
failed = true
@@ -731,7 +729,18 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string
}
m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: false, Error: rerr})
}
out <- chunk
if !forward {
continue
}
if streamCtx == nil {
out <- chunk
continue
}
select {
case <-streamCtx.Done():
forward = false
case out <- chunk:
}
}
if !failed {
m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: true})
@@ -1034,11 +1043,15 @@ func (m *Manager) retrySettings() (int, time.Duration) {
return int(m.requestRetry.Load()), time.Duration(m.maxRetryInterval.Load())
}
func (m *Manager) closestCooldownWait(providers []string, model string) (time.Duration, bool) {
func (m *Manager) closestCooldownWait(providers []string, model string, attempt int) (time.Duration, bool) {
if m == nil || len(providers) == 0 {
return 0, false
}
now := time.Now()
defaultRetry := int(m.requestRetry.Load())
if defaultRetry < 0 {
defaultRetry = 0
}
providerSet := make(map[string]struct{}, len(providers))
for i := range providers {
key := strings.TrimSpace(strings.ToLower(providers[i]))
@@ -1061,6 +1074,16 @@ func (m *Manager) closestCooldownWait(providers []string, model string) (time.Du
if _, ok := providerSet[providerKey]; !ok {
continue
}
effectiveRetry := defaultRetry
if override, ok := auth.RequestRetryOverride(); ok {
effectiveRetry = override
}
if effectiveRetry < 0 {
effectiveRetry = 0
}
if attempt >= effectiveRetry {
continue
}
blocked, reason, next := isAuthBlockedForModel(auth, model, now)
if !blocked || next.IsZero() || reason == blockReasonDisabled {
continue
@@ -1077,8 +1100,8 @@ func (m *Manager) closestCooldownWait(providers []string, model string) (time.Du
return minWait, found
}
func (m *Manager) shouldRetryAfterError(err error, attempt, maxAttempts int, providers []string, model string, maxWait time.Duration) (time.Duration, bool) {
if err == nil || attempt >= maxAttempts-1 {
func (m *Manager) shouldRetryAfterError(err error, attempt int, providers []string, model string, maxWait time.Duration) (time.Duration, bool) {
if err == nil {
return 0, false
}
if maxWait <= 0 {
@@ -1087,7 +1110,7 @@ func (m *Manager) shouldRetryAfterError(err error, attempt, maxAttempts int, pro
if status := statusCodeFromError(err); status == http.StatusOK {
return 0, false
}
wait, found := m.closestCooldownWait(providers, model)
wait, found := m.closestCooldownWait(providers, model, attempt)
if !found || wait > maxWait {
return 0, false
}
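// Net effect (illustrative): an attempt only waits for a cooling auth whose own
// request_retry budget (or the global default) still covers this attempt index;
// when no such auth remains, the error is surfaced immediately.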
@@ -1176,7 +1199,7 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
if result.RetryAfter != nil {
next = now.Add(*result.RetryAfter)
} else {
cooldown, nextLevel := nextQuotaCooldown(backoffLevel)
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth))
if cooldown > 0 {
next = now.Add(cooldown)
}
@@ -1193,7 +1216,7 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
shouldSuspendModel = true
setModelQuota = true
case 408, 500, 502, 503, 504:
if quotaCooldownDisabled.Load() {
if quotaCooldownDisabledForAuth(auth) {
state.NextRetryAfter = time.Time{}
} else {
next := now.Add(1 * time.Minute)
@@ -1439,7 +1462,7 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
if retryAfter != nil {
next = now.Add(*retryAfter)
} else {
cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel)
cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel, quotaCooldownDisabledForAuth(auth))
if cooldown > 0 {
next = now.Add(cooldown)
}
@@ -1449,7 +1472,7 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
auth.NextRetryAfter = next
case 408, 500, 502, 503, 504:
auth.StatusMessage = "transient upstream error"
if quotaCooldownDisabled.Load() {
if quotaCooldownDisabledForAuth(auth) {
auth.NextRetryAfter = time.Time{}
} else {
auth.NextRetryAfter = now.Add(1 * time.Minute)
@@ -1462,11 +1485,11 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
}
// nextQuotaCooldown returns the next cooldown duration and updated backoff level for repeated quota errors.
func nextQuotaCooldown(prevLevel int) (time.Duration, int) {
func nextQuotaCooldown(prevLevel int, disableCooling bool) (time.Duration, int) {
if prevLevel < 0 {
prevLevel = 0
}
if quotaCooldownDisabled.Load() {
if disableCooling {
return 0, prevLevel
}
cooldown := quotaBackoffBase * time.Duration(1<<prevLevel)
@@ -1642,6 +1665,9 @@ func (m *Manager) persist(ctx context.Context, auth *Auth) error {
if m.store == nil || auth == nil {
return nil
}
if shouldSkipPersist(ctx) {
return nil
}
if auth.Attributes != nil {
if v := strings.ToLower(strings.TrimSpace(auth.Attributes["runtime_only"])); v == "true" {
return nil

View File

@@ -0,0 +1,97 @@
package auth
import (
"context"
"testing"
"time"
)
func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testing.T) {
m := NewManager(nil, nil, nil)
m.SetRetryConfig(3, 30*time.Second)
model := "test-model"
next := time.Now().Add(5 * time.Second)
auth := &Auth{
ID: "auth-1",
Provider: "claude",
Metadata: map[string]any{
"request_retry": float64(0),
},
ModelStates: map[string]*ModelState{
model: {
Unavailable: true,
Status: StatusError,
NextRetryAfter: next,
},
},
}
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
t.Fatalf("register auth: %v", errRegister)
}
_, maxWait := m.retrySettings()
wait, shouldRetry := m.shouldRetryAfterError(&Error{HTTPStatus: 500, Message: "boom"}, 0, []string{"claude"}, model, maxWait)
if shouldRetry {
t.Fatalf("expected shouldRetry=false for request_retry=0, got true (wait=%v)", wait)
}
auth.Metadata["request_retry"] = float64(1)
if _, errUpdate := m.Update(context.Background(), auth); errUpdate != nil {
t.Fatalf("update auth: %v", errUpdate)
}
wait, shouldRetry = m.shouldRetryAfterError(&Error{HTTPStatus: 500, Message: "boom"}, 0, []string{"claude"}, model, maxWait)
if !shouldRetry {
t.Fatalf("expected shouldRetry=true for request_retry=1, got false")
}
if wait <= 0 {
t.Fatalf("expected wait > 0, got %v", wait)
}
_, shouldRetry = m.shouldRetryAfterError(&Error{HTTPStatus: 500, Message: "boom"}, 1, []string{"claude"}, model, maxWait)
if shouldRetry {
t.Fatalf("expected shouldRetry=false on attempt=1 for request_retry=1, got true")
}
}
func TestManager_MarkResult_RespectsAuthDisableCoolingOverride(t *testing.T) {
prev := quotaCooldownDisabled.Load()
quotaCooldownDisabled.Store(false)
t.Cleanup(func() { quotaCooldownDisabled.Store(prev) })
m := NewManager(nil, nil, nil)
auth := &Auth{
ID: "auth-1",
Provider: "claude",
Metadata: map[string]any{
"disable_cooling": true,
},
}
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
t.Fatalf("register auth: %v", errRegister)
}
model := "test-model"
m.MarkResult(context.Background(), Result{
AuthID: "auth-1",
Provider: "claude",
Model: model,
Success: false,
Error: &Error{HTTPStatus: 500, Message: "boom"},
})
updated, ok := m.GetByID("auth-1")
if !ok || updated == nil {
t.Fatalf("expected auth to be present")
}
state := updated.ModelStates[model]
if state == nil {
t.Fatalf("expected model state to be present")
}
if !state.NextRetryAfter.IsZero() {
t.Fatalf("expected NextRetryAfter to be zero when disable_cooling=true, got %v", state.NextRetryAfter)
}
}

View File

@@ -0,0 +1,24 @@
package auth
import "context"
type skipPersistContextKey struct{}
// WithSkipPersist returns a derived context that disables persistence for Manager Update/Register calls.
// It is intended for code paths that are reacting to file watcher events, where the file on disk is
// already the source of truth and persisting again would create a write-back loop.
func WithSkipPersist(ctx context.Context) context.Context {
if ctx == nil {
ctx = context.Background()
}
return context.WithValue(ctx, skipPersistContextKey{}, true)
}
func shouldSkipPersist(ctx context.Context) bool {
if ctx == nil {
return false
}
v := ctx.Value(skipPersistContextKey{})
enabled, ok := v.(bool)
return ok && enabled
}
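A hedged usage sketch (the watcher callback is hypothetical; Manager, Auth, and WithSkipPersist are the types and helper from this package):

func onAuthFileChanged(ctx context.Context, mgr *Manager, a *Auth) {
    // The file on disk is already the source of truth here, so skip the
    // write-back that Update would normally perform and avoid a watcher loop.
    _, _ = mgr.Update(WithSkipPersist(ctx), a)
}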

View File

@@ -0,0 +1,62 @@
package auth
import (
"context"
"sync/atomic"
"testing"
)
type countingStore struct {
saveCount atomic.Int32
}
func (s *countingStore) List(context.Context) ([]*Auth, error) { return nil, nil }
func (s *countingStore) Save(context.Context, *Auth) (string, error) {
s.saveCount.Add(1)
return "", nil
}
func (s *countingStore) Delete(context.Context, string) error { return nil }
func TestWithSkipPersist_DisablesUpdatePersistence(t *testing.T) {
store := &countingStore{}
mgr := NewManager(store, nil, nil)
auth := &Auth{
ID: "auth-1",
Provider: "antigravity",
Metadata: map[string]any{"type": "antigravity"},
}
if _, err := mgr.Update(context.Background(), auth); err != nil {
t.Fatalf("Update returned error: %v", err)
}
if got := store.saveCount.Load(); got != 1 {
t.Fatalf("expected 1 Save call, got %d", got)
}
ctxSkip := WithSkipPersist(context.Background())
if _, err := mgr.Update(ctxSkip, auth); err != nil {
t.Fatalf("Update(skipPersist) returned error: %v", err)
}
if got := store.saveCount.Load(); got != 1 {
t.Fatalf("expected Save call count to remain 1, got %d", got)
}
}
func TestWithSkipPersist_DisablesRegisterPersistence(t *testing.T) {
store := &countingStore{}
mgr := NewManager(store, nil, nil)
auth := &Auth{
ID: "auth-1",
Provider: "antigravity",
Metadata: map[string]any{"type": "antigravity"},
}
if _, err := mgr.Register(WithSkipPersist(context.Background()), auth); err != nil {
t.Fatalf("Register(skipPersist) returned error: %v", err)
}
if got := store.saveCount.Load(); got != 0 {
t.Fatalf("expected 0 Save calls, got %d", got)
}
}

View File

@@ -194,6 +194,108 @@ func (a *Auth) ProxyInfo() string {
return "via proxy"
}
// DisableCoolingOverride returns the auth-file scoped disable_cooling override when present.
// The value is read from metadata key "disable_cooling" (or legacy "disable-cooling").
func (a *Auth) DisableCoolingOverride() (bool, bool) {
if a == nil || a.Metadata == nil {
return false, false
}
if val, ok := a.Metadata["disable_cooling"]; ok {
if parsed, okParse := parseBoolAny(val); okParse {
return parsed, true
}
}
if val, ok := a.Metadata["disable-cooling"]; ok {
if parsed, okParse := parseBoolAny(val); okParse {
return parsed, true
}
}
return false, false
}
// RequestRetryOverride returns the auth-file scoped request_retry override when present.
// The value is read from metadata key "request_retry" (or legacy "request-retry").
func (a *Auth) RequestRetryOverride() (int, bool) {
if a == nil || a.Metadata == nil {
return 0, false
}
if val, ok := a.Metadata["request_retry"]; ok {
if parsed, okParse := parseIntAny(val); okParse {
if parsed < 0 {
parsed = 0
}
return parsed, true
}
}
if val, ok := a.Metadata["request-retry"]; ok {
if parsed, okParse := parseIntAny(val); okParse {
if parsed < 0 {
parsed = 0
}
return parsed, true
}
}
return 0, false
}
func parseBoolAny(val any) (bool, bool) {
switch typed := val.(type) {
case bool:
return typed, true
case string:
trimmed := strings.TrimSpace(typed)
if trimmed == "" {
return false, false
}
parsed, err := strconv.ParseBool(trimmed)
if err != nil {
return false, false
}
return parsed, true
case float64:
return typed != 0, true
case json.Number:
parsed, err := typed.Int64()
if err != nil {
return false, false
}
return parsed != 0, true
default:
return false, false
}
}
func parseIntAny(val any) (int, bool) {
switch typed := val.(type) {
case int:
return typed, true
case int32:
return int(typed), true
case int64:
return int(typed), true
case float64:
return int(typed), true
case json.Number:
parsed, err := typed.Int64()
if err != nil {
return 0, false
}
return int(parsed), true
case string:
trimmed := strings.TrimSpace(typed)
if trimmed == "" {
return 0, false
}
parsed, err := strconv.Atoi(trimmed)
if err != nil {
return 0, false
}
return parsed, true
default:
return 0, false
}
}
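// Example auth-file metadata exercising both overrides (values are illustrative):
//   {"type":"claude","disable_cooling":true,"request_retry":2}
// DisableCoolingOverride() -> (true, true); RequestRetryOverride() -> (2, true).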
func (a *Auth) AccountInfo() (string, string) {
if a == nil {
return "", ""

View File

@@ -124,6 +124,7 @@ func (s *Service) ensureAuthUpdateQueue(ctx context.Context) {
}
func (s *Service) consumeAuthUpdates(ctx context.Context) {
ctx = coreauth.WithSkipPersist(ctx)
for {
select {
case <-ctx.Done():
@@ -680,6 +681,10 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
if a == nil || a.ID == "" {
return
}
if a.Disabled {
GlobalModelRegistry().UnregisterClient(a.ID)
return
}
authKind := strings.ToLower(strings.TrimSpace(a.Attributes["auth_kind"]))
if authKind == "" {
if kind, _ := a.AccountInfo(); strings.EqualFold(kind, "api_key") {

View File

@@ -2,6 +2,7 @@ package test
import (
"fmt"
"strings"
"testing"
"time"
@@ -2778,12 +2779,18 @@ func runThinkingTests(t *testing.T, cases []thinkingTestCase) {
// Verify clear_thinking for iFlow GLM models when enable_thinking=true
if tc.to == "iflow" && tc.expectField == "chat_template_kwargs.enable_thinking" && tc.expectValue == "true" {
baseModel := thinking.ParseSuffix(tc.model).ModelName
isGLM := strings.HasPrefix(strings.ToLower(baseModel), "glm")
ctVal := gjson.GetBytes(body, "chat_template_kwargs.clear_thinking")
if !ctVal.Exists() {
t.Fatalf("expected clear_thinking field not found for GLM model, body=%s", string(body))
}
if ctVal.Bool() != false {
t.Fatalf("clear_thinking: expected false, got %v, body=%s", ctVal.Bool(), string(body))
if isGLM {
if !ctVal.Exists() {
t.Fatalf("expected clear_thinking field not found for GLM model, body=%s", string(body))
}
if ctVal.Bool() != false {
t.Fatalf("clear_thinking: expected false, got %v, body=%s", ctVal.Bool(), string(body))
}
} else if ctVal.Exists() {
t.Fatalf("expected no clear_thinking field for non-GLM enable_thinking model, body=%s", string(body))
}
}
})