Compare commits

...

6 Commits

Author SHA1 Message Date
Luis Pater
368796349e Add Docker support with CI/CD workflow and usage instructions
- Added `.github/workflows/docker-image.yml` for automated Docker image build and push on version tags.
- Created `Dockerfile` to containerize the application.
- Updated README with instructions for running the application using Docker.
2025-07-14 16:50:51 +08:00
Luis Pater
c601542f6f Add ClaudeMessages handler for SSE-compatible chat completions
- Introduced `ClaudeMessages` to handle Claude-compatible streaming chat completions.
- Implemented client rotation, quota management, and dynamic model name mapping for better load balancing and resource utilization.
- Enhanced response streaming with real-time chunking and Claude format conversion.
- Added error handling for quota exhaustion, client disconnections, and backend failures.
2025-07-11 13:53:09 +08:00
Luis Pater
3c0c61aaf1 Add Claude compatibility and enhance API handling
- Integrated Claude API compatibility in handlers, translators, and server routes.
- Introduced `/messages` endpoint and upgraded `AuthMiddleware` for `X-Api-Key` header.
- Improved streaming response handling with `ConvertCliToClaude` for SSE compatibility.
- Enhanced request processing and tool-response mapping in translators.
- Updated README to reflect Claude integration and clarify supported features.
2025-07-11 13:46:27 +08:00
Luis Pater
edeadfc389 Restrict CLI access to localhost and update README for Gemini compatibility
- Added localhost-only access restriction to `CLIHandler` for security.
- Updated README to reflect Gemini-compatible API and local access limitation notes.
2025-07-11 10:57:23 +08:00
Luis Pater
aa9fd057fe Add FixCLIToolResponse for enhanced function call-response mapping
- Introduced `FixCLIToolResponse` in `translator` to group function calls with corresponding responses.
- Updated Gemini handlers to integrate new function for improved response handling.
- Enhanced error handling for cases where response mapping fails.
2025-07-11 10:17:25 +08:00
Luis Pater
b3607d3981 Add Gemini-compatible API and improve error handling
- Introduced a new Gemini-compatible API with routes under `/v1beta`.
- Added `GeminiHandler` to manage `generateContent` and `streamGenerateContent` actions.
- Enhanced `AuthMiddleware` to support `X-Goog-Api-Key` header.
- Improved client metadata handling and added conditional project ID updates in API calls.
- Updated logging to debug raw API request payloads for better traceability.
2025-07-11 04:01:45 +08:00
11 changed files with 1207 additions and 104 deletions

.github/workflows/docker-image.yml (new file, +42)

@@ -0,0 +1,42 @@
name: docker-image
on:
push:
tags:
- v*
env:
APP_NAME: CLIProxyAPI
DOCKERHUB_REPO: eceasy/cli-proxy-api
jobs:
docker:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Generate App Version
run: echo APP_VERSION=`git describe --tags --always` >> $GITHUB_ENV
- name: Build and push
uses: docker/build-push-action@v6
with:
context: .
platforms: |
linux/amd64
linux/arm64
push: true
build-args: |
APP_NAME=${{ env.APP_NAME }}
APP_VERSION=${{ env.APP_VERSION }}
tags: |
${{ env.DOCKERHUB_REPO }}:latest
${{ env.DOCKERHUB_REPO }}:${{ env.APP_VERSION }}

Dockerfile (new file, +23)

@@ -0,0 +1,23 @@
FROM golang:1.24-alpine AS builder
WORKDIR /app
COPY go.mod go.sum ./
RUN go mod download
COPY . .
RUN CGO_ENABLED=0 GOOS=linux go build -o ./CLIProxyAPI ./cmd/server/
FROM alpine:3.22.0
RUN mkdir /CLIProxyAPI
COPY --from=builder ./app/CLIProxyAPI /CLIProxyAPI/CLIProxyAPI
WORKDIR /CLIProxyAPI
EXPOSE 8317
CMD ["./CLIProxyAPI"]

README.md

@@ -1,10 +1,10 @@
# CLI Proxy API
A proxy server that provides an OpenAI-compatible API interface for CLI. This allows you to use CLI models with tools and libraries designed for the OpenAI API.
A proxy server that provides an OpenAI/Gemini/Claude-compatible API interface for CLI. This allows you to use CLI models with tools and libraries designed for the OpenAI/Gemini/Claude APIs.
## Features
- OpenAI-compatible API endpoints for CLI models
- OpenAI/Gemini/Claude-compatible API endpoints for CLI models
- Support for both streaming and non-streaming responses
- Function calling/tools support
- Multimodal input support (text and images)
@@ -136,7 +136,7 @@ console.log(response.choices[0].message.content);
- gemini-2.5-pro
- gemini-2.5-flash
- And various preview versions
- Automatic switching to the various preview versions
## Configuration
@@ -208,6 +208,24 @@ export CODE_ASSIST_ENDPOINT="http://127.0.0.1:8317"
The server will relay the `loadCodeAssist`, `onboardUser`, and `countTokens` requests, and automatically load-balance text generation requests across the multiple accounts.
> [!NOTE]
> This feature only allows local access because I couldn't find a way to authenticate the requests.
> I hardcoded `127.0.0.1` into the load balancing.
## Run with Docker
Run the following command to login:
```bash
docker run --rm -p 8085:8085 -v /path/to/your/config.yaml:/CLIProxyAPI/config.yaml -v /path/to/your/auth-dir:/root/.cli-proxy-api eceasy/cli-proxy-api:latest /CLIProxyAPI/CLIProxyAPI --login
```
Run the following command to start the server:
```bash
docker run --rm -p 8317:8317 -v /path/to/your/config.yaml:/CLIProxyAPI/config.yaml -v /path/to/your/auth-dir:/root/.cli-proxy-api eceasy/cli-proxy-api:latest
```
## Contributing
Contributions are welcome! Please feel free to submit a Pull Request.


@@ -0,0 +1,234 @@
package api
import (
"context"
"fmt"
"github.com/gin-gonic/gin"
"github.com/luispater/CLIProxyAPI/internal/api/translator"
"github.com/luispater/CLIProxyAPI/internal/client"
log "github.com/sirupsen/logrus"
"net/http"
"strings"
"time"
)
// ClaudeMessages handles Claude-compatible streaming chat completions.
// This function implements a sophisticated client rotation and quota management system
// to ensure high availability and optimal resource utilization across multiple backend clients.
func (h *APIHandlers) ClaudeMessages(c *gin.Context) {
// Extract raw JSON data from the incoming request
rawJson, err := c.GetRawData()
// If data retrieval fails, return a 400 Bad Request error.
if err != nil {
c.JSON(http.StatusBadRequest, ErrorResponse{
Error: ErrorDetail{
Message: fmt.Sprintf("Invalid request: %v", err),
Type: "invalid_request_error",
},
})
return
}
// Set up Server-Sent Events (SSE) headers for streaming response
// These headers are essential for maintaining a persistent connection
// and enabling real-time streaming of chat completions
c.Header("Content-Type", "text/event-stream")
c.Header("Cache-Control", "no-cache")
c.Header("Connection", "keep-alive")
c.Header("Access-Control-Allow-Origin", "*")
// Get the http.Flusher interface to manually flush the response.
// This is crucial for streaming as it allows immediate sending of data chunks
flusher, ok := c.Writer.(http.Flusher)
if !ok {
c.JSON(http.StatusInternalServerError, ErrorResponse{
Error: ErrorDetail{
Message: "Streaming not supported",
Type: "server_error",
},
})
return
}
// Parse and prepare the Claude request, extracting model name, system instructions,
// conversation contents, and available tools from the raw JSON
modelName, systemInstruction, contents, tools := translator.PrepareClaudeRequest(rawJson)
// Map Claude model names to corresponding Gemini models
// This allows the proxy to handle Claude API calls using Gemini backend
if modelName == "claude-sonnet-4-20250514" {
modelName = "gemini-2.5-pro"
} else if modelName == "claude-3-5-haiku-20241022" {
modelName = "gemini-2.5-flash"
}
// Create a cancellable context for the backend client request
// This allows proper cleanup and cancellation of ongoing requests
cliCtx, cliCancel := context.WithCancel(context.Background())
var cliClient *client.Client
defer func() {
// Ensure the client's mutex is unlocked on function exit.
// This prevents deadlocks and ensures proper resource cleanup
if cliClient != nil {
cliClient.RequestMutex.Unlock()
}
}()
// Main client rotation loop with quota management
// This loop implements a sophisticated load balancing and failover mechanism
outLoop:
for {
// Thread-safe client index rotation to distribute load evenly
// This ensures fair usage across all available clients
mutex.Lock()
startIndex := lastUsedClientIndex
currentIndex := (startIndex + 1) % len(h.cliClients)
lastUsedClientIndex = currentIndex
mutex.Unlock()
// Build a list of available clients, starting from the next client in rotation
// This implements round-robin load balancing while filtering out quota-exceeded clients
reorderedClients := make([]*client.Client, 0)
for i := 0; i < len(h.cliClients); i++ {
cliClient = h.cliClients[(startIndex+1+i)%len(h.cliClients)]
// Skip clients that have exceeded their quota for the requested model
if cliClient.IsModelQuotaExceeded(modelName) {
// Log different messages based on authentication method (API key vs account)
if cliClient.GetGenerativeLanguageAPIKey() == "" {
log.Debugf("Model %s is quota exceeded for account %s, project id: %s", modelName, cliClient.GetEmail(), cliClient.GetProjectID())
} else {
log.Debugf("Model %s is quota exceeded for generative language API Key: %s", modelName, cliClient.GetGenerativeLanguageAPIKey())
}
cliClient = nil
continue
}
reorderedClients = append(reorderedClients, cliClient)
}
// If all clients have exceeded quota, return a 429 Too Many Requests error
if len(reorderedClients) == 0 {
c.Status(429)
_, _ = fmt.Fprintf(c.Writer, `{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, modelName)
flusher.Flush()
cliCancel()
return
}
// Attempt to acquire a lock on an available client using non-blocking TryLock
// This prevents blocking when a client is busy with another request
locked := false
for i := 0; i < len(reorderedClients); i++ {
cliClient = reorderedClients[i]
if cliClient.RequestMutex.TryLock() {
locked = true
break
}
}
// If no client is immediately available, fall back to blocking on the first
// client in the rotation; this ensures the request will eventually be processed
if !locked {
cliClient = reorderedClients[0]
cliClient.RequestMutex.Lock()
}
// Determine the authentication method being used by the selected client
// This affects how responses are formatted and logged
isGlAPIKey := false
if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
log.Debugf("Request use generative language API Key: %s", glAPIKey)
isGlAPIKey = true
} else {
log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
}
// Initiate streaming communication with the backend client
// This returns two channels: one for response chunks and one for errors
includeThoughts := false
if userAgent, hasKey := c.Request.Header["User-Agent"]; hasKey {
includeThoughts = !strings.Contains(userAgent[0], "claude-cli")
}
respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJson, modelName, systemInstruction, contents, tools, includeThoughts)
// Track response state for proper Claude format conversion
hasFirstResponse := false
responseType := 0
responseIndex := 0
// Main streaming loop - handles multiple concurrent events using Go channels
// This select statement manages four different types of events simultaneously
for {
select {
// Case 1: Handle client disconnection
// Detects when the HTTP client has disconnected and cleans up resources
case <-c.Request.Context().Done():
if c.Request.Context().Err().Error() == "context canceled" {
log.Debugf("Client disconnected: %v", c.Request.Context().Err())
cliCancel() // Cancel the backend request to prevent resource leaks
return
}
// Case 2: Process incoming response chunks from the backend
// This handles the actual streaming data from the AI model
case chunk, okStream := <-respChan:
if !okStream {
// Stream has ended - send the final message_stop event
// This follows the Claude API specification for stream termination
_, _ = c.Writer.Write([]byte(`event: message_stop`))
_, _ = c.Writer.Write([]byte("\n"))
_, _ = c.Writer.Write([]byte(`data: {"type":"message_stop"}`))
_, _ = c.Writer.Write([]byte("\n\n\n"))
flusher.Flush()
cliCancel()
return
} else {
// Convert the backend response to Claude-compatible format
// This translation layer ensures API compatibility
claudeFormat := translator.ConvertCliToClaude(chunk, isGlAPIKey, hasFirstResponse, &responseType, &responseIndex)
if claudeFormat != "" {
_, _ = c.Writer.Write([]byte(claudeFormat))
flusher.Flush() // Immediately send the chunk to the client
}
hasFirstResponse = true
}
// Case 3: Handle errors from the backend
// This manages various error conditions and implements retry logic
case errInfo, okError := <-errChan:
if okError {
// Special handling for quota exceeded errors
// If configured, attempt to switch to a different project/client
if errInfo.StatusCode == 429 && h.cfg.QuotaExceeded.SwitchProject {
continue outLoop // Restart the client selection process
} else {
// Forward other errors directly to the client
c.Status(errInfo.StatusCode)
_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
flusher.Flush()
cliCancel()
}
return
}
// Case 4: Send periodic keep-alive signals
// Prevents connection timeouts during long-running requests
case <-time.After(500 * time.Millisecond):
if hasFirstResponse {
// Send a ping event to maintain the connection
// This is especially important for slow AI model responses
output := "event: ping\n"
output = output + `data: {"type": "ping"}`
output = output + "\n\n\n"
_, _ = c.Writer.Write([]byte(output))
flusher.Flush()
}
}
}
}
}
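Since the diff above only shows the server side, here is a minimal client sketch for exercising the Claude-compatible endpoint this handler serves (registered as `/v1/messages` in the routes change further down). The URL, key, and request body are illustrative; the model name is one of the two the handler maps to Gemini backends.

```go
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	body := []byte(`{
		"model": "claude-sonnet-4-20250514",
		"messages": [{"role": "user", "content": "Hello"}]
	}`)
	req, err := http.NewRequest(http.MethodPost, "http://127.0.0.1:8317/v1/messages", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("X-Api-Key", "your-configured-api-key") // must match a key configured on the proxy
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	// The handler always streams SSE; print each event line as it arrives.
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		fmt.Println(scanner.Text())
	}
}
```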


@@ -12,10 +12,21 @@ import (
"github.com/tidwall/sjson"
"io"
"net/http"
"strings"
"time"
)
func (h *APIHandlers) CLIHandler(c *gin.Context) {
if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") {
c.JSON(http.StatusForbidden, ErrorResponse{
Error: ErrorDetail{
Message: "The CLI relay only allows local access",
Type: "forbidden",
},
})
return
}
rawJson, _ := c.GetRawData()
requestRawURI := c.Request.URL.Path
if requestRawURI == "/v1internal:generateContent" {


@@ -0,0 +1,242 @@
package api
import (
"context"
"fmt"
"github.com/gin-gonic/gin"
"github.com/luispater/CLIProxyAPI/internal/api/translator"
"github.com/luispater/CLIProxyAPI/internal/client"
log "github.com/sirupsen/logrus"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
"net/http"
"strings"
"time"
)
func (h *APIHandlers) GeminiHandler(c *gin.Context) {
var person struct {
Action string `uri:"action" binding:"required"`
}
if err := c.ShouldBindUri(&person); err != nil {
c.JSON(http.StatusBadRequest, ErrorResponse{
Error: ErrorDetail{
Message: fmt.Sprintf("Invalid request: %v", err),
Type: "invalid_request_error",
},
})
return
}
action := strings.Split(person.Action, ":")
if len(action) != 2 {
c.JSON(http.StatusNotFound, ErrorResponse{
Error: ErrorDetail{
Message: fmt.Sprintf("%s not found.", c.Request.URL.Path),
Type: "invalid_request_error",
},
})
return
}
modelName := action[0]
method := action[1]
rawJson, _ := c.GetRawData()
rawJson, _ = sjson.SetBytes(rawJson, "model", []byte(modelName))
if method == "generateContent" {
h.geminiGenerateContent(c, rawJson)
} else if method == "streamGenerateContent" {
h.geminiStreamGenerateContent(c, rawJson)
}
}
func (h *APIHandlers) geminiStreamGenerateContent(c *gin.Context, rawJson []byte) {
// Get the http.Flusher interface to manually flush the response.
flusher, ok := c.Writer.(http.Flusher)
if !ok {
c.JSON(http.StatusInternalServerError, ErrorResponse{
Error: ErrorDetail{
Message: "Streaming not supported",
Type: "server_error",
},
})
return
}
modelResult := gjson.GetBytes(rawJson, "model")
modelName := modelResult.String()
cliCtx, cliCancel := context.WithCancel(context.Background())
var cliClient *client.Client
defer func() {
// Ensure the client's mutex is unlocked on function exit.
if cliClient != nil {
cliClient.RequestMutex.Unlock()
}
}()
outLoop:
for {
var errorResponse *client.ErrorMessage
cliClient, errorResponse = h.getClient(modelName)
if errorResponse != nil {
c.Status(errorResponse.StatusCode)
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
flusher.Flush()
cliCancel()
return
}
template := `{"project":"","request":{},"model":""}`
template, _ = sjson.SetRaw(template, "request", string(rawJson))
template, _ = sjson.Set(template, "model", gjson.Get(template, "request.model").String())
template, _ = sjson.Delete(template, "request.model")
template, errFixCLIToolResponse := translator.FixCLIToolResponse(template)
if errFixCLIToolResponse != nil {
c.JSON(http.StatusInternalServerError, ErrorResponse{
Error: ErrorDetail{
Message: errFixCLIToolResponse.Error(),
Type: "server_error",
},
})
cliCancel()
return
}
systemInstructionResult := gjson.Get(template, "request.system_instruction")
if systemInstructionResult.Exists() {
template, _ = sjson.SetRaw(template, "request.systemInstruction", systemInstructionResult.Raw)
template, _ = sjson.Delete(template, "request.system_instruction")
}
rawJson = []byte(template)
if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
log.Debugf("Request use generative language API Key: %s", glAPIKey)
} else {
log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
}
// Send the message and receive response chunks and errors via channels.
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJson)
for {
select {
// Handle client disconnection.
case <-c.Request.Context().Done():
if c.Request.Context().Err().Error() == "context canceled" {
log.Debugf("Client disconnected: %v", c.Request.Context().Err())
cliCancel() // Cancel the backend request.
return
}
// Process incoming response chunks.
case chunk, okStream := <-respChan:
if !okStream {
cliCancel()
return
} else {
if cliClient.GetGenerativeLanguageAPIKey() == "" {
responseResult := gjson.GetBytes(chunk, "response")
if responseResult.Exists() {
chunk = []byte(responseResult.Raw)
}
}
_, _ = c.Writer.Write([]byte("data: "))
_, _ = c.Writer.Write(chunk)
_, _ = c.Writer.Write([]byte("\n\n"))
flusher.Flush()
}
// Handle errors from the backend.
case err, okError := <-errChan:
if okError {
if err.StatusCode == 429 && h.cfg.QuotaExceeded.SwitchProject {
continue outLoop
} else {
c.Status(err.StatusCode)
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
flusher.Flush()
cliCancel()
}
return
}
// Send a keep-alive signal to the client.
case <-time.After(500 * time.Millisecond):
}
}
}
}
func (h *APIHandlers) geminiGenerateContent(c *gin.Context, rawJson []byte) {
c.Header("Content-Type", "application/json")
modelResult := gjson.GetBytes(rawJson, "model")
modelName := modelResult.String()
cliCtx, cliCancel := context.WithCancel(context.Background())
var cliClient *client.Client
defer func() {
if cliClient != nil {
cliClient.RequestMutex.Unlock()
}
}()
for {
var errorResponse *client.ErrorMessage
cliClient, errorResponse = h.getClient(modelName)
if errorResponse != nil {
c.Status(errorResponse.StatusCode)
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
cliCancel()
return
}
template := `{"project":"","request":{},"model":""}`
template, _ = sjson.SetRaw(template, "request", string(rawJson))
template, _ = sjson.Set(template, "model", gjson.Get(template, "request.model").String())
template, _ = sjson.Delete(template, "request.model")
template, errFixCLIToolResponse := translator.FixCLIToolResponse(template)
if errFixCLIToolResponse != nil {
c.JSON(http.StatusInternalServerError, ErrorResponse{
Error: ErrorDetail{
Message: errFixCLIToolResponse.Error(),
Type: "server_error",
},
})
cliCancel()
return
}
systemInstructionResult := gjson.Get(template, "request.system_instruction")
if systemInstructionResult.Exists() {
template, _ = sjson.SetRaw(template, "request.systemInstruction", systemInstructionResult.Raw)
template, _ = sjson.Delete(template, "request.system_instruction")
}
rawJson = []byte(template)
if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
log.Debugf("Request use generative language API Key: %s", glAPIKey)
} else {
log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
}
resp, err := cliClient.SendRawMessage(cliCtx, rawJson)
if err != nil {
if err.StatusCode == 429 && h.cfg.QuotaExceeded.SwitchProject {
continue
} else {
c.Status(err.StatusCode)
_, _ = c.Writer.Write([]byte(err.Error.Error()))
cliCancel()
}
break
} else {
if cliClient.GetGenerativeLanguageAPIKey() == "" {
responseResult := gjson.GetBytes(resp, "response")
if responseResult.Exists() {
resp = []byte(responseResult.Raw)
}
}
_, _ = c.Writer.Write(resp)
cliCancel()
break
}
}
}
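For reference, the Gemini-compatible route above encodes both model and method in the final path segment, mirroring Google's Generative Language API. A minimal client sketch follows; the URL, key, and body are illustrative.

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	body := []byte(`{"contents":[{"role":"user","parts":[{"text":"Hello"}]}]}`)
	// GeminiHandler splits "gemini-2.5-pro:generateContent" on ":" into the
	// model name and the method, then injects the model into the JSON body.
	req, err := http.NewRequest(http.MethodPost,
		"http://127.0.0.1:8317/v1beta/models/gemini-2.5-pro:generateContent",
		bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("X-Goog-Api-Key", "your-configured-api-key")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(out))
}
```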


@@ -41,46 +41,6 @@ func NewAPIHandlers(cliClients []*client.Client, cfg *config.Config) *APIHandler
func (h *APIHandlers) Models(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{
"data": []map[string]any{
{
"id": "gemini-2.5-pro-preview-05-06",
"object": "model",
"version": "2.5-preview-05-06",
"name": "Gemini 2.5 Pro Preview 05-06",
"description": "Preview release (May 6th, 2025) of Gemini 2.5 Pro",
"context_length": 1048576,
"max_completion_tokens": 65536,
"supported_parameters": []string{
"tools",
"temperature",
"top_p",
"top_k",
},
"temperature": 1,
"topP": 0.95,
"topK": 64,
"maxTemperature": 2,
"thinking": true,
},
{
"id": "gemini-2.5-pro-preview-06-05",
"object": "model",
"version": "2.5-preview-06-05",
"name": "Gemini 2.5 Pro Preview 06-05",
"description": "Preview release (June 5th, 2025) of Gemini 2.5 Pro",
"context_length": 1048576,
"max_completion_tokens": 65536,
"supported_parameters": []string{
"tools",
"temperature",
"top_p",
"top_k",
},
"temperature": 1,
"topP": 0.95,
"topK": 64,
"maxTemperature": 2,
"thinking": true,
},
{
"id": "gemini-2.5-pro",
"object": "model",
@@ -101,46 +61,6 @@ func (h *APIHandlers) Models(c *gin.Context) {
"maxTemperature": 2,
"thinking": true,
},
{
"id": "gemini-2.5-flash-preview-04-17",
"object": "model",
"version": "2.5-preview-04-17",
"name": "Gemini 2.5 Flash Preview 04-17",
"description": "Preview release (April 17th, 2025) of Gemini 2.5 Flash",
"context_length": 1048576,
"max_completion_tokens": 65536,
"supported_parameters": []string{
"tools",
"temperature",
"top_p",
"top_k",
},
"temperature": 1,
"topP": 0.95,
"topK": 64,
"maxTemperature": 2,
"thinking": true,
},
{
"id": "gemini-2.5-flash-preview-05-20",
"object": "model",
"version": "2.5-preview-05-20",
"name": "Gemini 2.5 Flash Preview 05-20",
"description": "Preview release (April 17th, 2025) of Gemini 2.5 Flash",
"context_length": 1048576,
"max_completion_tokens": 65536,
"supported_parameters": []string{
"tools",
"temperature",
"top_p",
"top_k",
},
"temperature": 1,
"topP": 0.95,
"topK": 64,
"maxTemperature": 2,
"thinking": true,
},
{
"id": "gemini-2.5-flash",
"object": "model",


@@ -68,6 +68,15 @@ func (s *Server) setupRoutes() {
{
v1.GET("/models", s.handlers.Models)
v1.POST("/chat/completions", s.handlers.ChatCompletions)
v1.POST("/messages", s.handlers.ClaudeMessages)
}
// Gemini compatible API routes
v1beta := s.engine.Group("/v1beta")
v1beta.Use(AuthMiddleware(s.cfg))
{
v1beta.GET("/models", s.handlers.Models)
v1beta.POST("/models/:action", s.handlers.GeminiHandler)
}
// Root endpoint
@@ -140,7 +149,9 @@ func AuthMiddleware(cfg *config.Config) gin.HandlerFunc {
// Get the Authorization header
authHeader := c.GetHeader("Authorization")
if authHeader == "" {
authHeaderGoogle := c.GetHeader("X-Goog-Api-Key")
authHeaderAnthropic := c.GetHeader("X-Api-Key")
if authHeader == "" && authHeaderGoogle == "" && authHeaderAnthropic == "" {
c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{
"error": "Missing API key",
})
@@ -159,7 +170,7 @@ func AuthMiddleware(cfg *config.Config) gin.HandlerFunc {
// Find the API key in the in-memory list
var foundKey string
for i := range cfg.ApiKeys {
if cfg.ApiKeys[i] == apiKey {
if cfg.ApiKeys[i] == apiKey || cfg.ApiKeys[i] == authHeaderGoogle || cfg.ApiKeys[i] == authHeaderAnthropic {
foundKey = cfg.ApiKeys[i]
break
}

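With this middleware change, one configured key can be presented in any of three header styles. A minimal sketch, assuming the pre-existing `Authorization` parsing strips the `Bearer ` prefix; the helper name is invented and only one header is needed per request.

```go
package example

import "net/http"

// authedRequest is a hypothetical helper showing the three header styles
// AuthMiddleware now accepts; any single one of them is sufficient.
func authedRequest(url, apiKey string) (*http.Request, error) {
	req, err := http.NewRequest(http.MethodPost, url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Authorization", "Bearer "+apiKey) // OpenAI-style clients
	req.Header.Set("X-Goog-Api-Key", apiKey)          // Gemini-style clients (new)
	req.Header.Set("X-Api-Key", apiKey)               // Claude-style clients (new)
	return req, nil
}
```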

@@ -1,7 +1,10 @@
package translator
import (
"bytes"
"encoding/json"
"fmt"
"github.com/tidwall/sjson"
"strings"
"github.com/luispater/CLIProxyAPI/internal/client"
@@ -20,11 +23,15 @@ func PrepareRequest(rawJson []byte) (string, *client.Content, []client.Content,
modelName = modelResult.String()
}
// Process the array of messages.
// Initialize data structures for processing conversation messages
// contents: stores the processed conversation history
// systemInstruction: stores system-level instructions separate from conversation
contents := make([]client.Content, 0)
var systemInstruction *client.Content
messagesResult := gjson.GetBytes(rawJson, "messages")
// Pre-process tool responses to create a lookup map
// This first pass collects all tool responses so they can be matched with their corresponding calls
toolItems := make(map[string]*client.FunctionResponse)
if messagesResult.IsArray() {
messagesResults := messagesResult.Array()
@@ -35,21 +42,26 @@ func PrepareRequest(rawJson []byte) (string, *client.Content, []client.Content,
continue
}
contentResult := messageResult.Get("content")
// Extract tool responses for later matching with function calls
if roleResult.String() == "tool" {
toolCallID := messageResult.Get("tool_call_id").String()
if toolCallID != "" {
var responseData string
// Handle both string and object-based tool response formats
if contentResult.Type == gjson.String {
responseData = contentResult.String()
} else if contentResult.IsObject() && contentResult.Get("type").String() == "text" {
responseData = contentResult.Get("text").String()
}
// drop the timestamp from the tool call ID
// Clean up tool call ID by removing timestamp suffix
// This normalizes IDs for consistent matching between calls and responses
toolCallIDs := strings.Split(toolCallID, "-")
newToolCallID := strings.Join(toolCallIDs[:len(toolCallIDs)-1], "-")
// Create function response object with normalized ID and response data
functionResponse := client.FunctionResponse{Name: newToolCallID, Response: map[string]interface{}{"result": responseData}}
toolItems[toolCallID] = &functionResponse
}
@@ -124,25 +136,33 @@ func PrepareRequest(rawJson []byte) (string, *client.Content, []client.Content,
}
contents = append(contents, client.Content{Role: "user", Parts: parts})
}
// Assistant messages can contain text or tool calls.
// Assistant messages can contain text responses or tool calls
// In the internal format, assistant messages are converted to "model" role
case "assistant":
if contentResult.Type == gjson.String {
// Simple text response from the assistant
contents = append(contents, client.Content{Role: "model", Parts: []client.Part{{Text: contentResult.String()}}})
} else if !contentResult.Exists() || contentResult.Type == gjson.Null {
// Handle tool calls made by the assistant.
// Handle complex tool calls made by the assistant
// This processes function calls and matches them with their responses
functionIDs := make([]string, 0)
toolCallsResult := messageResult.Get("tool_calls")
if toolCallsResult.IsArray() {
parts := make([]client.Part, 0)
tcsResult := toolCallsResult.Array()
// Process each tool call in the assistant's message
for j := 0; j < len(tcsResult); j++ {
tcResult := tcsResult[j]
// Extract function call details
functionID := tcResult.Get("id").String()
functionIDs = append(functionIDs, functionID)
functionName := tcResult.Get("function.name").String()
functionArgs := tcResult.Get("function.arguments").String()
// Parse function arguments from JSON string to map
var args map[string]any
if err := json.Unmarshal([]byte(functionArgs), &args); err == nil {
parts = append(parts, client.Part{
@@ -153,17 +173,22 @@ func PrepareRequest(rawJson []byte) (string, *client.Content, []client.Content,
})
}
}
// Add the model's function calls to the conversation
if len(parts) > 0 {
contents = append(contents, client.Content{
Role: "model", Parts: parts,
})
// Create a separate tool response message with the collected responses
// This matches function calls with their corresponding responses
toolParts := make([]client.Part, 0)
for _, functionID := range functionIDs {
if functionResponse, ok := toolItems[functionID]; ok {
toolParts = append(toolParts, client.Part{FunctionResponse: functionResponse})
}
}
// Add the tool responses as a separate message in the conversation
contents = append(contents, client.Content{Role: "tool", Parts: toolParts})
}
}
@@ -197,3 +222,323 @@ func PrepareRequest(rawJson []byte) (string, *client.Content, []client.Content,
return modelName, systemInstruction, contents, tools
}
// FunctionCallGroup represents a group of function calls and their responses
type FunctionCallGroup struct {
ModelContent map[string]interface{}
FunctionCalls []gjson.Result
ResponsesNeeded int
}
// FixCLIToolResponse performs tool-response format conversion and grouping.
// This function transforms the CLI tool response format by grouping function calls
// with their corresponding responses, ensuring proper conversation flow and API compatibility.
// It converts a linear layout, in which each function response arrives as its own message,
// into a grouped layout in which function calls and their responses are properly associated.
func FixCLIToolResponse(input string) (string, error) {
// Parse the input JSON to extract the conversation structure
parsed := gjson.Parse(input)
// Extract the contents array which contains the conversation messages
contents := parsed.Get("request.contents")
if !contents.Exists() {
return input, fmt.Errorf("contents not found in input")
}
// Initialize data structures for processing and grouping
var newContents []interface{} // Final processed contents array
var pendingGroups []*FunctionCallGroup // Groups awaiting completion with responses
var collectedResponses []gjson.Result // Standalone responses to be matched
// Process each content object in the conversation
// This iterates through messages and groups function calls with their responses
contents.ForEach(func(key, value gjson.Result) bool {
role := value.Get("role").String()
parts := value.Get("parts")
// Check if this content has function responses
var responsePartsInThisContent []gjson.Result
parts.ForEach(func(_, part gjson.Result) bool {
if part.Get("functionResponse").Exists() {
responsePartsInThisContent = append(responsePartsInThisContent, part)
}
return true
})
// If this content has function responses, collect them
if len(responsePartsInThisContent) > 0 {
collectedResponses = append(collectedResponses, responsePartsInThisContent...)
// Check if any pending groups can be satisfied
for i := len(pendingGroups) - 1; i >= 0; i-- {
group := pendingGroups[i]
if len(collectedResponses) >= group.ResponsesNeeded {
// Take the needed responses for this group
groupResponses := collectedResponses[:group.ResponsesNeeded]
collectedResponses = collectedResponses[group.ResponsesNeeded:]
// Create merged function response content
var responseParts []interface{}
for _, response := range groupResponses {
var responseMap map[string]interface{}
errUnmarshal := json.Unmarshal([]byte(response.Raw), &responseMap)
if errUnmarshal != nil {
log.Warnf("failed to unmarshal function response: %v\n", errUnmarshal)
continue
}
responseParts = append(responseParts, responseMap)
}
if len(responseParts) > 0 {
functionResponseContent := map[string]interface{}{
"parts": responseParts,
"role": "function",
}
newContents = append(newContents, functionResponseContent)
}
// Remove this group as it's been satisfied
pendingGroups = append(pendingGroups[:i], pendingGroups[i+1:]...)
break
}
}
return true // Skip adding this content, responses are merged
}
// If this is a model with function calls, create a new group
if role == "model" {
var functionCallsInThisModel []gjson.Result
parts.ForEach(func(_, part gjson.Result) bool {
if part.Get("functionCall").Exists() {
functionCallsInThisModel = append(functionCallsInThisModel, part)
}
return true
})
if len(functionCallsInThisModel) > 0 {
// Add the model content
var contentMap map[string]interface{}
errUnmarshal := json.Unmarshal([]byte(value.Raw), &contentMap)
if errUnmarshal != nil {
log.Warnf("failed to unmarshal model content: %v\n", errUnmarshal)
return true
}
newContents = append(newContents, contentMap)
// Create a new group for tracking responses
group := &FunctionCallGroup{
ModelContent: contentMap,
FunctionCalls: functionCallsInThisModel,
ResponsesNeeded: len(functionCallsInThisModel),
}
pendingGroups = append(pendingGroups, group)
} else {
// Regular model content without function calls
var contentMap map[string]interface{}
errUnmarshal := json.Unmarshal([]byte(value.Raw), &contentMap)
if errUnmarshal != nil {
log.Warnf("failed to unmarshal content: %v\n", errUnmarshal)
return true
}
newContents = append(newContents, contentMap)
}
} else {
// Non-model content (user, etc.)
var contentMap map[string]interface{}
errUnmarshal := json.Unmarshal([]byte(value.Raw), &contentMap)
if errUnmarshal != nil {
log.Warnf("failed to unmarshal content: %v\n", errUnmarshal)
return true
}
newContents = append(newContents, contentMap)
}
return true
})
// Handle any remaining pending groups with remaining responses
for _, group := range pendingGroups {
if len(collectedResponses) >= group.ResponsesNeeded {
groupResponses := collectedResponses[:group.ResponsesNeeded]
collectedResponses = collectedResponses[group.ResponsesNeeded:]
var responseParts []interface{}
for _, response := range groupResponses {
var responseMap map[string]interface{}
errUnmarshal := json.Unmarshal([]byte(response.Raw), &responseMap)
if errUnmarshal != nil {
log.Warnf("failed to unmarshal function response: %v\n", errUnmarshal)
continue
}
responseParts = append(responseParts, responseMap)
}
if len(responseParts) > 0 {
functionResponseContent := map[string]interface{}{
"parts": responseParts,
"role": "function",
}
newContents = append(newContents, functionResponseContent)
}
}
}
// Update the original JSON with the new contents
result := input
newContentsJSON, _ := json.Marshal(newContents)
result, _ = sjson.Set(result, "request.contents", json.RawMessage(newContentsJSON))
return result, nil
}
func PrepareClaudeRequest(rawJson []byte) (string, *client.Content, []client.Content, []client.ToolDeclaration) {
var pathsToDelete []string
root := gjson.ParseBytes(rawJson)
walk(root, "", "additionalProperties", &pathsToDelete)
walk(root, "", "$schema", &pathsToDelete)
var err error
for _, p := range pathsToDelete {
rawJson, err = sjson.DeleteBytes(rawJson, p)
if err != nil {
continue
}
}
rawJson = bytes.Replace(rawJson, []byte(`"url":{"type":"string","format":"uri",`), []byte(`"url":{"type":"string",`), -1)
// log.Debug(string(rawJson))
modelName := "gemini-2.5-pro"
modelResult := gjson.GetBytes(rawJson, "model")
if modelResult.Type == gjson.String {
modelName = modelResult.String()
}
contents := make([]client.Content, 0)
var systemInstruction *client.Content
systemResult := gjson.GetBytes(rawJson, "system")
if systemResult.IsArray() {
systemResults := systemResult.Array()
systemInstruction = &client.Content{Role: "user", Parts: []client.Part{}}
for i := 0; i < len(systemResults); i++ {
systemPromptResult := systemResults[i]
systemTypePromptResult := systemPromptResult.Get("type")
if systemTypePromptResult.Type == gjson.String && systemTypePromptResult.String() == "text" {
systemPrompt := systemPromptResult.Get("text").String()
systemPart := client.Part{Text: systemPrompt}
systemInstruction.Parts = append(systemInstruction.Parts, systemPart)
}
}
if len(systemInstruction.Parts) == 0 {
systemInstruction = nil
}
}
messagesResult := gjson.GetBytes(rawJson, "messages")
if messagesResult.IsArray() {
messageResults := messagesResult.Array()
for i := 0; i < len(messageResults); i++ {
messageResult := messageResults[i]
roleResult := messageResult.Get("role")
if roleResult.Type != gjson.String {
continue
}
role := roleResult.String()
if role == "assistant" {
role = "model"
}
clientContent := client.Content{Role: role, Parts: []client.Part{}}
contentsResult := messageResult.Get("content")
if contentsResult.IsArray() {
contentResults := contentsResult.Array()
for j := 0; j < len(contentResults); j++ {
contentResult := contentResults[j]
contentTypeResult := contentResult.Get("type")
if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "text" {
prompt := contentResult.Get("text").String()
clientContent.Parts = append(clientContent.Parts, client.Part{Text: prompt})
} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "tool_use" {
functionName := contentResult.Get("name").String()
functionArgs := contentResult.Get("input").String()
var args map[string]any
if err = json.Unmarshal([]byte(functionArgs), &args); err == nil {
clientContent.Parts = append(clientContent.Parts, client.Part{
FunctionCall: &client.FunctionCall{
Name: functionName,
Args: args,
},
})
}
} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "tool_result" {
toolCallID := contentResult.Get("tool_use_id").String()
if toolCallID != "" {
funcName := toolCallID
toolCallIDs := strings.Split(toolCallID, "-")
if len(toolCallIDs) > 1 {
funcName = strings.Join(toolCallIDs[0:len(toolCallIDs)-1], "-")
}
responseData := contentResult.Get("content").String()
functionResponse := client.FunctionResponse{Name: funcName, Response: map[string]interface{}{"result": responseData}}
clientContent.Parts = append(clientContent.Parts, client.Part{FunctionResponse: &functionResponse})
}
}
}
contents = append(contents, clientContent)
} else if contentsResult.Type == gjson.String {
prompt := contentsResult.String()
contents = append(contents, client.Content{Role: role, Parts: []client.Part{{Text: prompt}}})
}
}
}
var tools []client.ToolDeclaration
toolsResult := gjson.GetBytes(rawJson, "tools")
if toolsResult.IsArray() {
tools = make([]client.ToolDeclaration, 1)
tools[0].FunctionDeclarations = make([]any, 0)
toolsResults := toolsResult.Array()
for i := 0; i < len(toolsResults); i++ {
toolResult := toolsResults[i]
inputSchemaResult := toolResult.Get("input_schema")
if inputSchemaResult.Exists() && inputSchemaResult.IsObject() {
inputSchema := inputSchemaResult.Raw
inputSchema, _ = sjson.Delete(inputSchema, "additionalProperties")
inputSchema, _ = sjson.Delete(inputSchema, "$schema")
tool, _ := sjson.Delete(toolResult.Raw, "input_schema")
tool, _ = sjson.SetRaw(tool, "parameters", inputSchema)
var toolDeclaration any
if err = json.Unmarshal([]byte(tool), &toolDeclaration); err == nil {
tools[0].FunctionDeclarations = append(tools[0].FunctionDeclarations, toolDeclaration)
}
}
}
} else {
tools = make([]client.ToolDeclaration, 0)
}
return modelName, systemInstruction, contents, tools
}
func walk(value gjson.Result, path, field string, pathsToDelete *[]string) {
switch value.Type {
case gjson.JSON:
value.ForEach(func(key, val gjson.Result) bool {
var childPath string
if path == "" {
childPath = key.String()
} else {
childPath = path + "." + key.String()
}
if key.String() == field {
*pathsToDelete = append(*pathsToDelete, childPath)
}
walk(val, childPath, field, pathsToDelete)
return true
})
case gjson.String, gjson.Number, gjson.True, gjson.False, gjson.Null:
}
}
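To make the `FixCLIToolResponse` grouping concrete, here is a test-style sketch; the test and the conversation are invented, not part of this change, and it assumes it lives inside the `translator` package.

```go
package translator

import "testing"

// TestFixCLIToolResponseSketch is a hypothetical illustration: a standalone
// functionResponse message is regrouped under role "function" directly after
// the model turn that issued the matching functionCall.
func TestFixCLIToolResponseSketch(t *testing.T) {
	input := `{"request":{"contents":[
		{"role":"user","parts":[{"text":"What is the weather?"}]},
		{"role":"model","parts":[{"functionCall":{"name":"get_weather","args":{"city":"Paris"}}}]},
		{"role":"user","parts":[{"functionResponse":{"name":"get_weather","response":{"result":"sunny"}}}]}
	]}}`
	out, err := FixCLIToolResponse(input)
	if err != nil {
		t.Fatal(err)
	}
	t.Log(out)
	// Expected shape of request.contents after grouping:
	//   [ {user text}, {model functionCall}, {"role":"function","parts":[{functionResponse}]} ]
}
```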


@@ -1,6 +1,7 @@
package translator
import (
"bytes"
"fmt"
"time"
@@ -188,3 +189,194 @@ func ConvertCliToOpenAINonStream(rawJson []byte, unixTimestamp int64, isGlAPIKey
return template
}
// ConvertCliToClaude performs sophisticated streaming response format conversion.
// This function implements a complex state machine that translates backend client responses
// into Claude-compatible Server-Sent Events (SSE) format. It manages different response types
// and handles state transitions between content blocks, thinking processes, and function calls.
//
// Response type states: 0=none, 1=content, 2=thinking, 3=function
// The function maintains state across multiple calls to ensure proper SSE event sequencing.
func ConvertCliToClaude(rawJson []byte, isGlAPIKey, hasFirstResponse bool, responseType, responseIndex *int) string {
// Normalize the response format for different API key types
// Generative Language API keys have a different response structure
if isGlAPIKey {
rawJson, _ = sjson.SetRawBytes(rawJson, "response", rawJson)
}
// Track whether tools are being used in this response chunk
usedTool := false
output := ""
// Initialize the streaming session with a message_start event
// This is only sent for the very first response chunk
if !hasFirstResponse {
output = "event: message_start\n"
// Create the initial message structure with default values
// This follows the Claude API specification for streaming message initialization
messageStartTemplate := `{"type": "message_start", "message": {"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY", "type": "message", "role": "assistant", "content": [], "model": "claude-3-5-sonnet-20241022", "stop_reason": null, "stop_sequence": null, "usage": {"input_tokens": 0, "output_tokens": 0}}}`
// Override default values with actual response metadata if available
if modelVersionResult := gjson.GetBytes(rawJson, "response.modelVersion"); modelVersionResult.Exists() {
messageStartTemplate, _ = sjson.Set(messageStartTemplate, "message.model", modelVersionResult.String())
}
if responseIdResult := gjson.GetBytes(rawJson, "response.responseId"); responseIdResult.Exists() {
messageStartTemplate, _ = sjson.Set(messageStartTemplate, "message.id", responseIdResult.String())
}
output = output + fmt.Sprintf("data: %s\n\n\n", messageStartTemplate)
}
// Process the response parts array from the backend client
// Each part can contain text content, thinking content, or function calls
partsResult := gjson.GetBytes(rawJson, "response.candidates.0.content.parts")
if partsResult.IsArray() {
partResults := partsResult.Array()
for i := 0; i < len(partResults); i++ {
partResult := partResults[i]
// Extract the different types of content from each part
partTextResult := partResult.Get("text")
functionCallResult := partResult.Get("functionCall")
// Handle text content (both regular content and thinking)
if partTextResult.Exists() {
// Process thinking content (internal reasoning)
if partResult.Get("thought").Bool() {
// Continue existing thinking block
if *responseType == 2 {
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, *responseIndex), "delta.thinking", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
} else {
// Transition from another state to thinking
// First, close any existing content block
if *responseType != 0 {
if *responseType == 2 {
output = output + "event: content_block_delta\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":null}}`, *responseIndex)
output = output + "\n\n\n"
}
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, *responseIndex)
output = output + "\n\n\n"
*responseIndex++
}
// Start a new thinking content block
output = output + "event: content_block_start\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_start","index":%d,"content_block":{"type":"thinking","thinking":""}}`, *responseIndex)
output = output + "\n\n\n"
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, *responseIndex), "delta.thinking", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
*responseType = 2 // Set state to thinking
}
} else {
// Process regular text content (user-visible output)
// Continue existing text block
if *responseType == 1 {
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, *responseIndex), "delta.text", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
} else {
// Transition from another state to text content
// First, close any existing content block
if *responseType != 0 {
if *responseType == 2 {
output = output + "event: content_block_delta\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":null}}`, *responseIndex)
output = output + "\n\n\n"
}
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, *responseIndex)
output = output + "\n\n\n"
*responseIndex++
}
// Start a new text content block
output = output + "event: content_block_start\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_start","index":%d,"content_block":{"type":"text","text":""}}`, *responseIndex)
output = output + "\n\n\n"
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, *responseIndex), "delta.text", partTextResult.String())
output = output + fmt.Sprintf("data: %s\n\n\n", data)
*responseType = 1 // Set state to content
}
}
} else if functionCallResult.Exists() {
// Handle function/tool calls from the AI model
// This processes tool usage requests and formats them for Claude API compatibility
usedTool = true
fcName := functionCallResult.Get("name").String()
// Handle state transitions when switching to function calls
// Close any existing function call block first
if *responseType == 3 {
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, *responseIndex)
output = output + "\n\n\n"
*responseIndex++
*responseType = 0
}
// Special handling for thinking state transition
if *responseType == 2 {
output = output + "event: content_block_delta\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":null}}`, *responseIndex)
output = output + "\n\n\n"
}
// Close any other existing content block
if *responseType != 0 {
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, *responseIndex)
output = output + "\n\n\n"
*responseIndex++
}
// Start a new tool use content block
// This creates the structure for a function call in Claude format
output = output + "event: content_block_start\n"
// Create the tool use block with unique ID and function details
data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, *responseIndex)
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
data, _ = sjson.Set(data, "content_block.name", fcName)
output = output + fmt.Sprintf("data: %s\n\n\n", data)
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
output = output + "event: content_block_delta\n"
data, _ = sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"input_json_delta","partial_json":""}}`, *responseIndex), "delta.partial_json", fcArgsResult.Raw)
output = output + fmt.Sprintf("data: %s\n\n\n", data)
}
*responseType = 3
}
}
}
usageResult := gjson.GetBytes(rawJson, "response.usageMetadata")
if usageResult.Exists() && bytes.Contains(rawJson, []byte(`"finishReason"`)) {
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
output = output + "event: content_block_stop\n"
output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, *responseIndex)
output = output + "\n\n\n"
output = output + "event: message_delta\n"
output = output + `data: `
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
if usedTool {
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
}
thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
output = output + template + "\n\n\n"
}
}
return output
}
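Putting this state machine together with the `ClaudeMessages` handler, a text-only turn yields a Claude-style SSE sequence along these lines (condensed and illustrative: the IDs, text, and token counts are invented):

```
event: message_start
data: {"type":"message_start","message":{"id":"resp-abc123","role":"assistant",...}}

event: content_block_start
data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}

event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}

event: content_block_stop
data: {"type":"content_block_stop","index":0}

event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":8,"output_tokens":2}}

event: message_stop
data: {"type":"message_stop"}
```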


@@ -311,6 +311,7 @@ func (c *Client) APIRequest(ctx context.Context, endpoint string, body interface
}
}()
bodyBytes, _ := io.ReadAll(resp.Body)
// log.Debug(string(jsonBody))
return nil, &ErrorMessage{resp.StatusCode, fmt.Errorf(string(bodyBytes))}
}
@@ -411,130 +412,192 @@ func (c *Client) SendMessage(ctx context.Context, rawJson []byte, model string,
}
}
// SendMessageStream handles a single conversational turn, including tool calls.
func (c *Client) SendMessageStream(ctx context.Context, rawJson []byte, model string, systemInstruction *Content, contents []Content, tools []ToolDeclaration) (<-chan []byte, <-chan *ErrorMessage) {
// SendMessageStream handles streaming conversational turns with comprehensive parameter management.
// This function implements a sophisticated streaming system that supports tool calls, reasoning modes,
// quota management, and automatic model fallback. It returns two channels for asynchronous communication:
// one for streaming response data and another for error handling.
func (c *Client) SendMessageStream(ctx context.Context, rawJson []byte, model string, systemInstruction *Content, contents []Content, tools []ToolDeclaration, includeThoughts ...bool) (<-chan []byte, <-chan *ErrorMessage) {
// Define the data prefix used in Server-Sent Events streaming format
dataTag := []byte("data: ")
// Create channels for asynchronous communication
// errChan: delivers error messages during streaming
// dataChan: delivers response data chunks
errChan := make(chan *ErrorMessage)
dataChan := make(chan []byte)
// Launch a goroutine to handle the streaming process asynchronously
// This allows the function to return immediately while processing continues in the background
go func() {
// Ensure channels are properly closed when the goroutine exits
defer close(errChan)
defer close(dataChan)
// Configure thinking/reasoning capabilities
// Default to including thoughts unless explicitly disabled
includeThoughtsFlag := true
if len(includeThoughts) > 0 {
includeThoughtsFlag = includeThoughts[0]
}
// Build the base request structure for the Gemini API
// This includes conversation contents and generation configuration
request := GenerateContentRequest{
Contents: contents,
GenerationConfig: GenerationConfig{
ThinkingConfig: GenerationConfigThinkingConfig{
IncludeThoughts: true,
IncludeThoughts: includeThoughtsFlag,
},
},
}
// Add system instructions if provided
// System instructions guide the AI's behavior and response style
request.SystemInstruction = systemInstruction
// Add available tools for function calling capabilities
// Tools allow the AI to perform actions beyond text generation
request.Tools = tools
// Construct the complete request body with project context
// The project ID is essential for proper API routing and billing
requestBody := map[string]interface{}{
"project": c.GetProjectID(), // Assuming ProjectID is available
"project": c.GetProjectID(), // Project ID for API routing and quota management
"request": request,
"model": model,
}
// Serialize the request body to JSON for API transmission
byteRequestBody, _ := json.Marshal(requestBody)
// log.Debug(string(byteRequestBody))
// Parse and configure reasoning effort levels from the original request
// This maps Claude-style reasoning effort parameters to Gemini's thinking budget system
reasoningEffortResult := gjson.GetBytes(rawJson, "reasoning_effort")
if reasoningEffortResult.String() == "none" {
// Disable thinking entirely for fastest responses
byteRequestBody, _ = sjson.DeleteBytes(byteRequestBody, "request.generationConfig.thinkingConfig.include_thoughts")
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
} else if reasoningEffortResult.String() == "auto" {
// Let the model decide the appropriate thinking budget automatically
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
} else if reasoningEffortResult.String() == "low" {
// Minimal thinking for simple tasks (1,024-token thinking budget)
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
} else if reasoningEffortResult.String() == "medium" {
// Moderate thinking for complex tasks (8,192-token thinking budget)
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
} else if reasoningEffortResult.String() == "high" {
// Maximum thinking for very complex tasks (24,576-token thinking budget)
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.thinkingConfig.thinkingBudget", 24576)
} else {
// Default to automatic thinking budget if no specific level is provided
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
}
// Configure temperature parameter for response randomness control
// Temperature affects the creativity vs consistency trade-off in responses
temperatureResult := gjson.GetBytes(rawJson, "temperature")
if temperatureResult.Exists() && temperatureResult.Type == gjson.Number {
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.temperature", temperatureResult.Num)
}
// Configure top-p parameter for nucleus sampling
// Controls the cumulative probability threshold for token selection
topPResult := gjson.GetBytes(rawJson, "top_p")
if topPResult.Exists() && topPResult.Type == gjson.Number {
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.topP", topPResult.Num)
}
// Configure top-k parameter for limiting token candidates
// Restricts the model to consider only the top K most likely tokens
topKResult := gjson.GetBytes(rawJson, "top_k")
if topKResult.Exists() && topKResult.Type == gjson.Number {
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.topK", topKResult.Num)
}
// log.Debug(string(byteRequestBody))
// Initialize model name for quota management and potential fallback
modelName := model
var stream io.ReadCloser
// Quota management and model fallback loop
// This loop handles quota exceeded scenarios and automatic model switching
for {
// Check if the current model has exceeded its quota
if c.isModelQuotaExceeded(modelName) {
// Attempt to switch to a preview model if configured and using account auth
if c.cfg.QuotaExceeded.SwitchPreviewModel && c.glAPIKey == "" {
modelName = c.getPreviewModel(model)
if modelName != "" {
log.Debugf("Model %s is quota exceeded. Switch to preview model %s", model, modelName)
// Update the request body with the new model name
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "model", modelName)
continue
continue // Retry with the preview model
}
}
// If no fallback is available, return a quota exceeded error
errChan <- &ErrorMessage{
StatusCode: 429,
Error: fmt.Errorf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, model),
}
return
}
// Attempt to establish a streaming connection with the API
var err *ErrorMessage
stream, err = c.APIRequest(ctx, "streamGenerateContent", byteRequestBody, true)
if err != nil {
// Handle quota exceeded errors by marking the model and potentially retrying
if err.StatusCode == 429 {
now := time.Now()
c.modelQuotaExceeded[modelName] = &now
c.modelQuotaExceeded[modelName] = &now // Mark model as quota exceeded
// If preview model switching is enabled, retry the loop
if c.cfg.QuotaExceeded.SwitchPreviewModel && c.glAPIKey == "" {
continue
}
}
// Forward other errors to the error channel
errChan <- err
return
}
// Clear any previous quota exceeded status for this model
delete(c.modelQuotaExceeded, modelName)
break
break // Successfully established connection, exit the retry loop
}
// Process the streaming response using a scanner
// This handles the Server-Sent Events format from the API
scanner := bufio.NewScanner(stream)
for scanner.Scan() {
line := scanner.Bytes()
// log.Printf("Received stream chunk: %s", line)
// Filter and forward only data lines (those prefixed with "data: ")
// This extracts the actual JSON content from the SSE format
if bytes.HasPrefix(line, dataTag) {
dataChan <- line[6:]
dataChan <- line[6:] // Remove "data: " prefix and send the JSON content
}
}
// Handle any scanning errors that occurred during stream processing
if errScanner := scanner.Err(); errScanner != nil {
// log.Println(err)
// Send a 500 Internal Server Error for scanning failures
errChan <- &ErrorMessage{500, errScanner}
_ = stream.Close()
return
}
// Ensure the stream is properly closed to prevent resource leaks
_ = stream.Close()
}()
// Return the channels immediately for asynchronous communication
// The caller can read from these channels while the goroutine processes the request
return dataChan, errChan
}
// SendRawMessage handles a single conversational turn, including tool calls.
func (c *Client) SendRawMessage(ctx context.Context, rawJson []byte) ([]byte, *ErrorMessage) {
rawJson, _ = sjson.SetBytes(rawJson, "project", c.GetProjectID())
if c.glAPIKey == "" {
rawJson, _ = sjson.SetBytes(rawJson, "project", c.GetProjectID())
}
modelResult := gjson.GetBytes(rawJson, "model")
model := modelResult.String()
@@ -584,7 +647,9 @@ func (c *Client) SendRawMessageStream(ctx context.Context, rawJson []byte) (<-ch
defer close(errChan)
defer close(dataChan)
rawJson, _ = sjson.SetBytes(rawJson, "project", c.GetProjectID())
if c.glAPIKey == "" {
rawJson, _ = sjson.SetBytes(rawJson, "project", c.GetProjectID())
}
modelResult := gjson.GetBytes(rawJson, "model")
model := modelResult.String()
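
Finally, a consumer sketch for `SendMessageStream`'s two-channel contract, assuming code living inside this repository (the `internal` package is not importable from outside); the function and variable names here are illustrative.

```go
package example

import (
	"context"
	"fmt"

	"github.com/luispater/CLIProxyAPI/internal/client"
)

// consumeStream drains one streaming turn. The client and request pieces are
// assumed to be prepared elsewhere (e.g. by translator.PrepareClaudeRequest).
func consumeStream(cli *client.Client, rawJson []byte, model string,
	system *client.Content, contents []client.Content, tools []client.ToolDeclaration) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	respChan, errChan := cli.SendMessageStream(ctx, rawJson, model, system, contents, tools)
	for {
		select {
		case chunk, ok := <-respChan:
			if !ok {
				return // respChan closed: the turn is complete
			}
			fmt.Println(string(chunk)) // one JSON chunk per SSE "data:" line
		case errMsg, ok := <-errChan:
			if !ok {
				errChan = nil // closed alongside respChan; keep draining respChan
				continue
			}
			fmt.Printf("backend error %d: %v\n", errMsg.StatusCode, errMsg.Error)
			return
		}
	}
}
```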