mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-02 12:30:50 +08:00
Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fa8d94971f | ||
|
|
ef68a97526 | ||
|
|
d880d1a1ea | ||
|
|
d4104214ed | ||
|
|
273e1d9cbe | ||
|
|
65f47c196a | ||
|
|
9be56fe8e0 | ||
|
|
589ae6d3aa | ||
|
|
7cb76ae1a5 | ||
|
|
e73f165070 | ||
|
|
512f2d5247 | ||
|
|
bf086464dd |
@@ -8,4 +8,10 @@ builds:
|
||||
- amd64
|
||||
- arm64
|
||||
main: ./cmd/server/
|
||||
binary: cli-proxy-api
|
||||
binary: cli-proxy-api
|
||||
archives:
|
||||
- id: "cli-proxy-api"
|
||||
files:
|
||||
- LICENSE
|
||||
- README.md
|
||||
- config.yaml
|
||||
46
README.md
46
README.md
@@ -10,6 +10,8 @@ A proxy server that provides an OpenAI-compatible API interface for CLI. This al
|
||||
- Multimodal input support (text and images)
|
||||
- Multiple account support with load balancing
|
||||
- Simple CLI authentication flow
|
||||
- Support for Generative Language API Key
|
||||
- Support Gemini CLI with multiple account load balancing
|
||||
|
||||
## Installation
|
||||
|
||||
@@ -146,13 +148,14 @@ The server uses a YAML configuration file (`config.yaml`) located in the project
|
||||
|
||||
### Configuration Options
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
|-------------|----------|--------------------|----------------------------------------------------------------------------------------------|
|
||||
| `port` | integer | 8317 | The port number on which the server will listen |
|
||||
| `auth_dir` | string | "~/.cli-proxy-api" | Directory where authentication tokens are stored. Supports using `~` for home directory |
|
||||
| `proxy-url` | string | "" | Proxy url, support socks5/http/https protocol, example: socks5://user:pass@192.168.1.1:1080/ |
|
||||
| `debug` | boolean | false | Enable debug mode for verbose logging |
|
||||
| `api_keys` | string[] | [] | List of API keys that can be used to authenticate requests |
|
||||
| Parameter | Type | Default | Description |
|
||||
|-------------------------------|----------|--------------------|----------------------------------------------------------------------------------------------|
|
||||
| `port` | integer | 8317 | The port number on which the server will listen |
|
||||
| `auth-dir` | string | "~/.cli-proxy-api" | Directory where authentication tokens are stored. Supports using `~` for home directory |
|
||||
| `proxy-url` | string | "" | Proxy url, support socks5/http/https protocol, example: socks5://user:pass@192.168.1.1:1080/ |
|
||||
| `debug` | boolean | false | Enable debug mode for verbose logging |
|
||||
| `api-keys` | string[] | [] | List of API keys that can be used to authenticate requests |
|
||||
| `generative-language-api-key` | string[] | [] | List of Generative Language API keys |
|
||||
|
||||
### Example Configuration File
|
||||
|
||||
@@ -161,29 +164,50 @@ The server uses a YAML configuration file (`config.yaml`) located in the project
|
||||
port: 8317
|
||||
|
||||
# Authentication directory (supports ~ for home directory)
|
||||
auth_dir: "~/.cli-proxy-api"
|
||||
auth-dir: "~/.cli-proxy-api"
|
||||
|
||||
# Enable debug logging
|
||||
debug: false
|
||||
|
||||
# API keys for authentication
|
||||
api_keys:
|
||||
api-keys:
|
||||
- "your-api-key-1"
|
||||
- "your-api-key-2"
|
||||
|
||||
# API keys for official Generative Language API
|
||||
generative-language-api-key:
|
||||
- "AIzaSy...01"
|
||||
- "AIzaSy...02"
|
||||
- "AIzaSy...03"
|
||||
- "AIzaSy...04"
|
||||
```
|
||||
|
||||
### Authentication Directory
|
||||
|
||||
The `auth_dir` parameter specifies where authentication tokens are stored. When you run the login command, the application will create JSON files in this directory containing the authentication tokens for your Google accounts. Multiple accounts can be used for load balancing.
|
||||
The `auth-dir` parameter specifies where authentication tokens are stored. When you run the login command, the application will create JSON files in this directory containing the authentication tokens for your Google accounts. Multiple accounts can be used for load balancing.
|
||||
|
||||
### API Keys
|
||||
|
||||
The `api_keys` parameter allows you to define a list of API keys that can be used to authenticate requests to your proxy server. When making requests to the API, you can include one of these keys in the `Authorization` header:
|
||||
The `api-keys` parameter allows you to define a list of API keys that can be used to authenticate requests to your proxy server. When making requests to the API, you can include one of these keys in the `Authorization` header:
|
||||
|
||||
```
|
||||
Authorization: Bearer your-api-key-1
|
||||
```
|
||||
|
||||
### Official Generative Language API
|
||||
|
||||
The `generative-language-api-key` parameter allows you to define a list of API keys that can be used to authenticate requests to the official Generative Language API.
|
||||
|
||||
## Gemini CLI with multiple account load balancing
|
||||
|
||||
Start the CLI Proxy API server, then set the `CODE_ASSIST_ENDPOINT` environment variable to the server's URL.
|
||||
|
||||
```bash
|
||||
export CODE_ASSIST_ENDPOINT="http://127.0.0.1:8317"
|
||||
```
|
||||
|
||||
The server relays the `loadCodeAssist`, `onboardUser`, and `countTokens` requests, and automatically load-balances text generation requests across the configured accounts.
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome! Please feel free to submit a Pull Request.
|
||||
|
||||
14
config.yaml
14
config.yaml
@@ -1,7 +1,15 @@
|
||||
port: 8317
|
||||
auth_dir: "~/.cli-proxy-api"
|
||||
auth-dir: "~/.cli-proxy-api"
|
||||
debug: true
|
||||
proxy-url: ""
|
||||
api_keys:
|
||||
quota-exceeded:
|
||||
switch-project: true
|
||||
switch-preview-model: true
|
||||
api-keys:
|
||||
- "12345"
|
||||
- "23456"
|
||||
- "23456"
|
||||
generative-language-api-key:
|
||||
- "AIzaSy...01"
|
||||
- "AIzaSy...02"
|
||||
- "AIzaSy...03"
|
||||
- "AIzaSy...04"
|
||||
|
||||
228
internal/api/cli-handlers.go
Normal file
228
internal/api/cli-handlers.go
Normal file
@@ -0,0 +1,228 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/luispater/CLIProxyAPI/internal/client"
|
||||
"github.com/luispater/CLIProxyAPI/internal/util"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
func (h *APIHandlers) CLIHandler(c *gin.Context) {
|
||||
rawJson, _ := c.GetRawData()
|
||||
requestRawURI := c.Request.URL.Path
|
||||
if requestRawURI == "/v1internal:generateContent" {
|
||||
h.internalGenerateContent(c, rawJson)
|
||||
} else if requestRawURI == "/v1internal:streamGenerateContent" {
|
||||
h.internalStreamGenerateContent(c, rawJson)
|
||||
} else {
|
||||
reqBody := bytes.NewBuffer(rawJson)
|
||||
req, err := http.NewRequest("POST", fmt.Sprintf("https://cloudcode-pa.googleapis.com%s", c.Request.URL.RequestURI()), reqBody)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, ErrorResponse{
|
||||
Error: ErrorDetail{
|
||||
Message: fmt.Sprintf("Invalid request: %v", err),
|
||||
Type: "invalid_request_error",
|
||||
},
|
||||
})
|
||||
return
|
||||
}
|
||||
for key, value := range c.Request.Header {
|
||||
req.Header[key] = value
|
||||
}
|
||||
|
||||
httpClient, err := util.SetProxy(h.cfg, &http.Client{})
|
||||
if err != nil {
|
||||
log.Fatalf("set proxy failed: %v", err)
|
||||
}
|
||||
|
||||
resp, err := httpClient.Do(req)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, ErrorResponse{
|
||||
Error: ErrorDetail{
|
||||
Message: fmt.Sprintf("Invalid request: %v", err),
|
||||
Type: "invalid_request_error",
|
||||
},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
defer func() {
|
||||
if err = resp.Body.Close(); err != nil {
|
||||
log.Printf("warn: failed to close response body: %v", err)
|
||||
}
|
||||
}()
|
||||
bodyBytes, _ := io.ReadAll(resp.Body)
|
||||
|
||||
c.JSON(http.StatusBadRequest, ErrorResponse{
|
||||
Error: ErrorDetail{
|
||||
Message: string(bodyBytes),
|
||||
Type: "invalid_request_error",
|
||||
},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
defer func() {
|
||||
_ = resp.Body.Close()
|
||||
}()
|
||||
|
||||
for key, value := range resp.Header {
|
||||
c.Header(key, value[0])
|
||||
}
|
||||
output, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
log.Errorf("Failed to read response body: %v", err)
|
||||
return
|
||||
}
|
||||
_, _ = c.Writer.Write(output)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *APIHandlers) internalStreamGenerateContent(c *gin.Context, rawJson []byte) {
|
||||
// Get the http.Flusher interface to manually flush the response.
|
||||
flusher, ok := c.Writer.(http.Flusher)
|
||||
if !ok {
|
||||
c.JSON(http.StatusInternalServerError, ErrorResponse{
|
||||
Error: ErrorDetail{
|
||||
Message: "Streaming not supported",
|
||||
Type: "server_error",
|
||||
},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
modelResult := gjson.GetBytes(rawJson, "model")
|
||||
modelName := modelResult.String()
|
||||
|
||||
cliCtx, cliCancel := context.WithCancel(context.Background())
|
||||
var cliClient *client.Client
|
||||
defer func() {
|
||||
// Ensure the client's mutex is unlocked on function exit.
|
||||
if cliClient != nil {
|
||||
cliClient.RequestMutex.Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
outLoop:
|
||||
for {
|
||||
var errorResponse *client.ErrorMessage
|
||||
cliClient, errorResponse = h.getClient(modelName)
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
|
||||
log.Debugf("Request use generative language API Key: %s", glAPIKey)
|
||||
} else {
|
||||
log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
|
||||
}
|
||||
// Send the message and receive response chunks and errors via channels.
|
||||
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJson)
|
||||
hasFirstResponse := false
|
||||
for {
|
||||
select {
|
||||
// Handle client disconnection.
|
||||
case <-c.Request.Context().Done():
|
||||
if c.Request.Context().Err().Error() == "context canceled" {
|
||||
log.Debugf("Client disconnected: %v", c.Request.Context().Err())
|
||||
cliCancel() // Cancel the backend request.
|
||||
return
|
||||
}
|
||||
// Process incoming response chunks.
|
||||
case chunk, okStream := <-respChan:
|
||||
if !okStream {
|
||||
cliCancel()
|
||||
return
|
||||
} else {
|
||||
hasFirstResponse = true
|
||||
if cliClient.GetGenerativeLanguageAPIKey() != "" {
|
||||
chunk, _ = sjson.SetRawBytes(chunk, "response", chunk)
|
||||
}
|
||||
_, _ = c.Writer.Write([]byte("data: "))
|
||||
_, _ = c.Writer.Write(chunk)
|
||||
_, _ = c.Writer.Write([]byte("\n\n"))
|
||||
flusher.Flush()
|
||||
}
|
||||
// Handle errors from the backend.
|
||||
case err, okError := <-errChan:
|
||||
if okError {
|
||||
if err.StatusCode == 429 && h.cfg.QuotaExceeded.SwitchProject {
|
||||
continue outLoop
|
||||
} else {
|
||||
c.Status(err.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
}
|
||||
return
|
||||
}
|
||||
// Send a keep-alive signal to the client.
|
||||
case <-time.After(500 * time.Millisecond):
|
||||
if hasFirstResponse {
|
||||
_, _ = c.Writer.Write([]byte("\n"))
|
||||
flusher.Flush()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (h *APIHandlers) internalGenerateContent(c *gin.Context, rawJson []byte) {
|
||||
c.Header("Content-Type", "application/json")
|
||||
|
||||
modelResult := gjson.GetBytes(rawJson, "model")
|
||||
modelName := modelResult.String()
|
||||
cliCtx, cliCancel := context.WithCancel(context.Background())
|
||||
var cliClient *client.Client
|
||||
defer func() {
|
||||
if cliClient != nil {
|
||||
cliClient.RequestMutex.Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
for {
|
||||
var errorResponse *client.ErrorMessage
|
||||
cliClient, errorResponse = h.getClient(modelName)
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
|
||||
log.Debugf("Request use generative language API Key: %s", glAPIKey)
|
||||
} else {
|
||||
log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
|
||||
}
|
||||
|
||||
resp, err := cliClient.SendRawMessage(cliCtx, rawJson)
|
||||
if err != nil {
|
||||
if err.StatusCode == 429 && h.cfg.QuotaExceeded.SwitchProject {
|
||||
continue
|
||||
} else {
|
||||
c.Status(err.StatusCode)
|
||||
_, _ = c.Writer.Write([]byte(err.Error.Error()))
|
||||
cliCancel()
|
||||
}
|
||||
break
|
||||
} else {
|
||||
_, _ = c.Writer.Write(resp)
|
||||
cliCancel()
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"github.com/luispater/CLIProxyAPI/internal/api/translator"
|
||||
"github.com/luispater/CLIProxyAPI/internal/client"
|
||||
"github.com/luispater/CLIProxyAPI/internal/config"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/tidwall/gjson"
|
||||
"net/http"
|
||||
@@ -23,15 +24,15 @@ var (
|
||||
// It holds a pool of clients to interact with the backend service.
|
||||
type APIHandlers struct {
|
||||
cliClients []*client.Client
|
||||
debug bool
|
||||
cfg *config.Config
|
||||
}
|
||||
|
||||
// NewAPIHandlers creates a new API handlers instance.
|
||||
// It takes a slice of clients and the application configuration as input.
|
||||
func NewAPIHandlers(cliClients []*client.Client, debug bool) *APIHandlers {
|
||||
func NewAPIHandlers(cliClients []*client.Client, cfg *config.Config) *APIHandlers {
|
||||
return &APIHandlers{
|
||||
cliClients: cliClients,
|
||||
debug: debug,
|
||||
cfg: cfg,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,6 +165,48 @@ func (h *APIHandlers) Models(c *gin.Context) {
|
||||
})
|
||||
}
|
||||
|
||||
func (h *APIHandlers) getClient(modelName string) (*client.Client, *client.ErrorMessage) {
|
||||
var cliClient *client.Client
|
||||
|
||||
// Lock the mutex to update the last used client index
|
||||
mutex.Lock()
|
||||
startIndex := lastUsedClientIndex
|
||||
currentIndex := (startIndex + 1) % len(h.cliClients)
|
||||
lastUsedClientIndex = currentIndex
|
||||
mutex.Unlock()
|
||||
|
||||
// Reorder the client to start from the last used index
|
||||
reorderedClients := make([]*client.Client, 0)
|
||||
for i := 0; i < len(h.cliClients); i++ {
|
||||
cliClient = h.cliClients[(startIndex+1+i)%len(h.cliClients)]
|
||||
if cliClient.IsModelQuotaExceeded(modelName) {
|
||||
log.Debugf("Model %s is quota exceeded for account %s, project id: %s", modelName, cliClient.GetEmail(), cliClient.GetProjectID())
|
||||
cliClient = nil
|
||||
continue
|
||||
}
|
||||
reorderedClients = append(reorderedClients, cliClient)
|
||||
}
|
||||
|
||||
if len(reorderedClients) == 0 {
|
||||
return nil, &client.ErrorMessage{StatusCode: 429, Error: fmt.Errorf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, modelName)}
|
||||
}
|
||||
|
||||
locked := false
|
||||
for i := 0; i < len(reorderedClients); i++ {
|
||||
cliClient = reorderedClients[i]
|
||||
if cliClient.RequestMutex.TryLock() {
|
||||
locked = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !locked {
|
||||
cliClient = h.cliClients[0]
|
||||
cliClient.RequestMutex.Lock()
|
||||
}
|
||||
|
||||
return cliClient, nil
|
||||
}
|
||||
|
||||
// ChatCompletions handles the /v1/chat/completions endpoint.
|
||||
// It determines whether the request is for a streaming or non-streaming response
|
||||
// and calls the appropriate handler.
|
||||
@@ -195,19 +238,7 @@ func (h *APIHandlers) ChatCompletions(c *gin.Context) {
|
||||
func (h *APIHandlers) handleNonStreamingResponse(c *gin.Context, rawJson []byte) {
|
||||
c.Header("Content-Type", "application/json")
|
||||
|
||||
// Handle streaming manually
|
||||
flusher, ok := c.Writer.(http.Flusher)
|
||||
if !ok {
|
||||
c.JSON(http.StatusInternalServerError, ErrorResponse{
|
||||
Error: ErrorDetail{
|
||||
Message: "Streaming not supported",
|
||||
Type: "server_error",
|
||||
},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
modelName, contents, tools := translator.PrepareRequest(rawJson)
|
||||
modelName, systemInstruction, contents, tools := translator.PrepareRequest(rawJson)
|
||||
cliCtx, cliCancel := context.WithCancel(context.Background())
|
||||
var cliClient *client.Client
|
||||
defer func() {
|
||||
@@ -216,63 +247,41 @@ func (h *APIHandlers) handleNonStreamingResponse(c *gin.Context, rawJson []byte)
|
||||
}
|
||||
}()
|
||||
|
||||
// Lock the mutex to update the last used page index
|
||||
mutex.Lock()
|
||||
startIndex := lastUsedClientIndex
|
||||
currentIndex := (startIndex + 1) % len(h.cliClients)
|
||||
lastUsedClientIndex = currentIndex
|
||||
mutex.Unlock()
|
||||
|
||||
// Reorder the pages to start from the last used index
|
||||
reorderedPages := make([]*client.Client, len(h.cliClients))
|
||||
for i := 0; i < len(h.cliClients); i++ {
|
||||
reorderedPages[i] = h.cliClients[(startIndex+1+i)%len(h.cliClients)]
|
||||
}
|
||||
|
||||
locked := false
|
||||
for i := 0; i < len(reorderedPages); i++ {
|
||||
cliClient = reorderedPages[i]
|
||||
if cliClient.RequestMutex.TryLock() {
|
||||
locked = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !locked {
|
||||
cliClient = h.cliClients[0]
|
||||
cliClient.RequestMutex.Lock()
|
||||
}
|
||||
|
||||
log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
|
||||
jsonTemplate := `{"id":"","object":"chat.completion","created":123456,"model":"model","choices":[{"index":0,"message":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}]}`
|
||||
respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJson, modelName, contents, tools)
|
||||
for {
|
||||
select {
|
||||
case <-c.Request.Context().Done():
|
||||
if c.Request.Context().Err().Error() == "context canceled" {
|
||||
log.Debugf("Client disconnected: %v", c.Request.Context().Err())
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
case chunk, okStream := <-respChan:
|
||||
if !okStream {
|
||||
_, _ = fmt.Fprint(c.Writer, jsonTemplate)
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
var errorResponse *client.ErrorMessage
|
||||
cliClient, errorResponse = h.getClient(modelName)
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
isGlAPIKey := false
|
||||
if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
|
||||
log.Debugf("Request use generative language API Key: %s", glAPIKey)
|
||||
isGlAPIKey = true
|
||||
} else {
|
||||
log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
|
||||
}
|
||||
|
||||
resp, err := cliClient.SendMessage(cliCtx, rawJson, modelName, systemInstruction, contents, tools)
|
||||
if err != nil {
|
||||
if err.StatusCode == 429 && h.cfg.QuotaExceeded.SwitchProject {
|
||||
continue
|
||||
} else {
|
||||
jsonTemplate = translator.ConvertCliToOpenAINonStream(jsonTemplate, chunk)
|
||||
}
|
||||
case err, okError := <-errChan:
|
||||
if okError {
|
||||
c.Status(err.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
|
||||
flusher.Flush()
|
||||
_, _ = c.Writer.Write([]byte(err.Error.Error()))
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
case <-time.After(500 * time.Millisecond):
|
||||
_, _ = c.Writer.Write([]byte("\n"))
|
||||
flusher.Flush()
|
||||
break
|
||||
} else {
|
||||
openAIFormat := translator.ConvertCliToOpenAINonStream(resp, time.Now().Unix(), isGlAPIKey)
|
||||
if openAIFormat != "" {
|
||||
_, _ = c.Writer.Write([]byte(openAIFormat))
|
||||
}
|
||||
cliCancel()
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -297,7 +306,7 @@ func (h *APIHandlers) handleStreamingResponse(c *gin.Context, rawJson []byte) {
|
||||
}
|
||||
|
||||
// Prepare the request for the backend client.
|
||||
modelName, contents, tools := translator.PrepareRequest(rawJson)
|
||||
modelName, systemInstruction, contents, tools := translator.PrepareRequest(rawJson)
|
||||
cliCtx, cliCancel := context.WithCancel(context.Background())
|
||||
var cliClient *client.Client
|
||||
defer func() {
|
||||
@@ -307,75 +316,74 @@ func (h *APIHandlers) handleStreamingResponse(c *gin.Context, rawJson []byte) {
|
||||
}
|
||||
}()
|
||||
|
||||
// Use a round-robin approach to select the next available client.
|
||||
// This distributes the load among the available clients.
|
||||
mutex.Lock()
|
||||
startIndex := lastUsedClientIndex
|
||||
currentIndex := (startIndex + 1) % len(h.cliClients)
|
||||
lastUsedClientIndex = currentIndex
|
||||
mutex.Unlock()
|
||||
|
||||
// Reorder the clients to start from the next client in the rotation.
|
||||
reorderedPages := make([]*client.Client, len(h.cliClients))
|
||||
for i := 0; i < len(h.cliClients); i++ {
|
||||
reorderedPages[i] = h.cliClients[(startIndex+1+i)%len(h.cliClients)]
|
||||
}
|
||||
|
||||
// Attempt to lock a client for the request.
|
||||
locked := false
|
||||
for i := 0; i < len(reorderedPages); i++ {
|
||||
cliClient = reorderedPages[i]
|
||||
if cliClient.RequestMutex.TryLock() {
|
||||
locked = true
|
||||
break
|
||||
}
|
||||
}
|
||||
// If no client is available, block and wait for the first client.
|
||||
if !locked {
|
||||
cliClient = h.cliClients[0]
|
||||
cliClient.RequestMutex.Lock()
|
||||
}
|
||||
log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
|
||||
// Send the message and receive response chunks and errors via channels.
|
||||
respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJson, modelName, contents, tools)
|
||||
outLoop:
|
||||
for {
|
||||
select {
|
||||
// Handle client disconnection.
|
||||
case <-c.Request.Context().Done():
|
||||
if c.Request.Context().Err().Error() == "context canceled" {
|
||||
log.Debugf("Client disconnected: %v", c.Request.Context().Err())
|
||||
cliCancel() // Cancel the backend request.
|
||||
return
|
||||
}
|
||||
// Process incoming response chunks.
|
||||
case chunk, okStream := <-respChan:
|
||||
if !okStream {
|
||||
// Stream is closed, send the final [DONE] message.
|
||||
_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
} else {
|
||||
// Convert the chunk to OpenAI format and send it to the client.
|
||||
openAIFormat := translator.ConvertCliToOpenAI(chunk)
|
||||
if openAIFormat != "" {
|
||||
_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", openAIFormat)
|
||||
var errorResponse *client.ErrorMessage
|
||||
cliClient, errorResponse = h.getClient(modelName)
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
isGlAPIKey := false
|
||||
if glAPIKey := cliClient.GetGenerativeLanguageAPIKey(); glAPIKey != "" {
|
||||
log.Debugf("Request use generative language API Key: %s", glAPIKey)
|
||||
isGlAPIKey = true
|
||||
} else {
|
||||
log.Debugf("Request use account: %s, project id: %s", cliClient.GetEmail(), cliClient.GetProjectID())
|
||||
}
|
||||
// Send the message and receive response chunks and errors via channels.
|
||||
respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJson, modelName, systemInstruction, contents, tools)
|
||||
hasFirstResponse := false
|
||||
for {
|
||||
select {
|
||||
// Handle client disconnection.
|
||||
case <-c.Request.Context().Done():
|
||||
if c.Request.Context().Err().Error() == "context canceled" {
|
||||
log.Debugf("Client disconnected: %v", c.Request.Context().Err())
|
||||
cliCancel() // Cancel the backend request.
|
||||
return
|
||||
}
|
||||
// Process incoming response chunks.
|
||||
case chunk, okStream := <-respChan:
|
||||
if !okStream {
|
||||
// Stream is closed, send the final [DONE] message.
|
||||
_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
} else {
|
||||
// Convert the chunk to OpenAI format and send it to the client.
|
||||
hasFirstResponse = true
|
||||
openAIFormat := translator.ConvertCliToOpenAI(chunk, time.Now().Unix(), isGlAPIKey)
|
||||
if openAIFormat != "" {
|
||||
_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", openAIFormat)
|
||||
flusher.Flush()
|
||||
}
|
||||
}
|
||||
// Handle errors from the backend.
|
||||
case err, okError := <-errChan:
|
||||
if okError {
|
||||
if err.StatusCode == 429 && h.cfg.QuotaExceeded.SwitchProject {
|
||||
continue outLoop
|
||||
} else {
|
||||
c.Status(err.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
}
|
||||
return
|
||||
}
|
||||
// Send a keep-alive signal to the client.
|
||||
case <-time.After(500 * time.Millisecond):
|
||||
if hasFirstResponse {
|
||||
_, _ = c.Writer.Write([]byte(": CLI-PROXY-API PROCESSING\n\n"))
|
||||
flusher.Flush()
|
||||
}
|
||||
}
|
||||
// Handle errors from the backend.
|
||||
case err, okError := <-errChan:
|
||||
if okError {
|
||||
c.Status(err.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
// Send a keep-alive signal to the client.
|
||||
case <-time.After(500 * time.Millisecond):
|
||||
_, _ = c.Writer.Write([]byte(": CLI-PROXY-API PROCESSING\n\n"))
|
||||
flusher.Flush()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"fmt"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/luispater/CLIProxyAPI/internal/client"
|
||||
"github.com/luispater/CLIProxyAPI/internal/config"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"net/http"
|
||||
"strings"
|
||||
@@ -17,29 +18,19 @@ type Server struct {
|
||||
engine *gin.Engine
|
||||
server *http.Server
|
||||
handlers *APIHandlers
|
||||
cfg *ServerConfig
|
||||
}
|
||||
|
||||
// ServerConfig contains the configuration for the API server.
|
||||
type ServerConfig struct {
|
||||
// Port is the port number the server will listen on.
|
||||
Port string
|
||||
// Debug enables or disables debug mode for the server and Gin.
|
||||
Debug bool
|
||||
// ApiKeys is a list of valid API keys for authentication.
|
||||
ApiKeys []string
|
||||
cfg *config.Config
|
||||
}
|
||||
|
||||
// NewServer creates and initializes a new API server instance.
|
||||
// It sets up the Gin engine, middleware, routes, and handlers.
|
||||
func NewServer(config *ServerConfig, cliClients []*client.Client) *Server {
|
||||
func NewServer(cfg *config.Config, cliClients []*client.Client) *Server {
|
||||
// Set gin mode
|
||||
if !config.Debug {
|
||||
if !cfg.Debug {
|
||||
gin.SetMode(gin.ReleaseMode)
|
||||
}
|
||||
|
||||
// Create handlers
|
||||
handlers := NewAPIHandlers(cliClients, config.Debug)
|
||||
handlers := NewAPIHandlers(cliClients, cfg)
|
||||
|
||||
// Create gin engine
|
||||
engine := gin.New()
|
||||
@@ -53,7 +44,7 @@ func NewServer(config *ServerConfig, cliClients []*client.Client) *Server {
|
||||
s := &Server{
|
||||
engine: engine,
|
||||
handlers: handlers,
|
||||
cfg: config,
|
||||
cfg: cfg,
|
||||
}
|
||||
|
||||
// Setup routes
|
||||
@@ -61,7 +52,7 @@ func NewServer(config *ServerConfig, cliClients []*client.Client) *Server {
|
||||
|
||||
// Create HTTP server
|
||||
s.server = &http.Server{
|
||||
Addr: ":" + config.Port,
|
||||
Addr: fmt.Sprintf(":%d", cfg.Port),
|
||||
Handler: engine,
|
||||
}
|
||||
|
||||
@@ -90,6 +81,8 @@ func (s *Server) setupRoutes() {
|
||||
},
|
||||
})
|
||||
})
|
||||
s.engine.POST("/v1internal:method", s.handlers.CLIHandler)
|
||||
|
||||
}
|
||||
|
||||
// Start begins listening for and serving HTTP requests.
|
||||
@@ -138,7 +131,7 @@ func corsMiddleware() gin.HandlerFunc {
|
||||
|
||||
// AuthMiddleware returns a Gin middleware handler that authenticates requests
|
||||
// using API keys. If no API keys are configured, it allows all requests.
|
||||
func AuthMiddleware(cfg *ServerConfig) gin.HandlerFunc {
|
||||
func AuthMiddleware(cfg *config.Config) gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
if len(cfg.ApiKeys) == 0 {
|
||||
c.Next()
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
// PrepareRequest translates a raw JSON request from an OpenAI-compatible format
|
||||
// to the internal format expected by the backend client. It parses messages,
|
||||
// roles, content types (text, image, file), and tool calls.
|
||||
func PrepareRequest(rawJson []byte) (string, []client.Content, []client.ToolDeclaration) {
|
||||
func PrepareRequest(rawJson []byte) (string, *client.Content, []client.Content, []client.ToolDeclaration) {
|
||||
// Extract the model name from the request, defaulting to "gemini-2.5-pro".
|
||||
modelName := "gemini-2.5-pro"
|
||||
modelResult := gjson.GetBytes(rawJson, "model")
|
||||
@@ -22,7 +22,41 @@ func PrepareRequest(rawJson []byte) (string, []client.Content, []client.ToolDecl
|
||||
|
||||
// Process the array of messages.
|
||||
contents := make([]client.Content, 0)
|
||||
var systemInstruction *client.Content
|
||||
messagesResult := gjson.GetBytes(rawJson, "messages")
|
||||
|
||||
toolItems := make(map[string]*client.FunctionResponse)
|
||||
if messagesResult.IsArray() {
|
||||
messagesResults := messagesResult.Array()
|
||||
for i := 0; i < len(messagesResults); i++ {
|
||||
messageResult := messagesResults[i]
|
||||
roleResult := messageResult.Get("role")
|
||||
if roleResult.Type != gjson.String {
|
||||
continue
|
||||
}
|
||||
contentResult := messageResult.Get("content")
|
||||
if roleResult.String() == "tool" {
|
||||
toolCallID := messageResult.Get("tool_call_id").String()
|
||||
if toolCallID != "" {
|
||||
var responseData string
|
||||
if contentResult.Type == gjson.String {
|
||||
responseData = contentResult.String()
|
||||
} else if contentResult.IsObject() && contentResult.Get("type").String() == "text" {
|
||||
responseData = contentResult.Get("text").String()
|
||||
}
|
||||
|
||||
// drop the timestamp from the tool call ID
|
||||
toolCallIDs := strings.Split(toolCallID, "-")
|
||||
strings.Join(toolCallIDs, "-")
|
||||
newToolCallID := strings.Join(toolCallIDs[:len(toolCallIDs)-1], "-")
|
||||
|
||||
functionResponse := client.FunctionResponse{Name: newToolCallID, Response: map[string]interface{}{"result": responseData}}
|
||||
toolItems[toolCallID] = &functionResponse
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if messagesResult.IsArray() {
|
||||
messagesResults := messagesResult.Array()
|
||||
for i := 0; i < len(messagesResults); i++ {
|
||||
@@ -37,13 +71,11 @@ func PrepareRequest(rawJson []byte) (string, []client.Content, []client.ToolDecl
|
||||
// System messages are converted to a user message followed by a model's acknowledgment.
|
||||
case "system":
|
||||
if contentResult.Type == gjson.String {
|
||||
contents = append(contents, client.Content{Role: "user", Parts: []client.Part{{Text: contentResult.String()}}})
|
||||
contents = append(contents, client.Content{Role: "model", Parts: []client.Part{{Text: "Understood. I will follow these instructions and use my tools to assist you."}}})
|
||||
systemInstruction = &client.Content{Role: "user", Parts: []client.Part{{Text: contentResult.String()}}}
|
||||
} else if contentResult.IsObject() {
|
||||
// Handle object-based system messages.
|
||||
if contentResult.Get("type").String() == "text" {
|
||||
contents = append(contents, client.Content{Role: "user", Parts: []client.Part{{Text: contentResult.Get("text").String()}}})
|
||||
contents = append(contents, client.Content{Role: "model", Parts: []client.Part{{Text: "Understood. I will follow these instructions and use my tools to assist you."}}})
|
||||
systemInstruction = &client.Content{Role: "user", Parts: []client.Part{{Text: contentResult.Get("text").String()}}}
|
||||
}
|
||||
}
|
||||
// User messages can contain simple text or a multi-part body.
|
||||
@@ -98,40 +130,44 @@ func PrepareRequest(rawJson []byte) (string, []client.Content, []client.ToolDecl
|
||||
contents = append(contents, client.Content{Role: "model", Parts: []client.Part{{Text: contentResult.String()}}})
|
||||
} else if !contentResult.Exists() || contentResult.Type == gjson.Null {
|
||||
// Handle tool calls made by the assistant.
|
||||
functionIDs := make([]string, 0)
|
||||
toolCallsResult := messageResult.Get("tool_calls")
|
||||
if toolCallsResult.IsArray() {
|
||||
parts := make([]client.Part, 0)
|
||||
tcsResult := toolCallsResult.Array()
|
||||
for j := 0; j < len(tcsResult); j++ {
|
||||
tcResult := tcsResult[j]
|
||||
|
||||
functionID := tcResult.Get("id").String()
|
||||
functionIDs = append(functionIDs, functionID)
|
||||
|
||||
functionName := tcResult.Get("function.name").String()
|
||||
functionArgs := tcResult.Get("function.arguments").String()
|
||||
var args map[string]any
|
||||
if err := json.Unmarshal([]byte(functionArgs), &args); err == nil {
|
||||
contents = append(contents, client.Content{
|
||||
Role: "model", Parts: []client.Part{{
|
||||
FunctionCall: &client.FunctionCall{
|
||||
Name: functionName,
|
||||
Args: args,
|
||||
},
|
||||
}},
|
||||
parts = append(parts, client.Part{
|
||||
FunctionCall: &client.FunctionCall{
|
||||
Name: functionName,
|
||||
Args: args,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
if len(parts) > 0 {
|
||||
contents = append(contents, client.Content{
|
||||
Role: "model", Parts: parts,
|
||||
})
|
||||
|
||||
toolParts := make([]client.Part, 0)
|
||||
for _, functionID := range functionIDs {
|
||||
if functionResponse, ok := toolItems[functionID]; ok {
|
||||
toolParts = append(toolParts, client.Part{FunctionResponse: functionResponse})
|
||||
}
|
||||
}
|
||||
contents = append(contents, client.Content{Role: "tool", Parts: toolParts})
|
||||
}
|
||||
}
|
||||
}
|
||||
// Tool messages contain the output of a tool call.
|
||||
case "tool":
|
||||
toolCallID := messageResult.Get("tool_call_id").String()
|
||||
if toolCallID != "" {
|
||||
var responseData string
|
||||
if contentResult.Type == gjson.String {
|
||||
responseData = contentResult.String()
|
||||
} else if contentResult.IsObject() && contentResult.Get("type").String() == "text" {
|
||||
responseData = contentResult.Get("text").String()
|
||||
}
|
||||
functionResponse := client.FunctionResponse{Name: toolCallID, Response: map[string]interface{}{"result": responseData}}
|
||||
contents = append(contents, client.Content{Role: "tool", Parts: []client.Part{{FunctionResponse: &functionResponse}}})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -159,5 +195,5 @@ func PrepareRequest(rawJson []byte) (string, []client.Content, []client.ToolDecl
|
||||
tools = make([]client.ToolDeclaration, 0)
|
||||
}
|
||||
|
||||
return modelName, contents, tools
|
||||
return modelName, systemInstruction, contents, tools
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package translator
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -10,7 +11,11 @@ import (
|
||||
// ConvertCliToOpenAI translates a single chunk of a streaming response from the
|
||||
// backend client format to the OpenAI Server-Sent Events (SSE) format.
|
||||
// It returns an empty string if the chunk contains no useful data.
|
||||
func ConvertCliToOpenAI(rawJson []byte) string {
|
||||
func ConvertCliToOpenAI(rawJson []byte, unixTimestamp int64, isGlAPIKey bool) string {
|
||||
if isGlAPIKey {
|
||||
rawJson, _ = sjson.SetRawBytes(rawJson, "response", rawJson)
|
||||
}
|
||||
|
||||
// Initialize the OpenAI SSE template.
|
||||
template := `{"id":"","object":"chat.completion.chunk","created":12345,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}]}`
|
||||
|
||||
@@ -22,11 +27,12 @@ func ConvertCliToOpenAI(rawJson []byte) string {
|
||||
// Extract and set the creation timestamp.
|
||||
if createTimeResult := gjson.GetBytes(rawJson, "response.createTime"); createTimeResult.Exists() {
|
||||
t, err := time.Parse(time.RFC3339Nano, createTimeResult.String())
|
||||
unixTimestamp := time.Now().Unix()
|
||||
if err == nil {
|
||||
unixTimestamp = t.Unix()
|
||||
}
|
||||
template, _ = sjson.Set(template, "created", unixTimestamp)
|
||||
} else {
|
||||
template, _ = sjson.Set(template, "created", unixTimestamp)
|
||||
}
|
||||
|
||||
// Extract and set the response ID.
|
||||
@@ -57,65 +63,75 @@ func ConvertCliToOpenAI(rawJson []byte) string {
|
||||
}
|
||||
|
||||
// Process the main content part of the response.
|
||||
partResult := gjson.GetBytes(rawJson, "response.candidates.0.content.parts.0")
|
||||
partTextResult := partResult.Get("text")
|
||||
functionCallResult := partResult.Get("functionCall")
|
||||
partsResult := gjson.GetBytes(rawJson, "response.candidates.0.content.parts")
|
||||
if partsResult.IsArray() {
|
||||
partResults := partsResult.Array()
|
||||
for i := 0; i < len(partResults); i++ {
|
||||
partResult := partResults[i]
|
||||
partTextResult := partResult.Get("text")
|
||||
functionCallResult := partResult.Get("functionCall")
|
||||
|
||||
if partTextResult.Exists() {
|
||||
// Handle text content, distinguishing between regular content and reasoning/thoughts.
|
||||
if partResult.Get("thought").Bool() {
|
||||
template, _ = sjson.Set(template, "choices.0.delta.reasoning_content", partTextResult.String())
|
||||
} else {
|
||||
template, _ = sjson.Set(template, "choices.0.delta.content", partTextResult.String())
|
||||
if partTextResult.Exists() {
|
||||
// Handle text content, distinguishing between regular content and reasoning/thoughts.
|
||||
if partResult.Get("thought").Bool() {
|
||||
template, _ = sjson.Set(template, "choices.0.delta.reasoning_content", partTextResult.String())
|
||||
} else {
|
||||
template, _ = sjson.Set(template, "choices.0.delta.content", partTextResult.String())
|
||||
}
|
||||
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
|
||||
} else if functionCallResult.Exists() {
|
||||
// Handle function call content.
|
||||
toolCallsResult := gjson.Get(template, "choices.0.delta.tool_calls")
|
||||
if !toolCallsResult.Exists() || !toolCallsResult.IsArray() {
|
||||
template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls", `[]`)
|
||||
}
|
||||
|
||||
functionCallTemplate := `{"id": "","type": "function","function": {"name": "","arguments": ""}}`
|
||||
fcName := functionCallResult.Get("name").String()
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
|
||||
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.arguments", fcArgsResult.Raw)
|
||||
}
|
||||
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
|
||||
template, _ = sjson.SetRaw(template, "choices.0.message.tool_calls.-1", functionCallTemplate)
|
||||
}
|
||||
}
|
||||
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
|
||||
} else if functionCallResult.Exists() {
|
||||
// Handle function call content.
|
||||
functionCallTemplate := `[{"id": "","type": "function","function": {"name": "","arguments": ""}}]`
|
||||
fcName := functionCallResult.Get("name").String()
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "0.id", fcName)
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "0.function.name", fcName)
|
||||
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
|
||||
functionCallTemplate, _ = sjson.Set(functionCallTemplate, "0.function.arguments", fcArgsResult.Raw)
|
||||
}
|
||||
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
|
||||
template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls", functionCallTemplate)
|
||||
} else {
|
||||
// If no usable content is found, return an empty string.
|
||||
return ""
|
||||
}
|
||||
|
||||
return template
|
||||
}
|
||||
|
||||
// ConvertCliToOpenAINonStream aggregates response chunks from the backend client
|
||||
// into a single, non-streaming OpenAI-compatible JSON response.
|
||||
func ConvertCliToOpenAINonStream(template string, rawJson []byte) string {
|
||||
// Extract and set metadata fields that are typically set once per response.
|
||||
if gjson.Get(template, "id").String() == "" {
|
||||
if modelVersionResult := gjson.GetBytes(rawJson, "response.modelVersion"); modelVersionResult.Exists() {
|
||||
template, _ = sjson.Set(template, "model", modelVersionResult.String())
|
||||
}
|
||||
if createTimeResult := gjson.GetBytes(rawJson, "response.createTime"); createTimeResult.Exists() {
|
||||
t, err := time.Parse(time.RFC3339Nano, createTimeResult.String())
|
||||
unixTimestamp := time.Now().Unix()
|
||||
if err == nil {
|
||||
unixTimestamp = t.Unix()
|
||||
}
|
||||
template, _ = sjson.Set(template, "created", unixTimestamp)
|
||||
}
|
||||
if responseIdResult := gjson.GetBytes(rawJson, "response.responseId"); responseIdResult.Exists() {
|
||||
template, _ = sjson.Set(template, "id", responseIdResult.String())
|
||||
}
|
||||
// ConvertCliToOpenAINonStream converts a response from the backend client
|
||||
// into a single, non-streaming OpenAI-compatible JSON response.
|
||||
func ConvertCliToOpenAINonStream(rawJson []byte, unixTimestamp int64, isGlAPIKey bool) string {
|
||||
if isGlAPIKey {
|
||||
rawJson, _ = sjson.SetRawBytes(rawJson, "response", rawJson)
|
||||
}
|
||||
template := `{"id":"","object":"chat.completion","created":123456,"model":"model","choices":[{"index":0,"message":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":null,"native_finish_reason":null}]}`
|
||||
if modelVersionResult := gjson.GetBytes(rawJson, "response.modelVersion"); modelVersionResult.Exists() {
|
||||
template, _ = sjson.Set(template, "model", modelVersionResult.String())
|
||||
}
|
||||
|
||||
if createTimeResult := gjson.GetBytes(rawJson, "response.createTime"); createTimeResult.Exists() {
|
||||
t, err := time.Parse(time.RFC3339Nano, createTimeResult.String())
|
||||
if err == nil {
|
||||
unixTimestamp = t.Unix()
|
||||
}
|
||||
template, _ = sjson.Set(template, "created", unixTimestamp)
|
||||
} else {
|
||||
template, _ = sjson.Set(template, "created", unixTimestamp)
|
||||
}
|
||||
|
||||
if responseIdResult := gjson.GetBytes(rawJson, "response.responseId"); responseIdResult.Exists() {
|
||||
template, _ = sjson.Set(template, "id", responseIdResult.String())
|
||||
}
|
||||
|
||||
// Extract and set the finish reason.
|
||||
if finishReasonResult := gjson.GetBytes(rawJson, "response.candidates.0.finishReason"); finishReasonResult.Exists() {
|
||||
template, _ = sjson.Set(template, "choices.0.finish_reason", finishReasonResult.String())
|
||||
template, _ = sjson.Set(template, "choices.0.native_finish_reason", finishReasonResult.String())
|
||||
}
|
||||
|
||||
// Extract and set usage metadata (token counts).
|
||||
if usageResult := gjson.GetBytes(rawJson, "response.usageMetadata"); usageResult.Exists() {
|
||||
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
|
||||
template, _ = sjson.Set(template, "usage.completion_tokens", candidatesTokenCountResult.Int())
|
||||
@@ -132,37 +148,42 @@ func ConvertCliToOpenAINonStream(template string, rawJson []byte) string {
|
||||
}
|
||||
|
||||
// Process the main content part of the response.
|
||||
partResult := gjson.GetBytes(rawJson, "response.candidates.0.content.parts.0")
|
||||
partTextResult := partResult.Get("text")
|
||||
functionCallResult := partResult.Get("functionCall")
|
||||
partsResult := gjson.GetBytes(rawJson, "response.candidates.0.content.parts")
|
||||
if partsResult.IsArray() {
|
||||
partsResults := partsResult.Array()
|
||||
for i := 0; i < len(partsResults); i++ {
|
||||
partResult := partsResults[i]
|
||||
partTextResult := partResult.Get("text")
|
||||
functionCallResult := partResult.Get("functionCall")
|
||||
|
||||
if partTextResult.Exists() {
|
||||
// Append text content, distinguishing between regular content and reasoning.
|
||||
if partResult.Get("thought").Bool() {
|
||||
currentContent := gjson.Get(template, "choices.0.message.reasoning_content").String()
|
||||
template, _ = sjson.Set(template, "choices.0.message.reasoning_content", currentContent+partTextResult.String())
|
||||
} else {
|
||||
currentContent := gjson.Get(template, "choices.0.message.content").String()
|
||||
template, _ = sjson.Set(template, "choices.0.message.content", currentContent+partTextResult.String())
|
||||
if partTextResult.Exists() {
|
||||
// Append text content, distinguishing between regular content and reasoning.
|
||||
if partResult.Get("thought").Bool() {
|
||||
template, _ = sjson.Set(template, "choices.0.message.reasoning_content", partTextResult.String())
|
||||
} else {
|
||||
template, _ = sjson.Set(template, "choices.0.message.content", partTextResult.String())
|
||||
}
|
||||
template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
|
||||
} else if functionCallResult.Exists() {
|
||||
// Append function call content to the tool_calls array.
|
||||
toolCallsResult := gjson.Get(template, "choices.0.message.tool_calls")
|
||||
if !toolCallsResult.Exists() || !toolCallsResult.IsArray() {
|
||||
template, _ = sjson.SetRaw(template, "choices.0.message.tool_calls", `[]`)
|
||||
}
|
||||
functionCallItemTemplate := `{"id": "","type": "function","function": {"name": "","arguments": ""}}`
|
||||
fcName := functionCallResult.Get("name").String()
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", fcName)
|
||||
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", fcArgsResult.Raw)
|
||||
}
|
||||
template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
|
||||
template, _ = sjson.SetRaw(template, "choices.0.message.tool_calls.-1", functionCallItemTemplate)
|
||||
} else {
|
||||
// If no usable content is found, return an empty string.
|
||||
return ""
|
||||
}
|
||||
}
|
||||
template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
|
||||
} else if functionCallResult.Exists() {
|
||||
// Append function call content to the tool_calls array.
|
||||
if !gjson.Get(template, "choices.0.message.tool_calls").Exists() {
|
||||
template, _ = sjson.SetRaw(template, "choices.0.message.tool_calls", `[]`)
|
||||
}
|
||||
functionCallItemTemplate := `{"id": "","type": "function","function": {"name": "","arguments": ""}}`
|
||||
fcName := functionCallResult.Get("name").String()
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fcName)
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", fcName)
|
||||
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
|
||||
functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", fcArgsResult.Raw)
|
||||
}
|
||||
template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
|
||||
template, _ = sjson.SetRaw(template, "choices.0.message.tool_calls.-1", functionCallItemTemplate)
|
||||
} else {
|
||||
// If no usable content is found, return an empty string.
|
||||
return ""
|
||||
}
|
||||
|
||||
return template
|
||||
|
||||
@@ -27,22 +27,40 @@ const (
|
||||
codeAssistEndpoint = "https://cloudcode-pa.googleapis.com"
|
||||
apiVersion = "v1internal"
|
||||
pluginVersion = "0.1.9"
|
||||
|
||||
glEndPoint = "https://generativelanguage.googleapis.com/"
|
||||
glApiVersion = "v1beta"
|
||||
)
|
||||
|
||||
var (
|
||||
// previewModels maps a base model name to the preview model names associated
// with it. NOTE(review): presumably consulted by getPreviewModel when the base
// model has hit its quota; that helper is not visible here - confirm.
previewModels = map[string][]string{
|
||||
"gemini-2.5-pro": {"gemini-2.5-pro-preview-05-06", "gemini-2.5-pro-preview-06-05"},
|
||||
"gemini-2.5-flash": {"gemini-2.5-flash-preview-04-17", "gemini-2.5-flash-preview-05-20"},
|
||||
}
|
||||
)
|
||||
|
||||
// Client is the main client for interacting with the CLI API.
|
||||
type Client struct {
|
||||
httpClient *http.Client
|
||||
RequestMutex sync.Mutex
|
||||
tokenStorage *auth.TokenStorage
|
||||
cfg *config.Config
|
||||
httpClient *http.Client
|
||||
RequestMutex sync.Mutex
|
||||
tokenStorage *auth.TokenStorage
|
||||
cfg *config.Config
|
||||
modelQuotaExceeded map[string]*time.Time
|
||||
glAPIKey string
|
||||
}
|
||||
|
||||
// NewClient creates a new CLI API client.
|
||||
func NewClient(httpClient *http.Client, ts *auth.TokenStorage, cfg *config.Config) *Client {
|
||||
func NewClient(httpClient *http.Client, ts *auth.TokenStorage, cfg *config.Config, glAPIKey ...string) *Client {
|
||||
var glKey string
|
||||
if len(glAPIKey) > 0 {
|
||||
glKey = glAPIKey[0]
|
||||
}
|
||||
return &Client{
|
||||
httpClient: httpClient,
|
||||
tokenStorage: ts,
|
||||
cfg: cfg,
|
||||
httpClient: httpClient,
|
||||
tokenStorage: ts,
|
||||
cfg: cfg,
|
||||
modelQuotaExceeded: make(map[string]*time.Time),
|
||||
glAPIKey: glKey,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,7 +89,14 @@ func (c *Client) GetEmail() string {
|
||||
}
|
||||
|
||||
func (c *Client) GetProjectID() string {
|
||||
return c.tokenStorage.ProjectID
|
||||
if c.tokenStorage != nil {
|
||||
return c.tokenStorage.ProjectID
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// GetGenerativeLanguageAPIKey returns the client's glAPIKey field, i.e. the
// Generative Language API key stored on this client (possibly "").
func (c *Client) GetGenerativeLanguageAPIKey() string {
|
||||
return c.glAPIKey
|
||||
}
|
||||
|
||||
// SetupUser performs the initial user onboarding and setup.
|
||||
@@ -187,6 +212,7 @@ func (c *Client) makeAPIRequest(ctx context.Context, endpoint, method string, bo
|
||||
metadataStr := getClientMetadataString()
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("User-Agent", getUserAgent())
|
||||
req.Header.Set("X-Goog-Api-Client", "gl-node/22.17.0")
|
||||
req.Header.Set("Client-Metadata", metadataStr)
|
||||
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token.AccessToken))
|
||||
|
||||
@@ -214,8 +240,8 @@ func (c *Client) makeAPIRequest(ctx context.Context, endpoint, method string, bo
|
||||
return nil
|
||||
}
|
||||
|
||||
// StreamAPIRequest handles making streaming requests to the CLI API endpoints.
|
||||
func (c *Client) StreamAPIRequest(ctx context.Context, endpoint string, body interface{}) (io.ReadCloser, *ErrorMessage) {
|
||||
// APIRequest handles making requests to the CLI API endpoints.
|
||||
func (c *Client) APIRequest(ctx context.Context, endpoint string, body interface{}, stream bool) (io.ReadCloser, *ErrorMessage) {
|
||||
var jsonBody []byte
|
||||
var err error
|
||||
if byteBody, ok := body.([]byte); ok {
|
||||
@@ -226,32 +252,56 @@ func (c *Client) StreamAPIRequest(ctx context.Context, endpoint string, body int
|
||||
return nil, &ErrorMessage{500, fmt.Errorf("failed to marshal request body: %w", err)}
|
||||
}
|
||||
}
|
||||
|
||||
var url string
|
||||
if c.glAPIKey == "" {
|
||||
// Add alt=sse for streaming
|
||||
url = fmt.Sprintf("%s/%s:%s", codeAssistEndpoint, apiVersion, endpoint)
|
||||
if stream {
|
||||
url = url + "?alt=sse"
|
||||
}
|
||||
} else {
|
||||
modelResult := gjson.GetBytes(jsonBody, "model")
|
||||
url = fmt.Sprintf("%s/%s/models/%s:%s", glEndPoint, glApiVersion, modelResult.String(), endpoint)
|
||||
if stream {
|
||||
url = url + "?alt=sse"
|
||||
}
|
||||
jsonBody = []byte(gjson.GetBytes(jsonBody, "request").Raw)
|
||||
systemInstructionResult := gjson.GetBytes(jsonBody, "systemInstruction")
|
||||
if systemInstructionResult.Exists() {
|
||||
jsonBody, _ = sjson.SetRawBytes(jsonBody, "system_instruction", []byte(systemInstructionResult.Raw))
|
||||
jsonBody, _ = sjson.DeleteBytes(jsonBody, "systemInstruction")
|
||||
jsonBody, _ = sjson.DeleteBytes(jsonBody, "session_id")
|
||||
}
|
||||
}
|
||||
|
||||
// log.Debug(string(jsonBody))
|
||||
reqBody := bytes.NewBuffer(jsonBody)
|
||||
|
||||
// Add alt=sse for streaming
|
||||
url := fmt.Sprintf("%s/%s:%s?alt=sse", codeAssistEndpoint, apiVersion, endpoint)
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", url, reqBody)
|
||||
if err != nil {
|
||||
return nil, &ErrorMessage{500, fmt.Errorf("failed to create request: %w", err)}
|
||||
}
|
||||
|
||||
token, err := c.httpClient.Transport.(*oauth2.Transport).Source.Token()
|
||||
if err != nil {
|
||||
return nil, &ErrorMessage{500, fmt.Errorf("failed to get token: %w", err)}
|
||||
return nil, &ErrorMessage{500, fmt.Errorf("failed to create request: %v", err)}
|
||||
}
|
||||
|
||||
// Set headers
|
||||
metadataStr := getClientMetadataString()
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("User-Agent", getUserAgent())
|
||||
req.Header.Set("Client-Metadata", metadataStr)
|
||||
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token.AccessToken))
|
||||
if c.glAPIKey == "" {
|
||||
token, errToken := c.httpClient.Transport.(*oauth2.Transport).Source.Token()
|
||||
if errToken != nil {
|
||||
return nil, &ErrorMessage{500, fmt.Errorf("failed to get token: %v", errToken)}
|
||||
}
|
||||
req.Header.Set("User-Agent", getUserAgent())
|
||||
req.Header.Set("X-Goog-Api-Client", "gl-node/22.17.0")
|
||||
req.Header.Set("Client-Metadata", metadataStr)
|
||||
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token.AccessToken))
|
||||
} else {
|
||||
req.Header.Set("x-goog-api-key", c.glAPIKey)
|
||||
}
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, &ErrorMessage{500, fmt.Errorf("failed to execute request: %w", err)}
|
||||
return nil, &ErrorMessage{500, fmt.Errorf("failed to execute request: %v", err)}
|
||||
}
|
||||
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
@@ -261,16 +311,108 @@ func (c *Client) StreamAPIRequest(ctx context.Context, endpoint string, body int
|
||||
}
|
||||
}()
|
||||
bodyBytes, _ := io.ReadAll(resp.Body)
|
||||
|
||||
return nil, &ErrorMessage{resp.StatusCode, fmt.Errorf(string(bodyBytes))}
|
||||
// return nil, fmt.Errorf("api streaming request failed with status %d: %s", resp.StatusCode, string(bodyBytes))
|
||||
}
|
||||
|
||||
return resp.Body, nil
|
||||
}
|
||||
|
||||
// SendMessage handles a single conversational turn, including tool calls.
|
||||
func (c *Client) SendMessage(ctx context.Context, rawJson []byte, model string, systemInstruction *Content, contents []Content, tools []ToolDeclaration) ([]byte, *ErrorMessage) {
|
||||
request := GenerateContentRequest{
|
||||
Contents: contents,
|
||||
GenerationConfig: GenerationConfig{
|
||||
ThinkingConfig: GenerationConfigThinkingConfig{
|
||||
IncludeThoughts: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
request.SystemInstruction = systemInstruction
|
||||
|
||||
request.Tools = tools
|
||||
|
||||
requestBody := map[string]interface{}{
|
||||
"project": c.GetProjectID(), // Assuming ProjectID is available
|
||||
"request": request,
|
||||
"model": model,
|
||||
}
|
||||
|
||||
byteRequestBody, _ := json.Marshal(requestBody)
|
||||
|
||||
// log.Debug(string(byteRequestBody))
|
||||
|
||||
reasoningEffortResult := gjson.GetBytes(rawJson, "reasoning_effort")
|
||||
if reasoningEffortResult.String() == "none" {
|
||||
byteRequestBody, _ = sjson.DeleteBytes(byteRequestBody, "request.generationConfig.thinkingConfig.include_thoughts")
|
||||
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
|
||||
} else if reasoningEffortResult.String() == "auto" {
|
||||
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
} else if reasoningEffortResult.String() == "low" {
|
||||
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
|
||||
} else if reasoningEffortResult.String() == "medium" {
|
||||
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
|
||||
} else if reasoningEffortResult.String() == "high" {
|
||||
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.thinkingConfig.thinkingBudget", 24576)
|
||||
} else {
|
||||
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
|
||||
}
|
||||
|
||||
temperatureResult := gjson.GetBytes(rawJson, "temperature")
|
||||
if temperatureResult.Exists() && temperatureResult.Type == gjson.Number {
|
||||
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.temperature", temperatureResult.Num)
|
||||
}
|
||||
|
||||
topPResult := gjson.GetBytes(rawJson, "top_p")
|
||||
if topPResult.Exists() && topPResult.Type == gjson.Number {
|
||||
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.topP", topPResult.Num)
|
||||
}
|
||||
|
||||
topKResult := gjson.GetBytes(rawJson, "top_k")
|
||||
if topKResult.Exists() && topKResult.Type == gjson.Number {
|
||||
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "request.generationConfig.topK", topKResult.Num)
|
||||
}
|
||||
|
||||
modelName := model
|
||||
// log.Debug(string(byteRequestBody))
|
||||
for {
|
||||
if c.isModelQuotaExceeded(modelName) {
|
||||
if c.cfg.QuotaExceeded.SwitchPreviewModel && c.glAPIKey == "" {
|
||||
modelName = c.getPreviewModel(model)
|
||||
if modelName != "" {
|
||||
log.Debugf("Model %s is quota exceeded. Switch to preview model %s", model, modelName)
|
||||
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "model", modelName)
|
||||
continue
|
||||
}
|
||||
}
|
||||
return nil, &ErrorMessage{
|
||||
StatusCode: 429,
|
||||
Error: fmt.Errorf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, model),
|
||||
}
|
||||
}
|
||||
|
||||
respBody, err := c.APIRequest(ctx, "generateContent", byteRequestBody, false)
|
||||
if err != nil {
|
||||
if err.StatusCode == 429 {
|
||||
now := time.Now()
|
||||
c.modelQuotaExceeded[modelName] = &now
|
||||
if c.cfg.QuotaExceeded.SwitchPreviewModel && c.glAPIKey == "" {
|
||||
continue
|
||||
}
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
delete(c.modelQuotaExceeded, modelName)
|
||||
bodyBytes, errReadAll := io.ReadAll(respBody)
|
||||
if errReadAll != nil {
|
||||
return nil, &ErrorMessage{StatusCode: 500, Error: errReadAll}
|
||||
}
|
||||
return bodyBytes, nil
|
||||
}
|
||||
}
|
||||
|
||||
// SendMessageStream handles a single conversational turn, including tool calls.
|
||||
func (c *Client) SendMessageStream(ctx context.Context, rawJson []byte, model string, contents []Content, tools []ToolDeclaration) (<-chan []byte, <-chan *ErrorMessage) {
|
||||
func (c *Client) SendMessageStream(ctx context.Context, rawJson []byte, model string, systemInstruction *Content, contents []Content, tools []ToolDeclaration) (<-chan []byte, <-chan *ErrorMessage) {
|
||||
dataTag := []byte("data: ")
|
||||
errChan := make(chan *ErrorMessage)
|
||||
dataChan := make(chan []byte)
|
||||
@@ -286,10 +428,13 @@ func (c *Client) SendMessageStream(ctx context.Context, rawJson []byte, model st
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
request.SystemInstruction = systemInstruction
|
||||
|
||||
request.Tools = tools
|
||||
|
||||
requestBody := map[string]interface{}{
|
||||
"project": c.tokenStorage.ProjectID, // Assuming ProjectID is available
|
||||
"project": c.GetProjectID(), // Assuming ProjectID is available
|
||||
"request": request,
|
||||
"model": model,
|
||||
}
|
||||
@@ -330,12 +475,39 @@ func (c *Client) SendMessageStream(ctx context.Context, rawJson []byte, model st
|
||||
}
|
||||
|
||||
// log.Debug(string(byteRequestBody))
|
||||
|
||||
stream, err := c.StreamAPIRequest(ctx, "streamGenerateContent", byteRequestBody)
|
||||
if err != nil {
|
||||
// log.Println(err)
|
||||
errChan <- err
|
||||
return
|
||||
modelName := model
|
||||
var stream io.ReadCloser
|
||||
for {
|
||||
if c.isModelQuotaExceeded(modelName) {
|
||||
if c.cfg.QuotaExceeded.SwitchPreviewModel && c.glAPIKey == "" {
|
||||
modelName = c.getPreviewModel(model)
|
||||
if modelName != "" {
|
||||
log.Debugf("Model %s is quota exceeded. Switch to preview model %s", model, modelName)
|
||||
byteRequestBody, _ = sjson.SetBytes(byteRequestBody, "model", modelName)
|
||||
continue
|
||||
}
|
||||
}
|
||||
errChan <- &ErrorMessage{
|
||||
StatusCode: 429,
|
||||
Error: fmt.Errorf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, model),
|
||||
}
|
||||
return
|
||||
}
|
||||
var err *ErrorMessage
|
||||
stream, err = c.APIRequest(ctx, "streamGenerateContent", byteRequestBody, true)
|
||||
if err != nil {
|
||||
if err.StatusCode == 429 {
|
||||
now := time.Now()
|
||||
c.modelQuotaExceeded[modelName] = &now
|
||||
if c.cfg.QuotaExceeded.SwitchPreviewModel && c.glAPIKey == "" {
|
||||
continue
|
||||
}
|
||||
}
|
||||
errChan <- err
|
||||
return
|
||||
}
|
||||
delete(c.modelQuotaExceeded, modelName)
|
||||
break
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(stream)
|
||||
@@ -360,6 +532,149 @@ func (c *Client) SendMessageStream(ctx context.Context, rawJson []byte, model st
|
||||
return dataChan, errChan
|
||||
}
|
||||
|
||||
// SendRawMessage handles a single conversational turn, including tool calls.
|
||||
func (c *Client) SendRawMessage(ctx context.Context, rawJson []byte) ([]byte, *ErrorMessage) {
|
||||
rawJson, _ = sjson.SetBytes(rawJson, "project", c.GetProjectID())
|
||||
|
||||
modelResult := gjson.GetBytes(rawJson, "model")
|
||||
model := modelResult.String()
|
||||
modelName := model
|
||||
for {
|
||||
if c.isModelQuotaExceeded(modelName) {
|
||||
if c.cfg.QuotaExceeded.SwitchPreviewModel && c.glAPIKey == "" {
|
||||
modelName = c.getPreviewModel(model)
|
||||
if modelName != "" {
|
||||
log.Debugf("Model %s is quota exceeded. Switch to preview model %s", model, modelName)
|
||||
rawJson, _ = sjson.SetBytes(rawJson, "model", modelName)
|
||||
continue
|
||||
}
|
||||
}
|
||||
return nil, &ErrorMessage{
|
||||
StatusCode: 429,
|
||||
Error: fmt.Errorf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, model),
|
||||
}
|
||||
}
|
||||
|
||||
respBody, err := c.APIRequest(ctx, "generateContent", rawJson, false)
|
||||
if err != nil {
|
||||
if err.StatusCode == 429 {
|
||||
now := time.Now()
|
||||
c.modelQuotaExceeded[modelName] = &now
|
||||
if c.cfg.QuotaExceeded.SwitchPreviewModel && c.glAPIKey == "" {
|
||||
continue
|
||||
}
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
delete(c.modelQuotaExceeded, modelName)
|
||||
bodyBytes, errReadAll := io.ReadAll(respBody)
|
||||
if errReadAll != nil {
|
||||
return nil, &ErrorMessage{StatusCode: 500, Error: errReadAll}
|
||||
}
|
||||
return bodyBytes, nil
|
||||
}
|
||||
}
|
||||
|
||||
// SendRawMessageStream handles a single conversational turn, including tool calls.
|
||||
func (c *Client) SendRawMessageStream(ctx context.Context, rawJson []byte) (<-chan []byte, <-chan *ErrorMessage) {
|
||||
dataTag := []byte("data: ")
|
||||
errChan := make(chan *ErrorMessage)
|
||||
dataChan := make(chan []byte)
|
||||
go func() {
|
||||
defer close(errChan)
|
||||
defer close(dataChan)
|
||||
|
||||
rawJson, _ = sjson.SetBytes(rawJson, "project", c.GetProjectID())
|
||||
|
||||
modelResult := gjson.GetBytes(rawJson, "model")
|
||||
model := modelResult.String()
|
||||
modelName := model
|
||||
var stream io.ReadCloser
|
||||
for {
|
||||
if c.isModelQuotaExceeded(modelName) {
|
||||
if c.cfg.QuotaExceeded.SwitchPreviewModel && c.glAPIKey == "" {
|
||||
modelName = c.getPreviewModel(model)
|
||||
if modelName != "" {
|
||||
log.Debugf("Model %s is quota exceeded. Switch to preview model %s", model, modelName)
|
||||
rawJson, _ = sjson.SetBytes(rawJson, "model", modelName)
|
||||
continue
|
||||
}
|
||||
}
|
||||
errChan <- &ErrorMessage{
|
||||
StatusCode: 429,
|
||||
Error: fmt.Errorf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, model),
|
||||
}
|
||||
return
|
||||
}
|
||||
var err *ErrorMessage
|
||||
stream, err = c.APIRequest(ctx, "streamGenerateContent", rawJson, true)
|
||||
if err != nil {
|
||||
if err.StatusCode == 429 {
|
||||
now := time.Now()
|
||||
c.modelQuotaExceeded[modelName] = &now
|
||||
if c.cfg.QuotaExceeded.SwitchPreviewModel && c.glAPIKey == "" {
|
||||
continue
|
||||
}
|
||||
}
|
||||
errChan <- err
|
||||
return
|
||||
}
|
||||
delete(c.modelQuotaExceeded, modelName)
|
||||
break
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(stream)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
if bytes.HasPrefix(line, dataTag) {
|
||||
dataChan <- line[6:]
|
||||
}
|
||||
}
|
||||
|
||||
if errScanner := scanner.Err(); errScanner != nil {
|
||||
errChan <- &ErrorMessage{500, errScanner}
|
||||
_ = stream.Close()
|
||||
return
|
||||
}
|
||||
|
||||
_ = stream.Close()
|
||||
}()
|
||||
|
||||
return dataChan, errChan
|
||||
}
|
||||
|
||||
func (c *Client) isModelQuotaExceeded(model string) bool {
|
||||
if lastExceededTime, hasKey := c.modelQuotaExceeded[model]; hasKey {
|
||||
duration := time.Now().Sub(*lastExceededTime)
|
||||
if duration > 30*time.Minute {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (c *Client) getPreviewModel(model string) string {
|
||||
if models, hasKey := previewModels[model]; hasKey {
|
||||
for i := 0; i < len(models); i++ {
|
||||
if !c.isModelQuotaExceeded(models[i]) {
|
||||
return models[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (c *Client) IsModelQuotaExceeded(model string) bool {
|
||||
if c.isModelQuotaExceeded(model) {
|
||||
if c.cfg.QuotaExceeded.SwitchPreviewModel {
|
||||
return c.getPreviewModel(model) == ""
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// CheckCloudAPIIsEnabled sends a simple test request to the API to verify
|
||||
// that the Cloud AI API is enabled for the user's project. It provides
|
||||
// an activation URL if the API is disabled.
|
||||
@@ -374,7 +689,7 @@ func (c *Client) CheckCloudAPIIsEnabled() (bool, error) {
|
||||
// A simple request to test the API endpoint.
|
||||
requestBody := fmt.Sprintf(`{"project":"%s","request":{"contents":[{"role":"user","parts":[{"text":"Be concise. What is the capital of France?"}]}],"generationConfig":{"thinkingConfig":{"include_thoughts":false,"thinkingBudget":0}}},"model":"gemini-2.5-flash"}`, c.tokenStorage.ProjectID)
|
||||
|
||||
stream, err := c.StreamAPIRequest(ctx, "streamGenerateContent", []byte(requestBody))
|
||||
stream, err := c.APIRequest(ctx, "streamGenerateContent", []byte(requestBody), true)
|
||||
if err != nil {
|
||||
// If a 403 Forbidden error occurs, it likely means the API is not enabled.
|
||||
if err.StatusCode == 403 {
|
||||
@@ -468,10 +783,10 @@ func (c *Client) SaveTokenToFile() error {
|
||||
// such as IDE type, platform, and plugin version.
|
||||
func getClientMetadata() map[string]string {
|
||||
return map[string]string{
|
||||
"ideType": "IDE_UNSPECIFIED",
|
||||
"platform": getPlatform(),
|
||||
"pluginType": "GEMINI",
|
||||
"pluginVersion": pluginVersion,
|
||||
"ideType": "IDE_UNSPECIFIED",
|
||||
"platform": "PLATFORM_UNSPECIFIED",
|
||||
"pluginType": "GEMINI",
|
||||
// "pluginVersion": pluginVersion,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -488,7 +803,8 @@ func getClientMetadataString() string {
|
||||
|
||||
// getUserAgent constructs the User-Agent string for HTTP requests.
|
||||
func getUserAgent() string {
|
||||
return fmt.Sprintf("GeminiCLI/%s (%s; %s)", pluginVersion, runtime.GOOS, runtime.GOARCH)
|
||||
// return fmt.Sprintf("GeminiCLI/%s (%s; %s)", pluginVersion, runtime.GOOS, runtime.GOARCH)
|
||||
return "google-api-nodejs-client/9.15.1"
|
||||
}
|
||||
|
||||
// getPlatform determines the operating system and architecture and formats
|
||||
|
||||
@@ -64,9 +64,10 @@ type FunctionResponse struct {
|
||||
|
||||
// GenerateContentRequest is the top-level request structure for the streamGenerateContent endpoint.
|
||||
type GenerateContentRequest struct {
|
||||
Contents []Content `json:"contents"`
|
||||
Tools []ToolDeclaration `json:"tools,omitempty"`
|
||||
GenerationConfig `json:"generationConfig"`
|
||||
SystemInstruction *Content `json:"systemInstruction,omitempty"`
|
||||
Contents []Content `json:"contents"`
|
||||
Tools []ToolDeclaration `json:"tools,omitempty"`
|
||||
GenerationConfig `json:"generationConfig"`
|
||||
}
|
||||
|
||||
// GenerationConfig defines parameters that control the model's generation behavior.
|
||||
|
||||
@@ -3,13 +3,14 @@ package cmd
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/luispater/CLIProxyAPI/internal/api"
|
||||
"github.com/luispater/CLIProxyAPI/internal/auth"
|
||||
"github.com/luispater/CLIProxyAPI/internal/client"
|
||||
"github.com/luispater/CLIProxyAPI/internal/config"
|
||||
"github.com/luispater/CLIProxyAPI/internal/util"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"io/fs"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
@@ -22,13 +23,6 @@ import (
|
||||
// It loads all available authentication tokens, creates a pool of clients,
|
||||
// starts the API server, and handles graceful shutdown signals.
|
||||
func StartService(cfg *config.Config) {
|
||||
// Configure the API server based on the main application config.
|
||||
apiConfig := &api.ServerConfig{
|
||||
Port: fmt.Sprintf("%d", cfg.Port),
|
||||
Debug: cfg.Debug,
|
||||
ApiKeys: cfg.ApiKeys,
|
||||
}
|
||||
|
||||
// Create a pool of API clients, one for each token file found.
|
||||
cliClients := make([]*client.Client, 0)
|
||||
err := filepath.Walk(cfg.AuthDir, func(path string, info fs.FileInfo, err error) error {
|
||||
@@ -72,9 +66,22 @@ func StartService(cfg *config.Config) {
|
||||
log.Fatalf("Error walking auth directory: %v", err)
|
||||
}
|
||||
|
||||
if len(cfg.GlAPIKey) > 0 {
|
||||
for i := 0; i < len(cfg.GlAPIKey); i++ {
|
||||
httpClient, errSetProxy := util.SetProxy(cfg, &http.Client{})
|
||||
if errSetProxy != nil {
|
||||
log.Fatalf("set proxy failed: %v", errSetProxy)
|
||||
}
|
||||
|
||||
log.Debug("Initializing with Generative Language API key...")
|
||||
cliClient := client.NewClient(httpClient, nil, cfg, cfg.GlAPIKey[i])
|
||||
cliClients = append(cliClients, cliClient)
|
||||
}
|
||||
}
|
||||
|
||||
// Create and start the API server with the pool of clients.
|
||||
apiServer := api.NewServer(apiConfig, cliClients)
|
||||
log.Infof("Starting API server on port %s", apiConfig.Port)
|
||||
apiServer := api.NewServer(cfg, cliClients)
|
||||
log.Infof("Starting API server on port %d", cfg.Port)
|
||||
if err = apiServer.Start(); err != nil {
|
||||
log.Fatalf("API server failed to start: %v", err)
|
||||
}
|
||||
|
||||
@@ -11,13 +11,24 @@ type Config struct {
|
||||
// Port is the network port on which the API server will listen.
|
||||
Port int `yaml:"port"`
|
||||
// AuthDir is the directory where authentication token files are stored.
|
||||
AuthDir string `yaml:"auth_dir"`
|
||||
AuthDir string `yaml:"auth-dir"`
|
||||
// Debug enables or disables debug-level logging and other debug features.
|
||||
Debug bool `yaml:"debug"`
|
||||
// ProxyUrl is the URL of an optional proxy server to use for outbound requests.
|
||||
ProxyUrl string `yaml:"proxy-url"`
|
||||
// ApiKeys is a list of keys for authenticating clients to this proxy server.
|
||||
ApiKeys []string `yaml:"api_keys"`
|
||||
ApiKeys []string `yaml:"api-keys"`
|
||||
// QuotaExceeded defines the behavior when a quota is exceeded.
|
||||
QuotaExceeded ConfigQuotaExceeded `yaml:"quota-exceeded"`
|
||||
// GlAPIKey is the API key for the generative language API.
|
||||
GlAPIKey []string `yaml:"generative-language-api-key"`
|
||||
}
|
||||
|
||||
// ConfigQuotaExceeded groups the settings that define the behavior when a
// quota is exceeded.
type ConfigQuotaExceeded struct {
	// SwitchProject indicates whether to automatically switch to another project when a quota is exceeded.
	SwitchProject bool `yaml:"switch-project"`

	// SwitchPreviewModel indicates whether to automatically switch to a preview model when a quota is exceeded.
	SwitchPreviewModel bool `yaml:"switch-preview-model"`
}
|
||||
|
||||
// LoadConfig reads a YAML configuration file from the given path,
|
||||
|
||||
37
internal/util/proxy.go
Normal file
37
internal/util/proxy.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/luispater/CLIProxyAPI/internal/config"
|
||||
"golang.org/x/net/proxy"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
)
|
||||
|
||||
func SetProxy(cfg *config.Config, httpClient *http.Client) (*http.Client, error) {
|
||||
var transport *http.Transport
|
||||
proxyURL, errParse := url.Parse(cfg.ProxyUrl)
|
||||
if errParse == nil {
|
||||
if proxyURL.Scheme == "socks5" {
|
||||
username := proxyURL.User.Username()
|
||||
password, _ := proxyURL.User.Password()
|
||||
proxyAuth := &proxy.Auth{User: username, Password: password}
|
||||
dialer, errSOCKS5 := proxy.SOCKS5("tcp", proxyURL.Host, proxyAuth, proxy.Direct)
|
||||
if errSOCKS5 != nil {
|
||||
return nil, errSOCKS5
|
||||
}
|
||||
transport = &http.Transport{
|
||||
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
|
||||
return dialer.Dial(network, addr)
|
||||
},
|
||||
}
|
||||
} else if proxyURL.Scheme == "http" || proxyURL.Scheme == "https" {
|
||||
transport = &http.Transport{Proxy: http.ProxyURL(proxyURL)}
|
||||
}
|
||||
}
|
||||
if transport != nil {
|
||||
httpClient.Transport = transport
|
||||
}
|
||||
return httpClient, nil
|
||||
}
|
||||
Reference in New Issue
Block a user