Compare commits

..

13 Commits

Author SHA1 Message Date
Luis Pater
fb760718e2 ci(homebrew): add workflow to auto-bump Homebrew formula on release 2025-10-07 22:55:23 +08:00
Luis Pater
d6721e4e75 Merge pull request #95 from router-for-me/gemini-web
feat(cliproxy): Rebind auth executors on config change
2025-10-07 21:30:31 +08:00
hkfires
514f5a8ad4 feat(cliproxy): Rebind auth executors on config change 2025-10-07 21:23:21 +08:00
Luis Pater
a68e0dd8aa Merge pull request #94 from router-for-me/gemini-web
Add Gem Mode for Gemini Web
2025-10-07 21:01:05 +08:00
hkfires
75d7763c5c refactor(gemini-web): Rename flash image preview model ID 2025-10-07 20:35:53 +08:00
hkfires
9bb7df7af7 feat(gemini-web): Enable config hot-reload and fix Gem selection 2025-10-07 20:23:33 +08:00
hkfires
43665cb649 feat(gemini-web): Replace code-mode with flexible gem-mode 2025-10-07 19:36:22 +08:00
Luis Pater
39337627b9 feat(auth): include email attribute in auth files response
- Added logic to parse and include the "email" attribute from auth files.
- Updated file data extraction to support additional metadata.
2025-10-07 15:45:27 +08:00
Luis Pater
4bc8a52771 Merge pull request #90 from router-for-me/dethink
Dethink
2025-10-07 03:41:19 +08:00
Luis Pater
b727e4e12e Fixed: #86
feat(translator): add support for single input string in Codex responses parser

- Modified input parsing logic to handle cases where input is a single string instead of an array.
- Added functionality to convert single string inputs into structured JSON format.
2025-10-07 02:10:59 +08:00
Luis Pater
93588919e5 docs: add vibeproxy project information to README and README_CN
- Listed `vibeproxy` as a project utilizing CLIProxyAPI.
- Encouraged contributions by inviting PRs to expand the project list.
2025-10-07 00:57:36 +08:00
hkfires
31659c790d feat(translator/gemini-cli): support inline image data in responses 2025-10-06 17:06:04 +08:00
hkfires
c62ecc2442 fix(gemini): Disable thinking config for incompatible models 2025-10-06 16:32:03 +08:00
16 changed files with 193 additions and 32 deletions

16
.github/workflows/homebrew.yml vendored Normal file
View File

@@ -0,0 +1,16 @@
# Bumps the Homebrew formula for this project whenever a release is released.
name: Bump Homebrew formula

on:
  release:
    types: [ released ]

jobs:
  homebrew:
    name: Bump Homebrew formula
    runs-on: ubuntu-latest
    steps:
      - name: Update Homebrew Formula
        uses: dawidd6/action-homebrew-bump-formula@v5
        with:
          # Token is read from the HOMEBREW_TOKEN secret.
          token: ${{ secrets.HOMEBREW_TOKEN }}
          # Name of the formula to bump.
          formula: cliproxyapi

View File

@@ -328,7 +328,7 @@ The server uses a YAML configuration file (`config.yaml`) located in the project
| `openai-compatibility.*.models.*.alias` | string | "" | The alias used in the API. |
| `gemini-web` | object | {} | Configuration specific to the Gemini Web client. |
| `gemini-web.context` | boolean | true | Enables conversation context reuse for continuous dialogue. |
| `gemini-web.code-mode` | boolean | false | Enables code mode for optimized responses in coding-related tasks. |
| `gemini-web.gem-mode` | string | "" | Selects a predefined Gem to attach for Gemini Web requests; allowed values: `coding-partner`, `writing-editor`. When empty, no Gem is attached. |
| `gemini-web.max-chars-per-request` | integer | 1,000,000 | The maximum number of characters to send to Gemini Web in a single request. |
| `gemini-web.disable-continuation-hint` | boolean | false | Disables the continuation hint for split prompts. |
@@ -378,7 +378,7 @@ quota-exceeded:
# Gemini Web client configuration
gemini-web:
context: true # Enable conversation context reuse
code-mode: false # Enable code mode
gem-mode: "" # Select Gem: "coding-partner" or "writing-editor"; empty means no Gem
max-chars-per-request: 1000000 # Max characters per request
# API keys for official Generative Language API
@@ -720,6 +720,17 @@ Contributions are welcome! Please feel free to submit a Pull Request.
4. Push to the branch (`git push origin feature/amazing-feature`)
5. Open a Pull Request
## Who is with us?
These projects are based on CLIProxyAPI:
### [vibeproxy](https://github.com/automazeio/vibeproxy)
Native macOS menu bar app to use your Claude Code & ChatGPT subscriptions with AI coding tools - no API keys needed
> [!NOTE]
> If you developed a project based on CLIProxyAPI, please open a PR to add it to this list.
## License
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

View File

@@ -340,7 +340,7 @@ console.log(await claudeResponse.json());
| `openai-compatibility.*.models.*.alias` | string | "" | 在API中使用的别名。 |
| `gemini-web` | object | {} | Gemini Web 客户端的特定配置。 |
| `gemini-web.context` | boolean | true | 是否启用会话上下文重用,以实现连续对话。 |
| `gemini-web.code-mode` | boolean | false | 是否启用代码模式,优化代码相关任务的响应。 |
| `gemini-web.gem-mode` | string | "" | 选择要附加的预设 Gem(`coding-partner` 或 `writing-editor`);为空表示不附加。 |
| `gemini-web.max-chars-per-request` | integer | 1,000,000 | 单次请求发送给 Gemini Web 的最大字符数。 |
| `gemini-web.disable-continuation-hint` | boolean | false | 当提示被拆分时,是否禁用连续提示的暗示。 |
@@ -390,7 +390,7 @@ quota-exceeded:
# Gemini Web 客户端配置
gemini-web:
context: true # 启用会话上下文重用
code-mode: false # 启用代码模式
gem-mode: "" # 选择 Gem:"coding-partner" 或 "writing-editor";为空表示不附加
max-chars-per-request: 1000000 # 单次请求最大字符数
# AIStudio Gemini API 的 API 密钥
@@ -729,6 +729,18 @@ docker run --rm -p 8317:8317 -v /path/to/your/config.yaml:/CLIProxyAPI/config.ya
4. 推送到分支(`git push origin feature/amazing-feature`)
5. 打开 Pull Request
## 谁与我们在一起?
这些项目基于 CLIProxyAPI:
### [vibeproxy](https://github.com/automazeio/vibeproxy)
一个原生 macOS 菜单栏应用,让您可以使用 Claude Code & ChatGPT 订阅服务和 AI 编程工具,无需 API 密钥。
> [!NOTE]
> 如果你开发了基于 CLIProxyAPI 的项目,请提交一个 PR(拉取请求)将其添加到此列表中。
## 许可证
此项目根据 MIT 许可证授权 - 有关详细信息,请参阅 [LICENSE](LICENSE) 文件。

View File

@@ -90,10 +90,8 @@ quota-exceeded:
# # Disable the short continuation hint appended to intermediate chunks
# # when splitting long prompts. Default is false (hint enabled by default).
# disable-continuation-hint: false
# # Code mode:
# # - true: enable XML wrapping hint and attach the coding-partner Gem.
# # Thought merging (<think> into visible content) applies to STREAMING only;
# # non-stream responses keep reasoning/thought parts separate for clients
# # that expect explicit reasoning fields.
# # - false: disable XML hint and keep <think> separate
# code-mode: false
# # Gem selection (Gem Mode):
# # - "coding-partner": attach the predefined Coding partner Gem
# # - "writing-editor": attach the predefined Writing editor Gem
# # - empty: do not attach any Gem
# gem-mode: ""

View File

@@ -255,7 +255,9 @@ func (h *Handler) ListAuthFiles(c *gin.Context) {
full := filepath.Join(h.cfg.AuthDir, name)
if data, errRead := os.ReadFile(full); errRead == nil {
typeValue := gjson.GetBytes(data, "type").String()
emailValue := gjson.GetBytes(data, "email").String()
fileData["type"] = typeValue
fileData["email"] = emailValue
}
files = append(files, fileData)

View File

@@ -62,10 +62,19 @@ type GeminiWebConfig struct {
// Defaults to true if not set in YAML (see LoadConfig).
Context bool `yaml:"context" json:"context"`
// CodeMode, when true, enables coding mode behaviors for Gemini Web:
// - Attach the predefined "Coding partner" Gem
// - Enable XML wrapping hint for tool markup
// - Merge <think> content into visible content for tool-friendly output
// GemMode selects a predefined Gem to attach for Gemini Web requests.
// Allowed values:
// - "coding-partner"
// - "writing-editor"
// When empty, no Gem is attached by configuration.
// This is independent from CodeMode below, which is kept for backwards compatibility.
GemMode string `yaml:"gem-mode" json:"gem-mode"`
// CodeMode enables legacy coding-mode behaviors for Gemini Web.
// Backwards compatibility: when true, the service behaves as before by
// attaching the predefined "Coding partner" Gem and enabling extra
// conveniences (e.g., XML wrapping hints). Prefer GemMode for selecting
// a Gem going forward.
CodeMode bool `yaml:"code-mode" json:"code-mode"`
// MaxCharsPerRequest caps the number of characters (runes) sent to

View File

@@ -225,7 +225,7 @@ func MaskToken28(s string) string {
}
var NanoBananaModel = map[string]struct{}{
"gemini-2.5-flash-image-preview": {},
"gemini-2.5-flash-image-web": {},
}
// NewGeminiClient creates a client. Pass empty strings to auto-detect via browser cookies (not implemented in Go port).
@@ -380,6 +380,15 @@ func (c *GeminiClient) generateOnce(prompt string, files []string, model Model,
}
inner := []any{item0, nil, item2}
// Attach Gem first to keep index alignment with reference implementation
// so the Gemini Web UI can recognize the selected Gem.
if gem != nil {
// pad with 16 nils then gem ID
for i := 0; i < 16; i++ {
inner = append(inner, nil)
}
inner = append(inner, gem.ID)
}
requestedModel := strings.ToLower(model.Name)
if chat != nil && chat.RequestedModel() != "" {
requestedModel = chat.RequestedModel()
@@ -388,13 +397,6 @@ func (c *GeminiClient) generateOnce(prompt string, files []string, model Model,
inner = ensureAnyLen(inner, 49)
inner[49] = 14
}
if gem != nil {
// pad with 16 nils then gem ID
for i := 0; i < 16; i++ {
inner = append(inner, nil)
}
inner = append(inner, gem.ID)
}
innerJSON, _ := json.Marshal(inner)
outer := []any{nil, string(innerJSON)}
outerJSON, _ := json.Marshal(outer)

View File

@@ -21,7 +21,7 @@ func EnsureGeminiWebAliasMap() {
continue
}
if m.ID == "gemini-2.5-flash" {
aliasMap["gemini-2.5-flash-image-preview"] = "gemini-2.5-flash"
aliasMap["gemini-2.5-flash-image-web"] = "gemini-2.5-flash"
}
alias := AliasFromModelID(m.ID)
aliasMap[strings.ToLower(alias)] = strings.ToLower(m.ID)
@@ -65,8 +65,8 @@ func GetGeminiWebAliasedModels() []*registry.ModelInfo {
continue
} else if m.ID == "gemini-2.5-flash" {
cpy := *m
cpy.ID = "gemini-2.5-flash-image-preview"
cpy.Name = "gemini-2.5-flash-image-preview"
cpy.ID = "gemini-2.5-flash-image-web"
cpy.Name = "gemini-2.5-flash-image-web"
cpy.DisplayName = "Nano Banana"
cpy.Description = "Gemini 2.5 Flash Preview Image"
aliased = append(aliased, &cpy)

View File

@@ -460,10 +460,10 @@ func (s *GeminiWebState) Send(ctx context.Context, modelName string, reqPayload
return nil, s.wrapSendError(err), nil
}
// Hook: For gemini-2.5-flash-image-preview, if the API returns only images without any text,
// Hook: For gemini-2.5-flash-image-web, if the API returns only images without any text,
// inject a small textual summary so that conversation persistence has non-empty assistant text.
// This helps conversation recovery (conv store) to match sessions reliably.
if strings.EqualFold(modelName, "gemini-2.5-flash-image-preview") {
if strings.EqualFold(modelName, "gemini-2.5-flash-image-web") {
if len(output.Candidates) > 0 {
c := output.Candidates[output.Chosen]
hasNoText := strings.TrimSpace(c.Text) == ""
@@ -696,7 +696,22 @@ func (s *GeminiWebState) findReusableSession(modelName string, msgs []RoleText)
}
func (s *GeminiWebState) getConfiguredGem() *Gem {
if s.cfg != nil && s.cfg.GeminiWeb.CodeMode {
if s.cfg == nil {
return nil
}
// New behavior: attach Gem based on explicit GemMode selection.
// Only attaches the Gem; does not toggle any other behavior.
if gm := strings.ToLower(strings.TrimSpace(s.cfg.GeminiWeb.GemMode)); gm != "" {
switch gm {
case "coding-partner":
return &Gem{ID: "coding-partner", Name: "Coding partner", Predefined: true}
case "writing-editor":
return &Gem{ID: "writing-editor", Name: "Writing editor", Predefined: true}
}
}
// Backwards compatibility: legacy CodeMode still attaches Coding partner
// and may enable extra behaviors elsewhere.
if s.cfg.GeminiWeb.CodeMode {
return &Gem{ID: "coding-partner", Name: "Coding partner", Predefined: true}
}
return nil
@@ -1015,3 +1030,10 @@ func FindReusableSessionIn(items map[string]ConversationRecord, index map[string
}
return ConversationRecord{}, nil, 0, false
}
// SetConfig replaces the configuration reference held by the state with cfg.
// It exists so that a configuration hot-reload takes effect for runtime
// states that were cached on an auth during previous requests.
//
// NOTE(review): the assignment below is not synchronized within this method;
// confirm that callers serialize SetConfig against concurrent readers of s.cfg.
func (s *GeminiWebState) SetConfig(cfg *config.Config) {
s.cfg = cfg
}

View File

@@ -89,6 +89,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
payload = setJSONField(payload, "project", projectID)
payload = setJSONField(payload, "model", attemptModel)
}
payload = disableGeminiThinkingConfig(payload, attemptModel)
tok, errTok := tokenSource.Token()
if errTok != nil {
@@ -165,6 +166,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
payload := append([]byte(nil), basePayload...)
payload = setJSONField(payload, "project", projectID)
payload = setJSONField(payload, "model", attemptModel)
payload = disableGeminiThinkingConfig(payload, attemptModel)
tok, errTok := tokenSource.Token()
if errTok != nil {
@@ -291,6 +293,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
payload = deleteJSONField(payload, "project")
payload = deleteJSONField(payload, "model")
payload = disableGeminiThinkingConfig(payload, attemptModel)
tok, errTok := tokenSource.Token()
if errTok != nil {
@@ -500,6 +503,29 @@ func cliPreviewFallbackOrder(model string) []string {
}
}
// disableGeminiThinkingConfig returns body with any thinkingConfig removed
// when model is one that geminiModelDisallowsThinking reports as
// incompatible. For every other model the body is returned untouched.
//
// Two locations are cleared, in order: the nested
// "request.generationConfig.thinkingConfig" path and the top-level
// "generationConfig.thinkingConfig" path.
func disableGeminiThinkingConfig(body []byte, model string) []byte {
	if geminiModelDisallowsThinking(model) {
		thinkingPaths := [...]string{
			"request.generationConfig.thinkingConfig",
			"generationConfig.thinkingConfig",
		}
		for i := range thinkingPaths {
			body = deleteJSONField(body, thinkingPaths[i])
		}
	}
	return body
}
// geminiModelDisallowsThinking reports whether model names a Gemini model for
// which the thinking configuration must be dropped. The check is a
// case-insensitive substring match against a fixed list of model markers; an
// empty model name never matches.
func geminiModelDisallowsThinking(model string) bool {
	if len(model) == 0 {
		return false
	}
	normalized := strings.ToLower(model)
	markers := [...]string{"gemini-2.5-flash-image-preview"}
	for i := range markers {
		if strings.Contains(normalized, markers[i]) {
			return true
		}
	}
	return false
}
// setJSONField sets a top-level JSON field on a byte slice payload via sjson.
func setJSONField(body []byte, key, value string) []byte {
if key == "" {

View File

@@ -77,6 +77,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = disableGeminiThinkingConfig(body, req.Model)
action := "generateContent"
if req.Metadata != nil {
@@ -134,6 +135,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = disableGeminiThinkingConfig(body, req.Model)
url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent")
if opts.Alt == "" {
@@ -204,6 +206,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model)
respCtx := context.WithValue(ctx, "alt", opts.Alt)
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")

View File

@@ -168,6 +168,8 @@ func (e *GeminiWebExecutor) stateFor(auth *cliproxyauth.Auth) (*geminiwebapi.Gem
return nil, fmt.Errorf("gemini-web executor: auth is nil")
}
if runtime, ok := auth.Runtime.(*geminiWebRuntime); ok && runtime != nil && runtime.state != nil {
// Hot-reload: ensure cached state sees the latest config
runtime.state.SetConfig(e.cfg)
return runtime.state, nil
}
@@ -175,6 +177,8 @@ func (e *GeminiWebExecutor) stateFor(auth *cliproxyauth.Auth) (*geminiwebapi.Gem
defer e.mu.Unlock()
if runtime, ok := auth.Runtime.(*geminiWebRuntime); ok && runtime != nil && runtime.state != nil {
// Hot-reload: ensure cached state sees the latest config
runtime.state.SetConfig(e.cfg)
return runtime.state, nil
}

View File

@@ -34,9 +34,17 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte,
}
inputResult := gjson.GetBytes(rawJSON, "input")
inputResults := []gjson.Result{}
if inputResult.Exists() && inputResult.IsArray() {
inputResults = inputResult.Array()
var inputResults []gjson.Result
if inputResult.Exists() {
if inputResult.IsArray() {
inputResults = inputResult.Array()
} else if inputResult.Type == gjson.String {
newInput := `[{"type":"message","role":"user","content":[{"type":"input_text","text":""}]}]`
newInput, _ = sjson.Set(newInput, "0.content.0.text", inputResult.String())
inputResults = gjson.Parse(newInput).Array()
}
} else {
inputResults = []gjson.Result{}
}
extractedSystemInstructions := false

View File

@@ -8,6 +8,7 @@ package chat_completions
import (
"bytes"
"context"
"encoding/json"
"fmt"
"time"
@@ -100,6 +101,10 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
partResult := partResults[i]
partTextResult := partResult.Get("text")
functionCallResult := partResult.Get("functionCall")
inlineDataResult := partResult.Get("inlineData")
if !inlineDataResult.Exists() {
inlineDataResult = partResult.Get("inline_data")
}
if partTextResult.Exists() {
// Handle text content, distinguishing between regular content and reasoning/thoughts.
@@ -125,6 +130,34 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
}
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallTemplate)
} else if inlineDataResult.Exists() {
data := inlineDataResult.Get("data").String()
if data == "" {
continue
}
mimeType := inlineDataResult.Get("mimeType").String()
if mimeType == "" {
mimeType = inlineDataResult.Get("mime_type").String()
}
if mimeType == "" {
mimeType = "image/png"
}
imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
imagePayload, err := json.Marshal(map[string]any{
"type": "image_url",
"image_url": map[string]string{
"url": imageURL,
},
})
if err != nil {
continue
}
imagesResult := gjson.Get(template, "choices.0.delta.images")
if !imagesResult.Exists() || !imagesResult.IsArray() {
template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`)
}
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", string(imagePayload))
}
}
}

View File

@@ -521,6 +521,9 @@ func (w *Watcher) reloadConfig() bool {
if oldConfig.GeminiWeb.DisableContinuationHint != newConfig.GeminiWeb.DisableContinuationHint {
log.Debugf(" gemini-web.disable-continuation-hint: %t -> %t", oldConfig.GeminiWeb.DisableContinuationHint, newConfig.GeminiWeb.DisableContinuationHint)
}
if oldConfig.GeminiWeb.GemMode != newConfig.GeminiWeb.GemMode {
log.Debugf(" gemini-web.gem-mode: %s -> %s", oldConfig.GeminiWeb.GemMode, newConfig.GeminiWeb.GemMode)
}
if oldConfig.GeminiWeb.CodeMode != newConfig.GeminiWeb.CodeMode {
log.Debugf(" gemini-web.code-mode: %t -> %t", oldConfig.GeminiWeb.CodeMode, newConfig.GeminiWeb.CodeMode)
}

View File

@@ -291,6 +291,17 @@ func (s *Service) ensureExecutorsForAuth(a *coreauth.Auth) {
}
}
// rebindExecutors walks every auth currently listed by the core manager and
// re-runs ensureExecutorsForAuth on it, so provider executors observe the
// latest configuration. It is a no-op when the service or its core manager
// is nil.
func (s *Service) rebindExecutors() {
	if s == nil {
		return
	}
	if s.coreManager == nil {
		return
	}
	for _, registered := range s.coreManager.List() {
		s.ensureExecutorsForAuth(registered)
	}
}
// Run starts the service and blocks until the context is cancelled or the server stops.
// It initializes all components including authentication, file watching, HTTP server,
// and starts processing requests. The method blocks until the context is cancelled.
@@ -389,6 +400,7 @@ func (s *Service) Run(ctx context.Context) error {
s.cfgMu.Lock()
s.cfg = newCfg
s.cfgMu.Unlock()
s.rebindExecutors()
}