diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index d1407a9e..5051d0bb 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -104,6 +104,34 @@ func GetGeminiModels() []*ModelInfo { OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, }, + { + ID: "gemini-2.5-flash-image-preview", + Object: "model", + Created: time.Now().Unix(), + OwnedBy: "google", + Type: "gemini", + Name: "models/gemini-2.5-flash-image-preview", + Version: "2.5", + DisplayName: "Gemini 2.5 Flash Image Preview", + Description: "State-of-the-art image generation and editing model.", + InputTokenLimit: 1048576, + OutputTokenLimit: 8192, + SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, + }, + { + ID: "gemini-2.5-flash-image", + Object: "model", + Created: time.Now().Unix(), + OwnedBy: "google", + Type: "gemini", + Name: "models/gemini-2.5-flash-image", + Version: "2.5", + DisplayName: "Gemini 2.5 Flash Image", + Description: "State-of-the-art image generation and editing model.", + InputTokenLimit: 1048576, + OutputTokenLimit: 8192, + SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, + }, } } diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 37f83ae3..8a70b196 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -14,6 +14,7 @@ import ( "github.com/gin-gonic/gin" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor 
"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" @@ -60,6 +61,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) action := "generateContent" if req.Metadata != nil { @@ -148,6 +150,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) projectID := strings.TrimSpace(stringValue(auth.Metadata, "project_id")) @@ -294,6 +297,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. 
payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") payload = disableGeminiThinkingConfig(payload, attemptModel) + payload = fixGeminiCLIImageAspectRatio(attemptModel, payload) tok, errTok := tokenSource.Token() if errTok != nil { @@ -549,3 +553,45 @@ func deleteJSONField(body []byte, key string) []byte { } return updated } + +func fixGeminiCLIImageAspectRatio(modelName string, rawJSON []byte) []byte { + if modelName == "gemini-2.5-flash-image-preview" { + aspectRatioResult := gjson.GetBytes(rawJSON, "request.generationConfig.imageConfig.aspectRatio") + if aspectRatioResult.Exists() { + contents := gjson.GetBytes(rawJSON, "request.contents") + contentArray := contents.Array() + if len(contentArray) > 0 { + hasInlineData := false + loopContent: + for i := 0; i < len(contentArray); i++ { + parts := contentArray[i].Get("parts").Array() + for j := 0; j < len(parts); j++ { + if parts[j].Get("inlineData").Exists() { + hasInlineData = true + break loopContent + } + } + } + + if !hasInlineData { + emptyImageBase64ed, _ := util.CreateWhiteImageBase64(aspectRatioResult.String()) + emptyImagePart := `{"inlineData":{"mime_type":"image/png","data":""}}` + emptyImagePart, _ = sjson.Set(emptyImagePart, "inlineData.data", emptyImageBase64ed) + newPartsJson := `[]` + newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", `{"text": "Based on the following requirements, create an image within the uploaded picture. 
The new content *MUST* completely cover the entire area of the original picture, maintaining its exact proportions, and *NO* blank areas should appear."}`) + newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", emptyImagePart) + + parts := contentArray[0].Get("parts").Array() + for j := 0; j < len(parts); j++ { + newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", parts[j].Raw) + } + + rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.contents.0.parts", []byte(newPartsJson)) + rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.generationConfig.responseModalities", []byte(`["Image", "Text"]`)) + } + } + rawJSON, _ = sjson.DeleteBytes(rawJSON, "request.generationConfig.imageConfig") + } + } + return rawJSON +} diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index 9a94783f..9073e0b6 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -78,6 +78,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r to := sdktranslator.FromString("gemini") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body = disableGeminiThinkingConfig(body, req.Model) + body = fixGeminiImageAspectRatio(req.Model, body) action := "generateContent" if req.Metadata != nil { @@ -136,6 +137,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A to := sdktranslator.FromString("gemini") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body = disableGeminiThinkingConfig(body, req.Model) + body = fixGeminiImageAspectRatio(req.Model, body) url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent") if opts.Alt == "" { @@ -207,6 +209,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("gemini") translatedReq := sdktranslator.TranslateRequest(from, 
to, req.Model, bytes.Clone(req.Payload), false) translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model) + translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") @@ -374,3 +377,45 @@ func geminiCreds(a *cliproxyauth.Auth) (apiKey, bearer string) { } return } + +func fixGeminiImageAspectRatio(modelName string, rawJSON []byte) []byte { + if modelName == "gemini-2.5-flash-image-preview" { + aspectRatioResult := gjson.GetBytes(rawJSON, "generationConfig.imageConfig.aspectRatio") + if aspectRatioResult.Exists() { + contents := gjson.GetBytes(rawJSON, "contents") + contentArray := contents.Array() + if len(contentArray) > 0 { + hasInlineData := false + loopContent: + for i := 0; i < len(contentArray); i++ { + parts := contentArray[i].Get("parts").Array() + for j := 0; j < len(parts); j++ { + if parts[j].Get("inlineData").Exists() { + hasInlineData = true + break loopContent + } + } + } + + if !hasInlineData { + emptyImageBase64ed, _ := util.CreateWhiteImageBase64(aspectRatioResult.String()) + emptyImagePart := `{"inlineData":{"mime_type":"image/png","data":""}}` + emptyImagePart, _ = sjson.Set(emptyImagePart, "inlineData.data", emptyImageBase64ed) + newPartsJson := `[]` + newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", `{"text": "Based on the following requirements, create an image within the uploaded picture. 
// CreateWhiteImageBase64 renders a solid white PNG whose pixel dimensions are
// chosen from a fixed table keyed by aspectRatio (for example "16:9" yields
// 1344x768) and returns the PNG bytes as a standard base64 string.
//
// An aspectRatio that is not in the table, including the empty string, produces
// the same 1024x1024 square as "1:1". The returned error is whatever the PNG
// encoder reports; on error the string is empty.
func CreateWhiteImageBase64(aspectRatio string) (string, error) {
	// Default canvas; also what "1:1" maps to.
	size := image.Pt(1024, 1024)

	switch aspectRatio {
	case "2:3":
		size = image.Pt(832, 1248)
	case "3:2":
		size = image.Pt(1248, 832)
	case "3:4":
		size = image.Pt(864, 1184)
	case "4:3":
		size = image.Pt(1184, 864)
	case "4:5":
		size = image.Pt(896, 1152)
	case "5:4":
		size = image.Pt(1152, 896)
	case "9:16":
		size = image.Pt(768, 1344)
	case "16:9":
		size = image.Pt(1344, 768)
	case "21:9":
		size = image.Pt(1536, 672)
	}

	// Paint the whole canvas opaque white.
	canvas := image.NewRGBA(image.Rectangle{Max: size})
	draw.Draw(canvas, canvas.Bounds(), image.White, image.Point{}, draw.Src)

	encoded := new(bytes.Buffer)
	if err := png.Encode(encoded, canvas); err != nil {
		return "", err
	}
	return base64.StdEncoding.EncodeToString(encoded.Bytes()), nil
}