feat(registry, executor, util): add support for gemini-2.5-flash-image-preview and improve aspect ratio handling

- Introduced `gemini-2.5-flash-image-preview` model to the registry with updated definitions.
- Enhanced Gemini CLI and API executors to handle image aspect ratio adjustments for the new model.
- Added utility function to create base64 white image placeholders based on aspect ratio configurations.
This commit is contained in:
Luis Pater
2025-10-10 01:49:58 +08:00
parent 1aa568ce45
commit 20787cd107
4 changed files with 178 additions and 0 deletions

View File

@@ -104,6 +104,34 @@ func GetGeminiModels() []*ModelInfo {
OutputTokenLimit: 65536, OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
}, },
{
ID: "gemini-2.5-flash-image-preview",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-image-preview",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Image Preview",
Description: "State-of-the-art image generation and editing model.",
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
},
{
ID: "gemini-2.5-flash-image",
Object: "model",
Created: time.Now().Unix(),
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-2.5-flash-image",
Version: "2.5",
DisplayName: "Gemini 2.5 Flash Image",
Description: "State-of-the-art image generation and editing model.",
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
},
} }
} }

View File

@@ -14,6 +14,7 @@ import (
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -60,6 +61,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
from := opts.SourceFormat from := opts.SourceFormat
to := sdktranslator.FromString("gemini-cli") to := sdktranslator.FromString("gemini-cli")
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
action := "generateContent" action := "generateContent"
if req.Metadata != nil { if req.Metadata != nil {
@@ -148,6 +150,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
from := opts.SourceFormat from := opts.SourceFormat
to := sdktranslator.FromString("gemini-cli") to := sdktranslator.FromString("gemini-cli")
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
projectID := strings.TrimSpace(stringValue(auth.Metadata, "project_id")) projectID := strings.TrimSpace(stringValue(auth.Metadata, "project_id"))
@@ -294,6 +297,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "project")
payload = deleteJSONField(payload, "model") payload = deleteJSONField(payload, "model")
payload = disableGeminiThinkingConfig(payload, attemptModel) payload = disableGeminiThinkingConfig(payload, attemptModel)
payload = fixGeminiCLIImageAspectRatio(attemptModel, payload)
tok, errTok := tokenSource.Token() tok, errTok := tokenSource.Token()
if errTok != nil { if errTok != nil {
@@ -549,3 +553,45 @@ func deleteJSONField(body []byte, key string) []byte {
} }
return updated return updated
} }
// fixGeminiCLIImageAspectRatio rewrites a Gemini CLI request payload for the
// "gemini-2.5-flash-image-preview" model when it carries
// request.generationConfig.imageConfig.aspectRatio.
//
// If no part of request.contents contains inlineData, the parts of the first
// content entry are prefixed with an instruction text part and a white
// placeholder image produced by util.CreateWhiteImageBase64 for the requested
// ratio, and request.generationConfig.responseModalities is set to
// ["Image", "Text"]. Whenever the aspect ratio is present, the whole
// request.generationConfig.imageConfig object is removed before returning.
//
// For any other model, or when no aspect ratio is set, rawJSON is returned
// untouched. Errors from the JSON helpers are ignored, as in the rest of the
// payload-patching helpers visible in this file.
func fixGeminiCLIImageAspectRatio(modelName string, rawJSON []byte) []byte {
	if modelName != "gemini-2.5-flash-image-preview" {
		return rawJSON
	}

	ratio := gjson.GetBytes(rawJSON, "request.generationConfig.imageConfig.aspectRatio")
	if !ratio.Exists() {
		return rawJSON
	}

	if turns := gjson.GetBytes(rawJSON, "request.contents").Array(); len(turns) > 0 {
		// Look through every part of every content entry for an existing image.
		imageSupplied := false
	scan:
		for _, turn := range turns {
			for _, part := range turn.Get("parts").Array() {
				if part.Get("inlineData").Exists() {
					imageSupplied = true
					break scan
				}
			}
		}

		if !imageSupplied {
			// Build the placeholder image part sized for the requested ratio.
			whitePNG, _ := util.CreateWhiteImageBase64(ratio.String())
			placeholderPart, _ := sjson.Set(`{"inlineData":{"mime_type":"image/png","data":""}}`, "inlineData.data", whitePNG)

			// New parts list: instruction, placeholder image, then the original parts.
			rebuilt := `[]`
			rebuilt, _ = sjson.SetRaw(rebuilt, "-1", `{"text": "Based on the following requirements, create an image within the uploaded picture. The new content *MUST* completely cover the entire area of the original picture, maintaining its exact proportions, and *NO* blank areas should appear."}`)
			rebuilt, _ = sjson.SetRaw(rebuilt, "-1", placeholderPart)
			for _, original := range turns[0].Get("parts").Array() {
				rebuilt, _ = sjson.SetRaw(rebuilt, "-1", original.Raw)
			}

			rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.contents.0.parts", []byte(rebuilt))
			rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.generationConfig.responseModalities", []byte(`["Image", "Text"]`))
		}
	}

	rawJSON, _ = sjson.DeleteBytes(rawJSON, "request.generationConfig.imageConfig")
	return rawJSON
}

View File

@@ -78,6 +78,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
to := sdktranslator.FromString("gemini") to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = disableGeminiThinkingConfig(body, req.Model) body = disableGeminiThinkingConfig(body, req.Model)
body = fixGeminiImageAspectRatio(req.Model, body)
action := "generateContent" action := "generateContent"
if req.Metadata != nil { if req.Metadata != nil {
@@ -136,6 +137,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
to := sdktranslator.FromString("gemini") to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = disableGeminiThinkingConfig(body, req.Model) body = disableGeminiThinkingConfig(body, req.Model)
body = fixGeminiImageAspectRatio(req.Model, body)
url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent") url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent")
if opts.Alt == "" { if opts.Alt == "" {
@@ -207,6 +209,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
to := sdktranslator.FromString("gemini") to := sdktranslator.FromString("gemini")
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model) translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model)
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
respCtx := context.WithValue(ctx, "alt", opts.Alt) respCtx := context.WithValue(ctx, "alt", opts.Alt)
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
@@ -374,3 +377,45 @@ func geminiCreds(a *cliproxyauth.Auth) (apiKey, bearer string) {
} }
return return
} }
// fixGeminiImageAspectRatio rewrites a Gemini API request payload for the
// "gemini-2.5-flash-image-preview" model when it carries
// generationConfig.imageConfig.aspectRatio.
//
// If none of the parts under contents holds inlineData, the first content
// entry's parts are replaced with: an instruction text part, a white
// placeholder image from util.CreateWhiteImageBase64 matching the requested
// ratio, and then the original parts. generationConfig.responseModalities is
// set to ["Image", "Text"] in that case. Whenever the aspect ratio is present,
// generationConfig.imageConfig is deleted from the payload.
//
// Payloads for other models, or without an aspect ratio, are returned
// unchanged. Errors from the JSON helpers are deliberately discarded, exactly
// as before.
func fixGeminiImageAspectRatio(modelName string, rawJSON []byte) []byte {
	if modelName != "gemini-2.5-flash-image-preview" {
		return rawJSON
	}

	requestedRatio := gjson.GetBytes(rawJSON, "generationConfig.imageConfig.aspectRatio")
	if !requestedRatio.Exists() {
		return rawJSON
	}

	entries := gjson.GetBytes(rawJSON, "contents").Array()
	if len(entries) > 0 {
		// Reports whether any part in any content entry already carries an image.
		carriesInlineData := func() bool {
			for _, entry := range entries {
				for _, p := range entry.Get("parts").Array() {
					if p.Get("inlineData").Exists() {
						return true
					}
				}
			}
			return false
		}

		if !carriesInlineData() {
			blankPNG, _ := util.CreateWhiteImageBase64(requestedRatio.String())
			blankPart := `{"inlineData":{"mime_type":"image/png","data":""}}`
			blankPart, _ = sjson.Set(blankPart, "inlineData.data", blankPNG)

			// Instruction and placeholder come first; the original parts follow in order.
			partsJSON := `[]`
			partsJSON, _ = sjson.SetRaw(partsJSON, "-1", `{"text": "Based on the following requirements, create an image within the uploaded picture. The new content *MUST* completely cover the entire area of the original picture, maintaining its exact proportions, and *NO* blank areas should appear."}`)
			partsJSON, _ = sjson.SetRaw(partsJSON, "-1", blankPart)
			for _, existing := range entries[0].Get("parts").Array() {
				partsJSON, _ = sjson.SetRaw(partsJSON, "-1", existing.Raw)
			}

			rawJSON, _ = sjson.SetRawBytes(rawJSON, "contents.0.parts", []byte(partsJSON))
			rawJSON, _ = sjson.SetRawBytes(rawJSON, "generationConfig.responseModalities", []byte(`["Image", "Text"]`))
		}
	}

	rawJSON, _ = sjson.DeleteBytes(rawJSON, "generationConfig.imageConfig")
	return rawJSON
}

59
internal/util/image.go Normal file
View File

@@ -0,0 +1,59 @@
package util
import (
"bytes"
"encoding/base64"
"image"
"image/draw"
"image/png"
)
// CreateWhiteImageBase64 renders a solid white PNG whose pixel dimensions are
// chosen from aspectRatio and returns the PNG bytes encoded with standard
// base64.
//
// Recognized ratios are "1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4",
// "9:16", "16:9" and "21:9". Any other value (including the empty string)
// yields the 1024x1024 default. A non-nil error is returned only when PNG
// encoding fails, in which case the string result is empty.
func CreateWhiteImageBase64(aspectRatio string) (string, error) {
	type dimensions struct{ w, h int }

	// Pixel sizes for each supported aspect ratio.
	presets := map[string]dimensions{
		"1:1":  {1024, 1024},
		"2:3":  {832, 1248},
		"3:2":  {1248, 832},
		"3:4":  {864, 1184},
		"4:3":  {1184, 864},
		"4:5":  {896, 1152},
		"5:4":  {1152, 896},
		"9:16": {768, 1344},
		"16:9": {1344, 768},
		"21:9": {1536, 672},
	}

	size, known := presets[aspectRatio]
	if !known {
		size = dimensions{1024, 1024}
	}

	canvas := image.NewRGBA(image.Rect(0, 0, size.w, size.h))
	// Src overwrites every pixel, so the freshly allocated canvas ends up fully white.
	draw.Draw(canvas, canvas.Bounds(), image.White, image.Point{}, draw.Src)

	var encoded bytes.Buffer
	if err := png.Encode(&encoded, canvas); err != nil {
		return "", err
	}
	return base64.StdEncoding.EncodeToString(encoded.Bytes()), nil
}