mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-03 04:50:52 +08:00
feat(registry, executor, util): add support for gemini-2.5-flash-image-preview and improve aspect ratio handling
- Introduced `gemini-2.5-flash-image-preview` model to the registry with updated definitions. - Enhanced Gemini CLI and API executors to handle image aspect ratio adjustments for the new model. - Added utility function to create base64 white image placeholders based on aspect ratio configurations.
This commit is contained in:
@@ -104,6 +104,34 @@ func GetGeminiModels() []*ModelInfo {
|
|||||||
OutputTokenLimit: 65536,
|
OutputTokenLimit: 65536,
|
||||||
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
ID: "gemini-2.5-flash-image-preview",
|
||||||
|
Object: "model",
|
||||||
|
Created: time.Now().Unix(),
|
||||||
|
OwnedBy: "google",
|
||||||
|
Type: "gemini",
|
||||||
|
Name: "models/gemini-2.5-flash-image-preview",
|
||||||
|
Version: "2.5",
|
||||||
|
DisplayName: "Gemini 2.5 Flash Image Preview",
|
||||||
|
Description: "State-of-the-art image generation and editing model.",
|
||||||
|
InputTokenLimit: 1048576,
|
||||||
|
OutputTokenLimit: 8192,
|
||||||
|
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "gemini-2.5-flash-image",
|
||||||
|
Object: "model",
|
||||||
|
Created: time.Now().Unix(),
|
||||||
|
OwnedBy: "google",
|
||||||
|
Type: "gemini",
|
||||||
|
Name: "models/gemini-2.5-flash-image",
|
||||||
|
Version: "2.5",
|
||||||
|
DisplayName: "Gemini 2.5 Flash Image",
|
||||||
|
Description: "State-of-the-art image generation and editing model.",
|
||||||
|
InputTokenLimit: 1048576,
|
||||||
|
OutputTokenLimit: 8192,
|
||||||
|
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import (
|
|||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||||
@@ -60,6 +61,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
|
|||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("gemini-cli")
|
to := sdktranslator.FromString("gemini-cli")
|
||||||
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
|
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
|
||||||
|
|
||||||
action := "generateContent"
|
action := "generateContent"
|
||||||
if req.Metadata != nil {
|
if req.Metadata != nil {
|
||||||
@@ -148,6 +150,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
|
|||||||
from := opts.SourceFormat
|
from := opts.SourceFormat
|
||||||
to := sdktranslator.FromString("gemini-cli")
|
to := sdktranslator.FromString("gemini-cli")
|
||||||
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
|
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
|
||||||
|
|
||||||
projectID := strings.TrimSpace(stringValue(auth.Metadata, "project_id"))
|
projectID := strings.TrimSpace(stringValue(auth.Metadata, "project_id"))
|
||||||
|
|
||||||
@@ -294,6 +297,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
|
|||||||
payload = deleteJSONField(payload, "project")
|
payload = deleteJSONField(payload, "project")
|
||||||
payload = deleteJSONField(payload, "model")
|
payload = deleteJSONField(payload, "model")
|
||||||
payload = disableGeminiThinkingConfig(payload, attemptModel)
|
payload = disableGeminiThinkingConfig(payload, attemptModel)
|
||||||
|
payload = fixGeminiCLIImageAspectRatio(attemptModel, payload)
|
||||||
|
|
||||||
tok, errTok := tokenSource.Token()
|
tok, errTok := tokenSource.Token()
|
||||||
if errTok != nil {
|
if errTok != nil {
|
||||||
@@ -549,3 +553,45 @@ func deleteJSONField(body []byte, key string) []byte {
|
|||||||
}
|
}
|
||||||
return updated
|
return updated
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func fixGeminiCLIImageAspectRatio(modelName string, rawJSON []byte) []byte {
|
||||||
|
if modelName == "gemini-2.5-flash-image-preview" {
|
||||||
|
aspectRatioResult := gjson.GetBytes(rawJSON, "request.generationConfig.imageConfig.aspectRatio")
|
||||||
|
if aspectRatioResult.Exists() {
|
||||||
|
contents := gjson.GetBytes(rawJSON, "request.contents")
|
||||||
|
contentArray := contents.Array()
|
||||||
|
if len(contentArray) > 0 {
|
||||||
|
hasInlineData := false
|
||||||
|
loopContent:
|
||||||
|
for i := 0; i < len(contentArray); i++ {
|
||||||
|
parts := contentArray[i].Get("parts").Array()
|
||||||
|
for j := 0; j < len(parts); j++ {
|
||||||
|
if parts[j].Get("inlineData").Exists() {
|
||||||
|
hasInlineData = true
|
||||||
|
break loopContent
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !hasInlineData {
|
||||||
|
emptyImageBase64ed, _ := util.CreateWhiteImageBase64(aspectRatioResult.String())
|
||||||
|
emptyImagePart := `{"inlineData":{"mime_type":"image/png","data":""}}`
|
||||||
|
emptyImagePart, _ = sjson.Set(emptyImagePart, "inlineData.data", emptyImageBase64ed)
|
||||||
|
newPartsJson := `[]`
|
||||||
|
newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", `{"text": "Based on the following requirements, create an image within the uploaded picture. The new content *MUST* completely cover the entire area of the original picture, maintaining its exact proportions, and *NO* blank areas should appear."}`)
|
||||||
|
newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", emptyImagePart)
|
||||||
|
|
||||||
|
parts := contentArray[0].Get("parts").Array()
|
||||||
|
for j := 0; j < len(parts); j++ {
|
||||||
|
newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", parts[j].Raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.contents.0.parts", []byte(newPartsJson))
|
||||||
|
rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.generationConfig.responseModalities", []byte(`["Image", "Text"]`))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rawJSON, _ = sjson.DeleteBytes(rawJSON, "request.generationConfig.imageConfig")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return rawJSON
|
||||||
|
}
|
||||||
|
|||||||
@@ -78,6 +78,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
|||||||
to := sdktranslator.FromString("gemini")
|
to := sdktranslator.FromString("gemini")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
body = disableGeminiThinkingConfig(body, req.Model)
|
body = disableGeminiThinkingConfig(body, req.Model)
|
||||||
|
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||||
|
|
||||||
action := "generateContent"
|
action := "generateContent"
|
||||||
if req.Metadata != nil {
|
if req.Metadata != nil {
|
||||||
@@ -136,6 +137,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
|||||||
to := sdktranslator.FromString("gemini")
|
to := sdktranslator.FromString("gemini")
|
||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
body = disableGeminiThinkingConfig(body, req.Model)
|
body = disableGeminiThinkingConfig(body, req.Model)
|
||||||
|
body = fixGeminiImageAspectRatio(req.Model, body)
|
||||||
|
|
||||||
url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent")
|
url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent")
|
||||||
if opts.Alt == "" {
|
if opts.Alt == "" {
|
||||||
@@ -207,6 +209,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
|
|||||||
to := sdktranslator.FromString("gemini")
|
to := sdktranslator.FromString("gemini")
|
||||||
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model)
|
translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model)
|
||||||
|
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
|
||||||
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
respCtx := context.WithValue(ctx, "alt", opts.Alt)
|
||||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
|
||||||
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
|
||||||
@@ -374,3 +377,45 @@ func geminiCreds(a *cliproxyauth.Auth) (apiKey, bearer string) {
|
|||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func fixGeminiImageAspectRatio(modelName string, rawJSON []byte) []byte {
|
||||||
|
if modelName == "gemini-2.5-flash-image-preview" {
|
||||||
|
aspectRatioResult := gjson.GetBytes(rawJSON, "generationConfig.imageConfig.aspectRatio")
|
||||||
|
if aspectRatioResult.Exists() {
|
||||||
|
contents := gjson.GetBytes(rawJSON, "contents")
|
||||||
|
contentArray := contents.Array()
|
||||||
|
if len(contentArray) > 0 {
|
||||||
|
hasInlineData := false
|
||||||
|
loopContent:
|
||||||
|
for i := 0; i < len(contentArray); i++ {
|
||||||
|
parts := contentArray[i].Get("parts").Array()
|
||||||
|
for j := 0; j < len(parts); j++ {
|
||||||
|
if parts[j].Get("inlineData").Exists() {
|
||||||
|
hasInlineData = true
|
||||||
|
break loopContent
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !hasInlineData {
|
||||||
|
emptyImageBase64ed, _ := util.CreateWhiteImageBase64(aspectRatioResult.String())
|
||||||
|
emptyImagePart := `{"inlineData":{"mime_type":"image/png","data":""}}`
|
||||||
|
emptyImagePart, _ = sjson.Set(emptyImagePart, "inlineData.data", emptyImageBase64ed)
|
||||||
|
newPartsJson := `[]`
|
||||||
|
newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", `{"text": "Based on the following requirements, create an image within the uploaded picture. The new content *MUST* completely cover the entire area of the original picture, maintaining its exact proportions, and *NO* blank areas should appear."}`)
|
||||||
|
newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", emptyImagePart)
|
||||||
|
|
||||||
|
parts := contentArray[0].Get("parts").Array()
|
||||||
|
for j := 0; j < len(parts); j++ {
|
||||||
|
newPartsJson, _ = sjson.SetRaw(newPartsJson, "-1", parts[j].Raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
rawJSON, _ = sjson.SetRawBytes(rawJSON, "contents.0.parts", []byte(newPartsJson))
|
||||||
|
rawJSON, _ = sjson.SetRawBytes(rawJSON, "generationConfig.responseModalities", []byte(`["Image", "Text"]`))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rawJSON, _ = sjson.DeleteBytes(rawJSON, "generationConfig.imageConfig")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return rawJSON
|
||||||
|
}
|
||||||
|
|||||||
59
internal/util/image.go
Normal file
59
internal/util/image.go
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
package util
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/base64"
|
||||||
|
"image"
|
||||||
|
"image/draw"
|
||||||
|
"image/png"
|
||||||
|
)
|
||||||
|
|
||||||
|
func CreateWhiteImageBase64(aspectRatio string) (string, error) {
|
||||||
|
width := 1024
|
||||||
|
height := 1024
|
||||||
|
|
||||||
|
switch aspectRatio {
|
||||||
|
case "1:1":
|
||||||
|
width = 1024
|
||||||
|
height = 1024
|
||||||
|
case "2:3":
|
||||||
|
width = 832
|
||||||
|
height = 1248
|
||||||
|
case "3:2":
|
||||||
|
width = 1248
|
||||||
|
height = 832
|
||||||
|
case "3:4":
|
||||||
|
width = 864
|
||||||
|
height = 1184
|
||||||
|
case "4:3":
|
||||||
|
width = 1184
|
||||||
|
height = 864
|
||||||
|
case "4:5":
|
||||||
|
width = 896
|
||||||
|
height = 1152
|
||||||
|
case "5:4":
|
||||||
|
width = 1152
|
||||||
|
height = 896
|
||||||
|
case "9:16":
|
||||||
|
width = 768
|
||||||
|
height = 1344
|
||||||
|
case "16:9":
|
||||||
|
width = 1344
|
||||||
|
height = 768
|
||||||
|
case "21:9":
|
||||||
|
width = 1536
|
||||||
|
height = 672
|
||||||
|
}
|
||||||
|
|
||||||
|
img := image.NewRGBA(image.Rect(0, 0, width, height))
|
||||||
|
draw.Draw(img, img.Bounds(), image.White, image.Point{}, draw.Src)
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
|
||||||
|
if err := png.Encode(&buf, img); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
base64String := base64.StdEncoding.EncodeToString(buf.Bytes())
|
||||||
|
return base64String, nil
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user