refactor(config): migrate to SDKConfig and streamline proxy handling

- Replaced `config.Config` with `config.SDKConfig` across components for simpler configuration management.
- Updated proxy setup functions and handlers to align with `SDKConfig` improvements.
- Reorganized handler imports to match new SDK structure.
This commit is contained in:
Luis Pater
2025-09-27 04:50:23 +08:00
parent 40255b128e
commit 57c9ba49f4
17 changed files with 61 additions and 51 deletions
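For orientation, a minimal sketch of the post-migration construction path (a hedged example; the old `config.Config`-based signature is inferred from the description above, and the field values are illustrative):

package main
import (
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
)
// newHandlers shows the post-migration wiring: handlers now receive the
// slimmer SDKConfig instead of the full application config.
func newHandlers(authManager *coreauth.Manager) *handlers.BaseAPIHandler {
sdkCfg := &config.SDKConfig{
ProxyURL: "socks5://127.0.0.1:1080", // illustrative value
RequestLog: true,
}
return handlers.NewBaseAPIHandlers(sdkCfg, authManager)
}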

View File

@@ -0,0 +1,237 @@
// Package claude provides HTTP handlers for Claude API code-related functionality.
// This package implements Claude-compatible streaming chat completions with sophisticated
// client rotation and quota management systems to ensure high availability and optimal
// resource utilization across multiple backend clients. It handles request translation
// between Claude API format and the underlying Gemini backend, providing seamless
// API compatibility while maintaining robust error handling and connection management.
package claude
import (
"bytes"
"context"
"fmt"
"net/http"
"time"
"github.com/gin-gonic/gin"
. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
"github.com/tidwall/gjson"
)
// ClaudeCodeAPIHandler contains the handlers for Claude API endpoints.
// It holds a pool of clients to interact with the backend service.
type ClaudeCodeAPIHandler struct {
*handlers.BaseAPIHandler
}
// NewClaudeCodeAPIHandler creates a new Claude API handlers instance.
// It takes a BaseAPIHandler instance as input and returns a ClaudeCodeAPIHandler.
//
// Parameters:
// - apiHandlers: The base API handler instance.
//
// Returns:
// - *ClaudeCodeAPIHandler: A new Claude code API handler instance.
func NewClaudeCodeAPIHandler(apiHandlers *handlers.BaseAPIHandler) *ClaudeCodeAPIHandler {
return &ClaudeCodeAPIHandler{
BaseAPIHandler: apiHandlers,
}
}
// HandlerType returns the identifier for this handler implementation.
func (h *ClaudeCodeAPIHandler) HandlerType() string {
return Claude
}
// Models returns a list of models supported by this handler.
func (h *ClaudeCodeAPIHandler) Models() []map[string]any {
// Get dynamic models from the global registry
modelRegistry := registry.GetGlobalRegistry()
return modelRegistry.GetAvailableModels("claude")
}
// ClaudeMessages handles Claude-compatible chat completions, dispatching to the streaming or non-streaming path based on the request's stream flag.
// This function implements a sophisticated client rotation and quota management system
// to ensure high availability and optimal resource utilization across multiple backend clients.
//
// Parameters:
// - c: The Gin context for the request.
func (h *ClaudeCodeAPIHandler) ClaudeMessages(c *gin.Context) {
// Extract raw JSON data from the incoming request
rawJSON, err := c.GetRawData()
// If data retrieval fails, return a 400 Bad Request error.
if err != nil {
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: fmt.Sprintf("Invalid request: %v", err),
Type: "invalid_request_error",
},
})
return
}
// Check if the client requested a streaming response.
streamResult := gjson.GetBytes(rawJSON, "stream")
if !streamResult.Exists() || streamResult.Type == gjson.False {
h.handleNonStreamingResponse(c, rawJSON)
} else {
h.handleStreamingResponse(c, rawJSON)
}
}
// ClaudeCountTokens handles Claude-compatible token counting requests.
// It forwards the request through the auth manager and writes the token
// count response back to the client.
//
// Parameters:
// - c: The Gin context for the request.
func (h *ClaudeCodeAPIHandler) ClaudeCountTokens(c *gin.Context) {
// Extract raw JSON data from the incoming request
rawJSON, err := c.GetRawData()
// If data retrieval fails, return a 400 Bad Request error.
if err != nil {
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: fmt.Sprintf("Invalid request: %v", err),
Type: "invalid_request_error",
},
})
return
}
c.Header("Content-Type", "application/json")
alt := h.GetAlt(c)
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
modelName := gjson.GetBytes(rawJSON, "model").String()
resp, errMsg := h.ExecuteCountWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
cliCancel(errMsg.Error)
return
}
_, _ = c.Writer.Write(resp)
cliCancel()
}
// ClaudeModels handles the Claude models listing endpoint.
// It returns a JSON response containing available Claude models and their specifications.
//
// Parameters:
// - c: The Gin context for the request.
func (h *ClaudeCodeAPIHandler) ClaudeModels(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{
"data": h.Models(),
})
}
// handleNonStreamingResponse handles non-streaming content generation requests for Claude models.
// This function processes the request synchronously and returns the complete generated
// response in a single API call. It supports various generation parameters and
// response formats.
//
// Parameters:
// - c: The Gin context for the request
// - rawJSON: The raw JSON request body containing generation parameters and content
func (h *ClaudeCodeAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []byte) {
c.Header("Content-Type", "application/json")
alt := h.GetAlt(c)
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
modelName := gjson.GetBytes(rawJSON, "model").String()
resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
cliCancel(errMsg.Error)
return
}
_, _ = c.Writer.Write(resp)
cliCancel()
}
// handleStreamingResponse streams Claude-compatible responses backed by Gemini.
// It sets up SSE, selects a backend client with rotation/quota logic,
// forwards chunks, and translates them to Claude CLI format.
//
// Parameters:
// - c: The Gin context for the request.
// - rawJSON: The raw JSON request body.
func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byte) {
// Set up Server-Sent Events (SSE) headers for streaming response
// These headers are essential for maintaining a persistent connection
// and enabling real-time streaming of chat completions
c.Header("Content-Type", "text/event-stream")
c.Header("Cache-Control", "no-cache")
c.Header("Connection", "keep-alive")
c.Header("Access-Control-Allow-Origin", "*")
// Get the http.Flusher interface to manually flush the response.
// This is crucial for streaming as it allows immediate sending of data chunks
flusher, ok := c.Writer.(http.Flusher)
if !ok {
c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: "Streaming not supported",
Type: "server_error",
},
})
return
}
modelName := gjson.GetBytes(rawJSON, "model").String()
// Create a cancellable context for the backend client request
// This allows proper cleanup and cancellation of ongoing requests
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
h.forwardClaudeStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
}
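// forwardClaudeStream relays streamed chunks to the client as SSE, inserting a
// separator newline before each "event:" frame, and resolves the cancel callback
// with the terminal error state (nil on clean channel close).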
func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
for {
select {
case <-c.Request.Context().Done():
cancel(c.Request.Context().Err())
return
case chunk, ok := <-data:
if !ok {
flusher.Flush()
cancel(nil)
return
}
if bytes.HasPrefix(chunk, []byte("event:")) {
_, _ = c.Writer.Write([]byte("\n"))
}
_, _ = c.Writer.Write(chunk)
_, _ = c.Writer.Write([]byte("\n"))
flusher.Flush()
case errMsg, ok := <-errs:
if !ok {
continue
}
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
flusher.Flush()
}
var execErr error
if errMsg != nil {
execErr = errMsg.Error
}
cancel(execErr)
return
case <-time.After(500 * time.Millisecond):
}
}
}
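A hedged wiring sketch for the handler above (route paths are assumptions modeled on the Anthropic API; the actual registration lives elsewhere in the repository, and the claude import path is assumed):

package main
import (
"github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v6/internal/api/handlers/claude" // assumed import path
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
)
func registerClaudeRoutes(r *gin.Engine, base *handlers.BaseAPIHandler) {
h := claude.NewClaudeCodeAPIHandler(base)
r.POST("/v1/messages", h.ClaudeMessages) // streaming and non-streaming chat
r.POST("/v1/messages/count_tokens", h.ClaudeCountTokens)
r.GET("/v1/models", h.ClaudeModels)
}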

View File

@@ -0,0 +1,227 @@
// Package gemini provides HTTP handlers for Gemini CLI API functionality.
// This package implements handlers that process CLI-specific requests for Gemini API operations,
// including content generation and streaming content generation endpoints.
// The handlers restrict access to localhost only and manage communication with the backend service.
package gemini
import (
"bytes"
"context"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/gin-gonic/gin"
. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
log "github.com/sirupsen/logrus"
"github.com/tidwall/gjson"
)
// GeminiCLIAPIHandler contains the handlers for Gemini CLI API endpoints.
// It holds a pool of clients to interact with the backend service.
type GeminiCLIAPIHandler struct {
*handlers.BaseAPIHandler
}
// NewGeminiCLIAPIHandler creates a new Gemini CLI API handlers instance.
// It takes a BaseAPIHandler instance as input and returns a GeminiCLIAPIHandler.
func NewGeminiCLIAPIHandler(apiHandlers *handlers.BaseAPIHandler) *GeminiCLIAPIHandler {
return &GeminiCLIAPIHandler{
BaseAPIHandler: apiHandlers,
}
}
// HandlerType returns the type of this handler.
func (h *GeminiCLIAPIHandler) HandlerType() string {
return GeminiCLI
}
// Models returns a list of models supported by this handler.
func (h *GeminiCLIAPIHandler) Models() []map[string]any {
return make([]map[string]any, 0)
}
// CLIHandler handles CLI-specific requests for Gemini API operations.
// It restricts access to localhost only and routes requests to appropriate internal handlers.
func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) {
if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") {
c.JSON(http.StatusForbidden, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: "CLI reply only allow local access",
Type: "forbidden",
},
})
return
}
rawJSON, _ := c.GetRawData()
requestRawURI := c.Request.URL.Path
if requestRawURI == "/v1internal:generateContent" {
h.handleInternalGenerateContent(c, rawJSON)
} else if requestRawURI == "/v1internal:streamGenerateContent" {
h.handleInternalStreamGenerateContent(c, rawJSON)
} else {
reqBody := bytes.NewBuffer(rawJSON)
req, err := http.NewRequest("POST", fmt.Sprintf("https://cloudcode-pa.googleapis.com%s", c.Request.URL.RequestURI()), reqBody)
if err != nil {
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: fmt.Sprintf("Invalid request: %v", err),
Type: "invalid_request_error",
},
})
return
}
for key, value := range c.Request.Header {
req.Header[key] = value
}
httpClient := util.SetProxy(h.Cfg, &http.Client{})
resp, err := httpClient.Do(req)
if err != nil {
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: fmt.Sprintf("Invalid request: %v", err),
Type: "invalid_request_error",
},
})
return
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
defer func() {
if err = resp.Body.Close(); err != nil {
log.Warnf("failed to close response body: %v", err)
}
}()
bodyBytes, _ := io.ReadAll(resp.Body)
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: string(bodyBytes),
Type: "invalid_request_error",
},
})
return
}
defer func() {
_ = resp.Body.Close()
}()
for key, value := range resp.Header {
c.Header(key, value[0])
}
output, err := io.ReadAll(resp.Body)
if err != nil {
log.Errorf("Failed to read response body: %v", err)
return
}
_, _ = c.Writer.Write(output)
c.Set("API_RESPONSE", output)
}
}
// handleInternalStreamGenerateContent handles streaming content generation requests.
// It sets up a server-sent event stream and forwards the request to the backend client.
// The function continuously proxies response chunks from the backend to the client.
func (h *GeminiCLIAPIHandler) handleInternalStreamGenerateContent(c *gin.Context, rawJSON []byte) {
alt := h.GetAlt(c)
if alt == "" {
c.Header("Content-Type", "text/event-stream")
c.Header("Cache-Control", "no-cache")
c.Header("Connection", "keep-alive")
c.Header("Access-Control-Allow-Origin", "*")
}
// Get the http.Flusher interface to manually flush the response.
flusher, ok := c.Writer.(http.Flusher)
if !ok {
c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: "Streaming not supported",
Type: "server_error",
},
})
return
}
modelResult := gjson.GetBytes(rawJSON, "model")
modelName := modelResult.String()
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
h.forwardCLIStream(c, flusher, alt, func(err error) { cliCancel(err) }, dataChan, errChan)
}
// handleInternalGenerateContent handles non-streaming content generation requests.
// It sends a request to the backend client and proxies the entire response back to the client at once.
func (h *GeminiCLIAPIHandler) handleInternalGenerateContent(c *gin.Context, rawJSON []byte) {
c.Header("Content-Type", "application/json")
modelResult := gjson.GetBytes(rawJSON, "model")
modelName := modelResult.String()
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
cliCancel(errMsg.Error)
return
}
_, _ = c.Writer.Write(resp)
cliCancel()
}
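// forwardCLIStream relays streamed chunks to the client. When alt is empty it
// normalizes chunks into SSE "data:" frames and drops [DONE] sentinels; otherwise
// chunks are written through verbatim. The cancel callback receives the terminal error, if any.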
func (h *GeminiCLIAPIHandler) forwardCLIStream(c *gin.Context, flusher http.Flusher, alt string, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
for {
select {
case <-c.Request.Context().Done():
cancel(c.Request.Context().Err())
return
case chunk, ok := <-data:
if !ok {
cancel(nil)
return
}
if alt == "" {
if bytes.Equal(chunk, []byte("data: [DONE]")) || bytes.Equal(chunk, []byte("[DONE]")) {
continue
}
if !bytes.HasPrefix(chunk, []byte("data:")) {
_, _ = c.Writer.Write([]byte("data: "))
}
_, _ = c.Writer.Write(chunk)
_, _ = c.Writer.Write([]byte("\n\n"))
} else {
_, _ = c.Writer.Write(chunk)
}
flusher.Flush()
case errMsg, ok := <-errs:
if !ok {
continue
}
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
flusher.Flush()
}
var execErr error
if errMsg != nil {
execErr = errMsg.Error
}
cancel(execErr)
return
case <-time.After(500 * time.Millisecond):
}
}
}
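The only proxy touchpoint above is the `util.SetProxy(h.Cfg, &http.Client{})` call. A hedged sketch of what such a helper plausibly does with `SDKConfig.ProxyURL` (the shipped `internal/util` implementation may differ, for example with SOCKS5 support):

package util
import (
"net/http"
"net/url"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
)
// SetProxy configures the client's transport from cfg.ProxyURL, if set,
// and returns the client unchanged otherwise. Sketch only.
func SetProxy(cfg *config.SDKConfig, client *http.Client) *http.Client {
if cfg == nil || cfg.ProxyURL == "" {
return client
}
proxyURL, err := url.Parse(cfg.ProxyURL)
if err != nil {
return client
}
client.Transport = &http.Transport{Proxy: http.ProxyURL(proxyURL)}
return client
}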

View File

@@ -0,0 +1,297 @@
// Package gemini provides HTTP handlers for Gemini API endpoints.
// This package implements handlers for managing Gemini model operations including
// model listing, content generation, streaming content generation, and token counting.
// It serves as a proxy layer between clients and the Gemini backend service,
// handling request translation, client management, and response processing.
package gemini
import (
"context"
"fmt"
"net/http"
"strings"
"time"
"github.com/gin-gonic/gin"
. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
)
// GeminiAPIHandler contains the handlers for Gemini API endpoints.
// It holds a pool of clients to interact with the backend service.
type GeminiAPIHandler struct {
*handlers.BaseAPIHandler
}
// NewGeminiAPIHandler creates a new Gemini API handlers instance.
// It takes a BaseAPIHandler instance as input and returns a GeminiAPIHandler.
func NewGeminiAPIHandler(apiHandlers *handlers.BaseAPIHandler) *GeminiAPIHandler {
return &GeminiAPIHandler{
BaseAPIHandler: apiHandlers,
}
}
// HandlerType returns the identifier for this handler implementation.
func (h *GeminiAPIHandler) HandlerType() string {
return Gemini
}
// Models returns the Gemini-compatible model metadata supported by this handler.
func (h *GeminiAPIHandler) Models() []map[string]any {
// Get dynamic models from the global registry
modelRegistry := registry.GetGlobalRegistry()
return modelRegistry.GetAvailableModels("gemini")
}
// GeminiModels handles the Gemini models listing endpoint.
// It returns a JSON response containing available Gemini models and their specifications.
func (h *GeminiAPIHandler) GeminiModels(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{
"models": h.Models(),
})
}
// GeminiGetHandler handles GET requests for specific Gemini model information.
// It returns detailed information about a specific Gemini model based on the action parameter.
func (h *GeminiAPIHandler) GeminiGetHandler(c *gin.Context) {
var request struct {
Action string `uri:"action" binding:"required"`
}
if err := c.ShouldBindUri(&request); err != nil {
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: fmt.Sprintf("Invalid request: %v", err),
Type: "invalid_request_error",
},
})
return
}
switch request.Action {
case "gemini-2.5-pro":
c.JSON(http.StatusOK, gin.H{
"name": "models/gemini-2.5-pro",
"version": "2.5",
"displayName": "Gemini 2.5 Pro",
"description": "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
"inputTokenLimit": 1048576,
"outputTokenLimit": 65536,
"supportedGenerationMethods": []string{
"generateContent",
"countTokens",
"createCachedContent",
"batchGenerateContent",
},
"temperature": 1,
"topP": 0.95,
"topK": 64,
"maxTemperature": 2,
"thinking": true,
},
)
case "gemini-2.5-flash":
c.JSON(http.StatusOK, gin.H{
"name": "models/gemini-2.5-flash",
"version": "001",
"displayName": "Gemini 2.5 Flash",
"description": "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
"inputTokenLimit": 1048576,
"outputTokenLimit": 65536,
"supportedGenerationMethods": []string{
"generateContent",
"countTokens",
"createCachedContent",
"batchGenerateContent",
},
"temperature": 1,
"topP": 0.95,
"topK": 64,
"maxTemperature": 2,
"thinking": true,
})
case "gpt-5":
c.JSON(http.StatusOK, gin.H{
"name": "gpt-5",
"version": "001",
"displayName": "GPT 5",
"description": "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
"inputTokenLimit": 400000,
"outputTokenLimit": 128000,
"supportedGenerationMethods": []string{
"generateContent",
},
"temperature": 1,
"topP": 0.95,
"topK": 64,
"maxTemperature": 2,
"thinking": true,
})
default:
c.JSON(http.StatusNotFound, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: "Not Found",
Type: "not_found",
},
})
}
}
// GeminiHandler handles POST requests for Gemini API operations.
// It routes requests to appropriate handlers based on the action parameter (model:method format).
func (h *GeminiAPIHandler) GeminiHandler(c *gin.Context) {
var request struct {
Action string `uri:"action" binding:"required"`
}
if err := c.ShouldBindUri(&request); err != nil {
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: fmt.Sprintf("Invalid request: %v", err),
Type: "invalid_request_error",
},
})
return
}
action := strings.Split(request.Action, ":")
if len(action) != 2 {
c.JSON(http.StatusNotFound, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: fmt.Sprintf("%s not found.", c.Request.URL.Path),
Type: "invalid_request_error",
},
})
return
}
method := action[1]
rawJSON, _ := c.GetRawData()
switch method {
case "generateContent":
h.handleGenerateContent(c, action[0], rawJSON)
case "streamGenerateContent":
h.handleStreamGenerateContent(c, action[0], rawJSON)
case "countTokens":
h.handleCountTokens(c, action[0], rawJSON)
default:
c.JSON(http.StatusNotFound, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: fmt.Sprintf("%s not found.", c.Request.URL.Path),
Type: "invalid_request_error",
},
})
}
}
// handleStreamGenerateContent handles streaming content generation requests for Gemini models.
// This function establishes a Server-Sent Events connection and streams the generated content
// back to the client in real-time. It supports both SSE format and direct streaming based
// on the 'alt' query parameter.
//
// Parameters:
// - c: The Gin context for the request
// - modelName: The name of the Gemini model to use for content generation
// - rawJSON: The raw JSON request body containing generation parameters
func (h *GeminiAPIHandler) handleStreamGenerateContent(c *gin.Context, modelName string, rawJSON []byte) {
alt := h.GetAlt(c)
if alt == "" {
c.Header("Content-Type", "text/event-stream")
c.Header("Cache-Control", "no-cache")
c.Header("Connection", "keep-alive")
c.Header("Access-Control-Allow-Origin", "*")
}
// Get the http.Flusher interface to manually flush the response.
flusher, ok := c.Writer.(http.Flusher)
if !ok {
c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: "Streaming not supported",
Type: "server_error",
},
})
return
}
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
h.forwardGeminiStream(c, flusher, alt, func(err error) { cliCancel(err) }, dataChan, errChan)
}
// handleCountTokens handles token counting requests for Gemini models.
// This function counts the number of tokens in the provided content without
// generating a response. It's useful for quota management and content validation.
//
// Parameters:
// - c: The Gin context for the request
// - modelName: The name of the Gemini model to use for token counting
// - rawJSON: The raw JSON request body containing the content to count
func (h *GeminiAPIHandler) handleCountTokens(c *gin.Context, modelName string, rawJSON []byte) {
c.Header("Content-Type", "application/json")
alt := h.GetAlt(c)
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
resp, errMsg := h.ExecuteCountWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
cliCancel(errMsg.Error)
return
}
_, _ = c.Writer.Write(resp)
cliCancel()
}
// handleGenerateContent handles non-streaming content generation requests for Gemini models.
// This function processes the request synchronously and returns the complete generated
// response in a single API call. It supports various generation parameters and
// response formats.
//
// Parameters:
// - c: The Gin context for the request
// - modelName: The name of the Gemini model to use for content generation
// - rawJSON: The raw JSON request body containing generation parameters and content
func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName string, rawJSON []byte) {
c.Header("Content-Type", "application/json")
alt := h.GetAlt(c)
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
cliCancel(errMsg.Error)
return
}
_, _ = c.Writer.Write(resp)
cliCancel()
}
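// forwardGeminiStream relays streamed chunks to the client, wrapping them in SSE
// "data:" frames when alt is empty and writing them verbatim otherwise. The cancel
// callback receives the terminal error state (nil on clean channel close).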
func (h *GeminiAPIHandler) forwardGeminiStream(c *gin.Context, flusher http.Flusher, alt string, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
for {
select {
case <-c.Request.Context().Done():
cancel(c.Request.Context().Err())
return
case chunk, ok := <-data:
if !ok {
cancel(nil)
return
}
if alt == "" {
_, _ = c.Writer.Write([]byte("data: "))
_, _ = c.Writer.Write(chunk)
_, _ = c.Writer.Write([]byte("\n\n"))
} else {
_, _ = c.Writer.Write(chunk)
}
flusher.Flush()
case errMsg, ok := <-errs:
if !ok {
continue
}
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
flusher.Flush()
}
var execErr error
if errMsg != nil {
execErr = errMsg.Error
}
cancel(execErr)
return
case <-time.After(500 * time.Millisecond):
}
}
}
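Because GeminiHandler splits the action on ":", a single URI names both the model and the method. A hedged client-side example (host, port, and mount path are assumptions):

package main
import (
"net/http"
"strings"
)
func callGenerateContent() (*http.Response, error) {
// "gemini-2.5-pro:generateContent" is split into model and method by GeminiHandler.
body := strings.NewReader(`{"contents":[{"parts":[{"text":"Hello"}]}]}`)
return http.Post(
"http://127.0.0.1:8317/v1beta/models/gemini-2.5-pro:generateContent", // assumed mount
"application/json",
body,
)
}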

View File

@@ -0,0 +1,267 @@
// Package handlers provides core API handler functionality for the CLI Proxy API server.
// It includes common types, client management, load balancing, and error handling
// shared across all API endpoint handlers (OpenAI, Claude, Gemini).
package handlers
import (
"fmt"
"net/http"
"github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
coreexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
"golang.org/x/net/context"
)
// ErrorResponse represents a standard error response format for the API.
// It contains a single ErrorDetail field.
type ErrorResponse struct {
// Error contains detailed information about the error that occurred.
Error ErrorDetail `json:"error"`
}
// ErrorDetail provides specific information about an error that occurred.
// It includes a human-readable message, an error type, and an optional error code.
type ErrorDetail struct {
// Message is a human-readable message providing more details about the error.
Message string `json:"message"`
// Type is the category of error that occurred (e.g., "invalid_request_error").
Type string `json:"type"`
// Code is a short code identifying the error, if applicable.
Code string `json:"code,omitempty"`
}
// BaseAPIHandler contains the handlers for API endpoints.
// It holds a pool of clients to interact with the backend service and manages
// load balancing, client selection, and configuration.
type BaseAPIHandler struct {
// AuthManager manages auth lifecycle and execution in the new architecture.
AuthManager *coreauth.Manager
// Cfg holds the current application configuration.
Cfg *config.SDKConfig
}
// NewBaseAPIHandlers creates a new API handlers instance.
// It takes the SDK configuration and the core auth manager as input.
//
// Parameters:
// - cfg: The application configuration
// - authManager: The core auth manager used to execute requests
//
// Returns:
// - *BaseAPIHandler: A new API handlers instance
func NewBaseAPIHandlers(cfg *config.SDKConfig, authManager *coreauth.Manager) *BaseAPIHandler {
return &BaseAPIHandler{
Cfg: cfg,
AuthManager: authManager,
}
}
// UpdateClients updates the handler's configuration.
// This method is called when the configuration or authentication tokens change.
//
// Parameters:
// - cfg: The new application configuration
func (h *BaseAPIHandler) UpdateClients(cfg *config.SDKConfig) { h.Cfg = cfg }
// GetAlt extracts the 'alt' parameter from the request query string.
// It checks both 'alt' and '$alt' parameters and returns the appropriate value.
//
// Parameters:
// - c: The Gin context containing the HTTP request
//
// Returns:
// - string: The alt parameter value, or empty string if it's "sse"
func (h *BaseAPIHandler) GetAlt(c *gin.Context) string {
alt, hasAlt := c.GetQuery("alt")
if !hasAlt {
alt, _ = c.GetQuery("$alt")
}
if alt == "sse" {
return ""
}
return alt
}
// GetContextWithCancel creates a new context with cancellation capabilities.
// It embeds the Gin context and the API handler into the new context for later use.
// The returned cancel function also handles logging the API response if request logging is enabled.
//
// Parameters:
// - handler: The API handler associated with the request.
// - c: The Gin context of the current request.
// - ctx: The parent context.
//
// Returns:
// - context.Context: The new context with cancellation and embedded values.
// - APIHandlerCancelFunc: A function to cancel the context and log the response.
func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *gin.Context, ctx context.Context) (context.Context, APIHandlerCancelFunc) {
newCtx, cancel := context.WithCancel(ctx)
newCtx = context.WithValue(newCtx, "gin", c)
newCtx = context.WithValue(newCtx, "handler", handler)
return newCtx, func(params ...interface{}) {
if h.Cfg.RequestLog {
if len(params) == 1 {
data := params[0]
switch v := data.(type) {
case []byte:
c.Set("API_RESPONSE", v)
case error:
c.Set("API_RESPONSE", []byte(v.Error()))
case string:
c.Set("API_RESPONSE", []byte(v))
case bool:
case nil:
}
}
}
cancel()
}
}
// ExecuteWithAuthManager executes a non-streaming request via the core auth manager.
// This path is the only supported execution route.
func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
providers := util.GetProviderName(modelName)
if len(providers) == 0 {
return nil, &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
}
req := coreexecutor.Request{
Model: modelName,
Payload: cloneBytes(rawJSON),
}
opts := coreexecutor.Options{
Stream: false,
Alt: alt,
OriginalRequest: cloneBytes(rawJSON),
SourceFormat: sdktranslator.FromString(handlerType),
}
resp, err := h.AuthManager.Execute(ctx, providers, req, opts)
if err != nil {
return nil, &interfaces.ErrorMessage{StatusCode: http.StatusInternalServerError, Error: err}
}
return cloneBytes(resp.Payload), nil
}
// ExecuteCountWithAuthManager executes a token counting request via the core auth manager.
// This path is the only supported execution route.
func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
providers := util.GetProviderName(modelName)
if len(providers) == 0 {
return nil, &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
}
req := coreexecutor.Request{
Model: modelName,
Payload: cloneBytes(rawJSON),
}
opts := coreexecutor.Options{
Stream: false,
Alt: alt,
OriginalRequest: cloneBytes(rawJSON),
SourceFormat: sdktranslator.FromString(handlerType),
}
resp, err := h.AuthManager.ExecuteCount(ctx, providers, req, opts)
if err != nil {
return nil, &interfaces.ErrorMessage{StatusCode: http.StatusInternalServerError, Error: err}
}
return cloneBytes(resp.Payload), nil
}
// ExecuteStreamWithAuthManager executes a streaming request via the core auth manager.
// This path is the only supported execution route.
func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) (<-chan []byte, <-chan *interfaces.ErrorMessage) {
providers := util.GetProviderName(modelName)
if len(providers) == 0 {
errChan := make(chan *interfaces.ErrorMessage, 1)
errChan <- &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)}
close(errChan)
return nil, errChan
}
req := coreexecutor.Request{
Model: modelName,
Payload: cloneBytes(rawJSON),
}
opts := coreexecutor.Options{
Stream: true,
Alt: alt,
OriginalRequest: cloneBytes(rawJSON),
SourceFormat: sdktranslator.FromString(handlerType),
}
chunks, err := h.AuthManager.ExecuteStream(ctx, providers, req, opts)
if err != nil {
errChan := make(chan *interfaces.ErrorMessage, 1)
errChan <- &interfaces.ErrorMessage{StatusCode: http.StatusInternalServerError, Error: err}
close(errChan)
return nil, errChan
}
dataChan := make(chan []byte)
errChan := make(chan *interfaces.ErrorMessage, 1)
go func() {
defer close(dataChan)
defer close(errChan)
for chunk := range chunks {
if chunk.Err != nil {
errChan <- &interfaces.ErrorMessage{StatusCode: http.StatusInternalServerError, Error: chunk.Err}
return
}
if len(chunk.Payload) > 0 {
dataChan <- cloneBytes(chunk.Payload)
}
}
}()
return dataChan, errChan
}
func cloneBytes(src []byte) []byte {
if len(src) == 0 {
return nil
}
dst := make([]byte, len(src))
copy(dst, src)
return dst
}
// WriteErrorResponse writes an error message to the response writer using the HTTP status embedded in the message.
func (h *BaseAPIHandler) WriteErrorResponse(c *gin.Context, msg *interfaces.ErrorMessage) {
status := http.StatusInternalServerError
if msg != nil && msg.StatusCode > 0 {
status = msg.StatusCode
}
c.Status(status)
if msg != nil && msg.Error != nil {
_, _ = c.Writer.Write([]byte(msg.Error.Error()))
} else {
_, _ = c.Writer.Write([]byte(http.StatusText(status)))
}
}
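// LoggingAPIResponseError records an API error in the Gin context's
// API_RESPONSE_ERROR slice when request logging is enabled, creating the
// slice on first use.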
func (h *BaseAPIHandler) LoggingAPIResponseError(ctx context.Context, err *interfaces.ErrorMessage) {
if h.Cfg.RequestLog {
if ginContext, ok := ctx.Value("gin").(*gin.Context); ok {
if apiResponseErrors, isExist := ginContext.Get("API_RESPONSE_ERROR"); isExist {
if slicesAPIResponseError, isOk := apiResponseErrors.([]*interfaces.ErrorMessage); isOk {
slicesAPIResponseError = append(slicesAPIResponseError, err)
ginContext.Set("API_RESPONSE_ERROR", slicesAPIResponseError)
}
} else {
// Create new response data entry
ginContext.Set("API_RESPONSE_ERROR", []*interfaces.ErrorMessage{err})
}
}
}
}
// APIHandlerCancelFunc is a function type for canceling an API handler's context.
// It can optionally accept parameters, which are used for logging the response.
type APIHandlerCancelFunc func(params ...interface{})
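A short usage sketch of the cancel-with-params convention defined by GetContextWithCancel above, as it would appear inside a concrete handler (fragment; `modelName`, `rawJSON`, and the final `cliCancel(resp)` call are illustrative):

cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
cliCancel(errMsg.Error) // error text is recorded as API_RESPONSE when request-log is on
return
}
_, _ = c.Writer.Write(resp)
cliCancel(resp) // passing the payload records it; a bare cliCancel() records nothing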

View File

@@ -0,0 +1,568 @@
// Package openai provides HTTP handlers for OpenAI API endpoints.
// This package implements the OpenAI-compatible API interface, including model listing
// and chat completion functionality. It supports both streaming and non-streaming responses,
// and manages a pool of clients to interact with backend services.
// The handlers translate OpenAI API requests to the appropriate backend format and
// convert responses back to OpenAI-compatible format.
package openai
import (
"context"
"encoding/json"
"fmt"
"net/http"
"time"
"github.com/gin-gonic/gin"
. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
// OpenAIAPIHandler contains the handlers for OpenAI API endpoints.
// It holds a pool of clients to interact with the backend service.
type OpenAIAPIHandler struct {
*handlers.BaseAPIHandler
}
// NewOpenAIAPIHandler creates a new OpenAI API handlers instance.
// It takes a BaseAPIHandler instance as input and returns an OpenAIAPIHandler.
//
// Parameters:
// - apiHandlers: The base API handlers instance
//
// Returns:
// - *OpenAIAPIHandler: A new OpenAI API handlers instance
func NewOpenAIAPIHandler(apiHandlers *handlers.BaseAPIHandler) *OpenAIAPIHandler {
return &OpenAIAPIHandler{
BaseAPIHandler: apiHandlers,
}
}
// HandlerType returns the identifier for this handler implementation.
func (h *OpenAIAPIHandler) HandlerType() string {
return OpenAI
}
// Models returns the OpenAI-compatible model metadata supported by this handler.
func (h *OpenAIAPIHandler) Models() []map[string]any {
// Get dynamic models from the global registry
modelRegistry := registry.GetGlobalRegistry()
return modelRegistry.GetAvailableModels("openai")
}
// OpenAIModels handles the /v1/models endpoint.
// It returns a list of available AI models with their capabilities
// and specifications in OpenAI-compatible format.
func (h *OpenAIAPIHandler) OpenAIModels(c *gin.Context) {
// Get all available models
allModels := h.Models()
// Filter to only include the 4 required fields: id, object, created, owned_by
filteredModels := make([]map[string]any, len(allModels))
for i, model := range allModels {
filteredModel := map[string]any{
"id": model["id"],
"object": model["object"],
}
// Add created field if it exists
if created, exists := model["created"]; exists {
filteredModel["created"] = created
}
// Add owned_by field if it exists
if ownedBy, exists := model["owned_by"]; exists {
filteredModel["owned_by"] = ownedBy
}
filteredModels[i] = filteredModel
}
c.JSON(http.StatusOK, gin.H{
"object": "list",
"data": filteredModels,
})
}
// ChatCompletions handles the /v1/chat/completions endpoint.
// It determines whether the request is for a streaming or non-streaming response
// and calls the appropriate handler based on the model provider.
//
// Parameters:
// - c: The Gin context containing the HTTP request and response
func (h *OpenAIAPIHandler) ChatCompletions(c *gin.Context) {
rawJSON, err := c.GetRawData()
// If data retrieval fails, return a 400 Bad Request error.
if err != nil {
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: fmt.Sprintf("Invalid request: %v", err),
Type: "invalid_request_error",
},
})
return
}
// Check if the client requested a streaming response.
streamResult := gjson.GetBytes(rawJSON, "stream")
if streamResult.Type == gjson.True {
h.handleStreamingResponse(c, rawJSON)
} else {
h.handleNonStreamingResponse(c, rawJSON)
}
}
// Completions handles the /v1/completions endpoint.
// It determines whether the request is for a streaming or non-streaming response
// and calls the appropriate handler based on the model provider.
// This endpoint follows the OpenAI completions API specification.
//
// Parameters:
// - c: The Gin context containing the HTTP request and response
func (h *OpenAIAPIHandler) Completions(c *gin.Context) {
rawJSON, err := c.GetRawData()
// If data retrieval fails, return a 400 Bad Request error.
if err != nil {
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: fmt.Sprintf("Invalid request: %v", err),
Type: "invalid_request_error",
},
})
return
}
// Check if the client requested a streaming response.
streamResult := gjson.GetBytes(rawJSON, "stream")
if streamResult.Type == gjson.True {
h.handleCompletionsStreamingResponse(c, rawJSON)
} else {
h.handleCompletionsNonStreamingResponse(c, rawJSON)
}
}
// convertCompletionsRequestToChatCompletions converts OpenAI completions API request to chat completions format.
// This allows the completions endpoint to use the existing chat completions infrastructure.
//
// Parameters:
// - rawJSON: The raw JSON bytes of the completions request
//
// Returns:
// - []byte: The converted chat completions request
func convertCompletionsRequestToChatCompletions(rawJSON []byte) []byte {
root := gjson.ParseBytes(rawJSON)
// Extract prompt from completions request
prompt := root.Get("prompt").String()
if prompt == "" {
prompt = "Complete this:"
}
// Create chat completions structure
out := `{"model":"","messages":[{"role":"user","content":""}]}`
// Set model
if model := root.Get("model"); model.Exists() {
out, _ = sjson.Set(out, "model", model.String())
}
// Set the prompt as user message content
out, _ = sjson.Set(out, "messages.0.content", prompt)
// Copy other parameters from completions to chat completions
if maxTokens := root.Get("max_tokens"); maxTokens.Exists() {
out, _ = sjson.Set(out, "max_tokens", maxTokens.Int())
}
if temperature := root.Get("temperature"); temperature.Exists() {
out, _ = sjson.Set(out, "temperature", temperature.Float())
}
if topP := root.Get("top_p"); topP.Exists() {
out, _ = sjson.Set(out, "top_p", topP.Float())
}
if frequencyPenalty := root.Get("frequency_penalty"); frequencyPenalty.Exists() {
out, _ = sjson.Set(out, "frequency_penalty", frequencyPenalty.Float())
}
if presencePenalty := root.Get("presence_penalty"); presencePenalty.Exists() {
out, _ = sjson.Set(out, "presence_penalty", presencePenalty.Float())
}
if stop := root.Get("stop"); stop.Exists() {
out, _ = sjson.SetRaw(out, "stop", stop.Raw)
}
if stream := root.Get("stream"); stream.Exists() {
out, _ = sjson.Set(out, "stream", stream.Bool())
}
if logprobs := root.Get("logprobs"); logprobs.Exists() {
out, _ = sjson.Set(out, "logprobs", logprobs.Bool())
}
if topLogprobs := root.Get("top_logprobs"); topLogprobs.Exists() {
out, _ = sjson.Set(out, "top_logprobs", topLogprobs.Int())
}
if echo := root.Get("echo"); echo.Exists() {
out, _ = sjson.Set(out, "echo", echo.Bool())
}
return []byte(out)
}
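// Worked example (hedged, fields abbreviated): a completions request such as
//   {"model":"gpt-5","prompt":"Say hi","max_tokens":8,"stream":true}
// converts to
//   {"model":"gpt-5","messages":[{"role":"user","content":"Say hi"}],"max_tokens":8,"stream":true}
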
// convertChatCompletionsResponseToCompletions converts chat completions API response back to completions format.
// This ensures the completions endpoint returns data in the expected format.
//
// Parameters:
// - rawJSON: The raw JSON bytes of the chat completions response
//
// Returns:
// - []byte: The converted completions response
func convertChatCompletionsResponseToCompletions(rawJSON []byte) []byte {
root := gjson.ParseBytes(rawJSON)
// Base completions response structure
out := `{"id":"","object":"text_completion","created":0,"model":"","choices":[]}`
// Copy basic fields
if id := root.Get("id"); id.Exists() {
out, _ = sjson.Set(out, "id", id.String())
}
if created := root.Get("created"); created.Exists() {
out, _ = sjson.Set(out, "created", created.Int())
}
if model := root.Get("model"); model.Exists() {
out, _ = sjson.Set(out, "model", model.String())
}
if usage := root.Get("usage"); usage.Exists() {
out, _ = sjson.SetRaw(out, "usage", usage.Raw)
}
// Convert choices from chat completions to completions format
var choices []interface{}
if chatChoices := root.Get("choices"); chatChoices.Exists() && chatChoices.IsArray() {
chatChoices.ForEach(func(_, choice gjson.Result) bool {
completionsChoice := map[string]interface{}{
"index": choice.Get("index").Int(),
}
// Extract text content from message.content
if message := choice.Get("message"); message.Exists() {
if content := message.Get("content"); content.Exists() {
completionsChoice["text"] = content.String()
}
} else if delta := choice.Get("delta"); delta.Exists() {
// For streaming responses, use delta.content
if content := delta.Get("content"); content.Exists() {
completionsChoice["text"] = content.String()
}
}
// Copy finish_reason
if finishReason := choice.Get("finish_reason"); finishReason.Exists() {
completionsChoice["finish_reason"] = finishReason.String()
}
// Copy logprobs if present
if logprobs := choice.Get("logprobs"); logprobs.Exists() {
completionsChoice["logprobs"] = logprobs.Value()
}
choices = append(choices, completionsChoice)
return true
})
}
if len(choices) > 0 {
choicesJSON, _ := json.Marshal(choices)
out, _ = sjson.SetRaw(out, "choices", string(choicesJSON))
}
return []byte(out)
}
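// Worked example (hedged, fields abbreviated): a chat completions response like
//   {"id":"cmpl-1","created":1700000000,"model":"gpt-5","choices":[{"index":0,"message":{"content":"Hi"},"finish_reason":"stop"}]}
// maps to the completions shape
//   {"id":"cmpl-1","object":"text_completion","created":1700000000,"model":"gpt-5","choices":[{"finish_reason":"stop","index":0,"text":"Hi"}]}
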
// convertChatCompletionsStreamChunkToCompletions converts a streaming chat completions chunk to completions format.
// This handles the real-time conversion of streaming response chunks and filters out empty text responses.
//
// Parameters:
// - chunkData: The raw JSON bytes of a single chat completions stream chunk
//
// Returns:
// - []byte: The converted completions stream chunk, or nil if should be filtered out
func convertChatCompletionsStreamChunkToCompletions(chunkData []byte) []byte {
root := gjson.ParseBytes(chunkData)
// Check if this chunk has any meaningful content
hasContent := false
if chatChoices := root.Get("choices"); chatChoices.Exists() && chatChoices.IsArray() {
chatChoices.ForEach(func(_, choice gjson.Result) bool {
// Check if delta has content or finish_reason
if delta := choice.Get("delta"); delta.Exists() {
if content := delta.Get("content"); content.Exists() && content.String() != "" {
hasContent = true
return false // Break out of forEach
}
}
// Also check for finish_reason to ensure we don't skip final chunks
if finishReason := choice.Get("finish_reason"); finishReason.Exists() && finishReason.String() != "" && finishReason.String() != "null" {
hasContent = true
return false // Break out of forEach
}
return true
})
}
// If no meaningful content, return nil to indicate this chunk should be skipped
if !hasContent {
return nil
}
// Base completions stream response structure
out := `{"id":"","object":"text_completion","created":0,"model":"","choices":[]}`
// Copy basic fields
if id := root.Get("id"); id.Exists() {
out, _ = sjson.Set(out, "id", id.String())
}
if created := root.Get("created"); created.Exists() {
out, _ = sjson.Set(out, "created", created.Int())
}
if model := root.Get("model"); model.Exists() {
out, _ = sjson.Set(out, "model", model.String())
}
// Convert choices from chat completions delta to completions format
var choices []interface{}
if chatChoices := root.Get("choices"); chatChoices.Exists() && chatChoices.IsArray() {
chatChoices.ForEach(func(_, choice gjson.Result) bool {
completionsChoice := map[string]interface{}{
"index": choice.Get("index").Int(),
}
// Extract text content from delta.content
if delta := choice.Get("delta"); delta.Exists() {
if content := delta.Get("content"); content.Exists() && content.String() != "" {
completionsChoice["text"] = content.String()
} else {
completionsChoice["text"] = ""
}
} else {
completionsChoice["text"] = ""
}
// Copy finish_reason
if finishReason := choice.Get("finish_reason"); finishReason.Exists() && finishReason.String() != "null" {
completionsChoice["finish_reason"] = finishReason.String()
}
// Copy logprobs if present
if logprobs := choice.Get("logprobs"); logprobs.Exists() {
completionsChoice["logprobs"] = logprobs.Value()
}
choices = append(choices, completionsChoice)
return true
})
}
if len(choices) > 0 {
choicesJSON, _ := json.Marshal(choices)
out, _ = sjson.SetRaw(out, "choices", string(choicesJSON))
}
return []byte(out)
}
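// Worked example (hedged): a delta chunk {"choices":[{"index":0,"delta":{"content":"Hi"}}]}
// becomes {"id":"","object":"text_completion","created":0,"model":"","choices":[{"index":0,"text":"Hi"}]},
// while a chunk with an empty delta and a null finish_reason yields nil and is skipped.
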
// handleNonStreamingResponse handles non-streaming chat completion responses
// for Gemini models. It forwards the request through the auth manager and
// aggregates the response before sending it back to the client in OpenAI format.
//
// Parameters:
// - c: The Gin context containing the HTTP request and response
// - rawJSON: The raw JSON bytes of the OpenAI-compatible request
func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []byte) {
c.Header("Content-Type", "application/json")
modelName := gjson.GetBytes(rawJSON, "model").String()
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, h.GetAlt(c))
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
cliCancel(errMsg.Error)
return
}
_, _ = c.Writer.Write(resp)
cliCancel()
}
// handleStreamingResponse handles streaming responses for Gemini models.
// It establishes a streaming connection with the backend service and forwards
// the response chunks to the client in real-time using Server-Sent Events.
//
// Parameters:
// - c: The Gin context containing the HTTP request and response
// - rawJSON: The raw JSON bytes of the OpenAI-compatible request
func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byte) {
c.Header("Content-Type", "text/event-stream")
c.Header("Cache-Control", "no-cache")
c.Header("Connection", "keep-alive")
c.Header("Access-Control-Allow-Origin", "*")
// Get the http.Flusher interface to manually flush the response.
flusher, ok := c.Writer.(http.Flusher)
if !ok {
c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: "Streaming not supported",
Type: "server_error",
},
})
return
}
modelName := gjson.GetBytes(rawJSON, "model").String()
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, h.GetAlt(c))
h.handleStreamResult(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
}
// handleCompletionsNonStreamingResponse handles non-streaming completions responses.
// It converts completions request to chat completions format, sends to backend,
// then converts the response back to completions format before sending to client.
//
// Parameters:
// - c: The Gin context containing the HTTP request and response
// - rawJSON: The raw JSON bytes of the OpenAI-compatible completions request
func (h *OpenAIAPIHandler) handleCompletionsNonStreamingResponse(c *gin.Context, rawJSON []byte) {
c.Header("Content-Type", "application/json")
// Convert completions request to chat completions format
chatCompletionsJSON := convertCompletionsRequestToChatCompletions(rawJSON)
modelName := gjson.GetBytes(chatCompletionsJSON, "model").String()
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, chatCompletionsJSON, "")
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
cliCancel(errMsg.Error)
return
}
completionsResp := convertChatCompletionsResponseToCompletions(resp)
_, _ = c.Writer.Write(completionsResp)
cliCancel()
}
// handleCompletionsStreamingResponse handles streaming completions responses.
// It converts completions request to chat completions format, streams from backend,
// then converts each response chunk back to completions format before sending to client.
//
// Parameters:
// - c: The Gin context containing the HTTP request and response
// - rawJSON: The raw JSON bytes of the OpenAI-compatible completions request
func (h *OpenAIAPIHandler) handleCompletionsStreamingResponse(c *gin.Context, rawJSON []byte) {
c.Header("Content-Type", "text/event-stream")
c.Header("Cache-Control", "no-cache")
c.Header("Connection", "keep-alive")
c.Header("Access-Control-Allow-Origin", "*")
// Get the http.Flusher interface to manually flush the response.
flusher, ok := c.Writer.(http.Flusher)
if !ok {
c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: "Streaming not supported",
Type: "server_error",
},
})
return
}
// Convert completions request to chat completions format
chatCompletionsJSON := convertCompletionsRequestToChatCompletions(rawJSON)
modelName := gjson.GetBytes(chatCompletionsJSON, "model").String()
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, chatCompletionsJSON, "")
for {
select {
case <-c.Request.Context().Done():
cliCancel(c.Request.Context().Err())
return
case chunk, isOk := <-dataChan:
if !isOk {
_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
flusher.Flush()
cliCancel()
return
}
converted := convertChatCompletionsStreamChunkToCompletions(chunk)
if converted != nil {
_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(converted))
flusher.Flush()
}
case errMsg, isOk := <-errChan:
if !isOk {
continue
}
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
flusher.Flush()
}
var execErr error
if errMsg != nil {
execErr = errMsg.Error
}
cliCancel(execErr)
return
case <-time.After(500 * time.Millisecond):
}
}
}
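// handleStreamResult relays streamed chunks to the client as SSE "data:" frames
// and emits a final "data: [DONE]" sentinel when the data channel closes. The
// cancel callback receives the terminal error state (nil on clean channel close).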
func (h *OpenAIAPIHandler) handleStreamResult(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
for {
select {
case <-c.Request.Context().Done():
cancel(c.Request.Context().Err())
return
case chunk, ok := <-data:
if !ok {
_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
flusher.Flush()
cancel(nil)
return
}
_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(chunk))
flusher.Flush()
case errMsg, ok := <-errs:
if !ok {
continue
}
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
flusher.Flush()
}
var execErr error
if errMsg != nil {
execErr = errMsg.Error
}
cancel(execErr)
return
case <-time.After(500 * time.Millisecond):
}
}
}
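For reference, the wire format handleStreamResult emits, with payloads abbreviated (each chunk is one SSE data frame; the final sentinel is literal):

data: {"id":"chatcmpl-1","object":"chat.completion.chunk","choices":[{"delta":{"content":"Hel"}}]}

data: {"id":"chatcmpl-1","object":"chat.completion.chunk","choices":[{"delta":{"content":"lo"}}]}

data: [DONE]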

View File

@@ -0,0 +1,194 @@
// Package openai provides HTTP handlers for OpenAIResponses API endpoints.
// This package implements the OpenAIResponses-compatible API interface, including model listing
// and chat completion functionality. It supports both streaming and non-streaming responses,
// and manages a pool of clients to interact with backend services.
// The handlers translate OpenAIResponses API requests to the appropriate backend format and
// convert responses back to OpenAIResponses-compatible format.
package openai
import (
"bytes"
"context"
"fmt"
"net/http"
"time"
"github.com/gin-gonic/gin"
. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
"github.com/tidwall/gjson"
)
// OpenAIResponsesAPIHandler contains the handlers for OpenAIResponses API endpoints.
// It holds a pool of clients to interact with the backend service.
type OpenAIResponsesAPIHandler struct {
*handlers.BaseAPIHandler
}
// NewOpenAIResponsesAPIHandler creates a new OpenAIResponses API handlers instance.
// It takes a BaseAPIHandler instance as input and returns an OpenAIResponsesAPIHandler.
//
// Parameters:
// - apiHandlers: The base API handlers instance
//
// Returns:
// - *OpenAIResponsesAPIHandler: A new OpenAIResponses API handlers instance
func NewOpenAIResponsesAPIHandler(apiHandlers *handlers.BaseAPIHandler) *OpenAIResponsesAPIHandler {
return &OpenAIResponsesAPIHandler{
BaseAPIHandler: apiHandlers,
}
}
// HandlerType returns the identifier for this handler implementation.
func (h *OpenAIResponsesAPIHandler) HandlerType() string {
return OpenaiResponse
}
// Models returns the OpenAIResponses-compatible model metadata supported by this handler.
func (h *OpenAIResponsesAPIHandler) Models() []map[string]any {
// Get dynamic models from the global registry
modelRegistry := registry.GetGlobalRegistry()
return modelRegistry.GetAvailableModels("openai")
}
// OpenAIResponsesModels handles the /v1/models endpoint.
// It returns a list of available AI models with their capabilities
// and specifications in OpenAIResponses-compatible format.
func (h *OpenAIResponsesAPIHandler) OpenAIResponsesModels(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{
"object": "list",
"data": h.Models(),
})
}
// Responses handles the /v1/responses endpoint.
// It determines whether the request is for a streaming or non-streaming response
// and calls the appropriate handler based on the model provider.
//
// Parameters:
// - c: The Gin context containing the HTTP request and response
func (h *OpenAIResponsesAPIHandler) Responses(c *gin.Context) {
rawJSON, err := c.GetRawData()
// If data retrieval fails, return a 400 Bad Request error.
if err != nil {
c.JSON(http.StatusBadRequest, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: fmt.Sprintf("Invalid request: %v", err),
Type: "invalid_request_error",
},
})
return
}
// Check if the client requested a streaming response.
streamResult := gjson.GetBytes(rawJSON, "stream")
if streamResult.Type == gjson.True {
h.handleStreamingResponse(c, rawJSON)
} else {
h.handleNonStreamingResponse(c, rawJSON)
}
}
// handleNonStreamingResponse handles non-streaming chat completion responses
// for Gemini models. It forwards the request through the auth manager and
// aggregates the response before sending it back to the client in OpenAIResponses format.
//
// Parameters:
// - c: The Gin context containing the HTTP request and response
// - rawJSON: The raw JSON bytes of the OpenAIResponses-compatible request
func (h *OpenAIResponsesAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []byte) {
c.Header("Content-Type", "application/json")
modelName := gjson.GetBytes(rawJSON, "model").String()
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
defer func() {
cliCancel()
}()
resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
return
}
_, _ = c.Writer.Write(resp)
}
// handleStreamingResponse handles streaming responses for Gemini models.
// It establishes a streaming connection with the backend service and forwards
// the response chunks to the client in real-time using Server-Sent Events.
//
// Parameters:
// - c: The Gin context containing the HTTP request and response
// - rawJSON: The raw JSON bytes of the OpenAIResponses-compatible request
func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byte) {
c.Header("Content-Type", "text/event-stream")
c.Header("Cache-Control", "no-cache")
c.Header("Connection", "keep-alive")
c.Header("Access-Control-Allow-Origin", "*")
// Get the http.Flusher interface to manually flush the response.
flusher, ok := c.Writer.(http.Flusher)
if !ok {
c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: "Streaming not supported",
Type: "server_error",
},
})
return
}
// New core execution path
modelName := gjson.GetBytes(rawJSON, "model").String()
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
h.forwardResponsesStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
}
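// forwardResponsesStream relays streamed chunks to the client, inserting a
// separator newline before each "event:" frame and flushing after every write.
// The cancel callback receives the terminal error state (nil on clean channel close).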
func (h *OpenAIResponsesAPIHandler) forwardResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
for {
select {
case <-c.Request.Context().Done():
cancel(c.Request.Context().Err())
return
case chunk, ok := <-data:
if !ok {
_, _ = c.Writer.Write([]byte("\n"))
flusher.Flush()
cancel(nil)
return
}
if bytes.HasPrefix(chunk, []byte("event:")) {
_, _ = c.Writer.Write([]byte("\n"))
}
_, _ = c.Writer.Write(chunk)
_, _ = c.Writer.Write([]byte("\n"))
flusher.Flush()
case errMsg, ok := <-errs:
if !ok {
continue
}
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
flusher.Flush()
}
var execErr error
if errMsg != nil {
execErr = errMsg.Error
}
cancel(execErr)
return
case <-time.After(500 * time.Millisecond):
}
}
}
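forwardResponsesStream writes a separator newline before each "event:" chunk, so clients see standard SSE event blocks. A hedged illustration (event name and payload modeled on the OpenAI Responses API):

event: response.output_text.delta
data: {"type":"response.output_text.delta","delta":"Hi"}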

sdk/config/config.go (new file, 14 additions)
View File

@@ -0,0 +1,14 @@
// Package config provides configuration management for the CLI Proxy API server.
// It handles loading and parsing YAML configuration files, and provides structured
// access to application settings including server port, authentication directory,
// debug settings, proxy configuration, and API keys.
package config
// SDKConfig represents the application's configuration, loaded from a YAML file.
type SDKConfig struct {
// ProxyURL is the URL of an optional proxy server to use for outbound requests.
ProxyURL string `yaml:"proxy-url" json:"proxy-url"`
// RequestLog enables or disables detailed request logging functionality.
RequestLog bool `yaml:"request-log" json:"request-log"`
}
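A minimal YAML fragment matching the struct tags above (values illustrative):

proxy-url: "socks5://127.0.0.1:1080"
request-log: true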