feat(api): add non-streaming keep-alive support for idle timeout prevention

- Introduced `StartNonStreamingKeepAlive` to emit periodic blank lines during non-streaming responses.
- Added `nonstream-keepalive` configuration option in `SDKConfig`.
- Updated handlers to utilize `StartNonStreamingKeepAlive` and ensure proper cleanup.
- Extended config diff and tests to include `nonstream-keepalive` changes.
This commit is contained in:
Luis Pater
2026-01-13 02:36:07 +08:00
parent 21ac161b21
commit b1b379ea18
9 changed files with 89 additions and 21 deletions

View File

@@ -146,10 +146,12 @@ func (h *ClaudeCodeAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSO
c.Header("Content-Type", "application/json")
alt := h.GetAlt(c)
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
modelName := gjson.GetBytes(rawJSON, "model").String()
resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
stopKeepAlive()
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
cliCancel(errMsg.Error)
@@ -159,13 +161,18 @@ func (h *ClaudeCodeAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSO
// Decompress gzipped responses - Claude API sometimes returns gzip without Content-Encoding header
// This fixes title generation and other non-streaming responses that arrive compressed
if len(resp) >= 2 && resp[0] == 0x1f && resp[1] == 0x8b {
gzReader, err := gzip.NewReader(bytes.NewReader(resp))
if err != nil {
log.Warnf("failed to decompress gzipped Claude response: %v", err)
gzReader, errGzip := gzip.NewReader(bytes.NewReader(resp))
if errGzip != nil {
log.Warnf("failed to decompress gzipped Claude response: %v", errGzip)
} else {
defer gzReader.Close()
if decompressed, err := io.ReadAll(gzReader); err != nil {
log.Warnf("failed to read decompressed Claude response: %v", err)
defer func() {
if errClose := gzReader.Close(); errClose != nil {
log.Warnf("failed to close Claude gzip reader: %v", errClose)
}
}()
decompressed, errRead := io.ReadAll(gzReader)
if errRead != nil {
log.Warnf("failed to read decompressed Claude response: %v", errRead)
} else {
resp = decompressed
}

View File

@@ -336,7 +336,9 @@ func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName strin
c.Header("Content-Type", "application/json")
alt := h.GetAlt(c)
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
stopKeepAlive()
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
cliCancel(errMsg.Error)

View File

@@ -9,6 +9,7 @@ import (
"fmt"
"net/http"
"strings"
"sync"
"time"
"github.com/gin-gonic/gin"
@@ -48,6 +49,7 @@ const idempotencyKeyMetadataKey = "idempotency_key"
const (
// NOTE(review): the two defaults below are 0; presumably that means "disabled"
// unless overridden by configuration — confirm against the code that reads them.
defaultStreamingKeepAliveSeconds = 0
defaultStreamingBootstrapRetries = 0
// nonStreamingKeepAliveInterval is the ticker period StartNonStreamingKeepAlive
// uses between the blank lines it writes while a non-streaming response is pending.
nonStreamingKeepAliveInterval = 5 * time.Second
)
// BuildErrorResponseBody builds an OpenAI-compatible JSON error response body.
@@ -293,6 +295,52 @@ func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *
}
}
// StartNonStreamingKeepAlive writes a blank line to the response every
// nonStreamingKeepAliveInterval (5 seconds) while the caller is still waiting
// for a non-streaming upstream result, so that the connection is not treated
// as idle.
//
// No goroutine is started, and the returned function is a no-op, when h, h.Cfg
// or c is nil, when Cfg.NonStreamKeepAlive is false, or when c.Writer does not
// implement http.Flusher. A nil ctx is replaced with context.Background().
//
// The returned stop function must be called before the final response is
// written. It blocks until the keep-alive goroutine has exited, so a keep-alive
// write can never interleave with the caller's own writes, and it may be called
// more than once.
func (h *BaseAPIHandler) StartNonStreamingKeepAlive(c *gin.Context, ctx context.Context) func() {
	if h == nil || h.Cfg == nil || !h.Cfg.NonStreamKeepAlive || c == nil {
		return func() {}
	}
	flusher, ok := c.Writer.(http.Flusher)
	if !ok {
		// Without Flush the blank line could sit in a buffer and never reach the peer.
		return func() {}
	}
	if ctx == nil {
		ctx = context.Background()
	}

	stopChan := make(chan struct{})
	var stopOnce sync.Once
	var wg sync.WaitGroup

	// Allocated once; the same payload is written on every tick.
	keepAlive := []byte("\n")

	wg.Add(1)
	go func() {
		defer wg.Done()
		ticker := time.NewTicker(nonStreamingKeepAliveInterval)
		defer ticker.Stop()
		for {
			select {
			case <-stopChan:
				return
			case <-ctx.Done():
				return
			case <-ticker.C:
				if _, errWrite := c.Writer.Write(keepAlive); errWrite != nil {
					// The response can no longer be written to, so further
					// keep-alives are pointless. Exit instead of retrying on
					// every tick; stop() still returns because wg.Done is deferred.
					return
				}
				flusher.Flush()
			}
		}
	}()

	return func() {
		// Once guards against a double close if stop is invoked repeatedly.
		stopOnce.Do(func() {
			close(stopChan)
		})
		// Wait so the caller owns the writer exclusively once stop returns.
		wg.Wait()
	}
}
// appendAPIResponse preserves any previously captured API response and appends new data.
func appendAPIResponse(c *gin.Context, data []byte) {
if c == nil || len(data) == 0 {

View File

@@ -524,7 +524,9 @@ func (h *OpenAIAPIHandler) handleCompletionsNonStreamingResponse(c *gin.Context,
modelName := gjson.GetBytes(chatCompletionsJSON, "model").String()
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, chatCompletionsJSON, "")
stopKeepAlive()
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
cliCancel(errMsg.Error)

View File

@@ -103,20 +103,17 @@ func (h *OpenAIResponsesAPIHandler) handleNonStreamingResponse(c *gin.Context, r
modelName := gjson.GetBytes(rawJSON, "model").String()
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
defer func() {
cliCancel()
}()
stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
stopKeepAlive()
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
cliCancel(errMsg.Error)
return
}
_, _ = c.Writer.Write(resp)
return
// no legacy fallback
cliCancel()
}
// handleStreamingResponse handles streaming responses for Gemini models.