Add request logging capabilities to API handlers and update .gitignore

Enhance API response handling by storing responses in context and updating request logger to include API responses
This commit is contained in:
Luis Pater
2025-08-16 05:03:10 +08:00
parent 4155805ad6
commit fcadf08921
10 changed files with 191 additions and 26 deletions

View File

@@ -108,7 +108,9 @@ func (h *ClaudeCodeAPIHandlers) handleGeminiStreamingResponse(c *gin.Context, ra
// Create a cancellable context for the backend client request
// This allows proper cleanup and cancellation of ongoing requests
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
cliClient = client.NewGeminiClient(nil, nil, nil)
defer func() {
@@ -157,6 +159,7 @@ outLoop:
responseType := 0
responseIndex := 0
apiResponseData := make([]byte, 0)
// Main streaming loop - handles multiple concurrent events using Go channels
// This select statement manages four different types of events simultaneously
for {
@@ -166,6 +169,7 @@ outLoop:
case <-c.Request.Context().Done():
if c.Request.Context().Err().Error() == "context canceled" {
log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
c.Set("API_RESPONSE", apiResponseData)
cliCancel() // Cancel the backend request to prevent resource leaks
return
}
@@ -182,9 +186,12 @@ outLoop:
_, _ = c.Writer.Write([]byte("\n\n\n"))
flusher.Flush()
c.Set("API_RESPONSE", apiResponseData)
cliCancel()
return
}
apiResponseData = append(apiResponseData, chunk...)
// Convert the backend response to Claude-compatible format
// This translation layer ensures API compatibility
claudeFormat := translatorClaudeCodeToGeminiCli.ConvertCliResponseToClaudeCode(chunk, isGlAPIKey, hasFirstResponse, &responseType, &responseIndex)
@@ -207,6 +214,7 @@ outLoop:
c.Status(errInfo.StatusCode)
_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
flusher.Flush()
c.Set("API_RESPONSE", []byte(errInfo.Error.Error()))
cliCancel()
}
return
@@ -272,7 +280,9 @@ func (h *ClaudeCodeAPIHandlers) handleCodexStreamingResponse(c *gin.Context, raw
// return
// Create a cancellable context for the backend client request
// This allows proper cleanup and cancellation of ongoing requests
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
defer func() {
// Ensure the client's mutex is unlocked on function exit.
@@ -306,6 +316,7 @@ outLoop:
hasFirstResponse := false
hasToolCall := false
apiResponseData := make([]byte, 0)
// Main streaming loop - handles multiple concurrent events using Go channels
// This select statement manages four different types of events simultaneously
for {
@@ -315,6 +326,7 @@ outLoop:
case <-c.Request.Context().Done():
if c.Request.Context().Err().Error() == "context canceled" {
log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
c.Set("API_RESPONSE", apiResponseData)
cliCancel() // Cancel the backend request to prevent resource leaks
return
}
@@ -324,9 +336,11 @@ outLoop:
case chunk, okStream := <-respChan:
if !okStream {
flusher.Flush()
c.Set("API_RESPONSE", apiResponseData)
cliCancel()
return
}
apiResponseData = append(apiResponseData, chunk...)
// Convert the backend response to Claude-compatible format
// This translation layer ensures API compatibility
if bytes.HasPrefix(chunk, []byte("data: ")) {
@@ -357,6 +371,7 @@ outLoop:
// Forward other errors directly to the client
c.Status(errInfo.StatusCode)
_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
c.Set("API_RESPONSE", []byte(errInfo.Error.Error()))
flusher.Flush()
cliCancel()
}

View File

@@ -127,6 +127,7 @@ func (h *GeminiCLIAPIHandlers) CLIHandler(c *gin.Context) {
return
}
_, _ = c.Writer.Write(output)
c.Set("API_RESPONSE", output)
}
}
@@ -155,7 +156,9 @@ func (h *GeminiCLIAPIHandlers) handleInternalStreamGenerateContent(c *gin.Contex
modelResult := gjson.GetBytes(rawJSON, "model")
modelName := modelResult.String()
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
defer func() {
// Ensure the client's mutex is unlocked on function exit.
@@ -184,21 +187,28 @@ outLoop:
// Send the message and receive response chunks and errors via channels.
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, "")
hasFirstResponse := false
apiResponseData := make([]byte, 0)
for {
select {
// Handle client disconnection.
case <-c.Request.Context().Done():
if c.Request.Context().Err().Error() == "context canceled" {
log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
c.Set("API_RESPONSE", apiResponseData)
cliCancel() // Cancel the backend request.
return
}
// Process incoming response chunks.
case chunk, okStream := <-respChan:
if !okStream {
c.Set("API_RESPONSE", apiResponseData)
cliCancel()
return
}
apiResponseData = append(apiResponseData, chunk...)
hasFirstResponse = true
if cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey() != "" {
chunk, _ = sjson.SetRawBytes(chunk, "response", chunk)
@@ -217,6 +227,7 @@ outLoop:
c.Status(err.StatusCode)
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
flusher.Flush()
c.Set("API_RESPONSE", []byte(err.Error.Error()))
cliCancel()
}
return
@@ -237,7 +248,9 @@ func (h *GeminiCLIAPIHandlers) handleInternalGenerateContent(c *gin.Context, raw
// log.Debugf("GenerateContent: %s", string(rawJSON))
modelResult := gjson.GetBytes(rawJSON, "model")
modelName := modelResult.String()
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
defer func() {
if cliClient != nil {
@@ -269,11 +282,13 @@ func (h *GeminiCLIAPIHandlers) handleInternalGenerateContent(c *gin.Context, raw
c.Status(err.StatusCode)
_, _ = c.Writer.Write([]byte(err.Error.Error()))
log.Debugf("code: %d, error: %s", err.StatusCode, err.Error.Error())
c.Set("API_RESPONSE", []byte(err.Error.Error()))
cliCancel()
}
break
} else {
_, _ = c.Writer.Write(resp)
c.Set("API_RESPONSE", resp)
cliCancel()
break
}
@@ -313,7 +328,9 @@ func (h *GeminiCLIAPIHandlers) handleCodexInternalStreamGenerateContent(c *gin.C
modelName := gjson.GetBytes(rawJSON, "model")
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
defer func() {
// Ensure the client's mutex is unlocked on function exit.
@@ -345,24 +362,28 @@ outLoop:
ResponseID: "",
LastStorageOutput: "",
}
apiResponseData := make([]byte, 0)
for {
select {
// Handle client disconnection.
case <-c.Request.Context().Done():
if c.Request.Context().Err().Error() == "context canceled" {
log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
c.Set("API_RESPONSE", apiResponseData)
cliCancel() // Cancel the backend request.
return
}
// Process incoming response chunks.
case chunk, okStream := <-respChan:
if !okStream {
c.Set("API_RESPONSE", apiResponseData)
cliCancel()
return
}
// _, _ = logFile.Write(chunk)
// _, _ = logFile.Write([]byte("\n"))
apiResponseData = append(apiResponseData, chunk...)
if bytes.HasPrefix(chunk, []byte("data: ")) {
jsonData := chunk[6:]
data := gjson.ParseBytes(jsonData)
@@ -390,6 +411,7 @@ outLoop:
c.Status(errMessage.StatusCode)
_, _ = fmt.Fprint(c.Writer, errMessage.Error.Error())
flusher.Flush()
c.Set("API_RESPONSE", []byte(errMessage.Error.Error()))
cliCancel()
}
return
@@ -416,7 +438,9 @@ func (h *GeminiCLIAPIHandlers) handleCodexInternalGenerateContent(c *gin.Context
modelName := gjson.GetBytes(rawJSON, "model")
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
defer func() {
// Ensure the client's mutex is unlocked on function exit.
@@ -440,22 +464,25 @@ outLoop:
// Send the message and receive response chunks and errors via channels.
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
apiResponseData := make([]byte, 0)
for {
select {
// Handle client disconnection.
case <-c.Request.Context().Done():
if c.Request.Context().Err().Error() == "context canceled" {
log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
c.Set("API_RESPONSE", apiResponseData)
cliCancel() // Cancel the backend request.
return
}
// Process incoming response chunks.
case chunk, okStream := <-respChan:
if !okStream {
c.Set("API_RESPONSE", apiResponseData)
cliCancel()
return
}
apiResponseData = append(apiResponseData, chunk...)
if bytes.HasPrefix(chunk, []byte("data: ")) {
jsonData := chunk[6:]
data := gjson.ParseBytes(jsonData)
@@ -479,6 +506,7 @@ outLoop:
log.Debugf("org: %s", string(orgRawJSON))
log.Debugf("raw: %s", string(rawJSON))
log.Debugf("newRequestJSON: %s", newRequestJSON)
c.Set("API_RESPONSE", []byte(err.Error.Error()))
cliCancel()
}
return

View File

@@ -267,7 +267,9 @@ func (h *GeminiAPIHandlers) handleGeminiStreamGenerateContent(c *gin.Context, ra
modelResult := gjson.GetBytes(rawJSON, "model")
modelName := modelResult.String()
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
defer func() {
// Ensure the client's mutex is unlocked on function exit.
@@ -327,21 +329,26 @@ outLoop:
// Send the message and receive response chunks and errors via channels.
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, alt)
apiResponseData := make([]byte, 0)
for {
select {
// Handle client disconnection.
case <-c.Request.Context().Done():
if c.Request.Context().Err().Error() == "context canceled" {
log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
c.Set("API_RESPONSE", apiResponseData)
cliCancel() // Cancel the backend request.
return
}
// Process incoming response chunks.
case chunk, okStream := <-respChan:
if !okStream {
c.Set("API_RESPONSE", apiResponseData)
cliCancel()
return
}
apiResponseData = append(apiResponseData, chunk...)
if cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey() == "" {
if alt == "" {
responseResult := gjson.GetBytes(chunk, "response")
@@ -382,6 +389,7 @@ outLoop:
c.Status(err.StatusCode)
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
flusher.Flush()
c.Set("API_RESPONSE", []byte(err.Error.Error()))
cliCancel()
}
return
@@ -400,7 +408,9 @@ func (h *GeminiAPIHandlers) handleGeminiCountTokens(c *gin.Context, rawJSON []by
// orgrawJSON := rawJSON
modelResult := gjson.GetBytes(rawJSON, "model")
modelName := modelResult.String()
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
defer func() {
if cliClient != nil {
@@ -441,6 +451,7 @@ func (h *GeminiAPIHandlers) handleGeminiCountTokens(c *gin.Context, rawJSON []by
} else {
c.Status(err.StatusCode)
_, _ = c.Writer.Write([]byte(err.Error.Error()))
c.Set("API_RESPONSE", []byte(err.Error.Error()))
cliCancel()
// log.Debugf(err.Error.Error())
// log.Debugf(string(rawJSON))
@@ -455,6 +466,7 @@ func (h *GeminiAPIHandlers) handleGeminiCountTokens(c *gin.Context, rawJSON []by
}
}
_, _ = c.Writer.Write(resp)
c.Set("API_RESPONSE", resp)
cliCancel()
break
}
@@ -468,7 +480,9 @@ func (h *GeminiAPIHandlers) handleGeminiGenerateContent(c *gin.Context, rawJSON
modelResult := gjson.GetBytes(rawJSON, "model")
modelName := modelResult.String()
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
defer func() {
if cliClient != nil {
@@ -529,6 +543,7 @@ func (h *GeminiAPIHandlers) handleGeminiGenerateContent(c *gin.Context, rawJSON
} else {
c.Status(err.StatusCode)
_, _ = c.Writer.Write([]byte(err.Error.Error()))
c.Set("API_RESPONSE", []byte(err.Error.Error()))
cliCancel()
}
break
@@ -540,6 +555,7 @@ func (h *GeminiAPIHandlers) handleGeminiGenerateContent(c *gin.Context, rawJSON
}
}
_, _ = c.Writer.Write(resp)
c.Set("API_RESPONSE", resp)
cliCancel()
break
}
@@ -570,7 +586,9 @@ func (h *GeminiAPIHandlers) handleCodexStreamGenerateContent(c *gin.Context, raw
modelName := gjson.GetBytes(rawJSON, "model")
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
defer func() {
// Ensure the client's mutex is unlocked on function exit.
@@ -595,6 +613,9 @@ outLoop:
// Send the message and receive response chunks and errors via channels.
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
apiResponseData := make([]byte, 0)
params := &translatorGeminiToCodex.ConvertCodexResponseToGeminiParams{
Model: modelName.String(),
CreatedAt: 0,
@@ -607,15 +628,18 @@ outLoop:
case <-c.Request.Context().Done():
if c.Request.Context().Err().Error() == "context canceled" {
log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
c.Set("API_RESPONSE", apiResponseData)
cliCancel() // Cancel the backend request.
return
}
// Process incoming response chunks.
case chunk, okStream := <-respChan:
if !okStream {
c.Set("API_RESPONSE", apiResponseData)
cliCancel()
return
}
apiResponseData = append(apiResponseData, chunk...)
if bytes.HasPrefix(chunk, []byte("data: ")) {
jsonData := chunk[6:]
@@ -643,6 +667,7 @@ outLoop:
c.Status(err.StatusCode)
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
flusher.Flush()
c.Set("API_RESPONSE", []byte(err.Error.Error()))
cliCancel()
}
return
@@ -663,7 +688,9 @@ func (h *GeminiAPIHandlers) handleCodexGenerateContent(c *gin.Context, rawJSON [
modelName := gjson.GetBytes(rawJSON, "model")
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
defer func() {
// Ensure the client's mutex is unlocked on function exit.
@@ -687,21 +714,25 @@ outLoop:
// Send the message and receive response chunks and errors via channels.
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
apiResponseData := make([]byte, 0)
for {
select {
// Handle client disconnection.
case <-c.Request.Context().Done():
if c.Request.Context().Err().Error() == "context canceled" {
log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
c.Set("API_RESPONSE", apiResponseData)
cliCancel() // Cancel the backend request.
return
}
// Process incoming response chunks.
case chunk, okStream := <-respChan:
if !okStream {
c.Set("API_RESPONSE", apiResponseData)
cliCancel()
return
}
apiResponseData = append(apiResponseData, chunk...)
if bytes.HasPrefix(chunk, []byte("data: ")) {
jsonData := chunk[6:]
@@ -723,6 +754,7 @@ outLoop:
} else {
c.Status(err.StatusCode)
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
c.Set("API_RESPONSE", []byte(err.Error.Error()))
cliCancel()
}
return

View File

@@ -160,7 +160,9 @@ func (h *OpenAIAPIHandlers) handleGeminiNonStreamingResponse(c *gin.Context, raw
c.Header("Content-Type", "application/json")
modelName, systemInstruction, contents, tools := translatorOpenAIToGeminiCli.ConvertOpenAIChatRequestToCli(rawJSON)
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
defer func() {
if cliClient != nil {
@@ -193,6 +195,7 @@ func (h *OpenAIAPIHandlers) handleGeminiNonStreamingResponse(c *gin.Context, raw
} else {
c.Status(err.StatusCode)
_, _ = c.Writer.Write([]byte(err.Error.Error()))
c.Set("API_RESPONSE", []byte(err.Error.Error()))
cliCancel()
}
break
@@ -201,6 +204,7 @@ func (h *OpenAIAPIHandlers) handleGeminiNonStreamingResponse(c *gin.Context, raw
if openAIFormat != "" {
_, _ = c.Writer.Write([]byte(openAIFormat))
}
c.Set("API_RESPONSE", resp)
cliCancel()
break
}
@@ -234,7 +238,9 @@ func (h *OpenAIAPIHandlers) handleGeminiStreamingResponse(c *gin.Context, rawJSO
// Prepare the request for the backend client.
modelName, systemInstruction, contents, tools := translatorOpenAIToGeminiCli.ConvertOpenAIChatRequestToCli(rawJSON)
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
defer func() {
// Ensure the client's mutex is unlocked on function exit.
@@ -264,6 +270,8 @@ outLoop:
}
// Send the message and receive response chunks and errors via channels.
respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJSON, modelName, systemInstruction, contents, tools)
apiResponseData := make([]byte, 0)
hasFirstResponse := false
for {
select {
@@ -271,6 +279,7 @@ outLoop:
case <-c.Request.Context().Done():
if c.Request.Context().Err().Error() == "context canceled" {
log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
c.Set("API_RESPONSE", apiResponseData)
cliCancel() // Cancel the backend request.
return
}
@@ -280,9 +289,11 @@ outLoop:
// Stream is closed, send the final [DONE] message.
_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
flusher.Flush()
c.Set("API_RESPONSE", apiResponseData)
cliCancel()
return
}
apiResponseData = append(apiResponseData, chunk...)
// Convert the chunk to OpenAI format and send it to the client.
hasFirstResponse = true
openAIFormat := translatorOpenAIToGeminiCli.ConvertCliResponseToOpenAIChat(chunk, time.Now().Unix(), isGlAPIKey)
@@ -299,6 +310,7 @@ outLoop:
c.Status(err.StatusCode)
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
flusher.Flush()
c.Set("API_RESPONSE", []byte(err.Error.Error()))
cliCancel()
}
return
@@ -326,7 +338,9 @@ func (h *OpenAIAPIHandlers) handleCodexNonStreamingResponse(c *gin.Context, rawJ
newRequestJSON := translatorOpenAIToCodex.ConvertOpenAIChatRequestToCodex(rawJSON)
modelName := gjson.GetBytes(rawJSON, "model")
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
defer func() {
if cliClient != nil {
@@ -349,21 +363,25 @@ outLoop:
// Send the message and receive response chunks and errors via channels.
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
apiResponseData := make([]byte, 0)
for {
select {
// Handle client disconnection.
case <-c.Request.Context().Done():
if c.Request.Context().Err().Error() == "context canceled" {
log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
c.Set("API_RESPONSE", apiResponseData)
cliCancel() // Cancel the backend request.
return
}
// Process incoming response chunks.
case chunk, okStream := <-respChan:
if !okStream {
c.Set("API_RESPONSE", apiResponseData)
cliCancel()
return
}
apiResponseData = append(apiResponseData, chunk...)
if bytes.HasPrefix(chunk, []byte("data: ")) {
jsonData := chunk[6:]
data := gjson.ParseBytes(jsonData)
@@ -382,6 +400,7 @@ outLoop:
} else {
c.Status(err.StatusCode)
_, _ = c.Writer.Write([]byte(err.Error.Error()))
c.Set("API_RESPONSE", []byte(err.Error.Error()))
cliCancel()
}
return
@@ -424,7 +443,9 @@ func (h *OpenAIAPIHandlers) handleCodexStreamingResponse(c *gin.Context, rawJSON
modelName := gjson.GetBytes(rawJSON, "model")
cliCtx, cliCancel := context.WithCancel(context.Background())
backgroundCtx, cliCancel := context.WithCancel(context.Background())
cliCtx := context.WithValue(backgroundCtx, "gin", c)
var cliClient client.Client
defer func() {
// Ensure the client's mutex is unlocked on function exit.
@@ -450,12 +471,14 @@ outLoop:
// Send the message and receive response chunks and errors via channels.
var params *translatorOpenAIToCodex.ConvertCliToOpenAIParams
respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
apiResponseData := make([]byte, 0)
for {
select {
// Handle client disconnection.
case <-c.Request.Context().Done():
if c.Request.Context().Err().Error() == "context canceled" {
log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
c.Set("API_RESPONSE", apiResponseData)
cliCancel() // Cancel the backend request.
return
}
@@ -464,9 +487,11 @@ outLoop:
if !okStream {
_, _ = c.Writer.Write([]byte("[done]\n\n"))
flusher.Flush()
c.Set("API_RESPONSE", apiResponseData)
cliCancel()
return
}
apiResponseData = append(apiResponseData, chunk...)
// log.Debugf("Response: %s\n", string(chunk))
// Convert the chunk to OpenAI format and send it to the client.
if bytes.HasPrefix(chunk, []byte("data: ")) {
@@ -493,6 +518,7 @@ outLoop:
} else {
c.Status(err.StatusCode)
_, _ = fmt.Fprint(c.Writer, err.Error.Error())
c.Set("API_RESPONSE", []byte(err.Error.Error()))
flusher.Flush()
cliCancel()
}