Add request logging capabilities to API handlers and update .gitignore

Enhance API response handling by storing responses in context and updating request logger to include API responses
2026-02-03 13:00:52 +08:00 · 2025-08-16 05:03:10 +08:00
parent 4155805ad6
commit fcadf08921
10 changed files with 191 additions and 26 deletions
--- a/internal/api/handlers/claude/code_handlers.go
+++ b/internal/api/handlers/claude/code_handlers.go
@@ -108,7 +108,9 @@ func (h *ClaudeCodeAPIHandlers) handleGeminiStreamingResponse(c *gin.Context, ra

 	// Create a cancellable context for the backend client request
 	// This allows proper cleanup and cancellation of ongoing requests
-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	cliClient = client.NewGeminiClient(nil, nil, nil)
 	defer func() {
@@ -157,6 +159,7 @@ outLoop:
 		responseType := 0
 		responseIndex := 0

+		apiResponseData := make([]byte, 0)
 		// Main streaming loop - handles multiple concurrent events using Go channels
 		// This select statement manages four different types of events simultaneously
 		for {
@@ -166,6 +169,7 @@ outLoop:
 			case <-c.Request.Context().Done():
 				if c.Request.Context().Err().Error() == "context canceled" {
 					log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel() // Cancel the backend request to prevent resource leaks
 					return
 				}
@@ -182,9 +186,12 @@ outLoop:
 					_, _ = c.Writer.Write([]byte("\n\n\n"))

 					flusher.Flush()
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel()
 					return
 				}
+
+				apiResponseData = append(apiResponseData, chunk...)
 				// Convert the backend response to Claude-compatible format
 				// This translation layer ensures API compatibility
 				claudeFormat := translatorClaudeCodeToGeminiCli.ConvertCliResponseToClaudeCode(chunk, isGlAPIKey, hasFirstResponse, &responseType, &responseIndex)
@@ -207,6 +214,7 @@ outLoop:
 						c.Status(errInfo.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
 						flusher.Flush()
+						c.Set("API_RESPONSE", []byte(errInfo.Error.Error()))
 						cliCancel()
 					}
 					return
@@ -272,7 +280,9 @@ func (h *ClaudeCodeAPIHandlers) handleCodexStreamingResponse(c *gin.Context, raw
 	// return
 	// Create a cancellable context for the backend client request
 	// This allows proper cleanup and cancellation of ongoing requests
-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	defer func() {
 		// Ensure the client's mutex is unlocked on function exit.
@@ -306,6 +316,7 @@ outLoop:
 		hasFirstResponse := false
 		hasToolCall := false

+		apiResponseData := make([]byte, 0)
 		// Main streaming loop - handles multiple concurrent events using Go channels
 		// This select statement manages four different types of events simultaneously
 		for {
@@ -315,6 +326,7 @@ outLoop:
 			case <-c.Request.Context().Done():
 				if c.Request.Context().Err().Error() == "context canceled" {
 					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel() // Cancel the backend request to prevent resource leaks
 					return
 				}
@@ -324,9 +336,11 @@ outLoop:
 			case chunk, okStream := <-respChan:
 				if !okStream {
 					flusher.Flush()
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel()
 					return
 				}
+				apiResponseData = append(apiResponseData, chunk...)
 				// Convert the backend response to Claude-compatible format
 				// This translation layer ensures API compatibility
 				if bytes.HasPrefix(chunk, []byte("data: ")) {
@@ -357,6 +371,7 @@ outLoop:
 						// Forward other errors directly to the client
 						c.Status(errInfo.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
+						c.Set("API_RESPONSE", []byte(errInfo.Error.Error()))
 						flusher.Flush()
 						cliCancel()
 					}
--- a/internal/api/handlers/gemini/cli/cli_handlers.go
+++ b/internal/api/handlers/gemini/cli/cli_handlers.go
@@ -127,6 +127,7 @@ func (h *GeminiCLIAPIHandlers) CLIHandler(c *gin.Context) {
 			return
 		}
 		_, _ = c.Writer.Write(output)
+		c.Set("API_RESPONSE", output)
 	}
 }

@@ -155,7 +156,9 @@ func (h *GeminiCLIAPIHandlers) handleInternalStreamGenerateContent(c *gin.Contex
 	modelResult := gjson.GetBytes(rawJSON, "model")
 	modelName := modelResult.String()

-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	defer func() {
 		// Ensure the client's mutex is unlocked on function exit.
@@ -184,21 +187,28 @@ outLoop:
 		// Send the message and receive response chunks and errors via channels.
 		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, "")
 		hasFirstResponse := false
+
+		apiResponseData := make([]byte, 0)
 		for {
 			select {
 			// Handle client disconnection.
 			case <-c.Request.Context().Done():
 				if c.Request.Context().Err().Error() == "context canceled" {
 					log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel() // Cancel the backend request.
 					return
 				}
 			// Process incoming response chunks.
 			case chunk, okStream := <-respChan:
 				if !okStream {
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel()
 					return
 				}
+
+				apiResponseData = append(apiResponseData, chunk...)
+
 				hasFirstResponse = true
 				if cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey() != "" {
 					chunk, _ = sjson.SetRawBytes(chunk, "response", chunk)
@@ -217,6 +227,7 @@ outLoop:
 						c.Status(err.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
 						flusher.Flush()
+						c.Set("API_RESPONSE", []byte(err.Error.Error()))
 						cliCancel()
 					}
 					return
@@ -237,7 +248,9 @@ func (h *GeminiCLIAPIHandlers) handleInternalGenerateContent(c *gin.Context, raw
 	// log.Debugf("GenerateContent: %s", string(rawJSON))
 	modelResult := gjson.GetBytes(rawJSON, "model")
 	modelName := modelResult.String()
-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	defer func() {
 		if cliClient != nil {
@@ -269,11 +282,13 @@ func (h *GeminiCLIAPIHandlers) handleInternalGenerateContent(c *gin.Context, raw
 				c.Status(err.StatusCode)
 				_, _ = c.Writer.Write([]byte(err.Error.Error()))
 				log.Debugf("code: %d, error: %s", err.StatusCode, err.Error.Error())
+				c.Set("API_RESPONSE", []byte(err.Error.Error()))
 				cliCancel()
 			}
 			break
 		} else {
 			_, _ = c.Writer.Write(resp)
+			c.Set("API_RESPONSE", resp)
 			cliCancel()
 			break
 		}
@@ -313,7 +328,9 @@ func (h *GeminiCLIAPIHandlers) handleCodexInternalStreamGenerateContent(c *gin.C

 	modelName := gjson.GetBytes(rawJSON, "model")

-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	defer func() {
 		// Ensure the client's mutex is unlocked on function exit.
@@ -345,24 +362,28 @@ outLoop:
 			ResponseID:        "",
 			LastStorageOutput: "",
 		}
+		apiResponseData := make([]byte, 0)
+
 		for {
 			select {
 			// Handle client disconnection.
 			case <-c.Request.Context().Done():
 				if c.Request.Context().Err().Error() == "context canceled" {
 					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel() // Cancel the backend request.
 					return
 				}
 			// Process incoming response chunks.
 			case chunk, okStream := <-respChan:
 				if !okStream {
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel()
 					return
 				}
 				// _, _ = logFile.Write(chunk)
 				// _, _ = logFile.Write([]byte("\n"))
-
+				apiResponseData = append(apiResponseData, chunk...)
 				if bytes.HasPrefix(chunk, []byte("data: ")) {
 					jsonData := chunk[6:]
 					data := gjson.ParseBytes(jsonData)
@@ -390,6 +411,7 @@ outLoop:
 						c.Status(errMessage.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, errMessage.Error.Error())
 						flusher.Flush()
+						c.Set("API_RESPONSE", []byte(errMessage.Error.Error()))
 						cliCancel()
 					}
 					return
@@ -416,7 +438,9 @@ func (h *GeminiCLIAPIHandlers) handleCodexInternalGenerateContent(c *gin.Context

 	modelName := gjson.GetBytes(rawJSON, "model")

-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	defer func() {
 		// Ensure the client's mutex is unlocked on function exit.
@@ -440,22 +464,25 @@ outLoop:

 		// Send the message and receive response chunks and errors via channels.
 		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+		apiResponseData := make([]byte, 0)
 		for {
 			select {
 			// Handle client disconnection.
 			case <-c.Request.Context().Done():
 				if c.Request.Context().Err().Error() == "context canceled" {
 					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel() // Cancel the backend request.
 					return
 				}
 			// Process incoming response chunks.
 			case chunk, okStream := <-respChan:
 				if !okStream {
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel()
 					return
 				}
-
+				apiResponseData = append(apiResponseData, chunk...)
 				if bytes.HasPrefix(chunk, []byte("data: ")) {
 					jsonData := chunk[6:]
 					data := gjson.ParseBytes(jsonData)
@@ -479,6 +506,7 @@ outLoop:
 						log.Debugf("org: %s", string(orgRawJSON))
 						log.Debugf("raw: %s", string(rawJSON))
 						log.Debugf("newRequestJSON: %s", newRequestJSON)
+						c.Set("API_RESPONSE", []byte(err.Error.Error()))
 						cliCancel()
 					}
 					return
--- a/internal/api/handlers/gemini/gemini_handlers.go
+++ b/internal/api/handlers/gemini/gemini_handlers.go
@@ -267,7 +267,9 @@ func (h *GeminiAPIHandlers) handleGeminiStreamGenerateContent(c *gin.Context, ra
 	modelResult := gjson.GetBytes(rawJSON, "model")
 	modelName := modelResult.String()

-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	defer func() {
 		// Ensure the client's mutex is unlocked on function exit.
@@ -327,21 +329,26 @@ outLoop:

 		// Send the message and receive response chunks and errors via channels.
 		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, rawJSON, alt)
+		apiResponseData := make([]byte, 0)
 		for {
 			select {
 			// Handle client disconnection.
 			case <-c.Request.Context().Done():
 				if c.Request.Context().Err().Error() == "context canceled" {
 					log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel() // Cancel the backend request.
 					return
 				}
 			// Process incoming response chunks.
 			case chunk, okStream := <-respChan:
 				if !okStream {
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel()
 					return
 				}
+				apiResponseData = append(apiResponseData, chunk...)
+
 				if cliClient.(*client.GeminiClient).GetGenerativeLanguageAPIKey() == "" {
 					if alt == "" {
 						responseResult := gjson.GetBytes(chunk, "response")
@@ -382,6 +389,7 @@ outLoop:
 						c.Status(err.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
 						flusher.Flush()
+						c.Set("API_RESPONSE", []byte(err.Error.Error()))
 						cliCancel()
 					}
 					return
@@ -400,7 +408,9 @@ func (h *GeminiAPIHandlers) handleGeminiCountTokens(c *gin.Context, rawJSON []by
 	// orgrawJSON := rawJSON
 	modelResult := gjson.GetBytes(rawJSON, "model")
 	modelName := modelResult.String()
-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	defer func() {
 		if cliClient != nil {
@@ -441,6 +451,7 @@ func (h *GeminiAPIHandlers) handleGeminiCountTokens(c *gin.Context, rawJSON []by
 			} else {
 				c.Status(err.StatusCode)
 				_, _ = c.Writer.Write([]byte(err.Error.Error()))
+				c.Set("API_RESPONSE", []byte(err.Error.Error()))
 				cliCancel()
 				// log.Debugf(err.Error.Error())
 				// log.Debugf(string(rawJSON))
@@ -455,6 +466,7 @@ func (h *GeminiAPIHandlers) handleGeminiCountTokens(c *gin.Context, rawJSON []by
 				}
 			}
 			_, _ = c.Writer.Write(resp)
+			c.Set("API_RESPONSE", resp)
 			cliCancel()
 			break
 		}
@@ -468,7 +480,9 @@ func (h *GeminiAPIHandlers) handleGeminiGenerateContent(c *gin.Context, rawJSON

 	modelResult := gjson.GetBytes(rawJSON, "model")
 	modelName := modelResult.String()
-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	defer func() {
 		if cliClient != nil {
@@ -529,6 +543,7 @@ func (h *GeminiAPIHandlers) handleGeminiGenerateContent(c *gin.Context, rawJSON
 			} else {
 				c.Status(err.StatusCode)
 				_, _ = c.Writer.Write([]byte(err.Error.Error()))
+				c.Set("API_RESPONSE", []byte(err.Error.Error()))
 				cliCancel()
 			}
 			break
@@ -540,6 +555,7 @@ func (h *GeminiAPIHandlers) handleGeminiGenerateContent(c *gin.Context, rawJSON
 				}
 			}
 			_, _ = c.Writer.Write(resp)
+			c.Set("API_RESPONSE", resp)
 			cliCancel()
 			break
 		}
@@ -570,7 +586,9 @@ func (h *GeminiAPIHandlers) handleCodexStreamGenerateContent(c *gin.Context, raw

 	modelName := gjson.GetBytes(rawJSON, "model")

-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	defer func() {
 		// Ensure the client's mutex is unlocked on function exit.
@@ -595,6 +613,9 @@ outLoop:

 		// Send the message and receive response chunks and errors via channels.
 		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+
+		apiResponseData := make([]byte, 0)
+
 		params := &translatorGeminiToCodex.ConvertCodexResponseToGeminiParams{
 			Model:             modelName.String(),
 			CreatedAt:         0,
@@ -607,15 +628,18 @@ outLoop:
 			case <-c.Request.Context().Done():
 				if c.Request.Context().Err().Error() == "context canceled" {
 					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel() // Cancel the backend request.
 					return
 				}
 			// Process incoming response chunks.
 			case chunk, okStream := <-respChan:
 				if !okStream {
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel()
 					return
 				}
+				apiResponseData = append(apiResponseData, chunk...)

 				if bytes.HasPrefix(chunk, []byte("data: ")) {
 					jsonData := chunk[6:]
@@ -643,6 +667,7 @@ outLoop:
 						c.Status(err.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
 						flusher.Flush()
+						c.Set("API_RESPONSE", []byte(err.Error.Error()))
 						cliCancel()
 					}
 					return
@@ -663,7 +688,9 @@ func (h *GeminiAPIHandlers) handleCodexGenerateContent(c *gin.Context, rawJSON [

 	modelName := gjson.GetBytes(rawJSON, "model")

-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	defer func() {
 		// Ensure the client's mutex is unlocked on function exit.
@@ -687,21 +714,25 @@ outLoop:

 		// Send the message and receive response chunks and errors via channels.
 		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+		apiResponseData := make([]byte, 0)
 		for {
 			select {
 			// Handle client disconnection.
 			case <-c.Request.Context().Done():
 				if c.Request.Context().Err().Error() == "context canceled" {
 					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel() // Cancel the backend request.
 					return
 				}
 			// Process incoming response chunks.
 			case chunk, okStream := <-respChan:
 				if !okStream {
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel()
 					return
 				}
+				apiResponseData = append(apiResponseData, chunk...)

 				if bytes.HasPrefix(chunk, []byte("data: ")) {
 					jsonData := chunk[6:]
@@ -723,6 +754,7 @@ outLoop:
 					} else {
 						c.Status(err.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+						c.Set("API_RESPONSE", []byte(err.Error.Error()))
 						cliCancel()
 					}
 					return
--- a/internal/api/handlers/openai/openai_handlers.go
+++ b/internal/api/handlers/openai/openai_handlers.go
@@ -160,7 +160,9 @@ func (h *OpenAIAPIHandlers) handleGeminiNonStreamingResponse(c *gin.Context, raw
 	c.Header("Content-Type", "application/json")

 	modelName, systemInstruction, contents, tools := translatorOpenAIToGeminiCli.ConvertOpenAIChatRequestToCli(rawJSON)
-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	defer func() {
 		if cliClient != nil {
@@ -193,6 +195,7 @@ func (h *OpenAIAPIHandlers) handleGeminiNonStreamingResponse(c *gin.Context, raw
 			} else {
 				c.Status(err.StatusCode)
 				_, _ = c.Writer.Write([]byte(err.Error.Error()))
+				c.Set("API_RESPONSE", []byte(err.Error.Error()))
 				cliCancel()
 			}
 			break
@@ -201,6 +204,7 @@ func (h *OpenAIAPIHandlers) handleGeminiNonStreamingResponse(c *gin.Context, raw
 			if openAIFormat != "" {
 				_, _ = c.Writer.Write([]byte(openAIFormat))
 			}
+			c.Set("API_RESPONSE", resp)
 			cliCancel()
 			break
 		}
@@ -234,7 +238,9 @@ func (h *OpenAIAPIHandlers) handleGeminiStreamingResponse(c *gin.Context, rawJSO

 	// Prepare the request for the backend client.
 	modelName, systemInstruction, contents, tools := translatorOpenAIToGeminiCli.ConvertOpenAIChatRequestToCli(rawJSON)
-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	defer func() {
 		// Ensure the client's mutex is unlocked on function exit.
@@ -264,6 +270,8 @@ outLoop:
 		}
 		// Send the message and receive response chunks and errors via channels.
 		respChan, errChan := cliClient.SendMessageStream(cliCtx, rawJSON, modelName, systemInstruction, contents, tools)
+		apiResponseData := make([]byte, 0)
+
 		hasFirstResponse := false
 		for {
 			select {
@@ -271,6 +279,7 @@ outLoop:
 			case <-c.Request.Context().Done():
 				if c.Request.Context().Err().Error() == "context canceled" {
 					log.Debugf("GeminiClient disconnected: %v", c.Request.Context().Err())
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel() // Cancel the backend request.
 					return
 				}
@@ -280,9 +289,11 @@ outLoop:
 					// Stream is closed, send the final [DONE] message.
 					_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
 					flusher.Flush()
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel()
 					return
 				}
+				apiResponseData = append(apiResponseData, chunk...)
 				// Convert the chunk to OpenAI format and send it to the client.
 				hasFirstResponse = true
 				openAIFormat := translatorOpenAIToGeminiCli.ConvertCliResponseToOpenAIChat(chunk, time.Now().Unix(), isGlAPIKey)
@@ -299,6 +310,7 @@ outLoop:
 						c.Status(err.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
 						flusher.Flush()
+						c.Set("API_RESPONSE", []byte(err.Error.Error()))
 						cliCancel()
 					}
 					return
@@ -326,7 +338,9 @@ func (h *OpenAIAPIHandlers) handleCodexNonStreamingResponse(c *gin.Context, rawJ

 	newRequestJSON := translatorOpenAIToCodex.ConvertOpenAIChatRequestToCodex(rawJSON)
 	modelName := gjson.GetBytes(rawJSON, "model")
-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	defer func() {
 		if cliClient != nil {
@@ -349,21 +363,25 @@ outLoop:

 		// Send the message and receive response chunks and errors via channels.
 		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+		apiResponseData := make([]byte, 0)
 		for {
 			select {
 			// Handle client disconnection.
 			case <-c.Request.Context().Done():
 				if c.Request.Context().Err().Error() == "context canceled" {
 					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel() // Cancel the backend request.
 					return
 				}
 			// Process incoming response chunks.
 			case chunk, okStream := <-respChan:
 				if !okStream {
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel()
 					return
 				}
+				apiResponseData = append(apiResponseData, chunk...)
 				if bytes.HasPrefix(chunk, []byte("data: ")) {
 					jsonData := chunk[6:]
 					data := gjson.ParseBytes(jsonData)
@@ -382,6 +400,7 @@ outLoop:
 					} else {
 						c.Status(err.StatusCode)
 						_, _ = c.Writer.Write([]byte(err.Error.Error()))
+						c.Set("API_RESPONSE", []byte(err.Error.Error()))
 						cliCancel()
 					}
 					return
@@ -424,7 +443,9 @@ func (h *OpenAIAPIHandlers) handleCodexStreamingResponse(c *gin.Context, rawJSON

 	modelName := gjson.GetBytes(rawJSON, "model")

-	cliCtx, cliCancel := context.WithCancel(context.Background())
+	backgroundCtx, cliCancel := context.WithCancel(context.Background())
+	cliCtx := context.WithValue(backgroundCtx, "gin", c)
+
 	var cliClient client.Client
 	defer func() {
 		// Ensure the client's mutex is unlocked on function exit.
@@ -450,12 +471,14 @@ outLoop:
 		// Send the message and receive response chunks and errors via channels.
 		var params *translatorOpenAIToCodex.ConvertCliToOpenAIParams
 		respChan, errChan := cliClient.SendRawMessageStream(cliCtx, []byte(newRequestJSON), "")
+		apiResponseData := make([]byte, 0)
 		for {
 			select {
 			// Handle client disconnection.
 			case <-c.Request.Context().Done():
 				if c.Request.Context().Err().Error() == "context canceled" {
 					log.Debugf("CodexClient disconnected: %v", c.Request.Context().Err())
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel() // Cancel the backend request.
 					return
 				}
@@ -464,9 +487,11 @@ outLoop:
 				if !okStream {
 					_, _ = c.Writer.Write([]byte("[done]\n\n"))
 					flusher.Flush()
+					c.Set("API_RESPONSE", apiResponseData)
 					cliCancel()
 					return
 				}
+				apiResponseData = append(apiResponseData, chunk...)
 				// log.Debugf("Response: %s\n", string(chunk))
 				// Convert the chunk to OpenAI format and send it to the client.
 				if bytes.HasPrefix(chunk, []byte("data: ")) {
@@ -493,6 +518,7 @@ outLoop:
 					} else {
 						c.Status(err.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
+						c.Set("API_RESPONSE", []byte(err.Error.Error()))
 						flusher.Flush()
 						cliCancel()
 					}