fix(ai): correct GPT-5 context window metadata

closes #5644
2026-06-18 15:54:04 +08:00 · 2026-06-12 23:03:10 +02:00
parent 17721d5e01
commit a7cdc679e7
4 changed files with 374 additions and 579 deletions
@@ -6,6 +6,7 @@

 ### Fixed

+- Fixed OpenAI GPT-5.4/GPT-5.5 and OpenAI Codex GPT-5.4/GPT-5.4 mini/GPT-5.5 context window metadata to match current OpenAI limits ([#5644](https://github.com/earendil-works/pi/issues/5644)).
 - Increased the OpenAI Codex Responses SSE response-header timeout to 20 seconds to reduce false-positive stalls while retaining the bounded wait introduced for zero-event hangs ([#4945](https://github.com/earendil-works/pi/issues/4945)).
 - Fixed Claude Fable 5 thinking-off requests to omit Anthropic's unsupported `thinking.type: "disabled"` payload ([#5567](https://github.com/earendil-works/pi/pull/5567) by [@tmustier](https://github.com/tmustier)).

@@ -1363,7 +1363,7 @@ async function generateModels() {
 			candidate.maxTokens = 128000;
 		}
 		if (candidate.provider === "openai" && (candidate.id === "gpt-5.4" || candidate.id === "gpt-5.5")) {
-			candidate.contextWindow = 272000;
+			candidate.contextWindow = 1050000;
 			candidate.maxTokens = 128000;
 		}
 		// models.dev reports gpt-5-pro output as 272000 (a duplicate of the input sub-limit);
@@ -1630,7 +1630,7 @@ async function generateModels() {
 				cacheRead: 0.25,
 				cacheWrite: 0,
 			},
-			contextWindow: 272000,
+			contextWindow: 1050000,
 			maxTokens: 128000,
 		});
 	}
@@ -1757,9 +1757,9 @@ async function generateModels() {

 	// OpenAI Codex (ChatGPT OAuth) models
 	// NOTE: These are not fetched from models.dev; we keep a small, explicit list to avoid aliases.
-	// Context window is based on observed server limits (400s above ~272k), not marketing numbers.
 	const CODEX_BASE_URL = "https://chatgpt.com/backend-api";
-	const CODEX_CONTEXT = 272000;
+	const CODEX_GPT_54_CONTEXT = 1000000;
+	const CODEX_STANDARD_CONTEXT = 400000;
 	const CODEX_SPARK_CONTEXT = 128000;
 	const CODEX_MAX_TOKENS = 128000;
 	const codexModels: Model<"openai-codex-responses">[] = [
@@ -1784,7 +1784,7 @@ async function generateModels() {
 			reasoning: true,
 			input: ["text", "image"],
 			cost: { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0 },
-			contextWindow: CODEX_CONTEXT,
+			contextWindow: CODEX_GPT_54_CONTEXT,
 			maxTokens: CODEX_MAX_TOKENS,
 		},
 		{
@@ -1796,7 +1796,7 @@ async function generateModels() {
 			reasoning: true,
 			input: ["text", "image"],
 			cost: { input: 0.75, output: 4.5, cacheRead: 0.075, cacheWrite: 0 },
-			contextWindow: CODEX_CONTEXT,
+			contextWindow: CODEX_STANDARD_CONTEXT,
 			maxTokens: CODEX_MAX_TOKENS,
 		},
 		{
@@ -1808,7 +1808,7 @@ async function generateModels() {
 			reasoning: true,
 			input: ["text", "image"],
 			cost: { input: 5, output: 30, cacheRead: 0.5, cacheWrite: 0 },
-			contextWindow: CODEX_CONTEXT,
+			contextWindow: CODEX_STANDARD_CONTEXT,
 			maxTokens: CODEX_MAX_TOKENS,
 		},
 	];
@@ -0,0 +1,15 @@
+import { describe, expect, it } from "vitest";
+import { getModel } from "../src/models.ts";
+
+describe("OpenAI model metadata", () => { // Pins the contextWindow values that getModel returns for the "openai" and "openai-codex" providers.
+	it("uses current GPT-5.4 and GPT-5.5 API context windows", () => { // "openai" provider: both ids are expected to report 1050000.
+		expect(getModel("openai", "gpt-5.4").contextWindow).toBe(1050000);
+		expect(getModel("openai", "gpt-5.5").contextWindow).toBe(1050000);
+	});
+
+	it("uses current OpenAI Codex context windows", () => { // "openai-codex" provider: the expected value differs per model id.
+		expect(getModel("openai-codex", "gpt-5.4").contextWindow).toBe(1000000); // The only Codex id in this test expected at 1000000.
+		expect(getModel("openai-codex", "gpt-5.4-mini").contextWindow).toBe(400000);
+		expect(getModel("openai-codex", "gpt-5.5").contextWindow).toBe(400000);
+	});
+});