fix(ai): correct GPT-5 context window metadata

closes #5644
This commit is contained in:
Armin Ronacher
2026-06-12 23:03:10 +02:00
Unverified
parent 17721d5e01
commit a7cdc679e7
4 changed files with 374 additions and 579 deletions
+1
View File
@@ -6,6 +6,7 @@
### Fixed
- Fixed OpenAI GPT-5.4/GPT-5.5 and OpenAI Codex GPT-5.4/GPT-5.4 mini/GPT-5.5 context window metadata to match current OpenAI limits ([#5644](https://github.com/earendil-works/pi/issues/5644)).
- Increased the OpenAI Codex Responses SSE response-header timeout to 20 seconds to reduce false-positive stalls while retaining the bounded wait introduced for zero-event hangs ([#4945](https://github.com/earendil-works/pi/issues/4945)).
- Fixed Claude Fable 5 thinking-off requests to omit Anthropic's unsupported `thinking.type: "disabled"` payload ([#5567](https://github.com/earendil-works/pi/pull/5567) by [@tmustier](https://github.com/tmustier)).
+7 -7
View File
@@ -1363,7 +1363,7 @@ async function generateModels() {
candidate.maxTokens = 128000;
}
if (candidate.provider === "openai" && (candidate.id === "gpt-5.4" || candidate.id === "gpt-5.5")) {
candidate.contextWindow = 272000;
candidate.contextWindow = 1050000;
candidate.maxTokens = 128000;
}
// models.dev reports gpt-5-pro output as 272000 (a duplicate of the input sub-limit);
@@ -1630,7 +1630,7 @@ async function generateModels() {
cacheRead: 0.25,
cacheWrite: 0,
},
contextWindow: 272000,
contextWindow: 1050000,
maxTokens: 128000,
});
}
@@ -1757,9 +1757,9 @@ async function generateModels() {
// OpenAI Codex (ChatGPT OAuth) models
// NOTE: These are not fetched from models.dev; we keep a small, explicit list to avoid aliases.
// Context window is based on observed server limits (400s above ~272k), not marketing numbers.
const CODEX_BASE_URL = "https://chatgpt.com/backend-api";
const CODEX_CONTEXT = 272000;
const CODEX_GPT_54_CONTEXT = 1000000;
const CODEX_STANDARD_CONTEXT = 400000;
const CODEX_SPARK_CONTEXT = 128000;
const CODEX_MAX_TOKENS = 128000;
const codexModels: Model<"openai-codex-responses">[] = [
@@ -1784,7 +1784,7 @@ async function generateModels() {
reasoning: true,
input: ["text", "image"],
cost: { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0 },
contextWindow: CODEX_CONTEXT,
contextWindow: CODEX_GPT_54_CONTEXT,
maxTokens: CODEX_MAX_TOKENS,
},
{
@@ -1796,7 +1796,7 @@ async function generateModels() {
reasoning: true,
input: ["text", "image"],
cost: { input: 0.75, output: 4.5, cacheRead: 0.075, cacheWrite: 0 },
contextWindow: CODEX_CONTEXT,
contextWindow: CODEX_STANDARD_CONTEXT,
maxTokens: CODEX_MAX_TOKENS,
},
{
@@ -1808,7 +1808,7 @@ async function generateModels() {
reasoning: true,
input: ["text", "image"],
cost: { input: 5, output: 30, cacheRead: 0.5, cacheWrite: 0 },
contextWindow: CODEX_CONTEXT,
contextWindow: CODEX_STANDARD_CONTEXT,
maxTokens: CODEX_MAX_TOKENS,
},
];
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,15 @@
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.ts";
/**
 * Pins the `contextWindow` value that `getModel` returns for the OpenAI API
 * and OpenAI Codex GPT-5.4 / GPT-5.5 entries.
 */
describe("OpenAI model metadata", () => {
	it("uses current GPT-5.4 and GPT-5.5 API context windows", () => {
		// Both API models are expected to report the same window size.
		const apiModelIds = ["gpt-5.4", "gpt-5.5"] as const;
		for (const modelId of apiModelIds) {
			expect(getModel("openai", modelId).contextWindow).toBe(1050000);
		}
	});

	it("uses current OpenAI Codex context windows", () => {
		// [model id, expected context window], checked in the listed order.
		const codexExpectations = [
			["gpt-5.4", 1000000],
			["gpt-5.4-mini", 400000],
			["gpt-5.5", 400000],
		] as const;
		for (const [modelId, expectedWindow] of codexExpectations) {
			expect(getModel("openai-codex", modelId).contextWindow).toBe(expectedWindow);
		}
	});
});