diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index 93fc9a802..b7b1112db 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -6,6 +6,7 @@ ### Fixed +- Fixed OpenAI GPT-5.4/GPT-5.5 and OpenAI Codex GPT-5.4/GPT-5.4 mini/GPT-5.5 context window metadata to match current OpenAI limits ([#5644](https://github.com/earendil-works/pi/issues/5644)). - Increased the OpenAI Codex Responses SSE response-header timeout to 20 seconds to reduce false-positive stalls while retaining the bounded wait introduced for zero-event hangs ([#4945](https://github.com/earendil-works/pi/issues/4945)). - Fixed Claude Fable 5 thinking-off requests to omit Anthropic's unsupported `thinking.type: "disabled"` payload ([#5567](https://github.com/earendil-works/pi/pull/5567) by [@tmustier](https://github.com/tmustier)). diff --git a/packages/ai/scripts/generate-models.ts b/packages/ai/scripts/generate-models.ts index 5e5fff94d..a3892cf16 100644 --- a/packages/ai/scripts/generate-models.ts +++ b/packages/ai/scripts/generate-models.ts @@ -1363,7 +1363,7 @@ async function generateModels() { candidate.maxTokens = 128000; } if (candidate.provider === "openai" && (candidate.id === "gpt-5.4" || candidate.id === "gpt-5.5")) { - candidate.contextWindow = 272000; + candidate.contextWindow = 1050000; candidate.maxTokens = 128000; } // models.dev reports gpt-5-pro output as 272000 (a duplicate of the input sub-limit); @@ -1630,7 +1630,7 @@ async function generateModels() { cacheRead: 0.25, cacheWrite: 0, }, - contextWindow: 272000, + contextWindow: 1050000, maxTokens: 128000, }); } @@ -1757,9 +1757,9 @@ async function generateModels() { // OpenAI Codex (ChatGPT OAuth) models // NOTE: These are not fetched from models.dev; we keep a small, explicit list to avoid aliases. - // Context window is based on observed server limits (400s above ~272k), not marketing numbers. const CODEX_BASE_URL = "https://chatgpt.com/backend-api"; - const CODEX_CONTEXT = 272000; + const CODEX_GPT_54_CONTEXT = 1000000; + const CODEX_STANDARD_CONTEXT = 400000; const CODEX_SPARK_CONTEXT = 128000; const CODEX_MAX_TOKENS = 128000; const codexModels: Model<"openai-codex-responses">[] = [ @@ -1784,7 +1784,7 @@ async function generateModels() { reasoning: true, input: ["text", "image"], cost: { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0 }, - contextWindow: CODEX_CONTEXT, + contextWindow: CODEX_GPT_54_CONTEXT, maxTokens: CODEX_MAX_TOKENS, }, { @@ -1796,7 +1796,7 @@ async function generateModels() { reasoning: true, input: ["text", "image"], cost: { input: 0.75, output: 4.5, cacheRead: 0.075, cacheWrite: 0 }, - contextWindow: CODEX_CONTEXT, + contextWindow: CODEX_STANDARD_CONTEXT, maxTokens: CODEX_MAX_TOKENS, }, { @@ -1808,7 +1808,7 @@ async function generateModels() { reasoning: true, input: ["text", "image"], cost: { input: 5, output: 30, cacheRead: 0.5, cacheWrite: 0 }, - contextWindow: CODEX_CONTEXT, + contextWindow: CODEX_STANDARD_CONTEXT, maxTokens: CODEX_MAX_TOKENS, }, ]; diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index 57da60bca..eeed2e92f 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -357,7 +357,7 @@ export const MODELS = { provider: "amazon-bedrock", baseUrl: "https://bedrock-runtime.eu-central-1.amazonaws.com", reasoning: true, - thinkingLevelMap: {"xhigh":"xhigh"}, + thinkingLevelMap: {"off":null,"xhigh":"xhigh"}, input: ["text", "image"], cost: { input: 11, @@ -497,7 +497,7 @@ export const MODELS = { provider: "amazon-bedrock", baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", reasoning: true, - thinkingLevelMap: {"xhigh":"xhigh"}, + thinkingLevelMap: {"off":null,"xhigh":"xhigh"}, input: ["text", "image"], cost: { input: 10, @@ -1389,7 +1389,7 @@ export const MODELS = { provider: "amazon-bedrock", baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", reasoning: true, - thinkingLevelMap: {"xhigh":"xhigh"}, + thinkingLevelMap: {"off":null,"xhigh":"xhigh"}, input: ["text", "image"], cost: { input: 10, @@ -1878,7 +1878,7 @@ export const MODELS = { baseUrl: "https://api.anthropic.com", compat: {"forceAdaptiveThinking":true}, reasoning: true, - thinkingLevelMap: {"xhigh":"xhigh"}, + thinkingLevelMap: {"off":null,"xhigh":"xhigh"}, input: ["text", "image"], cost: { input: 10, @@ -2919,30 +2919,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.25, - output: 0.69, + input: 0.35, + output: 0.75, cacheRead: 0, cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "llama3.1-8b": { - id: "llama3.1-8b", - name: "Llama 3.1 8B", - api: "openai-completions", - provider: "cerebras", - baseUrl: "https://api.cerebras.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.1, - output: 0.1, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32000, - maxTokens: 8000, + maxTokens: 40960, } satisfies Model<"openai-completions">, "zai-glm-4.7": { id: "zai-glm-4.7", @@ -2950,7 +2933,7 @@ export const MODELS = { api: "openai-completions", provider: "cerebras", baseUrl: "https://api.cerebras.ai/v1", - reasoning: false, + reasoning: true, input: ["text"], cost: { input: 2.25, @@ -2959,7 +2942,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 40000, + maxTokens: 40960, } satisfies Model<"openai-completions">, }, "cloudflare-ai-gateway": { @@ -3073,7 +3056,7 @@ export const MODELS = { baseUrl: "https://gateway.ai.cloudflare.com/v1/{CLOUDFLARE_ACCOUNT_ID}/{CLOUDFLARE_GATEWAY_ID}/anthropic", compat: {"forceAdaptiveThinking":true}, reasoning: true, - thinkingLevelMap: {"xhigh":"xhigh"}, + thinkingLevelMap: {"off":null,"xhigh":"xhigh"}, input: ["text", "image"], cost: { input: 10, @@ -3722,6 +3705,24 @@ export const MODELS = { contextWindow: 262144, maxTokens: 256000, } satisfies Model<"openai-completions">, + "@cf/moonshotai/kimi-k2.7-code": { + id: "@cf/moonshotai/kimi-k2.7-code", + name: "Kimi K2.7 Code", + api: "openai-completions", + provider: "cloudflare-workers-ai", + baseUrl: "https://api.cloudflare.com/client/v4/accounts/{CLOUDFLARE_ACCOUNT_ID}/ai/v1", + compat: {"sendSessionAffinityHeaders":true}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.95, + output: 4, + cacheRead: 0.19, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, "@cf/nvidia/nemotron-3-120b-a12b": { id: "@cf/nvidia/nemotron-3-120b-a12b", name: "Nemotron 3 Super 120B", @@ -4090,6 +4091,25 @@ export const MODELS = { } satisfies Model<"anthropic-messages">, }, "github-copilot": { + "claude-fable-5": { + id: "claude-fable-5", + name: "Claude Fable 5", + api: "openai-completions", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 10, + output: 50, + cacheRead: 1, + cacheWrite: 12.5, + }, + contextWindow: 1000000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, "claude-haiku-4.5": { id: "claude-haiku-4.5", name: "Claude Haiku 4.5 (latest)", @@ -5022,77 +5042,9 @@ export const MODELS = { } satisfies Model<"google-vertex">, }, "groq": { - "deepseek-r1-distill-llama-70b": { - id: "deepseek-r1-distill-llama-70b", - name: "DeepSeek R1 Distill Llama 70B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.75, - output: 0.99, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "gemma2-9b-it": { - id: "gemma2-9b-it", - name: "Gemma 2 9B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.2, - output: 0.2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "groq/compound": { - id: "groq/compound", - name: "Compound", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "groq/compound-mini": { - id: "groq/compound-mini", - name: "Compound Mini", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, "llama-3.1-8b-instant": { id: "llama-3.1-8b-instant", - name: "Llama 3.1 8B Instant", + name: "Llama 3.1 8B", api: "openai-completions", provider: "groq", baseUrl: "https://api.groq.com/openai/v1", @@ -5109,7 +5061,7 @@ export const MODELS = { } satisfies Model<"openai-completions">, "llama-3.3-70b-versatile": { id: "llama-3.3-70b-versatile", - name: "Llama 3.3 70B Versatile", + name: "Llama 3.3 70B", api: "openai-completions", provider: "groq", baseUrl: "https://api.groq.com/openai/v1", @@ -5124,60 +5076,9 @@ export const MODELS = { contextWindow: 131072, maxTokens: 32768, } satisfies Model<"openai-completions">, - "llama3-70b-8192": { - id: "llama3-70b-8192", - name: "Llama 3 70B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.59, - output: 0.79, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "llama3-8b-8192": { - id: "llama3-8b-8192", - name: "Llama 3 8B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.05, - output: 0.08, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "meta-llama/llama-4-maverick-17b-128e-instruct": { - id: "meta-llama/llama-4-maverick-17b-128e-instruct", - name: "Llama 4 Maverick 17B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.2, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, "meta-llama/llama-4-scout-17b-16e-instruct": { id: "meta-llama/llama-4-scout-17b-16e-instruct", - name: "Llama 4 Scout 17B", + name: "Llama 4 Scout 17B 16E", api: "openai-completions", provider: "groq", baseUrl: "https://api.groq.com/openai/v1", @@ -5192,57 +5093,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 8192, } satisfies Model<"openai-completions">, - "mistral-saba-24b": { - id: "mistral-saba-24b", - name: "Mistral Saba 24B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.79, - output: 0.79, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "moonshotai/kimi-k2-instruct": { - id: "moonshotai/kimi-k2-instruct", - name: "Kimi K2 Instruct", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1, - output: 3, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "moonshotai/kimi-k2-instruct-0905": { - id: "moonshotai/kimi-k2-instruct-0905", - name: "Kimi K2 Instruct 0905", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1, - output: 3, - cacheRead: 0.5, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 16384, - } satisfies Model<"openai-completions">, "openai/gpt-oss-120b": { id: "openai/gpt-oss-120b", name: "GPT OSS 120B", @@ -5294,26 +5144,9 @@ export const MODELS = { contextWindow: 131072, maxTokens: 65536, } satisfies Model<"openai-completions">, - "qwen-qwq-32b": { - id: "qwen-qwq-32b", - name: "Qwen QwQ 32B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.29, - output: 0.39, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 16384, - } satisfies Model<"openai-completions">, "qwen/qwen3-32b": { id: "qwen/qwen3-32b", - name: "Qwen3 32B", + name: "Qwen3-32B", api: "openai-completions", provider: "groq", baseUrl: "https://api.groq.com/openai/v1", @@ -5729,6 +5562,24 @@ export const MODELS = { } satisfies Model<"openai-completions">, }, "kimi-coding": { + "k2p7": { + id: "k2p7", + name: "Kimi K2.7 Code", + api: "anthropic-messages", + provider: "kimi-coding", + baseUrl: "https://api.kimi.com/coding", + headers: {"User-Agent":"KimiCLI/1.5"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 32768, + } satisfies Model<"anthropic-messages">, "kimi-for-coding": { id: "kimi-for-coding", name: "Kimi For Coding", @@ -6511,6 +6362,24 @@ export const MODELS = { contextWindow: 262144, maxTokens: 262144, } satisfies Model<"openai-completions">, + "kimi-k2.7-code": { + id: "kimi-k2.7-code", + name: "Kimi K2.7 Code", + api: "openai-completions", + provider: "moonshotai", + baseUrl: "https://api.moonshot.ai/v1", + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"thinkingFormat":"deepseek"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.95, + output: 4, + cacheRead: 0.19, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, }, "moonshotai-cn": { "kimi-k2-0711-preview": { @@ -6763,8 +6632,8 @@ export const MODELS = { baseUrl: "https://integrate.api.nvidia.com/v1", headers: {"NVCF-POLL-SECONDS":"3600"}, compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false}, - reasoning: false, - input: ["text"], + reasoning: true, + input: ["text", "image"], cost: { input: 0, output: 0, @@ -6793,44 +6662,6 @@ export const MODELS = { contextWindow: 262144, maxTokens: 262144, } satisfies Model<"openai-completions">, - "nvidia/llama-3.3-nemotron-super-49b-v1": { - id: "nvidia/llama-3.3-nemotron-super-49b-v1", - name: "Llama 3.3 Nemotron Super 49B v1", - api: "openai-completions", - provider: "nvidia", - baseUrl: "https://integrate.api.nvidia.com/v1", - headers: {"NVCF-POLL-SECONDS":"3600"}, - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "nvidia/llama-3.3-nemotron-super-49b-v1.5": { - id: "nvidia/llama-3.3-nemotron-super-49b-v1.5", - name: "Llama 3.3 Nemotron Super 49B v1.5", - api: "openai-completions", - provider: "nvidia", - baseUrl: "https://integrate.api.nvidia.com/v1", - headers: {"NVCF-POLL-SECONDS":"3600"}, - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, "nvidia/nemotron-3-nano-30b-a3b": { id: "nvidia/nemotron-3-nano-30b-a3b", name: "nemotron-3-nano-30b-a3b", @@ -6926,6 +6757,25 @@ export const MODELS = { contextWindow: 131072, maxTokens: 131072, } satisfies Model<"openai-completions">, + "openai/gpt-oss-120b": { + id: "openai/gpt-oss-120b", + name: "GPT-OSS-120B", + api: "openai-completions", + provider: "nvidia", + baseUrl: "https://integrate.api.nvidia.com/v1", + headers: {"NVCF-POLL-SECONDS":"3600"}, + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false}, + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 8192, + } satisfies Model<"openai-completions">, "openai/gpt-oss-20b": { id: "openai/gpt-oss-20b", name: "GPT OSS 20B", @@ -6945,25 +6795,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 32768, } satisfies Model<"openai-completions">, - "qwen/qwen3-coder-480b-a35b-instruct": { - id: "qwen/qwen3-coder-480b-a35b-instruct", - name: "Qwen3 Coder 480B A35B Instruct", - api: "openai-completions", - provider: "nvidia", - baseUrl: "https://integrate.api.nvidia.com/v1", - headers: {"NVCF-POLL-SECONDS":"3600"}, - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false}, - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 66536, - } satisfies Model<"openai-completions">, "qwen/qwen3.5-122b-a10b": { id: "qwen/qwen3.5-122b-a10b", name: "Qwen3.5 122B-A10B", @@ -7551,7 +7382,7 @@ export const MODELS = { cacheRead: 0.25, cacheWrite: 0, }, - contextWindow: 272000, + contextWindow: 1050000, maxTokens: 128000, } satisfies Model<"openai-responses">, "gpt-5.4-mini": { @@ -7623,7 +7454,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 0, }, - contextWindow: 272000, + contextWindow: 1050000, maxTokens: 128000, } satisfies Model<"openai-responses">, "gpt-5.5-pro": { @@ -7815,7 +7646,7 @@ export const MODELS = { cacheRead: 0.25, cacheWrite: 0, }, - contextWindow: 272000, + contextWindow: 1000000, maxTokens: 128000, } satisfies Model<"openai-codex-responses">, "gpt-5.4-mini": { @@ -7833,7 +7664,7 @@ export const MODELS = { cacheRead: 0.075, cacheWrite: 0, }, - contextWindow: 272000, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-codex-responses">, "gpt-5.5": { @@ -7851,7 +7682,7 @@ export const MODELS = { cacheRead: 0.5, cacheWrite: 0, }, - contextWindow: 272000, + contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-codex-responses">, }, @@ -7882,7 +7713,7 @@ export const MODELS = { baseUrl: "https://opencode.ai/zen", compat: {"forceAdaptiveThinking":true}, reasoning: true, - thinkingLevelMap: {"xhigh":"xhigh"}, + thinkingLevelMap: {"off":null,"xhigh":"xhigh"}, input: ["text", "image"], cost: { input: 10, @@ -8066,7 +7897,7 @@ export const MODELS = { cost: { input: 0.14, output: 0.28, - cacheRead: 0.03, + cacheRead: 0.028, cacheWrite: 0, }, contextWindow: 1000000, @@ -8091,6 +7922,25 @@ export const MODELS = { contextWindow: 200000, maxTokens: 128000, } satisfies Model<"openai-completions">, + "deepseek-v4-pro": { + id: "deepseek-v4-pro", + name: "DeepSeek V4 Pro", + api: "openai-completions", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + compat: {"maxTokensField":"max_tokens","requiresReasoningContentOnAssistantMessages":true,"thinkingFormat":"deepseek"}, + reasoning: true, + thinkingLevelMap: {"minimal":null,"low":null,"medium":null,"high":"high","xhigh":"max"}, + input: ["text"], + cost: { + input: 1.74, + output: 3.84, + cacheRead: 0.145, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 384000, + } satisfies Model<"openai-completions">, "gemini-3-flash": { id: "gemini-3-flash", name: "Gemini 3 Flash", @@ -8724,24 +8574,6 @@ export const MODELS = { contextWindow: 202752, maxTokens: 32768, } satisfies Model<"openai-completions">, - "kimi-k2.5": { - id: "kimi-k2.5", - name: "Kimi K2.5", - api: "openai-completions", - provider: "opencode-go", - baseUrl: "https://opencode.ai/zen/go/v1", - compat: {"maxTokensField":"max_tokens"}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.6, - output: 3, - cacheRead: 0.1, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, "kimi-k2.6": { id: "kimi-k2.6", name: "Kimi K2.6", @@ -8761,6 +8593,24 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65536, } satisfies Model<"openai-completions">, + "kimi-k2.7-code": { + id: "kimi-k2.7-code", + name: "Kimi K2.7 Code", + api: "openai-completions", + provider: "opencode-go", + baseUrl: "https://opencode.ai/zen/go/v1", + compat: {"maxTokensField":"max_tokens"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.95, + output: 4, + cacheRead: 0.19, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, "mimo-v2.5": { id: "mimo-v2.5", name: "MiMo V2.5", @@ -8797,23 +8647,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 128000, } satisfies Model<"openai-completions">, - "minimax-m2.5": { - id: "minimax-m2.5", - name: "MiniMax M2.5", - api: "anthropic-messages", - provider: "opencode-go", - baseUrl: "https://opencode.ai/zen/go", - reasoning: true, - input: ["text"], - cost: { - input: 0.3, - output: 1.2, - cacheRead: 0.03, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 65536, - } satisfies Model<"anthropic-messages">, "minimax-m2.7": { id: "minimax-m2.7", name: "MiniMax M2.7", @@ -8834,16 +8667,16 @@ export const MODELS = { } satisfies Model<"openai-completions">, "minimax-m3": { id: "minimax-m3", - name: "MiniMax M3", + name: "MiniMax M3 (3x usage)", api: "anthropic-messages", provider: "opencode-go", baseUrl: "https://opencode.ai/zen/go", reasoning: true, input: ["text", "image"], cost: { - input: 0.3, - output: 1.2, - cacheRead: 0.06, + input: 0.1, + output: 0.4, + cacheRead: 0.02, cacheWrite: 0, }, contextWindow: 512000, @@ -9600,13 +9433,13 @@ export const MODELS = { thinkingLevelMap: {"minimal":null,"low":null,"medium":null,"high":"high","xhigh":"xhigh"}, input: ["text"], cost: { - input: 0.0983, - output: 0.1966, - cacheRead: 0.019700000000000002, + input: 0.098, + output: 0.196, + cacheRead: 0.02, cacheWrite: 0, }, contextWindow: 1048576, - maxTokens: 131072, + maxTokens: 4096, } satisfies Model<"openai-completions">, "deepseek/deepseek-v4-pro": { id: "deepseek/deepseek-v4-pro", @@ -9926,12 +9759,12 @@ export const MODELS = { input: ["text", "image"], cost: { input: 0.12, - output: 0.36, + output: 0.35, cacheRead: 0.09, cacheWrite: 0, }, contextWindow: 262144, - maxTokens: 8192, + maxTokens: 262144, } satisfies Model<"openai-completions">, "google/gemma-4-31b-it:free": { id: "google/gemma-4-31b-it:free", @@ -10232,9 +10065,9 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.27, - output: 1.08, - cacheRead: 0.054, + input: 0.25, + output: 1, + cacheRead: 0.049999999999999996, cacheWrite: 0, }, contextWindow: 204800, @@ -10632,19 +10465,18 @@ export const MODELS = { contextWindow: 262144, maxTokens: 262142, } satisfies Model<"openai-completions">, - "moonshotai/kimi-k2.6:free": { - id: "moonshotai/kimi-k2.6:free", - name: "MoonshotAI: Kimi K2.6 (free)", + "moonshotai/kimi-k2.7-code": { + id: "moonshotai/kimi-k2.7-code", + name: "MoonshotAI: Kimi K2.7 Code", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - compat: {"supportsDeveloperRole":false,"requiresReasoningContentOnAssistantMessages":true}, reasoning: true, input: ["text", "image"], cost: { - input: 0, - output: 0, - cacheRead: 0, + input: 0.95, + output: 4, + cacheRead: 0.19, cacheWrite: 0, }, contextWindow: 262144, @@ -10820,23 +10652,6 @@ export const MODELS = { contextWindow: 128000, maxTokens: 128000, } satisfies Model<"openai-completions">, - "nvidia/nemotron-nano-9b-v2": { - id: "nvidia/nemotron-nano-9b-v2", - name: "NVIDIA: Nemotron Nano 9B V2", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.04, - output: 0.16, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 16384, - } satisfies Model<"openai-completions">, "nvidia/nemotron-nano-9b-v2:free": { id: "nvidia/nemotron-nano-9b-v2:free", name: "NVIDIA: Nemotron Nano 9B V2 (free)", @@ -12558,13 +12373,13 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.28900000000000003, - output: 2.4, + input: 0.28850000000000003, + output: 3.17, cacheRead: 0, cacheWrite: 0, }, contextWindow: 262144, - maxTokens: 131072, + maxTokens: 262140, } satisfies Model<"openai-completions">, "qwen/qwen3.6-35b-a3b": { id: "qwen/qwen3.6-35b-a3b", @@ -12575,13 +12390,13 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.14, + input: 0.15, output: 1, - cacheRead: 0, + cacheRead: 0.049999999999999996, cacheWrite: 0, }, contextWindow: 262144, - maxTokens: 262140, + maxTokens: 262144, } satisfies Model<"openai-completions">, "qwen/qwen3.6-flash": { id: "qwen/qwen3.6-flash", @@ -12660,10 +12475,10 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.39999999999999997, - output: 1.5999999999999999, - cacheRead: 0.08, - cacheWrite: 0.5, + input: 0.32, + output: 1.28, + cacheRead: 0.064, + cacheWrite: 0.39999999999999997, }, contextWindow: 1000000, maxTokens: 65536, @@ -12923,23 +12738,6 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 131072, } satisfies Model<"openai-completions">, - "z-ai/glm-4-32b": { - id: "z-ai/glm-4-32b", - name: "Z.ai: GLM 4 32B ", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.09999999999999999, - output: 0.09999999999999999, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "z-ai/glm-4.5": { id: "z-ai/glm-4.5", name: "Z.ai: GLM 4.5", @@ -12974,23 +12772,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 131070, } satisfies Model<"openai-completions">, - "z-ai/glm-4.5-air:free": { - id: "z-ai/glm-4.5-air:free", - name: "Z.ai: GLM 4.5 Air (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 96000, - } satisfies Model<"openai-completions">, "z-ai/glm-4.5v": { id: "z-ai/glm-4.5v", name: "Z.ai: GLM 4.5V", @@ -13036,11 +12817,11 @@ export const MODELS = { cost: { input: 0.3, output: 0.8999999999999999, - cacheRead: 0.049999999999999996, + cacheRead: 0.055, cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 24000, + maxTokens: 32768, } satisfies Model<"openai-completions">, "z-ai/glm-4.7": { id: "z-ai/glm-4.7", @@ -13107,7 +12888,7 @@ export const MODELS = { cacheRead: 0.24, cacheWrite: 0, }, - contextWindow: 202752, + contextWindow: 262144, maxTokens: 131072, } satisfies Model<"openai-completions">, "z-ai/glm-5.1": { @@ -13127,23 +12908,6 @@ export const MODELS = { contextWindow: 202752, maxTokens: 4096, } satisfies Model<"openai-completions">, - "z-ai/glm-5v-turbo": { - id: "z-ai/glm-5v-turbo", - name: "Z.ai: GLM 5V Turbo", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.2, - output: 4, - cacheRead: 0.24, - cacheWrite: 0, - }, - contextWindow: 202752, - maxTokens: 131072, - } satisfies Model<"openai-completions">, "~anthropic/claude-fable-latest": { id: "~anthropic/claude-fable-latest", name: "Anthropic: Claude Fable Latest", @@ -13299,25 +13063,6 @@ export const MODELS = { } satisfies Model<"openai-completions">, }, "together": { - "MiniMaxAI/MiniMax-M2.5": { - id: "MiniMaxAI/MiniMax-M2.5", - name: "MiniMax-M2.5", - api: "openai-completions", - provider: "together", - baseUrl: "https://api.together.ai/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false,"thinkingFormat":"together"}, - reasoning: true, - thinkingLevelMap: {"minimal":null,"low":null,"medium":null}, - input: ["text"], - cost: { - input: 0.3, - output: 1.2, - cacheRead: 0.06, - cacheWrite: 0, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, "MiniMaxAI/MiniMax-M2.7": { id: "MiniMaxAI/MiniMax-M2.7", name: "MiniMax-M2.7", @@ -13337,28 +13082,9 @@ export const MODELS = { contextWindow: 202752, maxTokens: 131072, } satisfies Model<"openai-completions">, - "Qwen/Qwen3-235B-A22B-Instruct-2507-tput": { - id: "Qwen/Qwen3-235B-A22B-Instruct-2507-tput", - name: "Qwen3 235B A22B Instruct 2507 FP8", - api: "openai-completions", - provider: "together", - baseUrl: "https://api.together.ai/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false,"thinkingFormat":"together"}, - reasoning: true, - thinkingLevelMap: {"minimal":null,"low":null,"medium":null}, - input: ["text"], - cost: { - input: 0.2, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": { - id: "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", - name: "Qwen3 Coder 480B A35B Instruct", + "Qwen/Qwen2.5-7B-Instruct-Turbo": { + id: "Qwen/Qwen2.5-7B-Instruct-Turbo", + name: "Qwen 2.5 7B Instruct Turbo", api: "openai-completions", provider: "together", baseUrl: "https://api.together.ai/v1", @@ -13366,27 +13092,26 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 2, - output: 2, + input: 0.3, + output: 0.3, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 262144, - maxTokens: 262144, + contextWindow: 32768, + maxTokens: 32768, } satisfies Model<"openai-completions">, - "Qwen/Qwen3-Coder-Next-FP8": { - id: "Qwen/Qwen3-Coder-Next-FP8", - name: "Qwen3 Coder Next FP8", + "Qwen/Qwen3-235B-A22B-Instruct-2507-tput": { + id: "Qwen/Qwen3-235B-A22B-Instruct-2507-tput", + name: "Qwen3 235B A22B Instruct 2507 FP8", api: "openai-completions", provider: "together", baseUrl: "https://api.together.ai/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false,"thinkingFormat":"together"}, - reasoning: true, - thinkingLevelMap: {"minimal":null,"low":null,"medium":null}, + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false}, + reasoning: false, input: ["text"], cost: { - input: 0.5, - output: 1.2, + input: 0.2, + output: 0.6, cacheRead: 0, cacheWrite: 0, }, @@ -13412,6 +13137,25 @@ export const MODELS = { contextWindow: 262144, maxTokens: 130000, } satisfies Model<"openai-completions">, + "Qwen/Qwen3.5-9B": { + id: "Qwen/Qwen3.5-9B", + name: "Qwen3.5 9B", + api: "openai-completions", + provider: "together", + baseUrl: "https://api.together.ai/v1", + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false,"thinkingFormat":"together"}, + reasoning: true, + thinkingLevelMap: {"minimal":null,"low":null,"medium":null}, + input: ["text", "image"], + cost: { + input: 0.17, + output: 0.25, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 65536, + } satisfies Model<"openai-completions">, "Qwen/Qwen3.6-Plus": { id: "Qwen/Qwen3.6-Plus", name: "Qwen3.6 Plus", @@ -13437,9 +13181,8 @@ export const MODELS = { api: "openai-completions", provider: "together", baseUrl: "https://api.together.ai/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false,"thinkingFormat":"together"}, - reasoning: true, - thinkingLevelMap: {"minimal":null,"low":null,"medium":null}, + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false}, + reasoning: false, input: ["text"], cost: { input: 2.5, @@ -13450,44 +13193,6 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 500000, } satisfies Model<"openai-completions">, - "deepseek-ai/DeepSeek-V3": { - id: "deepseek-ai/DeepSeek-V3", - name: "DeepSeek-V3", - api: "openai-completions", - provider: "together", - baseUrl: "https://api.together.ai/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false,"thinkingFormat":"together"}, - reasoning: true, - thinkingLevelMap: {"minimal":null,"low":null,"medium":null}, - input: ["text"], - cost: { - input: 1.25, - output: 1.25, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "deepseek-ai/DeepSeek-V3-1": { - id: "deepseek-ai/DeepSeek-V3-1", - name: "DeepSeek V3.1", - api: "openai-completions", - provider: "together", - baseUrl: "https://api.together.ai/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false,"thinkingFormat":"together"}, - reasoning: true, - thinkingLevelMap: {"minimal":null,"low":null,"medium":null}, - input: ["text"], - cost: { - input: 0.6, - output: 1.7, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, "deepseek-ai/DeepSeek-V4-Pro": { id: "deepseek-ai/DeepSeek-V4-Pro", name: "DeepSeek V4 Pro", @@ -13499,8 +13204,8 @@ export const MODELS = { thinkingLevelMap: {"minimal":null,"low":null,"medium":null,"high":"high","xhigh":null}, input: ["text"], cost: { - input: 2.1, - output: 4.4, + input: 1.74, + output: 3.48, cacheRead: 0.2, cacheWrite: 0, }, @@ -13536,8 +13241,8 @@ export const MODELS = { thinkingLevelMap: {"minimal":null,"low":null,"medium":null}, input: ["text", "image"], cost: { - input: 0.2, - output: 0.5, + input: 0.39, + output: 0.97, cacheRead: 0, cacheWrite: 0, }, @@ -13562,25 +13267,6 @@ export const MODELS = { contextWindow: 131072, maxTokens: 131072, } satisfies Model<"openai-completions">, - "moonshotai/Kimi-K2.5": { - id: "moonshotai/Kimi-K2.5", - name: "Kimi K2.5", - api: "openai-completions", - provider: "together", - baseUrl: "https://api.together.ai/v1", - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false,"thinkingFormat":"together"}, - reasoning: true, - thinkingLevelMap: {"minimal":null,"low":null,"medium":null}, - input: ["text", "image"], - cost: { - input: 0.5, - output: 2.8, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, "moonshotai/Kimi-K2.6": { id: "moonshotai/Kimi-K2.6", name: "Kimi K2.6", @@ -13638,6 +13324,44 @@ export const MODELS = { contextWindow: 131072, maxTokens: 131072, } satisfies Model<"openai-completions">, + "openai/gpt-oss-20b": { + id: "openai/gpt-oss-20b", + name: "GPT OSS 20B", + api: "openai-completions", + provider: "together", + baseUrl: "https://api.together.ai/v1", + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":true,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false,"thinkingFormat":"openai"}, + reasoning: true, + thinkingLevelMap: {"off":null,"minimal":null}, + input: ["text"], + cost: { + input: 0.05, + output: 0.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "zai-org/GLM-5": { + id: "zai-org/GLM-5", + name: "GLM-5", + api: "openai-completions", + provider: "together", + baseUrl: "https://api.together.ai/v1", + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"supportsLongCacheRetention":false,"thinkingFormat":"together"}, + reasoning: true, + thinkingLevelMap: {"minimal":null,"low":null,"medium":null}, + input: ["text"], + cost: { + input: 1, + output: 3.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 202752, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "zai-org/GLM-5.1": { id: "zai-org/GLM-5.1", name: "GLM-5.1", @@ -14075,7 +13799,7 @@ export const MODELS = { baseUrl: "https://ai-gateway.vercel.sh", compat: {"forceAdaptiveThinking":true}, reasoning: true, - thinkingLevelMap: {"xhigh":"xhigh"}, + thinkingLevelMap: {"off":null,"xhigh":"xhigh"}, input: ["text", "image"], cost: { input: 10, @@ -15215,40 +14939,6 @@ export const MODELS = { contextWindow: 262114, maxTokens: 262114, } satisfies Model<"anthropic-messages">, - "moonshotai/kimi-k2-thinking-turbo": { - id: "moonshotai/kimi-k2-thinking-turbo", - name: "Kimi K2 Thinking Turbo", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: true, - input: ["text"], - cost: { - input: 1.15, - output: 8, - cacheRead: 0.15, - cacheWrite: 0, - }, - contextWindow: 262114, - maxTokens: 262114, - } satisfies Model<"anthropic-messages">, - "moonshotai/kimi-k2-turbo": { - id: "moonshotai/kimi-k2-turbo", - name: "Kimi K2 Turbo", - api: "anthropic-messages", - provider: "vercel-ai-gateway", - baseUrl: "https://ai-gateway.vercel.sh", - reasoning: false, - input: ["text"], - cost: { - input: 1.15, - output: 8, - cacheRead: 0.15, - cacheWrite: 0, - }, - contextWindow: 256000, - maxTokens: 16384, - } satisfies Model<"anthropic-messages">, "moonshotai/kimi-k2.5": { id: "moonshotai/kimi-k2.5", name: "Kimi K2.5", @@ -15283,6 +14973,23 @@ export const MODELS = { contextWindow: 262000, maxTokens: 262000, } satisfies Model<"anthropic-messages">, + "moonshotai/kimi-k2.7-code": { + id: "moonshotai/kimi-k2.7-code", + name: "Kimi K2.7 Code", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.95, + output: 4, + cacheRead: 0.19, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 32768, + } satisfies Model<"anthropic-messages">, "nvidia/nemotron-3-super-120b-a12b": { id: "nvidia/nemotron-3-super-120b-a12b", name: "NVIDIA Nemotron 3 Super 120B A12B", @@ -16749,6 +16456,24 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 131072, } satisfies Model<"openai-completions">, + "mimo-v2.5-pro-ultraspeed": { + id: "mimo-v2.5-pro-ultraspeed", + name: "MiMo-V2.5-Pro-UltraSpeed", + api: "openai-completions", + provider: "xiaomi", + baseUrl: "https://api.xiaomimimo.com/v1", + compat: {"requiresReasoningContentOnAssistantMessages":true,"thinkingFormat":"deepseek"}, + reasoning: true, + input: ["text"], + cost: { + input: 1.305, + output: 2.61, + cacheRead: 0.0108, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 131072, + } satisfies Model<"openai-completions">, }, "xiaomi-token-plan-ams": { "mimo-v2-omni": { @@ -16823,6 +16548,24 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 131072, } satisfies Model<"openai-completions">, + "mimo-v2.5-pro-ultraspeed": { + id: "mimo-v2.5-pro-ultraspeed", + name: "MiMo-V2.5-Pro-UltraSpeed", + api: "openai-completions", + provider: "xiaomi-token-plan-ams", + baseUrl: "https://token-plan-ams.xiaomimimo.com/v1", + compat: {"requiresReasoningContentOnAssistantMessages":true,"thinkingFormat":"deepseek"}, + reasoning: true, + input: ["text"], + cost: { + input: 1.305, + output: 2.61, + cacheRead: 0.0108, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 131072, + } satisfies Model<"openai-completions">, }, "xiaomi-token-plan-cn": { "mimo-v2-omni": { @@ -16897,6 +16640,24 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 131072, } satisfies Model<"openai-completions">, + "mimo-v2.5-pro-ultraspeed": { + id: "mimo-v2.5-pro-ultraspeed", + name: "MiMo-V2.5-Pro-UltraSpeed", + api: "openai-completions", + provider: "xiaomi-token-plan-cn", + baseUrl: "https://token-plan-cn.xiaomimimo.com/v1", + compat: {"requiresReasoningContentOnAssistantMessages":true,"thinkingFormat":"deepseek"}, + reasoning: true, + input: ["text"], + cost: { + input: 1.305, + output: 2.61, + cacheRead: 0.0108, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 131072, + } satisfies Model<"openai-completions">, }, "xiaomi-token-plan-sgp": { "mimo-v2-omni": { @@ -16971,6 +16732,24 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 131072, } satisfies Model<"openai-completions">, + "mimo-v2.5-pro-ultraspeed": { + id: "mimo-v2.5-pro-ultraspeed", + name: "MiMo-V2.5-Pro-UltraSpeed", + api: "openai-completions", + provider: "xiaomi-token-plan-sgp", + baseUrl: "https://token-plan-sgp.xiaomimimo.com/v1", + compat: {"requiresReasoningContentOnAssistantMessages":true,"thinkingFormat":"deepseek"}, + reasoning: true, + input: ["text"], + cost: { + input: 1.305, + output: 2.61, + cacheRead: 0.0108, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 131072, + } satisfies Model<"openai-completions">, }, "zai": { "glm-4.5-air": { diff --git a/packages/ai/test/openai-model-metadata.test.ts b/packages/ai/test/openai-model-metadata.test.ts new file mode 100644 index 000000000..64c593e17 --- /dev/null +++ b/packages/ai/test/openai-model-metadata.test.ts @@ -0,0 +1,15 @@ +import { describe, expect, it } from "vitest"; +import { getModel } from "../src/models.ts"; + +describe("OpenAI model metadata", () => { + it("uses current GPT-5.4 and GPT-5.5 API context windows", () => { + expect(getModel("openai", "gpt-5.4").contextWindow).toBe(1050000); + expect(getModel("openai", "gpt-5.5").contextWindow).toBe(1050000); + }); + + it("uses current OpenAI Codex context windows", () => { + expect(getModel("openai-codex", "gpt-5.4").contextWindow).toBe(1000000); + expect(getModel("openai-codex", "gpt-5.4-mini").contextWindow).toBe(400000); + expect(getModel("openai-codex", "gpt-5.5").contextWindow).toBe(400000); + }); +});