Files
pi/packages/ai/scripts/generate-models.ts
yanirz 99dc6fcec8 fix(ai): add session affinity and compat fixes for Fireworks provider caching
Fireworks prompt caching is enabled by default (automatic prefix matching),
but on serverless infrastructure, requests hit random replicas. Without
session affinity, the per-replica cache misses, negating cache hit rates
and the discounted cacheRead pricing.

Changes:

- Add sendSessionAffinityHeaders and supportsCacheControlOnTools
  to AnthropicMessagesCompat interface
- Send x-session-affinity header for Fireworks (and Cloudflare AI
  Gateway Anthropic) when sessionId is available and caching is enabled
- Omit cache_control on tool definitions for Fireworks (unsupported
  per https://docs.fireworks.ai/tools-sdks/anthropic-compatibility)
- Default supportsEagerToolInputStreaming to false for Fireworks
  (unsupported field)
- Default supportsLongCacheRetention to false for Fireworks
  (cache_control.ttl not supported)
- Add compat settings to Fireworks models in generate-models.ts
- Update generated models with Fireworks compat settings
- Add integration tests for session affinity and tool compat

Refs: https://docs.fireworks.ai/guides/prompt-caching
Refs: https://docs.fireworks.ai/tools-sdks/anthropic-compatibility
2026-05-10 00:11:36 +02:00

1943 lines
58 KiB
TypeScript

#!/usr/bin/env tsx
import { writeFileSync } from "fs";
import { join, dirname } from "path";
import { fileURLToPath } from "url";
import {
CLOUDFLARE_AI_GATEWAY_ANTHROPIC_BASE_URL,
CLOUDFLARE_AI_GATEWAY_COMPAT_BASE_URL,
CLOUDFLARE_AI_GATEWAY_OPENAI_BASE_URL,
CLOUDFLARE_WORKERS_AI_BASE_URL,
} from "../src/providers/cloudflare.js";
import {
Api,
type AnthropicMessagesCompat,
KnownProvider,
Model,
type OpenAICompletionsCompat,
} from "../src/types.js";
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const packageRoot = join(__dirname, "..");
interface ModelsDevModel {
id: string;
name: string;
tool_call?: boolean;
reasoning?: boolean;
limit?: {
context?: number;
output?: number;
};
cost?: {
input?: number;
output?: number;
cache_read?: number;
cache_write?: number;
};
modalities?: {
input?: string[];
};
provider?: {
npm?: string;
};
}
interface AiGatewayModel {
id: string;
name?: string;
context_window?: number;
max_tokens?: number;
tags?: string[];
pricing?: {
input?: string | number;
output?: string | number;
input_cache_read?: string | number;
input_cache_write?: string | number;
};
}
const COPILOT_STATIC_HEADERS = {
"User-Agent": "GitHubCopilotChat/0.35.0",
"Editor-Version": "vscode/1.107.0",
"Editor-Plugin-Version": "copilot-chat/0.35.0",
"Copilot-Integration-Id": "vscode-chat",
} as const;
const KIMI_STATIC_HEADERS = {
"User-Agent": "KimiCLI/1.5",
} as const;
const TOGETHER_BASE_URL = "https://api.together.ai/v1";
const TOGETHER_BASE_COMPAT: OpenAICompletionsCompat = {
supportsStore: false,
supportsDeveloperRole: false,
supportsReasoningEffort: false,
maxTokensField: "max_tokens",
supportsStrictMode: false,
supportsLongCacheRetention: false,
};
const TOGETHER_TOGGLE_REASONING_COMPAT: OpenAICompletionsCompat = {
...TOGETHER_BASE_COMPAT,
thinkingFormat: "together",
};
const TOGETHER_REASONING_EFFORT_COMPAT: OpenAICompletionsCompat = {
...TOGETHER_BASE_COMPAT,
supportsReasoningEffort: true,
thinkingFormat: "openai",
};
const TOGETHER_TOGGLE_REASONING_EFFORT_COMPAT: OpenAICompletionsCompat = {
...TOGETHER_TOGGLE_REASONING_COMPAT,
supportsReasoningEffort: true,
};
const TOGETHER_REASONING_ONLY_MODELS = new Set([
"deepseek-ai/DeepSeek-R1",
"MiniMaxAI/MiniMax-M2.5",
"MiniMaxAI/MiniMax-M2.7",
]);
const TOGETHER_REASONING_EFFORT_MODELS = new Set(["openai/gpt-oss-20b", "openai/gpt-oss-120b"]);
const TOGETHER_TOGGLE_REASONING_EFFORT_MODELS = new Set(["deepseek-ai/DeepSeek-V4-Pro"]);
const TOGETHER_FIXED_REASONING_LEVEL_MAP = {
off: null,
minimal: null,
low: null,
medium: null,
} as const;
const TOGETHER_REASONING_EFFORT_LEVEL_MAP = {
off: null,
minimal: null,
} as const;
const TOGETHER_DEEPSEEK_V4_THINKING_LEVEL_MAP = {
minimal: null,
low: null,
medium: null,
high: "high",
xhigh: null,
} as const;
const TOGETHER_TOGGLE_REASONING_LEVEL_MAP = {
minimal: null,
low: null,
medium: null,
} as const;
const AI_GATEWAY_MODELS_URL = "https://ai-gateway.vercel.sh/v1";
const AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh";
const ZAI_TOOL_STREAM_UNSUPPORTED_MODELS = new Set(["glm-4.5", "glm-4.5-air", "glm-4.5-flash", "glm-4.5v"]);
const EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS = new Set([
"github-copilot:claude-haiku-4.5",
"github-copilot:claude-sonnet-4",
"github-copilot:claude-sonnet-4.5",
]);
const DEEPSEEK_V4_THINKING_LEVEL_MAP = {
minimal: null,
low: null,
medium: null,
high: "high",
xhigh: "max",
} as const;
const OPENAI_RESPONSES_NONE_REASONING_MODELS = new Set([
"gpt-5.1",
"gpt-5.2",
"gpt-5.3-codex",
"gpt-5.4",
"gpt-5.4-mini",
"gpt-5.4-nano",
"gpt-5.5",
]);
function mergeThinkingLevelMap(model: Model<any>, map: NonNullable<Model<any>["thinkingLevelMap"]>): void {
model.thinkingLevelMap = { ...model.thinkingLevelMap, ...map };
}
function getTogetherCompat(modelId: string, reasoning: boolean): OpenAICompletionsCompat {
if (!reasoning) return TOGETHER_BASE_COMPAT;
if (TOGETHER_REASONING_EFFORT_MODELS.has(modelId)) return TOGETHER_REASONING_EFFORT_COMPAT;
if (TOGETHER_TOGGLE_REASONING_EFFORT_MODELS.has(modelId)) return TOGETHER_TOGGLE_REASONING_EFFORT_COMPAT;
if (TOGETHER_REASONING_ONLY_MODELS.has(modelId)) return TOGETHER_BASE_COMPAT;
return TOGETHER_TOGGLE_REASONING_COMPAT;
}
function getTogetherThinkingLevelMap(
modelId: string,
reasoning: boolean,
): NonNullable<Model<any>["thinkingLevelMap"]> | undefined {
if (!reasoning) return undefined;
if (TOGETHER_REASONING_EFFORT_MODELS.has(modelId)) return { ...TOGETHER_REASONING_EFFORT_LEVEL_MAP };
if (TOGETHER_TOGGLE_REASONING_EFFORT_MODELS.has(modelId)) return { ...TOGETHER_DEEPSEEK_V4_THINKING_LEVEL_MAP };
if (TOGETHER_REASONING_ONLY_MODELS.has(modelId)) return { ...TOGETHER_FIXED_REASONING_LEVEL_MAP };
return { ...TOGETHER_TOGGLE_REASONING_LEVEL_MAP };
}
function supportsOpenAiXhigh(modelId: string): boolean {
return (
modelId.includes("gpt-5.2") ||
modelId.includes("gpt-5.3") ||
modelId.includes("gpt-5.4") ||
modelId.includes("gpt-5.5")
);
}
function isGoogleThinkingApi(model: Model<any>): boolean {
return model.api === "google-generative-ai" || model.api === "google-vertex";
}
function isGemini3ProModel(modelId: string): boolean {
return /gemini-3(?:\.\d+)?-pro/.test(modelId.toLowerCase());
}
function isGemini3FlashModel(modelId: string): boolean {
return /gemini-3(?:\.\d+)?-flash/.test(modelId.toLowerCase());
}
function isGemma4Model(modelId: string): boolean {
return /gemma-?4/.test(modelId.toLowerCase());
}
function applyThinkingLevelMetadata(model: Model<any>): void {
if (
(model.api === "openai-responses" || model.api === "azure-openai-responses") &&
model.id.startsWith("gpt-5")
) {
mergeThinkingLevelMap(model, { off: null });
}
if (
model.api === "openai-responses" &&
model.provider === "openai" &&
OPENAI_RESPONSES_NONE_REASONING_MODELS.has(model.id)
) {
mergeThinkingLevelMap(model, { off: "none" });
}
if (supportsOpenAiXhigh(model.id)) {
mergeThinkingLevelMap(model, { xhigh: "xhigh" });
}
if (model.id.includes("opus-4-6") || model.id.includes("opus-4.6")) {
mergeThinkingLevelMap(model, { xhigh: "max" });
}
if (model.id.includes("opus-4-7") || model.id.includes("opus-4.7")) {
mergeThinkingLevelMap(model, { xhigh: "xhigh" });
}
if (model.api === "openai-completions" && model.id.includes("deepseek-v4")) {
mergeThinkingLevelMap(model, DEEPSEEK_V4_THINKING_LEVEL_MAP);
}
if (isGoogleThinkingApi(model) && isGemini3ProModel(model.id)) {
mergeThinkingLevelMap(model, { off: null, minimal: null, low: "LOW", medium: null, high: "HIGH" });
}
if (isGoogleThinkingApi(model) && isGemini3FlashModel(model.id)) {
mergeThinkingLevelMap(model, { off: null });
}
if (isGoogleThinkingApi(model) && isGemma4Model(model.id)) {
mergeThinkingLevelMap(model, { off: null, minimal: "MINIMAL", low: null, medium: null, high: "HIGH" });
}
if (model.provider === "groq" && model.id === "qwen/qwen3-32b") {
mergeThinkingLevelMap(model, { minimal: null, low: null, medium: null, high: "default" });
}
if (model.provider === "openai-codex" && supportsOpenAiXhigh(model.id)) {
mergeThinkingLevelMap(model, { minimal: "low" });
}
if (model.provider === "openai-codex" && model.id === "gpt-5.1-codex-mini") {
mergeThinkingLevelMap(model, { minimal: "medium", low: "medium", medium: "medium", high: "high" });
}
}
function getAnthropicMessagesCompat(provider: string, modelId: string): AnthropicMessagesCompat | undefined {
return EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS.has(`${provider}:${modelId}`)
? { supportsEagerToolInputStreaming: false }
: undefined;
}
function getBedrockBaseUrl(modelId: string): string {
return modelId.startsWith("eu.")
? "https://bedrock-runtime.eu-central-1.amazonaws.com"
: "https://bedrock-runtime.us-east-1.amazonaws.com";
}
async function fetchOpenRouterModels(): Promise<Model<any>[]> {
try {
console.log("Fetching models from OpenRouter API...");
const response = await fetch("https://openrouter.ai/api/v1/models");
const data = await response.json();
const models: Model<any>[] = [];
for (const model of data.data) {
// Only include models that support tools
if (!model.supported_parameters?.includes("tools")) continue;
// Parse provider from model ID
let provider: KnownProvider = "openrouter";
let modelKey = model.id;
modelKey = model.id; // Keep full ID for OpenRouter
// Parse input modalities
const input: ("text" | "image")[] = ["text"];
if (model.architecture?.modality?.includes("image")) {
input.push("image");
}
// Convert pricing from $/token to $/million tokens
const inputCost = parseFloat(model.pricing?.prompt || "0") * 1_000_000;
const outputCost = parseFloat(model.pricing?.completion || "0") * 1_000_000;
const cacheReadCost = parseFloat(model.pricing?.input_cache_read || "0") * 1_000_000;
const cacheWriteCost = parseFloat(model.pricing?.input_cache_write || "0") * 1_000_000;
const normalizedModel: Model<any> = {
id: modelKey,
name: model.name,
api: "openai-completions",
baseUrl: "https://openrouter.ai/api/v1",
provider,
reasoning: model.supported_parameters?.includes("reasoning") || false,
input,
cost: {
input: inputCost,
output: outputCost,
cacheRead: cacheReadCost,
cacheWrite: cacheWriteCost,
},
contextWindow: model.context_length || 4096,
maxTokens: model.top_provider?.max_completion_tokens || 4096,
};
models.push(normalizedModel);
}
console.log(`Fetched ${models.length} tool-capable models from OpenRouter`);
return models;
} catch (error) {
console.error("Failed to fetch OpenRouter models:", error);
return [];
}
}
async function fetchAiGatewayModels(): Promise<Model<any>[]> {
try {
console.log("Fetching models from Vercel AI Gateway API...");
const response = await fetch(`${AI_GATEWAY_MODELS_URL}/models`);
const data = await response.json();
const models: Model<any>[] = [];
const toNumber = (value: string | number | undefined): number => {
if (typeof value === "number") {
return Number.isFinite(value) ? value : 0;
}
const parsed = parseFloat(value ?? "0");
return Number.isFinite(parsed) ? parsed : 0;
};
const items = Array.isArray(data.data) ? (data.data as AiGatewayModel[]) : [];
for (const model of items) {
const tags = Array.isArray(model.tags) ? model.tags : [];
// Only include models that support tools
if (!tags.includes("tool-use")) continue;
const input: ("text" | "image")[] = ["text"];
if (tags.includes("vision")) {
input.push("image");
}
const inputCost = toNumber(model.pricing?.input) * 1_000_000;
const outputCost = toNumber(model.pricing?.output) * 1_000_000;
const cacheReadCost = toNumber(model.pricing?.input_cache_read) * 1_000_000;
const cacheWriteCost = toNumber(model.pricing?.input_cache_write) * 1_000_000;
models.push({
id: model.id,
name: model.name || model.id,
api: "anthropic-messages",
baseUrl: AI_GATEWAY_BASE_URL,
provider: "vercel-ai-gateway",
reasoning: tags.includes("reasoning"),
input,
cost: {
input: inputCost,
output: outputCost,
cacheRead: cacheReadCost,
cacheWrite: cacheWriteCost,
},
contextWindow: model.context_window || 4096,
maxTokens: model.max_tokens || 4096,
});
}
console.log(`Fetched ${models.length} tool-capable models from Vercel AI Gateway`);
return models;
} catch (error) {
console.error("Failed to fetch Vercel AI Gateway models:", error);
return [];
}
}
async function loadModelsDevData(): Promise<Model<any>[]> {
try {
console.log("Fetching models from models.dev API...");
const response = await fetch("https://models.dev/api.json");
const data = await response.json();
const models: Model<any>[] = [];
// Process Amazon Bedrock models
if (data["amazon-bedrock"]?.models) {
for (const [modelId, model] of Object.entries(data["amazon-bedrock"].models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
let id = modelId;
if (id.startsWith("ai21.jamba")) {
// These models doesn't support tool use in streaming mode
continue;
}
if (id.startsWith("mistral.mistral-7b-instruct-v0")) {
// These models doesn't support system messages
continue;
}
models.push({
id,
name: m.name || id,
api: "bedrock-converse-stream" as const,
provider: "amazon-bedrock" as const,
baseUrl: getBedrockBaseUrl(id),
reasoning: m.reasoning === true,
input: (m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"]) as ("text" | "image")[],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
// Process Anthropic models
if (data.anthropic?.models) {
for (const [modelId, model] of Object.entries(data.anthropic.models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
models.push({
id: modelId,
name: m.name || modelId,
api: "anthropic-messages",
provider: "anthropic",
baseUrl: "https://api.anthropic.com",
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
// Process Google models
if (data.google?.models) {
for (const [modelId, model] of Object.entries(data.google.models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
models.push({
id: modelId,
name: m.name || modelId,
api: "google-generative-ai",
provider: "google",
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
// Process OpenAI models
if (data.openai?.models) {
for (const [modelId, model] of Object.entries(data.openai.models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
models.push({
id: modelId,
name: m.name || modelId,
api: "openai-responses",
provider: "openai",
baseUrl: "https://api.openai.com/v1",
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
// Process Groq models
if (data.groq?.models) {
for (const [modelId, model] of Object.entries(data.groq.models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
models.push({
id: modelId,
name: m.name || modelId,
api: "openai-completions",
provider: "groq",
baseUrl: "https://api.groq.com/openai/v1",
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
// Process Cerebras models
if (data.cerebras?.models) {
for (const [modelId, model] of Object.entries(data.cerebras.models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
models.push({
id: modelId,
name: m.name || modelId,
api: "openai-completions",
provider: "cerebras",
baseUrl: "https://api.cerebras.ai/v1",
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
// Process Cloudflare Workers AI models
if (data["cloudflare-workers-ai"]?.models) {
for (const [modelId, model] of Object.entries(data["cloudflare-workers-ai"].models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
models.push({
id: modelId,
name: m.name || modelId,
api: "openai-completions",
provider: "cloudflare-workers-ai",
baseUrl: CLOUDFLARE_WORKERS_AI_BASE_URL,
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
compat: { sendSessionAffinityHeaders: true },
});
}
}
// Process Cloudflare AI Gateway models
if (data["cloudflare-ai-gateway"]?.models) {
for (const [prefixedId, model] of Object.entries(data["cloudflare-ai-gateway"].models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
const slashIdx = prefixedId.indexOf("/");
if (slashIdx === -1) continue;
const upstream = prefixedId.slice(0, slashIdx);
const nativeId = prefixedId.slice(slashIdx + 1);
let api: "anthropic-messages" | "openai-completions" | "openai-responses";
let baseUrl: string;
let id: string;
if (upstream === "openai") {
api = "openai-responses";
baseUrl = CLOUDFLARE_AI_GATEWAY_OPENAI_BASE_URL;
id = nativeId;
} else if (upstream === "anthropic") {
api = "anthropic-messages";
baseUrl = CLOUDFLARE_AI_GATEWAY_ANTHROPIC_BASE_URL;
id = nativeId;
} else if (upstream === "workers-ai") {
api = "openai-completions";
baseUrl = CLOUDFLARE_AI_GATEWAY_COMPAT_BASE_URL;
id = prefixedId;
} else {
continue;
}
// workers-ai/* through the gateway forwards x-session-affinity to
// the underlying Workers AI runtime for prefix-cache routing.
const compat = upstream === "workers-ai" ? { sendSessionAffinityHeaders: true } : undefined;
models.push({
id,
name: m.name || id,
api,
provider: "cloudflare-ai-gateway",
baseUrl,
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
...(compat ? { compat } : {}),
});
}
}
// Process xAi models
if (data.xai?.models) {
for (const [modelId, model] of Object.entries(data.xai.models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
models.push({
id: modelId,
name: m.name || modelId,
api: "openai-completions",
provider: "xai",
baseUrl: "https://api.x.ai/v1",
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
// Process zAi models
if (data["zai-coding-plan"]?.models) {
for (const [modelId, model] of Object.entries(data["zai-coding-plan"].models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
const supportsImage = m.modalities?.input?.includes("image");
models.push({
id: modelId,
name: m.name || modelId,
api: "openai-completions",
provider: "zai",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
reasoning: m.reasoning === true,
input: supportsImage ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
compat: {
supportsDeveloperRole: false,
thinkingFormat: "zai",
...(!ZAI_TOOL_STREAM_UNSUPPORTED_MODELS.has(modelId) ? { zaiToolStream: true } : {}),
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
// Process Mistral models
if (data.mistral?.models) {
for (const [modelId, model] of Object.entries(data.mistral.models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
models.push({
id: modelId,
name: m.name || modelId,
api: "mistral-conversations",
provider: "mistral",
baseUrl: "https://api.mistral.ai",
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
// Process Hugging Face models
if (data.huggingface?.models) {
for (const [modelId, model] of Object.entries(data.huggingface.models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
models.push({
id: modelId,
name: m.name || modelId,
api: "openai-completions",
provider: "huggingface",
baseUrl: "https://router.huggingface.co/v1",
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
compat: {
supportsDeveloperRole: false,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
// Process Fireworks models
if (data["fireworks-ai"]?.models) {
for (const [modelId, model] of Object.entries(data["fireworks-ai"].models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
models.push({
id: modelId,
name: m.name || modelId,
api: "anthropic-messages",
provider: "fireworks",
// Fireworks Anthropic-compatible API - SDK appends /v1/messages
baseUrl: "https://api.fireworks.ai/inference",
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
// Fireworks prompt caching uses automatic prefix matching + session affinity.
// x-session-affinity routes requests to the same replica for cache hits.
// cache_control on tools and eager_input_streaming are not supported.
// See: https://docs.fireworks.ai/tools-sdks/anthropic-compatibility
compat: {
sendSessionAffinityHeaders: true,
supportsEagerToolInputStreaming: false,
supportsCacheControlOnTools: false,
supportsLongCacheRetention: false,
},
});
}
}
// Process Together AI models
const togetherProvider = data.together ?? data.togetherai ?? data["together-ai"];
if (togetherProvider?.models) {
for (const [modelId, model] of Object.entries(togetherProvider.models)) {
const m = model as ModelsDevModel & { status?: string };
if (m.tool_call !== true) continue;
if (m.status === "deprecated") continue;
const reasoning = m.reasoning === true;
const thinkingLevelMap = getTogetherThinkingLevelMap(modelId, reasoning);
models.push({
id: modelId,
name: m.name || modelId,
api: "openai-completions",
provider: "together",
baseUrl: TOGETHER_BASE_URL,
reasoning,
...(thinkingLevelMap ? { thinkingLevelMap } : {}),
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
compat: getTogetherCompat(modelId, reasoning),
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
// Process OpenCode models (Zen and Go)
// API mapping based on provider.npm field:
// - @ai-sdk/openai → openai-responses
// - @ai-sdk/anthropic → anthropic-messages
// - @ai-sdk/google → google-generative-ai
// - null/undefined/@ai-sdk/openai-compatible → openai-completions
const opencodeVariants = [
{ key: "opencode", provider: "opencode", basePath: "https://opencode.ai/zen" },
{ key: "opencode-go", provider: "opencode-go", basePath: "https://opencode.ai/zen/go" },
] as const;
for (const variant of opencodeVariants) {
if (!data[variant.key]?.models) continue;
for (const [modelId, model] of Object.entries(data[variant.key].models)) {
const m = model as ModelsDevModel & { status?: string };
if (m.tool_call !== true) continue;
if (m.status === "deprecated") continue;
const npm = m.provider?.npm;
let api: Api;
let baseUrl: string;
let compat: OpenAICompletionsCompat | undefined;
if (npm === "@ai-sdk/openai") {
api = "openai-responses";
baseUrl = `${variant.basePath}/v1`;
} else if (npm === "@ai-sdk/anthropic") {
api = "anthropic-messages";
// Anthropic SDK appends /v1/messages to baseURL
baseUrl = variant.basePath;
} else if (npm === "@ai-sdk/google") {
api = "google-generative-ai";
baseUrl = `${variant.basePath}/v1`;
} else if (npm === "@ai-sdk/alibaba") {
api = "openai-completions";
baseUrl = `${variant.basePath}/v1`;
compat = { cacheControlFormat: "anthropic" };
} else {
// null, undefined, or @ai-sdk/openai-compatible
api = "openai-completions";
baseUrl = `${variant.basePath}/v1`;
}
// Fix known mismatches between models.dev npm data and actual
// OpenCode Go endpoint behaviour. models.dev reports these models
// as @ai-sdk/anthropic, but the OpenCode Go endpoints either don't
// accept Anthropic SDK auth (MiniMax M2.7) or are served through
// the OpenAI-compatible /v1/chat/completions path (Qwen 3.5/3.6).
// Switch them to openai-completions so requests use Bearer auth
// and the standard /v1/chat/completions endpoint.
if (variant.provider === "opencode-go") {
if (modelId === "minimax-m2.7") {
api = "openai-completions";
baseUrl = `${variant.basePath}/v1`;
}
if (modelId === "qwen3.5-plus" || modelId === "qwen3.6-plus") {
api = "openai-completions";
baseUrl = `${variant.basePath}/v1`;
// Qwen/DashScope uses enable_thinking at the top level.
compat = { ...(compat ?? {}), thinkingFormat: "qwen" };
}
}
models.push({
id: modelId,
name: m.name || modelId,
api,
provider: variant.provider,
baseUrl,
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
...(compat ? { compat } : {}),
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
// Process GitHub Copilot models
if (data["github-copilot"]?.models) {
for (const [modelId, model] of Object.entries(data["github-copilot"].models)) {
const m = model as ModelsDevModel & { status?: string };
if (m.tool_call !== true) continue;
if (m.status === "deprecated") continue;
// Claude 4.x models route to Anthropic Messages API
const isCopilotClaude4 = /^claude-(haiku|sonnet|opus)-4([.\-]|$)/.test(modelId);
// gpt-5 models require responses API, others use completions
const needsResponsesApi = modelId.startsWith("gpt-5") || modelId.startsWith("oswe");
const api: Api = isCopilotClaude4
? "anthropic-messages"
: needsResponsesApi
? "openai-responses"
: "openai-completions";
const anthropicCompat =
api === "anthropic-messages" ? getAnthropicMessagesCompat("github-copilot", modelId) : undefined;
const copilotModel: Model<any> = {
id: modelId,
name: m.name || modelId,
api,
provider: "github-copilot",
baseUrl: "https://api.individual.githubcopilot.com",
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 128000,
maxTokens: m.limit?.output || 8192,
headers: { ...COPILOT_STATIC_HEADERS },
...(anthropicCompat ? { compat: anthropicCompat } : {}),
// compat only applies to openai-completions
...(api === "openai-completions" ? {
compat: {
supportsStore: false,
supportsDeveloperRole: false,
supportsReasoningEffort: false,
},
} : {}),
};
models.push(copilotModel);
}
}
// Process MiniMax models
const minimaxVariants = [
{ key: "minimax", provider: "minimax", baseUrl: "https://api.minimax.io/anthropic" },
{ key: "minimax-cn", provider: "minimax-cn", baseUrl: "https://api.minimaxi.com/anthropic" },
] as const;
for (const { key, provider, baseUrl } of minimaxVariants) {
if (data[key]?.models) {
for (const [modelId, model] of Object.entries(data[key].models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
models.push({
id: modelId,
name: m.name || modelId,
api: "anthropic-messages",
provider,
// MiniMax's Anthropic-compatible API - SDK appends /v1/messages
baseUrl,
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
}
// Process Kimi For Coding models
if (data["kimi-for-coding"]?.models) {
const kimiModels = data["kimi-for-coding"].models as Record<string, ModelsDevModel>;
const hasCanonicalModel = Object.prototype.hasOwnProperty.call(kimiModels, "kimi-for-coding");
const kimiAliases = new Set(["k2p5", "k2p6"]);
for (const [modelId, model] of Object.entries(kimiModels)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
// models.dev may expose versioned aliases (e.g. k2p5/k2p6).
// Normalize aliases to the canonical model id and drop duplicates when canonical exists.
if (kimiAliases.has(modelId) && hasCanonicalModel) continue;
const normalizedId = kimiAliases.has(modelId) ? "kimi-for-coding" : modelId;
const normalizedName = kimiAliases.has(modelId) ? "Kimi For Coding" : m.name || normalizedId;
models.push({
id: normalizedId,
name: normalizedName,
api: "anthropic-messages",
provider: "kimi-coding",
// Kimi For Coding's Anthropic-compatible API - SDK appends /v1/messages
baseUrl: "https://api.kimi.com/coding",
headers: { ...KIMI_STATIC_HEADERS },
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
// Process Moonshot AI models
const moonshotVariants = [
{ key: "moonshotai", provider: "moonshotai", baseUrl: "https://api.moonshot.ai/v1" },
{ key: "moonshotai-cn", provider: "moonshotai-cn", baseUrl: "https://api.moonshot.cn/v1" },
] as const;
const moonshotCompat: OpenAICompletionsCompat = {
supportsStore: false,
supportsDeveloperRole: false,
supportsReasoningEffort: false,
maxTokensField: "max_tokens",
supportsStrictMode: false,
};
for (const { key, provider, baseUrl } of moonshotVariants) {
if (!data[key]?.models) continue;
for (const [modelId, model] of Object.entries(data[key].models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
models.push({
id: modelId,
name: m.name || modelId,
api: "openai-completions",
provider,
baseUrl,
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
compat: moonshotCompat,
});
}
}
// Process Xiaomi MiMo models
// Built-in `xiaomi` targets the API billing endpoint (single stable URL,
// keys from platform.xiaomimimo.com). The three `xiaomi-token-plan-*`
// providers cover prepaid Token Plan endpoints in cn / ams / sgp.
const xiaomiVariants = [
{ provider: "xiaomi", baseUrl: "https://api.xiaomimimo.com/anthropic" },
{ provider: "xiaomi-token-plan-cn", baseUrl: "https://token-plan-cn.xiaomimimo.com/anthropic" },
{ provider: "xiaomi-token-plan-ams", baseUrl: "https://token-plan-ams.xiaomimimo.com/anthropic" },
{ provider: "xiaomi-token-plan-sgp", baseUrl: "https://token-plan-sgp.xiaomimimo.com/anthropic" },
] as const;
if (data.xiaomi?.models) {
for (const { provider, baseUrl } of xiaomiVariants) {
for (const [modelId, model] of Object.entries(data.xiaomi.models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
models.push({
id: modelId,
name: m.name || modelId,
api: "anthropic-messages",
provider,
baseUrl,
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
}
console.log(`Loaded ${models.length} tool-capable models from models.dev`);
return models;
} catch (error) {
console.error("Failed to load models.dev data:", error);
return [];
}
}
async function generateModels() {
// Fetch models from both sources
// models.dev: Anthropic, Google, OpenAI, Groq, Cerebras
// OpenRouter: xAI and other providers (excluding Anthropic, Google, OpenAI)
// AI Gateway: OpenAI-compatible catalog with tool-capable models
const modelsDevModels = await loadModelsDevData();
const openRouterModels = await fetchOpenRouterModels();
const aiGatewayModels = await fetchAiGatewayModels();
// Combine models (models.dev has priority)
const allModels = [...modelsDevModels, ...openRouterModels, ...aiGatewayModels].filter(
(model) =>
!((model.provider === "opencode" || model.provider === "opencode-go") && model.id === "gpt-5.3-codex-spark"),
);
// Fix incorrect cache pricing for Claude Opus 4.5 from models.dev
// models.dev has 3x the correct pricing (1.5/18.75 instead of 0.5/6.25)
const opus45 = allModels.find(m => m.provider === "anthropic" && m.id === "claude-opus-4-5");
if (opus45) {
opus45.cost.cacheRead = 0.5;
opus45.cost.cacheWrite = 6.25;
}
// Temporary overrides until upstream model metadata is corrected.
for (const candidate of allModels) {
if (candidate.provider === "amazon-bedrock" && candidate.id.includes("anthropic.claude-opus-4-6-v1")) {
candidate.cost.cacheRead = 0.5;
candidate.cost.cacheWrite = 6.25;
}
if (
(candidate.provider === "anthropic" ||
candidate.provider === "opencode" ||
candidate.provider === "opencode-go" ||
candidate.provider === "github-copilot") &&
(candidate.id === "claude-opus-4-6" ||
candidate.id === "claude-sonnet-4-6" ||
candidate.id === "claude-opus-4.6" ||
candidate.id === "claude-sonnet-4.6")
) {
candidate.contextWindow = 1000000;
}
// OpenCode variants list Claude Sonnet 4/4.5 with 1M context, actual limit is 200K
if (
(candidate.provider === "opencode" || candidate.provider === "opencode-go") &&
(candidate.id === "claude-sonnet-4-5" || candidate.id === "claude-sonnet-4")
) {
candidate.contextWindow = 200000;
}
if ((candidate.provider === "opencode" || candidate.provider === "opencode-go") && candidate.id === "gpt-5.4") {
candidate.contextWindow = 272000;
candidate.maxTokens = 128000;
}
if (candidate.provider === "openai" && (candidate.id === "gpt-5.4" || candidate.id === "gpt-5.5")) {
candidate.contextWindow = 272000;
candidate.maxTokens = 128000;
}
// Keep selected OpenRouter model metadata stable until upstream settles.
if (candidate.provider === "openrouter" && candidate.id === "moonshotai/kimi-k2.5") {
candidate.cost.input = 0.41;
candidate.cost.output = 2.06;
candidate.cost.cacheRead = 0.07;
candidate.maxTokens = 4096;
}
if (candidate.provider === "openrouter" && candidate.id === "z-ai/glm-5") {
candidate.cost.input = 0.6;
candidate.cost.output = 1.9;
candidate.cost.cacheRead = 0.119;
}
}
// Add missing EU Opus 4.6 profile
if (!allModels.some((m) => m.provider === "amazon-bedrock" && m.id === "eu.anthropic.claude-opus-4-6-v1")) {
allModels.push({
id: "eu.anthropic.claude-opus-4-6-v1",
name: "Claude Opus 4.6 (EU)",
api: "bedrock-converse-stream",
provider: "amazon-bedrock",
baseUrl: getBedrockBaseUrl("eu.anthropic.claude-opus-4-6-v1"),
reasoning: true,
input: ["text", "image"],
cost: {
input: 5,
output: 25,
cacheRead: 0.5,
cacheWrite: 6.25,
},
contextWindow: 200000,
maxTokens: 128000,
});
}
// Add missing Claude Opus 4.6
if (!allModels.some(m => m.provider === "anthropic" && m.id === "claude-opus-4-6")) {
allModels.push({
id: "claude-opus-4-6",
name: "Claude Opus 4.6",
api: "anthropic-messages",
baseUrl: "https://api.anthropic.com",
provider: "anthropic",
reasoning: true,
input: ["text", "image"],
cost: {
input: 5,
output: 25,
cacheRead: 0.5,
cacheWrite: 6.25,
},
contextWindow: 1000000,
maxTokens: 128000,
});
}
// Add missing Claude Opus 4.7
if (!allModels.some(m => m.provider === "anthropic" && m.id === "claude-opus-4-7")) {
allModels.push({
id: "claude-opus-4-7",
name: "Claude Opus 4.7",
api: "anthropic-messages",
baseUrl: "https://api.anthropic.com",
provider: "anthropic",
reasoning: true,
input: ["text", "image"],
cost: {
input: 5,
output: 25,
cacheRead: 0.5,
cacheWrite: 6.25,
},
contextWindow: 1000000,
maxTokens: 128000,
});
}
// Add missing Claude Sonnet 4.6
if (!allModels.some(m => m.provider === "anthropic" && m.id === "claude-sonnet-4-6")) {
allModels.push({
id: "claude-sonnet-4-6",
name: "Claude Sonnet 4.6",
api: "anthropic-messages",
baseUrl: "https://api.anthropic.com",
provider: "anthropic",
reasoning: true,
input: ["text", "image"],
cost: {
input: 3,
output: 15,
cacheRead: 0.3,
cacheWrite: 3.75,
},
contextWindow: 1000000,
maxTokens: 64000,
});
}
// Add missing Gemini 3.1 Flash Lite Preview until models.dev includes it.
if (!allModels.some((m) => m.provider === "google" && m.id === "gemini-3.1-flash-lite-preview")) {
allModels.push({
id: "gemini-3.1-flash-lite-preview",
name: "Gemini 3.1 Flash Lite Preview",
api: "google-generative-ai",
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
provider: "google",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 1048576,
maxTokens: 65536,
});
}
// Add missing gpt models
if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5-chat-latest")) {
allModels.push({
id: "gpt-5-chat-latest",
name: "GPT-5 Chat Latest",
api: "openai-responses",
baseUrl: "https://api.openai.com/v1",
provider: "openai",
reasoning: false,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
});
}
if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex")) {
allModels.push({
id: "gpt-5.1-codex",
name: "GPT-5.1 Codex",
api: "openai-responses",
baseUrl: "https://api.openai.com/v1",
provider: "openai",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 5,
cacheRead: 0.125,
cacheWrite: 1.25,
},
contextWindow: 400000,
maxTokens: 128000,
});
}
if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex-max")) {
allModels.push({
id: "gpt-5.1-codex-max",
name: "GPT-5.1 Codex Max",
api: "openai-responses",
baseUrl: "https://api.openai.com/v1",
provider: "openai",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
});
}
if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.3-codex-spark")) {
allModels.push({
id: "gpt-5.3-codex-spark",
name: "GPT-5.3 Codex Spark",
api: "openai-responses",
baseUrl: "https://api.openai.com/v1",
provider: "openai",
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
});
}
// Add missing GitHub Copilot GPT-5.3 models until models.dev includes them.
const copilotBaseModel = allModels.find(
(m) => m.provider === "github-copilot" && m.id === "gpt-5.2-codex",
);
if (copilotBaseModel) {
if (!allModels.some((m) => m.provider === "github-copilot" && m.id === "gpt-5.3-codex")) {
allModels.push({
...copilotBaseModel,
id: "gpt-5.3-codex",
name: "GPT-5.3 Codex",
});
}
}
if (!allModels.some((m) => m.provider === "openai" && m.id === "gpt-5.4")) {
allModels.push({
id: "gpt-5.4",
name: "GPT-5.4",
api: "openai-responses",
baseUrl: "https://api.openai.com/v1",
provider: "openai",
reasoning: true,
input: ["text", "image"],
cost: {
input: 2.5,
output: 15,
cacheRead: 0.25,
cacheWrite: 0,
},
contextWindow: 272000,
maxTokens: 128000,
});
}
const deepseekCompat: OpenAICompletionsCompat = {
requiresReasoningContentOnAssistantMessages: true,
thinkingFormat: "deepseek",
};
const deepseekV4Models: Model<"openai-completions">[] = [
{
id: "deepseek-v4-flash",
name: "DeepSeek V4 Flash",
api: "openai-completions",
baseUrl: "https://api.deepseek.com",
provider: "deepseek",
reasoning: true,
input: ["text"],
cost: {
input: 0.14,
output: 0.28,
cacheRead: 0.0028,
cacheWrite: 0,
},
contextWindow: 1000000,
maxTokens: 384000,
compat: deepseekCompat,
},
{
id: "deepseek-v4-pro",
name: "DeepSeek V4 Pro",
api: "openai-completions",
baseUrl: "https://api.deepseek.com",
provider: "deepseek",
reasoning: true,
input: ["text"],
cost: {
input: 0.435,
output: 0.87,
cacheRead: 0.003625,
cacheWrite: 0,
},
contextWindow: 1000000,
maxTokens: 384000,
compat: deepseekCompat,
},
];
allModels.push(...deepseekV4Models);
for (const candidate of allModels) {
if (candidate.api === "openai-completions" && candidate.id.includes("deepseek-v4")) {
candidate.compat = {
...candidate.compat,
...(candidate.provider === "openrouter"
? {
requiresReasoningContentOnAssistantMessages:
deepseekCompat.requiresReasoningContentOnAssistantMessages,
thinkingFormat: deepseekCompat.thinkingFormat,
}
: deepseekCompat),
};
mergeThinkingLevelMap(candidate, DEEPSEEK_V4_THINKING_LEVEL_MAP);
}
}
const minimaxDirectSupportedIds = new Set(["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]);
for (const candidate of allModels) {
if (
(candidate.provider === "minimax" || candidate.provider === "minimax-cn") &&
minimaxDirectSupportedIds.has(candidate.id)
) {
candidate.contextWindow = 204800;
candidate.maxTokens = 131072;
}
}
for (let i = allModels.length - 1; i >= 0; i--) {
const candidate = allModels[i];
if (
(candidate.provider === "minimax" || candidate.provider === "minimax-cn") &&
!minimaxDirectSupportedIds.has(candidate.id)
) {
allModels.splice(i, 1);
}
}
// OpenAI Codex (ChatGPT OAuth) models
// NOTE: These are not fetched from models.dev; we keep a small, explicit list to avoid aliases.
// Context window is based on observed server limits (400s above ~272k), not marketing numbers.
const CODEX_BASE_URL = "https://chatgpt.com/backend-api";
const CODEX_CONTEXT = 272000;
const CODEX_MAX_TOKENS = 128000;
const codexModels: Model<"openai-codex-responses">[] = [
{
id: "gpt-5.1",
name: "GPT-5.1",
api: "openai-codex-responses",
provider: "openai-codex",
baseUrl: CODEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 },
contextWindow: CODEX_CONTEXT,
maxTokens: CODEX_MAX_TOKENS,
},
{
id: "gpt-5.1-codex-max",
name: "GPT-5.1 Codex Max",
api: "openai-codex-responses",
provider: "openai-codex",
baseUrl: CODEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 },
contextWindow: CODEX_CONTEXT,
maxTokens: CODEX_MAX_TOKENS,
},
{
id: "gpt-5.1-codex-mini",
name: "GPT-5.1 Codex Mini",
api: "openai-codex-responses",
provider: "openai-codex",
baseUrl: CODEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 0.25, output: 2, cacheRead: 0.025, cacheWrite: 0 },
contextWindow: CODEX_CONTEXT,
maxTokens: CODEX_MAX_TOKENS,
},
{
id: "gpt-5.2",
name: "GPT-5.2",
api: "openai-codex-responses",
provider: "openai-codex",
baseUrl: CODEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 },
contextWindow: CODEX_CONTEXT,
maxTokens: CODEX_MAX_TOKENS,
},
{
id: "gpt-5.2-codex",
name: "GPT-5.2 Codex",
api: "openai-codex-responses",
provider: "openai-codex",
baseUrl: CODEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 },
contextWindow: CODEX_CONTEXT,
maxTokens: CODEX_MAX_TOKENS,
},
{
id: "gpt-5.3-codex",
name: "GPT-5.3 Codex",
api: "openai-codex-responses",
provider: "openai-codex",
baseUrl: CODEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 },
contextWindow: CODEX_CONTEXT,
maxTokens: CODEX_MAX_TOKENS,
},
{
id: "gpt-5.4",
name: "GPT-5.4",
api: "openai-codex-responses",
provider: "openai-codex",
baseUrl: CODEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0 },
contextWindow: CODEX_CONTEXT,
maxTokens: CODEX_MAX_TOKENS,
},
{
id: "gpt-5.5",
name: "GPT-5.5",
api: "openai-codex-responses",
provider: "openai-codex",
baseUrl: CODEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 5, output: 30, cacheRead: 0.5, cacheWrite: 0 },
contextWindow: CODEX_CONTEXT,
maxTokens: CODEX_MAX_TOKENS,
},
{
id: "gpt-5.4-mini",
name: "GPT-5.4 Mini",
api: "openai-codex-responses",
provider: "openai-codex",
baseUrl: CODEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 0.75, output: 4.5, cacheRead: 0.075, cacheWrite: 0 },
contextWindow: CODEX_CONTEXT,
maxTokens: CODEX_MAX_TOKENS,
},
{
id: "gpt-5.3-codex-spark",
name: "GPT-5.3 Codex Spark",
api: "openai-codex-responses",
provider: "openai-codex",
baseUrl: CODEX_BASE_URL,
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128000,
maxTokens: CODEX_MAX_TOKENS,
},
];
allModels.push(...codexModels);
// Add missing Grok models
if (!allModels.some(m => m.provider === "xai" && m.id === "grok-code-fast-1")) {
allModels.push({
id: "grok-code-fast-1",
name: "Grok Code Fast 1",
api: "openai-completions",
baseUrl: "https://api.x.ai/v1",
provider: "xai",
reasoning: false,
input: ["text"],
cost: {
input: 0.2,
output: 1.5,
cacheRead: 0.02,
cacheWrite: 0,
},
contextWindow: 32768,
maxTokens: 8192,
});
}
// Add missing Mistral Medium 3.5 model until models.dev includes it
if (!allModels.some(m => m.provider === "mistral" && m.id === "mistral-medium-3.5")) {
allModels.push({
id: "mistral-medium-3.5",
name: "Mistral Medium 3.5",
api: "mistral-conversations",
provider: "mistral",
baseUrl: "https://api.mistral.ai",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.5,
output: 7.5,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144, // 256k tokens
maxTokens: 262144,
});
}
// Add "auto" alias for openrouter/auto
if (!allModels.some(m => m.provider === "openrouter" && m.id === "auto")) {
allModels.push({
id: "auto",
name: "Auto",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text", "image"],
cost: {
// we dont know about the costs because OpenRouter auto routes to different models
// and then charges you for the underlying used model
input:0,
output:0,
cacheRead:0,
cacheWrite:0,
},
contextWindow: 2000000,
maxTokens: 30000,
});
}
const VERTEX_BASE_URL = "https://{location}-aiplatform.googleapis.com";
const vertexModels: Model<"google-vertex">[] = [
{
id: "gemini-3-pro-preview",
name: "Gemini 3 Pro Preview (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 },
contextWindow: 1000000,
maxTokens: 64000,
},
{
id: "gemini-3.1-pro-preview",
name: "Gemini 3.1 Pro Preview (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-3.1-pro-preview-customtools",
name: "Gemini 3.1 Pro Preview Custom Tools (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-3-flash-preview",
name: "Gemini 3 Flash Preview (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 0.5, output: 3, cacheRead: 0.05, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-2.0-flash",
name: "Gemini 2.0 Flash (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: false,
input: ["text", "image"],
cost: { input: 0.15, output: 0.6, cacheRead: 0.0375, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 8192,
},
{
id: "gemini-2.0-flash-lite",
name: "Gemini 2.0 Flash Lite (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-2.5-pro",
name: "Gemini 2.5 Pro (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-2.5-flash",
name: "Gemini 2.5 Flash (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 0.3, output: 2.5, cacheRead: 0.03, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-2.5-flash-lite-preview-09-2025",
name: "Gemini 2.5 Flash Lite Preview 09-25 (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-2.5-flash-lite",
name: "Gemini 2.5 Flash Lite (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: true,
input: ["text", "image"],
cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 65536,
},
{
id: "gemini-1.5-pro",
name: "Gemini 1.5 Pro (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: false,
input: ["text", "image"],
cost: { input: 1.25, output: 5, cacheRead: 0.3125, cacheWrite: 0 },
contextWindow: 1000000,
maxTokens: 8192,
},
{
id: "gemini-1.5-flash",
name: "Gemini 1.5 Flash (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: false,
input: ["text", "image"],
cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 },
contextWindow: 1000000,
maxTokens: 8192,
},
{
id: "gemini-1.5-flash-8b",
name: "Gemini 1.5 Flash-8B (Vertex)",
api: "google-vertex",
provider: "google-vertex",
baseUrl: VERTEX_BASE_URL,
reasoning: false,
input: ["text", "image"],
cost: { input: 0.0375, output: 0.15, cacheRead: 0.01, cacheWrite: 0 },
contextWindow: 1000000,
maxTokens: 8192,
},
];
allModels.push(...vertexModels);
const azureOpenAiModels: Model<Api>[] = allModels
.filter((model) => model.provider === "openai" && model.api === "openai-responses")
.map((model) => ({
...model,
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
}));
allModels.push(...azureOpenAiModels);
for (const model of allModels) {
applyThinkingLevelMetadata(model);
}
// Group by provider and deduplicate by model ID
const providers: Record<string, Record<string, Model<any>>> = {};
for (const model of allModels) {
if (!providers[model.provider]) {
providers[model.provider] = {};
}
// Use model ID as key to automatically deduplicate
// Only add if not already present (models.dev takes priority over OpenRouter)
if (!providers[model.provider][model.id]) {
providers[model.provider][model.id] = model;
}
}
// Generate TypeScript file
let output = `// This file is auto-generated by scripts/generate-models.ts
// Do not edit manually - run 'npm run generate-models' to update
import type { Model } from "./types.js";
export const MODELS = {
`;
// Generate provider sections (sorted for deterministic output)
const sortedProviderIds = Object.keys(providers).sort();
for (const providerId of sortedProviderIds) {
const models = providers[providerId];
output += `\t${JSON.stringify(providerId)}: {\n`;
const sortedModelIds = Object.keys(models).sort();
for (const modelId of sortedModelIds) {
const model = models[modelId];
output += `\t\t"${model.id}": {\n`;
output += `\t\t\tid: "${model.id}",\n`;
output += `\t\t\tname: "${model.name}",\n`;
output += `\t\t\tapi: "${model.api}",\n`;
output += `\t\t\tprovider: "${model.provider}",\n`;
if (model.baseUrl !== undefined) {
output += `\t\t\tbaseUrl: "${model.baseUrl}",\n`;
}
if (model.headers) {
output += `\t\t\theaders: ${JSON.stringify(model.headers)},\n`;
}
if (model.compat) {
output += ` compat: ${JSON.stringify(model.compat)},
`;
}
output += `\t\t\treasoning: ${model.reasoning},\n`;
if (model.thinkingLevelMap) {
output += `\t\t\tthinkingLevelMap: ${JSON.stringify(model.thinkingLevelMap)},\n`;
}
output += `\t\t\tinput: [${model.input.map(i => `"${i}"`).join(", ")}],\n`;
output += `\t\t\tcost: {\n`;
output += `\t\t\t\tinput: ${model.cost.input},\n`;
output += `\t\t\t\toutput: ${model.cost.output},\n`;
output += `\t\t\t\tcacheRead: ${model.cost.cacheRead},\n`;
output += `\t\t\t\tcacheWrite: ${model.cost.cacheWrite},\n`;
output += `\t\t\t},\n`;
output += `\t\t\tcontextWindow: ${model.contextWindow},\n`;
output += `\t\t\tmaxTokens: ${model.maxTokens},\n`;
output += `\t\t} satisfies Model<"${model.api}">,\n`;
}
output += `\t},\n`;
}
output += `} as const;
`;
// Write file
writeFileSync(join(packageRoot, "src/models.generated.ts"), output);
console.log("Generated src/models.generated.ts");
// Print statistics
const totalModels = allModels.length;
const reasoningModels = allModels.filter(m => m.reasoning).length;
console.log(`\nModel Statistics:`);
console.log(` Total tool-capable models: ${totalModels}`);
console.log(` Reasoning-capable models: ${reasoningModels}`);
for (const [provider, models] of Object.entries(providers)) {
console.log(` ${provider}: ${Object.keys(models).length} models`);
}
}
// Run the generator
generateModels().catch(console.error);