diff --git a/packages/ai/scripts/generate-models.ts b/packages/ai/scripts/generate-models.ts index e1d5c3ed3..798d059c3 100644 --- a/packages/ai/scripts/generate-models.ts +++ b/packages/ai/scripts/generate-models.ts @@ -764,6 +764,16 @@ async function loadModelsDevData(): Promise[]> { }, contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, + // Fireworks prompt caching uses automatic prefix matching + session affinity. + // x-session-affinity routes requests to the same replica for cache hits. + // cache_control on tools and eager_input_streaming are not supported. + // See: https://docs.fireworks.ai/tools-sdks/anthropic-compatibility + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, }); } } diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index e700a56c0..69559c781 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -3614,6 +3614,12 @@ export const MODELS = { }, contextWindow: 163840, maxTokens: 163840, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/deepseek-v3p2": { id: "accounts/fireworks/models/deepseek-v3p2", @@ -3631,6 +3637,12 @@ export const MODELS = { }, contextWindow: 160000, maxTokens: 160000, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/deepseek-v4-pro": { id: "accounts/fireworks/models/deepseek-v4-pro", @@ -3648,6 +3660,12 @@ export const MODELS = { }, contextWindow: 1000000, maxTokens: 384000, + compat: { + sendSessionAffinityHeaders: true, + 
supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/glm-4p5": { id: "accounts/fireworks/models/glm-4p5", @@ -3665,6 +3683,12 @@ export const MODELS = { }, contextWindow: 131072, maxTokens: 131072, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/glm-4p5-air": { id: "accounts/fireworks/models/glm-4p5-air", @@ -3682,6 +3706,12 @@ export const MODELS = { }, contextWindow: 131072, maxTokens: 131072, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/glm-4p7": { id: "accounts/fireworks/models/glm-4p7", @@ -3699,6 +3729,12 @@ export const MODELS = { }, contextWindow: 198000, maxTokens: 198000, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/glm-5": { id: "accounts/fireworks/models/glm-5", @@ -3716,6 +3752,12 @@ export const MODELS = { }, contextWindow: 202752, maxTokens: 131072, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/glm-5p1": { id: "accounts/fireworks/models/glm-5p1", @@ -3733,6 +3775,12 @@ export const MODELS = { }, contextWindow: 202800, maxTokens: 131072, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + 
supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/gpt-oss-120b": { id: "accounts/fireworks/models/gpt-oss-120b", @@ -3750,6 +3798,12 @@ export const MODELS = { }, contextWindow: 131072, maxTokens: 32768, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/gpt-oss-20b": { id: "accounts/fireworks/models/gpt-oss-20b", @@ -3767,6 +3821,12 @@ export const MODELS = { }, contextWindow: 131072, maxTokens: 32768, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/kimi-k2-instruct": { id: "accounts/fireworks/models/kimi-k2-instruct", @@ -3784,6 +3844,12 @@ export const MODELS = { }, contextWindow: 128000, maxTokens: 16384, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/kimi-k2-thinking": { id: "accounts/fireworks/models/kimi-k2-thinking", @@ -3801,6 +3867,12 @@ export const MODELS = { }, contextWindow: 256000, maxTokens: 256000, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/kimi-k2p5": { id: "accounts/fireworks/models/kimi-k2p5", @@ -3818,6 +3890,12 @@ export const MODELS = { }, contextWindow: 256000, maxTokens: 256000, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: 
false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/kimi-k2p6": { id: "accounts/fireworks/models/kimi-k2p6", @@ -3835,6 +3913,12 @@ export const MODELS = { }, contextWindow: 262000, maxTokens: 262000, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/minimax-m2p1": { id: "accounts/fireworks/models/minimax-m2p1", @@ -3852,6 +3936,12 @@ export const MODELS = { }, contextWindow: 200000, maxTokens: 200000, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/minimax-m2p5": { id: "accounts/fireworks/models/minimax-m2p5", @@ -3869,6 +3959,12 @@ export const MODELS = { }, contextWindow: 196608, maxTokens: 196608, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/minimax-m2p7": { id: "accounts/fireworks/models/minimax-m2p7", @@ -3886,6 +3982,12 @@ export const MODELS = { }, contextWindow: 196608, maxTokens: 196608, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, "accounts/fireworks/models/qwen3p6-plus": { id: "accounts/fireworks/models/qwen3p6-plus", @@ -3903,6 +4005,12 @@ export const MODELS = { }, contextWindow: 128000, maxTokens: 8192, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies 
Model<"anthropic-messages">, "accounts/fireworks/routers/kimi-k2p5-turbo": { id: "accounts/fireworks/routers/kimi-k2p5-turbo", @@ -3920,6 +4028,12 @@ export const MODELS = { }, contextWindow: 256000, maxTokens: 256000, + compat: { + sendSessionAffinityHeaders: true, + supportsEagerToolInputStreaming: false, + supportsCacheControlOnTools: false, + supportsLongCacheRetention: false, + }, } satisfies Model<"anthropic-messages">, }, "github-copilot": { diff --git a/packages/ai/src/providers/anthropic.ts b/packages/ai/src/providers/anthropic.ts index 62febfc31..0897e8bcf 100644 --- a/packages/ai/src/providers/anthropic.ts +++ b/packages/ai/src/providers/anthropic.ts @@ -165,9 +165,16 @@ const FINE_GRAINED_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14 const INTERLEAVED_THINKING_BETA = "interleaved-thinking-2025-05-14"; function getAnthropicCompat(model: Model<"anthropic-messages">): Required { + // Auto-detect session affinity and cache control support from provider + const isFireworks = model.provider === "fireworks"; + const isCloudflareAiGatewayAnthropic = + model.provider === "cloudflare-ai-gateway" && model.baseUrl.includes("anthropic"); return { - supportsEagerToolInputStreaming: model.compat?.supportsEagerToolInputStreaming ?? true, - supportsLongCacheRetention: model.compat?.supportsLongCacheRetention ?? true, + supportsEagerToolInputStreaming: model.compat?.supportsEagerToolInputStreaming ?? !isFireworks, + supportsLongCacheRetention: model.compat?.supportsLongCacheRetention ?? !isFireworks, + sendSessionAffinityHeaders: + model.compat?.sendSessionAffinityHeaders ?? !!(isFireworks || isCloudflareAiGatewayAnthropic), + supportsCacheControlOnTools: model.compat?.supportsCacheControlOnTools ?? !isFireworks, }; } @@ -463,6 +470,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti }); } + const cacheRetention = options?.cacheRetention ?? resolveCacheRetention(); + const cacheSessionId = cacheRetention === "none" ? 
undefined : options?.sessionId; + const created = createClient( model, apiKey, @@ -470,6 +480,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti shouldUseFineGrainedToolStreamingBeta(model, context), options?.headers, copilotDynamicHeaders, + cacheSessionId, ); client = created.client; isOAuth = created.isOAuthToken; @@ -766,6 +777,7 @@ function createClient( useFineGrainedToolStreamingBeta: boolean, optionsHeaders?: Record, dynamicHeaders?: Record, + sessionId?: string, ): { client: Anthropic; isOAuthToken: boolean } { // Adaptive thinking models (Opus 4.6, Sonnet 4.6) have interleaved thinking built-in. // The beta header is deprecated on Opus 4.6 and redundant on Sonnet 4.6, so skip it. @@ -847,6 +859,8 @@ function createClient( } // API key auth + const sessionAffinityHeaders: Record = + sessionId && getAnthropicCompat(model).sendSessionAffinityHeaders ? { "x-session-affinity": sessionId } : {}; const client = new Anthropic({ apiKey, baseURL: model.baseUrl, @@ -857,6 +871,7 @@ function createClient( "anthropic-dangerous-direct-browser-access": "true", ...(betaFeatures.length > 0 ? { "anthropic-beta": betaFeatures.join(",") } : {}), }, + sessionAffinityHeaders, model.headers, optionsHeaders, ), @@ -912,11 +927,12 @@ function buildParams( } if (context.tools && context.tools.length > 0) { + const compat = getAnthropicCompat(model); params.tools = convertTools( context.tools, isOAuthToken, - getAnthropicCompat(model).supportsEagerToolInputStreaming, - cacheControl, + compat.supportsEagerToolInputStreaming, + compat.supportsCacheControlOnTools ? cacheControl : undefined, ); } diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts index ec929ea6a..851e4d4b1 100644 --- a/packages/ai/src/types.ts +++ b/packages/ai/src/types.ts @@ -419,6 +419,22 @@ export interface AnthropicMessagesCompat { supportsEagerToolInputStreaming?: boolean; /** Whether the provider supports Anthropic long cache retention (`cache_control.ttl: "1h"`). 
Default: true. */ supportsLongCacheRetention?: boolean; + /** + * Whether to send the `x-session-affinity` header from `options.sessionId` + * when caching is enabled. Required for providers like Fireworks that use + * session affinity for prompt cache routing (requests to the same replica + * maximize cache hits). + * Default: true for the `fireworks` provider and for `cloudflare-ai-gateway` models whose `baseUrl` contains "anthropic"; false otherwise. + */ + sendSessionAffinityHeaders?: boolean; + /** + * Whether the provider supports Anthropic-style `cache_control` markers on + * tool definitions. When false, `cache_control` is omitted from tool params. + * Some Anthropic-compatible providers (e.g., Fireworks) do not support this + * field on tools and may reject or ignore it. + * Default: true, except false for the `fireworks` provider. + */ + supportsCacheControlOnTools?: boolean; } /** diff --git a/packages/ai/test/fireworks-models.test.ts b/packages/ai/test/fireworks-models.test.ts index 7fec9899b..35427c10c 100644 --- a/packages/ai/test/fireworks-models.test.ts +++ b/packages/ai/test/fireworks-models.test.ts @@ -1,6 +1,11 @@ +import { createServer, type IncomingMessage, type ServerResponse } from "node:http"; +import type { AddressInfo } from "node:net"; +import { Type } from "typebox"; import { afterEach, describe, expect, it } from "vitest"; import { findEnvKeys, getEnvApiKey } from "../src/env-api-keys.js"; import { getModel } from "../src/models.js"; +import { streamAnthropic } from "../src/providers/anthropic.js"; +import type { Context, Model, Tool } from "../src/types.js"; const originalFireworksApiKey = process.env.FIREWORKS_API_KEY; @@ -47,4 +52,197 @@ describe("Fireworks models", () => { expect(findEnvKeys("fireworks")).toEqual(["FIREWORKS_API_KEY"]); expect(getEnvApiKey("fireworks")).toBe("test-fireworks-key"); }); + + it("sets Fireworks-specific compat for session affinity and unsupported tool fields", () => { + const model = getModel("fireworks", "accounts/fireworks/models/kimi-k2p6"); + + expect(model.compat).toBeDefined(); + expect(model.compat?.sendSessionAffinityHeaders).toBe(true); +
expect(model.compat?.supportsEagerToolInputStreaming).toBe(false); + expect(model.compat?.supportsCacheControlOnTools).toBe(false); + expect(model.compat?.supportsLongCacheRetention).toBe(false); + }); +}); + +// --- Integration tests for Fireworks Anthropic session affinity and tool compat --- + +interface CapturedRequest { + headers: IncomingMessage["headers"]; + body: Record; +} + +const tool: Tool = { + name: "lookup", + description: "Look up a value", + parameters: Type.Object({ value: Type.String() }), +}; + +function createFireworksModel(compat?: Model<"anthropic-messages">["compat"]): Model<"anthropic-messages"> { + return { + id: "accounts/fireworks/models/kimi-k2p6", + name: "Kimi K2.6", + api: "anthropic-messages", + provider: "fireworks", + baseUrl: "http://127.0.0.1:0", // overridden by captureAnthropicRequest + reasoning: true, + input: ["text", "image"], + cost: { input: 0.95, output: 4, cacheRead: 0.16, cacheWrite: 0 }, + contextWindow: 262000, + maxTokens: 262000, + compat, + }; +} + +function createAnthropicModel(): Model<"anthropic-messages"> { + return { + id: "claude-opus-4-7", + name: "Claude Opus 4.7", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "http://127.0.0.1:0", // overridden by captureAnthropicRequest + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 32000, + }; +} + +function createContext(tools: Tool[] = [tool]): Context { + return { + messages: [{ role: "user", content: "Use the tool", timestamp: Date.now() }], + ...(tools.length > 0 ? { tools } : {}), + }; +} + +async function readRequestBody(request: IncomingMessage): Promise> { + const chunks: Buffer[] = []; + for await (const chunk of request) { + chunks.push(Buffer.isBuffer(chunk) ? 
chunk : Buffer.from(chunk)); + } + return JSON.parse(Buffer.concat(chunks).toString("utf8")) as Record; +} + +function writeEmptySseResponse(response: ServerResponse): void { + response.writeHead(200, { "content-type": "text/event-stream" }); + response.end(); +} + +async function captureAnthropicRequest( + model: Model<"anthropic-messages">, + context: Context, + options?: { sessionId?: string; cacheRetention?: string }, +): Promise { + let capturedRequest: CapturedRequest | undefined; + + const server = createServer(async (request, response) => { + capturedRequest = { + headers: request.headers, + body: await readRequestBody(request), + }; + writeEmptySseResponse(response); + }); + + await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve)); + const address = server.address() as AddressInfo; + + try { + // Override the model's baseUrl to point to the local test server + const localModel = { ...model, baseUrl: `http://127.0.0.1:${address.port}` }; + + const stream = streamAnthropic(localModel, context, { + apiKey: "test-key", + cacheRetention: (options?.cacheRetention as "none" | "short" | "long") ?? "short", + sessionId: options?.sessionId, + }); + + for await (const event of stream) { + if (event.type === "done" || event.type === "error") break; + } + } finally { + await new Promise((resolve, reject) => { + server.close((error) => (error ? 
reject(error) : resolve())); + }); + } + + if (!capturedRequest) { + throw new Error("Anthropic request was not captured"); + } + return capturedRequest; +} + +function getTools(body: Record): Record[] { + const tools = body.tools; + if (!Array.isArray(tools)) { + throw new Error("Expected tools in request body"); + } + return tools as Record[]; +} + +describe("Fireworks Anthropic session affinity and tool compat", () => { + it("sends x-session-affinity header for Fireworks models", async () => { + const model = createFireworksModel(); + // Need a real port, capture will assign one + const request = await captureAnthropicRequest(model, createContext(), { + sessionId: "fireworks-session-1", + }); + + expect(request.headers["x-session-affinity"]).toBe("fireworks-session-1"); + }); + + it("omits x-session-affinity header for native Anthropic models", async () => { + const model = createAnthropicModel(); + const request = await captureAnthropicRequest(model, createContext(), { + sessionId: "anthropic-session-1", + }); + + expect(request.headers["x-session-affinity"]).toBeUndefined(); + }); + + it("omits x-session-affinity header when cacheRetention is none", async () => { + const model = createFireworksModel(); + const request = await captureAnthropicRequest(model, createContext(), { + sessionId: "fireworks-session-2", + cacheRetention: "none", + }); + + expect(request.headers["x-session-affinity"]).toBeUndefined(); + }); + + it("omits cache_control on tools for Fireworks models", async () => { + const model = createFireworksModel(); + const request = await captureAnthropicRequest(model, createContext()); + + const tools = getTools(request.body); + const lastTool = tools[tools.length - 1]; + expect(lastTool.cache_control).toBeUndefined(); + }); + + it("omits eager_input_streaming on tools for Fireworks models", async () => { + const model = createFireworksModel(); + const request = await captureAnthropicRequest(model, createContext()); + + const tools = 
getTools(request.body); + for (const t of tools) { + expect(t.eager_input_streaming).toBeUndefined(); + } + }); + + it("sends cache_control on tools for native Anthropic models", async () => { + const model = createAnthropicModel(); + const request = await captureAnthropicRequest(model, createContext()); + + const tools = getTools(request.body); + const lastTool = tools[tools.length - 1]; + expect(lastTool.cache_control).toBeDefined(); + expect((lastTool.cache_control as { type: string }).type).toBe("ephemeral"); + }); + + it("sends eager_input_streaming on tools for native Anthropic models", async () => { + const model = createAnthropicModel(); + const request = await captureAnthropicRequest(model, createContext()); + + const tools = getTools(request.body); + expect(tools[0].eager_input_streaming).toBe(true); + }); });