diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index 3203ecfe6..76bbc3d7f 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -4,6 +4,7 @@ ### Fixed +- Fixed Z.AI GLM-5.2 thinking requests to send `reasoning_effort` with the provider's `high`/`max` effort mapping ([#5770](https://github.com/earendil-works/pi/issues/5770)). - Fixed Google and `google-vertex` Gemini model metadata to map `latest` aliases to the current models, add Gemini 3.5 Flash for Vertex, correct Gemini 2.5 Flash Vertex cache pricing, and remove shut-down Vertex preview models ([#5761](https://github.com/earendil-works/pi/issues/5761)). - Fixed Moonshot AI China model metadata to include Kimi K2.7 Code, and omitted unsupported thinking-off payloads for Kimi K2.7 Code models ([#5760](https://github.com/earendil-works/pi/issues/5760)). diff --git a/packages/ai/scripts/generate-models.ts b/packages/ai/scripts/generate-models.ts index 26e581bbb..8cc890ee5 100644 --- a/packages/ai/scripts/generate-models.ts +++ b/packages/ai/scripts/generate-models.ts @@ -157,6 +157,13 @@ const NVIDIA_NIM_UNSUPPORTED_MODELS = new Set([ "upstage/solar-10.7b-instruct", ]); const ZAI_TOOL_STREAM_UNSUPPORTED_MODELS = new Set(["glm-4.5", "glm-4.5-air", "glm-4.5-flash", "glm-4.5v"]); +const ZAI_GLM52_THINKING_LEVEL_MAP = { + minimal: null, + low: "high", + medium: "high", + high: "high", + xhigh: "max", +} as const; const EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS = new Set([ "github-copilot:claude-haiku-4.5", "github-copilot:claude-sonnet-4", @@ -894,6 +901,8 @@ async function loadModelsDevData(): Promise[]> { if (m.tool_call !== true) continue; const supportsImage = m.modalities?.input?.includes("image"); + const isGlm52 = modelId === "glm-5.2"; + models.push({ id: modelId, name: m.name || modelId, @@ -901,6 +910,7 @@ async function loadModelsDevData(): Promise[]> { provider, baseUrl, reasoning: m.reasoning === true, + ...(isGlm52 ? { thinkingLevelMap: ZAI_GLM52_THINKING_LEVEL_MAP } : {}), input: supportsImage ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, @@ -911,6 +921,7 @@ async function loadModelsDevData(): Promise[]> { compat: { supportsDeveloperRole: false, thinkingFormat: "zai", + ...(isGlm52 ? { supportsReasoningEffort: true } : {}), ...(!ZAI_TOOL_STREAM_UNSUPPORTED_MODELS.has(modelId) ? { zaiToolStream: true } : {}), }, contextWindow: m.limit?.context || 4096, diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index 34575eb08..d7e2f9f3c 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -3921,7 +3921,7 @@ export const MODELS = { cost: { input: 0.15, output: 0.6, - cacheRead: 0.015, + cacheRead: 0.01, cacheWrite: 0, }, contextWindow: 131072, @@ -6390,6 +6390,25 @@ export const MODELS = { contextWindow: 262144, maxTokens: 262144, } satisfies Model<"openai-completions">, + "kimi-k2.7-code-highspeed": { + id: "kimi-k2.7-code-highspeed", + name: "Kimi K2.7 Code HighSpeed", + api: "openai-completions", + provider: "moonshotai", + baseUrl: "https://api.moonshot.ai/v1", + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"thinkingFormat":"deepseek"}, + reasoning: true, + thinkingLevelMap: {"off":null}, + input: ["text", "image"], + cost: { + input: 1.9, + output: 8, + cacheRead: 0.38, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, }, "moonshotai-cn": { "kimi-k2-0711-preview": { @@ -6537,6 +6556,25 @@ export const MODELS = { contextWindow: 262144, maxTokens: 262144, } satisfies Model<"openai-completions">, + "kimi-k2.7-code-highspeed": { + id: "kimi-k2.7-code-highspeed", + name: "Kimi K2.7 Code HighSpeed", + api: "openai-completions", + provider: "moonshotai-cn", + baseUrl: "https://api.moonshot.cn/v1", + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"thinkingFormat":"deepseek"}, + reasoning: true, + thinkingLevelMap: {"off":null}, + input: ["text", "image"], + cost: { + input: 1.9, + output: 8, + cacheRead: 0.38, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, }, "nvidia": { "meta/llama-3.1-70b-instruct": { @@ -12298,13 +12336,13 @@ export const MODELS = { reasoning: true, input: ["text", "image"], cost: { - input: 0.39, - output: 2.34, + input: 0.385, + output: 2.45, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 262144, - maxTokens: 65536, + contextWindow: 256000, + maxTokens: 4096, } satisfies Model<"openai-completions">, "qwen/qwen3.5-9b": { id: "qwen/qwen3.5-9b", @@ -12587,13 +12625,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.063, - output: 0.21, - cacheRead: 0.021, + input: 0.066, + output: 0.26, + cacheRead: 0.029, cacheWrite: 0, }, contextWindow: 262144, - maxTokens: 4096, + maxTokens: 262144, } satisfies Model<"openai-completions">, "thedrummer/rocinante-12b": { id: "thedrummer/rocinante-12b", @@ -12774,13 +12812,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.125, + input: 0.13, output: 0.85, - cacheRead: 0.06, + cacheRead: 0.025, cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 131070, + maxTokens: 98304, } satisfies Model<"openai-completions">, "z-ai/glm-4.5v": { id: "z-ai/glm-4.5v", @@ -13214,8 +13252,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 2.5, - output: 7.5, + input: 1.25, + output: 3.75, cacheRead: 0, cacheWrite: 0, }, @@ -16876,8 +16914,9 @@ export const MODELS = { api: "openai-completions", provider: "zai", baseUrl: "https://api.z.ai/api/coding/paas/v4", - compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","zaiToolStream":true}, + compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","supportsReasoningEffort":true,"zaiToolStream":true}, reasoning: true, + thinkingLevelMap: {"minimal":null,"low":"high","medium":"high","high":"high","xhigh":"max"}, input: ["text"], cost: { input: 0, @@ -16986,8 +17025,9 @@ export const MODELS = { api: "openai-completions", provider: "zai-coding-cn", baseUrl: "https://open.bigmodel.cn/api/coding/paas/v4", - compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","zaiToolStream":true}, + compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","supportsReasoningEffort":true,"zaiToolStream":true}, reasoning: true, + thinkingLevelMap: {"minimal":null,"low":"high","medium":"high","high":"high","xhigh":"max"}, input: ["text"], cost: { input: 0, diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts index 140726edc..c2d5cff98 100644 --- a/packages/ai/src/providers/openai-completions.ts +++ b/packages/ai/src/providers/openai-completions.ts @@ -554,8 +554,18 @@ function buildParams( } if (compat.thinkingFormat === "zai" && model.reasoning) { - const zaiParams = params as typeof params & { thinking?: { type: "enabled" | "disabled" } }; + const zaiParams = params as Omit & { + thinking?: { type: "enabled" | "disabled" }; + reasoning_effort?: string; + }; zaiParams.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" }; + if (options?.reasoningEffort && compat.supportsReasoningEffort) { + const mappedEffort = model.thinkingLevelMap?.[options.reasoningEffort]; + const effort = mappedEffort === undefined ? options.reasoningEffort : mappedEffort; + if (typeof effort === "string") { + zaiParams.reasoning_effort = effort; + } + } } else if (compat.thinkingFormat === "qwen" && model.reasoning) { (params as any).enable_thinking = !!options?.reasoningEffort; } else if (compat.thinkingFormat === "qwen-chat-template" && model.reasoning) { diff --git a/packages/ai/test/openai-completions-tool-choice.test.ts b/packages/ai/test/openai-completions-tool-choice.test.ts index 9cf844969..8ca86025a 100644 --- a/packages/ai/test/openai-completions-tool-choice.test.ts +++ b/packages/ai/test/openai-completions-tool-choice.test.ts @@ -257,6 +257,86 @@ describe("openai-completions tool_choice", () => { expect(getModel("zai", "glm-4.5-air")?.compat?.zaiToolStream).toBeUndefined(); }); + it("stores z.ai GLM-5.2 effort metadata", () => { + for (const provider of ["zai", "zai-coding-cn"] as const) { + const model = getModel(provider, "glm-5.2")!; + expect(model.compat?.supportsReasoningEffort).toBe(true); + expect(model.thinkingLevelMap).toEqual({ + minimal: null, + low: "high", + medium: "high", + high: "high", + xhigh: "max", + }); + } + }); + + it("maps z.ai GLM-5.2 thinking levels to reasoning_effort", async () => { + const model = getModel("zai", "glm-5.2")!; + const cases = [ + { reasoning: "low", effort: "high" }, + { reasoning: "medium", effort: "high" }, + { reasoning: "high", effort: "high" }, + { reasoning: "xhigh", effort: "max" }, + ] as const; + + for (const testCase of cases) { + let payload: unknown; + + await streamSimple( + model, + { + messages: [ + { + role: "user", + content: "Hi", + timestamp: Date.now(), + }, + ], + }, + { + apiKey: "test", + reasoning: testCase.reasoning, + onPayload: (params: unknown) => { + payload = params; + }, + }, + ).result(); + + const params = (payload ?? mockState.lastParams) as { thinking?: unknown; reasoning_effort?: string }; + expect(params.thinking).toEqual({ type: "enabled" }); + expect(params.reasoning_effort).toBe(testCase.effort); + } + }); + + it("omits z.ai GLM-5.2 reasoning_effort when thinking is off", async () => { + const model = getModel("zai", "glm-5.2")!; + let payload: unknown; + + await streamSimple( + model, + { + messages: [ + { + role: "user", + content: "Hi", + timestamp: Date.now(), + }, + ], + }, + { + apiKey: "test", + onPayload: (params: unknown) => { + payload = params; + }, + }, + ).result(); + + const params = (payload ?? mockState.lastParams) as { thinking?: unknown; reasoning_effort?: string }; + expect(params.thinking).toEqual({ type: "disabled" }); + expect(params.reasoning_effort).toBeUndefined(); + }); + it("omits tool_stream for unsupported z.ai models", async () => { const model = getModel("zai", "glm-4.5-air")!; const tools: Tool[] = [