fix(ai): support Z.AI GLM-5.2 effort levels

closes #5770
2026-06-18 15:54:04 +08:00 · 2026-06-16 14:29:07 +02:00
parent f8a77f47f2
commit 75b0d723c0
5 changed files with 159 additions and 17 deletions
@@ -4,6 +4,7 @@

 ### Fixed

+- Fixed Z.AI GLM-5.2 thinking requests to send `reasoning_effort` with the provider's `high`/`max` effort mapping ([#5770](https://github.com/earendil-works/pi/issues/5770)).
 - Fixed Google and `google-vertex` Gemini model metadata to map `latest` aliases to the current models, add Gemini 3.5 Flash for Vertex, correct Gemini 2.5 Flash Vertex cache pricing, and remove shut-down Vertex preview models ([#5761](https://github.com/earendil-works/pi/issues/5761)).
 - Fixed Moonshot AI China model metadata to include Kimi K2.7 Code, and omitted unsupported thinking-off payloads for Kimi K2.7 Code models ([#5760](https://github.com/earendil-works/pi/issues/5760)).

@@ -157,6 +157,13 @@ const NVIDIA_NIM_UNSUPPORTED_MODELS = new Set([
 	"upstage/solar-10.7b-instruct",
 ]);
 const ZAI_TOOL_STREAM_UNSUPPORTED_MODELS = new Set(["glm-4.5", "glm-4.5-air", "glm-4.5-flash", "glm-4.5v"]);
+const ZAI_GLM52_THINKING_LEVEL_MAP = {
+	minimal: null,
+	low: "high",
+	medium: "high",
+	high: "high",
+	xhigh: "max",
+} as const;
 const EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS = new Set([
 	"github-copilot:claude-haiku-4.5",
 	"github-copilot:claude-sonnet-4",
@@ -894,6 +901,8 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
 					if (m.tool_call !== true) continue;
 					const supportsImage = m.modalities?.input?.includes("image");

+					const isGlm52 = modelId === "glm-5.2";
+
 					models.push({
 						id: modelId,
 						name: m.name || modelId,
@@ -901,6 +910,7 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
 						provider,
 						baseUrl,
 						reasoning: m.reasoning === true,
+						...(isGlm52 ? { thinkingLevelMap: ZAI_GLM52_THINKING_LEVEL_MAP } : {}),
 						input: supportsImage ? ["text", "image"] : ["text"],
 						cost: {
 							input: m.cost?.input || 0,
@@ -911,6 +921,7 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
 						compat: {
 							supportsDeveloperRole: false,
 							thinkingFormat: "zai",
+							...(isGlm52 ? { supportsReasoningEffort: true } : {}),
 							...(!ZAI_TOOL_STREAM_UNSUPPORTED_MODELS.has(modelId) ? { zaiToolStream: true } : {}),
 						},
 						contextWindow: m.limit?.context || 4096,
@@ -3921,7 +3921,7 @@ export const MODELS = {
 			cost: {
 				input: 0.15,
 				output: 0.6,
-				cacheRead: 0.015,
+				cacheRead: 0.01,
 				cacheWrite: 0,
 			},
 			contextWindow: 131072,
@@ -6390,6 +6390,25 @@ export const MODELS = {
 			contextWindow: 262144,
 			maxTokens: 262144,
 		} satisfies Model<"openai-completions">,
+		"kimi-k2.7-code-highspeed": {
+			id: "kimi-k2.7-code-highspeed",
+			name: "Kimi K2.7 Code HighSpeed",
+			api: "openai-completions",
+			provider: "moonshotai",
+			baseUrl: "https://api.moonshot.ai/v1",
+			compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"thinkingFormat":"deepseek"},
+			reasoning: true,
+			thinkingLevelMap: {"off":null},
+			input: ["text", "image"],
+			cost: {
+				input: 1.9,
+				output: 8,
+				cacheRead: 0.38,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 262144,
+		} satisfies Model<"openai-completions">,
 	},
 	"moonshotai-cn": {
 		"kimi-k2-0711-preview": {
@@ -6537,6 +6556,25 @@ export const MODELS = {
 			contextWindow: 262144,
 			maxTokens: 262144,
 		} satisfies Model<"openai-completions">,
+		"kimi-k2.7-code-highspeed": {
+			id: "kimi-k2.7-code-highspeed",
+			name: "Kimi K2.7 Code HighSpeed",
+			api: "openai-completions",
+			provider: "moonshotai-cn",
+			baseUrl: "https://api.moonshot.cn/v1",
+			compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"thinkingFormat":"deepseek"},
+			reasoning: true,
+			thinkingLevelMap: {"off":null},
+			input: ["text", "image"],
+			cost: {
+				input: 1.9,
+				output: 8,
+				cacheRead: 0.38,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 262144,
+		} satisfies Model<"openai-completions">,
 	},
 	"nvidia": {
 		"meta/llama-3.1-70b-instruct": {
@@ -12298,13 +12336,13 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text", "image"],
 			cost: {
-				input: 0.39,
-				output: 2.34,
+				input: 0.385,
+				output: 2.45,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
-			contextWindow: 262144,
-			maxTokens: 65536,
+			contextWindow: 256000,
+			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
 		"qwen/qwen3.5-9b": {
 			id: "qwen/qwen3.5-9b",
@@ -12587,13 +12625,13 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.063,
-				output: 0.21,
-				cacheRead: 0.021,
+				input: 0.066,
+				output: 0.26,
+				cacheRead: 0.029,
 				cacheWrite: 0,
 			},
 			contextWindow: 262144,
-			maxTokens: 4096,
+			maxTokens: 262144,
 		} satisfies Model<"openai-completions">,
 		"thedrummer/rocinante-12b": {
 			id: "thedrummer/rocinante-12b",
@@ -12774,13 +12812,13 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.125,
+				input: 0.13,
 				output: 0.85,
-				cacheRead: 0.06,
+				cacheRead: 0.025,
 				cacheWrite: 0,
 			},
 			contextWindow: 131072,
-			maxTokens: 131070,
+			maxTokens: 98304,
 		} satisfies Model<"openai-completions">,
 		"z-ai/glm-4.5v": {
 			id: "z-ai/glm-4.5v",
@@ -13214,8 +13252,8 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text"],
 			cost: {
-				input: 2.5,
-				output: 7.5,
+				input: 1.25,
+				output: 3.75,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@@ -16876,8 +16914,9 @@ export const MODELS = {
 			api: "openai-completions",
 			provider: "zai",
 			baseUrl: "https://api.z.ai/api/coding/paas/v4",
-			compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","zaiToolStream":true},
+			compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","supportsReasoningEffort":true,"zaiToolStream":true},
 			reasoning: true,
+			thinkingLevelMap: {"minimal":null,"low":"high","medium":"high","high":"high","xhigh":"max"},
 			input: ["text"],
 			cost: {
 				input: 0,
@@ -16986,8 +17025,9 @@ export const MODELS = {
 			api: "openai-completions",
 			provider: "zai-coding-cn",
 			baseUrl: "https://open.bigmodel.cn/api/coding/paas/v4",
-			compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","zaiToolStream":true},
+			compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","supportsReasoningEffort":true,"zaiToolStream":true},
 			reasoning: true,
+			thinkingLevelMap: {"minimal":null,"low":"high","medium":"high","high":"high","xhigh":"max"},
 			input: ["text"],
 			cost: {
 				input: 0,
@@ -554,8 +554,18 @@ function buildParams(
 	}

 	if (compat.thinkingFormat === "zai" && model.reasoning) {
-		const zaiParams = params as typeof params & { thinking?: { type: "enabled" | "disabled" } };
+		const zaiParams = params as Omit<typeof params, "reasoning_effort"> & {
+			thinking?: { type: "enabled" | "disabled" };
+			reasoning_effort?: string;
+		};
 		zaiParams.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
+		if (options?.reasoningEffort && compat.supportsReasoningEffort) {
+			const mappedEffort = model.thinkingLevelMap?.[options.reasoningEffort];
+			const effort = mappedEffort === undefined ? options.reasoningEffort : mappedEffort;
+			if (typeof effort === "string") {
+				zaiParams.reasoning_effort = effort;
+			}
+		}
 	} else if (compat.thinkingFormat === "qwen" && model.reasoning) {
 		(params as any).enable_thinking = !!options?.reasoningEffort;
 	} else if (compat.thinkingFormat === "qwen-chat-template" && model.reasoning) {
@@ -257,6 +257,86 @@ describe("openai-completions tool_choice", () => {
 		expect(getModel("zai", "glm-4.5-air")?.compat?.zaiToolStream).toBeUndefined();
 	});

+	it("stores z.ai GLM-5.2 effort metadata", () => {
+		for (const provider of ["zai", "zai-coding-cn"] as const) {
+			const model = getModel(provider, "glm-5.2")!;
+			expect(model.compat?.supportsReasoningEffort).toBe(true);
+			expect(model.thinkingLevelMap).toEqual({
+				minimal: null,
+				low: "high",
+				medium: "high",
+				high: "high",
+				xhigh: "max",
+			});
+		}
+	});
+
+	it("maps z.ai GLM-5.2 thinking levels to reasoning_effort", async () => {
+		const model = getModel("zai", "glm-5.2")!;
+		const cases = [
+			{ reasoning: "low", effort: "high" },
+			{ reasoning: "medium", effort: "high" },
+			{ reasoning: "high", effort: "high" },
+			{ reasoning: "xhigh", effort: "max" },
+		] as const;
+
+		for (const testCase of cases) {
+			let payload: unknown;
+
+			await streamSimple(
+				model,
+				{
+					messages: [
+						{
+							role: "user",
+							content: "Hi",
+							timestamp: Date.now(),
+						},
+					],
+				},
+				{
+					apiKey: "test",
+					reasoning: testCase.reasoning,
+					onPayload: (params: unknown) => {
+						payload = params;
+					},
+				},
+			).result();
+
+			const params = (payload ?? mockState.lastParams) as { thinking?: unknown; reasoning_effort?: string };
+			expect(params.thinking).toEqual({ type: "enabled" });
+			expect(params.reasoning_effort).toBe(testCase.effort);
+		}
+	});
+
+	it("omits z.ai GLM-5.2 reasoning_effort when thinking is off", async () => {
+		const model = getModel("zai", "glm-5.2")!;
+		let payload: unknown;
+
+		await streamSimple(
+			model,
+			{
+				messages: [
+					{
+						role: "user",
+						content: "Hi",
+						timestamp: Date.now(),
+					},
+				],
+			},
+			{
+				apiKey: "test",
+				onPayload: (params: unknown) => {
+					payload = params;
+				},
+			},
+		).result();
+
+		const params = (payload ?? mockState.lastParams) as { thinking?: unknown; reasoning_effort?: string };
+		expect(params.thinking).toEqual({ type: "disabled" });
+		expect(params.reasoning_effort).toBeUndefined();
+	});
+
 	it("omits tool_stream for unsupported z.ai models", async () => {
 		const model = getModel("zai", "glm-4.5-air")!;
 		const tools: Tool[] = [