fix(ai): support Z.AI GLM-5.2 effort levels

closes #5770
This commit is contained in:
Armin Ronacher
2026-06-16 14:29:07 +02:00
Unverified
parent f8a77f47f2
commit 75b0d723c0
5 changed files with 159 additions and 17 deletions
+1
View File
@@ -4,6 +4,7 @@
### Fixed
- Fixed Z.AI GLM-5.2 thinking requests to send `reasoning_effort` with the provider's `high`/`max` effort mapping ([#5770](https://github.com/earendil-works/pi/issues/5770)).
- Fixed Google and `google-vertex` Gemini model metadata to map `latest` aliases to the current models, add Gemini 3.5 Flash for Vertex, correct Gemini 2.5 Flash Vertex cache pricing, and remove shut-down Vertex preview models ([#5761](https://github.com/earendil-works/pi/issues/5761)).
- Fixed Moonshot AI China model metadata to include Kimi K2.7 Code, and omitted unsupported thinking-off payloads for Kimi K2.7 Code models ([#5760](https://github.com/earendil-works/pi/issues/5760)).
+11
View File
@@ -157,6 +157,13 @@ const NVIDIA_NIM_UNSUPPORTED_MODELS = new Set([
"upstage/solar-10.7b-instruct",
]);
/**
 * Z.AI model ids that are excluded from tool streaming: when model metadata is
 * built, ids listed here do not receive `zaiToolStream: true` in `compat`.
 */
const ZAI_TOOL_STREAM_UNSUPPORTED_MODELS = new Set([
	"glm-4.5",
	"glm-4.5-air",
	"glm-4.5-flash",
	"glm-4.5v",
]);
/**
 * Thinking-level map attached as `thinkingLevelMap` to model entries whose id
 * is `glm-5.2`. Each key is a thinking level and each value is the effort
 * string associated with it: `low`, `medium` and `high` all collapse to
 * `"high"`, while `xhigh` becomes `"max"`.
 *
 * NOTE(review): `minimal: null` presumably marks a level for which no effort
 * value is sent — confirm against the request builder that consumes this map.
 */
const ZAI_GLM52_THINKING_LEVEL_MAP = {
	minimal: null,
	low: "high",
	medium: "high",
	high: "high",
	xhigh: "max",
} as const;
const EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS = new Set([
"github-copilot:claude-haiku-4.5",
"github-copilot:claude-sonnet-4",
@@ -894,6 +901,8 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
if (m.tool_call !== true) continue;
const supportsImage = m.modalities?.input?.includes("image");
const isGlm52 = modelId === "glm-5.2";
models.push({
id: modelId,
name: m.name || modelId,
@@ -901,6 +910,7 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
provider,
baseUrl,
reasoning: m.reasoning === true,
...(isGlm52 ? { thinkingLevelMap: ZAI_GLM52_THINKING_LEVEL_MAP } : {}),
input: supportsImage ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
@@ -911,6 +921,7 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
compat: {
supportsDeveloperRole: false,
thinkingFormat: "zai",
...(isGlm52 ? { supportsReasoningEffort: true } : {}),
...(!ZAI_TOOL_STREAM_UNSUPPORTED_MODELS.has(modelId) ? { zaiToolStream: true } : {}),
},
contextWindow: m.limit?.context || 4096,
+56 -16
View File
@@ -3921,7 +3921,7 @@ export const MODELS = {
cost: {
input: 0.15,
output: 0.6,
cacheRead: 0.015,
cacheRead: 0.01,
cacheWrite: 0,
},
contextWindow: 131072,
@@ -6390,6 +6390,25 @@ export const MODELS = {
contextWindow: 262144,
maxTokens: 262144,
} satisfies Model<"openai-completions">,
"kimi-k2.7-code-highspeed": {
id: "kimi-k2.7-code-highspeed",
name: "Kimi K2.7 Code HighSpeed",
api: "openai-completions",
provider: "moonshotai",
baseUrl: "https://api.moonshot.ai/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"thinkingFormat":"deepseek"},
reasoning: true,
thinkingLevelMap: {"off":null},
input: ["text", "image"],
cost: {
input: 1.9,
output: 8,
cacheRead: 0.38,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 262144,
} satisfies Model<"openai-completions">,
},
"moonshotai-cn": {
"kimi-k2-0711-preview": {
@@ -6537,6 +6556,25 @@ export const MODELS = {
contextWindow: 262144,
maxTokens: 262144,
} satisfies Model<"openai-completions">,
"kimi-k2.7-code-highspeed": {
id: "kimi-k2.7-code-highspeed",
name: "Kimi K2.7 Code HighSpeed",
api: "openai-completions",
provider: "moonshotai-cn",
baseUrl: "https://api.moonshot.cn/v1",
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"thinkingFormat":"deepseek"},
reasoning: true,
thinkingLevelMap: {"off":null},
input: ["text", "image"],
cost: {
input: 1.9,
output: 8,
cacheRead: 0.38,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 262144,
} satisfies Model<"openai-completions">,
},
"nvidia": {
"meta/llama-3.1-70b-instruct": {
@@ -12298,13 +12336,13 @@ export const MODELS = {
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.39,
output: 2.34,
input: 0.385,
output: 2.45,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 65536,
contextWindow: 256000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"qwen/qwen3.5-9b": {
id: "qwen/qwen3.5-9b",
@@ -12587,13 +12625,13 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.063,
output: 0.21,
cacheRead: 0.021,
input: 0.066,
output: 0.26,
cacheRead: 0.029,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 4096,
maxTokens: 262144,
} satisfies Model<"openai-completions">,
"thedrummer/rocinante-12b": {
id: "thedrummer/rocinante-12b",
@@ -12774,13 +12812,13 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.125,
input: 0.13,
output: 0.85,
cacheRead: 0.06,
cacheRead: 0.025,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 131070,
maxTokens: 98304,
} satisfies Model<"openai-completions">,
"z-ai/glm-4.5v": {
id: "z-ai/glm-4.5v",
@@ -13214,8 +13252,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 2.5,
output: 7.5,
input: 1.25,
output: 3.75,
cacheRead: 0,
cacheWrite: 0,
},
@@ -16876,8 +16914,9 @@ export const MODELS = {
api: "openai-completions",
provider: "zai",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","zaiToolStream":true},
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","supportsReasoningEffort":true,"zaiToolStream":true},
reasoning: true,
thinkingLevelMap: {"minimal":null,"low":"high","medium":"high","high":"high","xhigh":"max"},
input: ["text"],
cost: {
input: 0,
@@ -16986,8 +17025,9 @@ export const MODELS = {
api: "openai-completions",
provider: "zai-coding-cn",
baseUrl: "https://open.bigmodel.cn/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","zaiToolStream":true},
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","supportsReasoningEffort":true,"zaiToolStream":true},
reasoning: true,
thinkingLevelMap: {"minimal":null,"low":"high","medium":"high","high":"high","xhigh":"max"},
input: ["text"],
cost: {
input: 0,
@@ -554,8 +554,18 @@ function buildParams(
}
if (compat.thinkingFormat === "zai" && model.reasoning) {
const zaiParams = params as typeof params & { thinking?: { type: "enabled" | "disabled" } };
const zaiParams = params as Omit<typeof params, "reasoning_effort"> & {
thinking?: { type: "enabled" | "disabled" };
reasoning_effort?: string;
};
zaiParams.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
if (options?.reasoningEffort && compat.supportsReasoningEffort) {
const mappedEffort = model.thinkingLevelMap?.[options.reasoningEffort];
const effort = mappedEffort === undefined ? options.reasoningEffort : mappedEffort;
if (typeof effort === "string") {
zaiParams.reasoning_effort = effort;
}
}
} else if (compat.thinkingFormat === "qwen" && model.reasoning) {
(params as any).enable_thinking = !!options?.reasoningEffort;
} else if (compat.thinkingFormat === "qwen-chat-template" && model.reasoning) {
@@ -257,6 +257,86 @@ describe("openai-completions tool_choice", () => {
expect(getModel("zai", "glm-4.5-air")?.compat?.zaiToolStream).toBeUndefined();
});
it("stores z.ai GLM-5.2 effort metadata", () => {
	// The same GLM-5.2 effort metadata is expected under both Z.AI provider keys.
	const expectedLevelMap = {
		minimal: null,
		low: "high",
		medium: "high",
		high: "high",
		xhigh: "max",
	};
	const zaiProviders = ["zai", "zai-coding-cn"] as const;

	for (const providerId of zaiProviders) {
		const glm52 = getModel(providerId, "glm-5.2")!;
		// The compat flag is checked first, then the level mapping itself.
		expect(glm52.compat?.supportsReasoningEffort).toBe(true);
		expect(glm52.thinkingLevelMap).toEqual(expectedLevelMap);
	}
});
it("maps z.ai GLM-5.2 thinking levels to reasoning_effort", async () => {
	const glm52 = getModel("zai", "glm-5.2")!;
	// Requested thinking level paired with the `reasoning_effort` expected in the payload.
	const expectations = [
		{ reasoning: "low", effort: "high" },
		{ reasoning: "medium", effort: "high" },
		{ reasoning: "high", effort: "high" },
		{ reasoning: "xhigh", effort: "max" },
	] as const;

	for (const { reasoning, effort } of expectations) {
		// Holds whatever the `onPayload` hook receives for this request.
		let captured: unknown;
		const recordPayload = (params: unknown) => {
			captured = params;
		};

		const stream = streamSimple(
			glm52,
			{ messages: [{ role: "user", content: "Hi", timestamp: Date.now() }] },
			{ apiKey: "test", reasoning, onPayload: recordPayload },
		);
		await stream.result();

		// Fall back to the mock's recorded params when the hook captured nothing.
		const sent = (captured ?? mockState.lastParams) as { thinking?: unknown; reasoning_effort?: string };
		expect(sent.thinking).toEqual({ type: "enabled" });
		expect(sent.reasoning_effort).toBe(effort);
	}
});
it("omits z.ai GLM-5.2 reasoning_effort when thinking is off", async () => {
	const glm52 = getModel("zai", "glm-5.2")!;

	// Holds whatever the `onPayload` hook receives for this request.
	let captured: unknown;
	const recordPayload = (params: unknown) => {
		captured = params;
	};

	// No `reasoning` option is passed, so the request is made with thinking off.
	const stream = streamSimple(
		glm52,
		{ messages: [{ role: "user", content: "Hi", timestamp: Date.now() }] },
		{ apiKey: "test", onPayload: recordPayload },
	);
	await stream.result();

	// Fall back to the mock's recorded params when the hook captured nothing.
	const sent = (captured ?? mockState.lastParams) as { thinking?: unknown; reasoning_effort?: string };
	expect(sent.thinking).toEqual({ type: "disabled" });
	expect(sent.reasoning_effort).toBeUndefined();
});
it("omits tool_stream for unsupported z.ai models", async () => {
const model = getModel("zai", "glm-4.5-air")!;
const tools: Tool[] = [