mirror of
https://github.com/earendil-works/pi.git
synced 2026-06-18 15:54:04 +08:00
@@ -4,6 +4,7 @@
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed Z.AI GLM-5.2 thinking requests to send `reasoning_effort` with the provider's `high`/`max` effort mapping ([#5770](https://github.com/earendil-works/pi/issues/5770)).
|
||||
- Fixed Google and `google-vertex` Gemini model metadata to map `latest` aliases to the current models, add Gemini 3.5 Flash for Vertex, correct Gemini 2.5 Flash Vertex cache pricing, and remove shut-down Vertex preview models ([#5761](https://github.com/earendil-works/pi/issues/5761)).
|
||||
- Fixed Moonshot AI China model metadata to include Kimi K2.7 Code, and omitted unsupported thinking-off payloads for Kimi K2.7 Code models ([#5760](https://github.com/earendil-works/pi/issues/5760)).
|
||||
|
||||
|
||||
@@ -157,6 +157,13 @@ const NVIDIA_NIM_UNSUPPORTED_MODELS = new Set([
|
||||
"upstage/solar-10.7b-instruct",
|
||||
]);
|
||||
// Z.AI model ids that must not be tagged with `compat.zaiToolStream`.
// Membership is checked by exact model id.
const ZAI_TOOL_STREAM_UNSUPPORTED_MODELS = new Set([
	"glm-4.5",
	"glm-4.5-air",
	"glm-4.5-flash",
	"glm-4.5v",
]);
|
||||
/**
 * Thinking-level table attached as `thinkingLevelMap` to the model whose id is
 * exactly "glm-5.2".
 *
 * Keys are the caller-facing thinking levels; values are the effort strings
 * they translate to. `low`, `medium` and `high` all collapse to "high", and
 * `xhigh` becomes "max". `minimal` is `null`, i.e. it has no effort string.
 * NOTE(review): the Z.AI request builder shown with this change only forwards
 * string values as `reasoning_effort`, so a `null` entry presumably means the
 * field is left out — confirm against the provider code.
 */
const ZAI_GLM52_THINKING_LEVEL_MAP = {
|
||||
minimal: null,
|
||||
low: "high",
|
||||
|
||||
medium: "high",
|
||||
high: "high",
|
||||
|
||||
xhigh: "max",
|
||||
} as const;
|
||||
const EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS = new Set([
|
||||
"github-copilot:claude-haiku-4.5",
|
||||
"github-copilot:claude-sonnet-4",
|
||||
@@ -894,6 +901,8 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
|
||||
if (m.tool_call !== true) continue;
|
||||
const supportsImage = m.modalities?.input?.includes("image");
|
||||
|
||||
const isGlm52 = modelId === "glm-5.2";
|
||||
|
||||
models.push({
|
||||
id: modelId,
|
||||
name: m.name || modelId,
|
||||
@@ -901,6 +910,7 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
|
||||
provider,
|
||||
baseUrl,
|
||||
reasoning: m.reasoning === true,
|
||||
...(isGlm52 ? { thinkingLevelMap: ZAI_GLM52_THINKING_LEVEL_MAP } : {}),
|
||||
input: supportsImage ? ["text", "image"] : ["text"],
|
||||
cost: {
|
||||
input: m.cost?.input || 0,
|
||||
@@ -911,6 +921,7 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
|
||||
compat: {
|
||||
supportsDeveloperRole: false,
|
||||
thinkingFormat: "zai",
|
||||
...(isGlm52 ? { supportsReasoningEffort: true } : {}),
|
||||
...(!ZAI_TOOL_STREAM_UNSUPPORTED_MODELS.has(modelId) ? { zaiToolStream: true } : {}),
|
||||
},
|
||||
contextWindow: m.limit?.context || 4096,
|
||||
|
||||
@@ -3921,7 +3921,7 @@ export const MODELS = {
|
||||
cost: {
|
||||
input: 0.15,
|
||||
output: 0.6,
|
||||
cacheRead: 0.015,
|
||||
cacheRead: 0.01,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
@@ -6390,6 +6390,25 @@ export const MODELS = {
|
||||
contextWindow: 262144,
|
||||
maxTokens: 262144,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"kimi-k2.7-code-highspeed": {
|
||||
id: "kimi-k2.7-code-highspeed",
|
||||
name: "Kimi K2.7 Code HighSpeed",
|
||||
api: "openai-completions",
|
||||
provider: "moonshotai",
|
||||
baseUrl: "https://api.moonshot.ai/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"thinkingFormat":"deepseek"},
|
||||
reasoning: true,
|
||||
thinkingLevelMap: {"off":null},
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 1.9,
|
||||
output: 8,
|
||||
cacheRead: 0.38,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 262144,
|
||||
} satisfies Model<"openai-completions">,
|
||||
},
|
||||
"moonshotai-cn": {
|
||||
"kimi-k2-0711-preview": {
|
||||
@@ -6537,6 +6556,25 @@ export const MODELS = {
|
||||
contextWindow: 262144,
|
||||
maxTokens: 262144,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"kimi-k2.7-code-highspeed": {
|
||||
id: "kimi-k2.7-code-highspeed",
|
||||
name: "Kimi K2.7 Code HighSpeed",
|
||||
api: "openai-completions",
|
||||
provider: "moonshotai-cn",
|
||||
baseUrl: "https://api.moonshot.cn/v1",
|
||||
compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false,"maxTokensField":"max_tokens","supportsStrictMode":false,"thinkingFormat":"deepseek"},
|
||||
reasoning: true,
|
||||
thinkingLevelMap: {"off":null},
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 1.9,
|
||||
output: 8,
|
||||
cacheRead: 0.38,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 262144,
|
||||
} satisfies Model<"openai-completions">,
|
||||
},
|
||||
"nvidia": {
|
||||
"meta/llama-3.1-70b-instruct": {
|
||||
@@ -12298,13 +12336,13 @@ export const MODELS = {
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 0.39,
|
||||
output: 2.34,
|
||||
input: 0.385,
|
||||
output: 2.45,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 65536,
|
||||
contextWindow: 256000,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"qwen/qwen3.5-9b": {
|
||||
id: "qwen/qwen3.5-9b",
|
||||
@@ -12587,13 +12625,13 @@ export const MODELS = {
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.063,
|
||||
output: 0.21,
|
||||
cacheRead: 0.021,
|
||||
input: 0.066,
|
||||
output: 0.26,
|
||||
cacheRead: 0.029,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 4096,
|
||||
maxTokens: 262144,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"thedrummer/rocinante-12b": {
|
||||
id: "thedrummer/rocinante-12b",
|
||||
@@ -12774,13 +12812,13 @@ export const MODELS = {
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.125,
|
||||
input: 0.13,
|
||||
output: 0.85,
|
||||
cacheRead: 0.06,
|
||||
cacheRead: 0.025,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 131070,
|
||||
maxTokens: 98304,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"z-ai/glm-4.5v": {
|
||||
id: "z-ai/glm-4.5v",
|
||||
@@ -13214,8 +13252,8 @@ export const MODELS = {
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 2.5,
|
||||
output: 7.5,
|
||||
input: 1.25,
|
||||
output: 3.75,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
@@ -16876,8 +16914,9 @@ export const MODELS = {
|
||||
api: "openai-completions",
|
||||
provider: "zai",
|
||||
baseUrl: "https://api.z.ai/api/coding/paas/v4",
|
||||
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","zaiToolStream":true},
|
||||
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","supportsReasoningEffort":true,"zaiToolStream":true},
|
||||
reasoning: true,
|
||||
thinkingLevelMap: {"minimal":null,"low":"high","medium":"high","high":"high","xhigh":"max"},
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
@@ -16986,8 +17025,9 @@ export const MODELS = {
|
||||
api: "openai-completions",
|
||||
provider: "zai-coding-cn",
|
||||
baseUrl: "https://open.bigmodel.cn/api/coding/paas/v4",
|
||||
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","zaiToolStream":true},
|
||||
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai","supportsReasoningEffort":true,"zaiToolStream":true},
|
||||
reasoning: true,
|
||||
thinkingLevelMap: {"minimal":null,"low":"high","medium":"high","high":"high","xhigh":"max"},
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
|
||||
@@ -554,8 +554,18 @@ function buildParams(
|
||||
}
|
||||
|
||||
if (compat.thinkingFormat === "zai" && model.reasoning) {
|
||||
const zaiParams = params as typeof params & { thinking?: { type: "enabled" | "disabled" } };
|
||||
const zaiParams = params as Omit<typeof params, "reasoning_effort"> & {
|
||||
thinking?: { type: "enabled" | "disabled" };
|
||||
reasoning_effort?: string;
|
||||
};
|
||||
zaiParams.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
|
||||
if (options?.reasoningEffort && compat.supportsReasoningEffort) {
|
||||
const mappedEffort = model.thinkingLevelMap?.[options.reasoningEffort];
|
||||
const effort = mappedEffort === undefined ? options.reasoningEffort : mappedEffort;
|
||||
if (typeof effort === "string") {
|
||||
zaiParams.reasoning_effort = effort;
|
||||
}
|
||||
}
|
||||
} else if (compat.thinkingFormat === "qwen" && model.reasoning) {
|
||||
(params as any).enable_thinking = !!options?.reasoningEffort;
|
||||
} else if (compat.thinkingFormat === "qwen-chat-template" && model.reasoning) {
|
||||
|
||||
@@ -257,6 +257,86 @@ describe("openai-completions tool_choice", () => {
|
||||
expect(getModel("zai", "glm-4.5-air")?.compat?.zaiToolStream).toBeUndefined();
|
||||
});
|
||||
|
||||
it("stores z.ai GLM-5.2 effort metadata", () => {
	// Both Z.AI providers are expected to carry identical GLM-5.2 metadata.
	const expectedLevels = {
		minimal: null,
		low: "high",
		medium: "high",
		high: "high",
		xhigh: "max",
	};

	(["zai", "zai-coding-cn"] as const).forEach((provider) => {
		const glm = getModel(provider, "glm-5.2")!;
		expect(glm.compat?.supportsReasoningEffort).toBe(true);
		expect(glm.thinkingLevelMap).toEqual(expectedLevels);
	});
});
|
||||
|
||||
it("maps z.ai GLM-5.2 thinking levels to reasoning_effort", async () => {
	const glm = getModel("zai", "glm-5.2")!;
	// [requested thinking level, reasoning_effort expected in the request payload]
	const expectations = [
		["low", "high"],
		["medium", "high"],
		["high", "high"],
		["xhigh", "max"],
	] as const;

	// Requests are issued one at a time so each capture belongs to its own level.
	for (const [level, expectedEffort] of expectations) {
		let captured: unknown;

		const stream = streamSimple(
			glm,
			{ messages: [{ role: "user", content: "Hi", timestamp: Date.now() }] },
			{
				apiKey: "test",
				reasoning: level,
				onPayload: (sent: unknown) => {
					captured = sent;
				},
			},
		);
		await stream.result();

		// Fall back to the params recorded by the mock when onPayload captured nothing.
		const sentParams = (captured ?? mockState.lastParams) as { thinking?: unknown; reasoning_effort?: string };
		expect(sentParams.thinking).toEqual({ type: "enabled" });
		expect(sentParams.reasoning_effort).toBe(expectedEffort);
	}
});
|
||||
|
||||
it("omits z.ai GLM-5.2 reasoning_effort when thinking is off", async () => {
	const glm = getModel("zai", "glm-5.2")!;
	let captured: unknown;

	// No `reasoning` option is passed for this request.
	const stream = streamSimple(
		glm,
		{ messages: [{ role: "user", content: "Hi", timestamp: Date.now() }] },
		{
			apiKey: "test",
			onPayload: (sent: unknown) => {
				captured = sent;
			},
		},
	);
	await stream.result();

	// Fall back to the params recorded by the mock when onPayload captured nothing.
	const sentParams = (captured ?? mockState.lastParams) as { thinking?: unknown; reasoning_effort?: string };
	expect(sentParams.thinking).toEqual({ type: "disabled" });
	expect(sentParams.reasoning_effort).toBeUndefined();
});
|
||||
|
||||
it("omits tool_stream for unsupported z.ai models", async () => {
|
||||
const model = getModel("zai", "glm-4.5-air")!;
|
||||
const tools: Tool[] = [
|
||||
|
||||
Reference in New Issue
Block a user