From 458a7bc27ca0ff58c1ece28dffff114808609949 Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Thu, 28 May 2026 12:02:54 +0200 Subject: [PATCH] Fix Anthropic empty thinking signature replay closes #4464 --- AGENTS.md | 2 +- packages/ai/CHANGELOG.md | 4 + packages/ai/scripts/generate-models.ts | 11 +- packages/ai/src/models.generated.ts | 10 +- packages/ai/src/providers/anthropic.ts | 30 +++-- packages/ai/src/types.ts | 2 + ...ic-empty-thinking-signature-compat.test.ts | 88 ++++++++++++++ ...ms-anthropic-empty-signature-smoke.test.ts | 114 ++++++++++++++++++ packages/coding-agent/docs/custom-provider.md | 3 +- packages/coding-agent/docs/models.md | 6 +- 10 files changed, 252 insertions(+), 18 deletions(-) create mode 100644 packages/ai/test/anthropic-empty-thinking-signature-compat.test.ts create mode 100644 packages/ai/test/xiaomi-token-plan-ams-anthropic-empty-signature-smoke.test.ts diff --git a/AGENTS.md b/AGENTS.md index 933835e38..8662a98da 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -21,7 +21,7 @@ - Always ask before removing functionality or code that appears intentional. - Do not preserve backward compatibility unless the user asks for it. - Never hardcode key checks (e.g. `matchesKey(keyData, "ctrl+x")`). Add defaults to `DEFAULT_EDITOR_KEYBINDINGS` or `DEFAULT_APP_KEYBINDINGS` so they stay configurable. -- Never modify `packages/ai/src/models.generated.ts` directly; update `packages/ai/scripts/generate-models.ts` instead. +- Never modify `packages/ai/src/models.generated.ts` directly; update `packages/ai/scripts/generate-models.ts` instead, then regenerate. Including the resulting `models.generated.ts` diff is always OK, even if regeneration includes unrelated upstream model metadata changes. 
## Commands diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index ba60d6c4b..87e9f22c1 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -6,6 +6,10 @@ - Added OpenAI Codex subscription device-code login as a selectable headless alternative while keeping browser login as the default. +### Fixed + +- Fixed Anthropic-compatible replay for providers that return empty thinking signatures by adding an opt-in `allowEmptySignature` compatibility flag ([#4464](https://github.com/earendil-works/pi/issues/4464)). + ## [0.76.0] - 2026-05-27 ### Fixed diff --git a/packages/ai/scripts/generate-models.ts b/packages/ai/scripts/generate-models.ts index a61813c2c..791dff0f2 100644 --- a/packages/ai/scripts/generate-models.ts +++ b/packages/ai/scripts/generate-models.ts @@ -262,9 +262,14 @@ function applyThinkingLevelMetadata(model: Model): void { } function getAnthropicMessagesCompat(provider: string, modelId: string): AnthropicMessagesCompat | undefined { - return EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS.has(`${provider}:${modelId}`) - ? { supportsEagerToolInputStreaming: false } - : undefined; + const compat: AnthropicMessagesCompat = {}; + if (EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS.has(`${provider}:${modelId}`)) { + compat.supportsEagerToolInputStreaming = false; + } + if (provider === "xiaomi" || provider.startsWith("xiaomi-token-plan-")) { + compat.allowEmptySignature = true; + } + return Object.keys(compat).length > 0 ? 
compat : undefined; } function getBedrockBaseUrl(modelId: string): string { diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index 7bfc3c595..e881ae0e8 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -9307,7 +9307,7 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 204800, + contextWindow: 262144, maxTokens: 8192, } satisfies Model<"openai-completions">, "minimax/minimax-m2.7": { @@ -11915,13 +11915,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.06599999999999999, - output: 0.26, - cacheRead: 0.029, + input: 0.063, + output: 0.21, + cacheRead: 0.020999999999999998, cacheWrite: 0, }, contextWindow: 262144, - maxTokens: 262144, + maxTokens: 4096, } satisfies Model<"openai-completions">, "thedrummer/rocinante-12b": { id: "thedrummer/rocinante-12b", diff --git a/packages/ai/src/providers/anthropic.ts b/packages/ai/src/providers/anthropic.ts index 5fb5ffdaf..2de6198f7 100644 --- a/packages/ai/src/providers/anthropic.ts +++ b/packages/ai/src/providers/anthropic.ts @@ -177,6 +177,7 @@ function getAnthropicCompat( sendSessionAffinityHeaders: model.compat?.sendSessionAffinityHeaders ?? !!(isFireworks || isCloudflareAiGatewayAnthropic), supportsCacheControlOnTools: model.compat?.supportsCacheControlOnTools ?? !isFireworks, + allowEmptySignature: model.compat?.allowEmptySignature ?? false, }; } @@ -895,7 +896,13 @@ function buildParams( const { cacheControl } = getCacheControl(model, options?.cacheRetention); const params: MessageCreateParamsStreaming = { model: model.id, - messages: convertMessages(context.messages, model, isOAuthToken, cacheControl), + messages: convertMessages( + context.messages, + model, + isOAuthToken, + cacheControl, + getAnthropicCompat(model).allowEmptySignature, + ), max_tokens: options?.maxTokens ?? 
model.maxTokens, stream: true, }; @@ -1001,6 +1008,7 @@ function convertMessages( model: Model<"anthropic-messages">, isOAuthToken: boolean, cacheControl?: CacheControlEphemeral, + allowEmptySignature = false, ): MessageParam[] { const params: MessageParam[] = []; @@ -1069,13 +1077,21 @@ function convertMessages( } if (block.thinking.trim().length === 0) continue; // If thinking signature is missing/empty (e.g., from aborted stream), - // convert to plain text block without tags to avoid API rejection - // and prevent Claude from mimicking the tags in responses + // convert to plain text for Anthropic. Some compatible providers emit + // and accept empty signatures, so let marked models preserve the block. if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) { - blocks.push({ - type: "text", - text: sanitizeSurrogates(block.thinking), - }); + blocks.push( + allowEmptySignature + ? { + type: "thinking", + thinking: sanitizeSurrogates(block.thinking), + signature: "", + } + : { + type: "text", + text: sanitizeSurrogates(block.thinking), + }, + ); } else { blocks.push({ type: "thinking", diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts index b27e78b75..4345e3bf5 100644 --- a/packages/ai/src/types.ts +++ b/packages/ai/src/types.ts @@ -451,6 +451,8 @@ export interface AnthropicMessagesCompat { * Default: false. */ forceAdaptiveThinking?: boolean; + /** Whether to replay empty thinking signatures as `signature: ""` instead of converting thinking to text. Default: false. 
*/ + allowEmptySignature?: boolean; } /** diff --git a/packages/ai/test/anthropic-empty-thinking-signature-compat.test.ts b/packages/ai/test/anthropic-empty-thinking-signature-compat.test.ts new file mode 100644 index 000000000..69e58e27b --- /dev/null +++ b/packages/ai/test/anthropic-empty-thinking-signature-compat.test.ts @@ -0,0 +1,88 @@ +import { describe, expect, it } from "vitest"; +import { streamSimple } from "../src/stream.ts"; +import type { AssistantMessage, Context, Model } from "../src/types.ts"; + +interface AnthropicPayload { + messages?: Array<{ + role: string; + content: Array<{ type: string; text?: string; thinking?: string; signature?: string }>; + }>; +} + +class PayloadCaptured extends Error { + constructor() { + super("payload captured"); + this.name = "PayloadCaptured"; + } +} + +function makeModel(allowEmptySignature?: boolean): Model<"anthropic-messages"> { + return { + id: "mimo-v2.5-pro", + name: "MiMo-V2.5-Pro", + api: "anthropic-messages", + provider: "xiaomi-token-plan-ams", + baseUrl: "http://127.0.0.1:9/anthropic", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 1024, + ...(allowEmptySignature === undefined ? 
{} : { compat: { allowEmptySignature } }), + }; +} + +function makeContext(thinkingSignature: string): Context { + const assistant: AssistantMessage = { + role: "assistant", + content: [{ type: "thinking", thinking: "internal reasoning", thinkingSignature }], + provider: "xiaomi-token-plan-ams", + api: "anthropic-messages", + model: "mimo-v2.5-pro", + timestamp: Date.now(), + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + }; + return { + messages: [ + { role: "user", content: "first", timestamp: Date.now() }, + assistant, + { role: "user", content: "second", timestamp: Date.now() }, + ], + }; +} + +async function capturePayload(model: Model<"anthropic-messages">, context: Context): Promise<AnthropicPayload> { + let capturedPayload: AnthropicPayload | undefined; + const stream = streamSimple(model, context, { + apiKey: "fake-key", + onPayload: (payload) => { + capturedPayload = payload as AnthropicPayload; + throw new PayloadCaptured(); + }, + }); + await stream.result(); + if (!capturedPayload) throw new Error("Expected payload capture before request"); + return capturedPayload; +} + +describe("Anthropic empty thinking signature compat", () => { + it("converts empty-signature thinking to text by default", async () => { + const payload = await capturePayload(makeModel(), makeContext("")); + const assistant = payload.messages?.find((message) => message.role === "assistant"); + expect(assistant?.content).toEqual([{ type: "text", text: "internal reasoning" }]); + }); + + it("preserves empty-signature thinking when allowEmptySignature is enabled", async () => { + const payload = await capturePayload(makeModel(true), makeContext(" ")); + const assistant = payload.messages?.find((message) => message.role === "assistant"); + expect(assistant?.content).toEqual([{ type: "thinking", thinking: "internal reasoning", signature: "" }]); + }); +}); diff --git 
a/packages/ai/test/xiaomi-token-plan-ams-anthropic-empty-signature-smoke.test.ts b/packages/ai/test/xiaomi-token-plan-ams-anthropic-empty-signature-smoke.test.ts new file mode 100644 index 000000000..ab2fec30a --- /dev/null +++ b/packages/ai/test/xiaomi-token-plan-ams-anthropic-empty-signature-smoke.test.ts @@ -0,0 +1,114 @@ +import { describe, expect, it } from "vitest"; +import { completeSimple, getEnvApiKey, streamSimple } from "../src/stream.ts"; +import type { AssistantMessage, Context, Model } from "../src/types.ts"; + +const provider = "xiaomi-token-plan-ams"; +const apiKey = getEnvApiKey(provider); + +const model: Model<"anthropic-messages"> = { + id: "mimo-v2.5-pro", + name: "MiMo-V2.5-Pro Anthropic smoke", + api: "anthropic-messages", + provider, + baseUrl: "https://token-plan-ams.xiaomimimo.com/anthropic", + reasoning: true, + input: ["text"], + cost: { input: 1, output: 3, cacheRead: 0.2, cacheWrite: 0 }, + contextWindow: 1048576, + maxTokens: 1024, + compat: { allowEmptySignature: true }, +}; + +interface AnthropicPayload { + messages?: Array<{ + role: string; + content: string | Array<{ type: string; text?: string; thinking?: string; signature?: string }>; + }>; +} + +class PayloadCaptured extends Error { + constructor() { + super("payload captured"); + this.name = "PayloadCaptured"; + } +} + +function makeInitialContext(): Context { + return { + systemPrompt: "You are concise. 
Follow the requested output format exactly.", + messages: [ + { + role: "user", + content: "Think internally if you need to, then reply with exactly this text and nothing else: first-ok", + timestamp: Date.now(), + }, + ], + }; +} + +function getThinkingBlocks(message: AssistantMessage) { + return message.content.filter((block) => block.type === "thinking"); +} + +async function captureReplayPayload(context: Context): Promise<AnthropicPayload> { + let capturedPayload: AnthropicPayload | undefined; + const stream = streamSimple(model, context, { + apiKey, + maxTokens: 512, + reasoning: "high", + onPayload: (payload) => { + capturedPayload = payload as AnthropicPayload; + throw new PayloadCaptured(); + }, + }); + + await stream.result(); + + if (!capturedPayload) { + throw new Error("Expected payload capture before request"); + } + return capturedPayload; +} + +describe.skipIf(!apiKey)("Xiaomi Token Plan AMS Anthropic empty thinking signature smoke", () => { + it("reproduces empty thinking signatures and preserves them for replay", { timeout: 60000, retry: 1 }, async () => { + const firstContext = makeInitialContext(); + const first = await completeSimple(model, firstContext, { + apiKey, + maxTokens: 512, + reasoning: "high", + }); + + expect(first.stopReason, first.errorMessage).toBe("stop"); + + const thinkingBlocks = getThinkingBlocks(first); + expect(thinkingBlocks.length).toBeGreaterThan(0); + expect(thinkingBlocks.some((block) => block.thinkingSignature === "")).toBe(true); + + const replayContext: Context = { + ...firstContext, + messages: [ + ...firstContext.messages, + first, + { + role: "user", + content: "Reply with exactly this text and nothing else: second-ok", + timestamp: Date.now(), + }, + ], + }; + + const replayPayload = await captureReplayPayload(replayContext); + const assistantPayload = replayPayload.messages?.find((message) => message.role === "assistant"); + expect(assistantPayload).toBeDefined(); + expect(Array.isArray(assistantPayload!.content)).toBe(true); + 
const replayedThinking = (assistantPayload!.content as Array<{ type: string; text?: string }>).filter( + (block) => block.type === "thinking", + ); + const replayedText = (assistantPayload!.content as Array<{ type: string; text?: string }>).filter( + (block) => block.type === "text", + ); + expect(replayedThinking).toEqual([{ type: "thinking", thinking: thinkingBlocks[0].thinking, signature: "" }]); + expect(replayedText.some((block) => block.text === thinkingBlocks[0].thinking)).toBe(false); + }); +}); diff --git a/packages/coding-agent/docs/custom-provider.md b/packages/coding-agent/docs/custom-provider.md index 50cb1ef06..c2bf44556 100644 --- a/packages/coding-agent/docs/custom-provider.md +++ b/packages/coding-agent/docs/custom-provider.md @@ -232,7 +232,7 @@ models: [{ Use `openrouter` for OpenRouter-style `reasoning: { effort }` controls. Use `together` for Together-style `reasoning: { enabled }` controls; with `supportsReasoningEffort`, it also sends `reasoning_effort`. Use `qwen-chat-template` instead for local Qwen-compatible servers that read `chat_template_kwargs.enable_thinking`. Use `cacheControlFormat: "anthropic"` for OpenAI-compatible providers that expose Anthropic-style prompt caching via `cache_control` on the system prompt, last tool definition, and last user/assistant text content. -For Anthropic-compatible providers using `api: "anthropic-messages"`, set `compat.forceAdaptiveThinking: true` on models or providers whose upstream model requires adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`). Built-in adaptive Claude models set this automatically. +For Anthropic-compatible providers using `api: "anthropic-messages"`, set `compat.forceAdaptiveThinking: true` on models or providers whose upstream model requires adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`). Built-in adaptive Claude models set this automatically. 
Set `compat.allowEmptySignature: true` only for providers that emit empty thinking signatures and expect `signature: ""` on replay. > Migration note: Mistral moved from `openai-completions` to `mistral-conversations`. > Use `mistral-conversations` for native Mistral models. @@ -727,6 +727,7 @@ interface ProviderModelConfig { sendSessionAffinityHeaders?: boolean; supportsCacheControlOnTools?: boolean; forceAdaptiveThinking?: boolean; + allowEmptySignature?: boolean; }; } ``` diff --git a/packages/coding-agent/docs/models.md b/packages/coding-agent/docs/models.md index c97a6cf33..d457643dd 100644 --- a/packages/coding-agent/docs/models.md +++ b/packages/coding-agent/docs/models.md @@ -330,6 +330,8 @@ By default pi sends per-tool `eager_input_streaming: true`. If a proxy or Anthro Some Anthropic models require adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`) instead of the legacy budget-based thinking payload. Built-in models set this automatically. For custom providers or aliases that route to those models, set `forceAdaptiveThinking` to `true`. +Some Anthropic-compatible providers emit thinking blocks with empty signatures and still expect them on replay. Set `allowEmptySignature` to `true` only for those providers; real Anthropic rejects empty thinking signatures. + ```json { "providers": { @@ -340,7 +342,8 @@ Some Anthropic models require adaptive thinking (`thinking.type: "adaptive"` plu "compat": { "supportsEagerToolInputStreaming": false, "supportsLongCacheRetention": true, - "forceAdaptiveThinking": true + "forceAdaptiveThinking": true, + "allowEmptySignature": true }, "models": [ { @@ -361,6 +364,7 @@ Some Anthropic models require adaptive thinking (`thinking.type: "adaptive"` plu | `sendSessionAffinityHeaders` | Whether to send `x-session-affinity` from the session id when caching is enabled. Default: auto-detected for known providers. 
| | `supportsCacheControlOnTools` | Whether the provider accepts Anthropic-style `cache_control` markers on tool definitions. Default: `true`. | | `forceAdaptiveThinking` | Whether to send adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`) for this model. Built-in adaptive models set this automatically. Default: `false`. | +| `allowEmptySignature` | Whether to replay empty thinking signatures as `signature: ""` instead of converting thinking to text. Default: `false`. | ## OpenAI Compatibility