Fix Anthropic empty thinking signature replay

closes #4464
2026-06-18 15:54:04 +08:00 · 2026-05-28 12:02:54 +02:00
parent 3e9f717445
commit 458a7bc27c
10 changed files with 252 additions and 18 deletions
@@ -21,7 +21,7 @@
 - Always ask before removing functionality or code that appears intentional.
 - Do not preserve backward compatibility unless the user asks for it.
 - Never hardcode key checks (e.g. `matchesKey(keyData, "ctrl+x")`). Add defaults to `DEFAULT_EDITOR_KEYBINDINGS` or `DEFAULT_APP_KEYBINDINGS` so they stay configurable.
- Never modify `packages/ai/src/models.generated.ts` directly; update `packages/ai/scripts/generate-models.ts` instead.
+- Never modify `packages/ai/src/models.generated.ts` directly; update `packages/ai/scripts/generate-models.ts` instead, then regenerate. Including the resulting `models.generated.ts` diff is always OK, even if regeneration includes unrelated upstream model metadata changes.

 ## Commands

@@ -6,6 +6,10 @@

 - Added OpenAI Codex subscription device-code login as a selectable headless alternative while keeping browser login as the default.

+### Fixed
+
+- Fixed Anthropic-compatible replay for providers that return empty thinking signatures by adding an opt-in `allowEmptySignature` compatibility flag ([#4464](https://github.com/earendil-works/pi/issues/4464)).
+
 ## [0.76.0] - 2026-05-27

 ### Fixed
@@ -262,9 +262,14 @@ function applyThinkingLevelMetadata(model: Model<any>): void {
 }

 function getAnthropicMessagesCompat(provider: string, modelId: string): AnthropicMessagesCompat | undefined {
-	return EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS.has(`${provider}:${modelId}`)
-		? { supportsEagerToolInputStreaming: false }
-		: undefined;
+	const compat: AnthropicMessagesCompat = {};
+	if (EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS.has(`${provider}:${modelId}`)) {
+		compat.supportsEagerToolInputStreaming = false;
+	}
+	if (provider === "xiaomi" || provider.startsWith("xiaomi-token-plan-")) {
+		compat.allowEmptySignature = true;
+	}
+	return Object.keys(compat).length > 0 ? compat : undefined;
 }

 function getBedrockBaseUrl(modelId: string): string {
@@ -9307,7 +9307,7 @@ export const MODELS = {
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
-			contextWindow: 204800,
+			contextWindow: 262144,
 			maxTokens: 8192,
 		} satisfies Model<"openai-completions">,
 		"minimax/minimax-m2.7": {
@@ -11915,13 +11915,13 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.06599999999999999,
-				output: 0.26,
-				cacheRead: 0.029,
+				input: 0.063,
+				output: 0.21,
+				cacheRead: 0.020999999999999998,
 				cacheWrite: 0,
 			},
 			contextWindow: 262144,
-			maxTokens: 262144,
+			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
 		"thedrummer/rocinante-12b": {
 			id: "thedrummer/rocinante-12b",
@@ -177,6 +177,7 @@ function getAnthropicCompat(
 		sendSessionAffinityHeaders:
 			model.compat?.sendSessionAffinityHeaders ?? !!(isFireworks || isCloudflareAiGatewayAnthropic),
 		supportsCacheControlOnTools: model.compat?.supportsCacheControlOnTools ?? !isFireworks,
+		allowEmptySignature: model.compat?.allowEmptySignature ?? false,
 	};
 }

@@ -895,7 +896,13 @@ function buildParams(
 	const { cacheControl } = getCacheControl(model, options?.cacheRetention);
 	const params: MessageCreateParamsStreaming = {
 		model: model.id,
-		messages: convertMessages(context.messages, model, isOAuthToken, cacheControl),
+		messages: convertMessages(
+			context.messages,
+			model,
+			isOAuthToken,
+			cacheControl,
+			getAnthropicCompat(model).allowEmptySignature,
+		),
 		max_tokens: options?.maxTokens ?? model.maxTokens,
 		stream: true,
 	};
@@ -1001,6 +1008,7 @@ function convertMessages(
 	model: Model<"anthropic-messages">,
 	isOAuthToken: boolean,
 	cacheControl?: CacheControlEphemeral,
+	allowEmptySignature = false,
 ): MessageParam[] {
 	const params: MessageParam[] = [];

@@ -1069,13 +1077,21 @@ function convertMessages(
 					}
 					if (block.thinking.trim().length === 0) continue;
 					// If thinking signature is missing/empty (e.g., from aborted stream),
-					// convert to plain text block without <thinking> tags to avoid API rejection
-					// and prevent Claude from mimicking the tags in responses
+					// convert to plain text for Anthropic. Some compatible providers emit
+					// and accept empty signatures, so let marked models preserve the block.
 					if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
-						blocks.push({
-							type: "text",
-							text: sanitizeSurrogates(block.thinking),
-						});
+						blocks.push(
+							allowEmptySignature
+								? {
+										type: "thinking",
+										thinking: sanitizeSurrogates(block.thinking),
+										signature: "",
+									}
+								: {
+										type: "text",
+										text: sanitizeSurrogates(block.thinking),
+									},
+						);
 					} else {
 						blocks.push({
 							type: "thinking",
@@ -451,6 +451,8 @@ export interface AnthropicMessagesCompat {
 	 * Default: false.
 	 */
 	forceAdaptiveThinking?: boolean;
+	/** Whether to replay empty thinking signatures as `signature: ""` instead of converting thinking to text. Default: false. */
+	allowEmptySignature?: boolean;
 }

 /**
@@ -0,0 +1,88 @@
+import { describe, expect, it } from "vitest";
+import { streamSimple } from "../src/stream.ts";
+import type { AssistantMessage, Context, Model } from "../src/types.ts";
+
+interface AnthropicPayload {
+	messages?: Array<{
+		role: string;
+		content: Array<{ type: string; text?: string; thinking?: string; signature?: string }>;
+	}>;
+}
+
+class PayloadCaptured extends Error {
+	constructor() {
+		super("payload captured");
+		this.name = "PayloadCaptured";
+	}
+}
+
+function makeModel(allowEmptySignature?: boolean): Model<"anthropic-messages"> {
+	return {
+		id: "mimo-v2.5-pro",
+		name: "MiMo-V2.5-Pro",
+		api: "anthropic-messages",
+		provider: "xiaomi-token-plan-ams",
+		baseUrl: "http://127.0.0.1:9/anthropic",
+		reasoning: true,
+		input: ["text"],
+		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+		contextWindow: 1048576,
+		maxTokens: 1024,
+		...(allowEmptySignature === undefined ? {} : { compat: { allowEmptySignature } }),
+	};
+}
+
+function makeContext(thinkingSignature: string): Context {
+	const assistant: AssistantMessage = {
+		role: "assistant",
+		content: [{ type: "thinking", thinking: "internal reasoning", thinkingSignature }],
+		provider: "xiaomi-token-plan-ams",
+		api: "anthropic-messages",
+		model: "mimo-v2.5-pro",
+		timestamp: Date.now(),
+		usage: {
+			input: 0,
+			output: 0,
+			cacheRead: 0,
+			cacheWrite: 0,
+			totalTokens: 0,
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+		},
+		stopReason: "stop",
+	};
+	return {
+		messages: [
+			{ role: "user", content: "first", timestamp: Date.now() },
+			assistant,
+			{ role: "user", content: "second", timestamp: Date.now() },
+		],
+	};
+}
+
+async function capturePayload(model: Model<"anthropic-messages">, context: Context): Promise<AnthropicPayload> {
+	let capturedPayload: AnthropicPayload | undefined;
+	const stream = streamSimple(model, context, {
+		apiKey: "fake-key",
+		onPayload: (payload) => {
+			capturedPayload = payload as AnthropicPayload;
+			throw new PayloadCaptured();
+		},
+	});
+	await stream.result();
+	if (!capturedPayload) throw new Error("Expected payload capture before request");
+	return capturedPayload;
+}
+
+describe("Anthropic empty thinking signature compat", () => {
+	it("converts empty-signature thinking to text by default", async () => {
+		const payload = await capturePayload(makeModel(), makeContext(""));
+		const assistant = payload.messages?.find((message) => message.role === "assistant");
+		expect(assistant?.content).toEqual([{ type: "text", text: "internal reasoning" }]);
+	});
+
+	it("preserves empty-signature thinking when allowEmptySignature is enabled", async () => {
+		const payload = await capturePayload(makeModel(true), makeContext(" "));
+		const assistant = payload.messages?.find((message) => message.role === "assistant");
+		expect(assistant?.content).toEqual([{ type: "thinking", thinking: "internal reasoning", signature: "" }]);
+	});
+});
@@ -0,0 +1,114 @@
+import { describe, expect, it } from "vitest";
+import { completeSimple, getEnvApiKey, streamSimple } from "../src/stream.ts";
+import type { AssistantMessage, Context, Model } from "../src/types.ts";
+
+const provider = "xiaomi-token-plan-ams";
+const apiKey = getEnvApiKey(provider);
+
+const model: Model<"anthropic-messages"> = {
+	id: "mimo-v2.5-pro",
+	name: "MiMo-V2.5-Pro Anthropic smoke",
+	api: "anthropic-messages",
+	provider,
+	baseUrl: "https://token-plan-ams.xiaomimimo.com/anthropic",
+	reasoning: true,
+	input: ["text"],
+	cost: { input: 1, output: 3, cacheRead: 0.2, cacheWrite: 0 },
+	contextWindow: 1048576,
+	maxTokens: 1024,
+	compat: { allowEmptySignature: true },
+};
+
+interface AnthropicPayload {
+	messages?: Array<{
+		role: string;
+		content: string | Array<{ type: string; text?: string; thinking?: string; signature?: string }>;
+	}>;
+}
+
+class PayloadCaptured extends Error {
+	constructor() {
+		super("payload captured");
+		this.name = "PayloadCaptured";
+	}
+}
+
+function makeInitialContext(): Context {
+	return {
+		systemPrompt: "You are concise. Follow the requested output format exactly.",
+		messages: [
+			{
+				role: "user",
+				content: "Think internally if you need to, then reply with exactly this text and nothing else: first-ok",
+				timestamp: Date.now(),
+			},
+		],
+	};
+}
+
+function getThinkingBlocks(message: AssistantMessage) {
+	return message.content.filter((block) => block.type === "thinking");
+}
+
+async function captureReplayPayload(context: Context): Promise<AnthropicPayload> {
+	let capturedPayload: AnthropicPayload | undefined;
+	const stream = streamSimple(model, context, {
+		apiKey,
+		maxTokens: 512,
+		reasoning: "high",
+		onPayload: (payload) => {
+			capturedPayload = payload as AnthropicPayload;
+			throw new PayloadCaptured();
+		},
+	});
+
+	await stream.result();
+
+	if (!capturedPayload) {
+		throw new Error("Expected payload capture before request");
+	}
+	return capturedPayload;
+}
+
+describe.skipIf(!apiKey)("Xiaomi Token Plan AMS Anthropic empty thinking signature smoke", () => {
+	it("reproduces empty thinking signatures and preserves them for replay", { timeout: 60000, retry: 1 }, async () => {
+		const firstContext = makeInitialContext();
+		const first = await completeSimple(model, firstContext, {
+			apiKey,
+			maxTokens: 512,
+			reasoning: "high",
+		});
+
+		expect(first.stopReason, first.errorMessage).toBe("stop");
+
+		const thinkingBlocks = getThinkingBlocks(first);
+		expect(thinkingBlocks.length).toBeGreaterThan(0);
+		expect(thinkingBlocks.some((block) => block.thinkingSignature === "")).toBe(true);
+
+		const replayContext: Context = {
+			...firstContext,
+			messages: [
+				...firstContext.messages,
+				first,
+				{
+					role: "user",
+					content: "Reply with exactly this text and nothing else: second-ok",
+					timestamp: Date.now(),
+				},
+			],
+		};
+
+		const replayPayload = await captureReplayPayload(replayContext);
+		const assistantPayload = replayPayload.messages?.find((message) => message.role === "assistant");
+		expect(assistantPayload).toBeDefined();
+		expect(Array.isArray(assistantPayload!.content)).toBe(true);
+		const replayedThinking = (assistantPayload!.content as Array<{ type: string; text?: string }>).filter(
+			(block) => block.type === "thinking",
+		);
+		const replayedText = (assistantPayload!.content as Array<{ type: string; text?: string }>).filter(
+			(block) => block.type === "text",
+		);
+		expect(replayedThinking).toEqual([{ type: "thinking", thinking: thinkingBlocks[0].thinking, signature: "" }]);
+		expect(replayedText.some((block) => block.text === thinkingBlocks[0].thinking)).toBe(false);
+	});
+});
@@ -232,7 +232,7 @@ models: [{
 Use `openrouter` for OpenRouter-style `reasoning: { effort }` controls. Use `together` for Together-style `reasoning: { enabled }` controls; with `supportsReasoningEffort`, it also sends `reasoning_effort`. Use `qwen-chat-template` instead for local Qwen-compatible servers that read `chat_template_kwargs.enable_thinking`.
 Use `cacheControlFormat: "anthropic"` for OpenAI-compatible providers that expose Anthropic-style prompt caching via `cache_control` on the system prompt, last tool definition, and last user/assistant text content.

-For Anthropic-compatible providers using `api: "anthropic-messages"`, set `compat.forceAdaptiveThinking: true` on models or providers whose upstream model requires adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`). Built-in adaptive Claude models set this automatically.
+For Anthropic-compatible providers using `api: "anthropic-messages"`, set `compat.forceAdaptiveThinking: true` on models or providers whose upstream model requires adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`). Built-in adaptive Claude models set this automatically. Set `compat.allowEmptySignature: true` only for providers that emit empty thinking signatures and expect `signature: ""` on replay.

 > Migration note: Mistral moved from `openai-completions` to `mistral-conversations`.
 > Use `mistral-conversations` for native Mistral models.
@@ -727,6 +727,7 @@ interface ProviderModelConfig {
    sendSessionAffinityHeaders?: boolean;
    supportsCacheControlOnTools?: boolean;
    forceAdaptiveThinking?: boolean;
+    allowEmptySignature?: boolean;
  };
 }
 ```
@@ -330,6 +330,8 @@ By default pi sends per-tool `eager_input_streaming: true`. If a proxy or Anthro

 Some Anthropic models require adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`) instead of the legacy budget-based thinking payload. Built-in models set this automatically. For custom providers or aliases that route to those models, set `forceAdaptiveThinking` to `true`.

+Some Anthropic-compatible providers emit thinking blocks with empty signatures and still expect those blocks, with `signature: ""`, on replay. Set `allowEmptySignature` to `true` only for those providers; Anthropic's own API rejects empty thinking signatures, so by default pi replays such thinking as plain text instead.
+
 ```json
 {
  "providers": {
@@ -340,7 +342,8 @@ Some Anthropic models require adaptive thinking (`thinking.type: "adaptive"` plu
      "compat": {
        "supportsEagerToolInputStreaming": false,
        "supportsLongCacheRetention": true,
-        "forceAdaptiveThinking": true
+        "forceAdaptiveThinking": true,
+        "allowEmptySignature": true
      },
      "models": [
        {
@@ -361,6 +364,7 @@ Some Anthropic models require adaptive thinking (`thinking.type: "adaptive"` plu
 | `sendSessionAffinityHeaders` | Whether to send `x-session-affinity` from the session id when caching is enabled. Default: auto-detected for known providers. |
 | `supportsCacheControlOnTools` | Whether the provider accepts Anthropic-style `cache_control` markers on tool definitions. Default: `true`. |
 | `forceAdaptiveThinking` | Whether to send adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`) for this model. Built-in adaptive models set this automatically. Default: `false`. |
+| `allowEmptySignature` | Whether to replay thinking blocks whose signature is missing or blank as thinking blocks with `signature: ""` instead of converting the thinking to plain text. Default: `false`. |

 ## OpenAI Compatibility