Fix Anthropic empty thinking signature replay

closes #4464
This commit is contained in:
Mario Zechner
2026-05-28 12:02:54 +02:00
Unverified
parent 3e9f717445
commit 458a7bc27c
10 changed files with 252 additions and 18 deletions
+1 -1
View File
@@ -21,7 +21,7 @@
- Always ask before removing functionality or code that appears intentional.
- Do not preserve backward compatibility unless the user asks for it.
- Never hardcode key checks (e.g. `matchesKey(keyData, "ctrl+x")`). Add defaults to `DEFAULT_EDITOR_KEYBINDINGS` or `DEFAULT_APP_KEYBINDINGS` so they stay configurable.
- Never modify `packages/ai/src/models.generated.ts` directly; update `packages/ai/scripts/generate-models.ts` instead.
- Never modify `packages/ai/src/models.generated.ts` directly; update `packages/ai/scripts/generate-models.ts` instead, then regenerate. Including the resulting `models.generated.ts` diff is always OK, even if regeneration includes unrelated upstream model metadata changes.
## Commands
+4
View File
@@ -6,6 +6,10 @@
- Added OpenAI Codex subscription device-code login as a selectable headless alternative while keeping browser login as the default.
### Fixed
- Fixed Anthropic-compatible replay for providers that return empty thinking signatures by adding an opt-in `allowEmptySignature` compatibility flag ([#4464](https://github.com/earendil-works/pi/issues/4464)).
## [0.76.0] - 2026-05-27
### Fixed
+8 -3
View File
@@ -262,9 +262,14 @@ function applyThinkingLevelMetadata(model: Model<any>): void {
}
/**
 * Collects the Anthropic Messages compatibility overrides for one model entry.
 *
 * @param provider - Provider id the model is listed under.
 * @param modelId - Model id within that provider.
 * @returns The compat flags that apply to the model, or `undefined` when no
 *   override is needed.
 */
function getAnthropicMessagesCompat(provider: string, modelId: string): AnthropicMessagesCompat | undefined {
	const compat: AnthropicMessagesCompat = {};
	// Models on the deny-list must not receive eager tool input streaming.
	if (EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS.has(`${provider}:${modelId}`)) {
		compat.supportsEagerToolInputStreaming = false;
	}
	// Xiaomi providers (plain and token-plan variants) are opted in to replaying
	// empty thinking signatures (see #4464).
	if (provider === "xiaomi" || provider.startsWith("xiaomi-token-plan-")) {
		compat.allowEmptySignature = true;
	}
	// Callers get `undefined` rather than an empty object when nothing applies.
	return Object.keys(compat).length > 0 ? compat : undefined;
}
function getBedrockBaseUrl(modelId: string): string {
+5 -5
View File
@@ -9307,7 +9307,7 @@ export const MODELS = {
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 204800,
contextWindow: 262144,
maxTokens: 8192,
} satisfies Model<"openai-completions">,
"minimax/minimax-m2.7": {
@@ -11915,13 +11915,13 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.06599999999999999,
output: 0.26,
cacheRead: 0.029,
input: 0.063,
output: 0.21,
cacheRead: 0.020999999999999998,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 262144,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"thedrummer/rocinante-12b": {
id: "thedrummer/rocinante-12b",
+23 -7
View File
@@ -177,6 +177,7 @@ function getAnthropicCompat(
sendSessionAffinityHeaders:
model.compat?.sendSessionAffinityHeaders ?? !!(isFireworks || isCloudflareAiGatewayAnthropic),
supportsCacheControlOnTools: model.compat?.supportsCacheControlOnTools ?? !isFireworks,
allowEmptySignature: model.compat?.allowEmptySignature ?? false,
};
}
@@ -895,7 +896,13 @@ function buildParams(
const { cacheControl } = getCacheControl(model, options?.cacheRetention);
const params: MessageCreateParamsStreaming = {
model: model.id,
messages: convertMessages(context.messages, model, isOAuthToken, cacheControl),
messages: convertMessages(
context.messages,
model,
isOAuthToken,
cacheControl,
getAnthropicCompat(model).allowEmptySignature,
),
max_tokens: options?.maxTokens ?? model.maxTokens,
stream: true,
};
@@ -1001,6 +1008,7 @@ function convertMessages(
model: Model<"anthropic-messages">,
isOAuthToken: boolean,
cacheControl?: CacheControlEphemeral,
allowEmptySignature = false,
): MessageParam[] {
const params: MessageParam[] = [];
@@ -1069,13 +1077,21 @@ function convertMessages(
}
if (block.thinking.trim().length === 0) continue;
// If the thinking signature is missing or empty (e.g., from an aborted stream),
// Anthropic rejects the block, so convert it to a plain text block without
// <thinking> tags; this also prevents Claude from mimicking the tags in responses.
// Some compatible providers emit and accept empty signatures, so models marked
// with allowEmptySignature preserve the thinking block instead.
if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
blocks.push({
type: "text",
text: sanitizeSurrogates(block.thinking),
});
blocks.push(
allowEmptySignature
? {
type: "thinking",
thinking: sanitizeSurrogates(block.thinking),
signature: "",
}
: {
type: "text",
text: sanitizeSurrogates(block.thinking),
},
);
} else {
blocks.push({
type: "thinking",
+2
View File
@@ -451,6 +451,8 @@ export interface AnthropicMessagesCompat {
* Default: false.
*/
forceAdaptiveThinking?: boolean;
/** Whether to replay empty thinking signatures as `signature: ""` instead of converting thinking to text. Default: false. */
allowEmptySignature?: boolean;
}
/**
@@ -0,0 +1,88 @@
import { describe, expect, it } from "vitest";
import { streamSimple } from "../src/stream.ts";
import type { AssistantMessage, Context, Model } from "../src/types.ts";
/**
 * Minimal view of the request payload handed to `onPayload`, limited to the
 * fields these tests inspect.
 */
interface AnthropicPayload {
// Conversation messages as they appear in the outgoing request body.
messages?: Array<{
role: string;
// Content blocks; which optional fields are set depends on the block `type`.
content: Array<{ type: string; text?: string; thinking?: string; signature?: string }>;
}>;
}
/**
 * Sentinel error thrown from the `onPayload` hook once the payload has been
 * recorded.
 */
class PayloadCaptured extends Error {
	constructor() {
		const message = "payload captured";
		super(message);
		this.name = "PayloadCaptured";
	}
}
/**
 * Builds the fixture model used by the payload-capture tests.
 *
 * @param allowEmptySignature - When provided, it is set as
 *   `compat.allowEmptySignature`; when omitted, the model has no `compat` key.
 * @returns An `anthropic-messages` model description pointing at a local base URL.
 */
function makeModel(allowEmptySignature?: boolean): Model<"anthropic-messages"> {
	const base: Model<"anthropic-messages"> = {
		id: "mimo-v2.5-pro",
		name: "MiMo-V2.5-Pro",
		api: "anthropic-messages",
		provider: "xiaomi-token-plan-ams",
		baseUrl: "http://127.0.0.1:9/anthropic",
		reasoning: true,
		input: ["text"],
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
		contextWindow: 1048576,
		maxTokens: 1024,
	};
	// Only attach `compat` when the caller chose a value, so the default path is exercised too.
	if (allowEmptySignature === undefined) {
		return { ...base };
	}
	return { ...base, compat: { allowEmptySignature } };
}
/**
 * Builds a three-message conversation (user, assistant, user) whose assistant
 * turn holds a single thinking block.
 *
 * @param thinkingSignature - Signature stored on the assistant's thinking block.
 * @returns The context to replay through the provider.
 */
function makeContext(thinkingSignature: string): Context {
	const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
	const assistant: AssistantMessage = {
		role: "assistant",
		content: [{ type: "thinking", thinking: "internal reasoning", thinkingSignature }],
		provider: "xiaomi-token-plan-ams",
		api: "anthropic-messages",
		model: "mimo-v2.5-pro",
		timestamp: Date.now(),
		usage: {
			input: 0,
			output: 0,
			cacheRead: 0,
			cacheWrite: 0,
			totalTokens: 0,
			cost: zeroCost,
		},
		stopReason: "stop",
	};
	const firstUser = { role: "user" as const, content: "first", timestamp: Date.now() };
	const secondUser = { role: "user" as const, content: "second", timestamp: Date.now() };
	return { messages: [firstUser, assistant, secondUser] };
}
/**
 * Starts a stream for `model` and `context` and returns the payload passed to
 * `onPayload`.
 *
 * The hook throws `PayloadCaptured` right after recording the payload, and the
 * function then awaits the stream result before returning.
 *
 * @throws Error when `onPayload` was never invoked.
 */
async function capturePayload(model: Model<"anthropic-messages">, context: Context): Promise<AnthropicPayload> {
	const captured: { payload?: AnthropicPayload } = {};
	const recordAndAbort = (payload: unknown): never => {
		captured.payload = payload as AnthropicPayload;
		throw new PayloadCaptured();
	};

	const stream = streamSimple(model, context, {
		apiKey: "fake-key",
		onPayload: (payload) => recordAndAbort(payload),
	});
	await stream.result();

	if (!captured.payload) {
		throw new Error("Expected payload capture before request");
	}
	return captured.payload;
}
describe("Anthropic empty thinking signature compat", () => {
	/** Captures the outgoing payload and returns the assistant message's content blocks. */
	const replayedAssistantContent = async (model: Model<"anthropic-messages">, thinkingSignature: string) => {
		const { messages } = await capturePayload(model, makeContext(thinkingSignature));
		return messages?.find(({ role }) => role === "assistant")?.content;
	};

	it("converts empty-signature thinking to text by default", async () => {
		const content = await replayedAssistantContent(makeModel(), "");

		expect(content).toEqual([{ type: "text", text: "internal reasoning" }]);
	});

	it("preserves empty-signature thinking when allowEmptySignature is enabled", async () => {
		// A whitespace-only signature is expected to be replayed as an empty one.
		const content = await replayedAssistantContent(makeModel(true), " ");

		expect(content).toEqual([{ type: "thinking", thinking: "internal reasoning", signature: "" }]);
	});
});
@@ -0,0 +1,114 @@
import { describe, expect, it } from "vitest";
import { completeSimple, getEnvApiKey, streamSimple } from "../src/stream.ts";
import type { AssistantMessage, Context, Model } from "../src/types.ts";
// Provider id used both to look up the API key and to label the smoke-test model.
const provider = "xiaomi-token-plan-ams";
// Result of the key lookup for this provider; the suite below is skipped when it is falsy.
const apiKey = getEnvApiKey(provider);
// Model description for the smoke test, with empty-signature replay enabled.
const model: Model<"anthropic-messages"> = {
id: "mimo-v2.5-pro",
name: "MiMo-V2.5-Pro Anthropic smoke",
api: "anthropic-messages",
provider,
baseUrl: "https://token-plan-ams.xiaomimimo.com/anthropic",
reasoning: true,
input: ["text"],
cost: { input: 1, output: 3, cacheRead: 0.2, cacheWrite: 0 },
contextWindow: 1048576,
maxTokens: 1024,
// Opt in to replaying thinking blocks as `signature: ""` instead of converting them to text.
compat: { allowEmptySignature: true },
};
/**
 * Minimal view of the request payload handed to `onPayload`, limited to the
 * fields this smoke test inspects.
 */
interface AnthropicPayload {
// Conversation messages as they appear in the outgoing request body.
messages?: Array<{
role: string;
// Either a plain string or a list of content blocks.
content: string | Array<{ type: string; text?: string; thinking?: string; signature?: string }>;
}>;
}
/**
 * Sentinel error thrown from the `onPayload` hook once the replay payload has
 * been recorded.
 */
class PayloadCaptured extends Error {
	constructor() {
		const message = "payload captured";
		super(message);
		this.name = "PayloadCaptured";
	}
}
/**
 * Builds the first-turn context: a system prompt plus a single user message
 * asking for a fixed reply.
 */
function makeInitialContext(): Context {
	const systemPrompt = "You are concise. Follow the requested output format exactly.";
	const firstUserMessage = {
		role: "user" as const,
		content: "Think internally if you need to, then reply with exactly this text and nothing else: first-ok",
		timestamp: Date.now(),
	};
	return { systemPrompt, messages: [firstUserMessage] };
}
/**
 * Returns the `thinking` content blocks of an assistant message, in their
 * original order.
 */
function getThinkingBlocks(message: AssistantMessage) {
	type ThinkingBlock = Extract<AssistantMessage["content"][number], { type: "thinking" }>;
	const found: ThinkingBlock[] = [];
	for (const block of message.content) {
		if (block.type === "thinking") {
			found.push(block);
		}
	}
	return found;
}
/**
 * Starts a stream for the replay `context` and returns the payload passed to
 * `onPayload`.
 *
 * The hook throws `PayloadCaptured` right after recording the payload, and the
 * function then awaits the stream result before returning.
 *
 * @throws Error when `onPayload` was never invoked.
 */
async function captureReplayPayload(context: Context): Promise<AnthropicPayload> {
	const captured: { payload?: AnthropicPayload } = {};
	const recordAndAbort = (payload: unknown): never => {
		captured.payload = payload as AnthropicPayload;
		throw new PayloadCaptured();
	};

	const stream = streamSimple(model, context, {
		apiKey,
		maxTokens: 512,
		reasoning: "high",
		onPayload: (payload) => recordAndAbort(payload),
	});
	await stream.result();

	if (!captured.payload) {
		throw new Error("Expected payload capture before request");
	}
	return captured.payload;
}
describe.skipIf(!apiKey)("Xiaomi Token Plan AMS Anthropic empty thinking signature smoke", () => {
	it("reproduces empty thinking signatures and preserves them for replay", { timeout: 60000, retry: 1 }, async () => {
		type ReplayedBlock = { type: string; text?: string };

		// Turn 1: a live completion that should come back with an empty thinking signature.
		const firstContext = makeInitialContext();
		const first = await completeSimple(model, firstContext, {
			apiKey,
			maxTokens: 512,
			reasoning: "high",
		});
		expect(first.stopReason, first.errorMessage).toBe("stop");

		const thinkingBlocks = getThinkingBlocks(first);
		expect(thinkingBlocks.length).toBeGreaterThan(0);
		const sawEmptySignature = thinkingBlocks.some(({ thinkingSignature }) => thinkingSignature === "");
		expect(sawEmptySignature).toBe(true);

		// Turn 2: replay the assistant answer and inspect the outgoing payload only.
		const followUp = {
			role: "user" as const,
			content: "Reply with exactly this text and nothing else: second-ok",
			timestamp: Date.now(),
		};
		const replayContext: Context = {
			...firstContext,
			messages: [...firstContext.messages, first, followUp],
		};
		const replayPayload = await captureReplayPayload(replayContext);

		const assistantPayload = replayPayload.messages?.find(({ role }) => role === "assistant");
		expect(assistantPayload).toBeDefined();
		expect(Array.isArray(assistantPayload!.content)).toBe(true);

		const replayedBlocks = assistantPayload!.content as ReplayedBlock[];
		const replayedThinking = replayedBlocks.filter(({ type }) => type === "thinking");
		const replayedText = replayedBlocks.filter(({ type }) => type === "text");
		const originalThinking = thinkingBlocks[0].thinking;

		// The thinking must go back as a thinking block with `signature: ""`, not as plain text.
		expect(replayedThinking).toEqual([{ type: "thinking", thinking: originalThinking, signature: "" }]);
		expect(replayedText.some(({ text }) => text === originalThinking)).toBe(false);
	});
});
@@ -232,7 +232,7 @@ models: [{
Use `openrouter` for OpenRouter-style `reasoning: { effort }` controls. Use `together` for Together-style `reasoning: { enabled }` controls; with `supportsReasoningEffort`, it also sends `reasoning_effort`. Use `qwen-chat-template` instead for local Qwen-compatible servers that read `chat_template_kwargs.enable_thinking`.
Use `cacheControlFormat: "anthropic"` for OpenAI-compatible providers that expose Anthropic-style prompt caching via `cache_control` on the system prompt, last tool definition, and last user/assistant text content.
For Anthropic-compatible providers using `api: "anthropic-messages"`, set `compat.forceAdaptiveThinking: true` on models or providers whose upstream model requires adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`). Built-in adaptive Claude models set this automatically.
For Anthropic-compatible providers using `api: "anthropic-messages"`, set `compat.forceAdaptiveThinking: true` on models or providers whose upstream model requires adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`). Built-in adaptive Claude models set this automatically. Set `compat.allowEmptySignature: true` only for providers that emit empty thinking signatures and expect `signature: ""` on replay.
> Migration note: Mistral moved from `openai-completions` to `mistral-conversations`.
> Use `mistral-conversations` for native Mistral models.
@@ -727,6 +727,7 @@ interface ProviderModelConfig {
sendSessionAffinityHeaders?: boolean;
supportsCacheControlOnTools?: boolean;
forceAdaptiveThinking?: boolean;
allowEmptySignature?: boolean;
};
}
```
+5 -1
View File
@@ -330,6 +330,8 @@ By default pi sends per-tool `eager_input_streaming: true`. If a proxy or Anthro
Some Anthropic models require adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`) instead of the legacy budget-based thinking payload. Built-in models set this automatically. For custom providers or aliases that route to those models, set `forceAdaptiveThinking` to `true`.
Some Anthropic-compatible providers emit thinking blocks with empty signatures and still expect them on replay. Set `allowEmptySignature` to `true` only for those providers; real Anthropic rejects empty thinking signatures.
```json
{
"providers": {
@@ -340,7 +342,8 @@ Some Anthropic models require adaptive thinking (`thinking.type: "adaptive"` plu
"compat": {
"supportsEagerToolInputStreaming": false,
"supportsLongCacheRetention": true,
"forceAdaptiveThinking": true
"forceAdaptiveThinking": true,
"allowEmptySignature": true
},
"models": [
{
@@ -361,6 +364,7 @@ Some Anthropic models require adaptive thinking (`thinking.type: "adaptive"` plu
| `sendSessionAffinityHeaders` | Whether to send `x-session-affinity` from the session id when caching is enabled. Default: auto-detected for known providers. |
| `supportsCacheControlOnTools` | Whether the provider accepts Anthropic-style `cache_control` markers on tool definitions. Default: `true`. |
| `forceAdaptiveThinking` | Whether to send adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`) for this model. Built-in adaptive models set this automatically. Default: `false`. |
| `allowEmptySignature` | Whether to replay empty thinking signatures as `signature: ""` instead of converting thinking to text. Default: `false`. |
## OpenAI Compatibility