mirror of
https://github.com/earendil-works/pi.git
synced 2026-06-18 15:54:04 +08:00
@@ -21,7 +21,7 @@
|
||||
- Always ask before removing functionality or code that appears intentional.
|
||||
- Do not preserve backward compatibility unless the user asks for it.
|
||||
- Never hardcode key checks (e.g. `matchesKey(keyData, "ctrl+x")`). Add defaults to `DEFAULT_EDITOR_KEYBINDINGS` or `DEFAULT_APP_KEYBINDINGS` so they stay configurable.
|
||||
- Never modify `packages/ai/src/models.generated.ts` directly; update `packages/ai/scripts/generate-models.ts` instead.
|
||||
- Never modify `packages/ai/src/models.generated.ts` directly; update `packages/ai/scripts/generate-models.ts` instead, then regenerate. Including the resulting `models.generated.ts` diff is always OK, even if regeneration includes unrelated upstream model metadata changes.
|
||||
|
||||
## Commands
|
||||
|
||||
|
||||
@@ -6,6 +6,10 @@
|
||||
|
||||
- Added OpenAI Codex subscription device-code login as a selectable headless alternative while keeping browser login as the default.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed Anthropic-compatible replay for providers that return empty thinking signatures by adding an opt-in `allowEmptySignature` compatibility flag ([#4464](https://github.com/earendil-works/pi/issues/4464)).
|
||||
|
||||
## [0.76.0] - 2026-05-27
|
||||
|
||||
### Fixed
|
||||
|
||||
@@ -262,9 +262,14 @@ function applyThinkingLevelMetadata(model: Model<any>): void {
|
||||
}
|
||||
|
||||
/**
 * Builds the Anthropic Messages compatibility overrides for a generated model entry.
 *
 * - `supportsEagerToolInputStreaming` is set to `false` when `${provider}:${modelId}` is listed in
 *   `EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS`.
 * - `allowEmptySignature` is set to `true` for the `xiaomi` provider and for every provider whose
 *   id starts with `xiaomi-token-plan-`.
 *
 * @param provider Provider id of the model entry.
 * @param modelId Model id of the model entry.
 * @returns The collected overrides, or `undefined` when no override applies.
 */
function getAnthropicMessagesCompat(provider: string, modelId: string): AnthropicMessagesCompat | undefined {
	const compat: AnthropicMessagesCompat = {};

	if (EAGER_TOOL_INPUT_STREAMING_UNSUPPORTED_ANTHROPIC_MODELS.has(`${provider}:${modelId}`)) {
		compat.supportsEagerToolInputStreaming = false;
	}

	if (provider === "xiaomi" || provider.startsWith("xiaomi-token-plan-")) {
		compat.allowEmptySignature = true;
	}

	// An empty override object carries no information; report "no compat" instead.
	return Object.keys(compat).length > 0 ? compat : undefined;
}
|
||||
|
||||
function getBedrockBaseUrl(modelId: string): string {
|
||||
|
||||
@@ -9307,7 +9307,7 @@ export const MODELS = {
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 204800,
|
||||
contextWindow: 262144,
|
||||
maxTokens: 8192,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"minimax/minimax-m2.7": {
|
||||
@@ -11915,13 +11915,13 @@ export const MODELS = {
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.06599999999999999,
|
||||
output: 0.26,
|
||||
cacheRead: 0.029,
|
||||
input: 0.063,
|
||||
output: 0.21,
|
||||
cacheRead: 0.020999999999999998,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 262144,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"thedrummer/rocinante-12b": {
|
||||
id: "thedrummer/rocinante-12b",
|
||||
|
||||
@@ -177,6 +177,7 @@ function getAnthropicCompat(
|
||||
sendSessionAffinityHeaders:
|
||||
model.compat?.sendSessionAffinityHeaders ?? !!(isFireworks || isCloudflareAiGatewayAnthropic),
|
||||
supportsCacheControlOnTools: model.compat?.supportsCacheControlOnTools ?? !isFireworks,
|
||||
allowEmptySignature: model.compat?.allowEmptySignature ?? false,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -895,7 +896,13 @@ function buildParams(
|
||||
const { cacheControl } = getCacheControl(model, options?.cacheRetention);
|
||||
const params: MessageCreateParamsStreaming = {
|
||||
model: model.id,
|
||||
messages: convertMessages(context.messages, model, isOAuthToken, cacheControl),
|
||||
messages: convertMessages(
|
||||
context.messages,
|
||||
model,
|
||||
isOAuthToken,
|
||||
cacheControl,
|
||||
getAnthropicCompat(model).allowEmptySignature,
|
||||
),
|
||||
max_tokens: options?.maxTokens ?? model.maxTokens,
|
||||
stream: true,
|
||||
};
|
||||
@@ -1001,6 +1008,7 @@ function convertMessages(
|
||||
model: Model<"anthropic-messages">,
|
||||
isOAuthToken: boolean,
|
||||
cacheControl?: CacheControlEphemeral,
|
||||
allowEmptySignature = false,
|
||||
): MessageParam[] {
|
||||
const params: MessageParam[] = [];
|
||||
|
||||
@@ -1069,13 +1077,21 @@ function convertMessages(
|
||||
}
|
||||
if (block.thinking.trim().length === 0) continue;
|
||||
// If thinking signature is missing/empty (e.g., from aborted stream),
|
||||
// convert to plain text block without <thinking> tags to avoid API rejection
|
||||
// and prevent Claude from mimicking the tags in responses
|
||||
// convert to plain text for Anthropic. Some compatible providers emit
|
||||
// and accept empty signatures, so let marked models preserve the block.
|
||||
if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
|
||||
blocks.push({
|
||||
type: "text",
|
||||
text: sanitizeSurrogates(block.thinking),
|
||||
});
|
||||
blocks.push(
|
||||
allowEmptySignature
|
||||
? {
|
||||
type: "thinking",
|
||||
thinking: sanitizeSurrogates(block.thinking),
|
||||
signature: "",
|
||||
}
|
||||
: {
|
||||
type: "text",
|
||||
text: sanitizeSurrogates(block.thinking),
|
||||
},
|
||||
);
|
||||
} else {
|
||||
blocks.push({
|
||||
type: "thinking",
|
||||
|
||||
@@ -451,6 +451,8 @@ export interface AnthropicMessagesCompat {
|
||||
* Default: false.
|
||||
*/
|
||||
forceAdaptiveThinking?: boolean;
|
||||
/** Whether to replay empty thinking signatures as `signature: ""` instead of converting thinking to text. Default: false. */
|
||||
allowEmptySignature?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { streamSimple } from "../src/stream.ts";
|
||||
import type { AssistantMessage, Context, Model } from "../src/types.ts";
|
||||
|
||||
/**
 * Minimal view of the request payload passed to `onPayload`, limited to the fields
 * these tests inspect.
 */
interface AnthropicPayload {
	messages?: Array<{
		role: string;
		/** Either a plain string or a list of content blocks. */
		content: string | Array<{ type: string; text?: string; thinking?: string; signature?: string }>;
	}>;
}
|
||||
|
||||
/**
 * Sentinel error thrown from the `onPayload` hook right after the payload has been recorded.
 * Its `name` and `message` are fixed so it is easy to recognise.
 */
class PayloadCaptured extends Error {
	constructor() {
		super("payload captured");
		this.name = "PayloadCaptured";
	}
}
|
||||
|
||||
/**
 * Creates the Anthropic-messages model fixture used by these tests.
 *
 * @param allowEmptySignature When provided, the model gets `compat: { allowEmptySignature }`.
 *   When omitted, the `compat` key is left off entirely.
 * @returns A model definition pointing at a local placeholder base URL.
 */
function makeModel(allowEmptySignature?: boolean): Model<"anthropic-messages"> {
	return {
		id: "mimo-v2.5-pro",
		name: "MiMo-V2.5-Pro",
		api: "anthropic-messages",
		provider: "xiaomi-token-plan-ams",
		baseUrl: "http://127.0.0.1:9/anthropic",
		reasoning: true,
		input: ["text"],
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
		contextWindow: 1048576,
		maxTokens: 1024,
		// Spread conditionally so "not configured" stays distinguishable from an explicit `false`.
		...(allowEmptySignature === undefined ? {} : { compat: { allowEmptySignature } }),
	};
}
|
||||
|
||||
/**
 * Creates a three-message conversation (user, assistant, user) whose assistant turn holds a
 * single thinking block.
 *
 * @param thinkingSignature Signature stored on the assistant's thinking block, used verbatim.
 * @returns The context to replay through the provider.
 */
function makeContext(thinkingSignature: string): Context {
	const assistant: AssistantMessage = {
		role: "assistant",
		content: [{ type: "thinking", thinking: "internal reasoning", thinkingSignature }],
		provider: "xiaomi-token-plan-ams",
		api: "anthropic-messages",
		model: "mimo-v2.5-pro",
		timestamp: Date.now(),
		usage: {
			input: 0,
			output: 0,
			cacheRead: 0,
			cacheWrite: 0,
			totalTokens: 0,
			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
		},
		stopReason: "stop",
	};

	return {
		messages: [
			{ role: "user", content: "first", timestamp: Date.now() },
			assistant,
			{ role: "user", content: "second", timestamp: Date.now() },
		],
	};
}
|
||||
|
||||
/**
 * Starts `streamSimple` for the given model and context and returns the payload handed to
 * `onPayload`.
 *
 * The hook stores the payload and then throws `PayloadCaptured`.
 *
 * @param model Model to stream with.
 * @param context Conversation to convert into a request payload.
 * @returns The payload seen by `onPayload`.
 * @throws Error when `onPayload` was never invoked.
 */
async function capturePayload(model: Model<"anthropic-messages">, context: Context): Promise<AnthropicPayload> {
	let capturedPayload: AnthropicPayload | undefined;

	const stream = streamSimple(model, context, {
		apiKey: "fake-key",
		onPayload: (payload) => {
			capturedPayload = payload as AnthropicPayload;
			throw new PayloadCaptured();
		},
	});

	// NOTE(review): relies on `result()` settling without rejecting when the hook throws — confirm.
	await stream.result();

	if (!capturedPayload) throw new Error("Expected payload capture before request");
	return capturedPayload;
}
|
||||
|
||||
// Checks how an assistant thinking block with an empty or blank signature is serialised on
// replay, with and without the `allowEmptySignature` compat flag.
describe("Anthropic empty thinking signature compat", () => {
	it("converts empty-signature thinking to text by default", async () => {
		const payload = await capturePayload(makeModel(), makeContext(""));
		const assistant = payload.messages?.find((message) => message.role === "assistant");

		expect(assistant?.content).toEqual([{ type: "text", text: "internal reasoning" }]);
	});

	it("preserves empty-signature thinking when allowEmptySignature is enabled", async () => {
		// A whitespace-only signature is expected to be replayed as `signature: ""`.
		const payload = await capturePayload(makeModel(true), makeContext(" "));
		const assistant = payload.messages?.find((message) => message.role === "assistant");

		expect(assistant?.content).toEqual([{ type: "thinking", thinking: "internal reasoning", signature: "" }]);
	});
});
|
||||
@@ -0,0 +1,114 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { completeSimple, getEnvApiKey, streamSimple } from "../src/stream.ts";
|
||||
import type { AssistantMessage, Context, Model } from "../src/types.ts";
|
||||
|
||||
/** Provider id under test. */
const provider = "xiaomi-token-plan-ams";

/** Key looked up through `getEnvApiKey`; the suite below is skipped when it is falsy. */
const apiKey = getEnvApiKey(provider);

/** Model definition for the live smoke test, with `allowEmptySignature` switched on. */
const model: Model<"anthropic-messages"> = {
	id: "mimo-v2.5-pro",
	name: "MiMo-V2.5-Pro Anthropic smoke",
	api: "anthropic-messages",
	provider,
	baseUrl: "https://token-plan-ams.xiaomimimo.com/anthropic",
	reasoning: true,
	input: ["text"],
	cost: { input: 1, output: 3, cacheRead: 0.2, cacheWrite: 0 },
	contextWindow: 1048576,
	maxTokens: 1024,
	compat: { allowEmptySignature: true },
};
|
||||
|
||||
/**
 * Minimal view of the request payload passed to `onPayload`, limited to the fields
 * this smoke test inspects.
 */
interface AnthropicPayload {
	messages?: Array<{
		role: string;
		/** Either a plain string or a list of content blocks. */
		content: string | Array<{ type: string; text?: string; thinking?: string; signature?: string }>;
	}>;
}
|
||||
|
||||
/**
 * Sentinel error thrown from the `onPayload` hook right after the replay payload has been
 * recorded. Its `name` and `message` are fixed so it is easy to recognise.
 */
class PayloadCaptured extends Error {
	constructor() {
		super("payload captured");
		this.name = "PayloadCaptured";
	}
}
|
||||
|
||||
/**
 * Creates the first-turn context: a system prompt plus one user message asking the model to
 * answer with the literal text `first-ok`.
 *
 * @returns A fresh context; the user message is timestamped at call time.
 */
function makeInitialContext(): Context {
	return {
		systemPrompt: "You are concise. Follow the requested output format exactly.",
		messages: [
			{
				role: "user",
				content: "Think internally if you need to, then reply with exactly this text and nothing else: first-ok",
				timestamp: Date.now(),
			},
		],
	};
}
|
||||
|
||||
/**
 * Returns the content blocks of `message` whose `type` is `"thinking"`, in their original order.
 *
 * @param message Assistant message to inspect.
 */
function getThinkingBlocks(message: AssistantMessage) {
	return message.content.filter((block) => block.type === "thinking");
}
|
||||
|
||||
/**
 * Starts `streamSimple` for the module-level `model` with the given context and returns the
 * payload handed to `onPayload`.
 *
 * The hook stores the payload and then throws `PayloadCaptured`.
 *
 * @param context Conversation (including the earlier assistant turn) to replay.
 * @returns The payload seen by `onPayload`.
 * @throws Error when `onPayload` was never invoked.
 */
async function captureReplayPayload(context: Context): Promise<AnthropicPayload> {
	let capturedPayload: AnthropicPayload | undefined;

	const stream = streamSimple(model, context, {
		apiKey,
		maxTokens: 512,
		reasoning: "high",
		onPayload: (payload) => {
			capturedPayload = payload as AnthropicPayload;
			throw new PayloadCaptured();
		},
	});

	// NOTE(review): relies on `result()` settling without rejecting when the hook throws — confirm.
	await stream.result();

	if (!capturedPayload) {
		throw new Error("Expected payload capture before request");
	}
	return capturedPayload;
}
|
||||
|
||||
// Live smoke test, skipped when no API key is available for the provider. It makes one real
// completion, then checks that the returned thinking block is replayed with `signature: ""`.
describe.skipIf(!apiKey)("Xiaomi Token Plan AMS Anthropic empty thinking signature smoke", () => {
	it("reproduces empty thinking signatures and preserves them for replay", { timeout: 60000, retry: 1 }, async () => {
		const firstContext = makeInitialContext();
		const first = await completeSimple(model, firstContext, {
			apiKey,
			maxTokens: 512,
			reasoning: "high",
		});

		// Pass the error message along so a failed request explains itself in the test output.
		expect(first.stopReason, first.errorMessage).toBe("stop");

		const thinkingBlocks = getThinkingBlocks(first);
		expect(thinkingBlocks.length).toBeGreaterThan(0);
		expect(thinkingBlocks.some((block) => block.thinkingSignature === "")).toBe(true);

		const replayContext: Context = {
			...firstContext,
			messages: [
				...firstContext.messages,
				first,
				{
					role: "user",
					content: "Reply with exactly this text and nothing else: second-ok",
					timestamp: Date.now(),
				},
			],
		};

		const replayPayload = await captureReplayPayload(replayContext);
		const assistantPayload = replayPayload.messages?.find((message) => message.role === "assistant");
		expect(assistantPayload).toBeDefined();

		const assistantContent = assistantPayload?.content;
		expect(Array.isArray(assistantContent)).toBe(true);
		// Narrow for the type checker; the expectations above already fail the test in this case.
		if (!Array.isArray(assistantContent)) {
			throw new Error("Expected assistant content blocks in replay payload");
		}

		const replayedThinking = assistantContent.filter((block) => block.type === "thinking");
		const replayedText = assistantContent.filter((block) => block.type === "text");

		// The thinking block must survive as a thinking block and must not be duplicated as text.
		expect(replayedThinking).toEqual([{ type: "thinking", thinking: thinkingBlocks[0].thinking, signature: "" }]);
		expect(replayedText.some((block) => block.text === thinkingBlocks[0].thinking)).toBe(false);
	});
});
|
||||
@@ -232,7 +232,7 @@ models: [{
|
||||
Use `openrouter` for OpenRouter-style `reasoning: { effort }` controls. Use `together` for Together-style `reasoning: { enabled }` controls; with `supportsReasoningEffort`, it also sends `reasoning_effort`. Use `qwen-chat-template` instead for local Qwen-compatible servers that read `chat_template_kwargs.enable_thinking`.
|
||||
Use `cacheControlFormat: "anthropic"` for OpenAI-compatible providers that expose Anthropic-style prompt caching via `cache_control` on the system prompt, last tool definition, and last user/assistant text content.
|
||||
|
||||
For Anthropic-compatible providers using `api: "anthropic-messages"`, set `compat.forceAdaptiveThinking: true` on models or providers whose upstream model requires adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`). Built-in adaptive Claude models set this automatically.
|
||||
For Anthropic-compatible providers using `api: "anthropic-messages"`, set `compat.forceAdaptiveThinking: true` on models or providers whose upstream model requires adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`). Built-in adaptive Claude models set this automatically. Set `compat.allowEmptySignature: true` only for providers that emit empty thinking signatures and expect `signature: ""` on replay.
|
||||
|
||||
> Migration note: Mistral moved from `openai-completions` to `mistral-conversations`.
|
||||
> Use `mistral-conversations` for native Mistral models.
|
||||
@@ -727,6 +727,7 @@ interface ProviderModelConfig {
|
||||
sendSessionAffinityHeaders?: boolean;
|
||||
supportsCacheControlOnTools?: boolean;
|
||||
forceAdaptiveThinking?: boolean;
|
||||
allowEmptySignature?: boolean;
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
@@ -330,6 +330,8 @@ By default pi sends per-tool `eager_input_streaming: true`. If a proxy or Anthro
|
||||
|
||||
Some Anthropic models require adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`) instead of the legacy budget-based thinking payload. Built-in models set this automatically. For custom providers or aliases that route to those models, set `forceAdaptiveThinking` to `true`.
|
||||
|
||||
Some Anthropic-compatible providers emit thinking blocks with empty signatures and still expect them on replay. Set `allowEmptySignature` to `true` only for those providers; real Anthropic rejects empty thinking signatures.
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
@@ -340,7 +342,8 @@ Some Anthropic models require adaptive thinking (`thinking.type: "adaptive"` plu
|
||||
"compat": {
|
||||
"supportsEagerToolInputStreaming": false,
|
||||
"supportsLongCacheRetention": true,
|
||||
"forceAdaptiveThinking": true
|
||||
"forceAdaptiveThinking": true,
|
||||
"allowEmptySignature": true
|
||||
},
|
||||
"models": [
|
||||
{
|
||||
@@ -361,6 +364,7 @@ Some Anthropic models require adaptive thinking (`thinking.type: "adaptive"` plu
|
||||
| `sendSessionAffinityHeaders` | Whether to send `x-session-affinity` from the session id when caching is enabled. Default: auto-detected for known providers. |
|
||||
| `supportsCacheControlOnTools` | Whether the provider accepts Anthropic-style `cache_control` markers on tool definitions. Default: `true`. |
|
||||
| `forceAdaptiveThinking` | Whether to send adaptive thinking (`thinking.type: "adaptive"` plus `output_config.effort`) for this model. Built-in adaptive models set this automatically. Default: `false`. |
|
||||
| `allowEmptySignature` | Whether to replay empty thinking signatures as `signature: ""` instead of converting thinking to text. Default: `false`. |
|
||||
|
||||
## OpenAI Compatibility
|
||||
|
||||
|
||||
Reference in New Issue
Block a user