feat(agent): AgentHarness streams through a required Models instance (phase 6)

AgentHarnessOptions.models is required; the harness stream path,
compaction, and branch summarization go through models.streamSimple()/
completeSimple() instead of the compat globals. getApiKeyAndHeaders
stays and wins per-field over provider-resolved auth, but is no longer
required: without it, requests resolve through provider auth.

compact()/generateSummary()/generateBranchSummary() take a Models
parameter; explicit apiKey becomes optional. StreamFn is redefined
structurally (Models.streamSimple satisfies it), dropping the compat
type dependency from agent types.

Harness tests build per-file Models collections with fauxProvider()
and unique provider ids instead of mutating the global api-registry.
This commit is contained in:
Mario Zechner
2026-06-10 21:27:21 +02:00
Unverified
parent 8a0903ebf2
commit f0ccbbf011
10 changed files with 175 additions and 110 deletions
+5 -4
View File
@@ -820,9 +820,9 @@ Check items off as they land. Keep this list current; it is the working state fo
### Phase 6 — AgentHarness
- [ ] `AgentHarnessOptions.models` required; harness stream path uses `models.streamSimple()`.
- [ ] Compaction/branch-summarization paths use the harness `Models` instance.
- [ ] Harness tests use `createModels()` + faux provider.
- [x] `AgentHarnessOptions.models` required (`readonly models` on the harness); the harness stream path uses `models.streamSimple()`. `StreamFn` redefined structurally (no compat type dependency); `Models.streamSimple` satisfies it.
- [x] Compaction/branch-summarization take the harness `Models` instance; explicit `getApiKeyAndHeaders` auth stays and wins per-field, but is no longer required — requests resolve through provider auth otherwise (the hard "No auth available" throws are gone).
- [x] Harness tests use `createModels()` + `fauxProvider()` with unique per-fake provider ids; no global api-registry state, no unregister bookkeeping.
### Phase 7 — coding-agent bridge (minimal)
@@ -843,8 +843,9 @@ The full AuthStorage deletion (`FileCredentialStore` + decorators, see "Replacin
### Deferred / follow-ups
- [ ] Web OAuth implementations (sitegeist-style) behind `oauth: "web"`.
- [ ] Web OAuth implementations (sitegeist-style) as an alternative `OAuthAuth`.
- [ ] coding-agent `ModelRegistry` -> session `ModelManager` migration; delete `/compat`.
- [ ] Move ALL internal `/compat` imports to the new API before compat is deleted: every package's src, all tests, and the example extensions (examples then demonstrate the new API; the extension-loader root-to-compat alias dies with compat). Nothing inside the repo may import `/compat` at that point.
- [ ] Images API registry redesign (untouched in this pass).
## Error behavior
+12 -14
View File
@@ -1,10 +1,4 @@
import {
type AssistantMessage,
type ImageContent,
type Model,
streamSimple,
type UserMessage,
} from "@earendil-works/pi-ai/compat";
import type { AssistantMessage, ImageContent, Model, Models, UserMessage } from "@earendil-works/pi-ai";
import { runAgentLoop } from "../agent-loop.ts";
import type {
AgentContext,
@@ -178,6 +172,7 @@ export class AgentHarness<
> {
readonly env: ExecutionEnv;
private session: Session;
readonly models: Models;
private phase: AgentHarnessPhase = "idle";
private runAbortController?: AbortController;
private runPromise?: Promise<void>;
@@ -200,6 +195,7 @@ export class AgentHarness<
constructor(options: AgentHarnessOptions<TSkill, TPromptTemplate, TTool>) {
this.env = options.env;
this.session = options.session;
this.models = options.models;
this.resources = options.resources ?? {};
this.streamOptions = cloneStreamOptions(options.streamOptions);
this.systemPrompt = options.systemPrompt;
@@ -382,7 +378,7 @@ export class AgentHarness<
headers: mergeHeaders(turnState.streamOptions.headers, auth?.headers),
};
const requestOptions = await this.emitBeforeProviderRequest(model, turnState.sessionId, snapshotOptions);
return streamSimple(model, context, {
return this.models.streamSimple(model, context, {
cacheRetention: requestOptions.cacheRetention,
headers: requestOptions.headers,
maxRetries: requestOptions.maxRetries,
@@ -713,8 +709,8 @@ export class AgentHarness<
try {
const model = this.model;
if (!model) throw new AgentHarnessError("invalid_state", "No model set for compaction");
// Explicit auth wins; otherwise the request resolves through provider auth.
const auth = await this.getApiKeyAndHeaders?.(model);
if (!auth) throw new AgentHarnessError("auth", "No auth available for compaction");
const branchEntries = await this.session.getBranch();
const preparationResult = prepareCompaction(branchEntries, DEFAULT_COMPACTION_SETTINGS);
if (!preparationResult.ok) throw preparationResult.error;
@@ -733,9 +729,10 @@ export class AgentHarness<
? { ok: true as const, value: provided }
: await compact(
preparation,
this.models,
model,
auth.apiKey,
auth.headers,
auth?.apiKey,
auth?.headers,
customInstructions,
undefined,
this.thinkingLevel,
@@ -792,12 +789,13 @@ export class AgentHarness<
if (!summaryText && options?.summarize && entries.length > 0) {
const model = this.model;
if (!model) throw new AgentHarnessError("invalid_state", "No model set for branch summary");
// Explicit auth wins; otherwise the request resolves through provider auth.
const auth = await this.getApiKeyAndHeaders?.(model);
if (!auth) throw new AgentHarnessError("auth", "No auth available for branch summary");
const branchSummary = await generateBranchSummary(entries, {
models: this.models,
model,
apiKey: auth.apiKey,
headers: auth.headers,
apiKey: auth?.apiKey,
headers: auth?.headers,
signal: new AbortController().signal,
customInstructions: hookResult?.customInstructions ?? options?.customInstructions,
replaceInstructions: hookResult?.replaceInstructions ?? options?.replaceInstructions,
@@ -1,5 +1,5 @@
import type { Model } from "@earendil-works/pi-ai/compat";
import { completeSimple } from "@earendil-works/pi-ai/compat";
import type { Model, Models } from "@earendil-works/pi-ai";
import type { AgentMessage } from "../../types.ts";
import {
convertToLlm,
@@ -49,10 +49,12 @@ export interface CollectEntriesResult {
/** Options for generating a branch summary. */
export interface GenerateBranchSummaryOptions {
/** Provider collection the summarization request goes through. */
models: Models;
/** Model used for summarization. */
model: Model<any>;
/** API key forwarded to the provider. */
apiKey: string;
/** Explicit API key; wins over provider-resolved auth. */
apiKey?: string;
/** Optional request headers forwarded to the provider. */
headers?: Record<string, string>;
/** Abort signal for the summarization request. */
@@ -202,7 +204,16 @@ export async function generateBranchSummary(
entries: SessionTreeEntry[],
options: GenerateBranchSummaryOptions,
): Promise<Result<BranchSummaryResult, BranchSummaryError>> {
const { model, apiKey, headers, signal, customInstructions, replaceInstructions, reserveTokens = 16384 } = options;
const {
models,
model,
apiKey,
headers,
signal,
customInstructions,
replaceInstructions,
reserveTokens = 16384,
} = options;
const contextWindow = model.contextWindow || 128000;
const tokenBudget = contextWindow - reserveTokens;
@@ -230,7 +241,7 @@ export async function generateBranchSummary(
timestamp: Date.now(),
},
];
const response = await completeSimple(
const response = await models.completeSimple(
model,
{ systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, messages: summarizationMessages },
{ apiKey, headers, signal, maxTokens: 2048 },
@@ -1,5 +1,4 @@
import type { AssistantMessage, ImageContent, Model, TextContent, Usage } from "@earendil-works/pi-ai/compat";
import { completeSimple } from "@earendil-works/pi-ai/compat";
import type { AssistantMessage, ImageContent, Model, Models, TextContent, Usage } from "@earendil-works/pi-ai";
import type { AgentMessage, ThinkingLevel } from "../../types.ts";
import {
convertToLlm,
@@ -455,9 +454,10 @@ Keep each section concise. Preserve exact file paths, function names, and error
/** Generate or update a conversation summary for compaction. */
export async function generateSummary(
currentMessages: AgentMessage[],
models: Models,
model: Model<any>,
reserveTokens: number,
apiKey: string,
apiKey?: string,
headers?: Record<string, string>,
signal?: AbortSignal,
customInstructions?: string,
@@ -493,7 +493,7 @@ export async function generateSummary(
? { maxTokens, signal, apiKey, headers, reasoning: thinkingLevel }
: { maxTokens, signal, apiKey, headers };
const response = await completeSimple(
const response = await models.completeSimple(
model,
{ systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, messages: summarizationMessages },
completionOptions,
@@ -626,8 +626,9 @@ export { serializeConversation } from "./utils.ts";
/** Generate compaction summary data from prepared session history. */
export async function compact(
preparation: CompactionPreparation,
models: Models,
model: Model<any>,
apiKey: string,
apiKey?: string,
headers?: Record<string, string>,
customInstructions?: string,
signal?: AbortSignal,
@@ -655,6 +656,7 @@ export async function compact(
messagesToSummarize.length > 0
? generateSummary(
messagesToSummarize,
models,
model,
settings.reserveTokens,
apiKey,
@@ -667,6 +669,7 @@ export async function compact(
: Promise.resolve(ok<string, CompactionError>("No prior history.")),
generateTurnPrefixSummary(
turnPrefixMessages,
models,
model,
settings.reserveTokens,
apiKey,
@@ -681,6 +684,7 @@ export async function compact(
} else {
const summaryResult = await generateSummary(
messagesToSummarize,
models,
model,
settings.reserveTokens,
apiKey,
@@ -706,9 +710,10 @@ export async function compact(
}
async function generateTurnPrefixSummary(
messages: AgentMessage[],
models: Models,
model: Model<any>,
reserveTokens: number,
apiKey: string,
apiKey?: string,
headers?: Record<string, string>,
signal?: AbortSignal,
thinkingLevel?: ThinkingLevel,
@@ -728,7 +733,7 @@ async function generateTurnPrefixSummary(
},
];
const response = await completeSimple(
const response = await models.completeSimple(
model,
{ systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, messages: summarizationMessages },
model.reasoning && thinkingLevel && thinkingLevel !== "off"
+7 -1
View File
@@ -1,4 +1,4 @@
import type { ImageContent, Model, SimpleStreamOptions, TextContent, Transport } from "@earendil-works/pi-ai";
import type { ImageContent, Model, Models, SimpleStreamOptions, TextContent, Transport } from "@earendil-works/pi-ai";
import type { AgentEvent, AgentMessage, AgentTool, QueueMode, ThinkingLevel } from "../index.ts";
import type { Session } from "./session/session.ts";
@@ -802,6 +802,12 @@ export interface AgentHarnessOptions<
> {
env: ExecutionEnv;
session: Session;
/**
* Provider collection used for all model requests (turn streaming,
* compaction, branch summarization). Auth resolves through the providers'
* auth; explicit per-request values (`getApiKeyAndHeaders`) win per field.
*/
models: Models;
tools?: TTool[];
/**
* Concrete resources available to explicit invocation methods and system-prompt callbacks.
+10 -5
View File
@@ -1,19 +1,22 @@
import type {
Api,
AssistantMessage,
AssistantMessageEvent,
AssistantMessageEventStream,
Context,
ImageContent,
Message,
Model,
SimpleStreamOptions,
streamSimple,
TextContent,
Tool,
ToolResultMessage,
} from "@earendil-works/pi-ai/compat";
} from "@earendil-works/pi-ai";
import type { Static, TSchema } from "typebox";
/**
* Stream function used by the agent loop.
* Stream function used by the agent loop. `Models.streamSimple` satisfies
* this shape.
*
* Contract:
* - Must not throw or return a rejected promise for request/model/runtime failures.
@@ -22,8 +25,10 @@ import type { Static, TSchema } from "typebox";
* final AssistantMessage with stopReason "error" or "aborted" and errorMessage.
*/
export type StreamFn = (
...args: Parameters<typeof streamSimple>
) => ReturnType<typeof streamSimple> | Promise<ReturnType<typeof streamSimple>>;
model: Model<Api>,
context: Context,
options?: SimpleStreamOptions,
) => AssistantMessageEventStream | Promise<AssistantMessageEventStream>;
/**
* Configuration for how tool calls from a single assistant message are executed.
@@ -1,18 +1,27 @@
import { fauxAssistantMessage, fauxToolCall, registerFauxProvider, type StreamOptions } from "@earendil-works/pi-ai";
import { afterEach, describe, expect, it } from "vitest";
import {
createModels,
type FauxProviderHandle,
fauxAssistantMessage,
fauxProvider,
fauxToolCall,
type StreamOptions,
} from "@earendil-works/pi-ai";
import { describe, expect, it } from "vitest";
import { AgentHarness } from "../../src/harness/agent-harness.ts";
import { NodeExecutionEnv } from "../../src/harness/env/nodejs.ts";
import { InMemorySessionStorage } from "../../src/harness/session/memory-storage.ts";
import { Session } from "../../src/harness/session/session.ts";
import { calculateTool } from "../utils/calculate.ts";
const registrations: Array<{ unregister(): void }> = [];
/** Shared collection; each faux provider gets a unique id so coexisting fakes route correctly. */
const models = createModels();
/** Counter pre-incremented by `newFaux()` to build the next `faux-N` provider id. */
let fauxCount = 0;
afterEach(() => {
for (const registration of registrations.splice(0)) {
registration.unregister();
}
});
/**
 * Creates a faux provider under a fresh `faux-N` id and installs it on the
 * shared `models` collection.
 *
 * @returns The handle of the newly created faux provider.
 */
function newFaux(): FauxProviderHandle {
	fauxCount += 1;
	const handle = fauxProvider({ provider: `faux-${fauxCount}` });
	models.setProvider(handle.provider);
	return handle;
}
function createHarness(options: ConstructorParameters<typeof AgentHarness>[0]): AgentHarness {
return new AgentHarness(options);
@@ -29,8 +38,7 @@ function captureOptions(options: StreamOptions | undefined): StreamOptions {
describe("AgentHarness stream configuration", () => {
it("snapshots stream options and merges auth headers before provider request hooks", async () => {
let capturedOptions: StreamOptions | undefined;
const registration = registerFauxProvider();
registrations.push(registration);
const registration = newFaux();
registration.setResponses([
(_context, options) => {
capturedOptions = options;
@@ -40,6 +48,7 @@ describe("AgentHarness stream configuration", () => {
const session = new Session(new InMemorySessionStorage({ metadata: { id: "session-1", createdAt: "now" } }));
const harness = createHarness({
models,
env: new NodeExecutionEnv({ cwd: process.cwd() }),
session,
model: registration.getModel(),
@@ -81,8 +90,7 @@ describe("AgentHarness stream configuration", () => {
it("chains provider request patches and supports deletion semantics", async () => {
let capturedOptions: StreamOptions | undefined;
const registration = registerFauxProvider();
registrations.push(registration);
const registration = newFaux();
registration.setResponses([
(_context, options) => {
capturedOptions = options;
@@ -91,6 +99,7 @@ describe("AgentHarness stream configuration", () => {
]);
const harness = createHarness({
models,
env: new NodeExecutionEnv({ cwd: process.cwd() }),
session: new Session(new InMemorySessionStorage()),
model: registration.getModel(),
@@ -133,8 +142,7 @@ describe("AgentHarness stream configuration", () => {
it("uses updated stream options for save-point snapshots without mutating the active request", async () => {
const capturedOptions: StreamOptions[] = [];
const registration = registerFauxProvider();
registrations.push(registration);
const registration = newFaux();
registration.setResponses([
(_context, options) => {
capturedOptions.push(captureOptions(options));
@@ -149,6 +157,7 @@ describe("AgentHarness stream configuration", () => {
]);
const harness = createHarness({
models,
env: new NodeExecutionEnv({ cwd: process.cwd() }),
session: new Session(new InMemorySessionStorage()),
model: registration.getModel(),
@@ -174,8 +183,7 @@ describe("AgentHarness stream configuration", () => {
it("chains provider payload hooks", async () => {
const seenPayloads: unknown[] = [];
let finalPayload: unknown;
const registration = registerFauxProvider();
registrations.push(registration);
const registration = newFaux();
registration.setResponses([
async (_context, options, _state, model) => {
finalPayload = await options?.onPayload?.({ steps: ["provider"] }, model);
@@ -184,6 +192,7 @@ describe("AgentHarness stream configuration", () => {
]);
const harness = createHarness({
models,
env: new NodeExecutionEnv({ cwd: process.cwd() }),
session: new Session(new InMemorySessionStorage()),
model: registration.getModel(),
@@ -1,5 +1,13 @@
import { fauxAssistantMessage, fauxToolCall, getModel, registerFauxProvider } from "@earendil-works/pi-ai/compat";
import { afterEach, describe, expect, it } from "vitest";
import {
createModels,
type FauxProviderHandle,
fauxAssistantMessage,
fauxProvider,
fauxToolCall,
type RegisterFauxProviderOptions,
} from "@earendil-works/pi-ai";
import { getModel } from "@earendil-works/pi-ai/compat";
import { describe, expect, it } from "vitest";
import { AgentHarness } from "../../src/harness/agent-harness.ts";
import { NodeExecutionEnv } from "../../src/harness/env/nodejs.ts";
import { InMemorySessionStorage } from "../../src/harness/session/memory-storage.ts";
@@ -17,7 +25,15 @@ interface AppPromptTemplate extends PromptTemplate {
source: "project" | "user";
}
const registrations: Array<{ unregister(): void }> = [];
/** Shared collection; each faux provider gets a unique id so coexisting fakes route correctly. */
const models = createModels();
/** Counter pre-incremented by `newFaux()` to build the default `faux-N` provider id. */
let fauxCount = 0;
/**
 * Creates a faux provider and installs it on the shared `models` collection.
 *
 * The provider id defaults to a fresh `faux-N` value; because `options` is
 * spread after that default, any fields the caller supplies take precedence.
 *
 * @param options Faux provider options forwarded to `fauxProvider()`.
 * @returns The handle of the newly created faux provider.
 */
function newFaux(options: RegisterFauxProviderOptions = {}): FauxProviderHandle {
	fauxCount += 1;
	const handle = fauxProvider({ provider: `faux-${fauxCount}`, ...options });
	models.setProvider(handle.provider);
	return handle;
}
function textFromUserMessages(messages: Array<{ role: string; content: unknown }>): string[] {
return messages.flatMap((message) => {
@@ -44,18 +60,13 @@ function getReasoning(options: unknown): unknown {
return options.reasoning;
}
afterEach(() => {
for (const registration of registrations.splice(0)) {
registration.unregister();
}
});
describe("AgentHarness", () => {
it("constructs directly and exposes queue modes", () => {
const session = new Session(new InMemorySessionStorage());
const env = new NodeExecutionEnv({ cwd: process.cwd() });
const initialModel = getModel("anthropic", "claude-sonnet-4-5");
const harness = new AgentHarness({
models,
env,
session,
model: initialModel,
@@ -76,8 +87,7 @@ describe("AgentHarness", () => {
});
it("drains one queued steering message at a time and emits queue updates", async () => {
const registration = registerFauxProvider();
registrations.push(registration);
const registration = newFaux();
const userCounts: number[] = [];
registration.setResponses([
(context) => {
@@ -94,6 +104,7 @@ describe("AgentHarness", () => {
},
]);
const harness = new AgentHarness({
models,
env: new NodeExecutionEnv({ cwd: process.cwd() }),
session: new Session(new InMemorySessionStorage()),
model: registration.getModel(),
@@ -119,8 +130,7 @@ describe("AgentHarness", () => {
});
it("appends before_agent_start messages and persists them", async () => {
const registration = registerFauxProvider();
registrations.push(registration);
const registration = newFaux();
let requestText: string[] = [];
registration.setResponses([
(context) => {
@@ -130,6 +140,7 @@ describe("AgentHarness", () => {
]);
const session = new Session(new InMemorySessionStorage());
const harness = new AgentHarness({
models,
env: new NodeExecutionEnv({ cwd: process.cwd() }),
session,
model: registration.getModel(),
@@ -151,8 +162,7 @@ describe("AgentHarness", () => {
});
it("abort clears steer and follow-up queues but preserves next-turn messages", async () => {
const registration = registerFauxProvider();
registrations.push(registration);
const registration = newFaux();
let releaseFirstResponse: (() => void) | undefined;
let abortedSignal: AbortSignal | undefined;
const firstResponseReleased = new Promise<void>((resolve) => {
@@ -171,6 +181,7 @@ describe("AgentHarness", () => {
},
]);
const harness = new AgentHarness({
models,
env: new NodeExecutionEnv({ cwd: process.cwd() }),
session: new Session(new InMemorySessionStorage()),
model: registration.getModel(),
@@ -206,8 +217,7 @@ describe("AgentHarness", () => {
});
it("drains follow-up messages one at a time after the agent would otherwise stop", async () => {
const registration = registerFauxProvider();
registrations.push(registration);
const registration = newFaux();
const userCounts: number[] = [];
registration.setResponses([
(context) => {
@@ -224,6 +234,7 @@ describe("AgentHarness", () => {
},
]);
const harness = new AgentHarness({
models,
env: new NodeExecutionEnv({ cwd: process.cwd() }),
session: new Session(new InMemorySessionStorage()),
model: registration.getModel(),
@@ -249,11 +260,11 @@ describe("AgentHarness", () => {
});
it("settles thrown hook failures with persisted assistant error messages", async () => {
const registration = registerFauxProvider();
registrations.push(registration);
const registration = newFaux();
registration.setResponses([() => fauxAssistantMessage("should not be used")]);
const session = new Session(new InMemorySessionStorage());
const harness = new AgentHarness({
models,
env: new NodeExecutionEnv({ cwd: process.cwd() }),
session,
model: registration.getModel(),
@@ -280,13 +291,12 @@ describe("AgentHarness", () => {
});
it("refreshes model, thinking level, resources, system prompt, and active tools at save points", async () => {
const registration = registerFauxProvider({
const registration = newFaux({
models: [
{ id: "first", reasoning: true },
{ id: "second", reasoning: true },
],
});
registrations.push(registration);
const secondModel = registration.getModel("second");
if (!secondModel) throw new Error("missing second faux model");
const captured: Array<{ modelId: string; reasoning: unknown; systemPrompt: string; tools: string[] }> = [];
@@ -313,6 +323,7 @@ describe("AgentHarness", () => {
},
]);
const harness = new AgentHarness<Skill, PromptTemplate, AgentTool>({
models,
env: new NodeExecutionEnv({ cwd: process.cwd() }),
session: new Session(new InMemorySessionStorage()),
model: registration.getModel(),
@@ -345,11 +356,11 @@ describe("AgentHarness", () => {
});
it("orders pending listener session writes after agent-emitted messages", async () => {
const registration = registerFauxProvider();
registrations.push(registration);
const registration = newFaux();
registration.setResponses([() => fauxAssistantMessage("ok")]);
const session = new Session(new InMemorySessionStorage());
const harness = new AgentHarness({
models,
env: new NodeExecutionEnv({ cwd: process.cwd() }),
session,
model: registration.getModel(),
@@ -376,11 +387,11 @@ describe("AgentHarness", () => {
});
it("waitForIdle waits for external run settlement and awaited listeners", async () => {
const registration = registerFauxProvider();
registrations.push(registration);
const registration = newFaux();
registration.setResponses([() => fauxAssistantMessage("ok")]);
const barrier = deferred();
const harness = new AgentHarness({
models,
env: new NodeExecutionEnv({ cwd: process.cwd() }),
session: new Session(new InMemorySessionStorage()),
model: registration.getModel(),
@@ -408,8 +419,7 @@ describe("AgentHarness", () => {
});
it("runs tool_call and tool_result hooks through the direct loop", async () => {
const registration = registerFauxProvider();
registrations.push(registration);
const registration = newFaux();
registration.setResponses([
() =>
fauxAssistantMessage(fauxToolCall("calculate", { expression: "2 + 2" }, { id: "call-1" }), {
@@ -418,6 +428,7 @@ describe("AgentHarness", () => {
]);
const session = new Session(new InMemorySessionStorage());
const harness = new AgentHarness({
models,
env: new NodeExecutionEnv({ cwd: process.cwd() }),
session,
model: registration.getModel(),
@@ -462,6 +473,7 @@ describe("AgentHarness", () => {
const inspectTool: AppTool = { ...calculateTool, name: "inspect", source: "builtin" };
const searchTool: AppTool = { ...calculateTool, name: "search", source: "extension" };
const harness = new AgentHarness<AppSkill, AppPromptTemplate, AppTool>({
models,
env,
session,
model,
@@ -530,11 +542,12 @@ describe("AgentHarness", () => {
const env = new NodeExecutionEnv({ cwd: process.cwd() });
const model = getModel("anthropic", "claude-sonnet-4-5");
expect(
() => new AgentHarness({ env, session, model, tools: [calculateTool], activeToolNames: ["missing"] }),
() => new AgentHarness({ env, session, models, model, tools: [calculateTool], activeToolNames: ["missing"] }),
).toThrow(/Unknown tool/);
expect(
() =>
new AgentHarness({
models,
env,
session,
model,
@@ -545,6 +558,7 @@ describe("AgentHarness", () => {
expect(
() =>
new AgentHarness({
models,
env,
session,
model,
@@ -558,7 +572,7 @@ describe("AgentHarness", () => {
const session = new Session(new InMemorySessionStorage());
const env = new NodeExecutionEnv({ cwd: process.cwd() });
const model = getModel("anthropic", "claude-sonnet-4-5");
const harness = new AgentHarness<AppSkill, AppPromptTemplate, AgentTool>({ env, session, model });
const harness = new AgentHarness<AppSkill, AppPromptTemplate, AgentTool>({ env, session, models, model });
const skill: AppSkill = {
name: "inspect",
description: "Inspect things",
+36 -26
View File
@@ -1,13 +1,14 @@
import {
type AssistantMessage,
type FauxProviderRegistration,
createModels,
type FauxProviderHandle,
fauxAssistantMessage,
fauxProvider,
type Message,
type Model,
registerFauxProvider,
type Usage,
} from "@earendil-works/pi-ai";
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { beforeEach, describe, expect, it } from "vitest";
import {
type CompactionPreparation,
calculateContextTokens,
@@ -121,11 +122,13 @@ function createModelChangeEntry(provider: string, modelId: string, parentId: str
};
}
function createFauxModel(
reasoning: boolean,
maxTokens = 8192,
): { faux: FauxProviderRegistration; model: Model<string> } {
const faux = registerFauxProvider({
/** Shared collection; each faux provider gets a unique id so coexisting fakes route correctly. */
const models = createModels();
let fauxCount = 0;
function createFauxModel(reasoning: boolean, maxTokens = 8192): { faux: FauxProviderHandle; model: Model<string> } {
const faux = fauxProvider({
provider: `faux-${++fauxCount}`,
models: [
{
id: reasoning ? "reasoning-model" : "non-reasoning-model",
@@ -135,18 +138,10 @@ function createFauxModel(
},
],
});
fauxRegistrations.push(faux);
models.setProvider(faux.provider);
return { faux, model: faux.getModel() };
}
const fauxRegistrations: FauxProviderRegistration[] = [];
afterEach(() => {
while (fauxRegistrations.length > 0) {
fauxRegistrations.pop()?.unregister();
}
});
describe("harness compaction", () => {
beforeEach(() => {
nextId = 0;
@@ -447,6 +442,7 @@ describe("harness compaction", () => {
getOrThrow(
await generateSummary(
messages,
models,
reasoningModel,
2000,
"test-key",
@@ -467,7 +463,18 @@ describe("harness compaction", () => {
},
]);
getOrThrow(
await generateSummary(messages, offModel, 2000, "test-key", undefined, undefined, undefined, undefined, "off"),
await generateSummary(
messages,
models,
offModel,
2000,
"test-key",
undefined,
undefined,
undefined,
undefined,
"off",
),
);
expect(seenOptions[1]).not.toHaveProperty("reasoning");
@@ -481,6 +488,7 @@ describe("harness compaction", () => {
getOrThrow(
await generateSummary(
messages,
models,
nonReasoningModel,
2000,
"test-key",
@@ -510,6 +518,7 @@ describe("harness compaction", () => {
const summary = getOrThrow(
await generateSummary(
messages,
models,
model,
2000,
"test-key",
@@ -529,7 +538,7 @@ describe("harness compaction", () => {
const messages: AgentMessage[] = [createUserMessage("Summarize this.")];
const { faux: errorFaux, model: errorModel } = createFauxModel(false);
errorFaux.setResponses([fauxAssistantMessage("", { stopReason: "error", errorMessage: "boom" })]);
const errorResult = await generateSummary(messages, errorModel, 2000, "test-key");
const errorResult = await generateSummary(messages, models, errorModel, 2000, "test-key");
expect(errorResult).toMatchObject({
ok: false,
error: { code: "summarization_failed", message: "Summarization failed: boom" },
@@ -537,7 +546,7 @@ describe("harness compaction", () => {
const { faux: abortedFaux, model: abortedModel } = createFauxModel(false);
abortedFaux.setResponses([fauxAssistantMessage("", { stopReason: "aborted", errorMessage: "stopped" })]);
const abortedResult = await generateSummary(messages, abortedModel, 2000, "test-key");
const abortedResult = await generateSummary(messages, models, abortedModel, 2000, "test-key");
expect(abortedResult).toMatchObject({ ok: false, error: { code: "aborted", message: "stopped" } });
});
@@ -565,7 +574,7 @@ describe("harness compaction", () => {
settings: { enabled: true, reserveTokens: 500000, keepRecentTokens: 20000 },
};
getOrThrow(await compact(preparation, model, "test-key"));
getOrThrow(await compact(preparation, models, model, "test-key"));
expect(seenOptions.map((options) => options?.maxTokens)).toEqual([128000, 128000]);
});
@@ -583,7 +592,7 @@ describe("harness compaction", () => {
};
const { faux: historyFaux, model: historyModel } = createFauxModel(false);
historyFaux.setResponses([fauxAssistantMessage("", { stopReason: "error", errorMessage: "history failed" })]);
expect(await compact(preparation, historyModel, "test-key")).toMatchObject({
expect(await compact(preparation, models, historyModel, "test-key")).toMatchObject({
ok: false,
error: { code: "summarization_failed", message: "Summarization failed: history failed" },
});
@@ -591,6 +600,7 @@ describe("harness compaction", () => {
const { model: invalidModel } = createFauxModel(false);
const invalidResult = await compact(
{ ...preparation, messagesToSummarize: [], firstKeptEntryId: "" },
models,
invalidModel,
"test-key",
);
@@ -617,7 +627,7 @@ describe("harness compaction", () => {
settings: { enabled: true, reserveTokens: 2000, keepRecentTokens: 20 },
};
getOrThrow(await compact(preparation, model, "test-key", undefined, undefined, undefined, "high"));
getOrThrow(await compact(preparation, models, model, "test-key", undefined, undefined, undefined, "high"));
expect(seenOptions[0]).toMatchObject({ reasoning: "high" });
});
@@ -636,14 +646,14 @@ describe("harness compaction", () => {
const { faux, model } = createFauxModel(false);
faux.setResponses([fauxAssistantMessage("", { stopReason: "error", errorMessage: "prefix failed" })]);
expect(await compact(preparation, model, "test-key")).toMatchObject({
expect(await compact(preparation, models, model, "test-key")).toMatchObject({
ok: false,
error: { code: "summarization_failed", message: "Turn prefix summarization failed: prefix failed" },
});
const { faux: abortedFaux, model: abortedModel } = createFauxModel(false);
abortedFaux.setResponses([fauxAssistantMessage("", { stopReason: "aborted", errorMessage: "prefix stopped" })]);
expect(await compact(preparation, abortedModel, "test-key")).toMatchObject({
expect(await compact(preparation, models, abortedModel, "test-key")).toMatchObject({
ok: false,
error: { code: "aborted", message: "prefix stopped" },
});
@@ -662,7 +672,7 @@ describe("harness compaction", () => {
expect(preparation).toBeDefined();
const { faux, model } = createFauxModel(false);
faux.setResponses([fauxAssistantMessage("## Goal\nTest summary")]);
const result = getOrThrow(await compact(preparation!, model, "test-key"));
const result = getOrThrow(await compact(preparation!, models, model, "test-key"));
expect(result.summary.length).toBeGreaterThan(0);
expect(result.firstKeptEntryId).toBeTruthy();
expect(result.details).toBeDefined();
+8 -2
View File
@@ -1,6 +1,8 @@
import { homedir } from "node:os";
import { join } from "node:path";
import { getModel } from "@earendil-works/pi-ai/compat";
import { createModels } from "@earendil-works/pi-ai";
import { getBuiltinModel } from "@earendil-works/pi-ai/providers/all";
import { openaiProvider } from "@earendil-works/pi-ai/providers/openai";
import { NodeExecutionEnv } from "../../src/harness/env/nodejs.ts";
import { InMemorySessionStorage } from "../../src/harness/session/memory-storage.ts";
import {
@@ -35,11 +37,15 @@ const { promptTemplates: sourcedPromptTemplates } = await loadSourcedPromptTempl
(promptTemplate, source) => ({ ...promptTemplate, source }),
);
const models = createModels();
models.setProvider(openaiProvider());
const session = new Session(new InMemorySessionStorage());
const agent = new AgentHarness({
env,
session,
model: getModel("openai", "gpt-5.5"),
models,
model: getBuiltinModel("openai", "gpt-5.5"),
thinkingLevel: "low",
systemPrompt: ({ env, resources }) =>
[