mirror of
https://github.com/earendil-works/pi.git
synced 2026-06-18 15:54:04 +08:00
feat(agent): AgentHarness streams through a required Models instance (phase 6)
AgentHarnessOptions.models is required; the harness stream path, compaction, and branch summarization go through models.streamSimple()/completeSimple() instead of the compat globals. getApiKeyAndHeaders stays and wins per-field over provider-resolved auth, but is no longer required: without it, requests resolve through provider auth. compact()/generateSummary()/generateBranchSummary() take a Models parameter; explicit apiKey becomes optional. StreamFn is redefined structurally (Models.streamSimple satisfies it), dropping the compat type dependency from agent types. Harness tests build per-file Models collections with fauxProvider() and unique provider ids instead of mutating the global api-registry.
This commit is contained in:
@@ -820,9 +820,9 @@ Check items off as they land. Keep this list current; it is the working state fo
|
||||
|
||||
### Phase 6 — AgentHarness
|
||||
|
||||
- [ ] `AgentHarnessOptions.models` required; harness stream path uses `models.streamSimple()`.
|
||||
- [ ] Compaction/branch-summarization paths use the harness `Models` instance.
|
||||
- [ ] Harness tests use `createModels()` + faux provider.
|
||||
- [x] `AgentHarnessOptions.models` required (`readonly models` on the harness); the harness stream path uses `models.streamSimple()`. `StreamFn` redefined structurally (no compat type dependency); `Models.streamSimple` satisfies it.
|
||||
- [x] Compaction/branch-summarization take the harness `Models` instance; explicit `getApiKeyAndHeaders` auth stays and wins per-field, but is no longer required — requests resolve through provider auth otherwise (the hard "No auth available" throws are gone).
|
||||
- [x] Harness tests use `createModels()` + `fauxProvider()` with unique per-fake provider ids; no global api-registry state, no unregister bookkeeping.
|
||||
|
||||
### Phase 7 — coding-agent bridge (minimal)
|
||||
|
||||
@@ -843,8 +843,9 @@ The full AuthStorage deletion (`FileCredentialStore` + decorators, see "Replacin
|
||||
|
||||
### Deferred / follow-ups
|
||||
|
||||
- [ ] Web OAuth implementations (sitegeist-style) behind `oauth: "web"`.
|
||||
- [ ] Web OAuth implementations (sitegeist-style) as an alternative `OAuthAuth`.
|
||||
- [ ] coding-agent `ModelRegistry` -> session `ModelManager` migration; delete `/compat`.
|
||||
- [ ] Move ALL internal `/compat` imports to the new API before compat is deleted: every package's src, all tests, and the example extensions (examples then demonstrate the new API; the extension-loader root-to-compat alias dies with compat). Nothing inside the repo may import `/compat` at that point.
|
||||
- [ ] Images API registry redesign (untouched in this pass).
|
||||
|
||||
## Error behavior
|
||||
|
||||
@@ -1,10 +1,4 @@
|
||||
import {
|
||||
type AssistantMessage,
|
||||
type ImageContent,
|
||||
type Model,
|
||||
streamSimple,
|
||||
type UserMessage,
|
||||
} from "@earendil-works/pi-ai/compat";
|
||||
import type { AssistantMessage, ImageContent, Model, Models, UserMessage } from "@earendil-works/pi-ai";
|
||||
import { runAgentLoop } from "../agent-loop.ts";
|
||||
import type {
|
||||
AgentContext,
|
||||
@@ -178,6 +172,7 @@ export class AgentHarness<
|
||||
> {
|
||||
readonly env: ExecutionEnv;
|
||||
private session: Session;
|
||||
readonly models: Models;
|
||||
private phase: AgentHarnessPhase = "idle";
|
||||
private runAbortController?: AbortController;
|
||||
private runPromise?: Promise<void>;
|
||||
@@ -200,6 +195,7 @@ export class AgentHarness<
|
||||
constructor(options: AgentHarnessOptions<TSkill, TPromptTemplate, TTool>) {
|
||||
this.env = options.env;
|
||||
this.session = options.session;
|
||||
this.models = options.models;
|
||||
this.resources = options.resources ?? {};
|
||||
this.streamOptions = cloneStreamOptions(options.streamOptions);
|
||||
this.systemPrompt = options.systemPrompt;
|
||||
@@ -382,7 +378,7 @@ export class AgentHarness<
|
||||
headers: mergeHeaders(turnState.streamOptions.headers, auth?.headers),
|
||||
};
|
||||
const requestOptions = await this.emitBeforeProviderRequest(model, turnState.sessionId, snapshotOptions);
|
||||
return streamSimple(model, context, {
|
||||
return this.models.streamSimple(model, context, {
|
||||
cacheRetention: requestOptions.cacheRetention,
|
||||
headers: requestOptions.headers,
|
||||
maxRetries: requestOptions.maxRetries,
|
||||
@@ -713,8 +709,8 @@ export class AgentHarness<
|
||||
try {
|
||||
const model = this.model;
|
||||
if (!model) throw new AgentHarnessError("invalid_state", "No model set for compaction");
|
||||
// Explicit auth wins; otherwise the request resolves through provider auth.
|
||||
const auth = await this.getApiKeyAndHeaders?.(model);
|
||||
if (!auth) throw new AgentHarnessError("auth", "No auth available for compaction");
|
||||
const branchEntries = await this.session.getBranch();
|
||||
const preparationResult = prepareCompaction(branchEntries, DEFAULT_COMPACTION_SETTINGS);
|
||||
if (!preparationResult.ok) throw preparationResult.error;
|
||||
@@ -733,9 +729,10 @@ export class AgentHarness<
|
||||
? { ok: true as const, value: provided }
|
||||
: await compact(
|
||||
preparation,
|
||||
this.models,
|
||||
model,
|
||||
auth.apiKey,
|
||||
auth.headers,
|
||||
auth?.apiKey,
|
||||
auth?.headers,
|
||||
customInstructions,
|
||||
undefined,
|
||||
this.thinkingLevel,
|
||||
@@ -792,12 +789,13 @@ export class AgentHarness<
|
||||
if (!summaryText && options?.summarize && entries.length > 0) {
|
||||
const model = this.model;
|
||||
if (!model) throw new AgentHarnessError("invalid_state", "No model set for branch summary");
|
||||
// Explicit auth wins; otherwise the request resolves through provider auth.
|
||||
const auth = await this.getApiKeyAndHeaders?.(model);
|
||||
if (!auth) throw new AgentHarnessError("auth", "No auth available for branch summary");
|
||||
const branchSummary = await generateBranchSummary(entries, {
|
||||
models: this.models,
|
||||
model,
|
||||
apiKey: auth.apiKey,
|
||||
headers: auth.headers,
|
||||
apiKey: auth?.apiKey,
|
||||
headers: auth?.headers,
|
||||
signal: new AbortController().signal,
|
||||
customInstructions: hookResult?.customInstructions ?? options?.customInstructions,
|
||||
replaceInstructions: hookResult?.replaceInstructions ?? options?.replaceInstructions,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { Model } from "@earendil-works/pi-ai/compat";
|
||||
import { completeSimple } from "@earendil-works/pi-ai/compat";
|
||||
import type { Model, Models } from "@earendil-works/pi-ai";
|
||||
|
||||
import type { AgentMessage } from "../../types.ts";
|
||||
import {
|
||||
convertToLlm,
|
||||
@@ -49,10 +49,12 @@ export interface CollectEntriesResult {
|
||||
|
||||
/** Options for generating a branch summary. */
|
||||
export interface GenerateBranchSummaryOptions {
|
||||
/** Provider collection the summarization request goes through. */
|
||||
models: Models;
|
||||
/** Model used for summarization. */
|
||||
model: Model<any>;
|
||||
/** API key forwarded to the provider. */
|
||||
apiKey: string;
|
||||
/** Explicit API key; wins over provider-resolved auth. */
|
||||
apiKey?: string;
|
||||
/** Optional request headers forwarded to the provider. */
|
||||
headers?: Record<string, string>;
|
||||
/** Abort signal for the summarization request. */
|
||||
@@ -202,7 +204,16 @@ export async function generateBranchSummary(
|
||||
entries: SessionTreeEntry[],
|
||||
options: GenerateBranchSummaryOptions,
|
||||
): Promise<Result<BranchSummaryResult, BranchSummaryError>> {
|
||||
const { model, apiKey, headers, signal, customInstructions, replaceInstructions, reserveTokens = 16384 } = options;
|
||||
const {
|
||||
models,
|
||||
model,
|
||||
apiKey,
|
||||
headers,
|
||||
signal,
|
||||
customInstructions,
|
||||
replaceInstructions,
|
||||
reserveTokens = 16384,
|
||||
} = options;
|
||||
const contextWindow = model.contextWindow || 128000;
|
||||
const tokenBudget = contextWindow - reserveTokens;
|
||||
|
||||
@@ -230,7 +241,7 @@ export async function generateBranchSummary(
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
];
|
||||
const response = await completeSimple(
|
||||
const response = await models.completeSimple(
|
||||
model,
|
||||
{ systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, messages: summarizationMessages },
|
||||
{ apiKey, headers, signal, maxTokens: 2048 },
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import type { AssistantMessage, ImageContent, Model, TextContent, Usage } from "@earendil-works/pi-ai/compat";
|
||||
import { completeSimple } from "@earendil-works/pi-ai/compat";
|
||||
import type { AssistantMessage, ImageContent, Model, Models, TextContent, Usage } from "@earendil-works/pi-ai";
|
||||
import type { AgentMessage, ThinkingLevel } from "../../types.ts";
|
||||
import {
|
||||
convertToLlm,
|
||||
@@ -455,9 +454,10 @@ Keep each section concise. Preserve exact file paths, function names, and error
|
||||
/** Generate or update a conversation summary for compaction. */
|
||||
export async function generateSummary(
|
||||
currentMessages: AgentMessage[],
|
||||
models: Models,
|
||||
model: Model<any>,
|
||||
reserveTokens: number,
|
||||
apiKey: string,
|
||||
apiKey?: string,
|
||||
headers?: Record<string, string>,
|
||||
signal?: AbortSignal,
|
||||
customInstructions?: string,
|
||||
@@ -493,7 +493,7 @@ export async function generateSummary(
|
||||
? { maxTokens, signal, apiKey, headers, reasoning: thinkingLevel }
|
||||
: { maxTokens, signal, apiKey, headers };
|
||||
|
||||
const response = await completeSimple(
|
||||
const response = await models.completeSimple(
|
||||
model,
|
||||
{ systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, messages: summarizationMessages },
|
||||
completionOptions,
|
||||
@@ -626,8 +626,9 @@ export { serializeConversation } from "./utils.ts";
|
||||
/** Generate compaction summary data from prepared session history. */
|
||||
export async function compact(
|
||||
preparation: CompactionPreparation,
|
||||
models: Models,
|
||||
model: Model<any>,
|
||||
apiKey: string,
|
||||
apiKey?: string,
|
||||
headers?: Record<string, string>,
|
||||
customInstructions?: string,
|
||||
signal?: AbortSignal,
|
||||
@@ -655,6 +656,7 @@ export async function compact(
|
||||
messagesToSummarize.length > 0
|
||||
? generateSummary(
|
||||
messagesToSummarize,
|
||||
models,
|
||||
model,
|
||||
settings.reserveTokens,
|
||||
apiKey,
|
||||
@@ -667,6 +669,7 @@ export async function compact(
|
||||
: Promise.resolve(ok<string, CompactionError>("No prior history.")),
|
||||
generateTurnPrefixSummary(
|
||||
turnPrefixMessages,
|
||||
models,
|
||||
model,
|
||||
settings.reserveTokens,
|
||||
apiKey,
|
||||
@@ -681,6 +684,7 @@ export async function compact(
|
||||
} else {
|
||||
const summaryResult = await generateSummary(
|
||||
messagesToSummarize,
|
||||
models,
|
||||
model,
|
||||
settings.reserveTokens,
|
||||
apiKey,
|
||||
@@ -706,9 +710,10 @@ export async function compact(
|
||||
}
|
||||
async function generateTurnPrefixSummary(
|
||||
messages: AgentMessage[],
|
||||
models: Models,
|
||||
model: Model<any>,
|
||||
reserveTokens: number,
|
||||
apiKey: string,
|
||||
apiKey?: string,
|
||||
headers?: Record<string, string>,
|
||||
signal?: AbortSignal,
|
||||
thinkingLevel?: ThinkingLevel,
|
||||
@@ -728,7 +733,7 @@ async function generateTurnPrefixSummary(
|
||||
},
|
||||
];
|
||||
|
||||
const response = await completeSimple(
|
||||
const response = await models.completeSimple(
|
||||
model,
|
||||
{ systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, messages: summarizationMessages },
|
||||
model.reasoning && thinkingLevel && thinkingLevel !== "off"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import type { ImageContent, Model, SimpleStreamOptions, TextContent, Transport } from "@earendil-works/pi-ai";
|
||||
import type { ImageContent, Model, Models, SimpleStreamOptions, TextContent, Transport } from "@earendil-works/pi-ai";
|
||||
import type { AgentEvent, AgentMessage, AgentTool, QueueMode, ThinkingLevel } from "../index.ts";
|
||||
import type { Session } from "./session/session.ts";
|
||||
|
||||
@@ -802,6 +802,12 @@ export interface AgentHarnessOptions<
|
||||
> {
|
||||
env: ExecutionEnv;
|
||||
session: Session;
|
||||
/**
|
||||
* Provider collection used for all model requests (turn streaming,
|
||||
* compaction, branch summarization). Auth resolves through the providers'
|
||||
* auth; explicit per-request values (`getApiKeyAndHeaders`) win per field.
|
||||
*/
|
||||
models: Models;
|
||||
tools?: TTool[];
|
||||
/**
|
||||
* Concrete resources available to explicit invocation methods and system-prompt callbacks.
|
||||
|
||||
@@ -1,19 +1,22 @@
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
AssistantMessageEvent,
|
||||
AssistantMessageEventStream,
|
||||
Context,
|
||||
ImageContent,
|
||||
Message,
|
||||
Model,
|
||||
SimpleStreamOptions,
|
||||
streamSimple,
|
||||
TextContent,
|
||||
Tool,
|
||||
ToolResultMessage,
|
||||
} from "@earendil-works/pi-ai/compat";
|
||||
} from "@earendil-works/pi-ai";
|
||||
import type { Static, TSchema } from "typebox";
|
||||
|
||||
/**
|
||||
* Stream function used by the agent loop.
|
||||
* Stream function used by the agent loop. `Models.streamSimple` satisfies
|
||||
* this shape.
|
||||
*
|
||||
* Contract:
|
||||
* - Must not throw or return a rejected promise for request/model/runtime failures.
|
||||
@@ -22,8 +25,10 @@ import type { Static, TSchema } from "typebox";
|
||||
* final AssistantMessage with stopReason "error" or "aborted" and errorMessage.
|
||||
*/
|
||||
export type StreamFn = (
|
||||
...args: Parameters<typeof streamSimple>
|
||||
) => ReturnType<typeof streamSimple> | Promise<ReturnType<typeof streamSimple>>;
|
||||
model: Model<Api>,
|
||||
context: Context,
|
||||
options?: SimpleStreamOptions,
|
||||
) => AssistantMessageEventStream | Promise<AssistantMessageEventStream>;
|
||||
|
||||
/**
|
||||
* Configuration for how tool calls from a single assistant message are executed.
|
||||
|
||||
@@ -1,18 +1,27 @@
|
||||
import { fauxAssistantMessage, fauxToolCall, registerFauxProvider, type StreamOptions } from "@earendil-works/pi-ai";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
createModels,
|
||||
type FauxProviderHandle,
|
||||
fauxAssistantMessage,
|
||||
fauxProvider,
|
||||
fauxToolCall,
|
||||
type StreamOptions,
|
||||
} from "@earendil-works/pi-ai";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { AgentHarness } from "../../src/harness/agent-harness.ts";
|
||||
import { NodeExecutionEnv } from "../../src/harness/env/nodejs.ts";
|
||||
import { InMemorySessionStorage } from "../../src/harness/session/memory-storage.ts";
|
||||
import { Session } from "../../src/harness/session/session.ts";
|
||||
import { calculateTool } from "../utils/calculate.ts";
|
||||
|
||||
const registrations: Array<{ unregister(): void }> = [];
|
||||
/** Shared collection; each faux provider gets a unique id so coexisting fakes route correctly. */
|
||||
const models = createModels();
|
||||
let fauxCount = 0;
|
||||
|
||||
afterEach(() => {
|
||||
for (const registration of registrations.splice(0)) {
|
||||
registration.unregister();
|
||||
}
|
||||
});
|
||||
function newFaux(): FauxProviderHandle {
|
||||
const faux = fauxProvider({ provider: `faux-${++fauxCount}` });
|
||||
models.setProvider(faux.provider);
|
||||
return faux;
|
||||
}
|
||||
|
||||
function createHarness(options: ConstructorParameters<typeof AgentHarness>[0]): AgentHarness {
|
||||
return new AgentHarness(options);
|
||||
@@ -29,8 +38,7 @@ function captureOptions(options: StreamOptions | undefined): StreamOptions {
|
||||
describe("AgentHarness stream configuration", () => {
|
||||
it("snapshots stream options and merges auth headers before provider request hooks", async () => {
|
||||
let capturedOptions: StreamOptions | undefined;
|
||||
const registration = registerFauxProvider();
|
||||
registrations.push(registration);
|
||||
const registration = newFaux();
|
||||
registration.setResponses([
|
||||
(_context, options) => {
|
||||
capturedOptions = options;
|
||||
@@ -40,6 +48,7 @@ describe("AgentHarness stream configuration", () => {
|
||||
|
||||
const session = new Session(new InMemorySessionStorage({ metadata: { id: "session-1", createdAt: "now" } }));
|
||||
const harness = createHarness({
|
||||
models,
|
||||
env: new NodeExecutionEnv({ cwd: process.cwd() }),
|
||||
session,
|
||||
model: registration.getModel(),
|
||||
@@ -81,8 +90,7 @@ describe("AgentHarness stream configuration", () => {
|
||||
|
||||
it("chains provider request patches and supports deletion semantics", async () => {
|
||||
let capturedOptions: StreamOptions | undefined;
|
||||
const registration = registerFauxProvider();
|
||||
registrations.push(registration);
|
||||
const registration = newFaux();
|
||||
registration.setResponses([
|
||||
(_context, options) => {
|
||||
capturedOptions = options;
|
||||
@@ -91,6 +99,7 @@ describe("AgentHarness stream configuration", () => {
|
||||
]);
|
||||
|
||||
const harness = createHarness({
|
||||
models,
|
||||
env: new NodeExecutionEnv({ cwd: process.cwd() }),
|
||||
session: new Session(new InMemorySessionStorage()),
|
||||
model: registration.getModel(),
|
||||
@@ -133,8 +142,7 @@ describe("AgentHarness stream configuration", () => {
|
||||
|
||||
it("uses updated stream options for save-point snapshots without mutating the active request", async () => {
|
||||
const capturedOptions: StreamOptions[] = [];
|
||||
const registration = registerFauxProvider();
|
||||
registrations.push(registration);
|
||||
const registration = newFaux();
|
||||
registration.setResponses([
|
||||
(_context, options) => {
|
||||
capturedOptions.push(captureOptions(options));
|
||||
@@ -149,6 +157,7 @@ describe("AgentHarness stream configuration", () => {
|
||||
]);
|
||||
|
||||
const harness = createHarness({
|
||||
models,
|
||||
env: new NodeExecutionEnv({ cwd: process.cwd() }),
|
||||
session: new Session(new InMemorySessionStorage()),
|
||||
model: registration.getModel(),
|
||||
@@ -174,8 +183,7 @@ describe("AgentHarness stream configuration", () => {
|
||||
it("chains provider payload hooks", async () => {
|
||||
const seenPayloads: unknown[] = [];
|
||||
let finalPayload: unknown;
|
||||
const registration = registerFauxProvider();
|
||||
registrations.push(registration);
|
||||
const registration = newFaux();
|
||||
registration.setResponses([
|
||||
async (_context, options, _state, model) => {
|
||||
finalPayload = await options?.onPayload?.({ steps: ["provider"] }, model);
|
||||
@@ -184,6 +192,7 @@ describe("AgentHarness stream configuration", () => {
|
||||
]);
|
||||
|
||||
const harness = createHarness({
|
||||
models,
|
||||
env: new NodeExecutionEnv({ cwd: process.cwd() }),
|
||||
session: new Session(new InMemorySessionStorage()),
|
||||
model: registration.getModel(),
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
import { fauxAssistantMessage, fauxToolCall, getModel, registerFauxProvider } from "@earendil-works/pi-ai/compat";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
createModels,
|
||||
type FauxProviderHandle,
|
||||
fauxAssistantMessage,
|
||||
fauxProvider,
|
||||
fauxToolCall,
|
||||
type RegisterFauxProviderOptions,
|
||||
} from "@earendil-works/pi-ai";
|
||||
import { getModel } from "@earendil-works/pi-ai/compat";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { AgentHarness } from "../../src/harness/agent-harness.ts";
|
||||
import { NodeExecutionEnv } from "../../src/harness/env/nodejs.ts";
|
||||
import { InMemorySessionStorage } from "../../src/harness/session/memory-storage.ts";
|
||||
@@ -17,7 +25,15 @@ interface AppPromptTemplate extends PromptTemplate {
|
||||
source: "project" | "user";
|
||||
}
|
||||
|
||||
const registrations: Array<{ unregister(): void }> = [];
|
||||
/** Shared collection; each faux provider gets a unique id so coexisting fakes route correctly. */
|
||||
const models = createModels();
|
||||
let fauxCount = 0;
|
||||
|
||||
function newFaux(options: RegisterFauxProviderOptions = {}): FauxProviderHandle {
|
||||
const faux = fauxProvider({ provider: `faux-${++fauxCount}`, ...options });
|
||||
models.setProvider(faux.provider);
|
||||
return faux;
|
||||
}
|
||||
|
||||
function textFromUserMessages(messages: Array<{ role: string; content: unknown }>): string[] {
|
||||
return messages.flatMap((message) => {
|
||||
@@ -44,18 +60,13 @@ function getReasoning(options: unknown): unknown {
|
||||
return options.reasoning;
|
||||
}
|
||||
|
||||
afterEach(() => {
|
||||
for (const registration of registrations.splice(0)) {
|
||||
registration.unregister();
|
||||
}
|
||||
});
|
||||
|
||||
describe("AgentHarness", () => {
|
||||
it("constructs directly and exposes queue modes", () => {
|
||||
const session = new Session(new InMemorySessionStorage());
|
||||
const env = new NodeExecutionEnv({ cwd: process.cwd() });
|
||||
const initialModel = getModel("anthropic", "claude-sonnet-4-5");
|
||||
const harness = new AgentHarness({
|
||||
models,
|
||||
env,
|
||||
session,
|
||||
model: initialModel,
|
||||
@@ -76,8 +87,7 @@ describe("AgentHarness", () => {
|
||||
});
|
||||
|
||||
it("drains one queued steering message at a time and emits queue updates", async () => {
|
||||
const registration = registerFauxProvider();
|
||||
registrations.push(registration);
|
||||
const registration = newFaux();
|
||||
const userCounts: number[] = [];
|
||||
registration.setResponses([
|
||||
(context) => {
|
||||
@@ -94,6 +104,7 @@ describe("AgentHarness", () => {
|
||||
},
|
||||
]);
|
||||
const harness = new AgentHarness({
|
||||
models,
|
||||
env: new NodeExecutionEnv({ cwd: process.cwd() }),
|
||||
session: new Session(new InMemorySessionStorage()),
|
||||
model: registration.getModel(),
|
||||
@@ -119,8 +130,7 @@ describe("AgentHarness", () => {
|
||||
});
|
||||
|
||||
it("appends before_agent_start messages and persists them", async () => {
|
||||
const registration = registerFauxProvider();
|
||||
registrations.push(registration);
|
||||
const registration = newFaux();
|
||||
let requestText: string[] = [];
|
||||
registration.setResponses([
|
||||
(context) => {
|
||||
@@ -130,6 +140,7 @@ describe("AgentHarness", () => {
|
||||
]);
|
||||
const session = new Session(new InMemorySessionStorage());
|
||||
const harness = new AgentHarness({
|
||||
models,
|
||||
env: new NodeExecutionEnv({ cwd: process.cwd() }),
|
||||
session,
|
||||
model: registration.getModel(),
|
||||
@@ -151,8 +162,7 @@ describe("AgentHarness", () => {
|
||||
});
|
||||
|
||||
it("abort clears steer and follow-up queues but preserves next-turn messages", async () => {
|
||||
const registration = registerFauxProvider();
|
||||
registrations.push(registration);
|
||||
const registration = newFaux();
|
||||
let releaseFirstResponse: (() => void) | undefined;
|
||||
let abortedSignal: AbortSignal | undefined;
|
||||
const firstResponseReleased = new Promise<void>((resolve) => {
|
||||
@@ -171,6 +181,7 @@ describe("AgentHarness", () => {
|
||||
},
|
||||
]);
|
||||
const harness = new AgentHarness({
|
||||
models,
|
||||
env: new NodeExecutionEnv({ cwd: process.cwd() }),
|
||||
session: new Session(new InMemorySessionStorage()),
|
||||
model: registration.getModel(),
|
||||
@@ -206,8 +217,7 @@ describe("AgentHarness", () => {
|
||||
});
|
||||
|
||||
it("drains follow-up messages one at a time after the agent would otherwise stop", async () => {
|
||||
const registration = registerFauxProvider();
|
||||
registrations.push(registration);
|
||||
const registration = newFaux();
|
||||
const userCounts: number[] = [];
|
||||
registration.setResponses([
|
||||
(context) => {
|
||||
@@ -224,6 +234,7 @@ describe("AgentHarness", () => {
|
||||
},
|
||||
]);
|
||||
const harness = new AgentHarness({
|
||||
models,
|
||||
env: new NodeExecutionEnv({ cwd: process.cwd() }),
|
||||
session: new Session(new InMemorySessionStorage()),
|
||||
model: registration.getModel(),
|
||||
@@ -249,11 +260,11 @@ describe("AgentHarness", () => {
|
||||
});
|
||||
|
||||
it("settles thrown hook failures with persisted assistant error messages", async () => {
|
||||
const registration = registerFauxProvider();
|
||||
registrations.push(registration);
|
||||
const registration = newFaux();
|
||||
registration.setResponses([() => fauxAssistantMessage("should not be used")]);
|
||||
const session = new Session(new InMemorySessionStorage());
|
||||
const harness = new AgentHarness({
|
||||
models,
|
||||
env: new NodeExecutionEnv({ cwd: process.cwd() }),
|
||||
session,
|
||||
model: registration.getModel(),
|
||||
@@ -280,13 +291,12 @@ describe("AgentHarness", () => {
|
||||
});
|
||||
|
||||
it("refreshes model, thinking level, resources, system prompt, and active tools at save points", async () => {
|
||||
const registration = registerFauxProvider({
|
||||
const registration = newFaux({
|
||||
models: [
|
||||
{ id: "first", reasoning: true },
|
||||
{ id: "second", reasoning: true },
|
||||
],
|
||||
});
|
||||
registrations.push(registration);
|
||||
const secondModel = registration.getModel("second");
|
||||
if (!secondModel) throw new Error("missing second faux model");
|
||||
const captured: Array<{ modelId: string; reasoning: unknown; systemPrompt: string; tools: string[] }> = [];
|
||||
@@ -313,6 +323,7 @@ describe("AgentHarness", () => {
|
||||
},
|
||||
]);
|
||||
const harness = new AgentHarness<Skill, PromptTemplate, AgentTool>({
|
||||
models,
|
||||
env: new NodeExecutionEnv({ cwd: process.cwd() }),
|
||||
session: new Session(new InMemorySessionStorage()),
|
||||
model: registration.getModel(),
|
||||
@@ -345,11 +356,11 @@ describe("AgentHarness", () => {
|
||||
});
|
||||
|
||||
it("orders pending listener session writes after agent-emitted messages", async () => {
|
||||
const registration = registerFauxProvider();
|
||||
registrations.push(registration);
|
||||
const registration = newFaux();
|
||||
registration.setResponses([() => fauxAssistantMessage("ok")]);
|
||||
const session = new Session(new InMemorySessionStorage());
|
||||
const harness = new AgentHarness({
|
||||
models,
|
||||
env: new NodeExecutionEnv({ cwd: process.cwd() }),
|
||||
session,
|
||||
model: registration.getModel(),
|
||||
@@ -376,11 +387,11 @@ describe("AgentHarness", () => {
|
||||
});
|
||||
|
||||
it("waitForIdle waits for external run settlement and awaited listeners", async () => {
|
||||
const registration = registerFauxProvider();
|
||||
registrations.push(registration);
|
||||
const registration = newFaux();
|
||||
registration.setResponses([() => fauxAssistantMessage("ok")]);
|
||||
const barrier = deferred();
|
||||
const harness = new AgentHarness({
|
||||
models,
|
||||
env: new NodeExecutionEnv({ cwd: process.cwd() }),
|
||||
session: new Session(new InMemorySessionStorage()),
|
||||
model: registration.getModel(),
|
||||
@@ -408,8 +419,7 @@ describe("AgentHarness", () => {
|
||||
});
|
||||
|
||||
it("runs tool_call and tool_result hooks through the direct loop", async () => {
|
||||
const registration = registerFauxProvider();
|
||||
registrations.push(registration);
|
||||
const registration = newFaux();
|
||||
registration.setResponses([
|
||||
() =>
|
||||
fauxAssistantMessage(fauxToolCall("calculate", { expression: "2 + 2" }, { id: "call-1" }), {
|
||||
@@ -418,6 +428,7 @@ describe("AgentHarness", () => {
|
||||
]);
|
||||
const session = new Session(new InMemorySessionStorage());
|
||||
const harness = new AgentHarness({
|
||||
models,
|
||||
env: new NodeExecutionEnv({ cwd: process.cwd() }),
|
||||
session,
|
||||
model: registration.getModel(),
|
||||
@@ -462,6 +473,7 @@ describe("AgentHarness", () => {
|
||||
const inspectTool: AppTool = { ...calculateTool, name: "inspect", source: "builtin" };
|
||||
const searchTool: AppTool = { ...calculateTool, name: "search", source: "extension" };
|
||||
const harness = new AgentHarness<AppSkill, AppPromptTemplate, AppTool>({
|
||||
models,
|
||||
env,
|
||||
session,
|
||||
model,
|
||||
@@ -530,11 +542,12 @@ describe("AgentHarness", () => {
|
||||
const env = new NodeExecutionEnv({ cwd: process.cwd() });
|
||||
const model = getModel("anthropic", "claude-sonnet-4-5");
|
||||
expect(
|
||||
() => new AgentHarness({ env, session, model, tools: [calculateTool], activeToolNames: ["missing"] }),
|
||||
() => new AgentHarness({ env, session, models, model, tools: [calculateTool], activeToolNames: ["missing"] }),
|
||||
).toThrow(/Unknown tool/);
|
||||
expect(
|
||||
() =>
|
||||
new AgentHarness({
|
||||
models,
|
||||
env,
|
||||
session,
|
||||
model,
|
||||
@@ -545,6 +558,7 @@ describe("AgentHarness", () => {
|
||||
expect(
|
||||
() =>
|
||||
new AgentHarness({
|
||||
models,
|
||||
env,
|
||||
session,
|
||||
model,
|
||||
@@ -558,7 +572,7 @@ describe("AgentHarness", () => {
|
||||
const session = new Session(new InMemorySessionStorage());
|
||||
const env = new NodeExecutionEnv({ cwd: process.cwd() });
|
||||
const model = getModel("anthropic", "claude-sonnet-4-5");
|
||||
const harness = new AgentHarness<AppSkill, AppPromptTemplate, AgentTool>({ env, session, model });
|
||||
const harness = new AgentHarness<AppSkill, AppPromptTemplate, AgentTool>({ env, session, models, model });
|
||||
const skill: AppSkill = {
|
||||
name: "inspect",
|
||||
description: "Inspect things",
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
import {
|
||||
type AssistantMessage,
|
||||
type FauxProviderRegistration,
|
||||
createModels,
|
||||
type FauxProviderHandle,
|
||||
fauxAssistantMessage,
|
||||
fauxProvider,
|
||||
type Message,
|
||||
type Model,
|
||||
registerFauxProvider,
|
||||
type Usage,
|
||||
} from "@earendil-works/pi-ai";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { beforeEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
type CompactionPreparation,
|
||||
calculateContextTokens,
|
||||
@@ -121,11 +122,13 @@ function createModelChangeEntry(provider: string, modelId: string, parentId: str
|
||||
};
|
||||
}
|
||||
|
||||
function createFauxModel(
|
||||
reasoning: boolean,
|
||||
maxTokens = 8192,
|
||||
): { faux: FauxProviderRegistration; model: Model<string> } {
|
||||
const faux = registerFauxProvider({
|
||||
/** Shared collection; each faux provider gets a unique id so coexisting fakes route correctly. */
|
||||
const models = createModels();
|
||||
let fauxCount = 0;
|
||||
|
||||
function createFauxModel(reasoning: boolean, maxTokens = 8192): { faux: FauxProviderHandle; model: Model<string> } {
|
||||
const faux = fauxProvider({
|
||||
provider: `faux-${++fauxCount}`,
|
||||
models: [
|
||||
{
|
||||
id: reasoning ? "reasoning-model" : "non-reasoning-model",
|
||||
@@ -135,18 +138,10 @@ function createFauxModel(
|
||||
},
|
||||
],
|
||||
});
|
||||
fauxRegistrations.push(faux);
|
||||
models.setProvider(faux.provider);
|
||||
return { faux, model: faux.getModel() };
|
||||
}
|
||||
|
||||
const fauxRegistrations: FauxProviderRegistration[] = [];
|
||||
|
||||
afterEach(() => {
|
||||
while (fauxRegistrations.length > 0) {
|
||||
fauxRegistrations.pop()?.unregister();
|
||||
}
|
||||
});
|
||||
|
||||
describe("harness compaction", () => {
|
||||
beforeEach(() => {
|
||||
nextId = 0;
|
||||
@@ -447,6 +442,7 @@ describe("harness compaction", () => {
|
||||
getOrThrow(
|
||||
await generateSummary(
|
||||
messages,
|
||||
models,
|
||||
reasoningModel,
|
||||
2000,
|
||||
"test-key",
|
||||
@@ -467,7 +463,18 @@ describe("harness compaction", () => {
|
||||
},
|
||||
]);
|
||||
getOrThrow(
|
||||
await generateSummary(messages, offModel, 2000, "test-key", undefined, undefined, undefined, undefined, "off"),
|
||||
await generateSummary(
|
||||
messages,
|
||||
models,
|
||||
offModel,
|
||||
2000,
|
||||
"test-key",
|
||||
undefined,
|
||||
undefined,
|
||||
undefined,
|
||||
undefined,
|
||||
"off",
|
||||
),
|
||||
);
|
||||
expect(seenOptions[1]).not.toHaveProperty("reasoning");
|
||||
|
||||
@@ -481,6 +488,7 @@ describe("harness compaction", () => {
|
||||
getOrThrow(
|
||||
await generateSummary(
|
||||
messages,
|
||||
models,
|
||||
nonReasoningModel,
|
||||
2000,
|
||||
"test-key",
|
||||
@@ -510,6 +518,7 @@ describe("harness compaction", () => {
|
||||
const summary = getOrThrow(
|
||||
await generateSummary(
|
||||
messages,
|
||||
models,
|
||||
model,
|
||||
2000,
|
||||
"test-key",
|
||||
@@ -529,7 +538,7 @@ describe("harness compaction", () => {
|
||||
const messages: AgentMessage[] = [createUserMessage("Summarize this.")];
|
||||
const { faux: errorFaux, model: errorModel } = createFauxModel(false);
|
||||
errorFaux.setResponses([fauxAssistantMessage("", { stopReason: "error", errorMessage: "boom" })]);
|
||||
const errorResult = await generateSummary(messages, errorModel, 2000, "test-key");
|
||||
const errorResult = await generateSummary(messages, models, errorModel, 2000, "test-key");
|
||||
expect(errorResult).toMatchObject({
|
||||
ok: false,
|
||||
error: { code: "summarization_failed", message: "Summarization failed: boom" },
|
||||
@@ -537,7 +546,7 @@ describe("harness compaction", () => {
|
||||
|
||||
const { faux: abortedFaux, model: abortedModel } = createFauxModel(false);
|
||||
abortedFaux.setResponses([fauxAssistantMessage("", { stopReason: "aborted", errorMessage: "stopped" })]);
|
||||
const abortedResult = await generateSummary(messages, abortedModel, 2000, "test-key");
|
||||
const abortedResult = await generateSummary(messages, models, abortedModel, 2000, "test-key");
|
||||
expect(abortedResult).toMatchObject({ ok: false, error: { code: "aborted", message: "stopped" } });
|
||||
});
|
||||
|
||||
@@ -565,7 +574,7 @@ describe("harness compaction", () => {
|
||||
settings: { enabled: true, reserveTokens: 500000, keepRecentTokens: 20000 },
|
||||
};
|
||||
|
||||
getOrThrow(await compact(preparation, model, "test-key"));
|
||||
getOrThrow(await compact(preparation, models, model, "test-key"));
|
||||
|
||||
expect(seenOptions.map((options) => options?.maxTokens)).toEqual([128000, 128000]);
|
||||
});
|
||||
@@ -583,7 +592,7 @@ describe("harness compaction", () => {
|
||||
};
|
||||
const { faux: historyFaux, model: historyModel } = createFauxModel(false);
|
||||
historyFaux.setResponses([fauxAssistantMessage("", { stopReason: "error", errorMessage: "history failed" })]);
|
||||
expect(await compact(preparation, historyModel, "test-key")).toMatchObject({
|
||||
expect(await compact(preparation, models, historyModel, "test-key")).toMatchObject({
|
||||
ok: false,
|
||||
error: { code: "summarization_failed", message: "Summarization failed: history failed" },
|
||||
});
|
||||
@@ -591,6 +600,7 @@ describe("harness compaction", () => {
|
||||
const { model: invalidModel } = createFauxModel(false);
|
||||
const invalidResult = await compact(
|
||||
{ ...preparation, messagesToSummarize: [], firstKeptEntryId: "" },
|
||||
models,
|
||||
invalidModel,
|
||||
"test-key",
|
||||
);
|
||||
@@ -617,7 +627,7 @@ describe("harness compaction", () => {
|
||||
settings: { enabled: true, reserveTokens: 2000, keepRecentTokens: 20 },
|
||||
};
|
||||
|
||||
getOrThrow(await compact(preparation, model, "test-key", undefined, undefined, undefined, "high"));
|
||||
getOrThrow(await compact(preparation, models, model, "test-key", undefined, undefined, undefined, "high"));
|
||||
|
||||
expect(seenOptions[0]).toMatchObject({ reasoning: "high" });
|
||||
});
|
||||
@@ -636,14 +646,14 @@ describe("harness compaction", () => {
|
||||
const { faux, model } = createFauxModel(false);
|
||||
faux.setResponses([fauxAssistantMessage("", { stopReason: "error", errorMessage: "prefix failed" })]);
|
||||
|
||||
expect(await compact(preparation, model, "test-key")).toMatchObject({
|
||||
expect(await compact(preparation, models, model, "test-key")).toMatchObject({
|
||||
ok: false,
|
||||
error: { code: "summarization_failed", message: "Turn prefix summarization failed: prefix failed" },
|
||||
});
|
||||
|
||||
const { faux: abortedFaux, model: abortedModel } = createFauxModel(false);
|
||||
abortedFaux.setResponses([fauxAssistantMessage("", { stopReason: "aborted", errorMessage: "prefix stopped" })]);
|
||||
expect(await compact(preparation, abortedModel, "test-key")).toMatchObject({
|
||||
expect(await compact(preparation, models, abortedModel, "test-key")).toMatchObject({
|
||||
ok: false,
|
||||
error: { code: "aborted", message: "prefix stopped" },
|
||||
});
|
||||
@@ -662,7 +672,7 @@ describe("harness compaction", () => {
|
||||
expect(preparation).toBeDefined();
|
||||
const { faux, model } = createFauxModel(false);
|
||||
faux.setResponses([fauxAssistantMessage("## Goal\nTest summary")]);
|
||||
const result = getOrThrow(await compact(preparation!, model, "test-key"));
|
||||
const result = getOrThrow(await compact(preparation!, models, model, "test-key"));
|
||||
expect(result.summary.length).toBeGreaterThan(0);
|
||||
expect(result.firstKeptEntryId).toBeTruthy();
|
||||
expect(result.details).toBeDefined();
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { getModel } from "@earendil-works/pi-ai/compat";
|
||||
import { createModels } from "@earendil-works/pi-ai";
|
||||
import { getBuiltinModel } from "@earendil-works/pi-ai/providers/all";
|
||||
import { openaiProvider } from "@earendil-works/pi-ai/providers/openai";
|
||||
import { NodeExecutionEnv } from "../../src/harness/env/nodejs.ts";
|
||||
import { InMemorySessionStorage } from "../../src/harness/session/memory-storage.ts";
|
||||
import {
|
||||
@@ -35,11 +37,15 @@ const { promptTemplates: sourcedPromptTemplates } = await loadSourcedPromptTempl
|
||||
(promptTemplate, source) => ({ ...promptTemplate, source }),
|
||||
);
|
||||
|
||||
const models = createModels();
|
||||
models.setProvider(openaiProvider());
|
||||
|
||||
const session = new Session(new InMemorySessionStorage());
|
||||
const agent = new AgentHarness({
|
||||
env,
|
||||
session,
|
||||
model: getModel("openai", "gpt-5.5"),
|
||||
models,
|
||||
model: getBuiltinModel("openai", "gpt-5.5"),
|
||||
thinkingLevel: "low",
|
||||
systemPrompt: ({ env, resources }) =>
|
||||
[
|
||||
|
||||
Reference in New Issue
Block a user