diff --git a/packages/ai/README.md b/packages/ai/README.md index 4190fcb8b..72ab339bc 100644 --- a/packages/ai/README.md +++ b/packages/ai/README.md @@ -1,6 +1,6 @@ # @earendil-works/pi-ai -Unified LLM API with automatic model discovery, provider configuration, token and cost tracking, and simple context persistence and hand-off to other models mid-session. +Unified LLM API with provider collections, automatic auth resolution, token and cost tracking, and simple context persistence and hand-off to other models mid-session. **Note**: This library only includes models that support tool calling (function calling), as this is essential for agentic workflows. @@ -9,6 +9,16 @@ Unified LLM API with automatic model discovery, provider configuration, token an - [Supported Providers](#supported-providers) - [Installation](#installation) - [Quick Start](#quick-start) +- [Providers and Models](#providers-and-models) + - [Provider Factories](#provider-factories) + - [All Built-in Providers](#all-built-in-providers) + - [Querying Models](#querying-models) + - [Static Catalog Reads](#static-catalog-reads) + - [Dynamic Providers](#dynamic-providers) +- [Auth](#auth) + - [How Auth Resolves](#how-auth-resolves) + - [Credential Store](#credential-store) + - [Environment Variables](#environment-variables) - [Tools](#tools) - [Defining Tools](#defining-tools) - [Handling Tool Calls](#handling-tool-calls) @@ -17,8 +27,6 @@ Unified LLM API with automatic model discovery, provider configuration, token an - [Complete Event Reference](#complete-event-reference) - [Image Input](#image-input) - [Image Generation](#image-generation) - - [Basic Image Generation](#basic-image-generation) - - [Notes and Limitations](#notes-and-limitations) - [Thinking/Reasoning](#thinkingreasoning) - [Unified Interface](#unified-interface-streamsimplecompletesimple) - [Provider-Specific Options](#provider-specific-options-streamcomplete) @@ -27,25 +35,21 @@ Unified LLM API with automatic model discovery, provider 
configuration, token an - [Error Handling](#error-handling) - [Aborting Requests](#aborting-requests) - [Continuing After Abort](#continuing-after-abort) -- [APIs, Models, and Providers](#apis-models-and-providers) - - [Providers and Models](#providers-and-models) - - [Querying Providers and Models](#querying-providers-and-models) - - [Custom Models](#custom-models) + - [Debugging Provider Payloads](#debugging-provider-payloads) +- [Custom Providers](#custom-providers) + - [createProvider()](#createprovider) + - [Calling API Implementations Directly](#calling-api-implementations-directly) - [OpenAI Compatibility Settings](#openai-compatibility-settings) - - [Type Safety](#type-safety) +- [Faux Provider for Tests](#faux-provider-for-tests) - [Cross-Provider Handoffs](#cross-provider-handoffs) - [Context Serialization](#context-serialization) - [Browser Usage](#browser-usage) - - [Browser Compatibility Notes](#browser-compatibility-notes) - - [Environment Variables](#environment-variables-nodejs-only) - - [Checking Environment Variables](#checking-environment-variables) - [OAuth Providers](#oauth-providers) - [Vertex AI](#vertex-ai) - [CLI Login](#cli-login) - [Programmatic OAuth](#programmatic-oauth) - - [Login Flow Example](#login-flow-example) - - [Using OAuth Tokens](#using-oauth-tokens) - - [Provider Notes](#provider-notes) +- [Migrating from the Old Global API](#migrating-from-the-old-global-api) +- [Development](#development) - [License](#license) ## Supported Providers @@ -89,11 +93,17 @@ TypeBox exports are re-exported from `@earendil-works/pi-ai`: `Type`, `Static`, ## Quick Start -```typescript -import { Type, getModel, stream, complete, Context, Tool, StringEnum } from '@earendil-works/pi-ai'; +You build a `Models` collection, register the providers you want, and stream through it. Importing a provider pulls only that provider's catalog; SDKs load lazily on first request. 
-// Fully typed with auto-complete support for both providers and models -const model = getModel('openai', 'gpt-4o-mini'); +```typescript +import { Type, createModels, type Context, type Tool } from '@earendil-works/pi-ai'; +import { openaiProvider } from '@earendil-works/pi-ai/providers/openai'; + +const models = createModels(); +models.setProvider(openaiProvider()); + +// Sync lookup against the collection +const model = models.getModel('openai', 'gpt-4o-mini')!; // Define tools with TypeBox schemas for type safety and validation const tools: Tool[] = [{ @@ -107,46 +117,25 @@ const tools: Tool[] = [{ // Build a conversation context (easily serializable and transferable between models) const context: Context = { systemPrompt: 'You are a helpful assistant.', - messages: [{ role: 'user', content: 'What time is it?' }], + messages: [{ role: 'user', content: 'What time is it?', timestamp: Date.now() }], tools }; -// Option 1: Streaming with all event types -const s = stream(model, context); +// Option 1: Streaming with all event types. +// Auth resolves through the provider (OPENAI_API_KEY from the environment here). 
+const s = models.stream(model, context); for await (const event of s) { switch (event.type) { case 'start': console.log(`Starting with ${event.partial.model}`); break; - case 'text_start': - console.log('\n[Text started]'); - break; case 'text_delta': process.stdout.write(event.delta); break; - case 'text_end': - console.log('\n[Text ended]'); - break; - case 'thinking_start': - console.log('[Model is thinking...]'); - break; case 'thinking_delta': process.stdout.write(event.delta); break; - case 'thinking_end': - console.log('[Thinking complete]'); - break; - case 'toolcall_start': - console.log(`\n[Tool call started: index ${event.contentIndex}]`); - break; - case 'toolcall_delta': - // Partial tool arguments are being streamed - const partialCall = event.partial.content[event.contentIndex]; - if (partialCall.type === 'toolCall') { - console.log(`[Streaming args for ${partialCall.name}]`); - } - break; case 'toolcall_end': console.log(`\nTool called: ${event.toolCall.name}`); console.log(`Arguments: ${JSON.stringify(event.toolCall.arguments)}`); @@ -155,7 +144,7 @@ for await (const event of s) { console.log(`\nFinished: ${event.reason}`); break; case 'error': - console.error(`Error: ${event.error}`); + console.error(`Error: ${event.error.errorMessage}`); break; } } @@ -167,7 +156,6 @@ context.messages.push(finalMessage); // Handle tool calls if any const toolCalls = finalMessage.content.filter(b => b.type === 'toolCall'); for (const call of toolCalls) { - // Execute the tool const result = call.name === 'get_time' ? 
new Date().toLocaleString('en-US', { timeZone: call.arguments.timezone || 'UTC', @@ -189,7 +177,7 @@ for (const call of toolCalls) { // Continue if there were tool calls if (toolCalls.length > 0) { - const continuation = await complete(model, context); + const continuation = await models.complete(model, context); context.messages.push(continuation); console.log('After tool execution:', continuation.content); } @@ -198,7 +186,7 @@ console.log(`Total tokens: ${finalMessage.usage.input} in, ${finalMessage.usage. console.log(`Cost: $${finalMessage.usage.cost.total.toFixed(4)}`); // Option 2: Get complete response without streaming -const response = await complete(model, context); +const response = await models.complete(model, context); for (const block of response.content) { if (block.type === 'text') { @@ -209,6 +197,180 @@ for (const block of response.content) { } ``` +Snippets in the rest of this README assume a `models` collection set up like this (with the relevant provider registered). + +## Providers and Models + +A **provider** is the runtime unit: it owns its model catalog, its auth (API key resolution, OAuth flows), and its stream behavior. A `Models` collection holds providers and routes every request to the provider that owns the model. + +Providers internally share **API implementations** (the wire protocols): Anthropic models use `anthropic-messages`, OpenAI uses `openai-responses`, while xAI, Groq, Cerebras, OpenRouter, and most others share `openai-completions`. Mixed-API providers (GitHub Copilot, OpenCode Zen) dispatch per model. 
+ +### Provider Factories + +One factory per built-in provider, each a subpath import that pulls only that provider's catalog: + +```typescript +import { anthropicProvider } from '@earendil-works/pi-ai/providers/anthropic'; +import { openaiProvider } from '@earendil-works/pi-ai/providers/openai'; +import { openrouterProvider } from '@earendil-works/pi-ai/providers/openrouter'; +import { amazonBedrockProvider } from '@earendil-works/pi-ai/providers/amazon-bedrock'; +// ...one module per provider in the Supported Providers list + +const models = createModels(); +models.setProvider(anthropicProvider()); +models.setProvider(openrouterProvider()); +``` + +Provider SDKs (`@anthropic-ai/sdk`, `openai`, `@google/genai`, AWS) are **not** imported by registering a provider — they load lazily on the first request to a model of that API. + +### All Built-in Providers + +For apps that want everything: + +```typescript +import { builtinModels } from '@earendil-works/pi-ai/providers/all'; + +const models = builtinModels(); // a Models collection with every built-in provider registered +``` + +This imports all catalogs (it is the heavy, explicit entrypoint) but still no SDKs. + +### Querying Models + +Reads are synchronous and return the last-known lists: + +```typescript +const providers = models.getProviders(); // registered Provider objects +const provider = models.getProvider('anthropic'); // one provider + +const all = models.getModels(); // every model across providers +const anthropicModels = models.getModels('anthropic'); +const model = models.getModel('anthropic', 'claude-sonnet-4-5'); + +for (const m of anthropicModels) { + console.log(`${m.id}: ${m.name}`); + console.log(` API: ${m.api}`); + console.log(` Context: ${m.contextWindow} tokens`); + console.log(` Vision: ${m.input.includes('image')}`); + console.log(` Reasoning: ${m.reasoning}`); +} +``` + +Dynamically listed models are typed `Model`. 
Narrow with the `hasApi()` guard when you need API-specific option typing: + +```typescript +import { hasApi } from '@earendil-works/pi-ai'; + +const m = models.getModel('anthropic', 'claude-sonnet-4-5'); +if (m && hasApi(m, 'anthropic-messages')) { + // m: Model<'anthropic-messages'> — stream options fully typed + models.stream(m, context, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); +} +``` + +### Static Catalog Reads + +For tooling that wants the generated built-in catalog with full literal typing (provider and model IDs auto-complete), independent of any collection: + +```typescript +import { getBuiltinModel, getBuiltinModels, getBuiltinProviders } from '@earendil-works/pi-ai/providers/all'; + +const model = getBuiltinModel('openai', 'gpt-4o-mini'); // typed Model<'openai-responses'> +const providers = getBuiltinProviders(); +const anthropic = getBuiltinModels('anthropic'); +``` + +### Dynamic Providers + +Providers may have dynamic model lists (a llama.cpp server, a live OpenRouter listing). Reads stay sync; fetching is an explicit async verb: + +```typescript +// getModels() returns the last-known list (empty before the first refresh) +await models.refresh('llamacpp'); // fetch one provider's list; rejects on failure +await models.refresh(); // refresh all providers concurrently, best-effort +const fresh = models.getModel('llamacpp', 'qwen3-30b'); +``` + +For static built-in providers, `refresh()` is a no-op. See [createProvider()](#createprovider) for building a dynamic provider. + +## Auth + +Every provider owns its auth: how API keys resolve (stored credentials, environment variables, ambient sources like AWS profiles or gcloud ADC) and, where supported, OAuth login/refresh flows. + +### How Auth Resolves + +When you call `models.stream()`, the collection resolves auth through the owning provider and merges it into the request. 
Explicit per-request values always win: + +```typescript +// Resolved through the provider (env var, stored credential, OAuth token): +await models.complete(model, context); + +// Explicit key wins over anything the provider would resolve: +await models.complete(model, context, { apiKey: 'sk-explicit' }); +``` + +You can inspect resolution without making a request — useful for status UIs: + +```typescript +const auth = await models.getAuth(model); +if (auth) { + console.log(`configured via ${auth.source}`); // e.g. "ANTHROPIC_API_KEY", "OAuth", "stored credential" +} else { + console.log('not configured'); +} +``` + +`getAuth()` resolves to `undefined` for unconfigured providers and rejects with `ModelsError` when something is actually broken (`"oauth"`: token refresh failed, credential preserved for re-login; `"auth"`: key resolution or credential store failure). Request paths surface the same failures as stream errors. + +### Credential Store + +Stored credentials (API keys entered interactively, OAuth tokens) live in a `CredentialStore` — one type-tagged credential per provider. pi-ai ships an in-memory default; apps inject persistent storage: + +```typescript +import { createModels, type CredentialStore } from '@earendil-works/pi-ai'; + +const models = createModels({ credentials: myFileBackedStore }); +``` + +The contract is small: `read(providerId)`, `modify(providerId, fn)` (the only write path — a serialized read-modify-write), and `delete(providerId)`. OAuth token refresh runs inside `modify`, so concurrent requests and processes cannot double-refresh a rotated token. A stored credential *owns* its provider: environment variables are only consulted when nothing is stored, and a failed refresh never silently falls back to an env key. 
+ +### Environment Variables + +Built-in providers resolve these env vars (Node.js; in browsers pass `apiKey` explicitly): + +| Provider | Environment Variable(s) | +|----------|------------------------| +| OpenAI | `OPENAI_API_KEY` | +| Ant Ling | `ANT_LING_API_KEY` | +| Azure OpenAI | `AZURE_OPENAI_API_KEY` + `AZURE_OPENAI_BASE_URL` (e.g. `https://{resource}.openai.azure.com`) or `AZURE_OPENAI_RESOURCE_NAME`. Supports `*.openai.azure.com` and `*.cognitiveservices.azure.com`; root endpoints auto-normalize to `/openai/v1`. Optional: `AZURE_OPENAI_API_VERSION` (default `v1`), `AZURE_OPENAI_DEPLOYMENT_NAME_MAP`. | +| Anthropic | `ANTHROPIC_API_KEY` or `ANTHROPIC_OAUTH_TOKEN` | +| DeepSeek | `DEEPSEEK_API_KEY` | +| NVIDIA NIM | `NVIDIA_API_KEY` | +| Google | `GEMINI_API_KEY` | +| Vertex AI | `GOOGLE_CLOUD_API_KEY` or `GOOGLE_CLOUD_PROJECT` (or `GCLOUD_PROJECT`) + `GOOGLE_CLOUD_LOCATION` + ADC | +| Mistral | `MISTRAL_API_KEY` | +| Groq | `GROQ_API_KEY` | +| Cerebras | `CEREBRAS_API_KEY` | +| Cloudflare AI Gateway | `CLOUDFLARE_API_KEY` + `CLOUDFLARE_ACCOUNT_ID` + `CLOUDFLARE_GATEWAY_ID` | +| Cloudflare Workers AI | `CLOUDFLARE_API_KEY` + `CLOUDFLARE_ACCOUNT_ID` | +| xAI | `XAI_API_KEY` | +| Fireworks | `FIREWORKS_API_KEY` | +| Together AI | `TOGETHER_API_KEY` | +| OpenRouter | `OPENROUTER_API_KEY` | +| Vercel AI Gateway | `AI_GATEWAY_API_KEY` | +| zAI | `ZAI_API_KEY` | +| ZAI Coding Plan (China) | `ZAI_CODING_CN_API_KEY` | +| MiniMax | `MINIMAX_API_KEY` | +| OpenCode Zen / OpenCode Go | `OPENCODE_API_KEY` | +| Kimi For Coding | `KIMI_API_KEY` | +| Xiaomi MiMo (API billing) | `XIAOMI_API_KEY` | +| Xiaomi MiMo Token Plan (China) | `XIAOMI_TOKEN_PLAN_CN_API_KEY` | +| Xiaomi MiMo Token Plan (Amsterdam) | `XIAOMI_TOKEN_PLAN_AMS_API_KEY` | +| Xiaomi MiMo Token Plan (Singapore) | `XIAOMI_TOKEN_PLAN_SGP_API_KEY` | +| GitHub Copilot | `COPILOT_GITHUB_TOKEN` | + +Amazon Bedrock resolves ambient AWS credentials (`AWS_PROFILE`, access key pairs, `AWS_BEARER_TOKEN_BEDROCK`, ECS 
task roles, web identity tokens). Vertex AI resolves either an explicit key or gcloud Application Default Credentials plus project/location. + ## Tools Tools enable LLMs to interact with external systems. This library uses TypeBox schemas for type-safe tool definitions with automatic validation using TypeBox's built-in validator and value conversion utilities. TypeBox schemas can be serialized and deserialized as plain JSON, making them ideal for distributed systems. @@ -216,7 +378,7 @@ Tools enable LLMs to interact with external systems. This library uses TypeBox s ### Defining Tools ```typescript -import { Type, Tool, StringEnum } from '@earendil-works/pi-ai'; +import { Type, type Tool, StringEnum } from '@earendil-works/pi-ai'; // Define tool parameters with TypeBox const weatherTool: Tool = { @@ -251,11 +413,11 @@ Tool results use content blocks and can include both text and images: import { readFileSync } from 'fs'; const context: Context = { - messages: [{ role: 'user', content: 'What is the weather in London?' }], + messages: [{ role: 'user', content: 'What is the weather in London?', timestamp: Date.now() }], tools: [weatherTool] }; -const response = await complete(model, context); +const response = await models.complete(model, context); // Check for tool calls in the response for (const block of response.content) { @@ -296,7 +458,7 @@ context.messages.push({ During streaming, tool call arguments are progressively parsed as they arrive. This enables real-time UI updates before the complete arguments are available: ```typescript -const s = stream(model, context); +const s = models.stream(model, context); for await (const event of s) { if (event.type === 'toolcall_delta') { @@ -337,15 +499,13 @@ for await (const event of s) { ### Validating Tool Arguments -When using `agentLoop`, tool arguments are automatically validated against your TypeBox schemas before execution. 
If validation fails, the error is returned to the model as a tool result, allowing it to retry. - -When implementing your own tool execution loop with `stream()` or `complete()`, use `validateToolCall` to validate arguments before passing them to your tools: +When implementing your own tool execution loop, use `validateToolCall` to validate arguments before passing them to your tools: ```typescript -import { stream, validateToolCall, Tool } from '@earendil-works/pi-ai'; +import { validateToolCall, type Tool } from '@earendil-works/pi-ai'; const tools: Tool[] = [weatherTool, calculatorTool]; -const s = stream(model, { messages, tools }); +const s = models.stream(model, { messages, tools }); for await (const event of s) { if (event.type === 'toolcall_end') { @@ -398,9 +558,8 @@ Models with vision capabilities can process images. You can check if a model sup ```typescript import { readFileSync } from 'fs'; -import { getModel, complete } from '@earendil-works/pi-ai'; -const model = getModel('openai', 'gpt-4o-mini'); +const model = models.getModel('openai', 'gpt-4o-mini')!; // Check if model supports images if (model.input.includes('image')) { @@ -410,13 +569,14 @@ if (model.input.includes('image')) { const imageBuffer = readFileSync('image.png'); const base64Image = imageBuffer.toString('base64'); -const response = await complete(model, { +const response = await models.complete(model, { messages: [{ role: 'user', content: [ { type: 'text', text: 'What is in this image?' }, { type: 'image', data: base64Image, mimeType: 'image/png' } - ] + ], + timestamp: Date.now() }] }); @@ -430,14 +590,10 @@ for (const block of response.content) { ## Image Generation -Image generation uses a separate API surface from text/chat generation. Use `getImageModel()` / `getImageModels()` / `getImageProviders()` to discover image-generation models, and `generateImages()` to get the final result. - -Do not use `stream()` or `complete()` for image generation. 
Image generation is a one-shot API: `generateImages()` waits for the provider response and returns the final `AssistantImages` result. - -### Basic Image Generation +Image generation uses a separate API surface from text/chat generation and currently lives on the [compat entrypoint](#migrating-from-the-old-global-api). Use `getImageModel()` / `getImageModels()` / `getImageProviders()` to discover image-generation models, and `generateImages()` to get the final result. ```typescript -import { getImageModel, generateImages } from '@mariozechner/pi-ai'; +import { getImageModel, generateImages } from '@earendil-works/pi-ai/compat'; const model = getImageModel('openrouter', 'google/gemini-2.5-flash-image'); @@ -457,39 +613,11 @@ for (const block of result.output) { } ``` -Some models also support image input: +Notes: -```typescript -import { readFileSync } from 'fs'; - -const imageBuffer = readFileSync('input.png'); -const result = await generateImages(model, { - input: [ - { type: 'text', text: 'Create a variation of this image with a blue background.' }, - { type: 'image', data: imageBuffer.toString('base64'), mimeType: 'image/png' } - ] -}, { - apiKey: process.env.OPENROUTER_API_KEY -}); -``` - -Check capabilities on the model metadata: - -```typescript -console.log(model.input); // ['text', 'image'] -console.log(model.output); // ['image'] or ['image', 'text'] -``` - -### Notes and Limitations - -- Use `getImageModel(...)`, not `getModel(...)`. -- Use `generateImages()`, not `stream()` / `complete()`. -- Image-generation models do not participate in tool calling. -- Outputs are returned in `AssistantImages.output` and can include both base64-encoded `ImageContent` blocks and `TextContent` blocks. -- Some models return only images, others return images plus text. Check `model.output`. -- Some models accept image input, others are text-to-image only. Check `model.input`. 
-- Like the streaming APIs, image generation supports options such as `apiKey`, `signal`, `headers`, `onPayload`, and `onResponse`, and results may include `stopReason`, `responseId`, and `usage`. -- If you want a model to analyze images in a conversation or call tools, use the regular `stream()` / `complete()` APIs with a model that supports image input. +- Use `getImageModel(...)` and `generateImages()`; image-generation models do not work with the chat/stream APIs and do not participate in tool calling. +- Outputs are returned in `AssistantImages.output` and can include both base64-encoded `ImageContent` blocks and `TextContent` blocks. Check `model.output` and `model.input` for capabilities. +- Options such as `apiKey`, `signal`, `headers`, `onPayload`, and `onResponse` are supported; results may include `stopReason`, `responseId`, and `usage`. - At the moment, image generation is available through only one provider, OpenRouter. ## Thinking/Reasoning @@ -499,16 +627,11 @@ Many models support thinking/reasoning capabilities where they can show their in ### Unified Interface (streamSimple/completeSimple) ```typescript -import { getModel, streamSimple, completeSimple } from '@earendil-works/pi-ai'; - // Many models across providers support thinking/reasoning -const model = getModel('anthropic', 'claude-sonnet-4-20250514'); -// or getModel('openai', 'gpt-5-mini'); -// or getModel('google', 'gemini-2.5-flash'); -// or getModel('xai', 'grok-code-fast-1'); -// or getModel('groq', 'openai/gpt-oss-20b'); -// or getModel('cerebras', 'gpt-oss-120b'); -// or getModel('openrouter', 'z-ai/glm-4.5v'); +const model = models.getModel('anthropic', 'claude-sonnet-4-5')!; +// or models.getModel('openai', 'gpt-5-mini'); +// or models.getModel('google', 'gemini-2.5-flash'); +// or models.getModel('xai', 'grok-code-fast-1'); // Check if model supports reasoning if (model.reasoning) { @@ -516,8 +639,8 @@ if (model.reasoning) { } // Use the simplified reasoning option -const response = 
await completeSimple(model, { - messages: [{ role: 'user', content: 'Solve: 2x + 5 = 13' }] +const response = await models.completeSimple(model, { + messages: [{ role: 'user', content: 'Solve: 2x + 5 = 13', timestamp: Date.now() }] }, { reasoning: 'medium' // 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' }); @@ -534,33 +657,39 @@ for (const block of response.content) { ### Provider-Specific Options (stream/complete) -For fine-grained control, use the provider-specific options: +`models.stream()`/`complete()` accept the owning API's full option set. Use `hasApi()` to narrow a dynamically looked-up model to its API for full option typing: ```typescript -import { getModel, complete } from '@earendil-works/pi-ai'; +import { hasApi } from '@earendil-works/pi-ai'; // OpenAI Reasoning (o1, o3, gpt-5) -const openaiModel = getModel('openai', 'gpt-5-mini'); -await complete(openaiModel, context, { - reasoningEffort: 'medium', - reasoningSummary: 'detailed' // OpenAI Responses API only -}); +const openaiModel = models.getModel('openai', 'gpt-5-mini')!; +if (hasApi(openaiModel, 'openai-responses')) { + await models.complete(openaiModel, context, { + reasoningEffort: 'medium', + reasoningSummary: 'detailed' // OpenAI Responses API only + }); +} -// Anthropic Thinking (Claude Sonnet 4) -const anthropicModel = getModel('anthropic', 'claude-sonnet-4-20250514'); -await complete(anthropicModel, context, { - thinkingEnabled: true, - thinkingBudgetTokens: 8192 // Optional token limit -}); +// Anthropic Thinking +const anthropicModel = models.getModel('anthropic', 'claude-sonnet-4-5')!; +if (hasApi(anthropicModel, 'anthropic-messages')) { + await models.complete(anthropicModel, context, { + thinkingEnabled: true, + thinkingBudgetTokens: 8192 // Optional token limit + }); +} // Google Gemini Thinking -const googleModel = getModel('google', 'gemini-2.5-flash'); -await complete(googleModel, context, { - thinking: { - enabled: true, - budgetTokens: 8192 // -1 for dynamic, 0 to disable - } 
-}); +const googleModel = models.getModel('google', 'gemini-2.5-flash')!; +if (hasApi(googleModel, 'google-generative-ai')) { + await models.complete(googleModel, context, { + thinking: { + enabled: true, + budgetTokens: 8192 // -1 for dynamic, 0 to disable + } + }); +} ``` ### Streaming Thinking Content @@ -568,7 +697,7 @@ await complete(googleModel, context, { When streaming, thinking content is delivered through specific events: ```typescript -const s = streamSimple(model, context, { reasoning: 'high' }); +const s = models.streamSimple(model, context, { reasoning: 'high' }); for await (const event of s) { switch (event.type) { @@ -599,11 +728,11 @@ Every `AssistantMessage` includes a `stopReason` field that indicates how the ge ## Error Handling -When a request ends with an error (including aborts and tool call validation errors), the streaming API emits an error event: +Request failures never throw out of the stream functions: when a request ends with an error (including aborts and tool call validation errors), the streaming API emits an error event and the final message carries the details: ```typescript // In streaming -for await (const event of stream) { +for await (const event of s) { if (event.type === 'error') { // event.reason is either "error" or "aborted" // event.error is the AssistantMessage with partial content @@ -613,7 +742,7 @@ for await (const event of stream) { } // The final message will have the error details -const message = await stream.result(); +const message = await s.result(); if (message.stopReason === 'error' || message.stopReason === 'aborted') { console.error('Request failed:', message.errorMessage); // message.content contains any partial content received before the error @@ -621,21 +750,20 @@ if (message.stopReason === 'error' || message.stopReason === 'aborted') { } ``` +Auth failures (no key configured, OAuth refresh failed, unknown provider) surface the same way: as a stream error with `stopReason: "error"`. 
+ ### Aborting Requests The abort signal allows you to cancel in-progress requests. Aborted requests have `stopReason === 'aborted'`: ```typescript -import { getModel, stream } from '@earendil-works/pi-ai'; - -const model = getModel('openai', 'gpt-4o-mini'); const controller = new AbortController(); // Abort after 2 seconds setTimeout(() => controller.abort(), 2000); -const s = stream(model, { - messages: [{ role: 'user', content: 'Write a long story' }] +const s = models.stream(model, { + messages: [{ role: 'user', content: 'Write a long story', timestamp: Date.now() }] }, { signal: controller.signal }); @@ -665,7 +793,7 @@ Aborted messages can be added to the conversation context and continued in subse ```typescript const context = { messages: [ - { role: 'user', content: 'Explain quantum computing in detail' } + { role: 'user', content: 'Explain quantum computing in detail', timestamp: Date.now() } ] }; @@ -673,14 +801,14 @@ const context = { const controller1 = new AbortController(); setTimeout(() => controller1.abort(), 2000); -const partial = await complete(model, context, { signal: controller1.signal }); +const partial = await models.complete(model, context, { signal: controller1.signal }); // Add the partial response to context context.messages.push(partial); -context.messages.push({ role: 'user', content: 'Please continue' }); +context.messages.push({ role: 'user', content: 'Please continue', timestamp: Date.now() }); // Continue the conversation -const continuation = await complete(model, context); +const continuation = await models.complete(model, context); ``` ### Debugging Provider Payloads @@ -688,7 +816,7 @@ const continuation = await complete(model, context); Use the `onPayload` callback to inspect the request payload sent to the provider. This is useful for debugging request formatting issues or provider validation errors. 
```typescript -const response = await complete(model, context, { +const response = await models.complete(model, context, { onPayload: (payload) => { console.log('Provider payload:', JSON.stringify(payload, null, 2)); } @@ -697,147 +825,16 @@ const response = await complete(model, context, { The callback is supported by `stream`, `complete`, `streamSimple`, and `completeSimple`. -## APIs, Models, and Providers +## Custom Providers -The library uses a registry of API implementations. Built-in APIs include: +### createProvider() -- **`anthropic-messages`**: Anthropic Messages API (`streamAnthropic`, `AnthropicOptions`) -- **`google-generative-ai`**: Google Generative AI API (`streamGoogle`, `GoogleOptions`) -- **`google-vertex`**: Google Vertex AI API (`streamGoogleVertex`, `GoogleVertexOptions`) -- **`mistral-conversations`**: Mistral Conversations API (`streamMistral`, `MistralOptions`) -- **`openai-completions`**: OpenAI Chat Completions API (`streamOpenAICompletions`, `OpenAICompletionsOptions`) -- **`openai-responses`**: OpenAI Responses API (`streamOpenAIResponses`, `OpenAIResponsesOptions`) -- **`openai-codex-responses`**: OpenAI Codex Responses API (`streamOpenAICodexResponses`, `OpenAICodexResponsesOptions`) -- **`azure-openai-responses`**: Azure OpenAI Responses API (`streamAzureOpenAIResponses`, `AzureOpenAIResponsesOptions`) -- **`bedrock-converse-stream`**: Amazon Bedrock Converse API (`streamBedrock`, `BedrockOptions`) - -### Faux provider for tests - -`registerFauxProvider()` registers a temporary in-memory provider for tests and demos. It is opt-in and not part of the built-in provider set. +`createProvider()` builds a provider from parts: identity, auth, a model list, and an API implementation. 
Use it for local inference servers, proxies, or any OpenAI/Anthropic-compatible endpoint: ```typescript -import { - complete, - fauxAssistantMessage, - fauxText, - fauxThinking, - fauxToolCall, - registerFauxProvider, - stream, -} from '@earendil-works/pi-ai'; +import { createModels, createProvider, envApiKeyAuth, type Model } from '@earendil-works/pi-ai'; +import { openAICompletionsApi } from '@earendil-works/pi-ai/api/openai-completions.lazy'; -const registration = registerFauxProvider({ - tokensPerSecond: 50 // optional -}); - -const model = registration.getModel(); -const context = { - messages: [{ role: 'user', content: 'Summarize package.json and then call echo', timestamp: Date.now() }] -}; - -registration.setResponses([ - fauxAssistantMessage([ - fauxThinking('Need to inspect package metadata first.'), - fauxToolCall('echo', { text: 'package.json' }) - ], { stopReason: 'toolUse' }) -]); - -const first = await complete(model, context, { - sessionId: 'session-1', - cacheRetention: 'short' -}); -context.messages.push(first); - -context.messages.push({ - role: 'toolResult', - toolCallId: first.content.find((block) => block.type === 'toolCall')!.id, - toolName: 'echo', - content: [{ type: 'text', text: 'package.json contents here' }], - isError: false, - timestamp: Date.now() -}); - -registration.setResponses([ - fauxAssistantMessage([ - fauxThinking('Now I can summarize the tool output.'), - fauxText('Here is the summary.') - ]) -]); - -const s = stream(model, context); -for await (const event of s) { - console.log(event.type); -} - -// Optional: register multiple faux models for model-switching tests -const multiModel = registerFauxProvider({ - models: [ - { id: 'faux-fast', reasoning: false }, - { id: 'faux-thinker', reasoning: true } - ] -}); -const thinker = multiModel.getModel('faux-thinker'); - -console.log(thinker?.reasoning); -console.log(registration.getPendingResponseCount()); -console.log(registration.state.callCount); -registration.unregister(); 
-multiModel.unregister(); -``` - -Notes: -- Responses are consumed from a queue in request start order. -- If the queue is empty, the faux provider returns an assistant error message with `errorMessage: "No more faux responses queued"`. -- Use `registration.setResponses([...])` to replace the remaining queue and `registration.appendResponses([...])` to add more responses. -- `registration.models` exposes all registered faux models. `registration.getModel()` returns the first one, and `registration.getModel(id)` returns a specific one. -- Use `fauxAssistantMessage(...)` for scripted assistant replies. Use `fauxText(...)`, `fauxThinking(...)`, and `fauxToolCall(...)` to build content blocks without filling in low-level fields manually. -- `registration.unregister()` removes the temporary provider from the global API registry. -- Usage is estimated at roughly 1 token per 4 characters. When `sessionId` is present and `cacheRetention` is not `"none"`, prompt cache reads and writes are simulated automatically. -- Tool call arguments stream incrementally via `toolcall_delta` chunks. -- By default, each streamed chunk is emitted on its own microtask. Set `tokensPerSecond` to pace chunk delivery in real time. -- The intended use is one deterministic scripted flow per registration. If you need independent concurrent flows, register separate faux providers. - -### Providers and Models - -A **provider** offers models through a specific API. 
For example: -- **Anthropic** models use the `anthropic-messages` API -- **Google** models use the `google-generative-ai` API -- **OpenAI** models use the `openai-responses` API -- **Mistral** models use the `mistral-conversations` API -- **xAI, Cerebras, Groq, NVIDIA NIM, Together AI, etc.** models use the `openai-completions` API (OpenAI-compatible) - -### Querying Providers and Models - -```typescript -import { getProviders, getModels, getModel } from '@earendil-works/pi-ai'; - -// Get all available providers -const providers = getProviders(); -console.log(providers); // ['openai', 'anthropic', 'google', 'xai', 'groq', ...] - -// Get all models from a provider (fully typed) -const anthropicModels = getModels('anthropic'); -for (const model of anthropicModels) { - console.log(`${model.id}: ${model.name}`); - console.log(` API: ${model.api}`); // 'anthropic-messages' - console.log(` Context: ${model.contextWindow} tokens`); - console.log(` Vision: ${model.input.includes('image')}`); - console.log(` Reasoning: ${model.reasoning}`); -} - -// Get a specific model (both provider and model ID are auto-completed in IDEs) -const model = getModel('openai', 'gpt-4o-mini'); -console.log(`Using ${model.name} via ${model.api} API`); -``` - -### Custom Models - -You can create custom models for local inference servers or custom endpoints: - -```typescript -import { Model, stream } from '@earendil-works/pi-ai'; - -// Example: Ollama using OpenAI-compatible API const ollamaModel: Model<'openai-completions'> = { id: 'llama-3.1-8b', name: 'Llama 3.1 8B (Ollama)', @@ -851,53 +848,71 @@ const ollamaModel: Model<'openai-completions'> = { maxTokens: 32000 }; -// Example: LiteLLM proxy with explicit compat settings -const litellmModel: Model<'openai-completions'> = { - id: 'gpt-4o', - name: 'GPT-4o (via LiteLLM)', - api: 'openai-completions', - provider: 'litellm', - baseUrl: 'http://localhost:4000/v1', - reasoning: false, - input: ['text', 'image'], - cost: { input: 2.5, output: 10, 
cacheRead: 0, cacheWrite: 0 }, - contextWindow: 128000, - maxTokens: 16384, - compat: { - supportsStore: false, // LiteLLM doesn't support the store field - } -}; +const ollama = createProvider({ + id: 'ollama', + name: 'Ollama', + baseUrl: 'http://localhost:11434/v1', + // Every provider declares auth; keyless local servers resolve as configured with no key. + auth: { apiKey: { name: 'Ollama', resolve: async () => ({ auth: {} }) } }, + models: [ollamaModel], + api: openAICompletionsApi(), +}); -// Example: Custom endpoint with headers (bypassing Cloudflare bot detection) -const proxyModel: Model<'anthropic-messages'> = { - id: 'claude-sonnet-4', - name: 'Claude Sonnet 4 (Proxied)', - api: 'anthropic-messages', - provider: 'custom-proxy', - baseUrl: 'https://proxy.example.com/v1', - reasoning: true, - input: ['text', 'image'], - cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, - contextWindow: 200000, - maxTokens: 8192, - headers: { - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36', - 'X-Custom-Auth': 'bearer-token-here' - } -}; +const models = createModels(); +models.setProvider(ollama); -// Use the custom model -const response = await stream(ollamaModel, context, { - apiKey: 'dummy' // Ollama doesn't need a real key +await models.complete(models.getModel('ollama', 'llama-3.1-8b')!, context); +``` + +For providers with real keys, `envApiKeyAuth(displayName, envVars)` gives the standard behavior (stored credential wins, then the first set env var): + +```typescript +const proxy = createProvider({ + id: 'my-proxy', + auth: { apiKey: envApiKeyAuth('My proxy API key', ['MY_PROXY_API_KEY']) }, + models: [/* ... */], + api: openAICompletionsApi(), }); ``` -Some OpenAI-compatible servers do not understand the `developer` role used for reasoning-capable models. For those providers, set `compat.supportsDeveloperRole` to `false` so the system prompt is sent as a `system` message instead. 
If the server also does not support `reasoning_effort`, set `compat.supportsReasoningEffort` to `false` too. +Mixed-API providers pass a map keyed by `model.api`; each model dispatches to its API's implementation: + +```typescript +import { anthropicMessagesApi } from '@earendil-works/pi-ai/api/anthropic-messages.lazy'; +import { openAIResponsesApi } from '@earendil-works/pi-ai/api/openai-responses.lazy'; + +const gateway = createProvider({ + id: 'my-gateway', + auth: { apiKey: envApiKeyAuth('Gateway key', ['GATEWAY_API_KEY']) }, + models: [/* models with api: 'anthropic-messages' or 'openai-responses' */], + api: { + 'anthropic-messages': anthropicMessagesApi(), + 'openai-responses': openAIResponsesApi(), + }, +}); +``` + +Dynamic model lists use `refreshModels`; the provider lists empty until the first `models.refresh()`: + +```typescript +const llamacpp = createProvider({ + id: 'llamacpp', + auth: { apiKey: { name: 'llama.cpp', resolve: async () => ({ auth: {} }) } }, + models: [], + refreshModels: async () => fetchModelsFromServer('http://localhost:8080'), + api: openAICompletionsApi(), +}); + +models.setProvider(llamacpp); +await models.refresh('llamacpp'); +``` + +Custom models can carry `headers` (e.g. proxies behind bot detection) and `compat` flags — see [OpenAI Compatibility Settings](#openai-compatibility-settings). + +Some OpenAI-compatible servers do not understand the `developer` role used for reasoning-capable models. For those providers, set `compat.supportsDeveloperRole` to `false` so the system prompt is sent as a `system` message instead. If the server also does not support `reasoning_effort`, set `compat.supportsReasoningEffort` to `false` too. This commonly applies to Ollama, vLLM, SGLang, and similar OpenAI-compatible servers. Use model-level `thinkingLevelMap` to describe model-specific thinking controls. Keys are pi thinking levels (`off`, `minimal`, `low`, `medium`, `high`, `xhigh`). 
Missing keys use provider defaults, string values are sent to the provider, and `null` marks a level unsupported. -This commonly applies to Ollama, vLLM, SGLang, and similar OpenAI-compatible servers. You can set `compat` at the provider level or per model. - ```typescript const ollamaReasoningModel: Model<'openai-completions'> = { id: 'gpt-oss:20b', @@ -924,6 +939,36 @@ const ollamaReasoningModel: Model<'openai-completions'> = { }; ``` +### Calling API Implementations Directly + +The API implementations are importable on their own. Each module exports exactly `stream` and `streamSimple` with that API's full option typing. Direct calls bypass provider auth — pass `apiKey` explicitly: + +```typescript +import { stream } from '@earendil-works/pi-ai/api/anthropic-messages'; + +const s = stream(claudeModel, context, { + apiKey: process.env.ANTHROPIC_API_KEY, + thinkingEnabled: true, + thinkingBudgetTokens: 2048, +}); +``` + +Built-in API implementations live under `./api/`: + +| API id | Options type | +|--------|--------------| +| `anthropic-messages` | `AnthropicOptions` | +| `openai-completions` | `OpenAICompletionsOptions` | +| `openai-responses` | `OpenAIResponsesOptions` | +| `openai-codex-responses` | `OpenAICodexResponsesOptions` | +| `azure-openai-responses` | `AzureOpenAIResponsesOptions` | +| `google-generative-ai` | `GoogleOptions` | +| `google-vertex` | `GoogleVertexOptions` | +| `mistral-conversations` | `MistralOptions` | +| `bedrock-converse-stream` | `BedrockOptions` | + +Importing an implementation module loads its SDK. The `./api/<api>.lazy` wrappers (used by the provider factories) defer that load to the first request. Legacy subpaths from older releases (`./anthropic`, `./google`, `./mistral`, `./openai-completions`, ...) still resolve to the corresponding API implementation modules. + ### OpenAI Compatibility Settings The `openai-completions` API is implemented by many providers with minor differences. 
By default, the library auto-detects compatibility settings based on `baseUrl` for a small set of known OpenAI-compatible providers (Cerebras, xAI, Chutes, DeepSeek, NVIDIA NIM, Together AI, zAi, OpenCode, Cloudflare Workers AI, etc.). For custom proxies or unknown endpoints, you can override these settings via the `compat` field. For `openai-responses` models, the compat field supports Responses-specific flags. @@ -960,30 +1005,97 @@ If `compat` is not set, the library falls back to URL-based detection. If `compa - **Custom inference servers**: May use non-standard field names - **Self-hosted endpoints**: May have different feature support -### Type Safety +## Faux Provider for Tests -Models are typed by their API, which keeps the model metadata accurate. Provider-specific option types are enforced when you call the provider functions directly. The generic `stream` and `complete` functions accept `StreamOptions` with additional provider fields. +`fauxProvider()` builds an in-memory provider with scripted responses for tests and demos: ```typescript -import { streamAnthropic, type AnthropicOptions } from '@earendil-works/pi-ai'; +import { + createModels, + fauxAssistantMessage, + fauxProvider, + fauxText, + fauxThinking, + fauxToolCall, +} from '@earendil-works/pi-ai'; -// TypeScript knows this is an Anthropic model -const claude = getModel('anthropic', 'claude-sonnet-4-20250514'); +const faux = fauxProvider({ + tokensPerSecond: 50 // optional +}); -const options: AnthropicOptions = { - thinkingEnabled: true, - thinkingBudgetTokens: 2048 +const models = createModels(); +models.setProvider(faux.provider); + +const model = faux.getModel(); +const context = { + messages: [{ role: 'user', content: 'Summarize package.json and then call echo', timestamp: Date.now() }] }; -await streamAnthropic(claude, context, options); +faux.setResponses([ + fauxAssistantMessage([ + fauxThinking('Need to inspect package metadata first.'), + fauxToolCall('echo', { text: 'package.json' }) 
+ ], { stopReason: 'toolUse' }) +]); + +const first = await models.complete(model, context, { + sessionId: 'session-1', + cacheRetention: 'short' +}); +context.messages.push(first); + +context.messages.push({ + role: 'toolResult', + toolCallId: first.content.find((block) => block.type === 'toolCall')!.id, + toolName: 'echo', + content: [{ type: 'text', text: 'package.json contents here' }], + isError: false, + timestamp: Date.now() +}); + +faux.setResponses([ + fauxAssistantMessage([ + fauxThinking('Now I can summarize the tool output.'), + fauxText('Here is the summary.') + ]) +]); + +const s = models.stream(model, context); +for await (const event of s) { + console.log(event.type); +} + +// Optional: multiple faux models for model-switching tests +const multiModel = fauxProvider({ + provider: 'faux-multi', + models: [ + { id: 'faux-fast', reasoning: false }, + { id: 'faux-thinker', reasoning: true } + ] +}); +models.setProvider(multiModel.provider); +const thinker = multiModel.getModel('faux-thinker'); + +console.log(thinker?.reasoning); +console.log(faux.getPendingResponseCount()); +console.log(faux.state.callCount); ``` +Notes: +- Responses are consumed from a queue in request start order. +- If the queue is empty, the faux provider returns an assistant error message with `errorMessage: "No more faux responses queued"`. +- Use `faux.setResponses([...])` to replace the remaining queue and `faux.appendResponses([...])` to add more responses. +- `faux.models` exposes all faux models. `faux.getModel()` returns the first one, and `faux.getModel(id)` returns a specific one. +- Use `fauxAssistantMessage(...)` for scripted assistant replies. Use `fauxText(...)`, `fauxThinking(...)`, and `fauxToolCall(...)` to build content blocks without filling in low-level fields manually. +- Usage is estimated at roughly 1 token per 4 characters. When `sessionId` is present and `cacheRetention` is not `"none"`, prompt cache reads and writes are simulated automatically. 
+- Tool call arguments stream incrementally via `toolcall_delta` chunks. +- By default, each streamed chunk is emitted on its own microtask. Set `tokensPerSecond` to pace chunk delivery in real time. +- The intended use is one deterministic scripted flow per handle. If you need independent concurrent flows, create separate faux providers with distinct `provider` ids. + ## Cross-Provider Handoffs The library supports seamless handoffs between different LLM providers within the same conversation. This allows you to switch models mid-conversation while preserving context, including thinking blocks, tool calls, and tool results. -### How It Works - When messages from one provider are sent to a different provider, the library automatically transforms them for compatibility: - **User and tool result messages** are passed through unchanged @@ -991,98 +1103,86 @@ When messages from one provider are sent to a different provider, the library au - **Assistant messages from different providers** have their thinking blocks converted to text with `<thinking>` tags - **Tool calls and regular text** are preserved unchanged -### Example: Multi-Provider Conversation - ```typescript -import { getModel, complete, Context } from '@earendil-works/pi-ai'; +import { createModels, type Context } from '@earendil-works/pi-ai'; +import { anthropicProvider } from '@earendil-works/pi-ai/providers/anthropic'; +import { openaiProvider } from '@earendil-works/pi-ai/providers/openai'; +import { googleProvider } from '@earendil-works/pi-ai/providers/google'; + +const models = createModels(); +models.setProvider(anthropicProvider()); +models.setProvider(openaiProvider()); +models.setProvider(googleProvider()); + +const context: Context = { messages: [] }; // Start with Claude -const claude = getModel('anthropic', 'claude-sonnet-4-20250514'); -const context: Context = { - messages: [] -}; - -context.messages.push({ role: 'user', content: 'What is 25 * 18?' 
}); -const claudeResponse = await complete(claude, context, { - thinkingEnabled: true -}); -context.messages.push(claudeResponse); +const claude = models.getModel('anthropic', 'claude-sonnet-4-5')!; +context.messages.push({ role: 'user', content: 'What is 25 * 18?', timestamp: Date.now() }); +context.messages.push(await models.completeSimple(claude, context, { reasoning: 'medium' })); // Switch to GPT-5 - it will see Claude's thinking as tagged text -const gpt5 = getModel('openai', 'gpt-5-mini'); -context.messages.push({ role: 'user', content: 'Is that calculation correct?' }); -const gptResponse = await complete(gpt5, context); -context.messages.push(gptResponse); +const gpt5 = models.getModel('openai', 'gpt-5-mini')!; +context.messages.push({ role: 'user', content: 'Is that calculation correct?', timestamp: Date.now() }); +context.messages.push(await models.complete(gpt5, context)); // Switch to Gemini -const gemini = getModel('google', 'gemini-2.5-flash'); -context.messages.push({ role: 'user', content: 'What was the original question?' 
}); -const geminiResponse = await complete(gemini, context); +const gemini = models.getModel('google', 'gemini-2.5-flash')!; +context.messages.push({ role: 'user', content: 'What was the original question?', timestamp: Date.now() }); +const geminiResponse = await models.complete(gemini, context); ``` -### Provider Compatibility - -All providers can handle messages from other providers, including: -- Text content -- Tool calls and tool results (including images in tool results) -- Thinking/reasoning blocks (transformed to tagged text for cross-provider compatibility) -- Aborted messages with partial content - -This enables flexible workflows where you can: -- Start with a fast model for initial responses -- Switch to a more capable model for complex reasoning -- Use specialized models for specific tasks -- Maintain conversation continuity across provider outages +All providers can handle messages from other providers — text, tool calls and results (including images), thinking blocks (transformed to tagged text), and aborted messages with partial content. This enables flexible workflows: start with a fast model, switch to a more capable one for complex reasoning, or maintain continuity across provider outages. ## Context Serialization The `Context` object can be easily serialized and deserialized using standard JSON methods, making it simple to persist conversations, implement chat history, or transfer contexts between services: ```typescript -import { Context, getModel, complete } from '@earendil-works/pi-ai'; - -// Create and use a context const context: Context = { systemPrompt: 'You are a helpful assistant.', messages: [ - { role: 'user', content: 'What is TypeScript?' 
} + { role: 'user', content: 'What is TypeScript?', timestamp: Date.now() } ] }; -const model = getModel('openai', 'gpt-4o-mini'); -const response = await complete(model, context); +const model = models.getModel('openai', 'gpt-4o-mini')!; +const response = await models.complete(model, context); context.messages.push(response); // Serialize the entire context const serialized = JSON.stringify(context); -console.log('Serialized context size:', serialized.length, 'bytes'); // Save to database, localStorage, file, etc. localStorage.setItem('conversation', serialized); // Later: deserialize and continue the conversation const restored: Context = JSON.parse(localStorage.getItem('conversation')!); -restored.messages.push({ role: 'user', content: 'Tell me more about its type system' }); +restored.messages.push({ role: 'user', content: 'Tell me more about its type system', timestamp: Date.now() }); // Continue with any model -const newModel = getModel('anthropic', 'claude-3-5-haiku-20241022'); -const continuation = await complete(newModel, restored); +const newModel = models.getModel('anthropic', 'claude-3-5-haiku-20241022')!; +const continuation = await models.complete(newModel, restored); ``` +Models are plain serializable data too — no functions or implementations attached — so persisting "which model was this conversation using" is a `JSON.stringify` away. + > **Note**: If the context contains images (encoded as base64 as shown in the Image Input section), those will also be serialized. ## Browser Usage -The library supports browser environments. You must pass the API key explicitly since environment variables are not available in browsers: +The library supports browser environments. The core entrypoint and provider factories are side-effect free and bundle cleanly. 
Pass API keys explicitly since environment variables are not available in browsers: ```typescript -import { getModel, complete } from '@earendil-works/pi-ai'; +import { createModels } from '@earendil-works/pi-ai'; +import { anthropicProvider } from '@earendil-works/pi-ai/providers/anthropic'; -// API key must be passed explicitly in browser -const model = getModel('anthropic', 'claude-3-5-haiku-20241022'); +const models = createModels(); +models.setProvider(anthropicProvider()); -const response = await complete(model, { - messages: [{ role: 'user', content: 'Hello!' }] +const model = models.getModel('anthropic', 'claude-3-5-haiku-20241022')!; +const response = await models.complete(model, { + messages: [{ role: 'user', content: 'Hello!', timestamp: Date.now() }] }, { apiKey: 'your-api-key' }); @@ -1090,79 +1190,53 @@ const response = await complete(model, { > **Security Warning**: Exposing API keys in frontend code is dangerous. Anyone can extract and abuse your keys. Only use this approach for internal tools or demos. For production applications, use a backend proxy that keeps your API keys secure. -### Browser Compatibility Notes +Browser compatibility notes: -- Amazon Bedrock (`bedrock-converse-stream`) is not supported in browser environments. -- OAuth login flows are not supported in browser environments. Use the `@earendil-works/pi-ai/oauth` entry point in Node.js. -- In browser builds, Bedrock can still appear in model lists. Calls to Bedrock models fail at runtime. +- Amazon Bedrock (`bedrock-converse-stream`) is not supported in browser environments. It can still appear in model lists; calls fail at runtime. +- OAuth login flows are Node-only. They are lazy-loaded behind bundler-opaque imports, so registering an OAuth-capable provider does not pull Node-only code into a browser bundle — only actually logging in would. - Use a server-side proxy or backend service if you need Bedrock or OAuth-based auth from a web app. 
-### Environment Variables (Node.js only) - -In Node.js environments, you can set environment variables to avoid passing API keys: - -| Provider | Environment Variable(s) | -|----------|------------------------| -| OpenAI | `OPENAI_API_KEY` | -| Ant Ling | `ANT_LING_API_KEY` | -| Azure OpenAI | `AZURE_OPENAI_API_KEY` + `AZURE_OPENAI_BASE_URL` (e.g. `https://{resource}.openai.azure.com`) or `AZURE_OPENAI_RESOURCE_NAME`. Supports `*.openai.azure.com` and `*.cognitiveservices.azure.com`; root endpoints auto-normalize to `/openai/v1`. Optional: `AZURE_OPENAI_API_VERSION` (default `v1`), `AZURE_OPENAI_DEPLOYMENT_NAME_MAP`. | -| Anthropic | `ANTHROPIC_API_KEY` or `ANTHROPIC_OAUTH_TOKEN` | -| DeepSeek | `DEEPSEEK_API_KEY` | -| NVIDIA NIM | `NVIDIA_API_KEY` | -| Google | `GEMINI_API_KEY` | -| Vertex AI | `GOOGLE_CLOUD_API_KEY` or `GOOGLE_CLOUD_PROJECT` (or `GCLOUD_PROJECT`) + `GOOGLE_CLOUD_LOCATION` + ADC | -| Mistral | `MISTRAL_API_KEY` | -| Groq | `GROQ_API_KEY` | -| Cerebras | `CEREBRAS_API_KEY` | -| Cloudflare AI Gateway | `CLOUDFLARE_API_KEY` + `CLOUDFLARE_ACCOUNT_ID` + `CLOUDFLARE_GATEWAY_ID` | -| Cloudflare Workers AI | `CLOUDFLARE_API_KEY` + `CLOUDFLARE_ACCOUNT_ID` | -| xAI | `XAI_API_KEY` | -| Fireworks | `FIREWORKS_API_KEY` | -| Together AI | `TOGETHER_API_KEY` | -| OpenRouter | `OPENROUTER_API_KEY` | -| Vercel AI Gateway | `AI_GATEWAY_API_KEY` | -| zAI | `ZAI_API_KEY` | -| ZAI Coding Plan (China) | `ZAI_CODING_CN_API_KEY` | -| MiniMax | `MINIMAX_API_KEY` | -| OpenCode Zen / OpenCode Go | `OPENCODE_API_KEY` | -| Kimi For Coding | `KIMI_API_KEY` | -| Xiaomi MiMo (API billing) | `XIAOMI_API_KEY` | -| Xiaomi MiMo Token Plan (China) | `XIAOMI_TOKEN_PLAN_CN_API_KEY` | -| Xiaomi MiMo Token Plan (Amsterdam) | `XIAOMI_TOKEN_PLAN_AMS_API_KEY` | -| Xiaomi MiMo Token Plan (Singapore) | `XIAOMI_TOKEN_PLAN_SGP_API_KEY` | -| GitHub Copilot | `COPILOT_GITHUB_TOKEN` | - -When set, the library automatically uses these keys: - -```typescript -// Uses OPENAI_API_KEY from 
environment -const model = getModel('openai', 'gpt-4o-mini'); -const response = await complete(model, context); - -// Or override with explicit key -const response = await complete(model, context, { - apiKey: 'sk-different-key' -}); -``` - -### Checking Environment Variables - -```typescript -import { getEnvApiKey } from '@earendil-works/pi-ai'; - -// Check if an API key is set in environment variables -const key = getEnvApiKey('openai'); // checks OPENAI_API_KEY -``` - ## OAuth Providers -Several providers require OAuth authentication instead of static API keys: +Several providers support OAuth authentication instead of static API keys: - **Anthropic** (Claude Pro/Max subscription) - **OpenAI Codex** (ChatGPT Plus/Pro subscription, access to GPT-5.x Codex models) - **GitHub Copilot** (Copilot subscription) -For paid Cloud Code Assist subscriptions, set `GOOGLE_CLOUD_PROJECT` or `GOOGLE_CLOUD_PROJECT_ID` to your project ID. +Each of these providers carries an `OAuthAuth` on `provider.auth.oauth` with three operations: `login(callbacks)` runs the interactive flow and returns a credential, `refresh(credential)` exchanges the refresh token, and `toAuth(credential)` derives request auth (GitHub Copilot's per-account base URL comes from here). Refresh is automatic: `models.getAuth()` and the request paths refresh expired tokens under a credential-store lock, so concurrent requests and processes cannot double-refresh. 
+ +```typescript +import { createModels } from '@earendil-works/pi-ai'; +import { anthropicProvider } from '@earendil-works/pi-ai/providers/anthropic'; + +const models = createModels({ credentials: myStore }); // persistent CredentialStore +models.setProvider(anthropicProvider()); + +// Login: drive the flow with prompt()/notify() callbacks, persist the credential +const provider = models.getProvider('anthropic')!; +const credential = await provider.auth.oauth!.login({ + prompt: async (p) => { + // p.type: 'text' | 'secret' | 'select' | 'manual_code' + // manual_code prompts race a local callback server; p.signal aborts them when the server wins + return await askUser(p.message); + }, + notify: (event) => { + // event.type: 'auth_url' | 'device_code' | 'progress' + if (event.type === 'auth_url') console.log(`Open: ${event.url}`); + if (event.type === 'device_code') console.log(`Code: ${event.userCode} at ${event.verificationUri}`); + if (event.type === 'progress') console.log(event.message); + }, +}); +await myStore.modify('anthropic', async () => credential); + +// From here on, requests resolve and refresh the token automatically +const model = models.getModel('anthropic', 'claude-sonnet-4-5')!; +await models.complete(model, context); + +// Logout +await myStore.delete('anthropic'); +``` ### Vertex AI @@ -1174,8 +1248,6 @@ Vertex AI models support either a Google Cloud API key or Application Default Cr When using ADC, also set `GOOGLE_CLOUD_PROJECT` (or `GCLOUD_PROJECT`) and `GOOGLE_CLOUD_LOCATION`. You can also pass `project`/`location` in the call options. When using `GOOGLE_CLOUD_API_KEY`, `project` and `location` are not required. 
-Example: - ```bash # Local (uses your user credentials) gcloud auth application-default login @@ -1186,23 +1258,6 @@ export GOOGLE_CLOUD_LOCATION="us-central1" export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account.json" ``` -```typescript -import { getModel, complete } from '@earendil-works/pi-ai'; - -(async () => { - const model = getModel('google-vertex', 'gemini-2.5-flash'); - const response = await complete(model, { - messages: [{ role: 'user', content: 'Hello from Vertex AI' }] - }, { - apiKey: process.env.GOOGLE_CLOUD_API_KEY, - }); - - for (const block of response.content) { - if (block.type === 'text') console.log(block.text); - } -})().catch(console.error); -``` - Official docs: [Application Default Credentials](https://cloud.google.com/docs/authentication/application-default-credentials) ### CLI Login @@ -1219,76 +1274,9 @@ Credentials are saved to `auth.json` in the current directory. ### Programmatic OAuth -The library provides login and token refresh functions via the `@earendil-works/pi-ai/oauth` entry point. Credential storage is the caller's responsibility. +The legacy flow functions remain available via the `@earendil-works/pi-ai/oauth` entry point (`loginAnthropic`, `loginOpenAICodex`, `loginGitHubCopilot`, `refreshOAuthToken`, `getOAuthApiKey`); credential storage is the caller's responsibility there. New code should prefer the provider-owned `OAuthAuth` shown above — it composes with the credential store and gets locked auto-refresh for free. 
-```typescript -import { - // Login functions (return credentials, do not store) - loginAnthropic, - loginOpenAICodex, - loginGitHubCopilot, - loginGeminiCli, - - // Token management - refreshOAuthToken, // (provider, credentials) => new credentials - getOAuthApiKey, // (provider, credentialsMap) => { newCredentials, apiKey } | null - - // Types - type OAuthProvider, - type OAuthCredentials, -} from '@earendil-works/pi-ai/oauth'; -``` - -### Login Flow Example - -```typescript -import { loginGitHubCopilot } from '@earendil-works/pi-ai/oauth'; -import { writeFileSync } from 'fs'; - -const credentials = await loginGitHubCopilot({ - onAuth: (url, instructions) => { - console.log(`Open: ${url}`); - if (instructions) console.log(instructions); - }, - onPrompt: async (prompt) => { - return await getUserInput(prompt.message); - }, - onProgress: (message) => console.log(message) -}); - -// Store credentials yourself -const auth = { 'github-copilot': { type: 'oauth', ...credentials } }; -writeFileSync('auth.json', JSON.stringify(auth, null, 2)); -``` - -### Using OAuth Tokens - -Use `getOAuthApiKey()` to get an API key, automatically refreshing if expired: - -```typescript -import { getModel, complete } from '@earendil-works/pi-ai'; -import { getOAuthApiKey } from '@earendil-works/pi-ai/oauth'; -import { readFileSync, writeFileSync } from 'fs'; - -// Load your stored credentials -const auth = JSON.parse(readFileSync('auth.json', 'utf-8')); - -// Get API key (refreshes if expired) -const result = await getOAuthApiKey('github-copilot', auth); -if (!result) throw new Error('Not logged in'); - -// Save refreshed credentials -auth['github-copilot'] = { type: 'oauth', ...result.newCredentials }; -writeFileSync('auth.json', JSON.stringify(auth, null, 2)); - -// Use the API key -const model = getModel('github-copilot', 'gpt-4o'); -const response = await complete(model, { - messages: [{ role: 'user', content: 'Hello!' 
}] -}, { apiKey: result.apiKey }); -``` - -### Provider Notes +Provider notes: **OpenAI Codex**: Requires a ChatGPT Plus or Pro subscription. Provides access to GPT-5.x Codex models with extended context windows and reasoning capabilities. The library automatically handles session-based prompt caching when `sessionId` is provided in stream options. You can set `transport` in stream options to `"sse"`, `"websocket"`, or `"auto"` for Codex Responses transport selection. When using WebSocket with a `sessionId`, connections are reused per session and expire after 5 minutes of inactivity. @@ -1296,95 +1284,44 @@ const response = await complete(model, { **GitHub Copilot**: If you get "The requested model is not supported" error, enable the model manually in VS Code: open Copilot Chat, click the model selector, select the model (warning icon), and click "Enable". +## Migrating from the Old Global API + +Older versions exposed a global API: `stream()`/`complete()` dispatching on `model.api` via a global registry, sync `getModel()`/`getModels()`/`getProviders()` catalog reads, `registerApiProvider()`, `getEnvApiKey()`, and per-API lazy stream functions. That surface lives unchanged on the **compat entrypoint**: + +```typescript +// Before +import { getModel, complete } from '@earendil-works/pi-ai'; + +// After (verbatim behavior, one import-path change) +import { getModel, complete } from '@earendil-works/pi-ai/compat'; +``` + +Compat is a strict superset of the root entrypoint, so a file can switch its import path wholesale. 
It will be removed in a future release; migrate to `createModels()` + provider factories: + +| Old | New | +|-----|-----| +| `getModel('openai', 'gpt-4o-mini')` | `models.getModel('openai', 'gpt-4o-mini')` or `getBuiltinModel()` from `providers/all` | +| `getModels('anthropic')` / `getProviders()` | `models.getModels('anthropic')` / `models.getProviders()` or `getBuiltin*` | +| `stream(model, ctx, opts)` (env-key injection) | `models.stream(model, ctx, opts)` (provider auth resolution) | +| `registerApiProvider({ api, stream, streamSimple })` | `createProvider({ id, auth, models, api })` + `models.setProvider()` | +| `getEnvApiKey('openai')` | `await models.getAuth(model)` | +| `streamAnthropic(model, ctx, opts)` | `stream` from `@earendil-works/pi-ai/api/anthropic-messages`, or a provider in a collection | +| `registerFauxProvider()` | `fauxProvider()` + `models.setProvider()` | + ## Development ### Adding a New Provider -Adding a new LLM provider requires changes across multiple files. This checklist covers all necessary steps: +The package uses a layered layout: API implementations live in `src/api/`, provider factories in `src/providers/`, and generated catalogs in `src/providers/<provider>.models.ts`. -#### 1. Core Types (`src/types.ts`) - -- Add the API identifier to `KnownApi` (for example `"bedrock-converse-stream"`) -- Create an options interface extending `StreamOptions` (for example `BedrockOptions`) -- Add the provider name to `KnownProvider` (for example `"amazon-bedrock"`) - -#### 2. Provider Implementation (`src/providers/`) - -Create a new provider file (for example `amazon-bedrock.ts`) that exports: - -- `stream()` function returning `AssistantMessageEventStream` -- `streamSimple()` for `SimpleStreamOptions` mapping -- Provider-specific options interface -- Message conversion functions to transform `Context` to provider format -- Tool conversion if the provider supports tools -- Response parsing to emit standardized events (`text`, `tool_call`, `thinking`, `usage`, `stop`) - -#### 3. 
API Registry Integration (`src/providers/register-builtins.ts`) - -- Register the API with `registerApiProvider()` -- Add a package subpath export in `package.json` for the provider module (`./dist/providers/<provider>.js`) -- Add lazy loader wrappers in `src/providers/register-builtins.ts`; do not statically import provider implementation modules there -- Add any root-level `export type` re-exports in `src/index.ts` that should remain available from `@earendil-works/pi-ai` -- Add credential detection in `env-api-keys.ts` for the new provider -- Ensure `streamSimple` handles auth lookup via `getEnvApiKey()` or provider-specific auth - -#### 4. Model Generation (`scripts/generate-models.ts`, `scripts/generate-image-models.ts`) - -- Add logic to fetch and parse models from the provider's source (e.g., models.dev API) -- Map chat/tool-capable provider model data to the standardized `Model` interface via `scripts/generate-models.ts` -- Map image-generation provider model data to the standardized `ImagesModel` interface via `scripts/generate-image-models.ts` -- Handle provider-specific quirks (pricing format, capability flags, model ID transformations) - -#### 5. Tests (`test/`) - -Create or update test files to cover the new provider: - -- `stream.test.ts` - Basic streaming and tool use -- `tokens.test.ts` - Token usage reporting -- `abort.test.ts` - Request cancellation -- `empty.test.ts` - Empty message handling -- `context-overflow.test.ts` - Context limit errors -- `image-limits.test.ts` - Image support (if applicable) -- `unicode-surrogate.test.ts` - Unicode handling -- `tool-call-without-result.test.ts` - Orphaned tool calls -- `image-tool-result.test.ts` - Images in tool results -- `total-tokens.test.ts` - Token counting accuracy -- `cross-provider-handoff.test.ts` - Cross-provider context replay - -For `cross-provider-handoff.test.ts`, add at least one provider/model pair. 
If the provider exposes multiple model families (for example GPT and Claude), add at least one pair per family. - -For providers with non-standard auth (AWS, Google Vertex), create a utility like `bedrock-utils.ts` with credential detection helpers. - -#### 6. Coding Agent Integration (`../coding-agent/`) - -Update `src/core/model-resolver.ts`: - -- Add a default model ID for the provider in `DEFAULT_MODELS` - -Update `src/cli/args.ts`: - -- Add environment variable documentation in the help text - -Update `README.md`: - -- Add the provider to the providers section with setup instructions - -#### 7. Documentation - -Update `packages/ai/README.md`: - -- Add to the Supported Providers table -- Document any provider-specific options or authentication requirements -- Add the environment variable to the Environment Variables section - -#### 8. Changelog - -Add an entry to `packages/ai/CHANGELOG.md` under `## [Unreleased]`: - -```markdown -### Added -- Added support for [Provider Name] provider ([#PR](link) by [@author](link)) -``` +1. **Core types** (`src/types.ts`): add the API id to `KnownApi` (if it is a new API), the provider id to `KnownProvider`, and the options type to `ApiOptionsMap`. +2. **API implementation** (`src/api/<api>.ts`, only for a new API): export exactly `stream` and `streamSimple`, plus the options interface extending `StreamOptions`. Add a lazy wrapper `src/api/<api>.lazy.ts` (`<api>Api()` via `lazyApi()`). +3. **Catalog** (`scripts/generate-models.ts`): add fetching/mapping for the provider's models (e.g. from models.dev); regeneration emits `src/providers/<provider>.models.ts` and the aggregator. +4. **Provider factory** (`src/providers/<provider>.ts`): `createProvider()` wiring catalog + auth (`envApiKeyAuth` for standard key providers, custom `ApiKeyAuth` for ambient auth, `lazyOAuth` where OAuth exists) + the lazy API wrapper. Register it in `src/providers/all.ts`. +5. 
**Compat**: if the provider introduces a new API, register that API in the builtin list in `src/compat.ts` and add the legacy subpath in `package.json` if warranted. +6. **Tests** (`test/`): cover streaming/tools/abort/tokens for new APIs (`stream.test.ts` and friends, env-gated), `cross-provider-handoff.test.ts` pairs, and provider listing/auth in `providers.test.ts`. +7. **Docs**: update this README (Supported Providers, env var table) and add an entry to `CHANGELOG.md` under `## [Unreleased]`. +8. **coding-agent**: add the default model id in `src/core/model-resolver.ts` and the env var docs in `src/cli/args.ts`. ## License