Mirror of https://github.com/musistudio/claude-code-router.git, synced 2026-02-19 07:00:49 +08:00.
Commit message: "add tokenizer". This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import Server, { calculateTokenCount } from "@musistudio/llms";
|
||||
import Server, { calculateTokenCount, TokenizerService } from "@musistudio/llms";
|
||||
import { readConfigFile, writeConfigFile, backupConfigFile } from "./utils";
|
||||
import { join } from "path";
|
||||
import fastifyStatic from "@fastify/static";
|
||||
@@ -34,7 +34,48 @@ export const createServer = async (config: any): Promise<any> => {
|
||||
});
|
||||
|
||||
app.post("/v1/messages/count_tokens", async (req: any, reply: any) => {
|
||||
const {messages, tools, system} = req.body;
|
||||
const {messages, tools, system, model} = req.body;
|
||||
const tokenizerService = (app as any)._server!.tokenizerService as TokenizerService;
|
||||
|
||||
// If model is specified in "providerName,modelName" format, use the configured tokenizer
|
||||
if (model && model.includes(",") && tokenizerService) {
|
||||
try {
|
||||
const [provider, modelName] = model.split(",");
|
||||
req.log?.info(`Looking up tokenizer for provider: ${provider}, model: ${modelName}`);
|
||||
|
||||
const tokenizerConfig = tokenizerService.getTokenizerConfigForModel(provider, modelName);
|
||||
|
||||
if (!tokenizerConfig) {
|
||||
req.log?.warn(`No tokenizer config found for ${provider},${modelName}, using default tiktoken`);
|
||||
} else {
|
||||
req.log?.info(`Using tokenizer config: ${JSON.stringify(tokenizerConfig)}`);
|
||||
}
|
||||
|
||||
const result = await tokenizerService.countTokens(
|
||||
{ messages, system, tools },
|
||||
tokenizerConfig
|
||||
);
|
||||
|
||||
return {
|
||||
"input_tokens": result.tokenCount,
|
||||
"tokenizer": result.tokenizerUsed,
|
||||
};
|
||||
} catch (error: any) {
|
||||
req.log?.error(`Error using configured tokenizer: ${error.message}`);
|
||||
req.log?.error(error.stack);
|
||||
// Fall back to default calculation
|
||||
}
|
||||
} else {
|
||||
if (!model) {
|
||||
req.log?.info(`No model specified, using default tiktoken`);
|
||||
} else if (!model.includes(",")) {
|
||||
req.log?.info(`Model "${model}" does not contain comma, using default tiktoken`);
|
||||
} else if (!tokenizerService) {
|
||||
req.log?.warn(`TokenizerService not available, using default tiktoken`);
|
||||
}
|
||||
}
|
||||
|
||||
// Default to tiktoken calculation
|
||||
const tokenCount = calculateTokenCount(messages, system, tools);
|
||||
return { "input_tokens": tokenCount }
|
||||
});
|
||||
|
||||
packages/server/src/types.d.ts (vendored) — 50 lines changed
@@ -68,4 +68,54 @@ declare module "@musistudio/llms" {
|
||||
constructor(configService: any, logger: any);
|
||||
initialize(): Promise<void>;
|
||||
}
|
||||
|
||||
// Tokenizer types
|
||||
export type TokenizerType = 'tiktoken' | 'huggingface' | 'api';
|
||||
export type ApiRequestFormat = 'standard' | 'openai' | 'anthropic' | 'custom';
|
||||
|
||||
export interface TokenizerConfig {
|
||||
type: TokenizerType;
|
||||
encoding?: string;
|
||||
model?: string;
|
||||
url?: string;
|
||||
apiKey?: string;
|
||||
requestFormat?: ApiRequestFormat;
|
||||
responseField?: string;
|
||||
headers?: Record<string, string>;
|
||||
fallback?: TokenizerType;
|
||||
}
|
||||
|
||||
export interface TokenizeRequest {
|
||||
messages: Array<{
|
||||
role: string;
|
||||
content: string | Array<{
|
||||
type: string;
|
||||
text?: string;
|
||||
input?: any;
|
||||
content?: string | any;
|
||||
}>;
|
||||
}>;
|
||||
system?: string | Array<{
|
||||
type: string;
|
||||
text?: string | string[];
|
||||
}>;
|
||||
tools?: Array<{
|
||||
name: string;
|
||||
description?: string;
|
||||
input_schema: object;
|
||||
}>;
|
||||
}
|
||||
|
||||
export interface TokenizerResult {
|
||||
tokenCount: number;
|
||||
tokenizerUsed: string;
|
||||
cached: boolean;
|
||||
}
|
||||
|
||||
export class TokenizerService {
|
||||
countTokens(request: TokenizeRequest, config?: TokenizerConfig): Promise<TokenizerResult>;
|
||||
getTokenizerConfigForModel(providerName: string, modelName: string): TokenizerConfig | undefined;
|
||||
clearCache(): void;
|
||||
dispose(): void;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user