feat(core): add fuzzy search engine with Fuse.js

Add SearchEngine class with fuzzy matching across node name, tags,
summary, and languageNotes fields. Supports type filtering, result
limiting, and dynamic re-indexing via updateNodes(). Uses Fuse.js
extended search with OR-token splitting for multi-word queries.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Lum1104
2026-03-14 18:29:33 +08:00
Unverified
parent b41cf15ce9
commit a243a07d6f
5 changed files with 234 additions and 0 deletions
+1
View File
@@ -14,6 +14,7 @@
"vitest": "^3.1.0"
},
"dependencies": {
"fuse.js": "^7.1.0",
"tree-sitter-javascript": "^0.25.0",
"tree-sitter-typescript": "^0.23.2",
"web-tree-sitter": "^0.26.6",
+158
View File
@@ -0,0 +1,158 @@
import { describe, it, expect } from "vitest";
import { SearchEngine } from "../search.js";
import type { GraphNode } from "../types.js";
/**
 * Test fixture factory for graph nodes.
 *
 * Callers must supply `id` and `name`; every other field falls back to a
 * neutral default and can be overridden individually.
 *
 * @param overrides Fields to set on the node; these win over the defaults.
 * @returns A complete `GraphNode` built from the defaults plus `overrides`.
 */
const makeNode = (overrides: Partial<GraphNode> & { id: string; name: string }): GraphNode => {
  // Built inside the function so each fixture gets its own `tags` array.
  const defaults: Pick<GraphNode, "type" | "summary" | "tags" | "complexity"> = {
    type: "file",
    summary: "",
    tags: [],
    complexity: "simple",
  };
  return { ...defaults, ...overrides };
};
/**
 * Shared fixture graph for the SearchEngine tests: three classes, one file,
 * and two functions. Two of the nodes carry the "auth" and "security" tags,
 * and only the first node sets `languageNotes`.
 */
const sampleNodes: GraphNode[] = Array.of<Parameters<typeof makeNode>[0]>(
  {
    id: "auth-ctrl",
    name: "AuthenticationController",
    type: "class",
    summary: "Handles user login, logout, and session management",
    tags: ["auth", "controller", "security"],
    languageNotes: "Uses Express middleware pattern",
  },
  {
    id: "db-pool",
    name: "DatabasePool",
    type: "class",
    summary: "Manages PostgreSQL connection pooling",
    tags: ["database", "connection"],
  },
  {
    id: "user-model",
    name: "UserModel",
    type: "class",
    summary: "ORM model for the users table",
    tags: ["model", "database", "user"],
  },
  {
    id: "config",
    name: "config.ts",
    type: "file",
    summary: "Application configuration and environment variables",
    tags: ["config", "env"],
  },
  {
    id: "helpers",
    name: "helpers.ts",
    type: "function",
    summary: "Utility helper functions for string manipulation",
    tags: ["utils", "helpers"],
  },
  {
    id: "auth-middleware",
    name: "authMiddleware",
    type: "function",
    summary: "Express middleware that validates JWT tokens for authentication",
    tags: ["auth", "middleware", "security"],
  },
).map((spec) => makeNode(spec));
/**
 * Behavioural tests for SearchEngine, all run against the shared
 * `sampleNodes` fixture.
 */
describe("SearchEngine", () => {
  // Every test builds its own engine so a re-index in one test cannot affect another.
  const buildEngine = (): SearchEngine => new SearchEngine(sampleNodes);

  // Reduces search hits to their node ids for membership checks.
  const idsOf = (hits: { nodeId: string }[]): string[] => hits.map((hit) => hit.nodeId);

  it("returns empty results for empty query", () => {
    const engine = buildEngine();
    for (const blankQuery of ["", " "]) {
      expect(engine.search(blankQuery)).toEqual([]);
    }
  });

  it("finds exact name match", () => {
    const hits = buildEngine().search("AuthenticationController");

    expect(hits.length).toBeGreaterThan(0);
    expect(hits[0].nodeId).toBe("auth-ctrl");
  });

  it("finds fuzzy name match", () => {
    // "contrl" is a deliberate misspelling of "control".
    const hits = buildEngine().search("auth contrl");

    expect(hits.length).toBeGreaterThan(0);
    expect(idsOf(hits).includes("auth-ctrl")).toBe(true);
  });

  it("searches across summary field", () => {
    const hits = buildEngine().search("PostgreSQL connection");

    expect(hits.length).toBeGreaterThan(0);
    expect(idsOf(hits).includes("db-pool")).toBe(true);
  });

  it("searches across tags", () => {
    const hits = buildEngine().search("security");
    expect(hits.length).toBeGreaterThan(0);

    const matchedIds = idsOf(hits);
    expect(matchedIds).toContain("auth-ctrl");
    expect(matchedIds).toContain("auth-middleware");
  });

  it("ranks name matches higher than summary matches", () => {
    const hits = buildEngine().search("UserModel");

    expect(hits.length).toBeGreaterThan(0);
    // The query is exactly the node's name, so that node must come first.
    expect(hits[0].nodeId).toBe("user-model");
  });

  it("returns scored results with score between 0 and 1", () => {
    const hits = buildEngine().search("database");

    expect(hits.length).toBeGreaterThan(0);
    hits.forEach(({ score }) => {
      expect(score).toBeGreaterThanOrEqual(0);
      expect(score).toBeLessThanOrEqual(1);
    });
  });

  it("can updateNodes and re-index", () => {
    const engine = buildEngine();
    const paymentNode = makeNode({
      id: "payment",
      name: "PaymentService",
      type: "class",
      summary: "Handles payment processing",
      tags: ["payment", "billing"],
    });

    // The payment node is not indexed yet, so it must not be among the hits.
    const paymentFoundBefore = idsOf(engine.search("PaymentService")).includes("payment");

    // Re-index with the extra node appended.
    engine.updateNodes([...sampleNodes, paymentNode]);
    const hitsAfter = engine.search("PaymentService");

    expect(paymentFoundBefore).toBe(false);
    expect(hitsAfter.length).toBeGreaterThan(0);
    expect(hitsAfter[0].nodeId).toBe("payment");
  });

  it("filters by node type", () => {
    const hits = buildEngine().search("auth", { types: ["function"] });
    expect(hits.length).toBeGreaterThan(0);

    const matchedIds = idsOf(hits);

    // Every returned id must resolve to a function-type fixture node.
    for (const nodeId of matchedIds) {
      const fixture = sampleNodes.find((candidate) => candidate.id === nodeId);
      expect(fixture?.type).toBe("function");
    }

    // authMiddleware (function) is kept; AuthenticationController (class) is filtered out.
    expect(matchedIds.includes("auth-middleware")).toBe(true);
    expect(matchedIds.includes("auth-ctrl")).toBe(false);
  });

  it("respects the limit option", () => {
    const hits = buildEngine().search("auth", { limit: 1 });

    expect(hits.length).toBe(1);
  });
});
+1
View File
@@ -10,3 +10,4 @@ export {
parseProjectSummaryResponse,
} from "./analyzer/llm-analyzer.js";
export type { LLMFileAnalysis, LLMProjectSummary } from "./analyzer/llm-analyzer.js";
export { SearchEngine, type SearchResult, type SearchOptions } from "./search.js";
+65
View File
@@ -0,0 +1,65 @@
import Fuse, { type IFuseOptions } from "fuse.js";
import type { GraphNode } from "./types.js";
/** A single hit returned by `SearchEngine.search`. */
export interface SearchResult {
  /** `id` of the matched graph node. */
  nodeId: string;
  /** Match quality; `search` reports 0 when Fuse supplies no score. */
  score: number; // 0 = perfect match, 1 = worst match
}
/** Optional refinements accepted by `SearchEngine.search`. */
export interface SearchOptions {
  /** When present and non-empty, only nodes whose `type` is in this list are returned. */
  types?: GraphNode["type"][];
  /** Maximum number of results to return; `search` uses 50 when this is omitted. */
  limit?: number;
}
/**
 * Fuse.js configuration shared by every index that `SearchEngine` builds.
 *
 * Four node fields are searched. Their weights sum to 1.0, with `name`
 * weighted highest and `languageNotes` lowest.
 */
const FUSE_OPTIONS: IFuseOptions<GraphNode> = {
  keys: [
    { name: "name", weight: 0.4 },
    { name: "tags", weight: 0.3 },
    { name: "summary", weight: 0.2 },
    { name: "languageNotes", weight: 0.1 },
  ],
  // NOTE(review): per the Fuse.js option docs, 0.0 demands an exact match and
  // 1.0 matches anything — confirm 0.4 is the intended fuzziness.
  threshold: 0.4,
  // `SearchEngine.search` reads `score` off every raw result.
  includeScore: true,
  // NOTE(review): presumably lets a term match anywhere inside long fields
  // such as `summary` — confirm against the Fuse.js option docs.
  ignoreLocation: true,
  // `SearchEngine.search` joins query tokens with " | " (OR), which is extended-search syntax.
  useExtendedSearch: true,
};
/**
 * Fuzzy text search over graph nodes, backed by a Fuse.js index configured
 * with `FUSE_OPTIONS`.
 *
 * The index is built in the constructor and rebuilt from scratch by
 * `updateNodes`.
 */
export class SearchEngine {
  private fuse: Fuse<GraphNode>;
  // Most recent node list handed to the engine; stored but not read by this class.
  private nodes: GraphNode[];

  /**
   * @param nodes Nodes to index.
   */
  constructor(nodes: GraphNode[]) {
    this.nodes = nodes;
    this.fuse = new Fuse(nodes, FUSE_OPTIONS);
  }

  /**
   * Searches the indexed nodes.
   *
   * The query is split on whitespace and the tokens are OR-ed together, so a
   * node matching any single token is returned.
   *
   * NOTE(review): extended search is enabled, so tokens that begin with Fuse
   * operator characters (for example `!`, `^`, `=`, `'`) are presumably
   * treated as operators rather than literal text — confirm whether user
   * input needs sanitising.
   *
   * @param query Free-text query; a blank or whitespace-only query yields `[]`.
   * @param options Optional type filter and result cap (default cap: 50).
   *   A negative `limit` is treated as 0.
   * @returns Hits in the order Fuse returns them, filtered by type and then
   *   truncated to the limit.
   */
  search(query: string, options?: SearchOptions): SearchResult[] {
    const trimmed = query.trim();
    if (!trimmed) {
      return [];
    }

    // A negative value passed to slice() counts from the END of the array, so
    // `limit: -1` used to return "everything except the last hit". Clamp to 0.
    const limit = Math.max(0, options?.limit ?? 50);
    if (limit === 0) {
      // Nothing can be returned, so skip the index lookup.
      return [];
    }

    // Extended search: "auth contrl" becomes "auth | contrl", which matches
    // items containing either token.
    const extendedQuery = trimmed.split(/\s+/).join(" | ");
    const rawResults = this.fuse.search(extendedQuery);

    // An absent or empty `types` list means "no type filtering".
    const requestedTypes = options?.types;
    const allowedTypes =
      requestedTypes && requestedTypes.length > 0 ? new Set(requestedTypes) : undefined;
    const filtered = allowedTypes
      ? rawResults.filter((r) => allowedTypes.has(r.item.type))
      : rawResults;

    // Filter first, then truncate, so the cap applies to the filtered hits.
    return filtered.slice(0, limit).map((r) => ({
      nodeId: r.item.id,
      score: r.score ?? 0,
    }));
  }

  /**
   * Replaces the indexed node set and rebuilds the Fuse index from scratch.
   *
   * @param nodes The complete new node list (not a delta).
   */
  updateNodes(nodes: GraphNode[]): void {
    this.nodes = nodes;
    this.fuse = new Fuse(nodes, FUSE_OPTIONS);
  }
}
+9
View File
@@ -17,6 +17,9 @@ importers:
packages/core:
dependencies:
fuse.js:
specifier: ^7.1.0
version: 7.1.0
tree-sitter-javascript:
specifier: ^0.25.0
version: 0.25.0
@@ -962,6 +965,10 @@ packages:
engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
os: [darwin]
fuse.js@7.1.0:
resolution: {integrity: sha512-trLf4SzuuUxfusZADLINj+dE8clK1frKdmqiJNb1Es75fmI5oY6X2mxLVUciLLjxqw/xr72Dhy+lER6dGd02FQ==}
engines: {node: '>=10'}
gensync@1.0.0-beta.2:
resolution: {integrity: sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==}
engines: {node: '>=6.9.0'}
@@ -2139,6 +2146,8 @@ snapshots:
fsevents@2.3.3:
optional: true
fuse.js@7.1.0: {}
gensync@1.0.0-beta.2: {}
graceful-fs@4.2.11: {}