mirror of
https://github.com/Egonex-AI/Understand-Anything.git
synced 2026-06-22 10:58:03 +08:00
feat(core): add fuzzy search engine with Fuse.js
Add a SearchEngine class with fuzzy matching across the node name, tags, summary, and languageNotes fields. It supports type filtering, result limiting, and dynamic re-indexing via updateNodes(). It uses Fuse.js extended search, splitting multi-word queries into OR-ed tokens.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
"vitest": "^3.1.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"fuse.js": "^7.1.0",
|
||||
"tree-sitter-javascript": "^0.25.0",
|
||||
"tree-sitter-typescript": "^0.23.2",
|
||||
"web-tree-sitter": "^0.26.6",
|
||||
|
||||
@@ -0,0 +1,158 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { SearchEngine } from "../search.js";
|
||||
import type { GraphNode } from "../types.js";
|
||||
|
||||
/**
 * Test factory for graph nodes.
 *
 * Starts from fixed defaults (`type: "file"`, empty `summary`, no `tags`,
 * `complexity: "simple"`) and spreads `overrides` on top, so any field the
 * caller supplies wins. `id` and `name` have no default and must be given.
 */
const makeNode = (
  overrides: Partial<GraphNode> & { id: string; name: string },
): GraphNode => {
  const node: GraphNode = {
    type: "file",
    summary: "",
    tags: [],
    complexity: "simple",
    // Spread last so caller-provided fields replace the defaults above.
    ...overrides,
  };
  return node;
};
|
||||
|
||||
// Shared fixtures: three classes, one file and two functions. Two of them
// ("auth-ctrl" and "auth-middleware") carry the "auth" and "security" tags,
// and only "auth-ctrl" sets languageNotes.
const sampleNodes: GraphNode[] = [
  makeNode({
    id: "auth-ctrl", name: "AuthenticationController", type: "class",
    summary: "Handles user login, logout, and session management",
    tags: ["auth", "controller", "security"],
    languageNotes: "Uses Express middleware pattern",
  }),
  makeNode({
    id: "db-pool", name: "DatabasePool", type: "class",
    summary: "Manages PostgreSQL connection pooling",
    tags: ["database", "connection"],
  }),
  makeNode({
    id: "user-model", name: "UserModel", type: "class",
    summary: "ORM model for the users table",
    tags: ["model", "database", "user"],
  }),
  makeNode({
    id: "config", name: "config.ts", type: "file",
    summary: "Application configuration and environment variables",
    tags: ["config", "env"],
  }),
  makeNode({
    id: "helpers", name: "helpers.ts", type: "function",
    summary: "Utility helper functions for string manipulation",
    tags: ["utils", "helpers"],
  }),
  makeNode({
    id: "auth-middleware", name: "authMiddleware", type: "function",
    summary: "Express middleware that validates JWT tokens for authentication",
    tags: ["auth", "middleware", "security"],
  }),
];
|
||||
|
||||
describe("SearchEngine", () => {
  // Each test builds its own engine over the shared fixtures.
  const buildEngine = (): SearchEngine => new SearchEngine(sampleNodes);

  // Projects a result list down to the matched node ids, in ranked order.
  const idsOf = (hits: Array<{ nodeId: string }>): string[] =>
    hits.map((hit) => hit.nodeId);

  it("returns empty results for empty query", () => {
    const engine = buildEngine();
    for (const blank of ["", " "]) {
      expect(engine.search(blank)).toEqual([]);
    }
  });

  it("finds exact name match", () => {
    const hits = buildEngine().search("AuthenticationController");
    expect(hits.length).toBeGreaterThan(0);
    expect(idsOf(hits)[0]).toBe("auth-ctrl");
  });

  it("finds fuzzy name match", () => {
    // "contrl" is a deliberate misspelling of "control".
    const hits = buildEngine().search("auth contrl");
    expect(hits.length).toBeGreaterThan(0);
    expect(idsOf(hits)).toContain("auth-ctrl");
  });

  it("searches across summary field", () => {
    const hits = buildEngine().search("PostgreSQL connection");
    expect(hits.length).toBeGreaterThan(0);
    expect(idsOf(hits)).toContain("db-pool");
  });

  it("searches across tags", () => {
    const hits = buildEngine().search("security");
    expect(hits.length).toBeGreaterThan(0);

    const matchedIds = idsOf(hits);
    expect(matchedIds).toContain("auth-ctrl");
    expect(matchedIds).toContain("auth-middleware");
  });

  it("ranks name matches higher than summary matches", () => {
    const hits = buildEngine().search("UserModel");
    expect(hits.length).toBeGreaterThan(0);
    // The query equals the node's name exactly, so that node must come first.
    expect(idsOf(hits)[0]).toBe("user-model");
  });

  it("returns scored results with score between 0 and 1", () => {
    const hits = buildEngine().search("database");
    expect(hits.length).toBeGreaterThan(0);
    hits.forEach((hit) => {
      expect(hit.score).toBeGreaterThanOrEqual(0);
      expect(hit.score).toBeLessThanOrEqual(1);
    });
  });

  it("can updateNodes and re-index", () => {
    const engine = buildEngine();
    const query = "PaymentService";

    // Snapshot whether the node is found before it has been added.
    const foundBeforeUpdate = idsOf(engine.search(query)).includes("payment");

    const paymentNode = makeNode({
      id: "payment",
      name: "PaymentService",
      type: "class",
      summary: "Handles payment processing",
      tags: ["payment", "billing"],
    });
    engine.updateNodes([...sampleNodes, paymentNode]);

    const hitsAfterUpdate = engine.search(query);
    expect(foundBeforeUpdate).toBe(false);
    expect(hitsAfterUpdate.length).toBeGreaterThan(0);
    expect(idsOf(hitsAfterUpdate)[0]).toBe("payment");
  });

  it("filters by node type", () => {
    const hits = buildEngine().search("auth", { types: ["function"] });
    expect(hits.length).toBeGreaterThan(0);

    // Every returned id must resolve to a function-type fixture.
    const typeById = new Map(sampleNodes.map((n) => [n.id, n.type] as const));
    for (const id of idsOf(hits)) {
      expect(typeById.get(id)).toBe("function");
    }

    // authMiddleware (function) is kept; AuthenticationController (class) is filtered out.
    const matchedIds = idsOf(hits);
    expect(matchedIds).toContain("auth-middleware");
    expect(matchedIds).not.toContain("auth-ctrl");
  });

  it("respects the limit option", () => {
    const hits = buildEngine().search("auth", { limit: 1 });
    expect(hits).toHaveLength(1);
  });
});
|
||||
@@ -10,3 +10,4 @@ export {
|
||||
parseProjectSummaryResponse,
|
||||
} from "./analyzer/llm-analyzer.js";
|
||||
export type { LLMFileAnalysis, LLMProjectSummary } from "./analyzer/llm-analyzer.js";
|
||||
export { SearchEngine, type SearchResult, type SearchOptions } from "./search.js";
|
||||
|
||||
@@ -0,0 +1,65 @@
|
||||
import Fuse, { type IFuseOptions } from "fuse.js";
|
||||
import type { GraphNode } from "./types.js";
|
||||
|
||||
/** One hit returned by `SearchEngine.search`. */
export interface SearchResult {
  /** `id` of the matched graph node. */
  nodeId: string;
  /** Match score: 0 = perfect match, 1 = worst match. */
  score: number;
}
|
||||
|
||||
/** Optional refinements for `SearchEngine.search`. */
export interface SearchOptions {
  /** Keep only nodes whose `type` is listed; omitted or empty means no type filter. */
  types?: GraphNode["type"][];
  /** Maximum number of results to return; `search` falls back to 50 when omitted. */
  limit?: number;
}
|
||||
|
||||
/**
 * Fuse.js configuration shared by every index that `SearchEngine` builds.
 */
const FUSE_OPTIONS: IFuseOptions<GraphNode> = {
  // Needed for the `|` (OR) operator that `search` puts between query tokens.
  useExtendedSearch: true,
  // Expose the per-result score so it can be returned to callers.
  includeScore: true,
  // Match anywhere in a field rather than favouring the start of the string.
  ignoreLocation: true,
  // Fuzziness cut-off for a field to count as a match.
  threshold: 0.4,
  // Searched fields, weighted so a name hit outranks tags, summary and notes.
  keys: [
    { name: "name", weight: 0.4 },
    { name: "tags", weight: 0.3 },
    { name: "summary", weight: 0.2 },
    { name: "languageNotes", weight: 0.1 },
  ],
};
|
||||
|
||||
/**
 * Fuzzy search over graph nodes, backed by a Fuse.js index configured with
 * `FUSE_OPTIONS`.
 */
export class SearchEngine {
  private fuse: Fuse<GraphNode>;
  // Node list the current index was built from; no method in this class reads it.
  private nodes: GraphNode[];

  /**
   * @param nodes Nodes to index. The index is built immediately.
   */
  constructor(nodes: GraphNode[]) {
    this.nodes = nodes;
    this.fuse = new Fuse(nodes, FUSE_OPTIONS);
  }

  /**
   * Runs a fuzzy search and returns matching node ids with their scores.
   *
   * The query is split on whitespace and the tokens are OR-ed, so
   * "auth contrl" matches items that match either token.
   *
   * @param query Free-text query. Blank input yields no results.
   * @param options Optional type filter and result limit (default 50).
   * @returns Matches in the order Fuse.js ranked them, type-filtered first and
   *   then truncated to the limit.
   */
  search(query: string, options?: SearchOptions): SearchResult[] {
    const extendedQuery = SearchEngine.toExtendedQuery(query);
    if (extendedQuery === null) {
      return [];
    }

    // slice() reads a negative end index as "count from the back", which would
    // silently drop trailing results; clamp so a negative limit returns nothing.
    const limit = Math.max(0, options?.limit ?? 50);

    let matches = this.fuse.search(extendedQuery);

    const types = options?.types;
    if (types && types.length > 0) {
      const allowedTypes = new Set(types);
      matches = matches.filter((match) => allowedTypes.has(match.item.type));
    }

    return matches.slice(0, limit).map((match) => ({
      nodeId: match.item.id,
      score: match.score ?? 0,
    }));
  }

  /**
   * Replaces the indexed nodes and rebuilds the Fuse.js index from scratch.
   *
   * @param nodes The complete new node list (not a delta).
   */
  updateNodes(nodes: GraphNode[]): void {
    this.nodes = nodes;
    this.fuse = new Fuse(nodes, FUSE_OPTIONS);
  }

  /**
   * Converts raw user input into a Fuse.js extended-search pattern whose
   * tokens are joined with ` | ` (OR).
   *
   * Extended search gives special meaning to a leading `=`, `'`, `!` or `^`
   * and to a trailing `$`. Typed into a plain search box these would change
   * the match mode (for example `!auth` would match everything that does NOT
   * contain "auth"), so they are stripped and each token is matched fuzzily.
   *
   * @returns The pattern, or `null` when no searchable token remains.
   */
  private static toExtendedQuery(query: string): string | null {
    const tokens = query
      .trim()
      .split(/\s+/)
      .map((token) => token.replace(/^[=!'^]+/, "").replace(/\$+$/, ""))
      // Drop tokens that are empty or consist only of the OR separator itself.
      .filter((token) => token.length > 0 && !/^\|+$/.test(token));

    return tokens.length > 0 ? tokens.join(" | ") : null;
  }
}
|
||||
Generated
+9
@@ -17,6 +17,9 @@ importers:
|
||||
|
||||
packages/core:
|
||||
dependencies:
|
||||
fuse.js:
|
||||
specifier: ^7.1.0
|
||||
version: 7.1.0
|
||||
tree-sitter-javascript:
|
||||
specifier: ^0.25.0
|
||||
version: 0.25.0
|
||||
@@ -962,6 +965,10 @@ packages:
|
||||
engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
|
||||
os: [darwin]
|
||||
|
||||
fuse.js@7.1.0:
|
||||
resolution: {integrity: sha512-trLf4SzuuUxfusZADLINj+dE8clK1frKdmqiJNb1Es75fmI5oY6X2mxLVUciLLjxqw/xr72Dhy+lER6dGd02FQ==}
|
||||
engines: {node: '>=10'}
|
||||
|
||||
gensync@1.0.0-beta.2:
|
||||
resolution: {integrity: sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==}
|
||||
engines: {node: '>=6.9.0'}
|
||||
@@ -2139,6 +2146,8 @@ snapshots:
|
||||
fsevents@2.3.3:
|
||||
optional: true
|
||||
|
||||
fuse.js@7.1.0: {}
|
||||
|
||||
gensync@1.0.0-beta.2: {}
|
||||
|
||||
graceful-fs@4.2.11: {}
|
||||
|
||||
Reference in New Issue
Block a user