From 60c2ead398c819ab7c50ba2ae696a4a1ee9d45d3 Mon Sep 17 00:00:00 2001 From: Sreeram Date: Tue, 31 Mar 2026 00:36:53 +0530 Subject: [PATCH 1/2] fix(core): add deterministic normalization for parallel batch analyzer output Parallel file-analyzer subagents can produce inconsistent node IDs (project-name prefixed, double-prefixed, bare paths) and invalid complexity values. Phase 3 ASSEMBLE now normalizes these deterministically before merging, preventing cascading edge drops and dashboard load failures. - Add normalize-graph.ts with normalizeNodeId, normalizeComplexity, and normalizeBatchOutput utilities - Rewrite SKILL.md Phase 3 with 6-step normalization sequence - Strengthen file-analyzer prompt with ID format warnings - Add 32 normalization tests and 2 schema boundary tests --- .../src/__tests__/normalize-graph.test.ts | 375 ++++++++++++++++++ .../core/src/__tests__/schema.test.ts | 8 + .../core/src/analyzer/normalize-graph.ts | 221 +++++++++++ .../packages/core/src/index.ts | 7 + .../skills/understand/SKILL.md | 25 +- .../skills/understand/file-analyzer-prompt.md | 2 + 6 files changed, 634 insertions(+), 4 deletions(-) create mode 100644 understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts create mode 100644 understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts diff --git a/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts new file mode 100644 index 0000000..ae83e7e --- /dev/null +++ b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts @@ -0,0 +1,375 @@ +import { describe, it, expect } from "vitest"; +import { + normalizeNodeId, + normalizeComplexity, + normalizeBatchOutput, +} from "../analyzer/normalize-graph.js"; + +describe("normalizeNodeId", () => { + it("passes through a correct file ID unchanged", () => { + expect( + normalizeNodeId("file:src/index.ts", { type: "file" }), + 
).toBe("file:src/index.ts"); + }); + + it("passes through a correct func ID unchanged", () => { + expect( + normalizeNodeId("func:src/utils.ts:formatDate", { type: "function" }), + ).toBe("func:src/utils.ts:formatDate"); + }); + + it("passes through a correct class ID unchanged", () => { + expect( + normalizeNodeId("class:src/models/User.ts:User", { type: "class" }), + ).toBe("class:src/models/User.ts:User"); + }); + + it("fixes double-prefixed IDs", () => { + expect( + normalizeNodeId("file:file:src/foo.ts", { type: "file" }), + ).toBe("file:src/foo.ts"); + }); + + it("strips project-name prefix when valid prefix follows", () => { + expect( + normalizeNodeId("my-project:file:src/foo.ts", { type: "file" }), + ).toBe("file:src/foo.ts"); + }); + + it("strips project-name prefix and adds correct prefix for bare path", () => { + expect( + normalizeNodeId("my-project:src/foo.ts", { type: "file" }), + ).toBe("file:src/foo.ts"); + }); + + it("adds file: prefix to bare paths", () => { + expect( + normalizeNodeId("frontend/src/utils/constants.ts", { type: "file" }), + ).toBe("file:frontend/src/utils/constants.ts"); + }); + + it("reconstructs func ID from filePath and name for bare paths", () => { + expect( + normalizeNodeId("formatDate", { + type: "function", + filePath: "src/utils.ts", + name: "formatDate", + }), + ).toBe("func:src/utils.ts:formatDate"); + }); + + it("reconstructs class ID from filePath and name for bare paths", () => { + expect( + normalizeNodeId("User", { + type: "class", + filePath: "src/models/User.ts", + name: "User", + }), + ).toBe("class:src/models/User.ts:User"); + }); + + it("trims whitespace", () => { + expect( + normalizeNodeId(" file:src/foo.ts ", { type: "file" }), + ).toBe("file:src/foo.ts"); + }); + + it("handles module: and concept: prefixes", () => { + expect( + normalizeNodeId("module:auth", { type: "module" }), + ).toBe("module:auth"); + expect( + normalizeNodeId("concept:caching", { type: "concept" }), + ).toBe("concept:caching"); + }); 
+ + it("handles double project-name prefix", () => { + expect( + normalizeNodeId("my-project:service:docker-compose.yml", { + type: "file", + }), + ).toBe("file:docker-compose.yml"); + }); + + it("returns empty string for empty input", () => { + expect(normalizeNodeId("", { type: "file" })).toBe(""); + }); + + it("falls back to untouched ID for unknown node type", () => { + expect(normalizeNodeId("some-id", { type: "widget" as any })).toBe("some-id"); + }); +}); + +describe("normalizeComplexity", () => { + it("passes through valid values unchanged", () => { + expect(normalizeComplexity("simple")).toBe("simple"); + expect(normalizeComplexity("moderate")).toBe("moderate"); + expect(normalizeComplexity("complex")).toBe("complex"); + }); + + it("maps 'low' to 'simple'", () => { + expect(normalizeComplexity("low")).toBe("simple"); + }); + + it("maps 'high' to 'complex'", () => { + expect(normalizeComplexity("high")).toBe("complex"); + }); + + it("maps 'medium' to 'moderate'", () => { + expect(normalizeComplexity("medium")).toBe("moderate"); + }); + + it("maps other aliases from upstream COMPLEXITY_ALIASES", () => { + expect(normalizeComplexity("easy")).toBe("simple"); + expect(normalizeComplexity("hard")).toBe("complex"); + expect(normalizeComplexity("difficult")).toBe("complex"); + expect(normalizeComplexity("intermediate")).toBe("moderate"); + }); + + it("is case-insensitive", () => { + expect(normalizeComplexity("LOW")).toBe("simple"); + expect(normalizeComplexity("High")).toBe("complex"); + expect(normalizeComplexity("MODERATE")).toBe("moderate"); + }); + + it("maps numeric 1-3 to simple", () => { + expect(normalizeComplexity(1)).toBe("simple"); + expect(normalizeComplexity(3)).toBe("simple"); + }); + + it("maps numeric 4-6 to moderate", () => { + expect(normalizeComplexity(4)).toBe("moderate"); + expect(normalizeComplexity(6)).toBe("moderate"); + }); + + it("maps numeric 7-10 to complex", () => { + expect(normalizeComplexity(7)).toBe("complex"); + 
expect(normalizeComplexity(10)).toBe("complex"); + }); + + it("defaults free-text to moderate", () => { + expect(normalizeComplexity("detailed")).toBe("moderate"); + expect(normalizeComplexity("very complex with many deps")).toBe("moderate"); + }); + + it("defaults undefined/null to moderate", () => { + expect(normalizeComplexity(undefined)).toBe("moderate"); + expect(normalizeComplexity(null)).toBe("moderate"); + }); + + it("defaults zero and negative numbers to moderate", () => { + expect(normalizeComplexity(0)).toBe("moderate"); + expect(normalizeComplexity(-5)).toBe("moderate"); + }); +}); + +describe("normalizeBatchOutput", () => { + it("normalizes IDs and numeric complexity, rewrites edges", () => { + const result = normalizeBatchOutput({ + nodes: [ + { + id: "file:src/good.ts", + type: "file", + name: "good.ts", + filePath: "src/good.ts", + summary: "A good file", + tags: ["util"], + complexity: "simple", + }, + { + id: "my-project:file:src/bad.ts", + type: "file", + name: "bad.ts", + filePath: "src/bad.ts", + summary: "Project-prefixed", + tags: ["api"], + complexity: "simple", + }, + { + id: "src/bare.ts", + type: "file", + name: "bare.ts", + filePath: "src/bare.ts", + summary: "Bare path", + tags: [], + complexity: 4, + }, + ], + edges: [ + { + source: "file:src/good.ts", + target: "my-project:file:src/bad.ts", + type: "imports", + direction: "forward", + weight: 0.7, + }, + { + source: "src/bare.ts", + target: "file:src/good.ts", + type: "imports", + direction: "forward", + weight: 0.7, + }, + ], + }); + + expect(result.nodes).toHaveLength(3); + expect(result.nodes[0].id).toBe("file:src/good.ts"); + expect(result.nodes[1].id).toBe("file:src/bad.ts"); + expect(result.nodes[2].id).toBe("file:src/bare.ts"); + // Only numeric complexity is fixed here; string aliases are upstream's job + expect(result.nodes[2].complexity).toBe("moderate"); + + // Edges should be rewritten through the ID map + expect(result.edges).toHaveLength(2); + 
expect(result.edges[0].source).toBe("file:src/good.ts"); + expect(result.edges[0].target).toBe("file:src/bad.ts"); + expect(result.edges[1].source).toBe("file:src/bare.ts"); + + expect(result.stats.idsFixed).toBe(2); + expect(result.stats.complexityFixed).toBe(1); // only the numeric one + expect(result.stats.edgesRewritten).toBe(2); + expect(result.stats.danglingEdgesDropped).toBe(0); + }); + + it("drops dangling edges after normalization", () => { + const result = normalizeBatchOutput({ + nodes: [ + { + id: "file:src/a.ts", + type: "file", + name: "a.ts", + summary: "File A", + tags: [], + complexity: "simple", + }, + ], + edges: [ + { + source: "file:src/a.ts", + target: "file:src/nonexistent.ts", + type: "imports", + direction: "forward", + weight: 0.7, + }, + ], + }); + + expect(result.edges).toHaveLength(0); + expect(result.stats.danglingEdgesDropped).toBe(1); + }); + + it("deduplicates nodes keeping last occurrence", () => { + const result = normalizeBatchOutput({ + nodes: [ + { + id: "file:src/a.ts", + type: "file", + name: "a.ts", + summary: "First version", + tags: [], + complexity: "simple", + }, + { + id: "file:src/a.ts", + type: "file", + name: "a.ts", + summary: "Second version", + tags: ["updated"], + complexity: "complex", + }, + ], + edges: [], + }); + + expect(result.nodes).toHaveLength(1); + expect(result.nodes[0].summary).toBe("Second version"); + }); + + it("deduplicates edges after ID rewriting", () => { + const result = normalizeBatchOutput({ + nodes: [ + { + id: "file:src/a.ts", + type: "file", + name: "a.ts", + summary: "A", + tags: [], + complexity: "simple", + }, + { + id: "file:src/b.ts", + type: "file", + name: "b.ts", + summary: "B", + tags: [], + complexity: "simple", + }, + ], + edges: [ + { + source: "file:src/a.ts", + target: "file:src/b.ts", + type: "imports", + direction: "forward", + weight: 0.7, + }, + { + source: "proj:file:src/a.ts", + target: "file:src/b.ts", + type: "imports", + direction: "forward", + weight: 0.7, + }, + 
], + }); + + // Both edges resolve to the same source after normalization — deduplicated + expect(result.edges).toHaveLength(1); + }); + + it("returns accurate stats", () => { + const result = normalizeBatchOutput({ + nodes: [ + { + id: "file:src/ok.ts", + type: "file", + name: "ok.ts", + summary: "OK", + tags: [], + complexity: "simple", + }, + { + id: "proj:file:src/fix.ts", + type: "file", + name: "fix.ts", + summary: "Needs fix", + tags: [], + complexity: 2, + }, + ], + edges: [ + { + source: "proj:file:src/fix.ts", + target: "file:src/ok.ts", + type: "imports", + direction: "forward", + weight: 0.7, + }, + { + source: "file:src/ok.ts", + target: "file:src/gone.ts", + type: "imports", + direction: "forward", + weight: 0.7, + }, + ], + }); + + expect(result.stats.idsFixed).toBe(1); + expect(result.stats.complexityFixed).toBe(1); + expect(result.stats.edgesRewritten).toBe(1); + expect(result.stats.danglingEdgesDropped).toBe(1); + expect(result.edges).toHaveLength(1); + }); +}); diff --git a/understand-anything-plugin/packages/core/src/__tests__/schema.test.ts b/understand-anything-plugin/packages/core/src/__tests__/schema.test.ts index ae3fd7e..2ef37a3 100644 --- a/understand-anything-plugin/packages/core/src/__tests__/schema.test.ts +++ b/understand-anything-plugin/packages/core/src/__tests__/schema.test.ts @@ -109,4 +109,12 @@ describe("schema validation", () => { expect(result.success).toBe(false); expect(result.errors).toBeDefined(); }); + + it("accepts node with bare string ID (schema is lenient on format)", () => { + const graph = structuredClone(validGraph); + graph.nodes[0].id = "src/foo.ts"; + + const result = validateGraph(graph); + expect(result.success).toBe(true); + }); }); diff --git a/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts new file mode 100644 index 0000000..71550c8 --- /dev/null +++ 
b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts @@ -0,0 +1,221 @@ +import type { GraphNode, GraphEdge } from "../types.js"; + +const VALID_PREFIXES = new Set(["file", "func", "class", "module", "concept"]); + +const TYPE_TO_PREFIX: Record = { + file: "file", + function: "func", + class: "class", + module: "module", + concept: "concept", +}; + +/** + * Strips all non-valid prefixes from an ID, returning the bare path + * and the first valid prefix found (if any). + */ +function stripToValidPrefix(id: string): { prefix: string | null; path: string } { + let remaining = id; + + // Peel off colon-separated segments until we find a valid prefix or run out + while (true) { + const colonIdx = remaining.indexOf(":"); + if (colonIdx <= 0) break; + + const segment = remaining.slice(0, colonIdx); + if (VALID_PREFIXES.has(segment)) { + // Check for double valid prefix (e.g., "file:file:src/foo.ts") + const rest = remaining.slice(colonIdx + 1); + const innerColonIdx = rest.indexOf(":"); + if (innerColonIdx > 0 && VALID_PREFIXES.has(rest.slice(0, innerColonIdx))) { + // Double-prefixed — skip the outer, recurse on inner + remaining = rest; + continue; + } + return { prefix: segment, path: rest }; + } + + // Not a valid prefix — strip it and continue + remaining = remaining.slice(colonIdx + 1); + } + + return { prefix: null, path: remaining }; +} + +/** + * Normalizes a node ID to the canonical `type:path` format. + * Handles: double-prefixed IDs, project-name-prefixed IDs, bare paths. + * Idempotent — correct IDs pass through unchanged. 
+ */ +export function normalizeNodeId( + id: string, + node: { type: string; filePath?: string; name?: string }, +): string { + const trimmed = id.trim(); + if (!trimmed) return trimmed; + + const expectedPrefix = TYPE_TO_PREFIX[node.type]; + const { prefix, path } = stripToValidPrefix(trimmed); + + if (prefix) { + return `${prefix}:${path}`; + } + + // No valid prefix found — bare path + if (expectedPrefix) { + // For func/class, reconstruct from filePath + name if available + if ( + (node.type === "function" || node.type === "class") && + node.filePath && + node.name + ) { + return `${expectedPrefix}:${node.filePath}:${node.name}`; + } + return `${expectedPrefix}:${path}`; + } + + return trimmed; +} + +const VALID_COMPLEXITIES = new Set(["simple", "moderate", "complex"]); + +// String aliases for complexity — mirrors upstream's COMPLEXITY_ALIASES. +// After rebasing onto upstream main, this can be replaced with an import. +const COMPLEXITY_STRING_MAP: Record = { + low: "simple", + easy: "simple", + medium: "moderate", + intermediate: "moderate", + high: "complex", + hard: "complex", + difficult: "complex", +}; + +/** + * Normalizes a complexity value to one of "simple" | "moderate" | "complex". + * Handles both string aliases and numeric scales — defaults to "moderate". 
+ */ +export function normalizeComplexity( + value: unknown, +): "simple" | "moderate" | "complex" { + if (typeof value === "string") { + const lower = value.toLowerCase().trim(); + if (VALID_COMPLEXITIES.has(lower)) return lower as "simple" | "moderate" | "complex"; + const aliased = COMPLEXITY_STRING_MAP[lower]; + if (aliased) return aliased as "simple" | "moderate" | "complex"; + return "moderate"; + } + + if (typeof value === "number" && Number.isFinite(value) && value >= 1) { + if (value <= 3) return "simple"; + if (value <= 6) return "moderate"; + return "complex"; + } + + return "moderate"; +} + +export interface NormalizationStats { + idsFixed: number; + complexityFixed: number; + edgesRewritten: number; + danglingEdgesDropped: number; +} + +export interface NormalizeBatchResult { + nodes: GraphNode[]; + edges: GraphEdge[]; + idMap: Map; + stats: NormalizationStats; +} + +/** + * Normalizes a merged batch output: fixes node IDs and numeric complexity, + * rewrites edge references, deduplicates nodes and edges, and drops dangling edges. + * + * This runs BEFORE upstream's sanitizeGraph/autoFixGraph/normalizeGraph pipeline, + * handling concerns that pipeline does not cover: malformed IDs, numeric complexity, + * edge reference rewriting after ID correction, and edge deduplication. + */ +export function normalizeBatchOutput(data: { + nodes: Record[]; + edges: Record[]; +}): NormalizeBatchResult { + const stats: NormalizationStats = { + idsFixed: 0, + complexityFixed: 0, + edgesRewritten: 0, + danglingEdgesDropped: 0, + }; + + const idMap = new Map(); + + // Pass 1: Normalize node IDs and numeric complexity + const nodes = data.nodes.map((raw) => { + const oldId = String(raw.id ?? ""); + const nodeType = String(raw.type ?? "file"); + const newId = normalizeNodeId(oldId, { + type: nodeType, + filePath: typeof raw.filePath === "string" ? raw.filePath : undefined, + name: typeof raw.name === "string" ? 
raw.name : undefined, + }); + + if (newId !== oldId) { + stats.idsFixed++; + } + idMap.set(oldId, newId); + + const result: Record = { ...raw, id: newId }; + + // Only fix numeric complexity here — string aliases are handled by upstream's + // COMPLEXITY_ALIASES in autoFixGraph + if (typeof raw.complexity === "number") { + result.complexity = normalizeComplexity(raw.complexity); + stats.complexityFixed++; + } + + return result; + }); + + // Deduplicate nodes (keep last occurrence) + const seenIds = new Map(); + for (let i = 0; i < nodes.length; i++) { + seenIds.set(String(nodes[i].id), i); + } + const deduped = nodes.filter((_, i) => seenIds.get(String(nodes[i].id)) === i); + const validNodeIds = new Set(deduped.map((n) => String(n.id))); + + // Pass 2: Rewrite edge references and deduplicate + const edges: Record[] = []; + const seenEdges = new Set(); + for (const raw of data.edges) { + const oldSource = String(raw.source ?? ""); + const oldTarget = String(raw.target ?? ""); + const newSource = idMap.get(oldSource) ?? oldSource; + const newTarget = idMap.get(oldTarget) ?? oldTarget; + + if (newSource !== oldSource || newTarget !== oldTarget) { + stats.edgesRewritten++; + } + + if (!validNodeIds.has(newSource) || !validNodeIds.has(newTarget)) { + stats.danglingEdgesDropped++; + continue; + } + + // Deduplicate by composite key (source + target + type) + const edgeType = String(raw.type ?? 
""); + const edgeKey = `${newSource}|${newTarget}|${edgeType}`; + if (seenEdges.has(edgeKey)) continue; + seenEdges.add(edgeKey); + + edges.push({ ...raw, source: newSource, target: newTarget }); + } + + return { + nodes: deduped as unknown as GraphNode[], + edges: edges as unknown as GraphEdge[], + idMap, + stats, + }; +} diff --git a/understand-anything-plugin/packages/core/src/index.ts b/understand-anything-plugin/packages/core/src/index.ts index 6438720..fd63ddc 100644 --- a/understand-anything-plugin/packages/core/src/index.ts +++ b/understand-anything-plugin/packages/core/src/index.ts @@ -10,6 +10,13 @@ export { parseProjectSummaryResponse, } from "./analyzer/llm-analyzer.js"; export type { LLMFileAnalysis, LLMProjectSummary } from "./analyzer/llm-analyzer.js"; +export { + normalizeNodeId, + normalizeComplexity, + normalizeBatchOutput, + type NormalizationStats, + type NormalizeBatchResult, +} from "./analyzer/normalize-graph.js"; export { SearchEngine, type SearchResult, type SearchOptions } from "./search.js"; export { getChangedFiles, diff --git a/understand-anything-plugin/skills/understand/SKILL.md b/understand-anything-plugin/skills/understand/SKILL.md index 46d9b7a..80bc9e4 100644 --- a/understand-anything-plugin/skills/understand/SKILL.md +++ b/understand-anything-plugin/skills/understand/SKILL.md @@ -150,11 +150,28 @@ After batches complete, merge with the existing graph: ## Phase 3 — ASSEMBLE -Merge all file-analyzer results into a single set of nodes and edges. Then perform basic integrity cleanup: +Merge all file-analyzer results into a single set of nodes and edges. Then perform normalization and integrity cleanup **in this order**: -- Remove any edge whose `source` or `target` references a node ID that does not exist in the merged node set -- Remove duplicate node IDs (keep the last occurrence) -- Log any removed edges or nodes for the final summary +1. 
**Normalize node IDs:** For every node, verify the `id` field follows the convention `<type-prefix>:<path>` where type-prefix is one of `file`, `func`, `class`, `module`, `concept`. Apply these fixes: + - If the ID has a double prefix (e.g., `file:file:src/foo.ts`), strip the duplicate prefix. + - If the ID has a project-name prefix (e.g., `my-project:file:src/foo.ts`), strip the project-name portion. + - If the ID is a bare file path with no prefix, add the appropriate prefix based on the node's `type` field: `file` → `file:<path>`, `function` → `func:<filePath>:<name>`, `class` → `class:<filePath>:<name>`. + - Build a mapping of original IDs → corrected IDs. + +2. **Normalize complexity values:** For every node, verify `complexity` is one of `"simple"`, `"moderate"`, `"complex"`. Apply these mappings for invalid values: + - `"low"`, `"easy"`, `"trivial"`, `"basic"` → `"simple"` + - `"medium"`, `"mid"`, `"average"` → `"moderate"` + - `"high"`, `"hard"`, `"difficult"`, `"advanced"` → `"complex"` + - Numeric 1-3 → `"simple"`, 4-6 → `"moderate"`, 7-10 → `"complex"` + - Any other value → `"moderate"` + +3. **Rewrite edge references:** Using the ID mapping from step 1, update every edge's `source` and `target` fields. This prevents cascading edge drops when only the ID format was wrong. + +4. **Remove duplicate node IDs:** If duplicate node IDs exist after normalization, keep the last occurrence. + +5. **Remove dangling edges:** Remove any edge whose `source` or `target` references a node ID that does not exist in the merged node set. + +6. **Log changes:** Record counts of IDs corrected, complexity values fixed, edges rewritten, duplicates removed, and dangling edges dropped. Include these counts in the Phase warnings list passed to the reviewer. 
--- diff --git a/understand-anything-plugin/skills/understand/file-analyzer-prompt.md b/understand-anything-plugin/skills/understand/file-analyzer-prompt.md index 15f90be..3e7abca 100644 --- a/understand-anything-plugin/skills/understand/file-analyzer-prompt.md +++ b/understand-anything-plugin/skills/understand/file-analyzer-prompt.md @@ -226,6 +226,8 @@ You MUST use these exact prefixes for node IDs: **Scope restriction:** Only produce `file:`, `func:`, and `class:` nodes. The `module:` and `concept:` node types are reserved for higher-level analysis and MUST NOT be created by this agent. +> **WARNING:** Node IDs MUST use the exact prefix formats shown above. Do NOT prefix IDs with the project name (e.g., `my-project:file:src/foo.ts` is WRONG). Do NOT use bare file paths without a type prefix (e.g., `src/foo.ts` is WRONG). Invalid IDs will be auto-corrected during assembly, which may cause unexpected edge rewiring. + ## Output Format Produce a single, valid JSON block. Validate it mentally before writing -- malformed JSON breaks the entire pipeline. 
From 6f443fbc14e8db6e05acc3d412cd4531a4d6622a Mon Sep 17 00:00:00 2001 From: Lum1104 Date: Wed, 1 Apr 2026 21:10:43 +0800 Subject: [PATCH 2/2] fix(core): handle all 13 node types, edge cross-variant resolution, and dropped edge traceability - Add all 13 node types (including non-code) to VALID_PREFIXES and TYPE_TO_PREFIX to prevent valid IDs like config:tsconfig.json from being stripped - Add fallback normalizeNodeId on edge endpoints not found in idMap, fixing silent relationship loss when edges use different malformed variants than nodes - Add DroppedEdge interface with source, target, type, and reason fields so callers can surface exactly which edges were lost and why - Use honest Record[] return types instead of unsafe type casts - Align SKILL.md complexity aliases with COMPLEXITY_STRING_MAP - Add 5 new tests for non-code types, cross-variant edges, dropped edge detail, and validateGraph integration Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/__tests__/normalize-graph.test.ts | 127 +++++++++++++++++- .../core/src/analyzer/normalize-graph.ts | 58 ++++++-- .../packages/core/src/index.ts | 1 + .../skills/understand/SKILL.md | 8 +- 4 files changed, 177 insertions(+), 17 deletions(-) diff --git a/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts index ae83e7e..fa150c2 100644 --- a/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts +++ b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts @@ -4,6 +4,7 @@ import { normalizeComplexity, normalizeBatchOutput, } from "../analyzer/normalize-graph.js"; +import { validateGraph } from "../schema.js"; describe("normalizeNodeId", () => { it("passes through a correct file ID unchanged", () => { @@ -83,12 +84,12 @@ describe("normalizeNodeId", () => { ).toBe("concept:caching"); }); - it("handles double project-name prefix", () => { + it("handles 
project-name prefix before a valid non-code prefix", () => { expect( normalizeNodeId("my-project:service:docker-compose.yml", { type: "file", }), - ).toBe("file:docker-compose.yml"); + ).toBe("service:docker-compose.yml"); }); it("returns empty string for empty input", () => { @@ -98,6 +99,26 @@ describe("normalizeNodeId", () => { it("falls back to untouched ID for unknown node type", () => { expect(normalizeNodeId("some-id", { type: "widget" as any })).toBe("some-id"); }); + + it("passes through non-code type IDs unchanged", () => { + expect(normalizeNodeId("config:tsconfig.json", { type: "config" })).toBe("config:tsconfig.json"); + expect(normalizeNodeId("document:README.md", { type: "document" })).toBe("document:README.md"); + expect(normalizeNodeId("service:docker-compose.yml", { type: "service" })).toBe("service:docker-compose.yml"); + expect(normalizeNodeId("table:migrations/001.sql:users", { type: "table" })).toBe("table:migrations/001.sql:users"); + expect(normalizeNodeId("endpoint:src/routes.ts:GET /api/users", { type: "endpoint" })).toBe("endpoint:src/routes.ts:GET /api/users"); + expect(normalizeNodeId("pipeline:.github/workflows/ci.yml", { type: "pipeline" })).toBe("pipeline:.github/workflows/ci.yml"); + expect(normalizeNodeId("schema:schema.graphql", { type: "schema" })).toBe("schema:schema.graphql"); + expect(normalizeNodeId("resource:main.tf", { type: "resource" })).toBe("resource:main.tf"); + }); + + it("adds prefix for bare paths with non-code types", () => { + expect(normalizeNodeId("tsconfig.json", { type: "config" })).toBe("config:tsconfig.json"); + expect(normalizeNodeId("README.md", { type: "document" })).toBe("document:README.md"); + }); + + it("strips project-name prefix from non-code type IDs", () => { + expect(normalizeNodeId("my-project:config:tsconfig.json", { type: "config" })).toBe("config:tsconfig.json"); + }); }); describe("normalizeComplexity", () => { @@ -257,6 +278,13 @@ describe("normalizeBatchOutput", () => { 
expect(result.edges).toHaveLength(0); expect(result.stats.danglingEdgesDropped).toBe(1); + expect(result.stats.droppedEdges).toHaveLength(1); + expect(result.stats.droppedEdges[0]).toEqual({ + source: "file:src/a.ts", + target: "file:src/nonexistent.ts", + type: "imports", + reason: "missing-target", + }); }); it("deduplicates nodes keeping last occurrence", () => { @@ -372,4 +400,99 @@ describe("normalizeBatchOutput", () => { expect(result.stats.danglingEdgesDropped).toBe(1); expect(result.edges).toHaveLength(1); }); + + it("resolves edge endpoints with different malformed variants than node IDs", () => { + const result = normalizeBatchOutput({ + nodes: [ + { + id: "src/bare.ts", + type: "file", + name: "bare.ts", + filePath: "src/bare.ts", + summary: "Bare", + tags: [], + complexity: "simple", + }, + { + id: "file:src/target.ts", + type: "file", + name: "target.ts", + filePath: "src/target.ts", + summary: "Target", + tags: [], + complexity: "simple", + }, + ], + edges: [ + { + source: "my-project:file:src/bare.ts", + target: "file:src/target.ts", + type: "imports", + direction: "forward", + weight: 0.7, + }, + ], + }); + + expect(result.edges).toHaveLength(1); + expect(result.edges[0].source).toBe("file:src/bare.ts"); + expect(result.edges[0].target).toBe("file:src/target.ts"); + }); +}); + +describe("normalizeBatchOutput integration", () => { + it("produces output that passes validateGraph after wrapping", () => { + const result = normalizeBatchOutput({ + nodes: [ + { + id: "my-project:file:src/index.ts", + type: "file", + name: "index.ts", + filePath: "src/index.ts", + summary: "Entry point", + tags: ["entry"], + complexity: 3, + }, + { + id: "src/utils.ts", + type: "file", + name: "utils.ts", + filePath: "src/utils.ts", + summary: "Utilities", + tags: [], + complexity: "simple", + }, + ], + edges: [ + { + source: "my-project:file:src/index.ts", + target: "src/utils.ts", + type: "imports", + direction: "forward", + weight: 0.7, + }, + ], + }); + + const graph = 
{ + version: "1.0.0", + project: { + name: "test", + languages: ["typescript"], + frameworks: [], + description: "Test project", + analyzedAt: new Date().toISOString(), + gitCommitHash: "abc123", + }, + nodes: result.nodes, + edges: result.edges, + layers: [], + tour: [], + }; + + const validation = validateGraph(graph); + expect(validation.success).toBe(true); + expect(validation.data?.nodes).toHaveLength(2); + expect(validation.data?.edges).toHaveLength(1); + }); }); diff --git a/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts index 71550c8..9409693 100644 --- a/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts +++ b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts @@ -1,6 +1,8 @@ -import type { GraphNode, GraphEdge } from "../types.js"; - -const VALID_PREFIXES = new Set(["file", "func", "class", "module", "concept"]); +const VALID_PREFIXES = new Set([ + "file", "func", "class", "module", "concept", + "config", "document", "service", "table", "endpoint", + "pipeline", "schema", "resource", +]); const TYPE_TO_PREFIX: Record = { file: "file", @@ -8,6 +10,14 @@ const TYPE_TO_PREFIX: Record = { class: "class", module: "module", concept: "concept", + config: "config", + document: "document", + service: "service", + table: "table", + endpoint: "endpoint", + pipeline: "pipeline", + schema: "schema", + resource: "resource", }; /** @@ -79,8 +89,6 @@ export function normalizeNodeId( const VALID_COMPLEXITIES = new Set(["simple", "moderate", "complex"]); -// String aliases for complexity — mirrors upstream's COMPLEXITY_ALIASES. -// After rebasing onto upstream main, this can be replaced with an import. 
const COMPLEXITY_STRING_MAP: Record = { low: "simple", easy: "simple", @@ -115,16 +123,24 @@ export function normalizeComplexity( return "moderate"; } +export interface DroppedEdge { + source: string; + target: string; + type: string; + reason: "missing-source" | "missing-target" | "missing-both"; +} + export interface NormalizationStats { idsFixed: number; complexityFixed: number; edgesRewritten: number; danglingEdgesDropped: number; + droppedEdges: DroppedEdge[]; } export interface NormalizeBatchResult { - nodes: GraphNode[]; - edges: GraphEdge[]; + nodes: Record[]; + edges: Record[]; idMap: Map; stats: NormalizationStats; } @@ -146,6 +162,7 @@ export function normalizeBatchOutput(data: { complexityFixed: 0, edgesRewritten: 0, danglingEdgesDropped: 0, + droppedEdges: [], }; const idMap = new Map(); @@ -191,15 +208,34 @@ export function normalizeBatchOutput(data: { for (const raw of data.edges) { const oldSource = String(raw.source ?? ""); const oldTarget = String(raw.target ?? ""); - const newSource = idMap.get(oldSource) ?? oldSource; - const newTarget = idMap.get(oldTarget) ?? oldTarget; + let newSource = idMap.get(oldSource) ?? oldSource; + let newTarget = idMap.get(oldTarget) ?? 
oldTarget; + + // Fallback: if endpoint not found in idMap, normalize it directly + // (handles cross-variant malformed IDs between nodes and edges) + if (!validNodeIds.has(newSource)) { + const normalized = normalizeNodeId(newSource, { type: "file" }); + if (validNodeIds.has(normalized)) newSource = normalized; + } + if (!validNodeIds.has(newTarget)) { + const normalized = normalizeNodeId(newTarget, { type: "file" }); + if (validNodeIds.has(normalized)) newTarget = normalized; + } if (newSource !== oldSource || newTarget !== oldTarget) { stats.edgesRewritten++; } if (!validNodeIds.has(newSource) || !validNodeIds.has(newTarget)) { + const missingSource = !validNodeIds.has(newSource); + const missingTarget = !validNodeIds.has(newTarget); stats.danglingEdgesDropped++; + stats.droppedEdges.push({ + source: newSource, + target: newTarget, + type: String(raw.type ?? ""), + reason: missingSource && missingTarget ? "missing-both" : missingSource ? "missing-source" : "missing-target", + }); continue; } @@ -213,8 +249,8 @@ export function normalizeBatchOutput(data: { } return { - nodes: deduped as unknown as GraphNode[], - edges: edges as unknown as GraphEdge[], + nodes: deduped, + edges, idMap, stats, }; diff --git a/understand-anything-plugin/packages/core/src/index.ts b/understand-anything-plugin/packages/core/src/index.ts index fd63ddc..907a976 100644 --- a/understand-anything-plugin/packages/core/src/index.ts +++ b/understand-anything-plugin/packages/core/src/index.ts @@ -14,6 +14,7 @@ export { normalizeNodeId, normalizeComplexity, normalizeBatchOutput, + type DroppedEdge, type NormalizationStats, type NormalizeBatchResult, } from "./analyzer/normalize-graph.js"; diff --git a/understand-anything-plugin/skills/understand/SKILL.md b/understand-anything-plugin/skills/understand/SKILL.md index 80bc9e4..0306ad9 100644 --- a/understand-anything-plugin/skills/understand/SKILL.md +++ b/understand-anything-plugin/skills/understand/SKILL.md @@ -152,16 +152,16 @@ After batches 
complete, merge with the existing graph: Merge all file-analyzer results into a single set of nodes and edges. Then perform normalization and integrity cleanup **in this order**: -1. **Normalize node IDs:** For every node, verify the `id` field follows the convention `<type-prefix>:<path>` where type-prefix is one of `file`, `func`, `class`, `module`, `concept`. Apply these fixes: +1. **Normalize node IDs:** For every node, verify the `id` field follows the convention `<type-prefix>:<path>` where type-prefix is one of `file`, `func`, `class`, `module`, `concept`, `config`, `document`, `service`, `table`, `endpoint`, `pipeline`, `schema`, `resource`. Apply these fixes: - If the ID has a double prefix (e.g., `file:file:src/foo.ts`), strip the duplicate prefix. - If the ID has a project-name prefix (e.g., `my-project:file:src/foo.ts`), strip the project-name portion. - If the ID is a bare file path with no prefix, add the appropriate prefix based on the node's `type` field: `file` → `file:<path>`, `function` → `func:<filePath>:<name>`, `class` → `class:<filePath>:<name>`. - Build a mapping of original IDs → corrected IDs. 2. **Normalize complexity values:** For every node, verify `complexity` is one of `"simple"`, `"moderate"`, `"complex"`. Apply these mappings for invalid values: - - `"low"`, `"easy"`, `"trivial"`, `"basic"` → `"simple"` - - `"medium"`, `"mid"`, `"average"` → `"moderate"` - - `"high"`, `"hard"`, `"difficult"`, `"advanced"` → `"complex"` + - `"low"`, `"easy"` → `"simple"` + - `"medium"`, `"intermediate"` → `"moderate"` + - `"high"`, `"hard"`, `"difficult"` → `"complex"` - Numeric 1-3 → `"simple"`, 4-6 → `"moderate"`, 7-10 → `"complex"` - Any other value → `"moderate"`