From 6f443fbc14e8db6e05acc3d412cd4531a4d6622a Mon Sep 17 00:00:00 2001 From: Lum1104 Date: Wed, 1 Apr 2026 21:10:43 +0800 Subject: [PATCH] fix(core): handle all 13 node types, edge cross-variant resolution, and dropped edge traceability - Add all 13 node types (including non-code) to VALID_PREFIXES and TYPE_TO_PREFIX to prevent valid IDs like config:tsconfig.json from being stripped - Add fallback normalizeNodeId on edge endpoints not found in idMap, fixing silent relationship loss when edges use different malformed variants than nodes - Add DroppedEdge interface with source, target, type, and reason fields so callers can surface exactly which edges were lost and why - Use honest Record[] return types instead of unsafe type casts - Align SKILL.md complexity aliases with COMPLEXITY_STRING_MAP - Add 5 new tests for non-code types, cross-variant edges, dropped edge detail, and validateGraph integration Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/__tests__/normalize-graph.test.ts | 127 +++++++++++++++++- .../core/src/analyzer/normalize-graph.ts | 58 ++++++-- .../packages/core/src/index.ts | 1 + .../skills/understand/SKILL.md | 8 +- 4 files changed, 177 insertions(+), 17 deletions(-) diff --git a/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts index ae83e7e..fa150c2 100644 --- a/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts +++ b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts @@ -4,6 +4,7 @@ import { normalizeComplexity, normalizeBatchOutput, } from "../analyzer/normalize-graph.js"; +import { validateGraph } from "../schema.js"; describe("normalizeNodeId", () => { it("passes through a correct file ID unchanged", () => { @@ -83,12 +84,12 @@ describe("normalizeNodeId", () => { ).toBe("concept:caching"); }); - it("handles double project-name prefix", () => { + it("handles project-name 
prefix before a valid non-code prefix", () => { expect( normalizeNodeId("my-project:service:docker-compose.yml", { type: "file", }), - ).toBe("file:docker-compose.yml"); + ).toBe("service:docker-compose.yml"); }); it("returns empty string for empty input", () => { @@ -98,6 +99,26 @@ describe("normalizeNodeId", () => { it("falls back to untouched ID for unknown node type", () => { expect(normalizeNodeId("some-id", { type: "widget" as any })).toBe("some-id"); }); + + it("passes through non-code type IDs unchanged", () => { + expect(normalizeNodeId("config:tsconfig.json", { type: "config" })).toBe("config:tsconfig.json"); + expect(normalizeNodeId("document:README.md", { type: "document" })).toBe("document:README.md"); + expect(normalizeNodeId("service:docker-compose.yml", { type: "service" })).toBe("service:docker-compose.yml"); + expect(normalizeNodeId("table:migrations/001.sql:users", { type: "table" })).toBe("table:migrations/001.sql:users"); + expect(normalizeNodeId("endpoint:src/routes.ts:GET /api/users", { type: "endpoint" })).toBe("endpoint:src/routes.ts:GET /api/users"); + expect(normalizeNodeId("pipeline:.github/workflows/ci.yml", { type: "pipeline" })).toBe("pipeline:.github/workflows/ci.yml"); + expect(normalizeNodeId("schema:schema.graphql", { type: "schema" })).toBe("schema:schema.graphql"); + expect(normalizeNodeId("resource:main.tf", { type: "resource" })).toBe("resource:main.tf"); + }); + + it("adds prefix for bare paths with non-code types", () => { + expect(normalizeNodeId("tsconfig.json", { type: "config" })).toBe("config:tsconfig.json"); + expect(normalizeNodeId("README.md", { type: "document" })).toBe("document:README.md"); + }); + + it("strips project-name prefix from non-code type IDs", () => { + expect(normalizeNodeId("my-project:config:tsconfig.json", { type: "config" })).toBe("config:tsconfig.json"); + }); }); describe("normalizeComplexity", () => { @@ -257,6 +278,13 @@ describe("normalizeBatchOutput", () => { 
expect(result.edges).toHaveLength(0); expect(result.stats.danglingEdgesDropped).toBe(1); + expect(result.stats.droppedEdges).toHaveLength(1); + expect(result.stats.droppedEdges[0]).toEqual({ + source: "file:src/a.ts", + target: "file:src/nonexistent.ts", + type: "imports", + reason: "missing-target", + }); }); it("deduplicates nodes keeping last occurrence", () => { @@ -372,4 +400,99 @@ describe("normalizeBatchOutput", () => { expect(result.stats.danglingEdgesDropped).toBe(1); expect(result.edges).toHaveLength(1); }); + + it("resolves edge endpoints with different malformed variants than node IDs", () => { + const result = normalizeBatchOutput({ + nodes: [ + { + id: "src/bare.ts", + type: "file", + name: "bare.ts", + filePath: "src/bare.ts", + summary: "Bare", + tags: [], + complexity: "simple", + }, + { + id: "file:src/target.ts", + type: "file", + name: "target.ts", + filePath: "src/target.ts", + summary: "Target", + tags: [], + complexity: "simple", + }, + ], + edges: [ + { + source: "my-project:file:src/bare.ts", + target: "file:src/target.ts", + type: "imports", + direction: "forward", + weight: 0.7, + }, + ], + }); + + expect(result.edges).toHaveLength(1); + expect(result.edges[0].source).toBe("file:src/bare.ts"); + expect(result.edges[0].target).toBe("file:src/target.ts"); + }); +}); + +describe("normalizeBatchOutput integration", () => { + it("produces output that passes validateGraph after wrapping", () => { + const result = normalizeBatchOutput({ + nodes: [ + { + id: "my-project:file:src/index.ts", + type: "file", + name: "index.ts", + filePath: "src/index.ts", + summary: "Entry point", + tags: ["entry"], + complexity: 3, + }, + { + id: "src/utils.ts", + type: "file", + name: "utils.ts", + filePath: "src/utils.ts", + summary: "Utilities", + tags: [], + complexity: "simple", + }, + ], + edges: [ + { + source: "my-project:file:src/index.ts", + target: "src/utils.ts", + type: "imports", + direction: "forward", + weight: 0.7, + }, + ], + }); + + const graph = 
{ + version: "1.0.0", + project: { + name: "test", + languages: ["typescript"], + frameworks: [], + description: "Test project", + analyzedAt: new Date().toISOString(), + gitCommitHash: "abc123", + }, + nodes: result.nodes, + edges: result.edges, + layers: [], + tour: [], + }; + + const validation = validateGraph(graph); + expect(validation.success).toBe(true); + expect(validation.data?.nodes).toHaveLength(2); + expect(validation.data?.edges).toHaveLength(1); + }); }); diff --git a/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts index 71550c8..9409693 100644 --- a/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts +++ b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts @@ -1,6 +1,8 @@ -import type { GraphNode, GraphEdge } from "../types.js"; - -const VALID_PREFIXES = new Set(["file", "func", "class", "module", "concept"]); +const VALID_PREFIXES = new Set([ + "file", "func", "class", "module", "concept", + "config", "document", "service", "table", "endpoint", + "pipeline", "schema", "resource", +]); const TYPE_TO_PREFIX: Record = { file: "file", @@ -8,6 +10,14 @@ const TYPE_TO_PREFIX: Record = { class: "class", module: "module", concept: "concept", + config: "config", + document: "document", + service: "service", + table: "table", + endpoint: "endpoint", + pipeline: "pipeline", + schema: "schema", + resource: "resource", }; /** @@ -79,8 +89,6 @@ export function normalizeNodeId( const VALID_COMPLEXITIES = new Set(["simple", "moderate", "complex"]); -// String aliases for complexity — mirrors upstream's COMPLEXITY_ALIASES. -// After rebasing onto upstream main, this can be replaced with an import. 
const COMPLEXITY_STRING_MAP: Record = { low: "simple", easy: "simple", @@ -115,16 +123,24 @@ export function normalizeComplexity( return "moderate"; } +export interface DroppedEdge { + source: string; + target: string; + type: string; + reason: "missing-source" | "missing-target" | "missing-both"; +} + export interface NormalizationStats { idsFixed: number; complexityFixed: number; edgesRewritten: number; danglingEdgesDropped: number; + droppedEdges: DroppedEdge[]; } export interface NormalizeBatchResult { - nodes: GraphNode[]; - edges: GraphEdge[]; + nodes: Record[]; + edges: Record[]; idMap: Map; stats: NormalizationStats; } @@ -146,6 +162,7 @@ export function normalizeBatchOutput(data: { complexityFixed: 0, edgesRewritten: 0, danglingEdgesDropped: 0, + droppedEdges: [], }; const idMap = new Map(); @@ -191,15 +208,34 @@ export function normalizeBatchOutput(data: { for (const raw of data.edges) { const oldSource = String(raw.source ?? ""); const oldTarget = String(raw.target ?? ""); - const newSource = idMap.get(oldSource) ?? oldSource; - const newTarget = idMap.get(oldTarget) ?? oldTarget; + let newSource = idMap.get(oldSource) ?? oldSource; + let newTarget = idMap.get(oldTarget) ?? 
oldTarget; + + // Fallback: if endpoint not found in idMap, normalize it directly + // (handles cross-variant malformed IDs between nodes and edges) + if (!validNodeIds.has(newSource)) { + const normalized = normalizeNodeId(newSource, { type: "file" }); + if (validNodeIds.has(normalized)) newSource = normalized; + } + if (!validNodeIds.has(newTarget)) { + const normalized = normalizeNodeId(newTarget, { type: "file" }); + if (validNodeIds.has(normalized)) newTarget = normalized; + } if (newSource !== oldSource || newTarget !== oldTarget) { stats.edgesRewritten++; } if (!validNodeIds.has(newSource) || !validNodeIds.has(newTarget)) { + const missingSource = !validNodeIds.has(newSource); + const missingTarget = !validNodeIds.has(newTarget); stats.danglingEdgesDropped++; + stats.droppedEdges.push({ + source: newSource, + target: newTarget, + type: String(raw.type ?? ""), + reason: missingSource && missingTarget ? "missing-both" : missingSource ? "missing-source" : "missing-target", + }); continue; } @@ -213,8 +249,8 @@ export function normalizeBatchOutput(data: { } return { - nodes: deduped as unknown as GraphNode[], - edges: edges as unknown as GraphEdge[], + nodes: deduped, + edges, idMap, stats, }; diff --git a/understand-anything-plugin/packages/core/src/index.ts b/understand-anything-plugin/packages/core/src/index.ts index fd63ddc..907a976 100644 --- a/understand-anything-plugin/packages/core/src/index.ts +++ b/understand-anything-plugin/packages/core/src/index.ts @@ -14,6 +14,7 @@ export { normalizeNodeId, normalizeComplexity, normalizeBatchOutput, + type DroppedEdge, type NormalizationStats, type NormalizeBatchResult, } from "./analyzer/normalize-graph.js"; diff --git a/understand-anything-plugin/skills/understand/SKILL.md b/understand-anything-plugin/skills/understand/SKILL.md index 80bc9e4..0306ad9 100644 --- a/understand-anything-plugin/skills/understand/SKILL.md +++ b/understand-anything-plugin/skills/understand/SKILL.md @@ -152,16 +152,16 @@ After batches 
complete, merge with the existing graph: Merge all file-analyzer results into a single set of nodes and edges. Then perform normalization and integrity cleanup **in this order**: -1. **Normalize node IDs:** For every node, verify the `id` field follows the convention `<type-prefix>:<path>` where type-prefix is one of `file`, `func`, `class`, `module`, `concept`. Apply these fixes: +1. **Normalize node IDs:** For every node, verify the `id` field follows the convention `<type-prefix>:<path>` where type-prefix is one of `file`, `func`, `class`, `module`, `concept`, `config`, `document`, `service`, `table`, `endpoint`, `pipeline`, `schema`, `resource`. Apply these fixes: - If the ID has a double prefix (e.g., `file:file:src/foo.ts`), strip the duplicate prefix. - If the ID has a project-name prefix (e.g., `my-project:file:src/foo.ts`), strip the project-name portion. - If the ID is a bare file path with no prefix, add the appropriate prefix based on the node's `type` field: `file` → `file:<path>`, `function` → `func:<path>:<name>`, `class` → `class:<path>:<name>`. - Build a mapping of original IDs → corrected IDs. 2. **Normalize complexity values:** For every node, verify `complexity` is one of `"simple"`, `"moderate"`, `"complex"`. Apply these mappings for invalid values: - - `"low"`, `"easy"`, `"trivial"`, `"basic"` → `"simple"` - - `"medium"`, `"mid"`, `"average"` → `"moderate"` - - `"high"`, `"hard"`, `"difficult"`, `"advanced"` → `"complex"` + - `"low"`, `"easy"` → `"simple"` + - `"medium"`, `"intermediate"` → `"moderate"` + - `"high"`, `"hard"`, `"difficult"` → `"complex"` - Numeric 1-3 → `"simple"`, 4-6 → `"moderate"`, 7-10 → `"complex"` - Any other value → `"moderate"`