diff --git a/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts
new file mode 100644
index 0000000..fa150c2
--- /dev/null
+++ b/understand-anything-plugin/packages/core/src/__tests__/normalize-graph.test.ts
@@ -0,0 +1,498 @@
+import { describe, it, expect } from "vitest";
+import {
+  normalizeNodeId,
+  normalizeComplexity,
+  normalizeBatchOutput,
+} from "../analyzer/normalize-graph.js";
+import { validateGraph } from "../schema.js";
+
+describe("normalizeNodeId", () => { // ID canonicalization: canonical, double-prefixed, project-prefixed and bare IDs
+  it("passes through a correct file ID unchanged", () => {
+    expect(
+      normalizeNodeId("file:src/index.ts", { type: "file" }),
+    ).toBe("file:src/index.ts");
+  });
+
+  it("passes through a correct func ID unchanged", () => {
+    expect(
+      normalizeNodeId("func:src/utils.ts:formatDate", { type: "function" }),
+    ).toBe("func:src/utils.ts:formatDate");
+  });
+
+  it("passes through a correct class ID unchanged", () => {
+    expect(
+      normalizeNodeId("class:src/models/User.ts:User", { type: "class" }),
+    ).toBe("class:src/models/User.ts:User");
+  });
+
+  it("fixes double-prefixed IDs", () => {
+    expect(
+      normalizeNodeId("file:file:src/foo.ts", { type: "file" }),
+    ).toBe("file:src/foo.ts");
+  });
+
+  it("strips project-name prefix when valid prefix follows", () => {
+    expect(
+      normalizeNodeId("my-project:file:src/foo.ts", { type: "file" }),
+    ).toBe("file:src/foo.ts");
+  });
+
+  it("strips project-name prefix and adds correct prefix for bare path", () => {
+    expect(
+      normalizeNodeId("my-project:src/foo.ts", { type: "file" }),
+    ).toBe("file:src/foo.ts");
+  });
+
+  it("adds file: prefix to bare paths", () => {
+    expect(
+      normalizeNodeId("frontend/src/utils/constants.ts", { type: "file" }),
+    ).toBe("file:frontend/src/utils/constants.ts");
+  });
+
+  it("reconstructs func ID from filePath and name for bare paths", () => { // the bare ID text itself is not used; filePath + name are
+    expect(
+      normalizeNodeId("formatDate", {
+        type: "function",
+        filePath: "src/utils.ts",
+        name: "formatDate",
+      }),
+    ).toBe("func:src/utils.ts:formatDate");
+  });
+
+  it("reconstructs class ID from filePath and name for bare paths", () => {
+    expect(
+      normalizeNodeId("User", {
+        type: "class",
+        filePath: "src/models/User.ts",
+        name: "User",
+      }),
+    ).toBe("class:src/models/User.ts:User");
+  });
+
+  it("trims whitespace", () => {
+    expect(
+      normalizeNodeId(" file:src/foo.ts ", { type: "file" }),
+    ).toBe("file:src/foo.ts");
+  });
+
+  it("handles module: and concept: prefixes", () => {
+    expect(
+      normalizeNodeId("module:auth", { type: "module" }),
+    ).toBe("module:auth");
+    expect(
+      normalizeNodeId("concept:caching", { type: "concept" }),
+    ).toBe("concept:caching");
+  });
+
+  it("handles project-name prefix before a valid non-code prefix", () => { // the prefix found in the ID ("service") is kept even though type is "file"
+    expect(
+      normalizeNodeId("my-project:service:docker-compose.yml", {
+        type: "file",
+      }),
+    ).toBe("service:docker-compose.yml");
+  });
+
+  it("returns empty string for empty input", () => {
+    expect(normalizeNodeId("", { type: "file" })).toBe("");
+  });
+
+  it("falls back to untouched ID for unknown node type", () => { // "widget" has no entry in the type-to-prefix table
+    expect(normalizeNodeId("some-id", { type: "widget" as any })).toBe("some-id");
+  });
+
+  it("passes through non-code type IDs unchanged", () => {
+    expect(normalizeNodeId("config:tsconfig.json", { type: "config" })).toBe("config:tsconfig.json");
+    expect(normalizeNodeId("document:README.md", { type: "document" })).toBe("document:README.md");
+    expect(normalizeNodeId("service:docker-compose.yml", { type: "service" })).toBe("service:docker-compose.yml");
+    expect(normalizeNodeId("table:migrations/001.sql:users", { type: "table" })).toBe("table:migrations/001.sql:users");
+    expect(normalizeNodeId("endpoint:src/routes.ts:GET /api/users", { type: "endpoint" })).toBe("endpoint:src/routes.ts:GET /api/users");
+    expect(normalizeNodeId("pipeline:.github/workflows/ci.yml", { type: "pipeline" })).toBe("pipeline:.github/workflows/ci.yml");
+    expect(normalizeNodeId("schema:schema.graphql", { type: "schema" })).toBe("schema:schema.graphql");
+    expect(normalizeNodeId("resource:main.tf", { type: "resource" })).toBe("resource:main.tf");
+  });
+
+  it("adds prefix for bare paths with non-code types", () => {
+    expect(normalizeNodeId("tsconfig.json", { type: "config" })).toBe("config:tsconfig.json");
+    expect(normalizeNodeId("README.md", { type: "document" })).toBe("document:README.md");
+  });
+
+  it("strips project-name prefix from non-code type IDs", () => {
+    expect(normalizeNodeId("my-project:config:tsconfig.json", { type: "config" })).toBe("config:tsconfig.json");
+  });
+});
+
+describe("normalizeComplexity", () => { // maps strings, aliases and numeric scales onto simple | moderate | complex
+  it("passes through valid values unchanged", () => {
+    expect(normalizeComplexity("simple")).toBe("simple");
+    expect(normalizeComplexity("moderate")).toBe("moderate");
+    expect(normalizeComplexity("complex")).toBe("complex");
+  });
+
+  it("maps 'low' to 'simple'", () => {
+    expect(normalizeComplexity("low")).toBe("simple");
+  });
+
+  it("maps 'high' to 'complex'", () => {
+    expect(normalizeComplexity("high")).toBe("complex");
+  });
+
+  it("maps 'medium' to 'moderate'", () => {
+    expect(normalizeComplexity("medium")).toBe("moderate");
+  });
+
+  it("maps other aliases from upstream COMPLEXITY_ALIASES", () => {
+    expect(normalizeComplexity("easy")).toBe("simple");
+    expect(normalizeComplexity("hard")).toBe("complex");
+    expect(normalizeComplexity("difficult")).toBe("complex");
+    expect(normalizeComplexity("intermediate")).toBe("moderate");
+  });
+
+  it("is case-insensitive", () => {
+    expect(normalizeComplexity("LOW")).toBe("simple");
+    expect(normalizeComplexity("High")).toBe("complex");
+    expect(normalizeComplexity("MODERATE")).toBe("moderate");
+  });
+
+  it("maps numeric 1-3 to simple", () => { // checks both ends of the band
+    expect(normalizeComplexity(1)).toBe("simple");
+    expect(normalizeComplexity(3)).toBe("simple");
+  });
+
+  it("maps numeric 4-6 to moderate", () => {
+    expect(normalizeComplexity(4)).toBe("moderate");
+    expect(normalizeComplexity(6)).toBe("moderate");
+  });
+
+  it("maps numeric 7-10 to complex", () => {
+    expect(normalizeComplexity(7)).toBe("complex");
+    expect(normalizeComplexity(10)).toBe("complex");
+  });
+
+  it("defaults free-text to moderate", () => { // unrecognized strings are not parsed, even if they contain a valid word
+    expect(normalizeComplexity("detailed")).toBe("moderate");
+    expect(normalizeComplexity("very complex with many deps")).toBe("moderate");
+  });
+
+  it("defaults undefined/null to moderate", () => {
+    expect(normalizeComplexity(undefined)).toBe("moderate");
+    expect(normalizeComplexity(null)).toBe("moderate");
+  });
+
+  it("defaults zero and negative numbers to moderate", () => { // numbers below 1 fall outside the numeric scale
+    expect(normalizeComplexity(0)).toBe("moderate");
+    expect(normalizeComplexity(-5)).toBe("moderate");
+  });
+});
+
+describe("normalizeBatchOutput", () => { // whole-batch pass: ID repair, edge rewriting, dedup, dangling-edge removal, stats
+  it("normalizes IDs and numeric complexity, rewrites edges", () => {
+    const result = normalizeBatchOutput({
+      nodes: [
+        {
+          id: "file:src/good.ts",
+          type: "file",
+          name: "good.ts",
+          filePath: "src/good.ts",
+          summary: "A good file",
+          tags: ["util"],
+          complexity: "simple",
+        },
+        {
+          id: "my-project:file:src/bad.ts",
+          type: "file",
+          name: "bad.ts",
+          filePath: "src/bad.ts",
+          summary: "Project-prefixed",
+          tags: ["api"],
+          complexity: "simple",
+        },
+        {
+          id: "src/bare.ts",
+          type: "file",
+          name: "bare.ts",
+          filePath: "src/bare.ts",
+          summary: "Bare path",
+          tags: [],
+          complexity: 4,
+        },
+      ],
+      edges: [
+        {
+          source: "file:src/good.ts",
+          target: "my-project:file:src/bad.ts",
+          type: "imports",
+          direction: "forward",
+          weight: 0.7,
+        },
+        {
+          source: "src/bare.ts",
+          target: "file:src/good.ts",
+          type: "imports",
+          direction: "forward",
+          weight: 0.7,
+        },
+      ],
+    });
+
+    expect(result.nodes).toHaveLength(3);
+    expect(result.nodes[0].id).toBe("file:src/good.ts");
+    expect(result.nodes[1].id).toBe("file:src/bad.ts");
+    expect(result.nodes[2].id).toBe("file:src/bare.ts");
+    // Only numeric complexity is fixed here; string aliases are upstream's job
+    expect(result.nodes[2].complexity).toBe("moderate");
+
+    // Edges should be rewritten through the ID map
+    expect(result.edges).toHaveLength(2);
+    expect(result.edges[0].source).toBe("file:src/good.ts");
+    expect(result.edges[0].target).toBe("file:src/bad.ts");
+    expect(result.edges[1].source).toBe("file:src/bare.ts");
+
+    expect(result.stats.idsFixed).toBe(2); // nodes[1] and nodes[2]
+    expect(result.stats.complexityFixed).toBe(1); // only the numeric one
+    expect(result.stats.edgesRewritten).toBe(2); // one endpoint changed in each edge
+    expect(result.stats.danglingEdgesDropped).toBe(0);
+  });
+
+  it("drops dangling edges after normalization", () => {
+    const result = normalizeBatchOutput({
+      nodes: [
+        {
+          id: "file:src/a.ts",
+          type: "file",
+          name: "a.ts",
+          summary: "File A",
+          tags: [],
+          complexity: "simple",
+        },
+      ],
+      edges: [
+        {
+          source: "file:src/a.ts",
+          target: "file:src/nonexistent.ts",
+          type: "imports",
+          direction: "forward",
+          weight: 0.7,
+        },
+      ],
+    });
+
+    expect(result.edges).toHaveLength(0);
+    expect(result.stats.danglingEdgesDropped).toBe(1);
+    expect(result.stats.droppedEdges).toHaveLength(1);
+    expect(result.stats.droppedEdges[0]).toEqual({
+      source: "file:src/a.ts",
+      target: "file:src/nonexistent.ts",
+      type: "imports",
+      reason: "missing-target",
+    });
+  });
+
+  it("deduplicates nodes keeping last occurrence", () => {
+    const result = normalizeBatchOutput({
+      nodes: [
+        {
+          id: "file:src/a.ts",
+          type: "file",
+          name: "a.ts",
+          summary: "First version",
+          tags: [],
+          complexity: "simple",
+        },
+        {
+          id: "file:src/a.ts",
+          type: "file",
+          name: "a.ts",
+          summary: "Second version",
+          tags: ["updated"],
+          complexity: "complex",
+        },
+      ],
+      edges: [],
+    });
+
+    expect(result.nodes).toHaveLength(1);
+    expect(result.nodes[0].summary).toBe("Second version");
+  });
+
+  it("deduplicates edges after ID rewriting", () => {
+    const result = normalizeBatchOutput({
+      nodes: [
+        {
+          id: "file:src/a.ts",
+          type: "file",
+          name: "a.ts",
+          summary: "A",
+          tags: [],
+          complexity: "simple",
+        },
+        {
+          id: "file:src/b.ts",
+          type: "file",
+          name: "b.ts",
+          summary: "B",
+          tags: [],
+          complexity: "simple",
+        },
+      ],
+      edges: [
+        {
+          source: "file:src/a.ts",
+          target: "file:src/b.ts",
+          type: "imports",
+          direction: "forward",
+          weight: 0.7,
+        },
+        {
+          source: "proj:file:src/a.ts",
+          target: "file:src/b.ts",
+          type: "imports",
+          direction: "forward",
+          weight: 0.7,
+        },
+      ],
+    });
+
+    // Both edges resolve to the same source after normalization — deduplicated
+    expect(result.edges).toHaveLength(1);
+  });
+
+  it("returns accurate stats", () => {
+    const result = normalizeBatchOutput({
+      nodes: [
+        {
+          id: "file:src/ok.ts",
+          type: "file",
+          name: "ok.ts",
+          summary: "OK",
+          tags: [],
+          complexity: "simple",
+        },
+        {
+          id: "proj:file:src/fix.ts",
+          type: "file",
+          name: "fix.ts",
+          summary: "Needs fix",
+          tags: [],
+          complexity: 2,
+        },
+      ],
+      edges: [
+        {
+          source: "proj:file:src/fix.ts",
+          target: "file:src/ok.ts",
+          type: "imports",
+          direction: "forward",
+          weight: 0.7,
+        },
+        {
+          source: "file:src/ok.ts",
+          target: "file:src/gone.ts",
+          type: "imports",
+          direction: "forward",
+          weight: 0.7,
+        },
+      ],
+    });
+
+    expect(result.stats.idsFixed).toBe(1);
+    expect(result.stats.complexityFixed).toBe(1);
+    expect(result.stats.edgesRewritten).toBe(1); // only the proj:-prefixed edge; the dangling edge's endpoints are unchanged
+    expect(result.stats.danglingEdgesDropped).toBe(1);
+    expect(result.edges).toHaveLength(1);
+  });
+
+  it("resolves edge endpoints with different malformed variants than node IDs", () => { // edge source is not a key of the ID map; the direct-normalization fallback resolves it
+    const result = normalizeBatchOutput({
+      nodes: [
+        {
+          id: "src/bare.ts",
+          type: "file",
+          name: "bare.ts",
+          filePath: "src/bare.ts",
+          summary: "Bare",
+          tags: [],
+          complexity: "simple",
+        },
+        {
+          id: "file:src/target.ts",
+          type: "file",
+          name: "target.ts",
+          filePath: "src/target.ts",
+          summary: "Target",
+          tags: [],
+          complexity: "simple",
+        },
+      ],
+      edges: [
+        {
+          source: "my-project:file:src/bare.ts",
+          target: "file:src/target.ts",
+          type: "imports",
+          direction: "forward",
+          weight: 0.7,
+        },
+      ],
+    });
+
+    expect(result.edges).toHaveLength(1);
+    expect(result.edges[0].source).toBe("file:src/bare.ts");
+    expect(result.edges[0].target).toBe("file:src/target.ts");
+  });
+});
+
+describe("normalizeBatchOutput integration", () => { // normalized output is wrapped in a full graph and run through validateGraph
+  it("produces output that passes validateGraph after wrapping", () => {
+    const result = normalizeBatchOutput({
+      nodes: [
+        {
+          id: "my-project:file:src/index.ts",
+          type: "file",
+          name: "index.ts",
+          filePath: "src/index.ts",
+          summary: "Entry point",
+          tags: ["entry"],
+          complexity: 3,
+        },
+        {
+          id: "src/utils.ts",
+          type: "file",
+          name: "utils.ts",
+          filePath: "src/utils.ts",
+          summary: "Utilities",
+          tags: [],
+          complexity: "simple",
+        },
+      ],
+      edges: [
+        {
+          source: "my-project:file:src/index.ts",
+          target: "src/utils.ts",
+          type: "imports",
+          direction: "forward",
+          weight: 0.7,
+        },
+      ],
+    });
+
+    const graph = {
+      version: "1.0.0",
+      project: {
+        name: "test",
+        languages: ["typescript"],
+        frameworks: [],
+        description: "Test project",
+        analyzedAt: new Date().toISOString(),
+        gitCommitHash: "abc123",
+      },
+      nodes: result.nodes,
+      edges: result.edges,
+      layers: [],
+      tour: [],
+    };
+
+    const validation = validateGraph(graph);
+    expect(validation.success).toBe(true);
+    expect(validation.data?.nodes).toHaveLength(2);
+    expect(validation.data?.edges).toHaveLength(1);
+  });
+});
diff --git a/understand-anything-plugin/packages/core/src/__tests__/schema.test.ts b/understand-anything-plugin/packages/core/src/__tests__/schema.test.ts
index fe5e205..2ea911a 100644
--- a/understand-anything-plugin/packages/core/src/__tests__/schema.test.ts
+++ b/understand-anything-plugin/packages/core/src/__tests__/schema.test.ts
@@ -719,4 +719,12 @@ describe("Extended node/edge types", () => {
       expect(result.data!.edges[0].type).toBe(canonical);
     }
   });
+
+  it("accepts node with bare string ID (schema is lenient on format)", () => {
+    const graph = structuredClone(validGraph);
+    graph.nodes[0].id = "src/foo.ts";
+
+    const result = 
validateGraph(graph);
+    expect(result.success).toBe(true);
+  });
 });
diff --git a/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts
new file mode 100644
index 0000000..9409693
--- /dev/null
+++ b/understand-anything-plugin/packages/core/src/analyzer/normalize-graph.ts
@@ -0,0 +1,313 @@
+/** Prefixes that may legally start a canonical node ID (`<prefix>:<path>`). */
+const VALID_PREFIXES = new Set([
+  "file", "func", "class", "module", "concept",
+  "config", "document", "service", "table", "endpoint",
+  "pipeline", "schema", "resource",
+]);
+
+/** Maps a node `type` to the prefix its ID must carry (note: `function` uses `func`). */
+const TYPE_TO_PREFIX: Record<string, string> = {
+  file: "file",
+  function: "func",
+  class: "class",
+  module: "module",
+  concept: "concept",
+  config: "config",
+  document: "document",
+  service: "service",
+  table: "table",
+  endpoint: "endpoint",
+  pipeline: "pipeline",
+  schema: "schema",
+  resource: "resource",
+};
+
+/**
+ * Own-property lookup for the string tables in this module. Plain indexing would
+ * also find inherited members (e.g. "constructor", "toString") on untrusted keys.
+ */
+function lookupOwn(table: Record<string, string>, key: string): string | undefined {
+  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
+}
+
+/** Returns true when `value` starts with `<valid-prefix>:`. */
+function startsWithValidPrefix(value: string): boolean {
+  const colonIdx = value.indexOf(":");
+  return colonIdx > 0 && VALID_PREFIXES.has(value.slice(0, colonIdx));
+}
+
+/** Returns true when any colon-terminated segment of `value` is a valid prefix. */
+function containsValidPrefix(value: string): boolean {
+  // The final segment is not followed by a colon, so it can never be a prefix
+  return value.split(":").slice(0, -1).some((segment) => VALID_PREFIXES.has(segment));
+}
+
+/**
+ * Strips leading junk segments (project names, duplicated prefixes) from an ID,
+ * returning the first valid prefix found (if any) and the remaining path.
+ *
+ * A leading segment that looks like a file path (contains `/` or `\`) is only
+ * stripped when a valid prefix still follows it. Otherwise it belongs to the
+ * path itself (e.g. `src/utils.ts:formatDate`), and dropping it would collapse
+ * same-named symbols from different files into a single ID.
+ */
+function stripToValidPrefix(id: string): { prefix: string | null; path: string } {
+  let remaining = id;
+
+  // Peel off colon-separated segments until we find a valid prefix or run out
+  while (true) {
+    const colonIdx = remaining.indexOf(":");
+    if (colonIdx <= 0) break;
+
+    const segment = remaining.slice(0, colonIdx);
+    const rest = remaining.slice(colonIdx + 1);
+
+    if (VALID_PREFIXES.has(segment)) {
+      // Double valid prefix (e.g., "file:file:src/foo.ts") — skip the outer one
+      if (startsWithValidPrefix(rest)) {
+        remaining = rest;
+        continue;
+      }
+      return { prefix: segment, path: rest };
+    }
+
+    // Path-like segment with no valid prefix after it — keep it as part of the path
+    if (/[\\/]/.test(segment) && !containsValidPrefix(rest)) break;
+
+    // Otherwise treat it as a project-name style prefix — strip it and continue
+    remaining = rest;
+  }
+
+  return { prefix: null, path: remaining };
+}
+
+/**
+ * Normalizes a node ID to the canonical `type:path` format.
+ * Handles: double-prefixed IDs, project-name-prefixed IDs, bare paths.
+ * Idempotent — correct IDs pass through unchanged.
+ *
+ * @param id - Raw node ID; surrounding whitespace is trimmed.
+ * @param node - The node's `type`, plus optional `filePath` and `name` used to
+ *   rebuild bare function/class IDs.
+ * @returns The canonical ID. A valid prefix already present in `id` wins over
+ *   `node.type`. Blank input yields "", and an unprefixed ID on a node of
+ *   unknown type is returned trimmed but otherwise untouched.
+ */
+export function normalizeNodeId(
+  id: string,
+  node: { type: string; filePath?: string; name?: string },
+): string {
+  const trimmed = id.trim();
+  if (!trimmed) return trimmed;
+
+  const expectedPrefix = lookupOwn(TYPE_TO_PREFIX, node.type);
+  const { prefix, path } = stripToValidPrefix(trimmed);
+
+  if (prefix) {
+    return `${prefix}:${path}`;
+  }
+
+  // No valid prefix found — bare path
+  if (expectedPrefix) {
+    // For func/class, reconstruct from filePath + name if available
+    if (
+      (node.type === "function" || node.type === "class") &&
+      node.filePath &&
+      node.name
+    ) {
+      return `${expectedPrefix}:${node.filePath}:${node.name}`;
+    }
+    return `${expectedPrefix}:${path}`;
+  }
+
+  return trimmed;
+}
+
+const VALID_COMPLEXITIES = new Set(["simple", "moderate", "complex"]);
+
+const COMPLEXITY_STRING_MAP: Record<string, string> = {
+  low: "simple",
+  easy: "simple",
+  medium: "moderate",
+  intermediate: "moderate",
+  high: "complex",
+  hard: "complex",
+  difficult: "complex",
+};
+
+/**
+ * Normalizes a complexity value to one of "simple" | "moderate" | "complex".
+ * Handles both string aliases and numeric scales — defaults to "moderate".
+ *
+ * @param value - Candidate value: a canonical name or alias (case-insensitive,
+ *   trimmed) or a finite number >= 1 (<= 3 simple, <= 6 moderate, above 6 complex).
+ * @returns The canonical complexity; "moderate" for anything unrecognized,
+ *   including null/undefined, numbers below 1, NaN and non-finite values.
+ */
+export function normalizeComplexity(
+  value: unknown,
+): "simple" | "moderate" | "complex" {
+  if (typeof value === "string") {
+    const lower = value.toLowerCase().trim();
+    if (VALID_COMPLEXITIES.has(lower)) return lower as "simple" | "moderate" | "complex";
+    const aliased = lookupOwn(COMPLEXITY_STRING_MAP, lower);
+    if (aliased) return aliased as "simple" | "moderate" | "complex";
+    return "moderate";
+  }
+
+  if (typeof value === "number" && Number.isFinite(value) && value >= 1) {
+    if (value <= 3) return "simple";
+    if (value <= 6) return "moderate";
+    return "complex";
+  }
+
+  return "moderate";
+}
+
+export interface DroppedEdge {
+  source: string;
+  target: string;
+  type: string;
+  reason: "missing-source" | "missing-target" | "missing-both";
+}
+
+export interface NormalizationStats {
+  idsFixed: number;
+  complexityFixed: number;
+  edgesRewritten: number;
+  danglingEdgesDropped: number;
+  droppedEdges: DroppedEdge[];
+}
+
+export interface NormalizeBatchResult {
+  nodes: Record<string, unknown>[];
+  edges: Record<string, unknown>[];
+  idMap: Map<string, string>;
+  stats: NormalizationStats;
+}
+
+/** Keeps only the object entries of `value`; a non-array yields an empty list. */
+function toRecordArray(value: unknown): Record<string, unknown>[] {
+  if (!Array.isArray(value)) return [];
+  return value.filter(
+    (entry): entry is Record<string, unknown> => typeof entry === "object" && entry !== null,
+  );
+}
+
+/**
+ * Normalizes a merged batch output: fixes node IDs and numeric complexity,
+ * rewrites edge references, deduplicates nodes and edges, and drops dangling edges.
+ *
+ * This runs BEFORE upstream's sanitizeGraph/autoFixGraph/normalizeGraph pipeline,
+ * handling concerns that pipeline does not cover: malformed IDs, numeric complexity,
+ * edge reference rewriting after ID correction, and edge deduplication.
+ *
+ * @param data - Merged analyzer output. Missing or non-array `nodes`/`edges` are
+ *   treated as empty, and non-object entries are skipped, rather than throwing.
+ * @returns Normalized nodes and edges, the original-ID to corrected-ID map, and
+ *   counters describing what was changed or dropped.
+ */
+export function normalizeBatchOutput(data: {
+  nodes: Record<string, unknown>[];
+  edges: Record<string, unknown>[];
+}): NormalizeBatchResult {
+  const stats: NormalizationStats = {
+    idsFixed: 0,
+    complexityFixed: 0,
+    edgesRewritten: 0,
+    danglingEdgesDropped: 0,
+    droppedEdges: [],
+  };
+
+  const idMap = new Map<string, string>();
+  const rawNodes = toRecordArray(data?.nodes);
+  const rawEdges = toRecordArray(data?.edges);
+
+  // Pass 1: Normalize node IDs and numeric complexity
+  const nodes = rawNodes.map((raw) => {
+    const oldId = String(raw.id ?? "");
+    const nodeType = String(raw.type ?? "file");
+    const newId = normalizeNodeId(oldId, {
+      type: nodeType,
+      filePath: typeof raw.filePath === "string" ? raw.filePath : undefined,
+      name: typeof raw.name === "string" ? raw.name : undefined,
+    });
+
+    if (newId !== oldId) {
+      stats.idsFixed++;
+    }
+    idMap.set(oldId, newId);
+
+    const result: Record<string, unknown> = { ...raw, id: newId };
+
+    // Only fix numeric complexity here — string aliases are handled by upstream's
+    // COMPLEXITY_ALIASES in autoFixGraph
+    if (typeof raw.complexity === "number") {
+      result.complexity = normalizeComplexity(raw.complexity);
+      stats.complexityFixed++;
+    }
+
+    return result;
+  });
+
+  // Deduplicate nodes (keep last occurrence)
+  const seenIds = new Map<string, number>();
+  for (let i = 0; i < nodes.length; i++) {
+    seenIds.set(String(nodes[i].id), i);
+  }
+  const deduped = nodes.filter((_, i) => seenIds.get(String(nodes[i].id)) === i);
+  const validNodeIds = new Set(deduped.map((n) => String(n.id)));
+
+  // Pass 2: Rewrite edge references and deduplicate
+  const edges: Record<string, unknown>[] = [];
+  const seenEdges = new Set<string>();
+  for (const raw of rawEdges) {
+    const oldSource = String(raw.source ?? "");
+    const oldTarget = String(raw.target ?? "");
+    let newSource = idMap.get(oldSource) ?? oldSource;
+    let newTarget = idMap.get(oldTarget) ?? oldTarget;
+
+    // Fallback: if endpoint not found in idMap, normalize it directly
+    // (handles cross-variant malformed IDs between nodes and edges)
+    if (!validNodeIds.has(newSource)) {
+      const normalized = normalizeNodeId(newSource, { type: "file" });
+      if (validNodeIds.has(normalized)) newSource = normalized;
+    }
+    if (!validNodeIds.has(newTarget)) {
+      const normalized = normalizeNodeId(newTarget, { type: "file" });
+      if (validNodeIds.has(normalized)) newTarget = normalized;
+    }
+
+    if (newSource !== oldSource || newTarget !== oldTarget) {
+      stats.edgesRewritten++;
+    }
+
+    if (!validNodeIds.has(newSource) || !validNodeIds.has(newTarget)) {
+      const missingSource = !validNodeIds.has(newSource);
+      const missingTarget = !validNodeIds.has(newTarget);
+      stats.danglingEdgesDropped++;
+      stats.droppedEdges.push({
+        source: newSource,
+        target: newTarget,
+        type: String(raw.type ?? ""),
+        reason: missingSource && missingTarget ? "missing-both" : missingSource ? "missing-source" : "missing-target",
+      });
+      continue;
+    }
+
+    // Deduplicate by composite key (source + target + type)
+    const edgeType = String(raw.type ?? "");
+    const edgeKey = `${newSource}|${newTarget}|${edgeType}`;
+    if (seenEdges.has(edgeKey)) continue;
+    seenEdges.add(edgeKey);
+
+    edges.push({ ...raw, source: newSource, target: newTarget });
+  }
+
+  return {
+    nodes: deduped,
+    edges,
+    idMap,
+    stats,
+  };
+}
diff --git a/understand-anything-plugin/packages/core/src/index.ts b/understand-anything-plugin/packages/core/src/index.ts
index 0213ad5..756284d 100644
--- a/understand-anything-plugin/packages/core/src/index.ts
+++ b/understand-anything-plugin/packages/core/src/index.ts
@@ -19,6 +19,14 @@ export {
   parseProjectSummaryResponse,
 } from "./analyzer/llm-analyzer.js";
 export type { LLMFileAnalysis, LLMProjectSummary } from "./analyzer/llm-analyzer.js";
+export {
+  normalizeNodeId,
+  normalizeComplexity,
+  normalizeBatchOutput,
+  type DroppedEdge,
+  type NormalizationStats,
+  type NormalizeBatchResult,
+} from "./analyzer/normalize-graph.js";
 export { SearchEngine, type SearchResult, type SearchOptions } from "./search.js";
 export {
   getChangedFiles,
diff --git a/understand-anything-plugin/skills/understand/SKILL.md b/understand-anything-plugin/skills/understand/SKILL.md
index b6503dc..64c8cba 100644
--- a/understand-anything-plugin/skills/understand/SKILL.md
+++ b/understand-anything-plugin/skills/understand/SKILL.md
@@ -174,11 +174,28 @@ After batches complete, merge with the existing graph:
 
 ## Phase 3 — ASSEMBLE
 
-Merge all file-analyzer results into a single set of nodes and edges. Then perform basic integrity cleanup:
+Merge all file-analyzer results into a single set of nodes and edges. Then perform normalization and integrity cleanup **in this order**:
 
-- Remove any edge whose `source` or `target` references a node ID that does not exist in the merged node set
-- Remove duplicate node IDs (keep the last occurrence)
-- Log any removed edges or nodes for the final summary
+1. 
**Normalize node IDs:** For every node, verify the `id` field follows the convention `<type-prefix>:<path>` where `type-prefix` is one of `file`, `func`, `class`, `module`, `concept`, `config`, `document`, `service`, `table`, `endpoint`, `pipeline`, `schema`, `resource`. Apply these fixes:
+   - If the ID has a double prefix (e.g., `file:file:src/foo.ts`), strip the duplicate prefix.
+   - If the ID has a project-name prefix (e.g., `my-project:file:src/foo.ts`), strip the project-name portion.
+   - If the ID is a bare file path with no prefix, add the appropriate prefix based on the node's `type` field: `file` → `file:<path>`, `function` → `func:<filePath>:<name>`, `class` → `class:<filePath>:<name>`.
+   - Build a mapping of original IDs → corrected IDs.
+
+2. **Normalize complexity values:** For every node, verify `complexity` is one of `"simple"`, `"moderate"`, `"complex"`. Apply these mappings for invalid values:
+   - `"low"`, `"easy"` → `"simple"`
+   - `"medium"`, `"intermediate"` → `"moderate"`
+   - `"high"`, `"hard"`, `"difficult"` → `"complex"`
+   - Numeric 1-3 → `"simple"`, 4-6 → `"moderate"`, 7-10 → `"complex"`
+   - Any other value → `"moderate"`
+
+3. **Rewrite edge references:** Using the ID mapping from step 1, update every edge's `source` and `target` fields. This prevents cascading edge drops when only the ID format was wrong.
+
+4. **Remove duplicate node IDs:** If duplicate node IDs exist after normalization, keep the last occurrence.
+
+5. **Remove dangling edges:** Remove any edge whose `source` or `target` references a node ID that does not exist in the merged node set.
+
+6. **Log changes:** Record counts of IDs corrected, complexity values fixed, edges rewritten, duplicates removed, and dangling edges dropped. Include these counts in the Phase warnings list passed to the reviewer. 
--- diff --git a/understand-anything-plugin/skills/understand/file-analyzer-prompt.md b/understand-anything-plugin/skills/understand/file-analyzer-prompt.md index 974d0a0..1177b16 100644 --- a/understand-anything-plugin/skills/understand/file-analyzer-prompt.md +++ b/understand-anything-plugin/skills/understand/file-analyzer-prompt.md @@ -444,6 +444,8 @@ You MUST use these exact prefixes for node IDs: **Scope restriction:** Only produce node types listed above. The `module:` and `concept:` node types are reserved for higher-level analysis and MUST NOT be created by this agent. +> **WARNING:** Node IDs MUST use the exact prefix formats shown above. Do NOT prefix IDs with the project name (e.g., `my-project:file:src/foo.ts` is WRONG). Do NOT use bare file paths without a type prefix (e.g., `src/foo.ts` is WRONG). Invalid IDs will be auto-corrected during assembly, which may cause unexpected edge rewiring. + ## Output Format Produce a single, valid JSON block. Validate it mentally before writing -- malformed JSON breaks the entire pipeline.