fix(core): handle all 13 node types, edge cross-variant resolution, and dropped edge traceability

- Add all 13 node types (including non-code) to VALID_PREFIXES and TYPE_TO_PREFIX
  to prevent valid IDs like config:tsconfig.json from being stripped
- Add fallback normalizeNodeId on edge endpoints not found in idMap, fixing
  silent relationship loss when edges use different malformed variants than nodes
- Add DroppedEdge interface with source, target, type, and reason fields so
  callers can surface exactly which edges were lost and why
- Use honest Record<string, unknown>[] return types instead of unsafe type casts
- Align SKILL.md complexity aliases with COMPLEXITY_STRING_MAP
- Add 5 new tests for non-code types, cross-variant edges, dropped edge detail,
  and validateGraph integration

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Lum1104
2026-04-01 21:10:43 +08:00
Unverified
parent 60c2ead398
commit 6f443fbc14
4 changed files with 177 additions and 17 deletions
@@ -4,6 +4,7 @@ import {
normalizeComplexity,
normalizeBatchOutput,
} from "../analyzer/normalize-graph.js";
import { validateGraph } from "../schema.js";
describe("normalizeNodeId", () => {
it("passes through a correct file ID unchanged", () => {
@@ -83,12 +84,12 @@ describe("normalizeNodeId", () => {
).toBe("concept:caching");
});
it("handles double project-name prefix", () => {
it("handles project-name prefix before a valid non-code prefix", () => {
expect(
normalizeNodeId("my-project:service:docker-compose.yml", {
type: "file",
}),
).toBe("file:docker-compose.yml");
).toBe("service:docker-compose.yml");
});
it("returns empty string for empty input", () => {
@@ -98,6 +99,26 @@ describe("normalizeNodeId", () => {
// A node type outside the known set is expected to leave the ID exactly as given.
it("falls back to untouched ID for unknown node type", () => {
  const rawId = "some-id";
  const normalized = normalizeNodeId(rawId, { type: "widget" as any });
  expect(normalized).toBe(rawId);
});
// IDs that already carry the prefix matching their non-code node type are
// expected to come back verbatim.
it("passes through non-code type IDs unchanged", () => {
  const wellFormed = [
    { id: "config:tsconfig.json", type: "config" },
    { id: "document:README.md", type: "document" },
    { id: "service:docker-compose.yml", type: "service" },
    { id: "table:migrations/001.sql:users", type: "table" },
    { id: "endpoint:src/routes.ts:GET /api/users", type: "endpoint" },
    { id: "pipeline:.github/workflows/ci.yml", type: "pipeline" },
    { id: "schema:schema.graphql", type: "schema" },
    { id: "resource:main.tf", type: "resource" },
  ] as const;

  for (const { id, type } of wellFormed) {
    expect(normalizeNodeId(id, { type })).toBe(id);
  }
});
// A bare path combined with a non-code node type should gain that type's prefix.
it("adds prefix for bare paths with non-code types", () => {
  const configId = normalizeNodeId("tsconfig.json", { type: "config" });
  expect(configId).toBe("config:tsconfig.json");

  const documentId = normalizeNodeId("README.md", { type: "document" });
  expect(documentId).toBe("document:README.md");
});
// A leading project-name segment in front of a valid non-code prefix should be removed.
it("strips project-name prefix from non-code type IDs", () => {
  const withProjectName = "my-project:config:tsconfig.json";
  const normalized = normalizeNodeId(withProjectName, { type: "config" });
  expect(normalized).toBe("config:tsconfig.json");
});
});
describe("normalizeComplexity", () => {
@@ -257,6 +278,13 @@ describe("normalizeBatchOutput", () => {
expect(result.edges).toHaveLength(0);
expect(result.stats.danglingEdgesDropped).toBe(1);
expect(result.stats.droppedEdges).toHaveLength(1);
expect(result.stats.droppedEdges[0]).toEqual({
source: "file:src/a.ts",
target: "file:src/nonexistent.ts",
type: "imports",
reason: "missing-target",
});
});
it("deduplicates nodes keeping last occurrence", () => {
@@ -372,4 +400,99 @@ describe("normalizeBatchOutput", () => {
expect(result.stats.danglingEdgesDropped).toBe(1);
expect(result.edges).toHaveLength(1);
});
it("resolves edge endpoints with different malformed variants than node IDs", () => {
  // The node is declared with a bare path while the edge names the same file
  // through a project-name-prefixed ID; both are expected to meet at
  // "file:src/bare.ts".
  const bareNode = {
    id: "src/bare.ts",
    type: "file",
    name: "bare.ts",
    filePath: "src/bare.ts",
    summary: "Bare",
    tags: [],
    complexity: "simple",
  };
  const targetNode = {
    id: "file:src/target.ts",
    type: "file",
    name: "target.ts",
    filePath: "src/target.ts",
    summary: "Target",
    tags: [],
    complexity: "simple",
  };
  const crossVariantEdge = {
    source: "my-project:file:src/bare.ts",
    target: "file:src/target.ts",
    type: "imports",
    direction: "forward",
    weight: 0.7,
  };

  const { edges } = normalizeBatchOutput({
    nodes: [bareNode, targetNode],
    edges: [crossVariantEdge],
  });

  expect(edges).toHaveLength(1);
  const [resolved] = edges;
  expect(resolved.source).toBe("file:src/bare.ts");
  expect(resolved.target).toBe("file:src/target.ts");
});
});
describe("normalizeBatchOutput integration", () => {
  // End-to-end check: normalized nodes and edges, once wrapped in a full graph
  // envelope, should be accepted by validateGraph.
  it("produces output that passes validateGraph after wrapping", () => {
    // One node uses a project-name-prefixed ID and a numeric complexity, the
    // other a bare path; the edge refers to both by those same raw IDs.
    const entryNode = {
      id: "my-project:file:src/index.ts",
      type: "file",
      name: "index.ts",
      filePath: "src/index.ts",
      summary: "Entry point",
      tags: ["entry"],
      complexity: 3,
    };
    const utilsNode = {
      id: "src/utils.ts",
      type: "file",
      name: "utils.ts",
      filePath: "src/utils.ts",
      summary: "Utilities",
      tags: [],
      complexity: "simple",
    };
    const importEdge = {
      source: "my-project:file:src/index.ts",
      target: "src/utils.ts",
      type: "imports",
      direction: "forward",
      weight: 0.7,
    };

    const batch = normalizeBatchOutput({
      nodes: [entryNode, utilsNode],
      edges: [importEdge],
    });

    const projectMeta = {
      name: "test",
      languages: ["typescript"],
      frameworks: [],
      description: "Test project",
      analyzedAt: new Date().toISOString(),
      gitCommitHash: "abc123",
    };
    const wrapped = {
      version: "1.0.0",
      project: projectMeta,
      nodes: batch.nodes,
      edges: batch.edges,
      layers: [],
      tour: [],
    };

    const verdict = validateGraph(wrapped);
    expect(verdict.success).toBe(true);
    expect(verdict.data?.nodes).toHaveLength(2);
    expect(verdict.data?.edges).toHaveLength(1);
  });
});
@@ -1,6 +1,8 @@
import type { GraphNode, GraphEdge } from "../types.js";
const VALID_PREFIXES = new Set(["file", "func", "class", "module", "concept"]);
const VALID_PREFIXES = new Set([
"file", "func", "class", "module", "concept",
"config", "document", "service", "table", "endpoint",
"pipeline", "schema", "resource",
]);
const TYPE_TO_PREFIX: Record<string, string> = {
file: "file",
@@ -8,6 +10,14 @@ const TYPE_TO_PREFIX: Record<string, string> = {
class: "class",
module: "module",
concept: "concept",
config: "config",
document: "document",
service: "service",
table: "table",
endpoint: "endpoint",
pipeline: "pipeline",
schema: "schema",
resource: "resource",
};
/**
@@ -79,8 +89,6 @@ export function normalizeNodeId(
const VALID_COMPLEXITIES = new Set(["simple", "moderate", "complex"]);
// String aliases for complexity — mirrors upstream's COMPLEXITY_ALIASES.
// After rebasing onto upstream main, this can be replaced with an import.
const COMPLEXITY_STRING_MAP: Record<string, string> = {
low: "simple",
easy: "simple",
@@ -115,16 +123,24 @@ export function normalizeComplexity(
return "moderate";
}
/**
 * Record of a single edge that normalization discarded because at least one
 * of its endpoints did not match a known node ID.
 */
export interface DroppedEdge {
  /** Source endpoint as it stood after ID resolution was attempted. */
  source: string;
  /** Target endpoint as it stood after ID resolution was attempted. */
  target: string;
  /** The edge's `type` coerced to a string; empty when the raw edge had none. */
  type: string;
  /** Which endpoint(s) could not be matched to a node. */
  reason:
    | "missing-source"
    | "missing-target"
    | "missing-both";
}
/**
 * Counters and details describing what a normalization pass changed.
 */
export interface NormalizationStats {
/** Number of node IDs corrected. NOTE(review): increment site is outside this view — confirm it is per node. */
idsFixed: number;
/** Number of complexity values corrected. NOTE(review): increment site is outside this view — confirm. */
complexityFixed: number;
/**
 * Edges whose source or target differs from the raw value after resolution.
 * Counted before the dangling check, so an edge that is rewritten and then
 * dropped is included here as well.
 */
edgesRewritten: number;
/** Edges discarded because an endpoint matched no known node ID. */
danglingEdgesDropped: number;
/** One entry per discarded edge, recorded together with `danglingEdgesDropped`. */
droppedEdges: DroppedEdge[];
}
export interface NormalizeBatchResult {
nodes: GraphNode[];
edges: GraphEdge[];
nodes: Record<string, unknown>[];
edges: Record<string, unknown>[];
idMap: Map<string, string>;
stats: NormalizationStats;
}
@@ -146,6 +162,7 @@ export function normalizeBatchOutput(data: {
complexityFixed: 0,
edgesRewritten: 0,
danglingEdgesDropped: 0,
droppedEdges: [],
};
const idMap = new Map<string, string>();
@@ -191,15 +208,34 @@ export function normalizeBatchOutput(data: {
for (const raw of data.edges) {
const oldSource = String(raw.source ?? "");
const oldTarget = String(raw.target ?? "");
const newSource = idMap.get(oldSource) ?? oldSource;
const newTarget = idMap.get(oldTarget) ?? oldTarget;
let newSource = idMap.get(oldSource) ?? oldSource;
let newTarget = idMap.get(oldTarget) ?? oldTarget;
// Fallback: if endpoint not found in idMap, normalize it directly
// (handles cross-variant malformed IDs between nodes and edges)
if (!validNodeIds.has(newSource)) {
const normalized = normalizeNodeId(newSource, { type: "file" });
if (validNodeIds.has(normalized)) newSource = normalized;
}
if (!validNodeIds.has(newTarget)) {
const normalized = normalizeNodeId(newTarget, { type: "file" });
if (validNodeIds.has(normalized)) newTarget = normalized;
}
if (newSource !== oldSource || newTarget !== oldTarget) {
stats.edgesRewritten++;
}
if (!validNodeIds.has(newSource) || !validNodeIds.has(newTarget)) {
const missingSource = !validNodeIds.has(newSource);
const missingTarget = !validNodeIds.has(newTarget);
stats.danglingEdgesDropped++;
stats.droppedEdges.push({
source: newSource,
target: newTarget,
type: String(raw.type ?? ""),
reason: missingSource && missingTarget ? "missing-both" : missingSource ? "missing-source" : "missing-target",
});
continue;
}
@@ -213,8 +249,8 @@ export function normalizeBatchOutput(data: {
}
return {
nodes: deduped as unknown as GraphNode[],
edges: edges as unknown as GraphEdge[],
nodes: deduped,
edges,
idMap,
stats,
};
@@ -14,6 +14,7 @@ export {
normalizeNodeId,
normalizeComplexity,
normalizeBatchOutput,
type DroppedEdge,
type NormalizationStats,
type NormalizeBatchResult,
} from "./analyzer/normalize-graph.js";
@@ -152,16 +152,16 @@ After batches complete, merge with the existing graph:
Merge all file-analyzer results into a single set of nodes and edges. Then perform normalization and integrity cleanup **in this order**:
1. **Normalize node IDs:** For every node, verify the `id` field follows the convention `<type-prefix>:<path>` where type-prefix is one of `file`, `func`, `class`, `module`, `concept`. Apply these fixes:
1. **Normalize node IDs:** For every node, verify the `id` field follows the convention `<type-prefix>:<path>` where type-prefix is one of `file`, `func`, `class`, `module`, `concept`, `config`, `document`, `service`, `table`, `endpoint`, `pipeline`, `schema`, `resource`. Apply these fixes:
- If the ID has a double prefix (e.g., `file:file:src/foo.ts`), strip the duplicate prefix.
- If the ID has a project-name prefix (e.g., `my-project:file:src/foo.ts`), strip the project-name portion.
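- If the ID is a bare file path with no prefix, add the appropriate prefix based on the node's `type` field: `file` → `file:<path>`, `function` → `func:<filePath>:<name>`, `class` → `class:<filePath>:<name>`; non-code types use their own type name as the prefix (e.g., `config` → `config:<path>`, `document` → `document:<path>`).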
- Build a mapping of original IDs → corrected IDs.
2. **Normalize complexity values:** For every node, verify `complexity` is one of `"simple"`, `"moderate"`, `"complex"`. Apply these mappings for invalid values:
- `"low"`, `"easy"`, `"trivial"`, `"basic"` → `"simple"`
- `"medium"`, `"mid"`, `"average"` → `"moderate"`
- `"high"`, `"hard"`, `"difficult"`, `"advanced"` → `"complex"`
- `"low"`, `"easy"` → `"simple"`
- `"medium"`, `"intermediate"` → `"moderate"`
- `"high"`, `"hard"`, `"difficult"` → `"complex"`
- Numeric 1-3 → `"simple"`, 4-6 → `"moderate"`, 7-10 → `"complex"`
- Any other value → `"moderate"`