mirror of
https://github.com/Egonex-AI/Understand-Anything.git
synced 2026-06-22 10:58:03 +08:00
fix(core): handle all 13 node types, edge cross-variant resolution, and dropped edge traceability
- Add all 13 node types (including non-code) to VALID_PREFIXES and TYPE_TO_PREFIX to prevent valid IDs like config:tsconfig.json from being stripped
- Add fallback normalizeNodeId on edge endpoints not found in idMap, fixing silent relationship loss when edges use different malformed variants than nodes
- Add DroppedEdge interface with source, target, type, and reason fields so callers can surface exactly which edges were lost and why
- Use honest Record<string, unknown>[] return types instead of unsafe type casts
- Align SKILL.md complexity aliases with COMPLEXITY_STRING_MAP
- Add 5 new tests for non-code types, cross-variant edges, dropped edge detail, and validateGraph integration

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,7 @@ import {
|
||||
normalizeComplexity,
|
||||
normalizeBatchOutput,
|
||||
} from "../analyzer/normalize-graph.js";
|
||||
import { validateGraph } from "../schema.js";
|
||||
|
||||
describe("normalizeNodeId", () => {
|
||||
it("passes through a correct file ID unchanged", () => {
|
||||
@@ -83,12 +84,12 @@ describe("normalizeNodeId", () => {
|
||||
).toBe("concept:caching");
|
||||
});
|
||||
|
||||
it("handles double project-name prefix", () => {
|
||||
it("handles project-name prefix before a valid non-code prefix", () => {
|
||||
expect(
|
||||
normalizeNodeId("my-project:service:docker-compose.yml", {
|
||||
type: "file",
|
||||
}),
|
||||
).toBe("file:docker-compose.yml");
|
||||
).toBe("service:docker-compose.yml");
|
||||
});
|
||||
|
||||
it("returns empty string for empty input", () => {
|
||||
@@ -98,6 +99,26 @@ describe("normalizeNodeId", () => {
|
||||
it("falls back to untouched ID for unknown node type", () => {
|
||||
expect(normalizeNodeId("some-id", { type: "widget" as any })).toBe("some-id");
|
||||
});
|
||||
|
||||
it("passes through non-code type IDs unchanged", () => {
|
||||
expect(normalizeNodeId("config:tsconfig.json", { type: "config" })).toBe("config:tsconfig.json");
|
||||
expect(normalizeNodeId("document:README.md", { type: "document" })).toBe("document:README.md");
|
||||
expect(normalizeNodeId("service:docker-compose.yml", { type: "service" })).toBe("service:docker-compose.yml");
|
||||
expect(normalizeNodeId("table:migrations/001.sql:users", { type: "table" })).toBe("table:migrations/001.sql:users");
|
||||
expect(normalizeNodeId("endpoint:src/routes.ts:GET /api/users", { type: "endpoint" })).toBe("endpoint:src/routes.ts:GET /api/users");
|
||||
expect(normalizeNodeId("pipeline:.github/workflows/ci.yml", { type: "pipeline" })).toBe("pipeline:.github/workflows/ci.yml");
|
||||
expect(normalizeNodeId("schema:schema.graphql", { type: "schema" })).toBe("schema:schema.graphql");
|
||||
expect(normalizeNodeId("resource:main.tf", { type: "resource" })).toBe("resource:main.tf");
|
||||
});
|
||||
|
||||
it("adds prefix for bare paths with non-code types", () => {
|
||||
expect(normalizeNodeId("tsconfig.json", { type: "config" })).toBe("config:tsconfig.json");
|
||||
expect(normalizeNodeId("README.md", { type: "document" })).toBe("document:README.md");
|
||||
});
|
||||
|
||||
it("strips project-name prefix from non-code type IDs", () => {
|
||||
expect(normalizeNodeId("my-project:config:tsconfig.json", { type: "config" })).toBe("config:tsconfig.json");
|
||||
});
|
||||
});
|
||||
|
||||
describe("normalizeComplexity", () => {
|
||||
@@ -257,6 +278,13 @@ describe("normalizeBatchOutput", () => {
|
||||
|
||||
expect(result.edges).toHaveLength(0);
|
||||
expect(result.stats.danglingEdgesDropped).toBe(1);
|
||||
expect(result.stats.droppedEdges).toHaveLength(1);
|
||||
expect(result.stats.droppedEdges[0]).toEqual({
|
||||
source: "file:src/a.ts",
|
||||
target: "file:src/nonexistent.ts",
|
||||
type: "imports",
|
||||
reason: "missing-target",
|
||||
});
|
||||
});
|
||||
|
||||
it("deduplicates nodes keeping last occurrence", () => {
|
||||
@@ -372,4 +400,99 @@ describe("normalizeBatchOutput", () => {
|
||||
expect(result.stats.danglingEdgesDropped).toBe(1);
|
||||
expect(result.edges).toHaveLength(1);
|
||||
});
|
||||
|
||||
it("resolves edge endpoints with different malformed variants than node IDs", () => {
|
||||
const result = normalizeBatchOutput({
|
||||
nodes: [
|
||||
{
|
||||
id: "src/bare.ts",
|
||||
type: "file",
|
||||
name: "bare.ts",
|
||||
filePath: "src/bare.ts",
|
||||
summary: "Bare",
|
||||
tags: [],
|
||||
complexity: "simple",
|
||||
},
|
||||
{
|
||||
id: "file:src/target.ts",
|
||||
type: "file",
|
||||
name: "target.ts",
|
||||
filePath: "src/target.ts",
|
||||
summary: "Target",
|
||||
tags: [],
|
||||
complexity: "simple",
|
||||
},
|
||||
],
|
||||
edges: [
|
||||
{
|
||||
source: "my-project:file:src/bare.ts",
|
||||
target: "file:src/target.ts",
|
||||
type: "imports",
|
||||
direction: "forward",
|
||||
weight: 0.7,
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(result.edges).toHaveLength(1);
|
||||
expect(result.edges[0].source).toBe("file:src/bare.ts");
|
||||
expect(result.edges[0].target).toBe("file:src/target.ts");
|
||||
});
|
||||
});
|
||||
|
||||
describe("normalizeBatchOutput integration", () => {
|
||||
it("produces output that passes validateGraph after wrapping", () => {
|
||||
const result = normalizeBatchOutput({
|
||||
nodes: [
|
||||
{
|
||||
id: "my-project:file:src/index.ts",
|
||||
type: "file",
|
||||
name: "index.ts",
|
||||
filePath: "src/index.ts",
|
||||
summary: "Entry point",
|
||||
tags: ["entry"],
|
||||
complexity: 3,
|
||||
},
|
||||
{
|
||||
id: "src/utils.ts",
|
||||
type: "file",
|
||||
name: "utils.ts",
|
||||
filePath: "src/utils.ts",
|
||||
summary: "Utilities",
|
||||
tags: [],
|
||||
complexity: "simple",
|
||||
},
|
||||
],
|
||||
edges: [
|
||||
{
|
||||
source: "my-project:file:src/index.ts",
|
||||
target: "src/utils.ts",
|
||||
type: "imports",
|
||||
direction: "forward",
|
||||
weight: 0.7,
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const graph = {
|
||||
version: "1.0.0",
|
||||
project: {
|
||||
name: "test",
|
||||
languages: ["typescript"],
|
||||
frameworks: [],
|
||||
description: "Test project",
|
||||
analyzedAt: new Date().toISOString(),
|
||||
gitCommitHash: "abc123",
|
||||
},
|
||||
nodes: result.nodes,
|
||||
edges: result.edges,
|
||||
layers: [],
|
||||
tour: [],
|
||||
};
|
||||
|
||||
const validation = validateGraph(graph);
|
||||
expect(validation.success).toBe(true);
|
||||
expect(validation.data?.nodes).toHaveLength(2);
|
||||
expect(validation.data?.edges).toHaveLength(1);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import type { GraphNode, GraphEdge } from "../types.js";
|
||||
|
||||
const VALID_PREFIXES = new Set(["file", "func", "class", "module", "concept"]);
|
||||
const VALID_PREFIXES = new Set([
|
||||
"file", "func", "class", "module", "concept",
|
||||
"config", "document", "service", "table", "endpoint",
|
||||
"pipeline", "schema", "resource",
|
||||
]);
|
||||
|
||||
const TYPE_TO_PREFIX: Record<string, string> = {
|
||||
file: "file",
|
||||
@@ -8,6 +10,14 @@ const TYPE_TO_PREFIX: Record<string, string> = {
|
||||
class: "class",
|
||||
module: "module",
|
||||
concept: "concept",
|
||||
config: "config",
|
||||
document: "document",
|
||||
service: "service",
|
||||
table: "table",
|
||||
endpoint: "endpoint",
|
||||
pipeline: "pipeline",
|
||||
schema: "schema",
|
||||
resource: "resource",
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -79,8 +89,6 @@ export function normalizeNodeId(
|
||||
|
||||
const VALID_COMPLEXITIES = new Set(["simple", "moderate", "complex"]);
|
||||
|
||||
// String aliases for complexity — mirrors upstream's COMPLEXITY_ALIASES.
|
||||
// After rebasing onto upstream main, this can be replaced with an import.
|
||||
const COMPLEXITY_STRING_MAP: Record<string, string> = {
|
||||
low: "simple",
|
||||
easy: "simple",
|
||||
@@ -115,16 +123,24 @@ export function normalizeComplexity(
|
||||
return "moderate";
|
||||
}
|
||||
|
||||
export interface DroppedEdge {
|
||||
source: string;
|
||||
target: string;
|
||||
type: string;
|
||||
reason: "missing-source" | "missing-target" | "missing-both";
|
||||
}
|
||||
|
||||
export interface NormalizationStats {
|
||||
idsFixed: number;
|
||||
complexityFixed: number;
|
||||
edgesRewritten: number;
|
||||
danglingEdgesDropped: number;
|
||||
droppedEdges: DroppedEdge[];
|
||||
}
|
||||
|
||||
export interface NormalizeBatchResult {
|
||||
nodes: GraphNode[];
|
||||
edges: GraphEdge[];
|
||||
nodes: Record<string, unknown>[];
|
||||
edges: Record<string, unknown>[];
|
||||
idMap: Map<string, string>;
|
||||
stats: NormalizationStats;
|
||||
}
|
||||
@@ -146,6 +162,7 @@ export function normalizeBatchOutput(data: {
|
||||
complexityFixed: 0,
|
||||
edgesRewritten: 0,
|
||||
danglingEdgesDropped: 0,
|
||||
droppedEdges: [],
|
||||
};
|
||||
|
||||
const idMap = new Map<string, string>();
|
||||
@@ -191,15 +208,34 @@ export function normalizeBatchOutput(data: {
|
||||
for (const raw of data.edges) {
|
||||
const oldSource = String(raw.source ?? "");
|
||||
const oldTarget = String(raw.target ?? "");
|
||||
const newSource = idMap.get(oldSource) ?? oldSource;
|
||||
const newTarget = idMap.get(oldTarget) ?? oldTarget;
|
||||
let newSource = idMap.get(oldSource) ?? oldSource;
|
||||
let newTarget = idMap.get(oldTarget) ?? oldTarget;
|
||||
|
||||
// Fallback: if endpoint not found in idMap, normalize it directly
|
||||
// (handles cross-variant malformed IDs between nodes and edges)
|
||||
if (!validNodeIds.has(newSource)) {
|
||||
const normalized = normalizeNodeId(newSource, { type: "file" });
|
||||
if (validNodeIds.has(normalized)) newSource = normalized;
|
||||
}
|
||||
if (!validNodeIds.has(newTarget)) {
|
||||
const normalized = normalizeNodeId(newTarget, { type: "file" });
|
||||
if (validNodeIds.has(normalized)) newTarget = normalized;
|
||||
}
|
||||
|
||||
if (newSource !== oldSource || newTarget !== oldTarget) {
|
||||
stats.edgesRewritten++;
|
||||
}
|
||||
|
||||
if (!validNodeIds.has(newSource) || !validNodeIds.has(newTarget)) {
|
||||
const missingSource = !validNodeIds.has(newSource);
|
||||
const missingTarget = !validNodeIds.has(newTarget);
|
||||
stats.danglingEdgesDropped++;
|
||||
stats.droppedEdges.push({
|
||||
source: newSource,
|
||||
target: newTarget,
|
||||
type: String(raw.type ?? ""),
|
||||
reason: missingSource && missingTarget ? "missing-both" : missingSource ? "missing-source" : "missing-target",
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -213,8 +249,8 @@ export function normalizeBatchOutput(data: {
|
||||
}
|
||||
|
||||
return {
|
||||
nodes: deduped as unknown as GraphNode[],
|
||||
edges: edges as unknown as GraphEdge[],
|
||||
nodes: deduped,
|
||||
edges,
|
||||
idMap,
|
||||
stats,
|
||||
};
|
||||
|
||||
@@ -14,6 +14,7 @@ export {
|
||||
normalizeNodeId,
|
||||
normalizeComplexity,
|
||||
normalizeBatchOutput,
|
||||
type DroppedEdge,
|
||||
type NormalizationStats,
|
||||
type NormalizeBatchResult,
|
||||
} from "./analyzer/normalize-graph.js";
|
||||
|
||||
@@ -152,16 +152,16 @@ After batches complete, merge with the existing graph:
|
||||
|
||||
Merge all file-analyzer results into a single set of nodes and edges. Then perform normalization and integrity cleanup **in this order**:
|
||||
|
||||
1. **Normalize node IDs:** For every node, verify the `id` field follows the convention `<type-prefix>:<path>` where type-prefix is one of `file`, `func`, `class`, `module`, `concept`. Apply these fixes:
|
||||
1. **Normalize node IDs:** For every node, verify the `id` field follows the convention `<type-prefix>:<path>` where type-prefix is one of `file`, `func`, `class`, `module`, `concept`, `config`, `document`, `service`, `table`, `endpoint`, `pipeline`, `schema`, `resource`. Apply these fixes:
|
||||
- If the ID has a double prefix (e.g., `file:file:src/foo.ts`), strip the duplicate prefix.
|
||||
- If the ID has a project-name prefix (e.g., `my-project:file:src/foo.ts`), strip the project-name portion.
|
||||
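- If the ID is a bare file path with no prefix, add the appropriate prefix based on the node's `type` field: `file` → `file:<path>`, `function` → `func:<filePath>:<name>`, `class` → `class:<filePath>:<name>`; non-code types use their own type name as the prefix (e.g., `config` → `config:<path>`, `document` → `document:<path>`).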
|
||||
- Build a mapping of original IDs → corrected IDs.
|
||||
|
||||
2. **Normalize complexity values:** For every node, verify `complexity` is one of `"simple"`, `"moderate"`, `"complex"`. Apply these mappings for invalid values:
|
||||
- `"low"`, `"easy"`, `"trivial"`, `"basic"` → `"simple"`
|
||||
- `"medium"`, `"mid"`, `"average"` → `"moderate"`
|
||||
- `"high"`, `"hard"`, `"difficult"`, `"advanced"` → `"complex"`
|
||||
- `"low"`, `"easy"` → `"simple"`
|
||||
- `"medium"`, `"intermediate"` → `"moderate"`
|
||||
- `"high"`, `"hard"`, `"difficult"` → `"complex"`
|
||||
- Numeric 1-3 → `"simple"`, 4-6 → `"moderate"`, 7-10 → `"complex"`
|
||||
- Any other value → `"moderate"`
|
||||
|
||||
|
||||
Reference in New Issue
Block a user