feat(dashboard): surface graph validation issues and add messy graph fixtures

Add a --messy mode to the large graph generator so robustness cases can be
  tested with null fields, aliases, dangling refs, and other malformed data.

  Update the dashboard to preserve non-fatal validation issues, log
  auto-corrected and dropped items, and show a warning banner with expandable
  details plus copy-to-clipboard support.
This commit is contained in:
Lum1104
2026-03-26 20:44:07 +08:00
Unverified
parent 28e3bd1631
commit 835377b968
7 changed files with 380 additions and 19 deletions
+159 -9
View File
@@ -1,10 +1,15 @@
#!/usr/bin/env node
/**
* Generate a large fake knowledge graph for testing PR #18
* (Web Worker layout for large graphs).
* Generate a large fake knowledge graph for testing.
*
* Usage:
* node scripts/generate-large-graph.mjs [nodeCount]
* node scripts/generate-large-graph.mjs [nodeCount] --messy
*
* Flags:
* --messy Inject LLM-style issues into ~20% of nodes/edges to test the
* dashboard robustness pipeline (Tier 1-3: null fields, wrong cases,
* missing fields, aliases, dangling refs, unrecognizable types).
*
* Default: 3000 nodes. Writes to .understand-anything/knowledge-graph.json
*/
@@ -12,7 +17,10 @@
import { writeFileSync, mkdirSync } from "node:fs";
import { resolve } from "node:path";
const NODE_COUNT = parseInt(process.argv[2] || "3000", 10);
const args = process.argv.slice(2);
const MESSY = args.includes("--messy");
const numArg = args.find((a) => !a.startsWith("--"));
const NODE_COUNT = parseInt(numArg || "3000", 10);
const EDGE_RATIO = 1.7; // edges per node (realistic for codebases)
const nodeTypes = ["file", "function", "class", "module", "concept"];
@@ -110,6 +118,137 @@ function generateTour(nodes) {
return steps;
}
// ── Messy injection (--messy flag) ──
// Tier 1: silent fixes — null optional fields, mixed-case enums
function injectTier1(node) {
const issues = [];
if (Math.random() < 0.5 && node.filePath !== undefined) {
node.filePath = null; // null on optional field
issues.push("null filePath");
}
if (Math.random() < 0.5) {
node.type = node.type.toUpperCase(); // "FILE", "FUNCTION"
issues.push(`uppercase type "${node.type}"`);
}
if (Math.random() < 0.5) {
node.complexity = node.complexity[0].toUpperCase() + node.complexity.slice(1); // "Simple"
issues.push(`mixed-case complexity "${node.complexity}"`);
}
return issues;
}
// Tier 2: auto-fixable — missing fields, aliases, string weights
function injectTier2Node(node) {
const issues = [];
const r = Math.random();
if (r < 0.2) {
delete node.complexity;
issues.push("missing complexity");
} else if (r < 0.4) {
node.complexity = pick(["low", "easy", "medium", "intermediate", "high", "hard"]);
issues.push(`complexity alias "${node.complexity}"`);
}
if (Math.random() < 0.3) {
delete node.tags;
issues.push("missing tags");
}
if (Math.random() < 0.2) {
delete node.summary;
issues.push("missing summary");
}
if (Math.random() < 0.15) {
node.type = pick(["func", "fn", "method", "interface", "struct", "mod", "pkg"]);
issues.push(`type alias "${node.type}"`);
}
return issues;
}
function injectTier2Edge(edge) {
const issues = [];
if (Math.random() < 0.3) {
edge.weight = String(edge.weight); // string weight
issues.push(`string weight "${edge.weight}"`);
}
if (Math.random() < 0.2) {
delete edge.direction;
issues.push("missing direction");
} else if (Math.random() < 0.3) {
edge.direction = pick(["to", "outbound", "from", "inbound", "both"]);
issues.push(`direction alias "${edge.direction}"`);
}
if (Math.random() < 0.15) {
edge.type = pick(["extends", "invokes", "uses", "requires", "relates_to"]);
issues.push(`edge type alias "${edge.type}"`);
}
return issues;
}
// Tier 3: unrecoverable — missing id/name, dangling refs, bad types
function injectTier3Node(node) {
const r = Math.random();
if (r < 0.4) {
delete node.id;
return "missing id";
} else if (r < 0.7) {
delete node.name;
return "missing name";
} else {
node.type = "totally_bogus_type";
return `unrecognizable type "${node.type}"`;
}
}
function injectTier3Edge(edge, validNodeIds) {
const r = Math.random();
if (r < 0.4) {
edge.target = "nonexistent-node-999999";
return "dangling target ref";
} else if (r < 0.7) {
edge.source = "nonexistent-node-888888";
return "dangling source ref";
} else {
edge.weight = "not_a_number";
return "non-coercible weight";
}
}
function applyMessy(nodes, edges) {
const stats = { tier1: 0, tier2: 0, tier3: 0 };
for (const node of nodes) {
const r = Math.random();
if (r < 0.10) {
// ~10% get Tier 3 issues (will be dropped)
injectTier3Node(node);
stats.tier3++;
} else if (r < 0.30) {
// ~20% get Tier 2 issues (will be auto-corrected)
injectTier2Node(node);
stats.tier2++;
} else if (r < 0.40) {
// ~10% get Tier 1 issues (silently fixed)
injectTier1(node);
stats.tier1++;
}
}
const validIds = new Set(nodes.filter((n) => n.id).map((n) => n.id));
for (const edge of edges) {
const r = Math.random();
if (r < 0.05) {
injectTier3Edge(edge, validIds);
stats.tier3++;
} else if (r < 0.20) {
injectTier2Edge(edge);
stats.tier2++;
}
}
// Also set tour/layers to null (Tier 1 null-vs-empty)
return stats;
}
// ── Generate ──
const nodes = generateNodes(NODE_COUNT);
@@ -118,20 +257,25 @@ const edges = generateEdges(nodes, edgeCount);
const layers = generateLayers(nodes);
const tour = generateTour(nodes);
let messyStats = null;
if (MESSY) {
messyStats = applyMessy(nodes, edges);
}
const graph = {
version: "1.0",
project: {
name: "large-test-project",
languages: languages.slice(0, 3),
frameworks: frameworks.slice(0, 2),
description: `Auto-generated project with ${NODE_COUNT} nodes for performance testing.`,
description: `Auto-generated project with ${NODE_COUNT} nodes for ${MESSY ? "robustness" : "performance"} testing.`,
analyzedAt: new Date().toISOString(),
gitCommitHash: "0000000000000000000000000000000000000000",
},
nodes,
edges,
layers,
tour,
layers: MESSY && Math.random() < 0.5 ? null : layers,
tour: MESSY && Math.random() < 0.5 ? null : tour,
};
const outDir = resolve(process.cwd(), ".understand-anything");
@@ -139,9 +283,15 @@ mkdirSync(outDir, { recursive: true });
const outPath = resolve(outDir, "knowledge-graph.json");
writeFileSync(outPath, JSON.stringify(graph, null, 2));
console.log(`Generated knowledge graph:`);
console.log(`Generated knowledge graph${MESSY ? " (messy mode)" : ""}:`);
console.log(` Nodes: ${nodes.length}`);
console.log(` Edges: ${edges.length}`);
console.log(` Layers: ${layers.length}`);
console.log(` Tour steps: ${tour.length}`);
console.log(` Layers: ${graph.layers === null ? "null (Tier 1 test)" : layers.length}`);
console.log(` Tour steps: ${graph.tour === null ? "null (Tier 1 test)" : tour.length}`);
if (messyStats) {
console.log(` Injected issues:`);
console.log(` Tier 1 (silent fix): ~${messyStats.tier1} items`);
console.log(` Tier 2 (auto-correct): ~${messyStats.tier2} items`);
console.log(` Tier 3 (will be dropped): ~${messyStats.tier3} items`);
}
console.log(` Written to: ${outPath}`);