fix(skills/understand): persist canonical edge direction during merge

Fixes #140. merge-batch-graphs.py already defaulted missing `direction`
to "forward" when building the dedup key, but the value was never written
back onto the edge — so the generated knowledge-graph.json shipped without
the field and the dashboard validator emitted one auto-correction per
edge (153k on the reporter's Go codebase).

Mirror the dashboard schema validator at packages/core/src/schema.ts:
lowercase the value, map "both"/"mutual" → "bidirectional", fall back to
"forward" for missing or invalid values, and persist the result onto the
edge before it enters edges_by_key. This also closes a latent dedup leak
where "Forward" and "forward" (or "both" and "bidirectional") would have
produced separate dedup keys.
This commit is contained in:
Lum1104
2026-05-18 09:27:06 +08:00
Unverified
parent a5644ca3b0
commit f71bad5267
2 changed files with 88 additions and 1 deletions
@@ -106,6 +106,21 @@ _TEST_NAME_PATTERNS: dict[str, tuple[tuple[str, ...], tuple[str, ...]]] = {
}
# Mirrors packages/core/src/schema.ts so the dashboard validator has nothing
# left to auto-correct for the `direction` field on merged graphs.
_DIRECTION_ALIASES: dict[str, str] = {"both": "bidirectional", "mutual": "bidirectional"}
_VALID_DIRECTIONS: frozenset[str] = frozenset({"forward", "backward", "bidirectional"})
def normalize_direction(value: Any) -> str:
"""Canonicalize an edge `direction` value to one of the schema enum members."""
candidate = value.lower() if isinstance(value, str) else ""
candidate = _DIRECTION_ALIASES.get(candidate, candidate)
if candidate not in _VALID_DIRECTIONS:
return "forward"
return candidate
def _num(v: Any) -> float:
"""Coerce a value to float for safe comparison (handles string weights)."""
try:
@@ -803,7 +818,8 @@ def merge_and_normalize(batches: list[dict[str, Any]]) -> tuple[dict[str, Any],
src = edge.get("source", "")
tgt = edge.get("target", "")
etype = edge.get("type", "")
direction = edge.get("direction", "forward")
direction = normalize_direction(edge.get("direction"))
edge["direction"] = direction
if src not in node_ids or tgt not in node_ids:
missing = []
@@ -870,5 +870,76 @@ class MergeIntegrationTests(unittest.TestCase):
self.assertIn("tested", prod_node["tags"])
class NormalizeDirectionTests(unittest.TestCase):
"""`direction` canonicalization mirrors the dashboard schema validator."""
def test_missing_defaults_to_forward(self) -> None:
self.assertEqual(mbg.normalize_direction(None), "forward")
self.assertEqual(mbg.normalize_direction(""), "forward")
def test_valid_values_pass_through(self) -> None:
for value in ("forward", "backward", "bidirectional"):
with self.subTest(value=value):
self.assertEqual(mbg.normalize_direction(value), value)
def test_case_is_normalized(self) -> None:
self.assertEqual(mbg.normalize_direction("Forward"), "forward")
self.assertEqual(mbg.normalize_direction("BIDIRECTIONAL"), "bidirectional")
def test_aliases_are_mapped(self) -> None:
self.assertEqual(mbg.normalize_direction("both"), "bidirectional")
self.assertEqual(mbg.normalize_direction("Mutual"), "bidirectional")
def test_unknown_values_fall_back_to_forward(self) -> None:
self.assertEqual(mbg.normalize_direction("sideways"), "forward")
self.assertEqual(mbg.normalize_direction(42), "forward")
class MergeEdgeDirectionTests(unittest.TestCase):
"""End-to-end: merge_and_normalize persists a canonical `direction`."""
def _two_node_batch(self, edge: dict[str, Any]) -> dict[str, Any]:
return {
"nodes": [_file_node("src/a.ts"), _file_node("src/b.ts")],
"edges": [edge],
}
def test_missing_direction_is_persisted_as_forward(self) -> None:
# Reproduces issue #140: edges without a `direction` field still
# reach the final graph and trigger dashboard auto-corrections.
batch = self._two_node_batch({
"source": "file:src/a.ts",
"target": "file:src/b.ts",
"type": "depends_on",
"weight": 0.5,
})
assembled, _report = mbg.merge_and_normalize([batch])
edges = [e for e in assembled["edges"] if e["type"] == "depends_on"]
self.assertEqual(len(edges), 1)
self.assertEqual(edges[0]["direction"], "forward")
def test_alias_is_canonicalized_before_dedup(self) -> None:
# `"both"` and `"bidirectional"` describe the same relationship; without
# canonicalization they get separate dedup keys and leak duplicates.
batch = {
"nodes": [_file_node("src/a.ts"), _file_node("src/b.ts")],
"edges": [
{"source": "file:src/a.ts", "target": "file:src/b.ts",
"type": "depends_on", "direction": "both", "weight": 0.3},
{"source": "file:src/a.ts", "target": "file:src/b.ts",
"type": "depends_on", "direction": "bidirectional", "weight": 0.9},
],
}
assembled, _report = mbg.merge_and_normalize([batch])
edges = [e for e in assembled["edges"] if e["type"] == "depends_on"]
self.assertEqual(len(edges), 1)
self.assertEqual(edges[0]["direction"], "bidirectional")
self.assertEqual(edges[0]["weight"], 0.9)
if __name__ == "__main__":
unittest.main()