mirror of
https://github.com/Egonex-AI/Understand-Anything.git
synced 2026-06-22 10:58:03 +08:00
fix(skills/understand): persist canonical edge direction during merge
Fixes #140. merge-batch-graphs.py already defaulted missing `direction` to "forward" when building the dedup key, but the value was never written back onto the edge — so the generated knowledge-graph.json shipped without the field and the dashboard validator emitted one auto-correction per edge (153k on the reporter's Go codebase). Mirror the dashboard schema validator at packages/core/src/schema.ts: lowercase the value, map "both"/"mutual" → "bidirectional", fall back to "forward" for missing or invalid values, and persist the result onto the edge before it enters edges_by_key. This also closes a latent dedup leak where "Forward" and "forward" (or "both" and "bidirectional") would have produced separate dedup keys.
This commit is contained in:
@@ -106,6 +106,21 @@ _TEST_NAME_PATTERNS: dict[str, tuple[tuple[str, ...], tuple[str, ...]]] = {
|
||||
}
|
||||
|
||||
|
||||
# Mirrors packages/core/src/schema.ts so the dashboard validator has nothing
|
||||
# left to auto-correct for the `direction` field on merged graphs.
|
||||
_DIRECTION_ALIASES: dict[str, str] = {"both": "bidirectional", "mutual": "bidirectional"}
|
||||
_VALID_DIRECTIONS: frozenset[str] = frozenset({"forward", "backward", "bidirectional"})
|
||||
|
||||
|
||||
def normalize_direction(value: Any) -> str:
|
||||
"""Canonicalize an edge `direction` value to one of the schema enum members."""
|
||||
candidate = value.lower() if isinstance(value, str) else ""
|
||||
candidate = _DIRECTION_ALIASES.get(candidate, candidate)
|
||||
if candidate not in _VALID_DIRECTIONS:
|
||||
return "forward"
|
||||
return candidate
|
||||
|
||||
|
||||
def _num(v: Any) -> float:
|
||||
"""Coerce a value to float for safe comparison (handles string weights)."""
|
||||
try:
|
||||
@@ -803,7 +818,8 @@ def merge_and_normalize(batches: list[dict[str, Any]]) -> tuple[dict[str, Any],
|
||||
src = edge.get("source", "")
|
||||
tgt = edge.get("target", "")
|
||||
etype = edge.get("type", "")
|
||||
direction = edge.get("direction", "forward")
|
||||
direction = normalize_direction(edge.get("direction"))
|
||||
edge["direction"] = direction
|
||||
|
||||
if src not in node_ids or tgt not in node_ids:
|
||||
missing = []
|
||||
|
||||
@@ -870,5 +870,76 @@ class MergeIntegrationTests(unittest.TestCase):
|
||||
self.assertIn("tested", prod_node["tags"])
|
||||
|
||||
|
||||
class NormalizeDirectionTests(unittest.TestCase):
|
||||
"""`direction` canonicalization mirrors the dashboard schema validator."""
|
||||
|
||||
def test_missing_defaults_to_forward(self) -> None:
|
||||
self.assertEqual(mbg.normalize_direction(None), "forward")
|
||||
self.assertEqual(mbg.normalize_direction(""), "forward")
|
||||
|
||||
def test_valid_values_pass_through(self) -> None:
|
||||
for value in ("forward", "backward", "bidirectional"):
|
||||
with self.subTest(value=value):
|
||||
self.assertEqual(mbg.normalize_direction(value), value)
|
||||
|
||||
def test_case_is_normalized(self) -> None:
|
||||
self.assertEqual(mbg.normalize_direction("Forward"), "forward")
|
||||
self.assertEqual(mbg.normalize_direction("BIDIRECTIONAL"), "bidirectional")
|
||||
|
||||
def test_aliases_are_mapped(self) -> None:
|
||||
self.assertEqual(mbg.normalize_direction("both"), "bidirectional")
|
||||
self.assertEqual(mbg.normalize_direction("Mutual"), "bidirectional")
|
||||
|
||||
def test_unknown_values_fall_back_to_forward(self) -> None:
|
||||
self.assertEqual(mbg.normalize_direction("sideways"), "forward")
|
||||
self.assertEqual(mbg.normalize_direction(42), "forward")
|
||||
|
||||
|
||||
class MergeEdgeDirectionTests(unittest.TestCase):
|
||||
"""End-to-end: merge_and_normalize persists a canonical `direction`."""
|
||||
|
||||
def _two_node_batch(self, edge: dict[str, Any]) -> dict[str, Any]:
|
||||
return {
|
||||
"nodes": [_file_node("src/a.ts"), _file_node("src/b.ts")],
|
||||
"edges": [edge],
|
||||
}
|
||||
|
||||
def test_missing_direction_is_persisted_as_forward(self) -> None:
|
||||
# Reproduces issue #140: edges without a `direction` field still
|
||||
# reach the final graph and trigger dashboard auto-corrections.
|
||||
batch = self._two_node_batch({
|
||||
"source": "file:src/a.ts",
|
||||
"target": "file:src/b.ts",
|
||||
"type": "depends_on",
|
||||
"weight": 0.5,
|
||||
})
|
||||
|
||||
assembled, _report = mbg.merge_and_normalize([batch])
|
||||
|
||||
edges = [e for e in assembled["edges"] if e["type"] == "depends_on"]
|
||||
self.assertEqual(len(edges), 1)
|
||||
self.assertEqual(edges[0]["direction"], "forward")
|
||||
|
||||
def test_alias_is_canonicalized_before_dedup(self) -> None:
|
||||
# `"both"` and `"bidirectional"` describe the same relationship; without
|
||||
# canonicalization they get separate dedup keys and leak duplicates.
|
||||
batch = {
|
||||
"nodes": [_file_node("src/a.ts"), _file_node("src/b.ts")],
|
||||
"edges": [
|
||||
{"source": "file:src/a.ts", "target": "file:src/b.ts",
|
||||
"type": "depends_on", "direction": "both", "weight": 0.3},
|
||||
{"source": "file:src/a.ts", "target": "file:src/b.ts",
|
||||
"type": "depends_on", "direction": "bidirectional", "weight": 0.9},
|
||||
],
|
||||
}
|
||||
|
||||
assembled, _report = mbg.merge_and_normalize([batch])
|
||||
|
||||
edges = [e for e in assembled["edges"] if e["type"] == "depends_on"]
|
||||
self.assertEqual(len(edges), 1)
|
||||
self.assertEqual(edges[0]["direction"], "bidirectional")
|
||||
self.assertEqual(edges[0]["weight"], 0.9)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user