Skip to content

Commit 4b3865d

Browse files
authored
fix(graph_dbs): sanitize nested metadata before Neo4j writes (#1178)
## Summary

- sanitize nested metadata values before writing Neo4j node properties
- apply the same sanitization in both Neo4j enterprise and community graph DB implementations
- add a regression test covering skill-memory style nested metadata

## Problem

Neo4j node properties only accept primitive values or arrays of primitives. Some MemOS flows (especially SkillMemory and feedback-related flows) can produce nested metadata like:

- `scripts: dict`
- `others: dict`
- nested objects under `info`
- arrays containing dict items

Those values can reach `SET n += $metadata` and trigger errors like:

- `Property values can only be of primitive types or arrays thereof`
- `CypherTypeError: Map{} encountered`

## Fix

This PR adds a small sanitization layer before Neo4j writes:

- keep primitive values as-is
- keep arrays of primitives as-is
- serialize dict values to JSON strings
- serialize nested list items that are dict/list values

This keeps metadata readable while making it safe for Neo4j property storage.

## Validation

- reproduced the issue locally with SkillMemory-style metadata
- verified the sanitized metadata no longer contains Neo4j-invalid map values
- verified a real `/product/add` flow successfully created `SkillMemory` without the previous Neo4j `Map{}` error
2 parents ac263fd + 760a8e7 commit 4b3865d

File tree

3 files changed

+56
-1
lines changed

3 files changed

+56
-1
lines changed

src/memos/graph_dbs/neo4j.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,27 @@ def _flatten_info_fields(metadata: dict[str, Any]) -> dict[str, Any]:
7272
return metadata
7373

7474

75+
def _sanitize_neo4j_value(value: Any) -> Any:
76+
"""Convert values unsupported by Neo4j properties into safe serializations."""
77+
if value is None or isinstance(value, (str, int, float, bool)):
78+
return value
79+
80+
if isinstance(value, list):
81+
if all(item is None or isinstance(item, (str, int, float, bool)) for item in value):
82+
return value
83+
return [json.dumps(item, ensure_ascii=False) if isinstance(item, (dict, list)) else str(item) for item in value]
84+
85+
if isinstance(value, dict):
86+
return json.dumps(value, ensure_ascii=False, sort_keys=True)
87+
88+
return str(value)
89+
90+
91+
def _sanitize_neo4j_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
    """Build a new metadata dict whose values are valid Neo4j property types.

    Keys are kept unchanged; each value is passed through
    ``_sanitize_neo4j_value``. The input dict itself is not modified.
    """
    cleaned: dict[str, Any] = {}
    for name, raw in metadata.items():
        cleaned[name] = _sanitize_neo4j_value(raw)
    return cleaned
94+
95+
7596
class Neo4jGraphDB(BaseGraphDB):
7697
"""Neo4j-based implementation of a graph memory store.
7798
@@ -212,6 +233,9 @@ def add_node(
212233
# Flatten info fields to top level (for Neo4j flat structure)
213234
metadata = _flatten_info_fields(metadata)
214235

236+
# Ensure Neo4j property compatibility (no nested map/list-of-map values)
237+
metadata = _sanitize_neo4j_metadata(metadata)
238+
215239
# Initialize delete_time and delete_record_id fields
216240
metadata.setdefault("delete_time", "")
217241
metadata.setdefault("delete_record_id", "")

src/memos/graph_dbs/neo4j_community.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,12 @@
55
from typing import Any
66

77
from memos.configs.graph_db import Neo4jGraphDBConfig
8-
from memos.graph_dbs.neo4j import Neo4jGraphDB, _flatten_info_fields, _prepare_node_metadata
8+
from memos.graph_dbs.neo4j import (
9+
Neo4jGraphDB,
10+
_flatten_info_fields,
11+
_prepare_node_metadata,
12+
_sanitize_neo4j_metadata,
13+
)
914
from memos.log import get_logger
1015
from memos.vec_dbs.factory import VecDBFactory
1116
from memos.vec_dbs.item import VecDBItem
@@ -55,6 +60,8 @@ def add_node(
5560

5661
# Safely process metadata
5762
metadata = _prepare_node_metadata(metadata)
63+
metadata = _flatten_info_fields(metadata)
64+
metadata = _sanitize_neo4j_metadata(metadata)
5865

5966
# Initialize delete_time and delete_record_id fields
6067
metadata.setdefault("delete_time", "")
@@ -135,6 +142,7 @@ def add_nodes_batch(self, nodes: list[dict[str, Any]], user_name: str | None = N
135142

136143
metadata = _prepare_node_metadata(metadata)
137144
metadata = _flatten_info_fields(metadata)
145+
metadata = _sanitize_neo4j_metadata(metadata)
138146

139147
# Initialize delete_time and delete_record_id fields
140148
metadata.setdefault("delete_time", "")

tests/graph_dbs/graph_dbs.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,26 @@ def test_get_memory_count(graph_db):
105105
session_mock.run.return_value.single.return_value = {"count": 42}
106106
count = graph_db.get_memory_count("WorkingMemory")
107107
assert count == 42
108+
109+
110+
def test_add_node_sanitizes_nested_metadata(graph_db):
    """Nested metadata passed to add_node reaches session.run in sanitized form."""
    nested_metadata = {
        "memory_type": "SkillMemory",
        "embedding": [0.1, 0.2, 0.3],
        "tags": ["skill"],
        "scripts": {"run.py": "print(1)"},
        "others": {"README.md": "# demo"},
        "info": {"nested": {"x": 1}, "arr_obj": [{"a": 1}]},
    }
    mock_session = graph_db.driver.session.return_value.__enter__.return_value

    graph_db.add_node(str(uuid.uuid4()), "skill memory", nested_metadata)

    # Inspect the keyword arguments of the most recent session.run call.
    run_kwargs = mock_session.run.call_args[1]
    written = run_kwargs["metadata"]

    # Dict-valued fields (including the one lifted out of "info") must be strings.
    for field in ("scripts", "others", "nested"):
        assert isinstance(written[field], str)
    # A list of dicts becomes a list of JSON strings.
    assert written["arr_obj"] == ['{"a": 1}']

0 commit comments

Comments
 (0)