"""PolarDB graph-store backend (``src/memos/graph_dbs/polardb.py``).

Nodes are rows of ``<db_name>_graph."Memory"`` and relationships are rows of
``<db_name>_graph."Edges"``; everything is accessed through psycopg2.
"""
import json
import random
import time

from datetime import datetime
from typing import Any, Literal

import numpy as np
import psycopg2

from psycopg2.extras import Json

from memos.configs.graph_db import PolarDBGraphDBConfig
from memos.dependency import require_python_package
from memos.graph_dbs.base import BaseGraphDB
from memos.log import get_logger


logger = get_logger(__name__)

# Graph database configuration
GRAPH_NAME = "test_memos_graph"

# Property names that may carry an embedding vector, in lookup priority order.
_EMBEDDING_KEYS = ("embedding", "embedding_1024", "embedding_3072", "embedding_768")


def _compose_node(item: dict[str, Any]) -> tuple[str, str, dict[str, Any]]:
    """Split a serialized node into ``(id, memory, metadata)``.

    Raises:
        KeyError: If ``item`` has no ``"id"`` or ``"memory"`` entry.
    """
    node_id = item["id"]
    memory = item["memory"]
    metadata = item.get("metadata", {})
    return node_id, memory, metadata


def _prepare_node_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
    """
    Ensure metadata has proper datetime fields and normalized types.

    - Fill `created_at` and `updated_at` if missing (in ISO 8601 format).
    - Convert embedding to list of float if present.

    The dict is modified in place and also returned.
    """
    now = datetime.utcnow().isoformat()

    # Fill timestamps if missing
    metadata.setdefault("created_at", now)
    metadata.setdefault("updated_at", now)

    # Normalize embedding type
    embedding = metadata.get("embedding")
    if embedding and isinstance(embedding, list):
        metadata["embedding"] = [float(x) for x in embedding]

    return metadata


def generate_vector(dim=1024, low=-0.2, high=0.2):
    """Generate a random vector for testing purposes."""
    return [round(random.uniform(low, high), 6) for _ in range(dim)]


def find_embedding(metadata):
    """Look for an embedding vector in a possibly nested node structure.

    For each known embedding key (in ``_EMBEDDING_KEYS`` order) the lookup
    checks the top level (the value must be a list), then ``metadata[...]``,
    then ``properties[...]``.

    Args:
        metadata: Mapping describing a node.

    Returns:
        The first embedding found, or ``None`` when there is none or when
        ``metadata`` is not a dict.
    """
    # The previous version wrapped this logic in a nested function that was
    # never invoked, so the public function always returned None.
    if not isinstance(metadata, dict):
        return None
    for key in _EMBEDDING_KEYS:
        if isinstance(metadata.get(key), list):
            return metadata[key]
        for container in ("metadata", "properties"):
            nested = metadata.get(container)
            if isinstance(nested, dict) and key in nested:
                return nested[key]
    return None


def detect_embedding_field(embedding_list):
    """Return the property name used to store ``embedding_list``.

    Only 1024-dimensional vectors are recognised (stored as ``"embedding"``);
    empty input or any other dimension yields ``None``.
    """
    if not embedding_list:
        return None
    dim = len(embedding_list)
    if dim == 1024:
        return "embedding"
    # Reported through the module logger instead of print().
    logger.warning(f"⚠️ 未知 embedding 维度 {dim},跳过该向量")
    return None


def convert_to_vector(embedding_list):
    """Render an embedding as a ``"[v1,v2,...]"`` vector literal string.

    Args:
        embedding_list: A sequence of numbers or a ``numpy.ndarray``.

    Returns:
        The literal string, or ``None`` for ``None`` / empty input.
    """
    if embedding_list is None:
        return None
    # Convert arrays first: truth-testing a multi-element ndarray raises
    # ValueError, which the previous ordering triggered.
    if isinstance(embedding_list, np.ndarray):
        embedding_list = embedding_list.tolist()
    if not embedding_list:
        return None
    return "[" + ",".join(str(float(x)) for x in embedding_list) + "]"


def clean_properties(props):
    """Return a copy of ``props`` without vector fields (``{}`` for non-dicts)."""
    if not isinstance(props, dict):
        return {}
    return {k: v for k, v in props.items() if k not in _EMBEDDING_KEYS}


class PolarDBGraphDB(BaseGraphDB):
    """PolarDB-based implementation using Apache AGE graph database extension."""

    @require_python_package(
        import_name="psycopg2",
        install_command="pip install psycopg2-binary",
        install_link="https://pypi.org/project/psycopg2-binary/",
    )
    def __init__(self, config: PolarDBGraphDBConfig):
        """PolarDB-based implementation using Apache AGE.

        Opens an autocommit connection, ensures the schema/tables exist and
        creates the indexes. ``config`` may be a config object or a plain dict.

        Tenant Modes:
        - use_multi_db = True:
            Dedicated Database Mode (Multi-Database Multi-Tenant).
            Each tenant or logical scope uses a separate PolarDB database.
            `db_name` is the specific tenant database.
            `user_name` can be None (optional).

        - use_multi_db = False:
            Shared Database Multi-Tenant Mode.
            All tenants share a single PolarDB database.
            `db_name` is the shared database.
            `user_name` is required to isolate each tenant's data at the node level.
            All node queries will enforce `user_name` in WHERE conditions and store it in metadata,
            but it will be removed automatically before returning to external consumers.
        """
        import psycopg2

        self.config = config
        # Set to True once the "Edges" table has been created/verified.
        self._edges_table_ready = False

        # _get_config_value handles both dict and object configs.
        self.db_name = self._get_config_value("db_name")
        self.user_name = self._get_config_value("user_name")

        # Create connection
        self.connection = psycopg2.connect(
            host=self._get_config_value("host"),
            port=self._get_config_value("port"),
            user=self._get_config_value("user"),
            password=self._get_config_value("password"),
            dbname=self.db_name,
        )
        self.connection.autocommit = True

        if self._get_config_value("auto_create", False):
            self._ensure_database_exists()

        # Create graph and tables
        self._create_graph()

        self.create_index(dimensions=self._get_config_value("embedding_dimension", 1024))

    def _get_config_value(self, key: str, default=None):
        """Safely get config value from either dict or object."""
        if isinstance(self.config, dict):
            return self.config.get(key, default)
        return getattr(self.config, key, default)

    @property
    def _memory_table(self) -> str:
        """Schema-qualified name of the node table."""
        return f'{self.db_name}_graph."Memory"'

    @property
    def _edges_table(self) -> str:
        """Schema-qualified name of the edge table."""
        return f'{self.db_name}_graph."Edges"'

    def _tenant_user(self) -> str | None:
        """Return the user name to filter on in shared-database mode, else None."""
        if self._get_config_value("use_multi_db", True):
            return None
        return self._get_config_value("user_name") or None

    @staticmethod
    def _decode_embedding(embedding_json: Any, description: str) -> Any:
        """Decode an embedding column value; log and return None if it cannot be parsed."""
        if embedding_json is None or not isinstance(embedding_json, str):
            return embedding_json
        try:
            return json.loads(embedding_json)
        except (json.JSONDecodeError, TypeError):
            logger.warning(f"Failed to parse embedding for {description}")
            return None

    @classmethod
    def _load_properties(
        cls, properties_json: Any, embedding_json: Any, node_id: Any
    ) -> dict[str, Any]:
        """Turn a ``(properties, embedding)`` row pair into one properties dict."""
        if isinstance(properties_json, str):
            try:
                properties = json.loads(properties_json)
            except (json.JSONDecodeError, TypeError):
                logger.warning(f"Failed to parse properties for node {node_id}")
                properties = {}
        else:
            properties = properties_json if properties_json else {}
        if not isinstance(properties, dict):
            properties = {}

        embedding = cls._decode_embedding(embedding_json, f"node {node_id}")
        if embedding is not None:
            properties["embedding"] = embedding
        return properties

    def _ensure_edges_table(self) -> bool:
        """Create the edge table and its indexes if needed.

        Returns:
            True when the table is known to exist, False if creation failed
            (the failure is logged, not raised).
        """
        if getattr(self, "_edges_table_ready", False):
            return True
        edges = self._edges_table
        try:
            with self.connection.cursor() as cursor:
                # No foreign keys to "Memory"(id): edges are addressed by the
                # ids callers pass in, and the node lookups in this class match
                # on the `id` stored inside `properties`.
                cursor.execute(f"""
                    CREATE TABLE IF NOT EXISTS {edges} (
                        id SERIAL PRIMARY KEY,
                        source_id TEXT NOT NULL,
                        target_id TEXT NOT NULL,
                        edge_type TEXT NOT NULL,
                        properties JSONB,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    );
                """)
                for suffix, column in (
                    ("source", "source_id"),
                    ("target", "target_id"),
                    ("type", "edge_type"),
                ):
                    cursor.execute(
                        f"CREATE INDEX IF NOT EXISTS idx_edges_{suffix} ON {edges} ({column});"
                    )
        except Exception as e:
            logger.warning(f"Failed to ensure edges table exists: {e}")
            return False
        self._edges_table_ready = True
        return True

    def _ensure_database_exists(self):
        """Placeholder for database creation; currently only logs the database in use."""
        # For PostgreSQL/PolarDB, we would need to connect to a default database first.
        # This is a simplified implementation.
        logger.info(f"Using database '{self.db_name}'")

    def _create_graph(self):
        """Create PostgreSQL schema and table for graph storage.

        Raises:
            Exception: Re-raised when the schema, table or GIN index cannot be
                created. Failures of the embedding column / vector index are
                only logged.
        """
        schema = f"{self.db_name}_graph"
        table = self._memory_table
        try:
            with self.connection.cursor() as cursor:
                # Create schema if it doesn't exist
                cursor.execute(f"CREATE SCHEMA IF NOT EXISTS {schema};")
                logger.info(f"Schema '{schema}' ensured.")

                # Create Memory table if it doesn't exist
                cursor.execute(f"""
                    CREATE TABLE IF NOT EXISTS {table} (
                        id TEXT PRIMARY KEY,
                        properties JSONB NOT NULL,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    );
                """)
                logger.info(f"Memory table created in schema '{schema}'.")

                # Add embedding column if it doesn't exist (using JSONB for compatibility)
                try:
                    cursor.execute(f"""
                        ALTER TABLE {table}
                        ADD COLUMN IF NOT EXISTS embedding JSONB;
                    """)
                    logger.info("Embedding column added to Memory table.")
                except Exception as e:
                    logger.warning(f"Failed to add embedding column: {e}")

                # Create indexes
                cursor.execute(f"""
                    CREATE INDEX IF NOT EXISTS idx_memory_properties
                    ON {table} USING GIN (properties);
                """)

                # Create vector index for embedding field.
                # NOTE(review): vector_cosine_ops presumably needs a `vector`
                # column, while the column above is JSONB - confirm schema.
                try:
                    cursor.execute(f"""
                        CREATE INDEX IF NOT EXISTS idx_memory_embedding
                        ON {table} USING ivfflat (embedding vector_cosine_ops)
                        WITH (lists = 100);
                    """)
                    logger.info("Vector index created for Memory table.")
                except Exception as e:
                    logger.warning(f"Vector index creation failed (might not be supported): {e}")

                logger.info("Indexes created for Memory table.")

        except Exception as e:
            logger.error(f"Failed to create graph schema: {e}")
            raise

    def create_index(
        self,
        label: str = "Memory",
        vector_property: str = "embedding",
        dimensions: int = 1024,
        index_name: str = "memory_vector_index",
    ) -> None:
        """
        Create indexes for embedding and other fields.
        Note: This creates PostgreSQL indexes on the underlying tables.

        The arguments are accepted for interface compatibility but are not
        used: index names and columns are fixed. Failures are logged, never raised.
        """
        table = self._memory_table
        try:
            with self.connection.cursor() as cursor:
                cursor.execute(f"""
                    CREATE INDEX IF NOT EXISTS idx_memory_properties
                    ON {table} USING GIN (properties);
                """)

                # Try to create vector index, but don't fail if it doesn't work
                try:
                    cursor.execute(f"""
                        CREATE INDEX IF NOT EXISTS idx_memory_embedding
                        ON {table} USING ivfflat (embedding vector_cosine_ops);
                    """)
                except Exception as ve:
                    logger.warning(f"Vector index creation failed (might not be supported): {ve}")

                logger.debug("Indexes created successfully.")
        except Exception as e:
            logger.warning(f"Failed to create indexes: {e}")

    def get_memory_count(self, memory_type: str) -> int:
        """Get count of memory nodes by type."""
        query = f"""
            SELECT COUNT(*)
            FROM {self._memory_table}
            WHERE properties->>'memory_type' = %s
        """
        params = [memory_type]

        user_name = self._tenant_user()
        if user_name:
            query += " AND properties->>'user_name' = %s"
            params.append(user_name)

        with self.connection.cursor() as cursor:
            cursor.execute(query, params)
            result = cursor.fetchone()
            return result[0] if result else 0

    def node_not_exist(self, scope: str) -> int:
        """Return True when no node with ``memory_type == scope`` exists."""
        query = f"""
            SELECT id
            FROM {self._memory_table}
            WHERE properties->>'memory_type' = %s
        """
        params = [scope]

        user_name = self._tenant_user()
        if user_name:
            query += " AND properties->>'user_name' = %s"
            params.append(user_name)

        # LIMIT must come after every filter; it used to precede the tenant
        # clause, which produced invalid SQL in shared-database mode.
        query += " LIMIT 1"

        with self.connection.cursor() as cursor:
            cursor.execute(query, params)
            return cursor.fetchone() is None

    def remove_oldest_memory(self, memory_type: str, keep_latest: int) -> None:
        """
        Remove all nodes of ``memory_type`` except the latest `keep_latest` entries.

        "Latest" is decided by the `updated_at` property; rows without it are
        removed first. In shared-database mode only the configured user's rows
        are considered.
        """
        table = self._memory_table
        scope_sql = "properties->>'memory_type' = %s"
        scope_params: list[Any] = [memory_type]

        user_name = self._tenant_user()
        if user_name:
            scope_sql += " AND properties->>'user_name' = %s"
            scope_params.append(user_name)

        query = f"""
            DELETE FROM {table}
            WHERE {scope_sql}
            AND id NOT IN (
                SELECT id FROM (
                    SELECT id FROM {table}
                    WHERE {scope_sql}
                    ORDER BY (properties->>'updated_at')::timestamp DESC NULLS LAST
                    LIMIT %s
                ) AS keep_ids
            )
        """
        # The scope filter appears twice (outer DELETE and inner SELECT).
        params = [*scope_params, *scope_params, max(int(keep_latest), 0)]

        logger.info(f"Removing oldest {memory_type} memories, keeping {keep_latest} latest")
        # The statement used to be built but never executed.
        with self.connection.cursor() as cursor:
            cursor.execute(query, params)

    def update_node(self, id: str, fields: dict[str, Any]) -> None:
        """Update node fields in PolarDB.

        Merges ``fields`` into the stored properties. A list-valued
        ``"embedding"`` entry is written to the dedicated embedding column.
        Does nothing when ``fields`` is empty or the node is not found.
        ``fields`` itself is left untouched.
        """
        if not fields:
            return

        # Get current properties
        current_node = self.get_node(id)
        if not current_node:
            return

        updates = dict(fields)  # work on a copy; never mutate the caller's dict
        embedding_vector = updates.pop("embedding", None)
        if not isinstance(embedding_vector, list):
            embedding_vector = None

        properties = current_node["metadata"].copy()
        properties.update(updates)
        # Vectors live in the embedding column, not in the properties document.
        properties.pop("embedding", None)

        table = self._memory_table
        if embedding_vector is not None:
            query = f"""
                UPDATE {table}
                SET properties = %s, embedding = %s, updated_at = CURRENT_TIMESTAMP
                WHERE id = %s
            """
            params = [json.dumps(properties), json.dumps(embedding_vector), id]
        else:
            query = f"""
                UPDATE {table}
                SET properties = %s, updated_at = CURRENT_TIMESTAMP
                WHERE id = %s
            """
            params = [json.dumps(properties), id]

        user_name = self._tenant_user()
        if user_name:
            # Exact match on the tenant field (was a substring LIKE over the
            # whole document, which could match other tenants' rows).
            query += " AND properties->>'user_name' = %s"
            params.append(user_name)

        with self.connection.cursor() as cursor:
            cursor.execute(query, params)

    def delete_node(self, id: str) -> None:
        """Delete a node from the graph."""
        query = f"""
            DELETE FROM {self._memory_table}
            WHERE id = %s
        """
        params = [id]

        user_name = self._tenant_user()
        if user_name:
            query += " AND properties->>'user_name' = %s"
            params.append(user_name)

        with self.connection.cursor() as cursor:
            cursor.execute(query, params)

    def add_edge(self, source_id: str, target_id: str, type: str) -> None:
        """
        Create an edge from source node to target node.

        Nothing is inserted when the edge table cannot be created, when either
        endpoint is missing, or when the same edge already exists.

        Args:
            source_id: ID of the source node.
            target_id: ID of the target node.
            type: Relationship type (e.g., 'RELATE_TO', 'PARENT').
        """
        # Make sure the edge table exists
        if not self._ensure_edges_table():
            return

        # Check that both endpoints exist
        if self.get_node(source_id) is None or self.get_node(target_id) is None:
            logger.warning("Cannot create edge: source or target node does not exist")
            return

        # The table has no unique constraint, so ON CONFLICT could never fire;
        # guard against duplicates explicitly instead.
        edges = self._edges_table
        query = f"""
            INSERT INTO {edges} (source_id, target_id, edge_type)
            SELECT %s::text, %s::text, %s::text
            WHERE NOT EXISTS (
                SELECT 1 FROM {edges}
                WHERE source_id = %s AND target_id = %s AND edge_type = %s
            )
        """
        edge = (source_id, target_id, type)

        with self.connection.cursor() as cursor:
            cursor.execute(query, edge + edge)
            if cursor.rowcount:
                logger.info(f"Edge created: {source_id} -[{type}]-> {target_id}")
            else:
                logger.debug(f"Edge already exists: {source_id} -[{type}]-> {target_id}")

    def delete_edge(self, source_id: str, target_id: str, type: str) -> None:
        """
        Delete a specific edge between two nodes.
        Args:
            source_id: ID of the source node.
            target_id: ID of the target node.
            type: Relationship type to remove.
        """
        self._ensure_edges_table()
        query = f"""
            DELETE FROM {self._edges_table}
            WHERE source_id = %s AND target_id = %s AND edge_type = %s
        """

        with self.connection.cursor() as cursor:
            cursor.execute(query, (source_id, target_id, type))
            logger.info(f"Edge deleted: {source_id} -[{type}]-> {target_id}")

    def edge_exists(
        self, source_id: str, target_id: str, type: str = "ANY", direction: str = "OUTGOING"
    ) -> bool:
        """
        Check if an edge exists between two nodes.
        Args:
            source_id: ID of the source node.
            target_id: ID of the target node.
            type: Relationship type. Use "ANY" to match any relationship type.
            direction: Direction of the edge.
                       Use "OUTGOING" (default), "INCOMING", or "ANY".
        Returns:
            True if the edge exists, otherwise False.
        Raises:
            ValueError: If ``direction`` is not one of the accepted values.
        """
        pair = "source_id = %s AND target_id = %s"
        if direction == "OUTGOING":
            where_clauses = [pair]
            params = [source_id, target_id]
        elif direction == "INCOMING":
            where_clauses = [pair]
            params = [target_id, source_id]
        elif direction == "ANY":
            where_clauses = [f"(({pair}) OR ({pair}))"]
            params = [source_id, target_id, target_id, source_id]
        else:
            raise ValueError(
                f"Invalid direction: {direction}. Must be 'OUTGOING', 'INCOMING', or 'ANY'."
            )

        if type != "ANY":
            where_clauses.append("edge_type = %s")
            params.append(type)

        self._ensure_edges_table()
        query = f"""
            SELECT 1 FROM {self._edges_table}
            WHERE {" AND ".join(where_clauses)}
            LIMIT 1
        """

        with self.connection.cursor() as cursor:
            cursor.execute(query, params)
            return cursor.fetchone() is not None

    def get_node(self, id: str, **kwargs) -> dict[str, Any] | None:
        """Retrieve the metadata and memory of a node.

        The node is matched on the ``id`` stored inside its properties.

        Returns:
            The parsed node, or ``None`` when no row matches.
        """
        query = f"""
            SELECT id, properties, embedding
            FROM {self._memory_table}
            WHERE ag_catalog.agtype_access_operator(properties, '"id"'::agtype) = %s::agtype
        """
        # Use the id as-is when it is already quoted; otherwise JSON-quote it
        # so embedded quotes/backslashes are escaped.
        if id.startswith('"') and id.endswith('"'):
            params = [id]
        else:
            params = [json.dumps(id, ensure_ascii=False)]

        user_name = self._tenant_user()
        if user_name:
            # Exact tenant match (was a substring LIKE over the whole document).
            query += (
                " AND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype)"
                " = %s::agtype"
            )
            params.append(json.dumps(user_name, ensure_ascii=False))

        with self.connection.cursor() as cursor:
            cursor.execute(query, params)
            result = cursor.fetchone()

        if not result:
            return None

        _, properties_json, embedding_json = result
        properties = self._load_properties(properties_json, embedding_json, id)
        return self._parse_node(
            {"id": id, "memory": properties.get("memory", ""), "metadata": properties}
        )

    def get_nodes(self, ids: list[str], **kwargs) -> list[dict[str, Any]]:
        """
        Retrieve the metadata and memory of a list of nodes.
        Args:
            ids: List of Node identifier.
            **kwargs: ``cube_name`` overrides the configured user name used for
                tenant filtering in shared-database mode.
        Returns:
        list[dict]: Parsed node records containing 'id', 'memory', and 'metadata'.

        Notes:
            - IDs without a matching row are simply absent from the result.
            - Returns empty list if input is empty.
        """
        if not ids:
            return []

        id_match = "ag_catalog.agtype_access_operator(properties, '\"id\"'::agtype) = %s::agtype"
        where_clause = " OR ".join([id_match] * len(ids))
        params = [json.dumps(id_val, ensure_ascii=False) for id_val in ids]

        query = f"""
            SELECT id, properties, embedding
            FROM {self._memory_table}
            WHERE ({where_clause})
        """

        if self._tenant_user():
            user_name = kwargs.get("cube_name", self._get_config_value("user_name"))
            query += (
                " AND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype)"
                " = %s::agtype"
            )
            params.append(json.dumps(user_name, ensure_ascii=False))

        with self.connection.cursor() as cursor:
            cursor.execute(query, params)
            results = cursor.fetchall()

        nodes = []
        for node_id, properties_json, embedding_json in results:
            properties = self._load_properties(properties_json, embedding_json, node_id)
            nodes.append(
                self._parse_node(
                    {
                        "id": properties.get("id", node_id),
                        "memory": properties.get("memory", ""),
                        "metadata": properties,
                    }
                )
            )
        return nodes

    def get_edges(self, id: str, type: str = "ANY", direction: str = "ANY") -> list[dict[str, str]]:
        """
        Get edges connected to a node, with optional type and direction filter.

        Args:
            id: Node ID to retrieve edges for.
            type: Relationship type to match, or 'ANY' to match all.
            direction: 'OUTGOING', 'INCOMING', or 'ANY' (any other value is
                treated as 'ANY').

        Returns:
            List of edges:
            [
              {"from": "source_id", "to": "target_id", "type": "RELATE"},
              ...
            ]
        """
        # Relationships are kept in a plain edge table; create it on demand.
        # A creation failure is logged and the query is still attempted.
        self._ensure_edges_table()

        # Build clauses and parameters together so placeholders and values
        # always line up (they were bound in the wrong order when a type
        # filter was given).
        where_clauses = []
        params = []

        if type != "ANY":
            where_clauses.append("edge_type = %s")
            params.append(type)

        if direction == "OUTGOING":
            where_clauses.append("source_id = %s")
            params.append(id)
        elif direction == "INCOMING":
            where_clauses.append("target_id = %s")
            params.append(id)
        else:  # ANY
            where_clauses.append("(source_id = %s OR target_id = %s)")
            params.extend([id, id])

        query = f"""
            SELECT source_id, target_id, edge_type
            FROM {self._edges_table}
            WHERE {" AND ".join(where_clauses)}
        """

        with self.connection.cursor() as cursor:
            cursor.execute(query, params)
            results = cursor.fetchall()

        return [
            {"from": source_id, "to": target_id, "type": edge_type}
            for source_id, target_id, edge_type in results
        ]

    def get_neighbors(
        self, id: str, type: str, direction: Literal["in", "out", "both"] = "out"
    ) -> list[str]:
        """Get connected node IDs in a specific direction and relationship type."""
        raise NotImplementedError

    def get_neighbors_by_tag(
        self,
        tags: list[str],
        exclude_ids: list[str],
        top_k: int = 5,
        min_overlap: int = 1,
    ) -> list[dict[str, Any]]:
        """
        Find top-K neighbor nodes with maximum tag overlap.

        Candidates are activated nodes whose `type` is not 'reasoning' and
        whose `memory_type` is not 'WorkingMemory'; the overlap itself is
        computed in Python.

        Args:
            tags: The list of tags to match.
            exclude_ids: Node IDs to exclude (e.g., local cluster).
            top_k: Max number of neighbors to return.
            min_overlap: Minimum number of overlapping tags required.

        Returns:
            Up to ``top_k`` parsed nodes, highest overlap first.
        """
        where_clauses = []
        params = []

        # Exclude the given ids
        if exclude_ids:
            placeholders = ",".join(["%s"] * len(exclude_ids))
            where_clauses.append(f"id NOT IN ({placeholders})")
            params.extend(exclude_ids)

        # Status filter
        where_clauses.append("properties->>'status' = %s")
        params.append("activated")

        # Type filters
        where_clauses.append("properties->>'type' != %s")
        params.append("reasoning")

        where_clauses.append("properties->>'memory_type' != %s")
        params.append("WorkingMemory")

        # Tenant filter
        user_name = self._tenant_user()
        if user_name:
            where_clauses.append("properties->>'user_name' = %s")
            params.append(user_name)

        # Fetch all candidate nodes
        query = f"""
            SELECT id, properties, embedding
            FROM {self._memory_table}
            WHERE {" AND ".join(where_clauses)}
        """

        with self.connection.cursor() as cursor:
            cursor.execute(query, params)
            results = cursor.fetchall()

        nodes_with_overlap = []
        for node_id, properties_json, embedding_json in results:
            properties = self._load_properties(properties_json, embedding_json, node_id)

            # Tags may be stored as a JSON-encoded string; missing tags count as none.
            node_tags = properties.get("tags") or []
            if isinstance(node_tags, str):
                try:
                    node_tags = json.loads(node_tags)
                except (json.JSONDecodeError, TypeError):
                    node_tags = []

            overlap_count = sum(1 for tag in tags if tag in node_tags)
            if overlap_count >= min_overlap:
                node_data = self._parse_node(
                    {
                        "id": properties.get("id", node_id),
                        "memory": properties.get("memory", ""),
                        "metadata": properties,
                    }
                )
                nodes_with_overlap.append((node_data, overlap_count))

        # Sort by overlap (descending) and keep the first top_k
        nodes_with_overlap.sort(key=lambda x: x[1], reverse=True)
        return [node for node, _ in nodes_with_overlap[:top_k]]

    def get_children_with_embeddings(self, id: str) -> list[dict[str, Any]]:
        """Get children nodes with their embeddings.

        Children are the targets of 'PARENT' edges whose source is ``id``.

        Returns:
            A list of ``{"id", "embedding", "memory"}`` dicts; ``embedding`` is
            ``None`` when the column is empty or cannot be parsed.
        """
        self._ensure_edges_table()
        query = f"""
            SELECT m.id, m.properties, m.embedding
            FROM {self._memory_table} m
            JOIN {self._edges_table} e ON m.id = e.target_id
            WHERE e.source_id = %s AND e.edge_type = 'PARENT'
        """
        params = [id]

        # Tenant filter
        user_name = self._tenant_user()
        if user_name:
            query += " AND m.properties->>'user_name' = %s"
            params.append(user_name)

        with self.connection.cursor() as cursor:
            cursor.execute(query, params)
            results = cursor.fetchall()

        children = []
        for child_id, properties_json, embedding_json in results:
            # Only the column value counts as the child's embedding here.
            properties = self._load_properties(properties_json, None, child_id)
            children.append(
                {
                    "id": child_id,
                    "embedding": self._decode_embedding(
                        embedding_json, f"child node {child_id}"
                    ),
                    "memory": properties.get("memory", ""),
                }
            )

        return children

    
def get_path(self, source_id: str, target_id: str, max_depth: int = 3) -> list[str]: + """Get the path of nodes from source to target within a limited depth.""" + raise NotImplementedError + + def get_subgraph( + self, center_id: str, depth: int = 2, center_status: str = "activated" + ) -> dict[str, Any]: + """ + Retrieve a local subgraph centered at a given node. + Args: + center_id: The ID of the center node. + depth: The hop distance for neighbors. + center_status: Required status for center node. + Returns: + { + "core_node": {...}, + "neighbors": [...], + "edges": [...] + } + """ + # 获取中心节点 + core_node = self.get_node(center_id) + if not core_node: + return {"core_node": None, "neighbors": [], "edges": []} + + # 检查中心节点状态 + if center_status and core_node.get("metadata", {}).get("status") != center_status: + return {"core_node": None, "neighbors": [], "edges": []} + + # 获取邻居节点(简化实现,只获取直接连接的节点) + edges = self.get_edges(center_id, direction="ANY") + neighbor_ids = set() + for edge in edges: + if edge["from"] == center_id: + neighbor_ids.add(edge["to"]) + else: + neighbor_ids.add(edge["from"]) + + # 获取邻居节点详情 + neighbors = [] + if neighbor_ids: + neighbors = self.get_nodes(list(neighbor_ids)) + + return {"core_node": core_node, "neighbors": neighbors, "edges": edges} + + def get_context_chain(self, id: str, type: str = "FOLLOWS") -> list[str]: + """Get the ordered context chain starting from a node.""" + raise NotImplementedError + + def search_by_embedding( + self, + vector: list[float], + top_k: int = 5, + scope: str | None = None, + status: str | None = None, + threshold: float | None = None, + search_filter: dict | None = None, + **kwargs, + ) -> list[dict]: + """ + Retrieve node IDs based on vector similarity using PostgreSQL vector operations. 
+ """ + # Build WHERE clause dynamically like nebular.py + where_clauses = [] + if scope: + where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"memory_type\"'::agtype) = '\"{scope}\"'::agtype") + if status: + where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"{status}\"'::agtype") + else: + where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"activated\"'::agtype") + where_clauses.append("embedding is not null") + # Add user_name filter like nebular.py + user_name = self._get_config_value("user_name") + if not self.config.use_multi_db and user_name: + if kwargs.get("cube_name"): + where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{kwargs['cube_name']}\"'::agtype") + else: + where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype") + + # Add search_filter conditions like nebular.py + if search_filter: + for key, value in search_filter.items(): + if isinstance(value, str): + where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"{key}\"'::agtype) = '\"{value}\"'::agtype") + else: + where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"{key}\"'::agtype) = {value}::agtype") + + where_clause = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else "" + + # Keep original simple query structure but add dynamic WHERE clause + query = f""" + WITH t AS ( + SELECT id, + properties, + timeline, + embedding, + ag_catalog.agtype_access_operator(properties, '"id"'::agtype) AS old_id, + (1 - (embedding <=> %s::vector(1024))) AS scope + FROM memtensor_memos_graph."Memory" + {where_clause} + ORDER BY scope DESC + LIMIT {top_k} + ) + SELECT * + FROM t + WHERE scope > 0.1; + """ + params = [vector] + print(where_clause) + with self.connection.cursor() as cursor: + cursor.execute(query, params) + results = cursor.fetchall() + 
output = [] + for row in results: + polarId = row[0] # id + properties = row[1] # properties + embedding = row[3] # embedding + oldId = row[4] # old_id + score = row[5] # scope + id_val = str(oldId) + score_val = float(score) + score_val = (score_val + 1) / 2 # align to neo4j, Normalized Cosine Score + if threshold is None or score_val >= threshold: + output.append({"id": id_val, "score": score_val}) + return output[:top_k] + + def get_by_metadata(self, filters: list[dict[str, Any]]) -> list[str]: + """Retrieve node IDs that match given metadata filters.""" + where_clauses = [] + params = [] + + for i, f in enumerate(filters): + field = f["field"] + op = f.get("op", "=") + value = f["value"] + + if op == "=": + where_clauses.append(f"properties->>'{field}' = %s") + params.append(value) + elif op == "in": + placeholders = ','.join(['%s'] * len(value)) + where_clauses.append(f"properties->>'{field}' IN ({placeholders})") + params.extend(value) + elif op == "contains": + where_clauses.append(f"properties->'{field}' ? 
%s") + params.append(value) + elif op == "starts_with": + where_clauses.append(f"properties->>'{field}' LIKE %s") + params.append(f"{value}%") + elif op == "ends_with": + where_clauses.append(f"properties->>'{field}' LIKE %s") + params.append(f"%{value}") + elif op in [">", ">=", "<", "<="]: + where_clauses.append(f"(properties->>'{field}')::numeric {op} %s") + params.append(value) + else: + raise ValueError(f"Unsupported operator: {op}") + + if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): + where_clauses.append("properties->>'user_name' = %s") + params.append(self._get_config_value("user_name")) + + where_str = " AND ".join(where_clauses) + query = f"SELECT properties->>'id' as id FROM {self.db_name}_graph.\"Memory\" WHERE {where_str}" + + with self.connection.cursor() as cursor: + cursor.execute(query, params) + results = cursor.fetchall() + return [row[0] for row in results if row[0]] + + def get_grouped_counts( + self, + group_fields: list[str], + where_clause: str = "", + params: dict[str, Any] | None = None, + ) -> list[dict[str, Any]]: + """Count nodes grouped by any fields.""" + if not group_fields: + raise ValueError("group_fields cannot be empty") + + final_params = params.copy() if params else {} + + if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): + user_name = self._get_config_value("user_name") + user_clause = f"properties::text LIKE '%{user_name}%'" + if where_clause: + where_clause = where_clause.strip() + if where_clause.upper().startswith("WHERE"): + where_clause += f" AND {user_clause}" + else: + where_clause = f"WHERE {where_clause} AND {user_clause}" + else: + where_clause = f"WHERE {user_clause}" + + # Use text-based queries to avoid agtype issues + group_fields_sql = ", ".join([f"properties::text as {field}" for field in group_fields]) + group_by_sql = ", ".join([f"properties::text" for field in group_fields]) + query = f""" + SELECT {group_fields_sql}, 
COUNT(*) as count + FROM {self.db_name}_graph."Memory" + {where_clause} + GROUP BY {group_by_sql} + """ + + with self.connection.cursor() as cursor: + cursor.execute(query) + results = cursor.fetchall() + + # Simplified return - just return basic counts + return [{"memory_type": "LongTermMemory", "status": "activated", "count": len(results)}] + + def deduplicate_nodes(self) -> None: + """Deduplicate redundant or semantically similar nodes.""" + raise NotImplementedError + + def detect_conflicts(self) -> list[tuple[str, str]]: + """Detect conflicting nodes based on logical or semantic inconsistency.""" + raise NotImplementedError + + def merge_nodes(self, id1: str, id2: str) -> str: + """Merge two similar or duplicate nodes into one.""" + raise NotImplementedError + + def clear(self) -> None: + """Clear the entire graph.""" + try: + with self.connection.cursor() as cursor: + # First check if the graph exists + cursor.execute(f""" + SELECT EXISTS ( + SELECT 1 FROM information_schema.tables + WHERE table_schema = '{self.db_name}_graph' + AND table_name = 'Memory' + ) + """) + graph_exists = cursor.fetchone()[0] + + if not graph_exists: + logger.info(f"Graph '{self.db_name}_graph' does not exist, nothing to clear.") + return + + if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): + cursor.execute(f""" + DELETE FROM {self.db_name}_graph."Memory" + WHERE properties::text LIKE %s + """, (f"%{self._get_config_value('user_name')}%",)) + else: + cursor.execute(f'DELETE FROM {self.db_name}_graph."Memory"') + + logger.info(f"Cleared all nodes from graph '{self.db_name}_graph'.") + except Exception as e: + logger.warning(f"Failed to clear graph '{self.db_name}_graph': {e}") + # Don't raise the exception, just log it as a warning + + def export_graph(self, **kwargs) -> dict[str, Any]: + """Export all graph nodes and edges in a structured form.""" + with self.connection.cursor() as cursor: + # Export nodes + node_query = f'SELECT id, 
properties FROM {self.db_name}_graph."Memory"' + params = [] + + if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): + user_name = self._get_config_value("user_name") + node_query += f" WHERE properties::text LIKE '%{user_name}%'" + + cursor.execute(node_query) + node_results = cursor.fetchall() + nodes = [] + for row in node_results: + node_id, properties_json = row + # properties_json is already a dict from psycopg2 + properties = properties_json if properties_json else {} + nodes.append(self._parse_node( + {"id": properties.get("id", ""), "memory": properties.get("memory", ""), "metadata": properties})) + + # Export edges (simplified - would need more complex Cypher query for full edge export) + edges = [] + + return {"nodes": nodes, "edges": edges} + + def import_graph(self, data: dict[str, Any]) -> None: + """Import the entire graph from a serialized dictionary.""" + with self.connection.cursor() as cursor: + for node in data.get("nodes", []): + id, memory, metadata = _compose_node(node) + + if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): + metadata["user_name"] = self._get_config_value("user_name") + + metadata = _prepare_node_metadata(metadata) + + # Generate embedding if not provided + if "embedding" not in metadata or not metadata["embedding"]: + metadata["embedding"] = generate_vector(self._get_config_value("embedding_dimension", 1024)) + + self.add_node(id, memory, metadata) + + # Import edges + for edge in data.get("edges", []): + self.add_edge(edge["source"], edge["target"], edge["type"]) + + def get_all_memory_items(self, scope: str, **kwargs) -> list[dict]: + """Retrieve all memory items of a specific memory_type.""" + if scope not in {"WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"}: + raise ValueError(f"Unsupported memory type scope: {scope}") + + query = f""" + SELECT id, properties + FROM {self.db_name}_graph."Memory" + WHERE 
properties->>'memory_type' = %s + """ + params = [scope] + + if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): + query += " AND properties->>'user_name' = %s" + params.append(self._get_config_value("user_name")) + + with self.connection.cursor() as cursor: + cursor.execute(query, params) + results = cursor.fetchall() + + nodes = [] + for row in results: + node_id, properties_json = row + # properties_json is already a dict from psycopg2 + properties = properties_json if properties_json else {} + nodes.append(self._parse_node( + {"id": properties.get("id", ""), "memory": properties.get("memory", ""), "metadata": properties})) + return nodes + + def get_structure_optimization_candidates(self, scope: str, **kwargs) -> list[dict]: + """Find nodes that are likely candidates for structure optimization.""" + # This would require more complex graph traversal queries + # For now, return nodes without parent relationships + query = f""" + SELECT id, properties + FROM {self.db_name}_graph."Memory" + WHERE properties->>'memory_type' = %s + AND properties->>'status' = 'activated' + """ + params = [scope] + + if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): + query += " AND properties->>'user_name' = %s" + params.append(self._get_config_value("user_name")) + + with self.connection.cursor() as cursor: + cursor.execute(query, params) + results = cursor.fetchall() + + nodes = [] + for row in results: + node_id, properties_json = row + # properties_json is already a dict from psycopg2 + properties = properties_json if properties_json else {} + nodes.append(self._parse_node( + {"id": properties.get("id", ""), "memory": properties.get("memory", ""), "metadata": properties})) + return nodes + + def drop_database(self) -> None: + """Permanently delete the entire graph this instance is using.""" + if self._get_config_value("use_multi_db", True): + with self.connection.cursor() as cursor: + 
cursor.execute(f"SELECT drop_graph('{self.db_name}_graph', true)") + print(f"Graph '{self.db_name}_graph' has been dropped.") + else: + raise ValueError( + f"Refusing to drop graph '{self.db_name}_graph' in " + f"Shared Database Multi-Tenant mode" + ) + + def _parse_node(self, node_data: dict[str, Any]) -> dict[str, Any]: + """Parse node data from database format to standard format.""" + node = node_data.copy() + + # Convert datetime to string + for time_field in ("created_at", "updated_at"): + if time_field in node and hasattr(node[time_field], "isoformat"): + node[time_field] = node[time_field].isoformat() + + # Remove user_name from output + node.pop("user_name", None) + + # 不再对sources和usage字段进行反序列化,保持List[str]格式 + + return {"id": node.pop("id"), "memory": node.pop("memory", ""), "metadata": node} + + def __del__(self): + """Close database connection when object is destroyed.""" + if hasattr(self, 'connection') and self.connection: + self.connection.close() + + #deprecated + def add_node_old(conn, id: str, memory: str, metadata: dict, graph_name=None): + """ + 添加单个节点到图数据库 + + Args: + conn: 数据库连接 + id: 节点ID + memory: 内存内容 + metadata: 元数据字典 + graph_name: 图名称,可选 + """ + # 使用传入的graph_name或默认值 + if graph_name is None: + graph_name = GRAPH_NAME + + try: + # 先提取 embedding(在清理properties之前) + embedding = find_embedding(metadata) + field_name = detect_embedding_field(embedding) + vector_value = convert_to_vector(embedding) if field_name else None + + # 提取 properties + properties = metadata.copy() + properties = clean_properties(properties) + properties["id"] = id + properties["memory"] = memory + + with conn.cursor() as cursor: + # 先删除现有记录(如果存在) + delete_sql = f""" + DELETE FROM "Memory" + WHERE id = ag_catalog._make_graph_id('{graph_name}'::name, 'Memory'::name, %s::text::cstring); + """ + cursor.execute(delete_sql, (id,)) + + # 然后插入新记录 + if field_name and vector_value: + insert_sql = f""" + INSERT INTO "Memory" (id, properties, {field_name}) + VALUES ( + 
ag_catalog._make_graph_id('{graph_name}'::name, 'Memory'::name, %s::text::cstring), + %s::text::agtype, + %s::vector + ); + """ + cursor.execute(insert_sql, (id, Json(properties), vector_value)) + print(f"✅ 成功插入/更新: {id} ({field_name})") + else: + insert_sql = f""" + INSERT INTO "Memory" (id, properties) + VALUES ( + ag_catalog._make_graph_id('{graph_name}'::name, 'Memory'::name, %s::text::cstring), + %s::text::agtype + ); + """ + cursor.execute(insert_sql, (id, Json(properties))) + print(f"✅ 成功插入/更新(无向量): {id}") + + conn.commit() + return True + + except Exception as e: + conn.rollback() + print(f"❌ 插入失败 (ID: {id}): {e}") + return False + + def add_node(self, id: str, memory: str, metadata: dict[str, Any]) -> None: + """Add a memory node to the graph.""" + # user_name 从 metadata 中获取,如果不存在则从配置中获取 + if "user_name" not in metadata: + if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): + metadata["user_name"] = self._get_config_value("user_name") + + # Safely process metadata + metadata = _prepare_node_metadata(metadata) + + # Merge node and set metadata + created_at = metadata.pop("created_at", datetime.utcnow().isoformat()) + updated_at = metadata.pop("updated_at", datetime.utcnow().isoformat()) + + # Prepare properties + properties = { + "id": id, + "memory": memory, + "created_at": created_at, + "updated_at": updated_at, + **metadata + } + + # Generate embedding if not provided + if "embedding" not in properties or not properties["embedding"]: + properties["embedding"] = generate_vector(self._get_config_value("embedding_dimension", 1024)) + + # serialization - 处理sources和usage字段的JSON序列化 + for field_name in ["sources", "usage"]: + if field_name in properties and properties[field_name]: + if isinstance(properties[field_name], list): + for idx in range(len(properties[field_name])): + # 只有当元素不是字符串时才进行序列化 + if not isinstance(properties[field_name][idx], str): + properties[field_name][idx] = json.dumps(properties[field_name][idx]) 
+ elif isinstance(properties[field_name], str): + # 如果已经是字符串,保持不变 + pass + + # Extract embedding for separate column + embedding_vector = properties.pop("embedding", []) + if not isinstance(embedding_vector, list): + embedding_vector = [] + + # 根据embedding维度选择正确的列名 + embedding_column = "embedding" # 默认列 + if len(embedding_vector) == 3072: + embedding_column = "embedding_3072" + elif len(embedding_vector) == 1024: + embedding_column = "embedding" + elif len(embedding_vector) == 768: + embedding_column = "embedding_768" + + with self.connection.cursor() as cursor: + # 先删除现有记录(如果存在) + delete_query = f""" + DELETE FROM {self.db_name}_graph."Memory" + WHERE id = ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, %s::text::cstring) + """ + cursor.execute(delete_query, (id,)) + + # 然后插入新记录 + if embedding_vector: + insert_query = f""" + INSERT INTO {self.db_name}_graph."Memory"(id, properties, {embedding_column}) + VALUES ( + ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, %s::text::cstring), + %s, + %s + ) + """ + cursor.execute(insert_query, (id, json.dumps(properties), json.dumps(embedding_vector))) + else: + insert_query = f""" + INSERT INTO {self.db_name}_graph."Memory"(id, properties) + VALUES ( + ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, %s::text::cstring), + %s + ) + """ + cursor.execute(insert_query, (id, json.dumps(properties))) + logger.info(f"Added node {id} to graph '{self.db_name}_graph'.") \ No newline at end of file From 188d4a231e17d6dfa4fa029a34415e9f287c2e75 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Sat, 18 Oct 2025 14:04:12 +0800 Subject: [PATCH 002/137] add polardb.py --- examples/basic_modules/polardb.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 examples/basic_modules/polardb.py diff --git a/examples/basic_modules/polardb.py b/examples/basic_modules/polardb.py new file mode 100644 index 000000000..c5515c8d4 --- /dev/null +++ 
b/examples/basic_modules/polardb.py @@ -0,0 +1 @@ +#123 \ No newline at end of file From b8b22b455e50b8ff4ee71be42fac6718cebd4094 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sat, 18 Oct 2025 15:15:27 +0800 Subject: [PATCH 003/137] add polar factory --- examples/basic_modules/polardb_example.py | 525 ++++++++++++++++++++++ examples/basic_modules/polardb_search.py | 136 ++++++ src/memos/configs/graph_db.py | 54 +++ src/memos/graph_dbs/factory.py | 2 + 4 files changed, 717 insertions(+) create mode 100644 examples/basic_modules/polardb_example.py create mode 100644 examples/basic_modules/polardb_search.py diff --git a/examples/basic_modules/polardb_example.py b/examples/basic_modules/polardb_example.py new file mode 100644 index 000000000..9e1c0c6c4 --- /dev/null +++ b/examples/basic_modules/polardb_example.py @@ -0,0 +1,525 @@ +import os +import sys + +# Add the src directory to the Python path +src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src')) +sys.path.insert(0, src_path) + +from datetime import datetime + +from memos.configs.embedder import EmbedderConfigFactory +from memos.configs.graph_db import GraphDBConfigFactory +from memos.embedders.factory import EmbedderFactory +from memos.graph_dbs.factory import GraphStoreFactory +from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata + + +embedder_config = EmbedderConfigFactory.model_validate( + { + "backend": "universal_api", + "config": { + "provider": "openai", + "api_key": os.getenv("OPENAI_API_KEY", "sk-xxxxx"), + "model_name_or_path": "text-embedding-3-large", + "base_url": os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1"), + }, + } +) +embedder = EmbedderFactory.from_config(embedder_config) + + +def embed_memory_item(memory: str) -> list[float]: + return embedder.embed([memory])[0] + + +def example_multi_db(db_name: str = "paper_polardb"): + """Example using PolarDB with 
multi-database mode (physical isolation).""" + # Step 1: Build factory config + config = GraphDBConfigFactory( + backend="polardb", + config={ + "host": "139.196.232.20", + "port": 5432, + "user": "root", + "password": "123456", + "db_name": db_name, + "auto_create": True, + "embedding_dimension": 1024, + "use_multi_db": True, + }, + ) + + # Step 2: Instantiate the graph store + graph = GraphStoreFactory.from_config(config) + graph.clear() + + # Step 3: Create topic node + topic = TextualMemoryItem( + memory="This research addresses long-term multi-UAV navigation for energy-efficient communication coverage.", + metadata=TreeNodeTextualMemoryMetadata( + memory_type="LongTermMemory", + key="Multi-UAV Long-Term Coverage", + hierarchy_level="topic", + type="fact", + memory_time="2024-01-01", + source="file", + sources=["paper://multi-uav-coverage/intro"], + status="activated", + confidence=95.0, + tags=["UAV", "coverage", "multi-agent"], + entities=["UAV", "coverage", "navigation"], + visibility="public", + updated_at=datetime.now().isoformat(), + embedding=embed_memory_item( + "This research addresses long-term " + "multi-UAV navigation for " + "energy-efficient communication " + "coverage." 
+ ), + ), + ) + + graph.add_node( + id=topic.id, memory=topic.memory, metadata=topic.metadata.model_dump(exclude_none=True) + ) + + # Step 4: Define and write concept nodes + concepts = [ + TextualMemoryItem( + memory="The reward function combines multiple objectives: coverage maximization, energy consumption minimization, and overlap penalty.", + metadata=TreeNodeTextualMemoryMetadata( + memory_type="LongTermMemory", + key="Reward Function Design", + hierarchy_level="concept", + type="fact", + memory_time="2024-01-01", + source="file", + sources=["paper://multi-uav-coverage/reward"], + status="activated", + confidence=92.0, + tags=["reward", "DRL", "multi-objective"], + entities=["reward function"], + visibility="public", + updated_at=datetime.now().isoformat(), + embedding=embed_memory_item( + "The reward function combines " + "multiple objectives: coverage " + "maximization, energy consumption " + "minimization, and overlap penalty." + ), + ), + ), + TextualMemoryItem( + memory="The energy model considers transmission power and mechanical movement power consumption.", + metadata=TreeNodeTextualMemoryMetadata( + memory_type="LongTermMemory", + key="Energy Model", + hierarchy_level="concept", + type="fact", + memory_time="2024-01-01", + source="file", + sources=["paper://multi-uav-coverage/energy"], + status="activated", + confidence=90.0, + tags=["energy", "power model"], + entities=["energy", "power"], + visibility="public", + updated_at=datetime.now().isoformat(), + embedding=embed_memory_item( + "The energy model considers " + "transmission power and mechanical movement power consumption." 
+ ), + ), + ), + TextualMemoryItem( + memory="Coverage performance is measured using CT (Coverage Time) and FT (Fairness Time) metrics.", + metadata=TreeNodeTextualMemoryMetadata( + memory_type="LongTermMemory", + key="Coverage Metrics", + hierarchy_level="concept", + type="fact", + memory_time="2024-01-01", + source="file", + sources=["paper://multi-uav-coverage/metrics"], + status="activated", + confidence=91.0, + tags=["coverage", "fairness", "metrics"], + entities=["CT", "FT"], + visibility="public", + updated_at=datetime.now().isoformat(), + embedding=embed_memory_item( + "Coverage performance is measured using CT (Coverage Time) and FT (Fairness Time) metrics." + ), + ), + ), + ] + + # Step 5: Write and link concepts to topic + for concept in concepts: + graph.add_node( + id=concept.id, + memory=concept.memory, + metadata=concept.metadata.model_dump(exclude_none=True), + ) + graph.add_edge(source_id=concept.id, target_id=topic.id, type="RELATED") + print(f"Creating edge: ({concept.id}) -[:RELATED]-> ({topic.id})") + + # Define concept → fact + fact_pairs = [ + { + "concept_key": "Reward Function Design", + "fact": TextualMemoryItem( + memory="The reward includes three parts: (1) coverage gain, (2) energy penalty, and (3) penalty for overlapping areas with other UAVs.", + metadata=TreeNodeTextualMemoryMetadata( + memory_type="WorkingMemory", + key="Reward Components", + hierarchy_level="fact", + type="fact", + memory_time="2024-01-01", + source="file", + sources=["paper://multi-uav-coverage/reward-details"], + status="activated", + confidence=90.0, + tags=["reward", "overlap", "multi-agent"], + entities=["coverage", "energy", "overlap"], + visibility="public", + updated_at=datetime.now().isoformat(), + embedding=embed_memory_item( + "The reward includes three parts: (1) coverage gain, (2) energy penalty, and (3) penalty for overlapping areas with other UAVs." 
+ ), + ), + ), + }, + { + "concept_key": "Energy Model", + "fact": TextualMemoryItem( + memory="Total energy cost is calculated from both mechanical movement and communication transmission.", + metadata=TreeNodeTextualMemoryMetadata( + memory_type="LongTermMemory", + key="Energy Cost Components", + hierarchy_level="fact", + type="fact", + memory_time="2024-01-01", + source="file", + sources=["paper://multi-uav-coverage/energy-detail"], + status="activated", + confidence=89.0, + tags=["energy", "movement", "transmission"], + entities=["movement power", "transmission power"], + visibility="public", + updated_at=datetime.now().isoformat(), + embedding=embed_memory_item( + "Total energy cost is calculated from both mechanical movement and communication transmission." + ), + ), + ), + }, + { + "concept_key": "Coverage Metrics", + "fact": TextualMemoryItem( + memory="CT measures how long the area is covered; FT reflects the fairness of agent coverage distribution.", + metadata=TreeNodeTextualMemoryMetadata( + memory_type="LongTermMemory", + key="CT and FT Definition", + hierarchy_level="fact", + type="fact", + memory_time="2024-01-01", + source="file", + sources=["paper://multi-uav-coverage/metric-definitions"], + status="activated", + confidence=91.0, + tags=["CT", "FT", "fairness"], + entities=["coverage time", "fairness"], + visibility="public", + updated_at=datetime.now().isoformat(), + embedding=embed_memory_item( + "CT measures how long the area is covered; FT reflects the fairness of agent coverage distribution." 
+ ), + ), + ), + }, + ] + + # Write facts and link to corresponding concept by key + concept_map = {concept.metadata.key: concept.id for concept in concepts} + + for pair in fact_pairs: + fact_item = pair["fact"] + concept_key = pair["concept_key"] + concept_id = concept_map[concept_key] + + graph.add_node( + fact_item.id, + fact_item.memory, + metadata=fact_item.metadata.model_dump(exclude_none=True), + ) + graph.add_edge(source_id=fact_item.id, target_id=concept_id, type="BELONGS_TO") + + all_graph_data = graph.export_graph() + print("Graph data:", all_graph_data) + + nodes = graph.search_by_embedding(vector=embed_memory_item("what does FT reflect?"), top_k=1) + + for node_i in nodes: + print("Search result:", graph.get_node(node_i["id"])) + + +def example_shared_db(db_name: str = "shared_travel_group_polardb"): + """ + Example: Single(Shared)-DB multi-tenant (logical isolation) + Multiple users' data in the same PolarDB with user_name as a tag. + """ + # users + user_list = ["travel_member_alice", "travel_member_bob"] + + for user_name in user_list: + # Step 1: Build factory config + config = GraphDBConfigFactory( + backend="polardb", + config={ + "host": "139.196.232.20", + "port": 5432, + "user": "root", + "password": "123456", + "db_name": db_name, + "user_name": user_name, + "use_multi_db": False, + "auto_create": True, + "embedding_dimension": 1024, + }, + ) + # Step 2: Instantiate graph store + graph = GraphStoreFactory.from_config(config) + print(f"\n[INFO] Working in shared DB: {db_name}, for user: {user_name}") + graph.clear() + + # Step 3: Create topic node + topic = TextualMemoryItem( + memory=f"Travel notes for {user_name}", + metadata=TreeNodeTextualMemoryMetadata( + memory_type="LongTermMemory", + hierarchy_level="topic", + status="activated", + visibility="public", + embedding=embed_memory_item(f"Travel notes for {user_name}"), + ), + ) + + graph.add_node( + id=topic.id, memory=topic.memory, metadata=topic.metadata.model_dump(exclude_none=True) + 
) + + # Step 4: Add a concept for each user + concept = TextualMemoryItem( + memory=f"Itinerary plan for {user_name}", + metadata=TreeNodeTextualMemoryMetadata( + memory_type="LongTermMemory", + hierarchy_level="concept", + status="activated", + visibility="public", + embedding=embed_memory_item(f"Itinerary plan for {user_name}"), + ), + ) + + graph.add_node( + id=concept.id, + memory=concept.memory, + metadata=concept.metadata.model_dump(exclude_none=True), + ) + + # Link concept to topic + graph.add_edge(source_id=concept.id, target_id=topic.id, type="INCLUDE") + + print(f"[INFO] Added nodes for {user_name}") + + # Step 5: Query and print ALL for verification + print("\n=== Export entire DB (for verification, includes ALL users) ===") + graph = GraphStoreFactory.from_config(config) + all_graph_data = graph.export_graph() + print(all_graph_data) + + # Step 6: Search for alice's data only + print("\n=== Search for travel_member_alice ===") + config_alice = GraphDBConfigFactory( + backend="polardb", + config={ + "host": "139.196.232.20", + "port": 5432, + "user": "root", + "password": "123456", + "db_name": db_name, + "user_name": user_list[0], + "embedding_dimension": 1024, + }, + ) + graph_alice = GraphStoreFactory.from_config(config_alice) + nodes = graph_alice.search_by_embedding(vector=embed_memory_item("travel itinerary"), top_k=1) + for node in nodes: + print(graph_alice.get_node(node["id"])) + + +def run_user_session( + user_name: str, + db_name: str, + topic_text: str, + concept_texts: list[str], + fact_texts: list[str], +): + """Run a complete user session with PolarDB.""" + print(f"\n=== {user_name} starts building their memory graph ===") + + config = GraphDBConfigFactory( + backend="polardb", + config={ + "host": "139.196.232.20", + "port": 5432, + "user": "root", + "password": "123456", + "db_name": db_name, + "user_name": user_name, + "use_multi_db": False, + "auto_create": True, + "embedding_dimension": 1024, + }, + ) + graph = 
GraphStoreFactory.from_config(config) + print(f"6666666:{graph}") + + # Start with a clean slate for this user + graph.clear() + + now = datetime.utcnow().isoformat() + + # === Step 1: Create a root topic node (e.g., user's research focus) === + topic = TextualMemoryItem( + memory="55555", + metadata=TreeNodeTextualMemoryMetadata( + memory_type="LongTermMemory", + key="Research Topic", + hierarchy_level="topic", + type="fact", + memory_time="2024-01-01", + status="activated", + visibility="public", + updated_at=now, + embedding=embed_memory_item(topic_text), + ), + ) + print(f"111111111opic:{topic}") + graph.add_node('45678', topic.memory, topic.metadata.model_dump(exclude_none=True)) + + # === Step 2: Create two concept nodes linked to the topic === + concept_items = [] + for i, text in enumerate(concept_texts): + concept = TextualMemoryItem( + memory=text, + metadata=TreeNodeTextualMemoryMetadata( + memory_type="LongTermMemory", + key=f"Concept {i + 1}", + hierarchy_level="concept", + type="fact", + memory_time="2024-01-01", + status="activated", + visibility="public", + updated_at=now, + embedding=embed_memory_item(text), + tags=["concept"], + confidence=90 + i, + ), + ) + graph.add_node(concept.id, concept.memory, concept.metadata.model_dump(exclude_none=True)) + graph.add_edge(topic.id, concept.id, type="PARENT") + concept_items.append(concept) + + # === Step 3: Create supporting facts under each concept === + for i, text in enumerate(fact_texts): + fact = TextualMemoryItem( + memory=text, + metadata=TreeNodeTextualMemoryMetadata( + memory_type="WorkingMemory", + key=f"Fact {i + 1}", + hierarchy_level="fact", + type="fact", + memory_time="2024-01-01", + status="activated", + visibility="public", + updated_at=now, + embedding=embed_memory_item(text), + confidence=85.0, + tags=["fact"], + ), + ) + graph.add_node(fact.id, fact.memory, fact.metadata.model_dump(exclude_none=True)) + graph.add_edge(concept_items[i % len(concept_items)].id, fact.id, type="PARENT") + 
+ # === Step 4: Retrieve memory using semantic search === + vector = embed_memory_item("How is memory retrieved?") + search_result = graph.search_by_embedding(vector, top_k=2) + for r in search_result: + node = graph.get_node(r["id"]) + print("🔍 111111111Search result:", node["memory"]) + + # === Step 5: Tag-based neighborhood discovery === + neighbors = graph.get_neighbors_by_tag(["concept"], exclude_ids=[], top_k=2) + print("📎 222222Tag-related nodes:", [neighbor["memory"] for neighbor in neighbors]) + + # === Step 6: Retrieve children (facts) of first concept === + children = graph.get_children_with_embeddings(concept_items[0].id) + print("📍 333333Children of concept:", [child["memory"] for child in children]) + + # === Step 7: Export a local subgraph and grouped statistics === + subgraph = graph.get_subgraph(topic.id, depth=2) + print("📌444444 Subgraph node count:", len(subgraph["neighbors"])) + + stats = graph.get_grouped_counts(["memory_type", "status"]) + print("📊 55555Grouped counts:", stats) + + # === Step 8: Demonstrate updates and cleanup === + graph.update_node(concept_items[0].id, {"confidence": 99.0}) + graph.remove_oldest_memory("WorkingMemory", keep_latest=1) + graph.delete_edge(topic.id, concept_items[0].id, type="PARENT") + graph.delete_node(concept_items[1].id) + + # === Step 9: Export and re-import the entire graph structure === + exported = graph.export_graph() + graph.import_graph(exported) + print("📦 666666Graph exported and re-imported, total nodes:", len(exported["nodes"])) + + +def example_complex_shared_db(db_name: str = "poc"): + """Complex example with multiple users in shared database.""" + # User 1: Alice explores structured memory for LLMs + run_user_session( + user_name="adimin", + db_name=db_name, + topic_text="Alice studies structured memory and long-term memory optimization in LLMs.", + concept_texts=[ + "Short-term memory can be simulated using WorkingMemory blocks.", + "A structured memory graph improves retrieval precision for 
agents.", + ], + fact_texts=[ + "Embedding search is used to find semantically similar memory items.", + "User memories are stored as node-edge structures that support hierarchical reasoning.", + ], + ) + + +if __name__ == "__main__": + try: + # print("\n=== PolarDB Example: Multi-DB ===") + # example_multi_db(db_name="paper_polardb") + # + # print("\n=== PolarDB Example: Single-DB ===") + # example_shared_db(db_name="shared_travel_group_polardb") + + print("\n=== PolarDB Example: Single-DB-Complex ===") + example_complex_shared_db(db_name="test_memos_1011") + except Exception as e: + print(f"❌ Error running111111 PolarDB example: {e}") + print("Please check:") + print("1. Network connectivity to PolarDB server") + print("2. Database credentials and permissions") + print("3. Apache AGE extension installation") + print("4. Required Python packages (psycopg2-binary)") + import traceback + traceback.print_exc() \ No newline at end of file diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py new file mode 100644 index 000000000..deed13b7d --- /dev/null +++ b/examples/basic_modules/polardb_search.py @@ -0,0 +1,136 @@ +import json +import os +import sys + +src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src')) +sys.path.insert(0, src_path) + +from memos.configs.graph_db import GraphDBConfigFactory +from memos.graph_dbs.factory import GraphStoreFactory +import psycopg2 + +import psycopg2 + + +def handler_node_edge(db_name: str = "shared-traval-group-complex", type: str = "node"): + if type == "node": + run_user_session_node( + db_name=db_name, + ) + elif type == "edge": + run_user_session_edges( + db_name=db_name, + ) + + +DB_CONFIG = { + 'host': 'memory.pg.polardb.rds.aliyuncs.com', + 'port': 5432, + 'database': 'test_memos', + 'user': 'adimin', + 'password': 'Openmem0925' +} + + +def run_user_session_node(db_name: str, ): + graph = getGraph(db_name) + exported = 
graph.export_graph(include_embedding=True) + nodes = exported.get('nodes', []) + print(f"查询数据数量:{len(nodes)}") + list = [] + + conn = psycopg2.connect(**DB_CONFIG) + print("✅ 数据库连接成功") + + # create_vector_extension(conn) + # create_table(conn) + # insert_data(conn, nodes) + # for node in nodes: + # metadata = node.get('metadata') + # # embedding_1024 = metadata.get('embedding_1024') + # # if embedding_1024: + # # metadata['embedding_1024'] = None + # metadata['id'] = node.get('id') + # metadata['memory'] = node.get('memory') + # list.append(metadata) + print("nodes:", list) + + +def run_user_session_edges(db_name: str, ): + graph = getGraph(db_name) + exported = graph.export_graph() + nodes = exported.get('nodes', []) + list = [] + edges = exported.get('edges', []) + for i, edge in edges: + source = edge.get('source') + target = edge.get('target') + edge_type = edge.get('type') + print(f"剩余{i}条边") + print("edges:", list) + + +def getGraph(db_name): + config = GraphDBConfigFactory( + backend="nebular", + config={ + "uri": json.loads(os.getenv("NEBULAR_HOSTS", "localhost")), + "user": os.getenv("NEBULAR_USER", "root"), + "password": os.getenv("NEBULAR_PASSWORD", "xxxxxx"), + "space": db_name, + "use_multi_db": False, + "auto_create": True, + "embedding_dimension": 1024, + }, + ) + graph = GraphStoreFactory.from_config(config) + return graph + + +def getPolarDb(db_name): + config = GraphDBConfigFactory( + backend="polardb", + config={ + "host": "memory.pg.polardb.rds.aliyuncs.com", + "port": 5432, + "user": "adimin", + "password": "Openmem0925", + "db_name": db_name, + "user_name": 'adimin', + "use_multi_db": True, # 设置为True,不添加user_name过滤条件 + "auto_create": True, + "embedding_dimension": 1024, + }, + ) + graph = GraphStoreFactory.from_config(config) + return graph + + +def searchVector(db_name: str, vectorStr: list[float]): + graph = getPolarDb(db_name) + # nodes = graph.search_by_embedding(vector=vectorStr, top_k=1) + # print("search_by_embedding nodes:", 
len(nodes)) + # for node_i in nodes: + # print("Search result:", graph.get_node(node_i["id"])) + + # detail = graph.get_node("bb079c5b-1937-4125-a9e5-55d4abe6c95d") + # print("单个node:", detail) + + ids = ['bb079c5b-1937-4125-a9e5-55d4abe6c95d', 'd66120af-992b-44c6-b261-a6ebe6bc57a5'] + detail_list = graph.get_nodes(ids) + print("1111多个node:", len(detail_list)) + + print("多个node:", detail_list) + + # graph.update_node(id="000009999ef-926f-42e2-b7b5-0224daf0abcd", fields={"name": "new_name"}) + # for node_i in nodes: + # print("Search result:", graph.get_node(node_i["id"])) + + +if __name__ == "__main__": + # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") + # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="edge") + vector = [-0.02954108,-0.024826104,-0.07641619,0.01464847,-0.0032501293,-0.044220068,0.024322564,0.018768352,-0.0030803748,-0.017776528,0.022201587,-0.0036640249,-0.013397247,-0.02462774,0.021743823,0.0115509285,-0.023223927,0.024093682,-0.0071144262,-0.014984163,-0.013305694,-0.022064257,0.055786256,-0.012374905,0.019714398,0.008865376,-0.00803377,-0.006851211,0.036529623,0.042785738,0.05923475,0.007347123,0.058776986,0.03900155,-0.05065929,-0.060730115,0.0042343233,-0.0030670234,-0.02864081,0.047332868,-0.007789629,-0.011741664,0.019638104,0.007427232,-0.036529623,-0.034484938,-0.046203714,0.009078999,-0.026367245,-0.049896352,0.027938904,-0.048950303,0.04299936,-0.024597222,-0.018600505,0.026321469,-0.03240974,-0.07080094,-0.058105595,-0.0026359616,-0.023757987,0.014465364,-0.02682501,-0.014427217,-0.01814274,0.07244889,-0.0063705584,0.019378705,-0.05078136,-0.016525306,0.013534576,-0.01814274,-0.016098058,0.018295329,-0.061645642,0.023773246,0.03549202,-0.034820635,0.029464787,0.026535092,-0.024398858,-0.004184732,0.040374845,0.0037460409,0.014419587,-0.009384176,0.010055564,0.019546552,0.023498587,-0.01620487,-0.013389617,-0.016418492,0.028198304,-0.051117055,0.023101857,0.0705568,-0.03991708,
-0.039428797,0.006595626,0.0039367764,0.034515455,-0.02046208,-0.001202586,-0.018569987,-0.009666464,-0.018966716,-0.0052719233,0.0012187985,0.044403173,-0.018295329,0.01890568,0.037994467,0.020538375,0.012939482,-0.0036048968,-0.01968388,-0.0021209763,0.01451114,0.024429375,-0.031311106,-0.017852822,-0.0057830936,0.011459376,-0.020523116,-0.021209763,0.0082550235,-0.0035057145,0.031311106,0.0063438555,0.012374905,0.028595034,0.03552254,-0.041961763,0.0062675616,0.030426092,-0.030548163,0.058502328,0.029205387,-0.02604681,-0.025756892,0.004211435,0.05160534,0.0092239585,-0.049987905,-0.0013847381,-0.055267457,0.027282774,0.012535123,-0.009971641,-0.0040741055,-0.022613576,0.018478435,0.026031552,0.02720648,-0.04782115,0.007400529,0.034851152,0.042694185,-0.031005928,-0.0019149822,0.00042104814,-0.0049972646,-0.02539068,0.031738352,0.010337852,-0.036102373,0.014213594,0.011878993,0.0008330364,-0.035583574,0.0034465867,-0.0082550235,0.009887717,0.012908964,0.018875163,0.01814274,0.091552936,0.018768352,0.03305061,-0.054260373,-0.02682501,0.03628548,0.031158516,-0.048675645,-0.017288245,0.02929694,0.013450652,-0.0058365,0.032745432,-0.027160704,-0.015655553,-0.032074045,0.020248458,0.004760753,0.04730235,-0.024841363,0.020248458,0.013786347,-0.010810876,-0.033874586,0.008491535,0.023651175,0.016021764,0.0039329617,-0.04626475,0.0050621144,-0.02720648,-0.008140582,0.017440835,-0.029830998,0.04861461,0.042327974,0.01529697,-0.027191222,-0.03515633,0.018264811,0.038696375,-0.025543269,0.018676799,0.023757987,0.0058670174,-0.0045013526,0.017059363,-0.03720101,-0.03668221,-0.0137253115,0.024581963,0.025589045,0.01270297,-0.027450623,-0.035614092,0.010299705,0.021041917,-0.013893158,0.014350923,-0.032440256,-0.009338399,0.006114973,0.025344905,0.03512581,-0.02917487,-0.018432658,0.008873005,-0.014236482,-0.027359068,-0.055572633,0.016784705,0.041900728,-0.019943282,0.020080611,0.018112222,0.009033223,-0.021087693,-0.05041515,0.0352784,0.024642998,0.007118241,0.008804341,-0.0
22384694,-0.030471869,-0.015762364,0.048706163,0.0064659263,0.0048790085,0.003906259,-0.02384954,-0.012771634,-0.025985776,0.0017061271,0.0736696,0.020950364,0.013847382,0.029785221,-0.022201587,-0.003059394,0.038574304,-0.01647953,-0.022354174,0.0562135,0.001626972,-0.007865923,0.012001064,0.043609716,-0.0274964,0.029846257,-0.0035266953,-0.00026106893,0.050903432,0.022750905,-0.037475668,-0.013244658,-0.015640294,0.042419527,-0.027938904,-0.008247394,-0.0047111614,-0.019088788,-0.15368687,0.033020094,0.028350893,0.02449041,0.016830482,0.03189094,-0.04861461,-0.024261529,-0.05029308,0.018386882,0.028945986,-0.044525243,0.009628317,0.021667529,0.026123105,0.037384115,-0.007335679,-0.02149968,-0.0176392,-0.0210114,0.00070810475,-0.05261242,0.061340466,0.0037002645,-0.010910058,-0.011360194,0.008201617,0.03793343,-0.0070305024,-0.0225678,-0.00945284,0.02281194,0.0013446837,0.023880057,-0.0144806225,0.015914952,0.018646281,0.0032444072,0.016494788,0.01853947,0.01985173,0.03964242,0.015541111,0.016494788,-0.031921457,-0.029693669,0.020782517,-0.009796164,-0.0032234264,-0.046478376,0.044647314,-0.035675127,-0.01853947,0.0069885408,-0.07635515,-0.010986352,0.00971987,0.034027174,0.0347596,0.02812201,-0.03213508,-0.05355847,-0.00038886155,-0.014061005,-0.08074969,0.020584151,0.047760114,0.03381355,0.012809781,-0.020904588,0.013954193,-0.04382334,0.01062777,0.004421244,0.039337244,0.014549287,0.013435394,0.021316575,-0.01957707,-0.09228536,0.006145491,0.0015955006,0.0006575599,0.010147117,-0.03137214,-0.048919786,0.0044517615,0.04214487,0.053466916,0.22290088,0.019592328,0.021041917,-0.034790117,0.034393385,-0.016616859,-0.0748903,0.08294696,-0.009971641,-0.030548163,0.010810876,0.04147348,-0.025421198,-0.020614669,0.019271893,0.04562388,-0.071838535,0.0140152285,0.04455576,0.016021764,0.0033302382,-0.03851327,-0.05197155,-0.024795586,-0.04925548,-0.0012359646,-0.028152527,0.020446822,-0.010955835,0.007926959,-0.05679334,0.04898082,0.02227788,0.009887717,-0.037262045,-0.021
865893,0.024658257,-0.03305061,-0.005104076,0.06274428,0.0026741086,-0.0032806469,-0.027450623,0.016265905,-0.008514423,-0.011116052,-0.008338947,-0.020813035,-0.025711115,-0.021438645,-0.009872458,-0.04071054,-0.019348187,0.0037441335,-0.0155868875,-0.0049705617,-0.009040852,0.007850665,0.031463694,0.05029308,0.002864844,0.0063552996,-0.056945927,-0.046051126,0.006042494,0.053833127,-0.013702422,-0.03045661,0.048187364,0.029068056,-0.022766164,-0.002573019,0.012855558,0.005336773,-0.009414693,-0.046173196,-0.014053375,-0.0054741027,0.001794819,0.014472993,-0.00087928964,0.004680644,0.02449041,0.018325847,0.054199338,-0.006156935,0.028717104,0.086425975,0.02307134,0.0060958997,-0.0008125323,0.018829387,-0.011825588,0.0032806469,0.008880635,-0.019271893,-0.015991246,-0.008018511,-0.03149421,0.00803377,-0.0137482,0.0004093656,-0.049682725,-0.015518223,-0.034118727,-0.0069542085,-0.05297863,-0.0052299616,-0.0038566676,0.0008196849,-0.037536703,-0.02383428,-0.033355787,-0.051239125,0.007118241,0.03488167,0.028259339,0.008842488,0.009246847,0.03970346,-0.019271893,0.038543787,-0.022659352,0.022720387,0.024566704,-0.056030396,-0.0026283322,-0.009399435,0.0077743703,-0.02191167,0.0028667513,-0.028717104,0.0070991674,0.027038634,0.063964985,0.0090103345,-0.0053215143,-0.022064257,-0.014091522,-0.0057983524,-0.021087693,0.006557479,-0.004325876,0.045440774,0.0065765525,0.0015716588,-0.049804796,0.03924569,-0.01918034,-0.021331834,0.039093103,0.017395059,0.012664823,-0.052765008,0.021331834,-0.07537858,-0.0061607496,-0.032043528,0.0067978054,-0.0121917995,0.0039978116,0.0088196,0.006580367,0.07238785,0.0110092405,-0.0074196025,0.009025593,0.03085334,-0.03137214,-0.006259932,0.011901882,-0.040741056,-0.030242987,0.008834858,-0.019744916,-0.009712241,-0.0040588467,0.033172682,0.004276285,-0.049072374,0.03488167,-0.0051269643,0.007694261,0.005935682,0.01788334,-0.0069542085,0.0085449405,-0.007194535,-0.041900728,-0.013313323,-0.0013895065,0.07617205,0.0037422262,-0.025009211,0.0
051345937,0.0066299583,0.10388207,-0.008834858,0.006439223,-0.021102952,-0.03099067,-0.016555823,-0.0126571935,0.010658287,0.0057945377,-0.0055503966,-0.009681723,0.057617314,-0.017822305,-0.0034828263,0.0005464566,0.0043602088,-0.037109457,0.010849023,-0.009216329,-0.049194444,0.01179507,0.049469102,-0.008514423,-0.009681723,-0.01890568,0.03500374,-0.028228821,-0.05871595,0.0011281992,0.044799905,-0.0032806469,0.009002705,0.030120917,0.0073547526,-0.010025047,0.019012494,-0.031433176,-0.02787787,0.021621753,-0.011177087,-0.02630621,0.042297456,-0.041046232,-0.020919846,-0.002534872,-0.024765069,0.01632694,0.0029258793,-0.0018615763,-0.026748717,-0.030273505,0.006763473,0.036590658,0.027236998,0.02307134,0.031829905,0.013107329,-0.025451716,0.040252775,0.04214487,0.012710599,0.01800541,-0.012130764,-0.056274537,0.02009587,0.03695687,0.024963435,-0.030166693,0.009002705,-0.06988541,0.043212987,0.01840214,-0.01179507,-0.09484884,-0.023986869,0.015319858,-0.023498587,-0.034790117,0.012176541,0.0018901867,-0.00037646378,0.051818963,0.021804858,-0.05209362,-0.027710022,0.051391713,-0.022064257,-0.024139458,-0.018295329,-0.04092416,0.0063667437,0.022995045,0.0149460165,-0.030059882,0.019134564,0.017562905,-0.04962169,0.015579258,0.010223411,-0.0076675583,-0.059021126,0.04431162,-0.023315482,0.017517129,-0.0021457719,0.042968843,0.028533999,-0.029449528,-0.016769446,0.026367245,-0.015762364,-0.01140597,0.030059882,0.030929634,0.0058250558,-0.06689468,-0.013473541,0.009323141,0.025299128,-0.021728564,0.049987905,-0.0020599412,0.04287729,-0.022827199,0.020828294,-0.001273158,-0.04068002,-0.013664275,-0.0036945425,-0.019775433,-0.024642998,-0.005275738,-0.036407553,-0.0008239764,-0.027435362,0.06427016,-0.012901335,-0.02035527,0.020614669,-0.0017051734,0.042480562,-0.0013942749,0.018981975,0.030365057,0.0028915468,0.052642938,0.03408821,-0.01878361,0.0043525794,-0.014183076,0.0009870551,-0.011611964,-0.030273505,-0.010635399,0.058776986,-0.03625496,-0.008270282,-0.03295906,0.
04794322,-0.0025119837,0.045959573,-0.008773823,0.048584092,0.048828233,-0.056457642,0.06039442,-0.04522715,0.015617405,0.030960152,0.047515973,0.042572115,-0.069214016,0.017959634,-0.0090484815,0.02073674,-0.013839752,0.035430986,-0.041046232,-0.009887717,0.07043473,-0.02787787,0.010993982,-0.0017557183,0.0028057161,-0.031204293,-0.0059700143,0.0054741027,-0.023666434,-0.008903523,-0.021316575,0.00014424356,0.011863735,-0.0058136117,0.004821788,-0.01710514,0.009384176,-0.02864081,-0.0058288705,-0.13269073,0.019363446,-0.013923676,0.025177058,-0.049194444,0.015129123,-0.02359014,-0.009155294,0.0034294203,-0.01878361,-0.0027256072,-0.000686647,-0.048034772,-0.018264811,0.071228184,0.037780844,-0.025726374,-0.028595034,-0.011177087,0.031463694,-0.01075747,-0.035705645,0.097290255,0.010475182,-0.011199976,-0.02655035,-0.019241376,-0.021698046,-0.019638104,-0.016769446,-0.02165227,-0.06939713,0.024658257,0.05297863,0.04586802,0.00984957,-0.009414693,0.013458282,-0.014610323,0.024581963,-0.023376517,0.01269534,0.010284446,0.023880057,-0.011383082,0.101684794,-0.007423417,-0.048156846,-0.008140582,0.014602694,-0.0033321455,0.019638104,0.028976504,0.025619563,0.009086629,-0.007049576,0.011596705,0.0047226055,-0.024215752,0.07684343,-0.003227241,0.016082799,-0.025207575,-0.025177058,-0.024002127,0.017135657,-0.01969914,0.043212987,0.024185235,-0.02073674,-0.033874586,-0.0021591233,-0.045471292,-0.00071954884,-0.008132952,0.019348187,0.03948983,-0.033752516,-0.084961124,0.0030994483,-0.041381925,-0.041015714,0.0112839,-0.019836469,0.032104563,0.016098058,0.020080611,-0.007942217,-0.050140493,-0.034393385,-0.05297863,-0.028137268,-0.0058174264,-0.0056114323,-0.03189094,0.021026658,-0.011756923,-0.027267516,0.006385817,-0.04718028,-0.012519864,-0.035949785,0.013076811,-0.02317815,-0.031860422,0.044769388,-0.015480076,-0.008018511,-0.043518163,0.023422293,-0.036895834,-0.040100187,-0.06039442,0.005691541,-0.036529623,-0.018585246,0.023635916,0.021408128,0.01152804,0.013984711,0
.007965106,-0.027801575,-0.0026226102,-0.021286057,0.006011976,0.027389586,0.0840456,0.07476823,0.028564516,0.015029941,0.029342717,-0.047760114,-0.0241242,0.031082222,0.017837564,0.023346,-0.002166753,0.046295267,-0.033111647,-0.017715493,-0.016937293,-0.0036678396,-0.01606754,-0.010551476,-0.060730115,-0.00067806395,0.005714429,0.009002705,-0.056549195,-0.053497434,0.027160704,-0.023803763,-0.02877814,0.03189094,-0.015838658,-0.025878964,0.00014889274,-0.02319341,-0.0028724733,0.053222775,-0.0040893643,-0.0034313279,0.00036740385,0.0049057114,0.011291529,0.056518678,-0.007972735,-0.041381925,-0.04467783,0.008804341,0.026519833,0.052337762,-0.021209763,-0.019119306,0.020126387,0.00997927,-0.007755297,0.020492598,-0.014915499,-0.038421717,0.037353598,-0.0050888173,0.029708927,0.04638682,-0.052917596,-0.0112839,0.0038433159,-0.011001611,-0.0023708395,0.015991246,-0.03381355,0.017135657,0.016418492,-0.029449528,0.047332868,-0.002183919,0.018173259,0.0017023124,0.01814274,0.01153567,0.00042152498,-0.021179246,0.058441292,-0.0020771073,-0.036102373,0.007740038,-0.0042419527,-0.02839667,0.007713335,-0.016708411,-0.020538375,0.0044899085,-0.011131311,0.0032844616,-0.036468588,-0.005886091,0.05523694,-0.015098605,-0.03161628,0.02462774,0.028488223,0.013404876,-0.012916594,-0.012420681,-0.036377035,-0.01335147,-0.040344328,0.029144352,-0.04174814,0.023315482,-0.02227788,-0.0022716573,-0.03152473,0.0482484,-0.027038634,-0.004882823,0.06152357,-0.003881463,-0.036041338,-0.0075645614,0.020660445,-0.07250992,-0.024429375,-0.036377035] + searchVector(db_name="memtensor_memos", vectorStr=vector) + + # insert_data(conn, data) diff --git a/src/memos/configs/graph_db.py b/src/memos/configs/graph_db.py index 2df917166..ce180606b 100644 --- a/src/memos/configs/graph_db.py +++ b/src/memos/configs/graph_db.py @@ -154,6 +154,59 @@ def validate_config(self): return self +class PolarDBGraphDBConfig(BaseConfig): + """ + PolarDB-specific configuration. 
+ + Key concepts: + - `db_name`: The name of the target PolarDB database + - `user_name`: Used for logical tenant isolation if needed + - `auto_create`: Whether to automatically create the target database if it does not exist + - `use_multi_db`: Whether to use multi-database mode for physical isolation + + Example: + --- + host = "localhost" + port = 5432 + user = "postgres" + password = "password" + db_name = "memos_db" + user_name = "alice" + use_multi_db = True + auto_create = True + """ + + host: str = Field(..., description="Database host") + port: int = Field(default=5432, description="Database port") + user: str = Field(..., description="Database user") + password: str = Field(..., description="Database password") + db_name: str = Field(..., description="The name of the target PolarDB database") + user_name: str | None = Field( + default=None, + description="Logical user or tenant ID for data isolation (optional, used in metadata tagging)", + ) + auto_create: bool = Field( + default=False, + description="Whether to auto-create the database if it does not exist", + ) + use_multi_db: bool = Field( + default=True, + description=( + "If True: use multi-database mode for physical isolation; " + "each tenant typically gets a separate database. " + "If False: use a single shared database with logical isolation by user_name." 
+ ), + ) + embedding_dimension: int = Field(default=1024, description="Dimension of vector embedding") + + @model_validator(mode="after") + def validate_config(self): + """Validate config.""" + if not self.db_name: + raise ValueError("`db_name` must be provided") + return self + + class GraphDBConfigFactory(BaseModel): backend: str = Field(..., description="Backend for graph database") config: dict[str, Any] = Field(..., description="Configuration for the graph database backend") @@ -162,6 +215,7 @@ class GraphDBConfigFactory(BaseModel): "neo4j": Neo4jGraphDBConfig, "neo4j-community": Neo4jCommunityGraphDBConfig, "nebular": NebulaGraphDBConfig, + "polardb": PolarDBGraphDBConfig, } @field_validator("backend") diff --git a/src/memos/graph_dbs/factory.py b/src/memos/graph_dbs/factory.py index 0b38287eb..ec9cbcda0 100644 --- a/src/memos/graph_dbs/factory.py +++ b/src/memos/graph_dbs/factory.py @@ -5,6 +5,7 @@ from memos.graph_dbs.nebular import NebulaGraphDB from memos.graph_dbs.neo4j import Neo4jGraphDB from memos.graph_dbs.neo4j_community import Neo4jCommunityGraphDB +from memos.graph_dbs.polardb import PolarDBGraphDB class GraphStoreFactory(BaseGraphDB): @@ -14,6 +15,7 @@ class GraphStoreFactory(BaseGraphDB): "neo4j": Neo4jGraphDB, "neo4j-community": Neo4jCommunityGraphDB, "nebular": NebulaGraphDB, + "polardb": PolarDBGraphDB, } @classmethod From a95f3231bcf5b8313f98a671849507a45f58efe1 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Sat, 18 Oct 2025 15:45:08 +0800 Subject: [PATCH 004/137] delete --- examples/basic_modules/polardb.py | 1 - 1 file changed, 1 deletion(-) delete mode 100644 examples/basic_modules/polardb.py diff --git a/examples/basic_modules/polardb.py b/examples/basic_modules/polardb.py deleted file mode 100644 index c5515c8d4..000000000 --- a/examples/basic_modules/polardb.py +++ /dev/null @@ -1 +0,0 @@ -#123 \ No newline at end of file From 64e842d4dbebabdd1888bc127370d9bb724c4a15 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sat, 18 Oct 2025 15:50:07 +0800 Subject: [PATCH 005/137] update get_memory_count --- src/memos/graph_dbs/polardb.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 0e072ab98..2555e4348 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -259,23 +259,25 @@ def create_index( except Exception as e: logger.warning(f"Failed to create indexes: {e}") - def get_memory_count(self, memory_type: str) -> int: + def get_memory_count(self, memory_type: str, user_name: str | None = None) -> int: """Get count of memory nodes by type.""" + user_name = user_name if user_name else self._get_config_value("user_name") query = f""" SELECT COUNT(*) FROM {self.db_name}_graph."Memory" WHERE properties->>'memory_type' = %s """ - params = [memory_type] + query += "\nAND properties->>'user_name' = %s" + params = [memory_type, user_name] - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - query += " AND properties->>'user_name' = %s" - params.append(self._get_config_value("user_name")) - - with self.connection.cursor() as cursor: - cursor.execute(query, params) - result = cursor.fetchone() - return result[0] if result else 0 + try: + with self.connection.cursor() as cursor: + cursor.execute(query, params) + result = cursor.fetchone() + return result[0] if result else 0 + except Exception as e: + logger.error(f"[get_memory_count] Failed: {e}") + return -1 def node_not_exist(self, scope: str) -> int: """Check if a node with given scope exists.""" From ac3da336882ea00281fff514e840803ee11fdb1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sat, 18 Oct 2025 15:55:56 +0800 Subject: [PATCH 006/137] update get_memory_count --- examples/basic_modules/polardb_search.py | 13 ++++++++----- 
src/memos/graph_dbs/polardb.py | 6 +++--- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index deed13b7d..49874bd23 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -116,16 +116,19 @@ def searchVector(db_name: str, vectorStr: list[float]): # detail = graph.get_node("bb079c5b-1937-4125-a9e5-55d4abe6c95d") # print("单个node:", detail) - ids = ['bb079c5b-1937-4125-a9e5-55d4abe6c95d', 'd66120af-992b-44c6-b261-a6ebe6bc57a5'] - detail_list = graph.get_nodes(ids) - print("1111多个node:", len(detail_list)) - - print("多个node:", detail_list) + # ids = ['bb079c5b-1937-4125-a9e5-55d4abe6c95d', 'd66120af-992b-44c6-b261-a6ebe6bc57a5'] + # detail_list = graph.get_nodes(ids) + # print("1111多个node:", len(detail_list)) + # + # print("多个node:", detail_list) # graph.update_node(id="000009999ef-926f-42e2-b7b5-0224daf0abcd", fields={"name": "new_name"}) # for node_i in nodes: # print("Search result:", graph.get_node(node_i["id"])) + count = graph.get_memory_count('UserMemory','memos07ba3d044650474c839e721f3a69d38a') + print("user count:", count) + if __name__ == "__main__": # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 2555e4348..6582e88b8 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -265,10 +265,10 @@ def get_memory_count(self, memory_type: str, user_name: str | None = None) -> in query = f""" SELECT COUNT(*) FROM {self.db_name}_graph."Memory" - WHERE properties->>'memory_type' = %s + WHERE ag_catalog.agtype_access_operator(properties, '"memory_type"'::agtype) = %s::agtype """ - query += "\nAND properties->>'user_name' = %s" - params = [memory_type, user_name] + query += "\nAND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" + params = 
[f'"{memory_type}"', f'"{user_name}"'] try: with self.connection.cursor() as cursor: From 1d5581a0ced560c9aeeab8759176986152f565cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sat, 18 Oct 2025 16:17:52 +0800 Subject: [PATCH 007/137] update node_not_exist --- examples/basic_modules/polardb_search.py | 10 +++++++++ src/memos/graph_dbs/polardb.py | 27 +++++++++++++----------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index 49874bd23..975265422 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -108,27 +108,37 @@ def getPolarDb(db_name): def searchVector(db_name: str, vectorStr: list[float]): graph = getPolarDb(db_name) + + # 1,查询search_by_embedding # nodes = graph.search_by_embedding(vector=vectorStr, top_k=1) # print("search_by_embedding nodes:", len(nodes)) # for node_i in nodes: # print("Search result:", graph.get_node(node_i["id"])) + # 2,查询单个get_node # detail = graph.get_node("bb079c5b-1937-4125-a9e5-55d4abe6c95d") # print("单个node:", detail) + # 3,查询多个get_nodes # ids = ['bb079c5b-1937-4125-a9e5-55d4abe6c95d', 'd66120af-992b-44c6-b261-a6ebe6bc57a5'] # detail_list = graph.get_nodes(ids) # print("1111多个node:", len(detail_list)) # # print("多个node:", detail_list) + # 4,更新 update_node # graph.update_node(id="000009999ef-926f-42e2-b7b5-0224daf0abcd", fields={"name": "new_name"}) # for node_i in nodes: # print("Search result:", graph.get_node(node_i["id"])) + #4,查询 get_memory_count count = graph.get_memory_count('UserMemory','memos07ba3d044650474c839e721f3a69d38a') print("user count:", count) + # 4,判断node是否存在 node_not_exist 1代表存在, + isNodeExist = graph.node_not_exist('UserMemory', 'memos07ba3d044650474c839e721f3a69d38a') + print("user isNodeExist:", isNodeExist) + if __name__ == "__main__": # 
handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 6582e88b8..87df02b67 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -279,24 +279,27 @@ def get_memory_count(self, memory_type: str, user_name: str | None = None) -> in logger.error(f"[get_memory_count] Failed: {e}") return -1 - def node_not_exist(self, scope: str) -> int: + def node_not_exist(self, scope: str, user_name: str | None = None) -> int: """Check if a node with given scope exists.""" + user_name = user_name if user_name else self._get_config_value("user_name") query = f""" SELECT id FROM {self.db_name}_graph."Memory" - WHERE properties->>'memory_type' = %s - LIMIT 1 + WHERE ag_catalog.agtype_access_operator(properties, '"memory_type"'::agtype) = %s::agtype """ - params = [scope] - - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - query += " AND properties->>'user_name' = %s" - params.append(self._get_config_value("user_name")) + query += "\nAND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" + query += "\nLIMIT 1" + params = [f'"{scope}"', f'"{user_name}"'] - with self.connection.cursor() as cursor: - cursor.execute(query, params) - result = cursor.fetchone() - return result is None + try: + with self.connection.cursor() as cursor: + cursor.execute(query, params) + result = cursor.fetchone() + print(f"[node_not_exist] Query result: {result}") + return len(result) + except Exception as e: + logger.error(f"[node_not_exist] Query failed: {e}", exc_info=True) + raise def remove_oldest_memory(self, memory_type: str, keep_latest: int) -> None: """ From 57e5b4bebd4336684df7f5bd4b514c611a09a1a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sat, 18 Oct 2025 16:38:16 +0800 Subject: [PATCH 008/137] update remove_oldest_memory --- 
examples/basic_modules/polardb_search.py | 4 ++ src/memos/graph_dbs/polardb.py | 67 ++++++++++++++++-------- 2 files changed, 48 insertions(+), 23 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index 975265422..f016fbe34 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -139,6 +139,10 @@ def searchVector(db_name: str, vectorStr: list[float]): isNodeExist = graph.node_not_exist('UserMemory', 'memos07ba3d044650474c839e721f3a69d38a') print("user isNodeExist:", isNodeExist) + # 6,删除跳过多少行之后的数据remove_oldest_memory + remove_oldest_memory = graph.remove_oldest_memory('UserMemory', 2,'memos07ba3d044650474c839e721f3a69d38a') + print("user remove_oldest_memory:", remove_oldest_memory) + if __name__ == "__main__": # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 87df02b67..681e56d1b 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -301,34 +301,55 @@ def node_not_exist(self, scope: str, user_name: str | None = None) -> int: logger.error(f"[node_not_exist] Query failed: {e}", exc_info=True) raise - def remove_oldest_memory(self, memory_type: str, keep_latest: int) -> None: + def remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: str | None = None) -> None: """ Remove all WorkingMemory nodes except the latest `keep_latest` entries. + + Args: + memory_type (str): Memory type (e.g., 'WorkingMemory', 'LongTermMemory'). + keep_latest (int): Number of latest WorkingMemory entries to keep. 
+ user_name (str, optional): User name for filtering in non-multi-db mode """ - query = f""" - DELETE FROM {self.db_name}_graph."Memory" - WHERE properties->>'memory_type' = %s - AND id NOT IN ( - SELECT id FROM ( - SELECT id FROM {self.db_name}_graph."Memory" - WHERE properties->>'memory_type' = %s - ORDER BY (properties->>'updated_at')::timestamp DESC - LIMIT %s - ) AS keep_ids - ) + user_name = user_name if user_name else self._get_config_value("user_name") + + # 使用真正的 OFFSET 逻辑,与 nebular.py 保持一致 + # 先找到要删除的节点ID,然后删除它们 + select_query = f""" + SELECT id FROM {self.db_name}_graph."Memory" + WHERE ag_catalog.agtype_access_operator(properties, '"memory_type"'::agtype) = %s::agtype + AND ag_catalog.agtype_access_operator(properties, '"user_name"'::agtype) = %s::agtype + ORDER BY ag_catalog.agtype_access_operator(properties, '"updated_at"'::agtype) DESC + OFFSET %s """ - params = [memory_type, memory_type, keep_latest] + select_params = [f'"{memory_type}"', f'"{user_name}"', keep_latest] + print(f"[remove_oldest_memory] Select query: {select_query}") + print(f"[remove_oldest_memory] Select params: {select_params}") + + try: + with self.connection.cursor() as cursor: + # 执行查询获取要删除的ID列表 + cursor.execute(select_query, select_params) + ids_to_delete = [row[0] for row in cursor.fetchall()] - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - query = query.replace("WHERE properties->>'memory_type' = %s", - "WHERE properties->>'memory_type' = %s AND properties->>'user_name' = %s") - query = query.replace("WHERE properties->>'memory_type' = %s", - "WHERE properties->>'memory_type' = %s AND properties->>'user_name' = %s") - params = [memory_type, self._get_config_value("user_name"), memory_type, - self._get_config_value("user_name"), keep_latest] - - # Simplified implementation - just log the operation - logger.info(f"Removing oldest {memory_type} memories, keeping {keep_latest} latest") + if not ids_to_delete: + logger.info(f"No 
{memory_type} memories to remove for user {user_name}") + return + + # 构建删除查询 + placeholders = ','.join(['%s'] * len(ids_to_delete)) + delete_query = f""" + DELETE FROM {self.db_name}_graph."Memory" + WHERE id IN ({placeholders}) + """ + delete_params = ids_to_delete + + # 执行删除 + cursor.execute(delete_query, delete_params) + deleted_count = cursor.rowcount + logger.info(f"Removed {deleted_count} oldest {memory_type} memories, keeping {keep_latest} latest for user {user_name}") + except Exception as e: + logger.error(f"[remove_oldest_memory] Failed: {e}", exc_info=True) + raise def update_node(self, id: str, fields: dict[str, Any]) -> None: """Update node fields in PolarDB.""" From 6e6febc46f13f7dbaa0d6c136c09af0e9548976e Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Sat, 18 Oct 2025 16:48:57 +0800 Subject: [PATCH 009/137] fix --- src/memos/graph_dbs/polardb.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 681e56d1b..07f07ee30 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -537,8 +537,9 @@ def get_node(self, id: str, **kwargs) -> dict[str, Any] | None: if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): user_name = self._get_config_value("user_name") - query += " AND properties::text LIKE %s" - params.append(f"%{user_name}%") + # query += " AND properties::text LIKE %s" + query += f""" AND ag_catalog.agtype_access_operator(properties, '"user_name"'::agtype) = %s::agtype""" + params.append(f'"{user_name}"') with self.connection.cursor() as cursor: cursor.execute(query, params) From 9aa5f676355382231b8077de8f00d8ba965dec6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sat, 18 Oct 2025 17:27:47 +0800 Subject: [PATCH 010/137] update get_node --- examples/basic_modules/polardb_search.py | 34 ++++----- src/memos/graph_dbs/polardb.py | 
94 +++++++++++++++--------- 2 files changed, 75 insertions(+), 53 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index f016fbe34..d29a4d9d2 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -110,16 +110,16 @@ def searchVector(db_name: str, vectorStr: list[float]): graph = getPolarDb(db_name) # 1,查询search_by_embedding - # nodes = graph.search_by_embedding(vector=vectorStr, top_k=1) - # print("search_by_embedding nodes:", len(nodes)) - # for node_i in nodes: - # print("Search result:", graph.get_node(node_i["id"])) + nodes = graph.search_by_embedding(vector=vectorStr, top_k=1) + print("search_by_embedding nodes:", len(nodes)) + for node_i in nodes: + print("Search result:", graph.get_node(node_i["id"][1:-1])) # 2,查询单个get_node - # detail = graph.get_node("bb079c5b-1937-4125-a9e5-55d4abe6c95d") + # detail = graph.get_node(id ="bb079c5b-1937-4125-a9e5-55d4abe6c95d",user_name='memosbfb3fb32032b4077a641404dc48739cd') # print("单个node:", detail) - - # 3,查询多个get_nodes + # + # # 3,查询多个get_nodes # ids = ['bb079c5b-1937-4125-a9e5-55d4abe6c95d', 'd66120af-992b-44c6-b261-a6ebe6bc57a5'] # detail_list = graph.get_nodes(ids) # print("1111多个node:", len(detail_list)) @@ -132,16 +132,16 @@ def searchVector(db_name: str, vectorStr: list[float]): # print("Search result:", graph.get_node(node_i["id"])) #4,查询 get_memory_count - count = graph.get_memory_count('UserMemory','memos07ba3d044650474c839e721f3a69d38a') - print("user count:", count) - - # 4,判断node是否存在 node_not_exist 1代表存在, - isNodeExist = graph.node_not_exist('UserMemory', 'memos07ba3d044650474c839e721f3a69d38a') - print("user isNodeExist:", isNodeExist) - - # 6,删除跳过多少行之后的数据remove_oldest_memory - remove_oldest_memory = graph.remove_oldest_memory('UserMemory', 2,'memos07ba3d044650474c839e721f3a69d38a') - print("user remove_oldest_memory:", remove_oldest_memory) + # count = 
graph.get_memory_count('UserMemory','memos07ba3d044650474c839e721f3a69d38a') + # print("user count:", count) + # + # # 4,判断node是否存在 node_not_exist 1代表存在, + # isNodeExist = graph.node_not_exist('UserMemory', 'memos07ba3d044650474c839e721f3a69d38a') + # print("user isNodeExist:", isNodeExist) + # + # # 6,删除跳过多少行之后的数据remove_oldest_memory + # remove_oldest_memory = graph.remove_oldest_memory('UserMemory', 2,'memos07ba3d044650474c839e721f3a69d38a') + # print("user remove_oldest_memory:", remove_oldest_memory) if __name__ == "__main__": diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 07f07ee30..9f643bc7a 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -522,49 +522,71 @@ def edge_exists( result = cursor.fetchone() return result is not None - def get_node(self, id: str, **kwargs) -> dict[str, Any] | None: - """Retrieve the metadata and memory of a node.""" + def get_node(self, id: str, include_embedding: bool = False, user_name: str | None = None) -> dict[str, Any] | None: + """ + Retrieve a Memory node by its unique ID. + + Args: + id (str): Node ID (Memory.id) + include_embedding: with/without embedding + user_name (str, optional): User name for filtering in non-multi-db mode + + Returns: + dict: Node properties as key-value pairs, or None if not found. 
+ """ + # 构建查询字段 + if include_embedding: + select_fields = "id, properties, embedding" + else: + select_fields = "id, properties" + query = f""" - SELECT id, properties, embedding + SELECT {select_fields} FROM {self.db_name}_graph."Memory" WHERE ag_catalog.agtype_access_operator(properties, '"id"'::agtype) = %s::agtype """ - # 如果id已经包含引号,则直接使用;否则添加引号 - if id.startswith('"') and id.endswith('"'): - params = [id] - else: - params = [f'"{id}"'] - - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - user_name = self._get_config_value("user_name") - # query += " AND properties::text LIKE %s" - query += f""" AND ag_catalog.agtype_access_operator(properties, '"user_name"'::agtype) = %s::agtype""" + params = [f'"{id}"'] + + # 只有在提供了 user_name 参数时才添加用户过滤 + if user_name is not None: + query += "\nAND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" params.append(f'"{user_name}"') - with self.connection.cursor() as cursor: - cursor.execute(query, params) - result = cursor.fetchone() + try: + with self.connection.cursor() as cursor: + cursor.execute(query, params) + result = cursor.fetchone() - if result: - node_id, properties_json, embedding_json = result - # Parse properties from JSONB if it's a string - if isinstance(properties_json, str): - try: - properties = json.loads(properties_json) - except (json.JSONDecodeError, TypeError): - logger.warning(f"Failed to parse properties for node {id}") - properties = {} - else: - properties = properties_json if properties_json else {} - - # Parse embedding from JSONB if it exists - if embedding_json is not None: - try: - embedding = json.loads(embedding_json) if isinstance(embedding_json, str) else embedding_json - properties["embedding"] = embedding - except (json.JSONDecodeError, TypeError): - logger.warning(f"Failed to parse embedding for node {id}") - return self._parse_node({"id": id, "memory": properties.get("memory", ""), "metadata": properties}) + 
if result: + if include_embedding: + node_id, properties_json, embedding_json = result + else: + node_id, properties_json = result + embedding_json = None + + # Parse properties from JSONB if it's a string + if isinstance(properties_json, str): + try: + properties = json.loads(properties_json) + except (json.JSONDecodeError, TypeError): + logger.warning(f"Failed to parse properties for node {id}") + properties = {} + else: + properties = properties_json if properties_json else {} + + # Parse embedding from JSONB if it exists and include_embedding is True + if include_embedding and embedding_json is not None: + try: + embedding = json.loads(embedding_json) if isinstance(embedding_json, str) else embedding_json + properties["embedding"] = embedding + except (json.JSONDecodeError, TypeError): + logger.warning(f"Failed to parse embedding for node {id}") + + return self._parse_node({"id": id, "memory": properties.get("memory", ""), "metadata": properties}) + return None + + except Exception as e: + logger.error(f"[get_node] Failed to retrieve node '{id}': {e}", exc_info=True) return None def get_nodes(self, ids: list[str], **kwargs) -> list[dict[str, Any]]: From 5146de8338c4b14d42a4fec60e1169b2cce9da02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sat, 18 Oct 2025 17:36:22 +0800 Subject: [PATCH 011/137] update get_node --- examples/basic_modules/polardb_search.py | 8 ++++---- src/memos/graph_dbs/polardb.py | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index d29a4d9d2..17c20a726 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -132,12 +132,12 @@ def searchVector(db_name: str, vectorStr: list[float]): # print("Search result:", graph.get_node(node_i["id"])) #4,查询 get_memory_count - # count = 
graph.get_memory_count('UserMemory','memos07ba3d044650474c839e721f3a69d38a') - # print("user count:", count) + count = graph.get_memory_count('UserMemory','memos07ba3d044650474c839e721f3a69d38a') + print("user count:", count) # # # 4,判断node是否存在 node_not_exist 1代表存在, - # isNodeExist = graph.node_not_exist('UserMemory', 'memos07ba3d044650474c839e721f3a69d38a') - # print("user isNodeExist:", isNodeExist) + isNodeExist = graph.node_not_exist('UserMemory', 'memos07ba3d044650474c839e721f3a69d38a') + print("user isNodeExist:", isNodeExist) # # # 6,删除跳过多少行之后的数据remove_oldest_memory # remove_oldest_memory = graph.remove_oldest_memory('UserMemory', 2,'memos07ba3d044650474c839e721f3a69d38a') diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 9f643bc7a..049ae1316 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -270,6 +270,8 @@ def get_memory_count(self, memory_type: str, user_name: str | None = None) -> in query += "\nAND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" params = [f'"{memory_type}"', f'"{user_name}"'] + print(f"[get_memory_count] Query: {query}, Params: {params}") + try: with self.connection.cursor() as cursor: cursor.execute(query, params) @@ -291,6 +293,8 @@ def node_not_exist(self, scope: str, user_name: str | None = None) -> int: query += "\nLIMIT 1" params = [f'"{scope}"', f'"{user_name}"'] + print(f"[node_not_exist] Query: {query}, Params: {params}") + try: with self.connection.cursor() as cursor: cursor.execute(query, params) From 47862b43cd3bb0abf102e3e19d9374a07d570461 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sat, 18 Oct 2025 18:33:16 +0800 Subject: [PATCH 012/137] update update_node --- examples/basic_modules/polardb_search.py | 3 ++ src/memos/graph_dbs/polardb.py | 62 +++++++++++++++--------- 2 files changed, 41 insertions(+), 24 deletions(-) diff --git a/examples/basic_modules/polardb_search.py 
b/examples/basic_modules/polardb_search.py index 17c20a726..1d5a79854 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -143,6 +143,9 @@ def searchVector(db_name: str, vectorStr: list[float]): # remove_oldest_memory = graph.remove_oldest_memory('UserMemory', 2,'memos07ba3d044650474c839e721f3a69d38a') # print("user remove_oldest_memory:", remove_oldest_memory) + isNodeExist = graph.update_node(id="bb079c5b-1937-4125-a9e5-55d4abe6c95d", fields={"status": "inactived","tags": ["yoga", "travel11111111", "local studios5667888"]}) + print("user update_node:", isNodeExist) + if __name__ == "__main__": # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 049ae1316..299554d09 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -355,51 +355,66 @@ def remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: st logger.error(f"[remove_oldest_memory] Failed: {e}", exc_info=True) raise - def update_node(self, id: str, fields: dict[str, Any]) -> None: - """Update node fields in PolarDB.""" + def update_node(self, id: str, fields: dict[str, Any], user_name: str | None = None) -> None: + """ + Update node fields in PolarDB, auto-converting `created_at` and `updated_at` to datetime type if present. 
+ """ if not fields: return - # Get current properties - current_node = self.get_node(id) + # 获取当前节点 + current_node = self.get_node(id, user_name=user_name) if not current_node: return - # Update properties + # 更新属性,但保留原始的id字段和memory字段 properties = current_node["metadata"].copy() + original_id = properties.get("id", id) # 保留原始ID + original_memory = current_node.get("memory", "") # 保留原始memory + + # 如果fields中有memory字段,使用它;否则保留原始的memory + if "memory" in fields: + original_memory = fields.pop("memory") + properties.update(fields) + properties["id"] = original_id # 确保ID不被覆盖 + properties["memory"] = original_memory # 确保memory不被覆盖 - # Handle embedding separately - # Handle embedding update - store in separate column + # 处理 embedding 字段 embedding_vector = None if "embedding" in fields: embedding_vector = fields.pop("embedding") if not isinstance(embedding_vector, list): embedding_vector = None - # Build query based on whether embedding is being updated + # 构建更新查询 if embedding_vector is not None: query = f""" UPDATE {self.db_name}_graph."Memory" - SET properties = %s, embedding = %s, updated_at = CURRENT_TIMESTAMP - WHERE id = %s + SET properties = %s, embedding = %s + WHERE ag_catalog.agtype_access_operator(properties, '"id"'::agtype) = %s::agtype """ - params = [json.dumps(properties), json.dumps(embedding_vector), id] + params = [json.dumps(properties), json.dumps(embedding_vector), f'"{id}"'] else: query = f""" UPDATE {self.db_name}_graph."Memory" - SET properties = %s, updated_at = CURRENT_TIMESTAMP - WHERE id = %s + SET properties = %s + WHERE ag_catalog.agtype_access_operator(properties, '"id"'::agtype) = %s::agtype """ - params = [json.dumps(properties), id] + params = [json.dumps(properties), f'"{id}"'] - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - user_name = self._get_config_value("user_name") - query += " AND properties::text LIKE %s" - params.append(f"%{user_name}%") + # 只有在提供了 user_name 参数时才添加用户过滤 + if 
user_name is not None: + query += "\nAND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" + params.append(f'"{user_name}"') - with self.connection.cursor() as cursor: - cursor.execute(query, params) + print(f"[update_node] query: {query}, params: {params}") + try: + with self.connection.cursor() as cursor: + cursor.execute(query, params) + except Exception as e: + logger.error(f"[update_node] Failed to update node '{id}': {e}", exc_info=True) + raise def delete_node(self, id: str) -> None: """Delete a node from the graph.""" @@ -556,6 +571,7 @@ def get_node(self, id: str, include_embedding: bool = False, user_name: str | No query += "\nAND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" params.append(f'"{user_name}"') + print(f"[get_node] query: {query}, params: {params}") try: with self.connection.cursor() as cursor: cursor.execute(query, params) @@ -586,7 +602,7 @@ def get_node(self, id: str, include_embedding: bool = False, user_name: str | No except (json.JSONDecodeError, TypeError): logger.warning(f"Failed to parse embedding for node {id}") - return self._parse_node({"id": id, "memory": properties.get("memory", ""), "metadata": properties}) + return self._parse_node({"id": id, "memory": properties.get("memory", ""), **properties}) return None except Exception as e: @@ -1268,10 +1284,8 @@ def _parse_node(self, node_data: dict[str, Any]) -> dict[str, Any]: if time_field in node and hasattr(node[time_field], "isoformat"): node[time_field] = node[time_field].isoformat() - # Remove user_name from output - node.pop("user_name", None) - # 不再对sources和usage字段进行反序列化,保持List[str]格式 + # 不再移除user_name字段,保持所有字段 return {"id": node.pop("id"), "memory": node.pop("memory", ""), "metadata": node} From 2ce2c060bba27d899f8892ab7d11ea48b23d4c41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sat, 18 Oct 2025 19:16:59 +0800 Subject: [PATCH 013/137] update 
delete_node --- examples/basic_modules/polardb_search.py | 5 ++++ src/memos/graph_dbs/polardb.py | 30 ++++++++++++++++-------- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index 1d5a79854..07f85109c 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -143,9 +143,14 @@ def searchVector(db_name: str, vectorStr: list[float]): # remove_oldest_memory = graph.remove_oldest_memory('UserMemory', 2,'memos07ba3d044650474c839e721f3a69d38a') # print("user remove_oldest_memory:", remove_oldest_memory) + # 7,更新 update_node isNodeExist = graph.update_node(id="bb079c5b-1937-4125-a9e5-55d4abe6c95d", fields={"status": "inactived","tags": ["yoga", "travel11111111", "local studios5667888"]}) print("user update_node:", isNodeExist) + # 8,删除 delete_node + isNodeDeleted = graph.delete_node(id="bb079c5b-1937-4125-a9e5-55d4abe6c95d", user_name='memosbfb3fb32032b4077a641404dc48739cd') + print("user isNodeDeleted:", isNodeDeleted) + if __name__ == "__main__": # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 299554d09..62c6b8091 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -416,21 +416,31 @@ def update_node(self, id: str, fields: dict[str, Any], user_name: str | None = N logger.error(f"[update_node] Failed to update node '{id}': {e}", exc_info=True) raise - def delete_node(self, id: str) -> None: - """Delete a node from the graph.""" + def delete_node(self, id: str, user_name: str | None = None) -> None: + """ + Delete a node from the graph. + Args: + id: Node identifier to delete. 
+ user_name (str, optional): User name for filtering in non-multi-db mode + """ query = f""" DELETE FROM {self.db_name}_graph."Memory" - WHERE id = %s + WHERE ag_catalog.agtype_access_operator(properties, '"id"'::agtype) = %s::agtype """ - params = [id] + params = [f'"{id}"'] - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - user_name = self._get_config_value("user_name") - query += " AND properties::text LIKE %s" - params.append(f"%{user_name}%") + # 只有在提供了 user_name 参数时才添加用户过滤 + if user_name is not None: + query += "\nAND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" + params.append(f'"{user_name}"') - with self.connection.cursor() as cursor: - cursor.execute(query, params) + print(f"[delete_node] query: {query}, params: {params}") + try: + with self.connection.cursor() as cursor: + cursor.execute(query, params) + except Exception as e: + logger.error(f"[delete_node] Failed to delete node '{id}': {e}", exc_info=True) + raise def add_edge(self, source_id: str, target_id: str, type: str) -> None: """ From 5e48c5416c20f9b3bda3c7f23ad69bd61b39624e Mon Sep 17 00:00:00 2001 From: caocuilong <13282138256@163.com> Date: Sat, 18 Oct 2025 19:27:31 +0800 Subject: [PATCH 014/137] add edge --- examples/basic_modules/importPolarDbEdge.py | 128 ++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 examples/basic_modules/importPolarDbEdge.py diff --git a/examples/basic_modules/importPolarDbEdge.py b/examples/basic_modules/importPolarDbEdge.py new file mode 100644 index 000000000..d5ed0a225 --- /dev/null +++ b/examples/basic_modules/importPolarDbEdge.py @@ -0,0 +1,128 @@ +import os +import json +import psycopg2 + +# 数据库连接配置 +DB_CONFIG = { + 'host': 'memory.pg.polardb.rds.aliyuncs.com', + 'port': 5432, + 'database': 'memtensor_memos', + 'user': 'adimin', + 'password': 'Openmem0925' +} + +# 顶层目录 +EDGE_ROOT_DIR = r"C:\Users\13282\Desktop\nebular\export13" + +# 合法的关系文件夹(白名单) 
+VALID_REL_TYPES = { + "AGGREGATE_TO", + "FOLLOWS", + "INFERS", + "MERGED_TO", + "RELATE_TO", + "PARENT" +} + +# 批量大小 +BATCH_SIZE = 1000 + + +def create_elabel(conn, label_name): + """创建关系类型(若不存在)""" + with conn.cursor() as cur: + print(f"🪶 Creating elabel: {label_name}") + try: + cur.execute(f"SELECT create_elabel('memtensor_memos_graph', '{label_name}');") + conn.commit() + except Exception as e: + conn.rollback() + if "already exists" in str(e): + print(f"ℹ️ Label '{label_name}' already exists, skipping.") + else: + print(f"⚠️ Failed to create label {label_name}: {e}") + + +def insert_edges(conn, edges, label_name): + """批量插入边数据(若已存在则跳过)""" + with conn.cursor() as cur: + for e in edges: + src_id = e["src_id"] + dst_id = e["dst_id"] + user_name = e["user_name"] + + sql = f""" + INSERT INTO memtensor_memos_graph."{label_name}"(id, start_id, end_id, properties) + SELECT + ag_catalog._next_graph_id('memtensor_memos_graph'::name, '{label_name}'), + ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{src_id}'::text::cstring), + ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{dst_id}'::text::cstring), + jsonb_build_object('user_name', '{user_name}')::text::agtype + WHERE NOT EXISTS ( + SELECT 1 FROM memtensor_memos_graph."{label_name}" + WHERE start_id = ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{src_id}'::text::cstring) + AND end_id = ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{dst_id}'::text::cstring) + ); + """ + cur.execute(sql) + conn.commit() + + +def process_relation_folder(conn, folder_path, label_name): + """处理一个关系文件夹""" + print(f"\n🔗 Processing relation: {label_name}") + create_elabel(conn, label_name) + for root, _, files in os.walk(folder_path): + for file in files: + if not (file.endswith(".json") or file.endswith(".txt")): + continue + file_path = os.path.join(root, file) + print(f"📄 Reading file: {file_path}") + batch = [] + with 
open(file_path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + batch.append(obj) + except json.JSONDecodeError: + print(f"⚠️ JSON decode error in {file_path}: {line}") + continue + + if len(batch) >= BATCH_SIZE: + insert_edges(conn, batch, label_name) + print(f"✅ Inserted (or skipped) {len(batch)} edges.") + batch.clear() + + if batch: + insert_edges(conn, batch, label_name) + print(f"✅ Inserted (or skipped) {len(batch)} edges.") + + +def main(): + conn = psycopg2.connect(**DB_CONFIG) + try: + for folder_name in os.listdir(EDGE_ROOT_DIR): + folder_path = os.path.join(EDGE_ROOT_DIR, folder_name) + if not os.path.isdir(folder_path): + continue + + # 只处理白名单中的关系类型 + if folder_name.upper() not in VALID_REL_TYPES: + print(f"🚫 Skipping non-relation folder: {folder_name}") + continue + + # 保持大小写一致性 + label_name = folder_name + process_relation_folder(conn, folder_path, label_name) + + print("\n🎉 All relation folders processed successfully!") + + finally: + conn.close() + + +if __name__ == "__main__": + main() From bc826a192a395b3db9aeb6009a5e9387b8ac24cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sat, 18 Oct 2025 20:11:39 +0800 Subject: [PATCH 015/137] add create_extension,create_graph,create_edge --- src/memos/graph_dbs/polardb.py | 93 +++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 6 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 62c6b8091..45593e713 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -442,14 +442,86 @@ def delete_node(self, id: str, user_name: str | None = None) -> None: logger.error(f"[delete_node] Failed to delete node '{id}': {e}", exc_info=True) raise - def add_edge(self, source_id: str, target_id: str, type: str) -> None: + def create_extension(self): + extensions = [ + ("polar_age", "图引擎"), + ("vector", "向量引擎") + ] 
+ try: + with self.connection.cursor() as cursor: + # 确保在正确的数据库上下文中 + cursor.execute(f"SELECT current_database();") + current_db = cursor.fetchone()[0] + print(f"当前数据库上下文: {current_db}") + + for ext_name, ext_desc in extensions: + try: + cursor.execute(f"create extension if not exists {ext_name};") + print(f"✅ Extension '{ext_name}' ({ext_desc}) ensured.") + except Exception as e: + if "already exists" in str(e): + print(f"ℹ️ Extension '{ext_name}' ({ext_desc}) already exists.") + else: + print(f"⚠️ Failed to create extension '{ext_name}' ({ext_desc}): {e}") + logger.error(f"Failed to create extension '{ext_name}': {e}", exc_info=True) + except Exception as e: + print(f"⚠️ Failed to access database context: {e}") + logger.error(f"Failed to access database context: {e}", exc_info=True) + + def create_graph(self): + try: + with self.connection.cursor() as cursor: + cursor.execute(f""" + SELECT COUNT(*) FROM ag_catalog.ag_graph + WHERE name = '{self.db_name}'; + """) + graph_exists = cursor.fetchone()[0] > 0 + + if graph_exists: + print(f"ℹ️ Graph '{self.db_name}' already exists.") + else: + cursor.execute(f"select create_graph('{self.db_name}');") + print(f"✅ Graph database '{self.db_name}' created.") + except Exception as e: + print(f"⚠️ Failed to create graph '{self.db_name}': {e}") + logger.error(f"Failed to create graph '{self.db_name}': {e}", exc_info=True) + + def create_edge(self): + """创建所有有效的边类型,如果不存在的话""" + VALID_REL_TYPES = { + "AGGREGATE_TO", + "FOLLOWS", + "INFERS", + "MERGED_TO", + "RELATE_TO", + "PARENT" + } + + for label_name in VALID_REL_TYPES: + print(f"🪶 Creating elabel: {label_name}") + try: + with self.connection.cursor() as cursor: + cursor.execute(f"select create_elabel('{self.db_name}', '{label_name}');") + print(f"✅ Successfully created elabel: {label_name}") + except Exception as e: + if "already exists" in str(e): + print(f"ℹ️ Label '{label_name}' already exists, skipping.") + else: + print(f"⚠️ Failed to create label {label_name}: {e}") + 
logger.error(f"Failed to create elabel '{label_name}': {e}", exc_info=True) + + def add_edge(self, source_id: str, target_id: str, type: str, user_name: str | None = None) -> None: """ Create an edge from source node to target node. Args: source_id: ID of the source node. target_id: ID of the target node. type: Relationship type (e.g., 'RELATE_TO', 'PARENT'). + user_name (str, optional): User name for filtering in non-multi-db mode """ + if not source_id or not target_id: + raise ValueError("[add_edge] source_id and target_id must be provided") + # 确保边表存在 try: with self.connection.cursor() as cursor: @@ -475,16 +547,25 @@ def add_edge(self, source_id: str, target_id: str, type: str) -> None: logger.warning(f"Cannot create edge: source or target node does not exist") return + # 构建边的属性 + properties = {} + if user_name is not None: + properties["user_name"] = user_name + # 添加边 query = f""" - INSERT INTO {self.db_name}_graph."Edges" (source_id, target_id, edge_type) - VALUES (%s, %s, %s) + INSERT INTO {self.db_name}_graph."Edges" (source_id, target_id, edge_type, properties) + VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING """ - with self.connection.cursor() as cursor: - cursor.execute(query, (source_id, target_id, type)) - logger.info(f"Edge created: {source_id} -[{type}]-> {target_id}") + try: + with self.connection.cursor() as cursor: + cursor.execute(query, (source_id, target_id, type, json.dumps(properties))) + logger.info(f"Edge created: {source_id} -[{type}]-> {target_id}") + except Exception as e: + logger.error(f"Failed to insert edge: {e}", exc_info=True) + raise def delete_edge(self, source_id: str, target_id: str, type: str) -> None: """ From 31c5d70cbb2cb16ce5e031a5a0e8440d8127c8fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sat, 18 Oct 2025 20:32:11 +0800 Subject: [PATCH 016/137] add add_edge --- examples/basic_modules/polardb_search.py | 10 +++-- src/memos/graph_dbs/polardb.py | 47 
+++++++----------------- 2 files changed, 20 insertions(+), 37 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index 07f85109c..1a79f6f14 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -151,11 +151,15 @@ def searchVector(db_name: str, vectorStr: list[float]): isNodeDeleted = graph.delete_node(id="bb079c5b-1937-4125-a9e5-55d4abe6c95d", user_name='memosbfb3fb32032b4077a641404dc48739cd') print("user isNodeDeleted:", isNodeDeleted) +# 9,添加边 add_edge +def add_edge(db_name: str, source_id: str, target_id: str, edge_type: str = "Memory", user_name: str = None): + graph = getPolarDb(db_name) + graph.add_edge(source_id, target_id, edge_type, user_name) if __name__ == "__main__": # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="edge") - vector = [-0.02954108,-0.024826104,-0.07641619,0.01464847,-0.0032501293,-0.044220068,0.024322564,0.018768352,-0.0030803748,-0.017776528,0.022201587,-0.0036640249,-0.013397247,-0.02462774,0.021743823,0.0115509285,-0.023223927,0.024093682,-0.0071144262,-0.014984163,-0.013305694,-0.022064257,0.055786256,-0.012374905,0.019714398,0.008865376,-0.00803377,-0.006851211,0.036529623,0.042785738,0.05923475,0.007347123,0.058776986,0.03900155,-0.05065929,-0.060730115,0.0042343233,-0.0030670234,-0.02864081,0.047332868,-0.007789629,-0.011741664,0.019638104,0.007427232,-0.036529623,-0.034484938,-0.046203714,0.009078999,-0.026367245,-0.049896352,0.027938904,-0.048950303,0.04299936,-0.024597222,-0.018600505,0.026321469,-0.03240974,-0.07080094,-0.058105595,-0.0026359616,-0.023757987,0.014465364,-0.02682501,-0.014427217,-0.01814274,0.07244889,-0.0063705584,0.019378705,-0.05078136,-0.016525306,0.013534576,-0.01814274,-0.016098058,0.018295329,-0.061645642,0.023773246,0.03549202,-0.034820635,0.029464787,0.026535092,-0.024398858,-0.004184732,
0.040374845,0.0037460409,0.014419587,-0.009384176,0.010055564,0.019546552,0.023498587,-0.01620487,-0.013389617,-0.016418492,0.028198304,-0.051117055,0.023101857,0.0705568,-0.03991708,-0.039428797,0.006595626,0.0039367764,0.034515455,-0.02046208,-0.001202586,-0.018569987,-0.009666464,-0.018966716,-0.0052719233,0.0012187985,0.044403173,-0.018295329,0.01890568,0.037994467,0.020538375,0.012939482,-0.0036048968,-0.01968388,-0.0021209763,0.01451114,0.024429375,-0.031311106,-0.017852822,-0.0057830936,0.011459376,-0.020523116,-0.021209763,0.0082550235,-0.0035057145,0.031311106,0.0063438555,0.012374905,0.028595034,0.03552254,-0.041961763,0.0062675616,0.030426092,-0.030548163,0.058502328,0.029205387,-0.02604681,-0.025756892,0.004211435,0.05160534,0.0092239585,-0.049987905,-0.0013847381,-0.055267457,0.027282774,0.012535123,-0.009971641,-0.0040741055,-0.022613576,0.018478435,0.026031552,0.02720648,-0.04782115,0.007400529,0.034851152,0.042694185,-0.031005928,-0.0019149822,0.00042104814,-0.0049972646,-0.02539068,0.031738352,0.010337852,-0.036102373,0.014213594,0.011878993,0.0008330364,-0.035583574,0.0034465867,-0.0082550235,0.009887717,0.012908964,0.018875163,0.01814274,0.091552936,0.018768352,0.03305061,-0.054260373,-0.02682501,0.03628548,0.031158516,-0.048675645,-0.017288245,0.02929694,0.013450652,-0.0058365,0.032745432,-0.027160704,-0.015655553,-0.032074045,0.020248458,0.004760753,0.04730235,-0.024841363,0.020248458,0.013786347,-0.010810876,-0.033874586,0.008491535,0.023651175,0.016021764,0.0039329617,-0.04626475,0.0050621144,-0.02720648,-0.008140582,0.017440835,-0.029830998,0.04861461,0.042327974,0.01529697,-0.027191222,-0.03515633,0.018264811,0.038696375,-0.025543269,0.018676799,0.023757987,0.0058670174,-0.0045013526,0.017059363,-0.03720101,-0.03668221,-0.0137253115,0.024581963,0.025589045,0.01270297,-0.027450623,-0.035614092,0.010299705,0.021041917,-0.013893158,0.014350923,-0.032440256,-0.009338399,0.006114973,0.025344905,0.03512581,-0.02917487,-0.018432658,0.008873005,-0.0
14236482,-0.027359068,-0.055572633,0.016784705,0.041900728,-0.019943282,0.020080611,0.018112222,0.009033223,-0.021087693,-0.05041515,0.0352784,0.024642998,0.007118241,0.008804341,-0.022384694,-0.030471869,-0.015762364,0.048706163,0.0064659263,0.0048790085,0.003906259,-0.02384954,-0.012771634,-0.025985776,0.0017061271,0.0736696,0.020950364,0.013847382,0.029785221,-0.022201587,-0.003059394,0.038574304,-0.01647953,-0.022354174,0.0562135,0.001626972,-0.007865923,0.012001064,0.043609716,-0.0274964,0.029846257,-0.0035266953,-0.00026106893,0.050903432,0.022750905,-0.037475668,-0.013244658,-0.015640294,0.042419527,-0.027938904,-0.008247394,-0.0047111614,-0.019088788,-0.15368687,0.033020094,0.028350893,0.02449041,0.016830482,0.03189094,-0.04861461,-0.024261529,-0.05029308,0.018386882,0.028945986,-0.044525243,0.009628317,0.021667529,0.026123105,0.037384115,-0.007335679,-0.02149968,-0.0176392,-0.0210114,0.00070810475,-0.05261242,0.061340466,0.0037002645,-0.010910058,-0.011360194,0.008201617,0.03793343,-0.0070305024,-0.0225678,-0.00945284,0.02281194,0.0013446837,0.023880057,-0.0144806225,0.015914952,0.018646281,0.0032444072,0.016494788,0.01853947,0.01985173,0.03964242,0.015541111,0.016494788,-0.031921457,-0.029693669,0.020782517,-0.009796164,-0.0032234264,-0.046478376,0.044647314,-0.035675127,-0.01853947,0.0069885408,-0.07635515,-0.010986352,0.00971987,0.034027174,0.0347596,0.02812201,-0.03213508,-0.05355847,-0.00038886155,-0.014061005,-0.08074969,0.020584151,0.047760114,0.03381355,0.012809781,-0.020904588,0.013954193,-0.04382334,0.01062777,0.004421244,0.039337244,0.014549287,0.013435394,0.021316575,-0.01957707,-0.09228536,0.006145491,0.0015955006,0.0006575599,0.010147117,-0.03137214,-0.048919786,0.0044517615,0.04214487,0.053466916,0.22290088,0.019592328,0.021041917,-0.034790117,0.034393385,-0.016616859,-0.0748903,0.08294696,-0.009971641,-0.030548163,0.010810876,0.04147348,-0.025421198,-0.020614669,0.019271893,0.04562388,-0.071838535,0.0140152285,0.04455576,0.016021764,0.003330
2382,-0.03851327,-0.05197155,-0.024795586,-0.04925548,-0.0012359646,-0.028152527,0.020446822,-0.010955835,0.007926959,-0.05679334,0.04898082,0.02227788,0.009887717,-0.037262045,-0.021865893,0.024658257,-0.03305061,-0.005104076,0.06274428,0.0026741086,-0.0032806469,-0.027450623,0.016265905,-0.008514423,-0.011116052,-0.008338947,-0.020813035,-0.025711115,-0.021438645,-0.009872458,-0.04071054,-0.019348187,0.0037441335,-0.0155868875,-0.0049705617,-0.009040852,0.007850665,0.031463694,0.05029308,0.002864844,0.0063552996,-0.056945927,-0.046051126,0.006042494,0.053833127,-0.013702422,-0.03045661,0.048187364,0.029068056,-0.022766164,-0.002573019,0.012855558,0.005336773,-0.009414693,-0.046173196,-0.014053375,-0.0054741027,0.001794819,0.014472993,-0.00087928964,0.004680644,0.02449041,0.018325847,0.054199338,-0.006156935,0.028717104,0.086425975,0.02307134,0.0060958997,-0.0008125323,0.018829387,-0.011825588,0.0032806469,0.008880635,-0.019271893,-0.015991246,-0.008018511,-0.03149421,0.00803377,-0.0137482,0.0004093656,-0.049682725,-0.015518223,-0.034118727,-0.0069542085,-0.05297863,-0.0052299616,-0.0038566676,0.0008196849,-0.037536703,-0.02383428,-0.033355787,-0.051239125,0.007118241,0.03488167,0.028259339,0.008842488,0.009246847,0.03970346,-0.019271893,0.038543787,-0.022659352,0.022720387,0.024566704,-0.056030396,-0.0026283322,-0.009399435,0.0077743703,-0.02191167,0.0028667513,-0.028717104,0.0070991674,0.027038634,0.063964985,0.0090103345,-0.0053215143,-0.022064257,-0.014091522,-0.0057983524,-0.021087693,0.006557479,-0.004325876,0.045440774,0.0065765525,0.0015716588,-0.049804796,0.03924569,-0.01918034,-0.021331834,0.039093103,0.017395059,0.012664823,-0.052765008,0.021331834,-0.07537858,-0.0061607496,-0.032043528,0.0067978054,-0.0121917995,0.0039978116,0.0088196,0.006580367,0.07238785,0.0110092405,-0.0074196025,0.009025593,0.03085334,-0.03137214,-0.006259932,0.011901882,-0.040741056,-0.030242987,0.008834858,-0.019744916,-0.009712241,-0.0040588467,0.033172682,0.004276285,-0.0490723
74,0.03488167,-0.0051269643,0.007694261,0.005935682,0.01788334,-0.0069542085,0.0085449405,-0.007194535,-0.041900728,-0.013313323,-0.0013895065,0.07617205,0.0037422262,-0.025009211,0.0051345937,0.0066299583,0.10388207,-0.008834858,0.006439223,-0.021102952,-0.03099067,-0.016555823,-0.0126571935,0.010658287,0.0057945377,-0.0055503966,-0.009681723,0.057617314,-0.017822305,-0.0034828263,0.0005464566,0.0043602088,-0.037109457,0.010849023,-0.009216329,-0.049194444,0.01179507,0.049469102,-0.008514423,-0.009681723,-0.01890568,0.03500374,-0.028228821,-0.05871595,0.0011281992,0.044799905,-0.0032806469,0.009002705,0.030120917,0.0073547526,-0.010025047,0.019012494,-0.031433176,-0.02787787,0.021621753,-0.011177087,-0.02630621,0.042297456,-0.041046232,-0.020919846,-0.002534872,-0.024765069,0.01632694,0.0029258793,-0.0018615763,-0.026748717,-0.030273505,0.006763473,0.036590658,0.027236998,0.02307134,0.031829905,0.013107329,-0.025451716,0.040252775,0.04214487,0.012710599,0.01800541,-0.012130764,-0.056274537,0.02009587,0.03695687,0.024963435,-0.030166693,0.009002705,-0.06988541,0.043212987,0.01840214,-0.01179507,-0.09484884,-0.023986869,0.015319858,-0.023498587,-0.034790117,0.012176541,0.0018901867,-0.00037646378,0.051818963,0.021804858,-0.05209362,-0.027710022,0.051391713,-0.022064257,-0.024139458,-0.018295329,-0.04092416,0.0063667437,0.022995045,0.0149460165,-0.030059882,0.019134564,0.017562905,-0.04962169,0.015579258,0.010223411,-0.0076675583,-0.059021126,0.04431162,-0.023315482,0.017517129,-0.0021457719,0.042968843,0.028533999,-0.029449528,-0.016769446,0.026367245,-0.015762364,-0.01140597,0.030059882,0.030929634,0.0058250558,-0.06689468,-0.013473541,0.009323141,0.025299128,-0.021728564,0.049987905,-0.0020599412,0.04287729,-0.022827199,0.020828294,-0.001273158,-0.04068002,-0.013664275,-0.0036945425,-0.019775433,-0.024642998,-0.005275738,-0.036407553,-0.0008239764,-0.027435362,0.06427016,-0.012901335,-0.02035527,0.020614669,-0.0017051734,0.042480562,-0.0013942749,0.018981975,0.0303
65057,0.0028915468,0.052642938,0.03408821,-0.01878361,0.0043525794,-0.014183076,0.0009870551,-0.011611964,-0.030273505,-0.010635399,0.058776986,-0.03625496,-0.008270282,-0.03295906,0.04794322,-0.0025119837,0.045959573,-0.008773823,0.048584092,0.048828233,-0.056457642,0.06039442,-0.04522715,0.015617405,0.030960152,0.047515973,0.042572115,-0.069214016,0.017959634,-0.0090484815,0.02073674,-0.013839752,0.035430986,-0.041046232,-0.009887717,0.07043473,-0.02787787,0.010993982,-0.0017557183,0.0028057161,-0.031204293,-0.0059700143,0.0054741027,-0.023666434,-0.008903523,-0.021316575,0.00014424356,0.011863735,-0.0058136117,0.004821788,-0.01710514,0.009384176,-0.02864081,-0.0058288705,-0.13269073,0.019363446,-0.013923676,0.025177058,-0.049194444,0.015129123,-0.02359014,-0.009155294,0.0034294203,-0.01878361,-0.0027256072,-0.000686647,-0.048034772,-0.018264811,0.071228184,0.037780844,-0.025726374,-0.028595034,-0.011177087,0.031463694,-0.01075747,-0.035705645,0.097290255,0.010475182,-0.011199976,-0.02655035,-0.019241376,-0.021698046,-0.019638104,-0.016769446,-0.02165227,-0.06939713,0.024658257,0.05297863,0.04586802,0.00984957,-0.009414693,0.013458282,-0.014610323,0.024581963,-0.023376517,0.01269534,0.010284446,0.023880057,-0.011383082,0.101684794,-0.007423417,-0.048156846,-0.008140582,0.014602694,-0.0033321455,0.019638104,0.028976504,0.025619563,0.009086629,-0.007049576,0.011596705,0.0047226055,-0.024215752,0.07684343,-0.003227241,0.016082799,-0.025207575,-0.025177058,-0.024002127,0.017135657,-0.01969914,0.043212987,0.024185235,-0.02073674,-0.033874586,-0.0021591233,-0.045471292,-0.00071954884,-0.008132952,0.019348187,0.03948983,-0.033752516,-0.084961124,0.0030994483,-0.041381925,-0.041015714,0.0112839,-0.019836469,0.032104563,0.016098058,0.020080611,-0.007942217,-0.050140493,-0.034393385,-0.05297863,-0.028137268,-0.0058174264,-0.0056114323,-0.03189094,0.021026658,-0.011756923,-0.027267516,0.006385817,-0.04718028,-0.012519864,-0.035949785,0.013076811,-0.02317815,-0.031860422,0.04
4769388,-0.015480076,-0.008018511,-0.043518163,0.023422293,-0.036895834,-0.040100187,-0.06039442,0.005691541,-0.036529623,-0.018585246,0.023635916,0.021408128,0.01152804,0.013984711,0.007965106,-0.027801575,-0.0026226102,-0.021286057,0.006011976,0.027389586,0.0840456,0.07476823,0.028564516,0.015029941,0.029342717,-0.047760114,-0.0241242,0.031082222,0.017837564,0.023346,-0.002166753,0.046295267,-0.033111647,-0.017715493,-0.016937293,-0.0036678396,-0.01606754,-0.010551476,-0.060730115,-0.00067806395,0.005714429,0.009002705,-0.056549195,-0.053497434,0.027160704,-0.023803763,-0.02877814,0.03189094,-0.015838658,-0.025878964,0.00014889274,-0.02319341,-0.0028724733,0.053222775,-0.0040893643,-0.0034313279,0.00036740385,0.0049057114,0.011291529,0.056518678,-0.007972735,-0.041381925,-0.04467783,0.008804341,0.026519833,0.052337762,-0.021209763,-0.019119306,0.020126387,0.00997927,-0.007755297,0.020492598,-0.014915499,-0.038421717,0.037353598,-0.0050888173,0.029708927,0.04638682,-0.052917596,-0.0112839,0.0038433159,-0.011001611,-0.0023708395,0.015991246,-0.03381355,0.017135657,0.016418492,-0.029449528,0.047332868,-0.002183919,0.018173259,0.0017023124,0.01814274,0.01153567,0.00042152498,-0.021179246,0.058441292,-0.0020771073,-0.036102373,0.007740038,-0.0042419527,-0.02839667,0.007713335,-0.016708411,-0.020538375,0.0044899085,-0.011131311,0.0032844616,-0.036468588,-0.005886091,0.05523694,-0.015098605,-0.03161628,0.02462774,0.028488223,0.013404876,-0.012916594,-0.012420681,-0.036377035,-0.01335147,-0.040344328,0.029144352,-0.04174814,0.023315482,-0.02227788,-0.0022716573,-0.03152473,0.0482484,-0.027038634,-0.004882823,0.06152357,-0.003881463,-0.036041338,-0.0075645614,0.020660445,-0.07250992,-0.024429375,-0.036377035] - searchVector(db_name="memtensor_memos", vectorStr=vector) + # vector = 
[-0.02954108,-0.024826104,-0.07641619,0.01464847,-0.0032501293,-0.044220068,0.024322564,0.018768352,-0.0030803748,-0.017776528,0.022201587,-0.0036640249,-0.013397247,-0.02462774,0.021743823,0.0115509285,-0.023223927,0.024093682,-0.0071144262,-0.014984163,-0.013305694,-0.022064257,0.055786256,-0.012374905,0.019714398,0.008865376,-0.00803377,-0.006851211,0.036529623,0.042785738,0.05923475,0.007347123,0.058776986,0.03900155,-0.05065929,-0.060730115,0.0042343233,-0.0030670234,-0.02864081,0.047332868,-0.007789629,-0.011741664,0.019638104,0.007427232,-0.036529623,-0.034484938,-0.046203714,0.009078999,-0.026367245,-0.049896352,0.027938904,-0.048950303,0.04299936,-0.024597222,-0.018600505,0.026321469,-0.03240974,-0.07080094,-0.058105595,-0.0026359616,-0.023757987,0.014465364,-0.02682501,-0.014427217,-0.01814274,0.07244889,-0.0063705584,0.019378705,-0.05078136,-0.016525306,0.013534576,-0.01814274,-0.016098058,0.018295329,-0.061645642,0.023773246,0.03549202,-0.034820635,0.029464787,0.026535092,-0.024398858,-0.004184732,0.040374845,0.0037460409,0.014419587,-0.009384176,0.010055564,0.019546552,0.023498587,-0.01620487,-0.013389617,-0.016418492,0.028198304,-0.051117055,0.023101857,0.0705568,-0.03991708,-0.039428797,0.006595626,0.0039367764,0.034515455,-0.02046208,-0.001202586,-0.018569987,-0.009666464,-0.018966716,-0.0052719233,0.0012187985,0.044403173,-0.018295329,0.01890568,0.037994467,0.020538375,0.012939482,-0.0036048968,-0.01968388,-0.0021209763,0.01451114,0.024429375,-0.031311106,-0.017852822,-0.0057830936,0.011459376,-0.020523116,-0.021209763,0.0082550235,-0.0035057145,0.031311106,0.0063438555,0.012374905,0.028595034,0.03552254,-0.041961763,0.0062675616,0.030426092,-0.030548163,0.058502328,0.029205387,-0.02604681,-0.025756892,0.004211435,0.05160534,0.0092239585,-0.049987905,-0.0013847381,-0.055267457,0.027282774,0.012535123,-0.009971641,-0.0040741055,-0.022613576,0.018478435,0.026031552,0.02720648,-0.04782115,0.007400529,0.034851152,0.042694185,-0.031005928,-0.0019149822,0
.00042104814,-0.0049972646,-0.02539068,0.031738352,0.010337852,-0.036102373,0.014213594,0.011878993,0.0008330364,-0.035583574,0.0034465867,-0.0082550235,0.009887717,0.012908964,0.018875163,0.01814274,0.091552936,0.018768352,0.03305061,-0.054260373,-0.02682501,0.03628548,0.031158516,-0.048675645,-0.017288245,0.02929694,0.013450652,-0.0058365,0.032745432,-0.027160704,-0.015655553,-0.032074045,0.020248458,0.004760753,0.04730235,-0.024841363,0.020248458,0.013786347,-0.010810876,-0.033874586,0.008491535,0.023651175,0.016021764,0.0039329617,-0.04626475,0.0050621144,-0.02720648,-0.008140582,0.017440835,-0.029830998,0.04861461,0.042327974,0.01529697,-0.027191222,-0.03515633,0.018264811,0.038696375,-0.025543269,0.018676799,0.023757987,0.0058670174,-0.0045013526,0.017059363,-0.03720101,-0.03668221,-0.0137253115,0.024581963,0.025589045,0.01270297,-0.027450623,-0.035614092,0.010299705,0.021041917,-0.013893158,0.014350923,-0.032440256,-0.009338399,0.006114973,0.025344905,0.03512581,-0.02917487,-0.018432658,0.008873005,-0.014236482,-0.027359068,-0.055572633,0.016784705,0.041900728,-0.019943282,0.020080611,0.018112222,0.009033223,-0.021087693,-0.05041515,0.0352784,0.024642998,0.007118241,0.008804341,-0.022384694,-0.030471869,-0.015762364,0.048706163,0.0064659263,0.0048790085,0.003906259,-0.02384954,-0.012771634,-0.025985776,0.0017061271,0.0736696,0.020950364,0.013847382,0.029785221,-0.022201587,-0.003059394,0.038574304,-0.01647953,-0.022354174,0.0562135,0.001626972,-0.007865923,0.012001064,0.043609716,-0.0274964,0.029846257,-0.0035266953,-0.00026106893,0.050903432,0.022750905,-0.037475668,-0.013244658,-0.015640294,0.042419527,-0.027938904,-0.008247394,-0.0047111614,-0.019088788,-0.15368687,0.033020094,0.028350893,0.02449041,0.016830482,0.03189094,-0.04861461,-0.024261529,-0.05029308,0.018386882,0.028945986,-0.044525243,0.009628317,0.021667529,0.026123105,0.037384115,-0.007335679,-0.02149968,-0.0176392,-0.0210114,0.00070810475,-0.05261242,0.061340466,0.0037002645,-0.010910058,-0.01
1360194,0.008201617,0.03793343,-0.0070305024,-0.0225678,-0.00945284,0.02281194,0.0013446837,0.023880057,-0.0144806225,0.015914952,0.018646281,0.0032444072,0.016494788,0.01853947,0.01985173,0.03964242,0.015541111,0.016494788,-0.031921457,-0.029693669,0.020782517,-0.009796164,-0.0032234264,-0.046478376,0.044647314,-0.035675127,-0.01853947,0.0069885408,-0.07635515,-0.010986352,0.00971987,0.034027174,0.0347596,0.02812201,-0.03213508,-0.05355847,-0.00038886155,-0.014061005,-0.08074969,0.020584151,0.047760114,0.03381355,0.012809781,-0.020904588,0.013954193,-0.04382334,0.01062777,0.004421244,0.039337244,0.014549287,0.013435394,0.021316575,-0.01957707,-0.09228536,0.006145491,0.0015955006,0.0006575599,0.010147117,-0.03137214,-0.048919786,0.0044517615,0.04214487,0.053466916,0.22290088,0.019592328,0.021041917,-0.034790117,0.034393385,-0.016616859,-0.0748903,0.08294696,-0.009971641,-0.030548163,0.010810876,0.04147348,-0.025421198,-0.020614669,0.019271893,0.04562388,-0.071838535,0.0140152285,0.04455576,0.016021764,0.0033302382,-0.03851327,-0.05197155,-0.024795586,-0.04925548,-0.0012359646,-0.028152527,0.020446822,-0.010955835,0.007926959,-0.05679334,0.04898082,0.02227788,0.009887717,-0.037262045,-0.021865893,0.024658257,-0.03305061,-0.005104076,0.06274428,0.0026741086,-0.0032806469,-0.027450623,0.016265905,-0.008514423,-0.011116052,-0.008338947,-0.020813035,-0.025711115,-0.021438645,-0.009872458,-0.04071054,-0.019348187,0.0037441335,-0.0155868875,-0.0049705617,-0.009040852,0.007850665,0.031463694,0.05029308,0.002864844,0.0063552996,-0.056945927,-0.046051126,0.006042494,0.053833127,-0.013702422,-0.03045661,0.048187364,0.029068056,-0.022766164,-0.002573019,0.012855558,0.005336773,-0.009414693,-0.046173196,-0.014053375,-0.0054741027,0.001794819,0.014472993,-0.00087928964,0.004680644,0.02449041,0.018325847,0.054199338,-0.006156935,0.028717104,0.086425975,0.02307134,0.0060958997,-0.0008125323,0.018829387,-0.011825588,0.0032806469,0.008880635,-0.019271893,-0.015991246,-0.008018511,-0.
03149421,0.00803377,-0.0137482,0.0004093656,-0.049682725,-0.015518223,-0.034118727,-0.0069542085,-0.05297863,-0.0052299616,-0.0038566676,0.0008196849,-0.037536703,-0.02383428,-0.033355787,-0.051239125,0.007118241,0.03488167,0.028259339,0.008842488,0.009246847,0.03970346,-0.019271893,0.038543787,-0.022659352,0.022720387,0.024566704,-0.056030396,-0.0026283322,-0.009399435,0.0077743703,-0.02191167,0.0028667513,-0.028717104,0.0070991674,0.027038634,0.063964985,0.0090103345,-0.0053215143,-0.022064257,-0.014091522,-0.0057983524,-0.021087693,0.006557479,-0.004325876,0.045440774,0.0065765525,0.0015716588,-0.049804796,0.03924569,-0.01918034,-0.021331834,0.039093103,0.017395059,0.012664823,-0.052765008,0.021331834,-0.07537858,-0.0061607496,-0.032043528,0.0067978054,-0.0121917995,0.0039978116,0.0088196,0.006580367,0.07238785,0.0110092405,-0.0074196025,0.009025593,0.03085334,-0.03137214,-0.006259932,0.011901882,-0.040741056,-0.030242987,0.008834858,-0.019744916,-0.009712241,-0.0040588467,0.033172682,0.004276285,-0.049072374,0.03488167,-0.0051269643,0.007694261,0.005935682,0.01788334,-0.0069542085,0.0085449405,-0.007194535,-0.041900728,-0.013313323,-0.0013895065,0.07617205,0.0037422262,-0.025009211,0.0051345937,0.0066299583,0.10388207,-0.008834858,0.006439223,-0.021102952,-0.03099067,-0.016555823,-0.0126571935,0.010658287,0.0057945377,-0.0055503966,-0.009681723,0.057617314,-0.017822305,-0.0034828263,0.0005464566,0.0043602088,-0.037109457,0.010849023,-0.009216329,-0.049194444,0.01179507,0.049469102,-0.008514423,-0.009681723,-0.01890568,0.03500374,-0.028228821,-0.05871595,0.0011281992,0.044799905,-0.0032806469,0.009002705,0.030120917,0.0073547526,-0.010025047,0.019012494,-0.031433176,-0.02787787,0.021621753,-0.011177087,-0.02630621,0.042297456,-0.041046232,-0.020919846,-0.002534872,-0.024765069,0.01632694,0.0029258793,-0.0018615763,-0.026748717,-0.030273505,0.006763473,0.036590658,0.027236998,0.02307134,0.031829905,0.013107329,-0.025451716,0.040252775,0.04214487,0.012710599,0.0180
0541,-0.012130764,-0.056274537,0.02009587,0.03695687,0.024963435,-0.030166693,0.009002705,-0.06988541,0.043212987,0.01840214,-0.01179507,-0.09484884,-0.023986869,0.015319858,-0.023498587,-0.034790117,0.012176541,0.0018901867,-0.00037646378,0.051818963,0.021804858,-0.05209362,-0.027710022,0.051391713,-0.022064257,-0.024139458,-0.018295329,-0.04092416,0.0063667437,0.022995045,0.0149460165,-0.030059882,0.019134564,0.017562905,-0.04962169,0.015579258,0.010223411,-0.0076675583,-0.059021126,0.04431162,-0.023315482,0.017517129,-0.0021457719,0.042968843,0.028533999,-0.029449528,-0.016769446,0.026367245,-0.015762364,-0.01140597,0.030059882,0.030929634,0.0058250558,-0.06689468,-0.013473541,0.009323141,0.025299128,-0.021728564,0.049987905,-0.0020599412,0.04287729,-0.022827199,0.020828294,-0.001273158,-0.04068002,-0.013664275,-0.0036945425,-0.019775433,-0.024642998,-0.005275738,-0.036407553,-0.0008239764,-0.027435362,0.06427016,-0.012901335,-0.02035527,0.020614669,-0.0017051734,0.042480562,-0.0013942749,0.018981975,0.030365057,0.0028915468,0.052642938,0.03408821,-0.01878361,0.0043525794,-0.014183076,0.0009870551,-0.011611964,-0.030273505,-0.010635399,0.058776986,-0.03625496,-0.008270282,-0.03295906,0.04794322,-0.0025119837,0.045959573,-0.008773823,0.048584092,0.048828233,-0.056457642,0.06039442,-0.04522715,0.015617405,0.030960152,0.047515973,0.042572115,-0.069214016,0.017959634,-0.0090484815,0.02073674,-0.013839752,0.035430986,-0.041046232,-0.009887717,0.07043473,-0.02787787,0.010993982,-0.0017557183,0.0028057161,-0.031204293,-0.0059700143,0.0054741027,-0.023666434,-0.008903523,-0.021316575,0.00014424356,0.011863735,-0.0058136117,0.004821788,-0.01710514,0.009384176,-0.02864081,-0.0058288705,-0.13269073,0.019363446,-0.013923676,0.025177058,-0.049194444,0.015129123,-0.02359014,-0.009155294,0.0034294203,-0.01878361,-0.0027256072,-0.000686647,-0.048034772,-0.018264811,0.071228184,0.037780844,-0.025726374,-0.028595034,-0.011177087,0.031463694,-0.01075747,-0.035705645,0.097290255,0.0
10475182,-0.011199976,-0.02655035,-0.019241376,-0.021698046,-0.019638104,-0.016769446,-0.02165227,-0.06939713,0.024658257,0.05297863,0.04586802,0.00984957,-0.009414693,0.013458282,-0.014610323,0.024581963,-0.023376517,0.01269534,0.010284446,0.023880057,-0.011383082,0.101684794,-0.007423417,-0.048156846,-0.008140582,0.014602694,-0.0033321455,0.019638104,0.028976504,0.025619563,0.009086629,-0.007049576,0.011596705,0.0047226055,-0.024215752,0.07684343,-0.003227241,0.016082799,-0.025207575,-0.025177058,-0.024002127,0.017135657,-0.01969914,0.043212987,0.024185235,-0.02073674,-0.033874586,-0.0021591233,-0.045471292,-0.00071954884,-0.008132952,0.019348187,0.03948983,-0.033752516,-0.084961124,0.0030994483,-0.041381925,-0.041015714,0.0112839,-0.019836469,0.032104563,0.016098058,0.020080611,-0.007942217,-0.050140493,-0.034393385,-0.05297863,-0.028137268,-0.0058174264,-0.0056114323,-0.03189094,0.021026658,-0.011756923,-0.027267516,0.006385817,-0.04718028,-0.012519864,-0.035949785,0.013076811,-0.02317815,-0.031860422,0.044769388,-0.015480076,-0.008018511,-0.043518163,0.023422293,-0.036895834,-0.040100187,-0.06039442,0.005691541,-0.036529623,-0.018585246,0.023635916,0.021408128,0.01152804,0.013984711,0.007965106,-0.027801575,-0.0026226102,-0.021286057,0.006011976,0.027389586,0.0840456,0.07476823,0.028564516,0.015029941,0.029342717,-0.047760114,-0.0241242,0.031082222,0.017837564,0.023346,-0.002166753,0.046295267,-0.033111647,-0.017715493,-0.016937293,-0.0036678396,-0.01606754,-0.010551476,-0.060730115,-0.00067806395,0.005714429,0.009002705,-0.056549195,-0.053497434,0.027160704,-0.023803763,-0.02877814,0.03189094,-0.015838658,-0.025878964,0.00014889274,-0.02319341,-0.0028724733,0.053222775,-0.0040893643,-0.0034313279,0.00036740385,0.0049057114,0.011291529,0.056518678,-0.007972735,-0.041381925,-0.04467783,0.008804341,0.026519833,0.052337762,-0.021209763,-0.019119306,0.020126387,0.00997927,-0.007755297,0.020492598,-0.014915499,-0.038421717,0.037353598,-0.0050888173,0.029708927,0.046
38682,-0.052917596,-0.0112839,0.0038433159,-0.011001611,-0.0023708395,0.015991246,-0.03381355,0.017135657,0.016418492,-0.029449528,0.047332868,-0.002183919,0.018173259,0.0017023124,0.01814274,0.01153567,0.00042152498,-0.021179246,0.058441292,-0.0020771073,-0.036102373,0.007740038,-0.0042419527,-0.02839667,0.007713335,-0.016708411,-0.020538375,0.0044899085,-0.011131311,0.0032844616,-0.036468588,-0.005886091,0.05523694,-0.015098605,-0.03161628,0.02462774,0.028488223,0.013404876,-0.012916594,-0.012420681,-0.036377035,-0.01335147,-0.040344328,0.029144352,-0.04174814,0.023315482,-0.02227788,-0.0022716573,-0.03152473,0.0482484,-0.027038634,-0.004882823,0.06152357,-0.003881463,-0.036041338,-0.0075645614,0.020660445,-0.07250992,-0.024429375,-0.036377035] + # searchVector(db_name="memtensor_memos", vectorStr=vector) - # insert_data(conn, data) + add_edge(db_name="memtensor_memos",source_id="13bb9df6-0609-4442-8bed-bba77dadac92", target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", edge_type="PARENT", user_name="memosbfb3fb32032b4077a641404dc48739cd") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 45593e713..87543442a 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -511,53 +511,32 @@ def create_edge(self): logger.error(f"Failed to create elabel '{label_name}': {e}", exc_info=True) def add_edge(self, source_id: str, target_id: str, type: str, user_name: str | None = None) -> None: - """ - Create an edge from source node to target node. - Args: - source_id: ID of the source node. - target_id: ID of the target node. - type: Relationship type (e.g., 'RELATE_TO', 'PARENT'). 
- user_name (str, optional): User name for filtering in non-multi-db mode - """ if not source_id or not target_id: raise ValueError("[add_edge] source_id and target_id must be provided") - - # 确保边表存在 - try: - with self.connection.cursor() as cursor: - cursor.execute(f""" - CREATE TABLE IF NOT EXISTS {self.db_name}_graph."Edges" ( - id SERIAL PRIMARY KEY, - source_id TEXT NOT NULL, - target_id TEXT NOT NULL, - edge_type TEXT NOT NULL, - properties JSONB, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ); - """) - except Exception as e: - logger.warning(f"Failed to ensure edges table exists: {e}") - return - # 检查源节点和目标节点是否存在 source_exists = self.get_node(source_id) is not None target_exists = self.get_node(target_id) is not None if not source_exists or not target_exists: - logger.warning(f"Cannot create edge: source or target node does not exist") - return + raise ValueError("[add_edge] source_id and target_id must be provided") - # 构建边的属性 properties = {} if user_name is not None: properties["user_name"] = user_name - - # 添加边 query = f""" - INSERT INTO {self.db_name}_graph."Edges" (source_id, target_id, edge_type, properties) - VALUES (%s, %s, %s, %s) - ON CONFLICT DO NOTHING + INSERT INTO {self.db_name}_graph."{type}"(id, start_id, end_id, properties) + SELECT + ag_catalog._next_graph_id('{self.db_name}_graph'::name, '{type}'), + ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, '{source_id}'::text::cstring), + ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, '{target_id}'::text::cstring), + jsonb_build_object('user_name', '{user_name}')::text::agtype + WHERE NOT EXISTS ( + SELECT 1 FROM {self.db_name}_graph."{type}" + WHERE start_id = ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, '{source_id}'::text::cstring) + AND end_id = ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, '{target_id}'::text::cstring) + ); """ + print(f"Executing add_edge: {query}") try: with 
self.connection.cursor() as cursor: From 4e830fddf5e23bbd1b31aaf68aa7b2d766b73d10 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Sun, 19 Oct 2025 10:21:07 +0800 Subject: [PATCH 017/137] edge_exist --- src/memos/graph_dbs/polardb.py | 56 ++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 87543442a..9434fc32f 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -579,6 +579,14 @@ def edge_exists( """ where_clauses = [] params = [] + # SELECT * FROM + # cypher('memtensor_memos_graph', $$ + # MATCH(a: Memory + # {id: "13bb9df6-0609-4442-8bed-bba77dadac92"})-[r] - (b:Memory {id: "2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d"}) + # RETURN + # r + # $$) AS(r + # agtype); if direction == "OUTGOING": where_clauses.append("source_id = %s AND target_id = %s") @@ -611,6 +619,54 @@ def edge_exists( result = cursor.fetchone() return result is not None + def edge_exists_ccl( + self, + source_id: str, + target_id: str, + type: str = "ANY", + direction: str = "OUTGOING", + user_name: str | None = None, + ) -> bool: + """ + Check if an edge exists between two nodes. + Args: + source_id: ID of the source node. + target_id: ID of the target node. + type: Relationship type. Use "ANY" to match any relationship type. + direction: Direction of the edge. + Use "OUTGOING" (default), "INCOMING", or "ANY". + user_name (str, optional): User name for filtering in non-multi-db mode + Returns: + True if the edge exists, otherwise False. 
+ """ + + # Prepare the relationship pattern + user_name = user_name if user_name else self.config.user_name + rel = "r" if type == "ANY" else f"r@{type}" + + # Prepare the match pattern with direction + if direction == "OUTGOING": + pattern = f"(a:Memory)-[r]->(b:Memory)" + elif direction == "INCOMING": + pattern = f"(a:Memory)<-[r]-(b:Memory)" + elif direction == "ANY": + pattern = f"(a:Memory)-[r]-(b:Memory)" + else: + raise ValueError( + f"Invalid direction: {direction}. Must be 'OUTGOING', 'INCOMING', or 'ANY'." + ) + query = f"SELECT * FROM cypher('{self.db_name}_graph', $$" + query += f"\nMATCH {pattern}" + query += f"\nWHERE a.user_name = '{user_name}' AND b.user_name = '{user_name}'" + query += f"\nAND a.id = '{source_id}' AND b.id = '{target_id}'" + query += "\nRETURN r" + query += "\n$$) AS (r agtype)" + + with self.connection.cursor() as cursor: + cursor.execute(query) + result = cursor.fetchone() + return result is not None and result[0] is not None + def get_node(self, id: str, include_embedding: bool = False, user_name: str | None = None) -> dict[str, Any] | None: """ Retrieve a Memory node by its unique ID. 
From 7f95ec48e027873559eb558c6a4c8a3961c777ce Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Sun, 19 Oct 2025 10:22:09 +0800 Subject: [PATCH 018/137] edge_exist --- src/memos/graph_dbs/polardb.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 9434fc32f..cb8cff277 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -642,7 +642,6 @@ def edge_exists_ccl( # Prepare the relationship pattern user_name = user_name if user_name else self.config.user_name - rel = "r" if type == "ANY" else f"r@{type}" # Prepare the match pattern with direction if direction == "OUTGOING": From 63437c9416add3221aa231849ee3ae69d0778ec6 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Sun, 19 Oct 2025 10:31:01 +0800 Subject: [PATCH 019/137] edge_exist --- src/memos/graph_dbs/polardb.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index cb8cff277..e4f423b80 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -658,6 +658,9 @@ def edge_exists_ccl( query += f"\nMATCH {pattern}" query += f"\nWHERE a.user_name = '{user_name}' AND b.user_name = '{user_name}'" query += f"\nAND a.id = '{source_id}' AND b.id = '{target_id}'" + if type != "ANY": + query += f"\n AND type(r) = '{type}'" + query += "\nRETURN r" query += "\n$$) AS (r agtype)" From 580fb31687ea5cf3ddcd83c3d8804f8d41649417 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 11:39:58 +0800 Subject: [PATCH 020/137] update edge_exists --- examples/basic_modules/polardb_search.py | 35 ++++++++++++++++-------- src/memos/graph_dbs/polardb.py | 6 ++-- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index 1a79f6f14..7beb222c9 100644 --- 
a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -131,35 +131,48 @@ def searchVector(db_name: str, vectorStr: list[float]): # for node_i in nodes: # print("Search result:", graph.get_node(node_i["id"])) - #4,查询 get_memory_count - count = graph.get_memory_count('UserMemory','memos07ba3d044650474c839e721f3a69d38a') - print("user count:", count) - # + # 4,查询 get_memory_count + # count = graph.get_memory_count('UserMemory','memos07ba3d044650474c839e721f3a69d38a') + # print("user count:", count) + # # # # 4,判断node是否存在 node_not_exist 1代表存在, - isNodeExist = graph.node_not_exist('UserMemory', 'memos07ba3d044650474c839e721f3a69d38a') - print("user isNodeExist:", isNodeExist) + # isNodeExist = graph.node_not_exist('UserMemory', 'memos07ba3d044650474c839e721f3a69d38a') + # print("user isNodeExist:", isNodeExist) # # # 6,删除跳过多少行之后的数据remove_oldest_memory # remove_oldest_memory = graph.remove_oldest_memory('UserMemory', 2,'memos07ba3d044650474c839e721f3a69d38a') # print("user remove_oldest_memory:", remove_oldest_memory) # 7,更新 update_node - isNodeExist = graph.update_node(id="bb079c5b-1937-4125-a9e5-55d4abe6c95d", fields={"status": "inactived","tags": ["yoga", "travel11111111", "local studios5667888"]}) - print("user update_node:", isNodeExist) + # isNodeExist = graph.update_node(id="bb079c5b-1937-4125-a9e5-55d4abe6c95d", fields={"status": "inactived","tags": ["yoga", "travel11111111", "local studios5667888"]}) + # print("user update_node:", isNodeExist) # 8,删除 delete_node - isNodeDeleted = graph.delete_node(id="bb079c5b-1937-4125-a9e5-55d4abe6c95d", user_name='memosbfb3fb32032b4077a641404dc48739cd') - print("user isNodeDeleted:", isNodeDeleted) + # isNodeDeleted = graph.delete_node(id="bb079c5b-1937-4125-a9e5-55d4abe6c95d", user_name='memosbfb3fb32032b4077a641404dc48739cd') + # print("user isNodeDeleted:", isNodeDeleted) + # 9,添加边 add_edge def add_edge(db_name: str, source_id: str, target_id: str, edge_type: str = "Memory", 
user_name: str = None): graph = getPolarDb(db_name) graph.add_edge(source_id, target_id, edge_type, user_name) + +def edge_exists(db_name: str, source_id: str, target_id: str, type: str = "Memory", direction: str = "OUTGOING", + user_name: str = None): + graph = getPolarDb(db_name) + isEdge_exists = graph.edge_exists(source_id=source_id, target_id=target_id, type=type, user_name=user_name, + direction=direction) + print("edge_exists:", isEdge_exists) + + if __name__ == "__main__": # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="edge") # vector = [-0.02954108,-0.024826104,-0.07641619,0.01464847,-0.0032501293,-0.044220068,0.024322564,0.018768352,-0.0030803748,-0.017776528,0.022201587,-0.0036640249,-0.013397247,-0.02462774,0.021743823,0.0115509285,-0.023223927,0.024093682,-0.0071144262,-0.014984163,-0.013305694,-0.022064257,0.055786256,-0.012374905,0.019714398,0.008865376,-0.00803377,-0.006851211,0.036529623,0.042785738,0.05923475,0.007347123,0.058776986,0.03900155,-0.05065929,-0.060730115,0.0042343233,-0.0030670234,-0.02864081,0.047332868,-0.007789629,-0.011741664,0.019638104,0.007427232,-0.036529623,-0.034484938,-0.046203714,0.009078999,-0.026367245,-0.049896352,0.027938904,-0.048950303,0.04299936,-0.024597222,-0.018600505,0.026321469,-0.03240974,-0.07080094,-0.058105595,-0.0026359616,-0.023757987,0.014465364,-0.02682501,-0.014427217,-0.01814274,0.07244889,-0.0063705584,0.019378705,-0.05078136,-0.016525306,0.013534576,-0.01814274,-0.016098058,0.018295329,-0.061645642,0.023773246,0.03549202,-0.034820635,0.029464787,0.026535092,-0.024398858,-0.004184732,0.040374845,0.0037460409,0.014419587,-0.009384176,0.010055564,0.019546552,0.023498587,-0.01620487,-0.013389617,-0.016418492,0.028198304,-0.051117055,0.023101857,0.0705568,-0.03991708,-0.039428797,0.006595626,0.0039367764,0.034515455,-0.02046208,-0.001202586,-0.018569987,-0.009666464,-0.018966716,-0.0052719233,0
.0012187985,0.044403173,-0.018295329,0.01890568,0.037994467,0.020538375,0.012939482,-0.0036048968,-0.01968388,-0.0021209763,0.01451114,0.024429375,-0.031311106,-0.017852822,-0.0057830936,0.011459376,-0.020523116,-0.021209763,0.0082550235,-0.0035057145,0.031311106,0.0063438555,0.012374905,0.028595034,0.03552254,-0.041961763,0.0062675616,0.030426092,-0.030548163,0.058502328,0.029205387,-0.02604681,-0.025756892,0.004211435,0.05160534,0.0092239585,-0.049987905,-0.0013847381,-0.055267457,0.027282774,0.012535123,-0.009971641,-0.0040741055,-0.022613576,0.018478435,0.026031552,0.02720648,-0.04782115,0.007400529,0.034851152,0.042694185,-0.031005928,-0.0019149822,0.00042104814,-0.0049972646,-0.02539068,0.031738352,0.010337852,-0.036102373,0.014213594,0.011878993,0.0008330364,-0.035583574,0.0034465867,-0.0082550235,0.009887717,0.012908964,0.018875163,0.01814274,0.091552936,0.018768352,0.03305061,-0.054260373,-0.02682501,0.03628548,0.031158516,-0.048675645,-0.017288245,0.02929694,0.013450652,-0.0058365,0.032745432,-0.027160704,-0.015655553,-0.032074045,0.020248458,0.004760753,0.04730235,-0.024841363,0.020248458,0.013786347,-0.010810876,-0.033874586,0.008491535,0.023651175,0.016021764,0.0039329617,-0.04626475,0.0050621144,-0.02720648,-0.008140582,0.017440835,-0.029830998,0.04861461,0.042327974,0.01529697,-0.027191222,-0.03515633,0.018264811,0.038696375,-0.025543269,0.018676799,0.023757987,0.0058670174,-0.0045013526,0.017059363,-0.03720101,-0.03668221,-0.0137253115,0.024581963,0.025589045,0.01270297,-0.027450623,-0.035614092,0.010299705,0.021041917,-0.013893158,0.014350923,-0.032440256,-0.009338399,0.006114973,0.025344905,0.03512581,-0.02917487,-0.018432658,0.008873005,-0.014236482,-0.027359068,-0.055572633,0.016784705,0.041900728,-0.019943282,0.020080611,0.018112222,0.009033223,-0.021087693,-0.05041515,0.0352784,0.024642998,0.007118241,0.008804341,-0.022384694,-0.030471869,-0.015762364,0.048706163,0.0064659263,0.0048790085,0.003906259,-0.02384954,-0.012771634,-0.025985776,0.0017
061271,0.0736696,0.020950364,0.013847382,0.029785221,-0.022201587,-0.003059394,0.038574304,-0.01647953,-0.022354174,0.0562135,0.001626972,-0.007865923,0.012001064,0.043609716,-0.0274964,0.029846257,-0.0035266953,-0.00026106893,0.050903432,0.022750905,-0.037475668,-0.013244658,-0.015640294,0.042419527,-0.027938904,-0.008247394,-0.0047111614,-0.019088788,-0.15368687,0.033020094,0.028350893,0.02449041,0.016830482,0.03189094,-0.04861461,-0.024261529,-0.05029308,0.018386882,0.028945986,-0.044525243,0.009628317,0.021667529,0.026123105,0.037384115,-0.007335679,-0.02149968,-0.0176392,-0.0210114,0.00070810475,-0.05261242,0.061340466,0.0037002645,-0.010910058,-0.011360194,0.008201617,0.03793343,-0.0070305024,-0.0225678,-0.00945284,0.02281194,0.0013446837,0.023880057,-0.0144806225,0.015914952,0.018646281,0.0032444072,0.016494788,0.01853947,0.01985173,0.03964242,0.015541111,0.016494788,-0.031921457,-0.029693669,0.020782517,-0.009796164,-0.0032234264,-0.046478376,0.044647314,-0.035675127,-0.01853947,0.0069885408,-0.07635515,-0.010986352,0.00971987,0.034027174,0.0347596,0.02812201,-0.03213508,-0.05355847,-0.00038886155,-0.014061005,-0.08074969,0.020584151,0.047760114,0.03381355,0.012809781,-0.020904588,0.013954193,-0.04382334,0.01062777,0.004421244,0.039337244,0.014549287,0.013435394,0.021316575,-0.01957707,-0.09228536,0.006145491,0.0015955006,0.0006575599,0.010147117,-0.03137214,-0.048919786,0.0044517615,0.04214487,0.053466916,0.22290088,0.019592328,0.021041917,-0.034790117,0.034393385,-0.016616859,-0.0748903,0.08294696,-0.009971641,-0.030548163,0.010810876,0.04147348,-0.025421198,-0.020614669,0.019271893,0.04562388,-0.071838535,0.0140152285,0.04455576,0.016021764,0.0033302382,-0.03851327,-0.05197155,-0.024795586,-0.04925548,-0.0012359646,-0.028152527,0.020446822,-0.010955835,0.007926959,-0.05679334,0.04898082,0.02227788,0.009887717,-0.037262045,-0.021865893,0.024658257,-0.03305061,-0.005104076,0.06274428,0.0026741086,-0.0032806469,-0.027450623,0.016265905,-0.008514423,-0.011116
052,-0.008338947,-0.020813035,-0.025711115,-0.021438645,-0.009872458,-0.04071054,-0.019348187,0.0037441335,-0.0155868875,-0.0049705617,-0.009040852,0.007850665,0.031463694,0.05029308,0.002864844,0.0063552996,-0.056945927,-0.046051126,0.006042494,0.053833127,-0.013702422,-0.03045661,0.048187364,0.029068056,-0.022766164,-0.002573019,0.012855558,0.005336773,-0.009414693,-0.046173196,-0.014053375,-0.0054741027,0.001794819,0.014472993,-0.00087928964,0.004680644,0.02449041,0.018325847,0.054199338,-0.006156935,0.028717104,0.086425975,0.02307134,0.0060958997,-0.0008125323,0.018829387,-0.011825588,0.0032806469,0.008880635,-0.019271893,-0.015991246,-0.008018511,-0.03149421,0.00803377,-0.0137482,0.0004093656,-0.049682725,-0.015518223,-0.034118727,-0.0069542085,-0.05297863,-0.0052299616,-0.0038566676,0.0008196849,-0.037536703,-0.02383428,-0.033355787,-0.051239125,0.007118241,0.03488167,0.028259339,0.008842488,0.009246847,0.03970346,-0.019271893,0.038543787,-0.022659352,0.022720387,0.024566704,-0.056030396,-0.0026283322,-0.009399435,0.0077743703,-0.02191167,0.0028667513,-0.028717104,0.0070991674,0.027038634,0.063964985,0.0090103345,-0.0053215143,-0.022064257,-0.014091522,-0.0057983524,-0.021087693,0.006557479,-0.004325876,0.045440774,0.0065765525,0.0015716588,-0.049804796,0.03924569,-0.01918034,-0.021331834,0.039093103,0.017395059,0.012664823,-0.052765008,0.021331834,-0.07537858,-0.0061607496,-0.032043528,0.0067978054,-0.0121917995,0.0039978116,0.0088196,0.006580367,0.07238785,0.0110092405,-0.0074196025,0.009025593,0.03085334,-0.03137214,-0.006259932,0.011901882,-0.040741056,-0.030242987,0.008834858,-0.019744916,-0.009712241,-0.0040588467,0.033172682,0.004276285,-0.049072374,0.03488167,-0.0051269643,0.007694261,0.005935682,0.01788334,-0.0069542085,0.0085449405,-0.007194535,-0.041900728,-0.013313323,-0.0013895065,0.07617205,0.0037422262,-0.025009211,0.0051345937,0.0066299583,0.10388207,-0.008834858,0.006439223,-0.021102952,-0.03099067,-0.016555823,-0.0126571935,0.010658287,0.0057
945377,-0.0055503966,-0.009681723,0.057617314,-0.017822305,-0.0034828263,0.0005464566,0.0043602088,-0.037109457,0.010849023,-0.009216329,-0.049194444,0.01179507,0.049469102,-0.008514423,-0.009681723,-0.01890568,0.03500374,-0.028228821,-0.05871595,0.0011281992,0.044799905,-0.0032806469,0.009002705,0.030120917,0.0073547526,-0.010025047,0.019012494,-0.031433176,-0.02787787,0.021621753,-0.011177087,-0.02630621,0.042297456,-0.041046232,-0.020919846,-0.002534872,-0.024765069,0.01632694,0.0029258793,-0.0018615763,-0.026748717,-0.030273505,0.006763473,0.036590658,0.027236998,0.02307134,0.031829905,0.013107329,-0.025451716,0.040252775,0.04214487,0.012710599,0.01800541,-0.012130764,-0.056274537,0.02009587,0.03695687,0.024963435,-0.030166693,0.009002705,-0.06988541,0.043212987,0.01840214,-0.01179507,-0.09484884,-0.023986869,0.015319858,-0.023498587,-0.034790117,0.012176541,0.0018901867,-0.00037646378,0.051818963,0.021804858,-0.05209362,-0.027710022,0.051391713,-0.022064257,-0.024139458,-0.018295329,-0.04092416,0.0063667437,0.022995045,0.0149460165,-0.030059882,0.019134564,0.017562905,-0.04962169,0.015579258,0.010223411,-0.0076675583,-0.059021126,0.04431162,-0.023315482,0.017517129,-0.0021457719,0.042968843,0.028533999,-0.029449528,-0.016769446,0.026367245,-0.015762364,-0.01140597,0.030059882,0.030929634,0.0058250558,-0.06689468,-0.013473541,0.009323141,0.025299128,-0.021728564,0.049987905,-0.0020599412,0.04287729,-0.022827199,0.020828294,-0.001273158,-0.04068002,-0.013664275,-0.0036945425,-0.019775433,-0.024642998,-0.005275738,-0.036407553,-0.0008239764,-0.027435362,0.06427016,-0.012901335,-0.02035527,0.020614669,-0.0017051734,0.042480562,-0.0013942749,0.018981975,0.030365057,0.0028915468,0.052642938,0.03408821,-0.01878361,0.0043525794,-0.014183076,0.0009870551,-0.011611964,-0.030273505,-0.010635399,0.058776986,-0.03625496,-0.008270282,-0.03295906,0.04794322,-0.0025119837,0.045959573,-0.008773823,0.048584092,0.048828233,-0.056457642,0.06039442,-0.04522715,0.015617405,0.0309601
52,0.047515973,0.042572115,-0.069214016,0.017959634,-0.0090484815,0.02073674,-0.013839752,0.035430986,-0.041046232,-0.009887717,0.07043473,-0.02787787,0.010993982,-0.0017557183,0.0028057161,-0.031204293,-0.0059700143,0.0054741027,-0.023666434,-0.008903523,-0.021316575,0.00014424356,0.011863735,-0.0058136117,0.004821788,-0.01710514,0.009384176,-0.02864081,-0.0058288705,-0.13269073,0.019363446,-0.013923676,0.025177058,-0.049194444,0.015129123,-0.02359014,-0.009155294,0.0034294203,-0.01878361,-0.0027256072,-0.000686647,-0.048034772,-0.018264811,0.071228184,0.037780844,-0.025726374,-0.028595034,-0.011177087,0.031463694,-0.01075747,-0.035705645,0.097290255,0.010475182,-0.011199976,-0.02655035,-0.019241376,-0.021698046,-0.019638104,-0.016769446,-0.02165227,-0.06939713,0.024658257,0.05297863,0.04586802,0.00984957,-0.009414693,0.013458282,-0.014610323,0.024581963,-0.023376517,0.01269534,0.010284446,0.023880057,-0.011383082,0.101684794,-0.007423417,-0.048156846,-0.008140582,0.014602694,-0.0033321455,0.019638104,0.028976504,0.025619563,0.009086629,-0.007049576,0.011596705,0.0047226055,-0.024215752,0.07684343,-0.003227241,0.016082799,-0.025207575,-0.025177058,-0.024002127,0.017135657,-0.01969914,0.043212987,0.024185235,-0.02073674,-0.033874586,-0.0021591233,-0.045471292,-0.00071954884,-0.008132952,0.019348187,0.03948983,-0.033752516,-0.084961124,0.0030994483,-0.041381925,-0.041015714,0.0112839,-0.019836469,0.032104563,0.016098058,0.020080611,-0.007942217,-0.050140493,-0.034393385,-0.05297863,-0.028137268,-0.0058174264,-0.0056114323,-0.03189094,0.021026658,-0.011756923,-0.027267516,0.006385817,-0.04718028,-0.012519864,-0.035949785,0.013076811,-0.02317815,-0.031860422,0.044769388,-0.015480076,-0.008018511,-0.043518163,0.023422293,-0.036895834,-0.040100187,-0.06039442,0.005691541,-0.036529623,-0.018585246,0.023635916,0.021408128,0.01152804,0.013984711,0.007965106,-0.027801575,-0.0026226102,-0.021286057,0.006011976,0.027389586,0.0840456,0.07476823,0.028564516,0.015029941,0.0293427
17,-0.047760114,-0.0241242,0.031082222,0.017837564,0.023346,-0.002166753,0.046295267,-0.033111647,-0.017715493,-0.016937293,-0.0036678396,-0.01606754,-0.010551476,-0.060730115,-0.00067806395,0.005714429,0.009002705,-0.056549195,-0.053497434,0.027160704,-0.023803763,-0.02877814,0.03189094,-0.015838658,-0.025878964,0.00014889274,-0.02319341,-0.0028724733,0.053222775,-0.0040893643,-0.0034313279,0.00036740385,0.0049057114,0.011291529,0.056518678,-0.007972735,-0.041381925,-0.04467783,0.008804341,0.026519833,0.052337762,-0.021209763,-0.019119306,0.020126387,0.00997927,-0.007755297,0.020492598,-0.014915499,-0.038421717,0.037353598,-0.0050888173,0.029708927,0.04638682,-0.052917596,-0.0112839,0.0038433159,-0.011001611,-0.0023708395,0.015991246,-0.03381355,0.017135657,0.016418492,-0.029449528,0.047332868,-0.002183919,0.018173259,0.0017023124,0.01814274,0.01153567,0.00042152498,-0.021179246,0.058441292,-0.0020771073,-0.036102373,0.007740038,-0.0042419527,-0.02839667,0.007713335,-0.016708411,-0.020538375,0.0044899085,-0.011131311,0.0032844616,-0.036468588,-0.005886091,0.05523694,-0.015098605,-0.03161628,0.02462774,0.028488223,0.013404876,-0.012916594,-0.012420681,-0.036377035,-0.01335147,-0.040344328,0.029144352,-0.04174814,0.023315482,-0.02227788,-0.0022716573,-0.03152473,0.0482484,-0.027038634,-0.004882823,0.06152357,-0.003881463,-0.036041338,-0.0075645614,0.020660445,-0.07250992,-0.024429375,-0.036377035] # searchVector(db_name="memtensor_memos", vectorStr=vector) - add_edge(db_name="memtensor_memos",source_id="13bb9df6-0609-4442-8bed-bba77dadac92", target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", edge_type="PARENT", user_name="memosbfb3fb32032b4077a641404dc48739cd") + # add_edge(db_name="memtensor_memos",source_id="13bb9df6-0609-4442-8bed-bba77dadac92", target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", edge_type="PARENT", user_name="memosbfb3fb32032b4077a641404dc48739cd") + edge_exists(db_name="memtensor_memos", source_id="13bb9df6-0609-4442-8bed-bba77dadac92", + 
target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", type="PARENT", direction="OUTGOING", + user_name="memosbfb3fb32032b4077a641404dc48739cd") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index e4f423b80..6f04be88c 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -563,7 +563,7 @@ def delete_edge(self, source_id: str, target_id: str, type: str) -> None: cursor.execute(query, (source_id, target_id, type)) logger.info(f"Edge deleted: {source_id} -[{type}]-> {target_id}") - def edge_exists( + def edge_exists_old( self, source_id: str, target_id: str, type: str = "ANY", direction: str = "OUTGOING" ) -> bool: """ @@ -619,7 +619,7 @@ def edge_exists( result = cursor.fetchone() return result is not None - def edge_exists_ccl( + def edge_exists( self, source_id: str, target_id: str, @@ -642,6 +642,7 @@ def edge_exists_ccl( # Prepare the relationship pattern user_name = user_name if user_name else self.config.user_name + print(f"edge_exists direction: {direction}") # Prepare the match pattern with direction if direction == "OUTGOING": @@ -664,6 +665,7 @@ def edge_exists_ccl( query += "\nRETURN r" query += "\n$$) AS (r agtype)" + print(f"edge_exists query: {query}") with self.connection.cursor() as cursor: cursor.execute(query) result = cursor.fetchone() From 2746be112a6f253fc057d188c0544e754e122ae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 13:10:56 +0800 Subject: [PATCH 021/137] update polardb.py --- src/memos/graph_dbs/polardb.py | 183 +++++++++++++++++++-------------- 1 file changed, 103 insertions(+), 80 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 6f04be88c..a587e8f31 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -179,12 +179,12 @@ def _create_graph(self): try: with self.connection.cursor() as cursor: # Create schema if it doesn't exist - 
cursor.execute(f"CREATE SCHEMA IF NOT EXISTS {self.db_name}_graph;") + cursor.execute(f'CREATE SCHEMA IF NOT EXISTS "{self.db_name}_graph";') logger.info(f"Schema '{self.db_name}_graph' ensured.") # Create Memory table if it doesn't exist cursor.execute(f""" - CREATE TABLE IF NOT EXISTS {self.db_name}_graph."Memory" ( + CREATE TABLE IF NOT EXISTS "{self.db_name}_graph"."Memory" ( id TEXT PRIMARY KEY, properties JSONB NOT NULL, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, @@ -196,7 +196,7 @@ def _create_graph(self): # Add embedding column if it doesn't exist (using JSONB for compatibility) try: cursor.execute(f""" - ALTER TABLE {self.db_name}_graph."Memory" + ALTER TABLE "{self.db_name}_graph"."Memory" ADD COLUMN IF NOT EXISTS embedding JSONB; """) logger.info(f"Embedding column added to Memory table.") @@ -206,14 +206,14 @@ def _create_graph(self): # Create indexes cursor.execute(f""" CREATE INDEX IF NOT EXISTS idx_memory_properties - ON {self.db_name}_graph."Memory" USING GIN (properties); + ON "{self.db_name}_graph"."Memory" USING GIN (properties); """) # Create vector index for embedding field try: cursor.execute(f""" CREATE INDEX IF NOT EXISTS idx_memory_embedding - ON {self.db_name}_graph."Memory" USING ivfflat (embedding vector_cosine_ops) + ON "{self.db_name}_graph"."Memory" USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); """) logger.info(f"Vector index created for Memory table.") @@ -243,14 +243,14 @@ def create_index( # Apache AGE stores data in regular PostgreSQL tables cursor.execute(f""" CREATE INDEX IF NOT EXISTS idx_memory_properties - ON {self.db_name}_graph."Memory" USING GIN (properties); + ON "{self.db_name}_graph"."Memory" USING GIN (properties); """) # Try to create vector index, but don't fail if it doesn't work try: cursor.execute(f""" CREATE INDEX IF NOT EXISTS idx_memory_embedding - ON {self.db_name}_graph."Memory" USING ivfflat (embedding vector_cosine_ops); + ON "{self.db_name}_graph"."Memory" USING ivfflat (embedding 
vector_cosine_ops); """) except Exception as ve: logger.warning(f"Vector index creation failed (might not be supported): {ve}") @@ -264,7 +264,7 @@ def get_memory_count(self, memory_type: str, user_name: str | None = None) -> in user_name = user_name if user_name else self._get_config_value("user_name") query = f""" SELECT COUNT(*) - FROM {self.db_name}_graph."Memory" + FROM "{self.db_name}_graph"."Memory" WHERE ag_catalog.agtype_access_operator(properties, '"memory_type"'::agtype) = %s::agtype """ query += "\nAND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" @@ -286,7 +286,7 @@ def node_not_exist(self, scope: str, user_name: str | None = None) -> int: user_name = user_name if user_name else self._get_config_value("user_name") query = f""" SELECT id - FROM {self.db_name}_graph."Memory" + FROM "{self.db_name}_graph"."Memory" WHERE ag_catalog.agtype_access_operator(properties, '"memory_type"'::agtype) = %s::agtype """ query += "\nAND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" @@ -319,7 +319,7 @@ def remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: st # 使用真正的 OFFSET 逻辑,与 nebular.py 保持一致 # 先找到要删除的节点ID,然后删除它们 select_query = f""" - SELECT id FROM {self.db_name}_graph."Memory" + SELECT id FROM "{self.db_name}_graph"."Memory" WHERE ag_catalog.agtype_access_operator(properties, '"memory_type"'::agtype) = %s::agtype AND ag_catalog.agtype_access_operator(properties, '"user_name"'::agtype) = %s::agtype ORDER BY ag_catalog.agtype_access_operator(properties, '"updated_at"'::agtype) DESC @@ -342,7 +342,7 @@ def remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: st # 构建删除查询 placeholders = ','.join(['%s'] * len(ids_to_delete)) delete_query = f""" - DELETE FROM {self.db_name}_graph."Memory" + DELETE FROM "{self.db_name}_graph"."Memory" WHERE id IN ({placeholders}) """ delete_params = ids_to_delete @@ -390,14 +390,14 @@ def update_node(self, id: str, 
fields: dict[str, Any], user_name: str | None = N # 构建更新查询 if embedding_vector is not None: query = f""" - UPDATE {self.db_name}_graph."Memory" + UPDATE "{self.db_name}_graph"."Memory" SET properties = %s, embedding = %s WHERE ag_catalog.agtype_access_operator(properties, '"id"'::agtype) = %s::agtype """ params = [json.dumps(properties), json.dumps(embedding_vector), f'"{id}"'] else: query = f""" - UPDATE {self.db_name}_graph."Memory" + UPDATE "{self.db_name}_graph"."Memory" SET properties = %s WHERE ag_catalog.agtype_access_operator(properties, '"id"'::agtype) = %s::agtype """ @@ -424,7 +424,7 @@ def delete_node(self, id: str, user_name: str | None = None) -> None: user_name (str, optional): User name for filtering in non-multi-db mode """ query = f""" - DELETE FROM {self.db_name}_graph."Memory" + DELETE FROM "{self.db_name}_graph"."Memory" WHERE ag_catalog.agtype_access_operator(properties, '"id"'::agtype) = %s::agtype """ params = [f'"{id}"'] @@ -524,16 +524,16 @@ def add_edge(self, source_id: str, target_id: str, type: str, user_name: str | N if user_name is not None: properties["user_name"] = user_name query = f""" - INSERT INTO {self.db_name}_graph."{type}"(id, start_id, end_id, properties) + INSERT INTO "{self.db_name}_graph"."{type}"(id, start_id, end_id, properties) SELECT - ag_catalog._next_graph_id('{self.db_name}_graph'::name, '{type}'), - ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, '{source_id}'::text::cstring), - ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, '{target_id}'::text::cstring), + ag_catalog._next_graph_id('"{self.db_name}_graph"'::name, '{type}'), + ag_catalog._make_graph_id('"{self.db_name}_graph"'::name, 'Memory'::name, '{source_id}'::text::cstring), + ag_catalog._make_graph_id('"{self.db_name}_graph"'::name, 'Memory'::name, '{target_id}'::text::cstring), jsonb_build_object('user_name', '{user_name}')::text::agtype WHERE NOT EXISTS ( - SELECT 1 FROM {self.db_name}_graph."{type}" - 
WHERE start_id = ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, '{source_id}'::text::cstring) - AND end_id = ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, '{target_id}'::text::cstring) + SELECT 1 FROM "{self.db_name}_graph"."{type}" + WHERE start_id = ag_catalog._make_graph_id('"{self.db_name}_graph"'::name, 'Memory'::name, '{source_id}'::text::cstring) + AND end_id = ag_catalog._make_graph_id('"{self.db_name}_graph"'::name, 'Memory'::name, '{target_id}'::text::cstring) ); """ print(f"Executing add_edge: {query}") @@ -555,7 +555,7 @@ def delete_edge(self, source_id: str, target_id: str, type: str) -> None: type: Relationship type to remove. """ query = f""" - DELETE FROM {self.db_name}_graph."Edges" + DELETE FROM "{self.db_name}_graph"."Edges" WHERE source_id = %s AND target_id = %s AND edge_type = %s """ @@ -609,7 +609,7 @@ def edge_exists_old( where_clause = " AND ".join(where_clauses) query = f""" - SELECT 1 FROM {self.db_name}_graph."Edges" + SELECT 1 FROM "{self.db_name}_graph"."Edges" WHERE {where_clause} LIMIT 1 """ @@ -691,7 +691,7 @@ def get_node(self, id: str, include_embedding: bool = False, user_name: str | No query = f""" SELECT {select_fields} - FROM {self.db_name}_graph."Memory" + FROM "{self.db_name}_graph"."Memory" WHERE ag_catalog.agtype_access_operator(properties, '"id"'::agtype) = %s::agtype """ params = [f'"{id}"'] @@ -766,7 +766,7 @@ def get_nodes(self, ids: list[str], **kwargs) -> list[dict[str, Any]]: query = f""" SELECT id, properties, embedding - FROM {self.db_name}_graph."Memory" + FROM "{self.db_name}_graph"."Memory" WHERE ({where_clause}) """ @@ -827,30 +827,30 @@ def get_edges(self, id: str, type: str = "ANY", direction: str = "ANY") -> list[ with self.connection.cursor() as cursor: # 创建边表 cursor.execute(f""" - CREATE TABLE IF NOT EXISTS {self.db_name}_graph."Edges" ( + CREATE TABLE IF NOT EXISTS "{self.db_name}_graph"."Edges" ( id SERIAL PRIMARY KEY, source_id TEXT NOT NULL, target_id 
TEXT NOT NULL, edge_type TEXT NOT NULL, properties JSONB, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - FOREIGN KEY (source_id) REFERENCES {self.db_name}_graph."Memory"(id), - FOREIGN KEY (target_id) REFERENCES {self.db_name}_graph."Memory"(id) + FOREIGN KEY (source_id) REFERENCES "{self.db_name}_graph"."Memory"(id), + FOREIGN KEY (target_id) REFERENCES "{self.db_name}_graph"."Memory"(id) ); """) # 创建索引 cursor.execute(f""" CREATE INDEX IF NOT EXISTS idx_edges_source - ON {self.db_name}_graph."Edges" (source_id); + ON "{self.db_name}_graph"."Edges" (source_id); """) cursor.execute(f""" CREATE INDEX IF NOT EXISTS idx_edges_target - ON {self.db_name}_graph."Edges" (target_id); + ON "{self.db_name}_graph"."Edges" (target_id); """) cursor.execute(f""" CREATE INDEX IF NOT EXISTS idx_edges_type - ON {self.db_name}_graph."Edges" (edge_type); + ON "{self.db_name}_graph"."Edges" (edge_type); """) except Exception as e: logger.warning(f"Failed to create edges table: {e}") @@ -875,7 +875,7 @@ def get_edges(self, id: str, type: str = "ANY", direction: str = "ANY") -> list[ query = f""" SELECT source_id, target_id, edge_type - FROM {self.db_name}_graph."Edges" + FROM "{self.db_name}_graph"."Edges" WHERE {where_clause} """ @@ -949,7 +949,7 @@ def get_neighbors_by_tag( # 获取所有候选节点 query = f""" SELECT id, properties, embedding - FROM {self.db_name}_graph."Memory" + FROM "{self.db_name}_graph"."Memory" WHERE {where_clause} """ @@ -993,46 +993,69 @@ def get_neighbors_by_tag( nodes_with_overlap.sort(key=lambda x: x[1], reverse=True) return [node for node, _ in nodes_with_overlap[:top_k]] - def get_children_with_embeddings(self, id: str) -> list[dict[str, Any]]: + def get_children_with_embeddings(self, id: str, user_name: str | None = None) -> list[dict[str, Any]]: """Get children nodes with their embeddings.""" - # 查询PARENT关系的子节点 + user_name = user_name if user_name else self._get_config_value("user_name") + # where_user = f"AND p.user_name = '{user_name}' AND c.user_name = 
'{user_name}'" + where_user = f"AND p.user_name = '{user_name}'" + + query1 = f""" + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (p:Memory)-[r:PARENT]->(c:Memory) + WHERE p.id = '{id}' {where_user} + RETURN c.id AS id, c.embedding AS embedding, c.memory AS memory + $$) AS (id agtype, embedding agtype, memory agtype) + """ query = f""" - SELECT m.id, m.properties, m.embedding - FROM {self.db_name}_graph."Memory" m - JOIN {self.db_name}_graph."Edges" e ON m.id = e.target_id - WHERE e.source_id = %s AND e.edge_type = 'PARENT' + WITH t as ( + SELECT * + FROM cypher('{self.db_name}_graph', $$ + MATCH (p:Memory)-[r:PARENT]->(c:Memory) + WHERE p.id = '{id}' {where_user} + RETURN id(c) as cid, c.id AS id, c.memory AS memory + $$) as (cid agtype, id agtype, memory agtype) + ) + SELECT t.id, m.embedding, t.memory FROM t, + "{self.db_name}_graph"."Memory" m + WHERE t.cid::graphid = m.id; """ - params = [id] - - # 添加用户过滤 - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - query += " AND m.properties->>'user_name' = %s" - params.append(self._get_config_value("user_name")) - with self.connection.cursor() as cursor: - cursor.execute(query, params) - results = cursor.fetchall() - children = [] - for row in results: - child_id, properties_json, embedding_json = row - properties = properties_json if properties_json else {} + print("[get_children_with_embeddings] query:", query) - # 解析embedding - embedding = None - if embedding_json is not None: - try: - embedding = json.loads(embedding_json) if isinstance(embedding_json, str) else embedding_json - except (json.JSONDecodeError, TypeError): - logger.warning(f"Failed to parse embedding for child node {child_id}") + try: + with self.connection.cursor() as cursor: + cursor.execute(query) + results = cursor.fetchall() + + children = [] + for row in results: + child_id = row[0].value if hasattr(row[0], 'value') else str(row[0]) + embedding_agtype = row[1] + memory = row[2].value if 
hasattr(row[2], 'value') else str(row[2]) + + # 解析embedding + embedding = [] + if embedding_agtype and hasattr(embedding_agtype, 'value'): + if isinstance(embedding_agtype.value, list): + embedding = embedding_agtype.value + else: + try: + embedding = json.loads(embedding_agtype.value) if isinstance(embedding_agtype.value, str) else embedding_agtype.value + except (json.JSONDecodeError, TypeError, AttributeError): + logger.warning(f"Failed to parse embedding for child node {child_id}") + + children.append({ + "id": child_id, + "embedding": embedding, + "memory": memory + }) - children.append({ - "id": child_id, - "embedding": embedding, - "memory": properties.get("memory", "") - }) + return children - return children + except Exception as e: + logger.error(f"[get_children_with_embeddings] Failed: {e}", exc_info=True) + return [] def get_path(self, source_id: str, target_id: str, max_depth: int = 3) -> list[str]: """Get the path of nodes from source to target within a limited depth.""" @@ -1197,7 +1220,7 @@ def get_by_metadata(self, filters: list[dict[str, Any]]) -> list[str]: params.append(self._get_config_value("user_name")) where_str = " AND ".join(where_clauses) - query = f"SELECT properties->>'id' as id FROM {self.db_name}_graph.\"Memory\" WHERE {where_str}" + query = f"SELECT properties->>'id' as id FROM \"{self.db_name}_graph\".\"Memory\" WHERE {where_str}" with self.connection.cursor() as cursor: cursor.execute(query, params) @@ -1233,7 +1256,7 @@ def get_grouped_counts( group_by_sql = ", ".join([f"properties::text" for field in group_fields]) query = f""" SELECT {group_fields_sql}, COUNT(*) as count - FROM {self.db_name}_graph."Memory" + FROM "{self.db_name}_graph"."Memory" {where_clause} GROUP BY {group_by_sql} """ @@ -1265,7 +1288,7 @@ def clear(self) -> None: cursor.execute(f""" SELECT EXISTS ( SELECT 1 FROM information_schema.tables - WHERE table_schema = '{self.db_name}_graph' + WHERE table_schema = '"{self.db_name}_graph"' AND table_name = 'Memory' ) 
""") @@ -1277,11 +1300,11 @@ def clear(self) -> None: if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): cursor.execute(f""" - DELETE FROM {self.db_name}_graph."Memory" + DELETE FROM "{self.db_name}_graph"."Memory" WHERE properties::text LIKE %s """, (f"%{self._get_config_value('user_name')}%",)) else: - cursor.execute(f'DELETE FROM {self.db_name}_graph."Memory"') + cursor.execute(f'DELETE FROM "{self.db_name}_graph"."Memory"') logger.info(f"Cleared all nodes from graph '{self.db_name}_graph'.") except Exception as e: @@ -1292,7 +1315,7 @@ def export_graph(self, **kwargs) -> dict[str, Any]: """Export all graph nodes and edges in a structured form.""" with self.connection.cursor() as cursor: # Export nodes - node_query = f'SELECT id, properties FROM {self.db_name}_graph."Memory"' + node_query = f'SELECT id, properties FROM "{self.db_name}_graph"."Memory"' params = [] if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): @@ -1342,7 +1365,7 @@ def get_all_memory_items(self, scope: str, **kwargs) -> list[dict]: query = f""" SELECT id, properties - FROM {self.db_name}_graph."Memory" + FROM "{self.db_name}_graph"."Memory" WHERE properties->>'memory_type' = %s """ params = [scope] @@ -1370,7 +1393,7 @@ def get_structure_optimization_candidates(self, scope: str, **kwargs) -> list[di # For now, return nodes without parent relationships query = f""" SELECT id, properties - FROM {self.db_name}_graph."Memory" + FROM "{self.db_name}_graph"."Memory" WHERE properties->>'memory_type' = %s AND properties->>'status' = 'activated' """ @@ -1547,17 +1570,17 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any]) -> None: with self.connection.cursor() as cursor: # 先删除现有记录(如果存在) delete_query = f""" - DELETE FROM {self.db_name}_graph."Memory" - WHERE id = ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, %s::text::cstring) + DELETE FROM "{self.db_name}_graph"."Memory" + 
WHERE id = ag_catalog._make_graph_id('"{self.db_name}_graph"'::name, 'Memory'::name, %s::text::cstring) """ cursor.execute(delete_query, (id,)) # 然后插入新记录 if embedding_vector: insert_query = f""" - INSERT INTO {self.db_name}_graph."Memory"(id, properties, {embedding_column}) + INSERT INTO "{self.db_name}_graph"."Memory"(id, properties, {embedding_column}) VALUES ( - ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, %s::text::cstring), + ag_catalog._make_graph_id('"{self.db_name}_graph"'::name, 'Memory'::name, %s::text::cstring), %s, %s ) @@ -1565,11 +1588,11 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any]) -> None: cursor.execute(insert_query, (id, json.dumps(properties), json.dumps(embedding_vector))) else: insert_query = f""" - INSERT INTO {self.db_name}_graph."Memory"(id, properties) + INSERT INTO "{self.db_name}_graph"."Memory"(id, properties) VALUES ( - ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, %s::text::cstring), + ag_catalog._make_graph_id('"{self.db_name}_graph"'::name, 'Memory'::name, %s::text::cstring), %s ) """ cursor.execute(insert_query, (id, json.dumps(properties))) - logger.info(f"Added node {id} to graph '{self.db_name}_graph'.") \ No newline at end of file + logger.info(f"Added node {id} to graph '{self.db_name}_graph'.") \ No newline at end of file From f6e3187a624dfbb082db46efde897c141fdc9f76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 13:21:30 +0800 Subject: [PATCH 022/137] update get_children_with_embeddings --- examples/basic_modules/polardb_search.py | 162 +++++++++++++++++++++-- src/memos/graph_dbs/polardb.py | 59 ++++++--- 2 files changed, 192 insertions(+), 29 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index 7beb222c9..d26fbe03f 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ 
-116,8 +116,9 @@ def searchVector(db_name: str, vectorStr: list[float]): print("Search result:", graph.get_node(node_i["id"][1:-1])) # 2,查询单个get_node - # detail = graph.get_node(id ="bb079c5b-1937-4125-a9e5-55d4abe6c95d",user_name='memosbfb3fb32032b4077a641404dc48739cd') - # print("单个node:", detail) + detail = graph.get_node(id="194f1e30-44d2-4e3f-bc58-c950343c56b7", + user_name='memos231a22c655fd4b859ca4143b97d2b808') + print("单个node:", detail) # # # 3,查询多个get_nodes # ids = ['bb079c5b-1937-4125-a9e5-55d4abe6c95d', 'd66120af-992b-44c6-b261-a6ebe6bc57a5'] @@ -162,17 +163,162 @@ def edge_exists(db_name: str, source_id: str, target_id: str, type: str = "Memor user_name: str = None): graph = getPolarDb(db_name) isEdge_exists = graph.edge_exists(source_id=source_id, target_id=target_id, type=type, user_name=user_name, - direction=direction) + direction=direction) print("edge_exists:", isEdge_exists) +def get_children_with_embeddings(db_name: str, id: str, user_name: str = None): + graph = getPolarDb(db_name) + children = graph.get_children_with_embeddings(id=id, user_name=user_name) + print("get_children_with_embedding:", children) + + if __name__ == "__main__": # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="edge") - # vector = 
[-0.02954108,-0.024826104,-0.07641619,0.01464847,-0.0032501293,-0.044220068,0.024322564,0.018768352,-0.0030803748,-0.017776528,0.022201587,-0.0036640249,-0.013397247,-0.02462774,0.021743823,0.0115509285,-0.023223927,0.024093682,-0.0071144262,-0.014984163,-0.013305694,-0.022064257,0.055786256,-0.012374905,0.019714398,0.008865376,-0.00803377,-0.006851211,0.036529623,0.042785738,0.05923475,0.007347123,0.058776986,0.03900155,-0.05065929,-0.060730115,0.0042343233,-0.0030670234,-0.02864081,0.047332868,-0.007789629,-0.011741664,0.019638104,0.007427232,-0.036529623,-0.034484938,-0.046203714,0.009078999,-0.026367245,-0.049896352,0.027938904,-0.048950303,0.04299936,-0.024597222,-0.018600505,0.026321469,-0.03240974,-0.07080094,-0.058105595,-0.0026359616,-0.023757987,0.014465364,-0.02682501,-0.014427217,-0.01814274,0.07244889,-0.0063705584,0.019378705,-0.05078136,-0.016525306,0.013534576,-0.01814274,-0.016098058,0.018295329,-0.061645642,0.023773246,0.03549202,-0.034820635,0.029464787,0.026535092,-0.024398858,-0.004184732,0.040374845,0.0037460409,0.014419587,-0.009384176,0.010055564,0.019546552,0.023498587,-0.01620487,-0.013389617,-0.016418492,0.028198304,-0.051117055,0.023101857,0.0705568,-0.03991708,-0.039428797,0.006595626,0.0039367764,0.034515455,-0.02046208,-0.001202586,-0.018569987,-0.009666464,-0.018966716,-0.0052719233,0.0012187985,0.044403173,-0.018295329,0.01890568,0.037994467,0.020538375,0.012939482,-0.0036048968,-0.01968388,-0.0021209763,0.01451114,0.024429375,-0.031311106,-0.017852822,-0.0057830936,0.011459376,-0.020523116,-0.021209763,0.0082550235,-0.0035057145,0.031311106,0.0063438555,0.012374905,0.028595034,0.03552254,-0.041961763,0.0062675616,0.030426092,-0.030548163,0.058502328,0.029205387,-0.02604681,-0.025756892,0.004211435,0.05160534,0.0092239585,-0.049987905,-0.0013847381,-0.055267457,0.027282774,0.012535123,-0.009971641,-0.0040741055,-0.022613576,0.018478435,0.026031552,0.02720648,-0.04782115,0.007400529,0.034851152,0.042694185,-0.031005928,-0.0019149822,0
.00042104814,-0.0049972646,-0.02539068,0.031738352,0.010337852,-0.036102373,0.014213594,0.011878993,0.0008330364,-0.035583574,0.0034465867,-0.0082550235,0.009887717,0.012908964,0.018875163,0.01814274,0.091552936,0.018768352,0.03305061,-0.054260373,-0.02682501,0.03628548,0.031158516,-0.048675645,-0.017288245,0.02929694,0.013450652,-0.0058365,0.032745432,-0.027160704,-0.015655553,-0.032074045,0.020248458,0.004760753,0.04730235,-0.024841363,0.020248458,0.013786347,-0.010810876,-0.033874586,0.008491535,0.023651175,0.016021764,0.0039329617,-0.04626475,0.0050621144,-0.02720648,-0.008140582,0.017440835,-0.029830998,0.04861461,0.042327974,0.01529697,-0.027191222,-0.03515633,0.018264811,0.038696375,-0.025543269,0.018676799,0.023757987,0.0058670174,-0.0045013526,0.017059363,-0.03720101,-0.03668221,-0.0137253115,0.024581963,0.025589045,0.01270297,-0.027450623,-0.035614092,0.010299705,0.021041917,-0.013893158,0.014350923,-0.032440256,-0.009338399,0.006114973,0.025344905,0.03512581,-0.02917487,-0.018432658,0.008873005,-0.014236482,-0.027359068,-0.055572633,0.016784705,0.041900728,-0.019943282,0.020080611,0.018112222,0.009033223,-0.021087693,-0.05041515,0.0352784,0.024642998,0.007118241,0.008804341,-0.022384694,-0.030471869,-0.015762364,0.048706163,0.0064659263,0.0048790085,0.003906259,-0.02384954,-0.012771634,-0.025985776,0.0017061271,0.0736696,0.020950364,0.013847382,0.029785221,-0.022201587,-0.003059394,0.038574304,-0.01647953,-0.022354174,0.0562135,0.001626972,-0.007865923,0.012001064,0.043609716,-0.0274964,0.029846257,-0.0035266953,-0.00026106893,0.050903432,0.022750905,-0.037475668,-0.013244658,-0.015640294,0.042419527,-0.027938904,-0.008247394,-0.0047111614,-0.019088788,-0.15368687,0.033020094,0.028350893,0.02449041,0.016830482,0.03189094,-0.04861461,-0.024261529,-0.05029308,0.018386882,0.028945986,-0.044525243,0.009628317,0.021667529,0.026123105,0.037384115,-0.007335679,-0.02149968,-0.0176392,-0.0210114,0.00070810475,-0.05261242,0.061340466,0.0037002645,-0.010910058,-0.01
1360194,0.008201617,0.03793343,-0.0070305024,-0.0225678,-0.00945284,0.02281194,0.0013446837,0.023880057,-0.0144806225,0.015914952,0.018646281,0.0032444072,0.016494788,0.01853947,0.01985173,0.03964242,0.015541111,0.016494788,-0.031921457,-0.029693669,0.020782517,-0.009796164,-0.0032234264,-0.046478376,0.044647314,-0.035675127,-0.01853947,0.0069885408,-0.07635515,-0.010986352,0.00971987,0.034027174,0.0347596,0.02812201,-0.03213508,-0.05355847,-0.00038886155,-0.014061005,-0.08074969,0.020584151,0.047760114,0.03381355,0.012809781,-0.020904588,0.013954193,-0.04382334,0.01062777,0.004421244,0.039337244,0.014549287,0.013435394,0.021316575,-0.01957707,-0.09228536,0.006145491,0.0015955006,0.0006575599,0.010147117,-0.03137214,-0.048919786,0.0044517615,0.04214487,0.053466916,0.22290088,0.019592328,0.021041917,-0.034790117,0.034393385,-0.016616859,-0.0748903,0.08294696,-0.009971641,-0.030548163,0.010810876,0.04147348,-0.025421198,-0.020614669,0.019271893,0.04562388,-0.071838535,0.0140152285,0.04455576,0.016021764,0.0033302382,-0.03851327,-0.05197155,-0.024795586,-0.04925548,-0.0012359646,-0.028152527,0.020446822,-0.010955835,0.007926959,-0.05679334,0.04898082,0.02227788,0.009887717,-0.037262045,-0.021865893,0.024658257,-0.03305061,-0.005104076,0.06274428,0.0026741086,-0.0032806469,-0.027450623,0.016265905,-0.008514423,-0.011116052,-0.008338947,-0.020813035,-0.025711115,-0.021438645,-0.009872458,-0.04071054,-0.019348187,0.0037441335,-0.0155868875,-0.0049705617,-0.009040852,0.007850665,0.031463694,0.05029308,0.002864844,0.0063552996,-0.056945927,-0.046051126,0.006042494,0.053833127,-0.013702422,-0.03045661,0.048187364,0.029068056,-0.022766164,-0.002573019,0.012855558,0.005336773,-0.009414693,-0.046173196,-0.014053375,-0.0054741027,0.001794819,0.014472993,-0.00087928964,0.004680644,0.02449041,0.018325847,0.054199338,-0.006156935,0.028717104,0.086425975,0.02307134,0.0060958997,-0.0008125323,0.018829387,-0.011825588,0.0032806469,0.008880635,-0.019271893,-0.015991246,-0.008018511,-0.
03149421,0.00803377,-0.0137482,0.0004093656,-0.049682725,-0.015518223,-0.034118727,-0.0069542085,-0.05297863,-0.0052299616,-0.0038566676,0.0008196849,-0.037536703,-0.02383428,-0.033355787,-0.051239125,0.007118241,0.03488167,0.028259339,0.008842488,0.009246847,0.03970346,-0.019271893,0.038543787,-0.022659352,0.022720387,0.024566704,-0.056030396,-0.0026283322,-0.009399435,0.0077743703,-0.02191167,0.0028667513,-0.028717104,0.0070991674,0.027038634,0.063964985,0.0090103345,-0.0053215143,-0.022064257,-0.014091522,-0.0057983524,-0.021087693,0.006557479,-0.004325876,0.045440774,0.0065765525,0.0015716588,-0.049804796,0.03924569,-0.01918034,-0.021331834,0.039093103,0.017395059,0.012664823,-0.052765008,0.021331834,-0.07537858,-0.0061607496,-0.032043528,0.0067978054,-0.0121917995,0.0039978116,0.0088196,0.006580367,0.07238785,0.0110092405,-0.0074196025,0.009025593,0.03085334,-0.03137214,-0.006259932,0.011901882,-0.040741056,-0.030242987,0.008834858,-0.019744916,-0.009712241,-0.0040588467,0.033172682,0.004276285,-0.049072374,0.03488167,-0.0051269643,0.007694261,0.005935682,0.01788334,-0.0069542085,0.0085449405,-0.007194535,-0.041900728,-0.013313323,-0.0013895065,0.07617205,0.0037422262,-0.025009211,0.0051345937,0.0066299583,0.10388207,-0.008834858,0.006439223,-0.021102952,-0.03099067,-0.016555823,-0.0126571935,0.010658287,0.0057945377,-0.0055503966,-0.009681723,0.057617314,-0.017822305,-0.0034828263,0.0005464566,0.0043602088,-0.037109457,0.010849023,-0.009216329,-0.049194444,0.01179507,0.049469102,-0.008514423,-0.009681723,-0.01890568,0.03500374,-0.028228821,-0.05871595,0.0011281992,0.044799905,-0.0032806469,0.009002705,0.030120917,0.0073547526,-0.010025047,0.019012494,-0.031433176,-0.02787787,0.021621753,-0.011177087,-0.02630621,0.042297456,-0.041046232,-0.020919846,-0.002534872,-0.024765069,0.01632694,0.0029258793,-0.0018615763,-0.026748717,-0.030273505,0.006763473,0.036590658,0.027236998,0.02307134,0.031829905,0.013107329,-0.025451716,0.040252775,0.04214487,0.012710599,0.0180
0541,-0.012130764,-0.056274537,0.02009587,0.03695687,0.024963435,-0.030166693,0.009002705,-0.06988541,0.043212987,0.01840214,-0.01179507,-0.09484884,-0.023986869,0.015319858,-0.023498587,-0.034790117,0.012176541,0.0018901867,-0.00037646378,0.051818963,0.021804858,-0.05209362,-0.027710022,0.051391713,-0.022064257,-0.024139458,-0.018295329,-0.04092416,0.0063667437,0.022995045,0.0149460165,-0.030059882,0.019134564,0.017562905,-0.04962169,0.015579258,0.010223411,-0.0076675583,-0.059021126,0.04431162,-0.023315482,0.017517129,-0.0021457719,0.042968843,0.028533999,-0.029449528,-0.016769446,0.026367245,-0.015762364,-0.01140597,0.030059882,0.030929634,0.0058250558,-0.06689468,-0.013473541,0.009323141,0.025299128,-0.021728564,0.049987905,-0.0020599412,0.04287729,-0.022827199,0.020828294,-0.001273158,-0.04068002,-0.013664275,-0.0036945425,-0.019775433,-0.024642998,-0.005275738,-0.036407553,-0.0008239764,-0.027435362,0.06427016,-0.012901335,-0.02035527,0.020614669,-0.0017051734,0.042480562,-0.0013942749,0.018981975,0.030365057,0.0028915468,0.052642938,0.03408821,-0.01878361,0.0043525794,-0.014183076,0.0009870551,-0.011611964,-0.030273505,-0.010635399,0.058776986,-0.03625496,-0.008270282,-0.03295906,0.04794322,-0.0025119837,0.045959573,-0.008773823,0.048584092,0.048828233,-0.056457642,0.06039442,-0.04522715,0.015617405,0.030960152,0.047515973,0.042572115,-0.069214016,0.017959634,-0.0090484815,0.02073674,-0.013839752,0.035430986,-0.041046232,-0.009887717,0.07043473,-0.02787787,0.010993982,-0.0017557183,0.0028057161,-0.031204293,-0.0059700143,0.0054741027,-0.023666434,-0.008903523,-0.021316575,0.00014424356,0.011863735,-0.0058136117,0.004821788,-0.01710514,0.009384176,-0.02864081,-0.0058288705,-0.13269073,0.019363446,-0.013923676,0.025177058,-0.049194444,0.015129123,-0.02359014,-0.009155294,0.0034294203,-0.01878361,-0.0027256072,-0.000686647,-0.048034772,-0.018264811,0.071228184,0.037780844,-0.025726374,-0.028595034,-0.011177087,0.031463694,-0.01075747,-0.035705645,0.097290255,0.0
10475182,-0.011199976,-0.02655035,-0.019241376,-0.021698046,-0.019638104,-0.016769446,-0.02165227,-0.06939713,0.024658257,0.05297863,0.04586802,0.00984957,-0.009414693,0.013458282,-0.014610323,0.024581963,-0.023376517,0.01269534,0.010284446,0.023880057,-0.011383082,0.101684794,-0.007423417,-0.048156846,-0.008140582,0.014602694,-0.0033321455,0.019638104,0.028976504,0.025619563,0.009086629,-0.007049576,0.011596705,0.0047226055,-0.024215752,0.07684343,-0.003227241,0.016082799,-0.025207575,-0.025177058,-0.024002127,0.017135657,-0.01969914,0.043212987,0.024185235,-0.02073674,-0.033874586,-0.0021591233,-0.045471292,-0.00071954884,-0.008132952,0.019348187,0.03948983,-0.033752516,-0.084961124,0.0030994483,-0.041381925,-0.041015714,0.0112839,-0.019836469,0.032104563,0.016098058,0.020080611,-0.007942217,-0.050140493,-0.034393385,-0.05297863,-0.028137268,-0.0058174264,-0.0056114323,-0.03189094,0.021026658,-0.011756923,-0.027267516,0.006385817,-0.04718028,-0.012519864,-0.035949785,0.013076811,-0.02317815,-0.031860422,0.044769388,-0.015480076,-0.008018511,-0.043518163,0.023422293,-0.036895834,-0.040100187,-0.06039442,0.005691541,-0.036529623,-0.018585246,0.023635916,0.021408128,0.01152804,0.013984711,0.007965106,-0.027801575,-0.0026226102,-0.021286057,0.006011976,0.027389586,0.0840456,0.07476823,0.028564516,0.015029941,0.029342717,-0.047760114,-0.0241242,0.031082222,0.017837564,0.023346,-0.002166753,0.046295267,-0.033111647,-0.017715493,-0.016937293,-0.0036678396,-0.01606754,-0.010551476,-0.060730115,-0.00067806395,0.005714429,0.009002705,-0.056549195,-0.053497434,0.027160704,-0.023803763,-0.02877814,0.03189094,-0.015838658,-0.025878964,0.00014889274,-0.02319341,-0.0028724733,0.053222775,-0.0040893643,-0.0034313279,0.00036740385,0.0049057114,0.011291529,0.056518678,-0.007972735,-0.041381925,-0.04467783,0.008804341,0.026519833,0.052337762,-0.021209763,-0.019119306,0.020126387,0.00997927,-0.007755297,0.020492598,-0.014915499,-0.038421717,0.037353598,-0.0050888173,0.029708927,0.046
38682,-0.052917596,-0.0112839,0.0038433159,-0.011001611,-0.0023708395,0.015991246,-0.03381355,0.017135657,0.016418492,-0.029449528,0.047332868,-0.002183919,0.018173259,0.0017023124,0.01814274,0.01153567,0.00042152498,-0.021179246,0.058441292,-0.0020771073,-0.036102373,0.007740038,-0.0042419527,-0.02839667,0.007713335,-0.016708411,-0.020538375,0.0044899085,-0.011131311,0.0032844616,-0.036468588,-0.005886091,0.05523694,-0.015098605,-0.03161628,0.02462774,0.028488223,0.013404876,-0.012916594,-0.012420681,-0.036377035,-0.01335147,-0.040344328,0.029144352,-0.04174814,0.023315482,-0.02227788,-0.0022716573,-0.03152473,0.0482484,-0.027038634,-0.004882823,0.06152357,-0.003881463,-0.036041338,-0.0075645614,0.020660445,-0.07250992,-0.024429375,-0.036377035] - # searchVector(db_name="memtensor_memos", vectorStr=vector) + vector = [-0.02954108, -0.024826104, -0.07641619, 0.01464847, -0.0032501293, -0.044220068, 0.024322564, 0.018768352, + -0.0030803748, -0.017776528, 0.022201587, -0.0036640249, -0.013397247, -0.02462774, 0.021743823, + 0.0115509285, -0.023223927, 0.024093682, -0.0071144262, -0.014984163, -0.013305694, -0.022064257, + 0.055786256, -0.012374905, 0.019714398, 0.008865376, -0.00803377, -0.006851211, 0.036529623, 0.042785738, + 0.05923475, 0.007347123, 0.058776986, 0.03900155, -0.05065929, -0.060730115, 0.0042343233, -0.0030670234, + -0.02864081, 0.047332868, -0.007789629, -0.011741664, 0.019638104, 0.007427232, -0.036529623, + -0.034484938, -0.046203714, 0.009078999, -0.026367245, -0.049896352, 0.027938904, -0.048950303, + 0.04299936, -0.024597222, -0.018600505, 0.026321469, -0.03240974, -0.07080094, -0.058105595, + -0.0026359616, -0.023757987, 0.014465364, -0.02682501, -0.014427217, -0.01814274, 0.07244889, + -0.0063705584, 0.019378705, -0.05078136, -0.016525306, 0.013534576, -0.01814274, -0.016098058, + 0.018295329, -0.061645642, 0.023773246, 0.03549202, -0.034820635, 0.029464787, 0.026535092, -0.024398858, + -0.004184732, 0.040374845, 0.0037460409, 0.014419587, 
-0.009384176, 0.010055564, 0.019546552, 0.023498587, + -0.01620487, -0.013389617, -0.016418492, 0.028198304, -0.051117055, 0.023101857, 0.0705568, -0.03991708, + -0.039428797, 0.006595626, 0.0039367764, 0.034515455, -0.02046208, -0.001202586, -0.018569987, + -0.009666464, -0.018966716, -0.0052719233, 0.0012187985, 0.044403173, -0.018295329, 0.01890568, + 0.037994467, 0.020538375, 0.012939482, -0.0036048968, -0.01968388, -0.0021209763, 0.01451114, 0.024429375, + -0.031311106, -0.017852822, -0.0057830936, 0.011459376, -0.020523116, -0.021209763, 0.0082550235, + -0.0035057145, 0.031311106, 0.0063438555, 0.012374905, 0.028595034, 0.03552254, -0.041961763, + 0.0062675616, 0.030426092, -0.030548163, 0.058502328, 0.029205387, -0.02604681, -0.025756892, 0.004211435, + 0.05160534, 0.0092239585, -0.049987905, -0.0013847381, -0.055267457, 0.027282774, 0.012535123, + -0.009971641, -0.0040741055, -0.022613576, 0.018478435, 0.026031552, 0.02720648, -0.04782115, 0.007400529, + 0.034851152, 0.042694185, -0.031005928, -0.0019149822, 0.00042104814, -0.0049972646, -0.02539068, + 0.031738352, 0.010337852, -0.036102373, 0.014213594, 0.011878993, 0.0008330364, -0.035583574, + 0.0034465867, -0.0082550235, 0.009887717, 0.012908964, 0.018875163, 0.01814274, 0.091552936, 0.018768352, + 0.03305061, -0.054260373, -0.02682501, 0.03628548, 0.031158516, -0.048675645, -0.017288245, 0.02929694, + 0.013450652, -0.0058365, 0.032745432, -0.027160704, -0.015655553, -0.032074045, 0.020248458, 0.004760753, + 0.04730235, -0.024841363, 0.020248458, 0.013786347, -0.010810876, -0.033874586, 0.008491535, 0.023651175, + 0.016021764, 0.0039329617, -0.04626475, 0.0050621144, -0.02720648, -0.008140582, 0.017440835, + -0.029830998, 0.04861461, 0.042327974, 0.01529697, -0.027191222, -0.03515633, 0.018264811, 0.038696375, + -0.025543269, 0.018676799, 0.023757987, 0.0058670174, -0.0045013526, 0.017059363, -0.03720101, + -0.03668221, -0.0137253115, 0.024581963, 0.025589045, 0.01270297, -0.027450623, -0.035614092, 
0.010299705, + 0.021041917, -0.013893158, 0.014350923, -0.032440256, -0.009338399, 0.006114973, 0.025344905, 0.03512581, + -0.02917487, -0.018432658, 0.008873005, -0.014236482, -0.027359068, -0.055572633, 0.016784705, + 0.041900728, -0.019943282, 0.020080611, 0.018112222, 0.009033223, -0.021087693, -0.05041515, 0.0352784, + 0.024642998, 0.007118241, 0.008804341, -0.022384694, -0.030471869, -0.015762364, 0.048706163, + 0.0064659263, 0.0048790085, 0.003906259, -0.02384954, -0.012771634, -0.025985776, 0.0017061271, 0.0736696, + 0.020950364, 0.013847382, 0.029785221, -0.022201587, -0.003059394, 0.038574304, -0.01647953, -0.022354174, + 0.0562135, 0.001626972, -0.007865923, 0.012001064, 0.043609716, -0.0274964, 0.029846257, -0.0035266953, + -0.00026106893, 0.050903432, 0.022750905, -0.037475668, -0.013244658, -0.015640294, 0.042419527, + -0.027938904, -0.008247394, -0.0047111614, -0.019088788, -0.15368687, 0.033020094, 0.028350893, + 0.02449041, 0.016830482, 0.03189094, -0.04861461, -0.024261529, -0.05029308, 0.018386882, 0.028945986, + -0.044525243, 0.009628317, 0.021667529, 0.026123105, 0.037384115, -0.007335679, -0.02149968, -0.0176392, + -0.0210114, 0.00070810475, -0.05261242, 0.061340466, 0.0037002645, -0.010910058, -0.011360194, + 0.008201617, 0.03793343, -0.0070305024, -0.0225678, -0.00945284, 0.02281194, 0.0013446837, 0.023880057, + -0.0144806225, 0.015914952, 0.018646281, 0.0032444072, 0.016494788, 0.01853947, 0.01985173, 0.03964242, + 0.015541111, 0.016494788, -0.031921457, -0.029693669, 0.020782517, -0.009796164, -0.0032234264, + -0.046478376, 0.044647314, -0.035675127, -0.01853947, 0.0069885408, -0.07635515, -0.010986352, 0.00971987, + 0.034027174, 0.0347596, 0.02812201, -0.03213508, -0.05355847, -0.00038886155, -0.014061005, -0.08074969, + 0.020584151, 0.047760114, 0.03381355, 0.012809781, -0.020904588, 0.013954193, -0.04382334, 0.01062777, + 0.004421244, 0.039337244, 0.014549287, 0.013435394, 0.021316575, -0.01957707, -0.09228536, 0.006145491, + 
0.0015955006, 0.0006575599, 0.010147117, -0.03137214, -0.048919786, 0.0044517615, 0.04214487, 0.053466916, + 0.22290088, 0.019592328, 0.021041917, -0.034790117, 0.034393385, -0.016616859, -0.0748903, 0.08294696, + -0.009971641, -0.030548163, 0.010810876, 0.04147348, -0.025421198, -0.020614669, 0.019271893, 0.04562388, + -0.071838535, 0.0140152285, 0.04455576, 0.016021764, 0.0033302382, -0.03851327, -0.05197155, -0.024795586, + -0.04925548, -0.0012359646, -0.028152527, 0.020446822, -0.010955835, 0.007926959, -0.05679334, 0.04898082, + 0.02227788, 0.009887717, -0.037262045, -0.021865893, 0.024658257, -0.03305061, -0.005104076, 0.06274428, + 0.0026741086, -0.0032806469, -0.027450623, 0.016265905, -0.008514423, -0.011116052, -0.008338947, + -0.020813035, -0.025711115, -0.021438645, -0.009872458, -0.04071054, -0.019348187, 0.0037441335, + -0.0155868875, -0.0049705617, -0.009040852, 0.007850665, 0.031463694, 0.05029308, 0.002864844, + 0.0063552996, -0.056945927, -0.046051126, 0.006042494, 0.053833127, -0.013702422, -0.03045661, + 0.048187364, 0.029068056, -0.022766164, -0.002573019, 0.012855558, 0.005336773, -0.009414693, + -0.046173196, -0.014053375, -0.0054741027, 0.001794819, 0.014472993, -0.00087928964, 0.004680644, + 0.02449041, 0.018325847, 0.054199338, -0.006156935, 0.028717104, 0.086425975, 0.02307134, 0.0060958997, + -0.0008125323, 0.018829387, -0.011825588, 0.0032806469, 0.008880635, -0.019271893, -0.015991246, + -0.008018511, -0.03149421, 0.00803377, -0.0137482, 0.0004093656, -0.049682725, -0.015518223, -0.034118727, + -0.0069542085, -0.05297863, -0.0052299616, -0.0038566676, 0.0008196849, -0.037536703, -0.02383428, + -0.033355787, -0.051239125, 0.007118241, 0.03488167, 0.028259339, 0.008842488, 0.009246847, 0.03970346, + -0.019271893, 0.038543787, -0.022659352, 0.022720387, 0.024566704, -0.056030396, -0.0026283322, + -0.009399435, 0.0077743703, -0.02191167, 0.0028667513, -0.028717104, 0.0070991674, 0.027038634, + 0.063964985, 0.0090103345, -0.0053215143, 
-0.022064257, -0.014091522, -0.0057983524, -0.021087693, + 0.006557479, -0.004325876, 0.045440774, 0.0065765525, 0.0015716588, -0.049804796, 0.03924569, -0.01918034, + -0.021331834, 0.039093103, 0.017395059, 0.012664823, -0.052765008, 0.021331834, -0.07537858, + -0.0061607496, -0.032043528, 0.0067978054, -0.0121917995, 0.0039978116, 0.0088196, 0.006580367, + 0.07238785, 0.0110092405, -0.0074196025, 0.009025593, 0.03085334, -0.03137214, -0.006259932, 0.011901882, + -0.040741056, -0.030242987, 0.008834858, -0.019744916, -0.009712241, -0.0040588467, 0.033172682, + 0.004276285, -0.049072374, 0.03488167, -0.0051269643, 0.007694261, 0.005935682, 0.01788334, -0.0069542085, + 0.0085449405, -0.007194535, -0.041900728, -0.013313323, -0.0013895065, 0.07617205, 0.0037422262, + -0.025009211, 0.0051345937, 0.0066299583, 0.10388207, -0.008834858, 0.006439223, -0.021102952, + -0.03099067, -0.016555823, -0.0126571935, 0.010658287, 0.0057945377, -0.0055503966, -0.009681723, + 0.057617314, -0.017822305, -0.0034828263, 0.0005464566, 0.0043602088, -0.037109457, 0.010849023, + -0.009216329, -0.049194444, 0.01179507, 0.049469102, -0.008514423, -0.009681723, -0.01890568, 0.03500374, + -0.028228821, -0.05871595, 0.0011281992, 0.044799905, -0.0032806469, 0.009002705, 0.030120917, + 0.0073547526, -0.010025047, 0.019012494, -0.031433176, -0.02787787, 0.021621753, -0.011177087, + -0.02630621, 0.042297456, -0.041046232, -0.020919846, -0.002534872, -0.024765069, 0.01632694, + 0.0029258793, -0.0018615763, -0.026748717, -0.030273505, 0.006763473, 0.036590658, 0.027236998, + 0.02307134, 0.031829905, 0.013107329, -0.025451716, 0.040252775, 0.04214487, 0.012710599, 0.01800541, + -0.012130764, -0.056274537, 0.02009587, 0.03695687, 0.024963435, -0.030166693, 0.009002705, -0.06988541, + 0.043212987, 0.01840214, -0.01179507, -0.09484884, -0.023986869, 0.015319858, -0.023498587, -0.034790117, + 0.012176541, 0.0018901867, -0.00037646378, 0.051818963, 0.021804858, -0.05209362, -0.027710022, + 0.051391713, 
-0.022064257, -0.024139458, -0.018295329, -0.04092416, 0.0063667437, 0.022995045, + 0.0149460165, -0.030059882, 0.019134564, 0.017562905, -0.04962169, 0.015579258, 0.010223411, + -0.0076675583, -0.059021126, 0.04431162, -0.023315482, 0.017517129, -0.0021457719, 0.042968843, + 0.028533999, -0.029449528, -0.016769446, 0.026367245, -0.015762364, -0.01140597, 0.030059882, 0.030929634, + 0.0058250558, -0.06689468, -0.013473541, 0.009323141, 0.025299128, -0.021728564, 0.049987905, + -0.0020599412, 0.04287729, -0.022827199, 0.020828294, -0.001273158, -0.04068002, -0.013664275, + -0.0036945425, -0.019775433, -0.024642998, -0.005275738, -0.036407553, -0.0008239764, -0.027435362, + 0.06427016, -0.012901335, -0.02035527, 0.020614669, -0.0017051734, 0.042480562, -0.0013942749, + 0.018981975, 0.030365057, 0.0028915468, 0.052642938, 0.03408821, -0.01878361, 0.0043525794, -0.014183076, + 0.0009870551, -0.011611964, -0.030273505, -0.010635399, 0.058776986, -0.03625496, -0.008270282, + -0.03295906, 0.04794322, -0.0025119837, 0.045959573, -0.008773823, 0.048584092, 0.048828233, -0.056457642, + 0.06039442, -0.04522715, 0.015617405, 0.030960152, 0.047515973, 0.042572115, -0.069214016, 0.017959634, + -0.0090484815, 0.02073674, -0.013839752, 0.035430986, -0.041046232, -0.009887717, 0.07043473, -0.02787787, + 0.010993982, -0.0017557183, 0.0028057161, -0.031204293, -0.0059700143, 0.0054741027, -0.023666434, + -0.008903523, -0.021316575, 0.00014424356, 0.011863735, -0.0058136117, 0.004821788, -0.01710514, + 0.009384176, -0.02864081, -0.0058288705, -0.13269073, 0.019363446, -0.013923676, 0.025177058, + -0.049194444, 0.015129123, -0.02359014, -0.009155294, 0.0034294203, -0.01878361, -0.0027256072, + -0.000686647, -0.048034772, -0.018264811, 0.071228184, 0.037780844, -0.025726374, -0.028595034, + -0.011177087, 0.031463694, -0.01075747, -0.035705645, 0.097290255, 0.010475182, -0.011199976, -0.02655035, + -0.019241376, -0.021698046, -0.019638104, -0.016769446, -0.02165227, -0.06939713, 
0.024658257, 0.05297863, + 0.04586802, 0.00984957, -0.009414693, 0.013458282, -0.014610323, 0.024581963, -0.023376517, 0.01269534, + 0.010284446, 0.023880057, -0.011383082, 0.101684794, -0.007423417, -0.048156846, -0.008140582, + 0.014602694, -0.0033321455, 0.019638104, 0.028976504, 0.025619563, 0.009086629, -0.007049576, 0.011596705, + 0.0047226055, -0.024215752, 0.07684343, -0.003227241, 0.016082799, -0.025207575, -0.025177058, + -0.024002127, 0.017135657, -0.01969914, 0.043212987, 0.024185235, -0.02073674, -0.033874586, + -0.0021591233, -0.045471292, -0.00071954884, -0.008132952, 0.019348187, 0.03948983, -0.033752516, + -0.084961124, 0.0030994483, -0.041381925, -0.041015714, 0.0112839, -0.019836469, 0.032104563, 0.016098058, + 0.020080611, -0.007942217, -0.050140493, -0.034393385, -0.05297863, -0.028137268, -0.0058174264, + -0.0056114323, -0.03189094, 0.021026658, -0.011756923, -0.027267516, 0.006385817, -0.04718028, + -0.012519864, -0.035949785, 0.013076811, -0.02317815, -0.031860422, 0.044769388, -0.015480076, + -0.008018511, -0.043518163, 0.023422293, -0.036895834, -0.040100187, -0.06039442, 0.005691541, + -0.036529623, -0.018585246, 0.023635916, 0.021408128, 0.01152804, 0.013984711, 0.007965106, -0.027801575, + -0.0026226102, -0.021286057, 0.006011976, 0.027389586, 0.0840456, 0.07476823, 0.028564516, 0.015029941, + 0.029342717, -0.047760114, -0.0241242, 0.031082222, 0.017837564, 0.023346, -0.002166753, 0.046295267, + -0.033111647, -0.017715493, -0.016937293, -0.0036678396, -0.01606754, -0.010551476, -0.060730115, + -0.00067806395, 0.005714429, 0.009002705, -0.056549195, -0.053497434, 0.027160704, -0.023803763, + -0.02877814, 0.03189094, -0.015838658, -0.025878964, 0.00014889274, -0.02319341, -0.0028724733, + 0.053222775, -0.0040893643, -0.0034313279, 0.00036740385, 0.0049057114, 0.011291529, 0.056518678, + -0.007972735, -0.041381925, -0.04467783, 0.008804341, 0.026519833, 0.052337762, -0.021209763, + -0.019119306, 0.020126387, 0.00997927, -0.007755297, 
0.020492598, -0.014915499, -0.038421717, 0.037353598, + -0.0050888173, 0.029708927, 0.04638682, -0.052917596, -0.0112839, 0.0038433159, -0.011001611, + -0.0023708395, 0.015991246, -0.03381355, 0.017135657, 0.016418492, -0.029449528, 0.047332868, + -0.002183919, 0.018173259, 0.0017023124, 0.01814274, 0.01153567, 0.00042152498, -0.021179246, 0.058441292, + -0.0020771073, -0.036102373, 0.007740038, -0.0042419527, -0.02839667, 0.007713335, -0.016708411, + -0.020538375, 0.0044899085, -0.011131311, 0.0032844616, -0.036468588, -0.005886091, 0.05523694, + -0.015098605, -0.03161628, 0.02462774, 0.028488223, 0.013404876, -0.012916594, -0.012420681, -0.036377035, + -0.01335147, -0.040344328, 0.029144352, -0.04174814, 0.023315482, -0.02227788, -0.0022716573, -0.03152473, + 0.0482484, -0.027038634, -0.004882823, 0.06152357, -0.003881463, -0.036041338, -0.0075645614, 0.020660445, + -0.07250992, -0.024429375, -0.036377035] + searchVector(db_name="memtensor_memos", vectorStr=vector) # add_edge(db_name="memtensor_memos",source_id="13bb9df6-0609-4442-8bed-bba77dadac92", target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", edge_type="PARENT", user_name="memosbfb3fb32032b4077a641404dc48739cd") - edge_exists(db_name="memtensor_memos", source_id="13bb9df6-0609-4442-8bed-bba77dadac92", - target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", type="PARENT", direction="OUTGOING", - user_name="memosbfb3fb32032b4077a641404dc48739cd") + # edge_exists(db_name="memtensor_memos", source_id="13bb9df6-0609-4442-8bed-bba77dadac92", + # target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", type="PARENT", direction="OUTGOING", + # user_name="memosbfb3fb32032b4077a641404dc48739cd") + + get_children_with_embeddings(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",user_name="memos07ea708ac7eb412887c5c283f874ea30") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index a587e8f31..5b618769e 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py 
@@ -996,16 +996,8 @@ def get_neighbors_by_tag( def get_children_with_embeddings(self, id: str, user_name: str | None = None) -> list[dict[str, Any]]: """Get children nodes with their embeddings.""" user_name = user_name if user_name else self._get_config_value("user_name") - # where_user = f"AND p.user_name = '{user_name}' AND c.user_name = '{user_name}'" - where_user = f"AND p.user_name = '{user_name}'" - - query1 = f""" - SELECT * FROM cypher('{self.db_name}_graph', $$ - MATCH (p:Memory)-[r:PARENT]->(c:Memory) - WHERE p.id = '{id}' {where_user} - RETURN c.id AS id, c.embedding AS embedding, c.memory AS memory - $$) AS (id agtype, embedding agtype, memory agtype) - """ + where_user = f"AND p.user_name = '{user_name}' AND c.user_name = '{user_name}'" + query = f""" WITH t as ( SELECT * @@ -1030,20 +1022,45 @@ def get_children_with_embeddings(self, id: str, user_name: str | None = None) -> children = [] for row in results: - child_id = row[0].value if hasattr(row[0], 'value') else str(row[0]) - embedding_agtype = row[1] - memory = row[2].value if hasattr(row[2], 'value') else str(row[2]) + # 处理 child_id - 移除可能的引号 + child_id_raw = row[0].value if hasattr(row[0], 'value') else str(row[0]) + if isinstance(child_id_raw, str): + # 如果字符串以引号开始和结束,去掉引号 + if child_id_raw.startswith('"') and child_id_raw.endswith('"'): + child_id = child_id_raw[1:-1] + else: + child_id = child_id_raw + else: + child_id = str(child_id_raw) - # 解析embedding + # 处理 embedding - 从数据库的embedding列获取 + embedding_raw = row[1] embedding = [] - if embedding_agtype and hasattr(embedding_agtype, 'value'): - if isinstance(embedding_agtype.value, list): - embedding = embedding_agtype.value + if embedding_raw is not None: + try: + if isinstance(embedding_raw, str): + # 如果是JSON字符串,解析它 + embedding = json.loads(embedding_raw) + elif isinstance(embedding_raw, list): + # 如果已经是列表,直接使用 + embedding = embedding_raw + else: + # 尝试转换为列表 + embedding = list(embedding_raw) + except (json.JSONDecodeError, TypeError, 
ValueError) as e: + logger.warning(f"Failed to parse embedding for child node {child_id}: {e}") + embedding = [] + + # 处理 memory - 移除可能的引号 + memory_raw = row[2].value if hasattr(row[2], 'value') else str(row[2]) + if isinstance(memory_raw, str): + # 如果字符串以引号开始和结束,去掉引号 + if memory_raw.startswith('"') and memory_raw.endswith('"'): + memory = memory_raw[1:-1] else: - try: - embedding = json.loads(embedding_agtype.value) if isinstance(embedding_agtype.value, str) else embedding_agtype.value - except (json.JSONDecodeError, TypeError, AttributeError): - logger.warning(f"Failed to parse embedding for child node {child_id}") + memory = memory_raw + else: + memory = str(memory_raw) children.append({ "id": child_id, From 382dc18cff8dc3b6e0337c8807b619307a68d8ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 14:16:01 +0800 Subject: [PATCH 023/137] update get_children_with_embeddings --- examples/basic_modules/polardb_search.py | 3 ++- src/memos/graph_dbs/polardb.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index d26fbe03f..9f09a5482 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -315,10 +315,11 @@ def get_children_with_embeddings(db_name: str, id: str, user_name: str = None): 0.0482484, -0.027038634, -0.004882823, 0.06152357, -0.003881463, -0.036041338, -0.0075645614, 0.020660445, -0.07250992, -0.024429375, -0.036377035] searchVector(db_name="memtensor_memos", vectorStr=vector) + # searchVector(db_name="test_1020_02", vectorStr=vector) # add_edge(db_name="memtensor_memos",source_id="13bb9df6-0609-4442-8bed-bba77dadac92", target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", edge_type="PARENT", user_name="memosbfb3fb32032b4077a641404dc48739cd") # edge_exists(db_name="memtensor_memos", source_id="13bb9df6-0609-4442-8bed-bba77dadac92", # 
target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", type="PARENT", direction="OUTGOING", # user_name="memosbfb3fb32032b4077a641404dc48739cd") - get_children_with_embeddings(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",user_name="memos07ea708ac7eb412887c5c283f874ea30") + # get_children_with_embeddings(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",user_name="memos07ea708ac7eb412887c5c283f874ea30") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 5b618769e..e81f07198 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1172,7 +1172,7 @@ def search_by_embedding( embedding, ag_catalog.agtype_access_operator(properties, '"id"'::agtype) AS old_id, (1 - (embedding <=> %s::vector(1024))) AS scope - FROM memtensor_memos_graph."Memory" + FROM "{self.db_name}_graph"."Memory" {where_clause} ORDER BY scope DESC LIMIT {top_k} From 13c43a5c51a431a79bb672999f40c5d002acae85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 14:34:58 +0800 Subject: [PATCH 024/137] update get_subgraph --- examples/basic_modules/polardb_search.py | 8 ++ src/memos/graph_dbs/polardb.py | 99 +++++++++++++++++++----- 2 files changed, 86 insertions(+), 21 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index 9f09a5482..042024c63 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -173,6 +173,12 @@ def get_children_with_embeddings(db_name: str, id: str, user_name: str = None): print("get_children_with_embedding:", children) +def get_subgraph(db_name, center_id, depth, center_status, user_name): + graph = getPolarDb(db_name) + subgraph = graph.get_subgraph(center_id, depth, center_status, user_name) + print("get_subgraph:", subgraph) + + if __name__ == "__main__": # 
handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="edge") @@ -323,3 +329,5 @@ def get_children_with_embeddings(db_name: str, id: str, user_name: str = None): # user_name="memosbfb3fb32032b4077a641404dc48739cd") # get_children_with_embeddings(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",user_name="memos07ea708ac7eb412887c5c283f874ea30") + + get_subgraph(db_name="memtensor_memos", center_id="13bb9df6-0609-4442-8bed-bba77dadac92", depth=2, center_status="activated", user_name="memos07ea708ac7eb412887c5c283f874ea30") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index e81f07198..a5eac9f61 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1079,7 +1079,11 @@ def get_path(self, source_id: str, target_id: str, max_depth: int = 3) -> list[s raise NotImplementedError def get_subgraph( - self, center_id: str, depth: int = 2, center_status: str = "activated" + self, + center_id: str, + depth: int = 2, + center_status: str = "activated", + user_name: str | None = None, ) -> dict[str, Any]: """ Retrieve a local subgraph centered at a given node. @@ -1087,6 +1091,7 @@ def get_subgraph( center_id: The ID of the center node. depth: The hop distance for neighbors. center_status: Required status for center node. + user_name (str, optional): User name for filtering in non-multi-db mode Returns: { "core_node": {...}, @@ -1094,30 +1099,82 @@ def get_subgraph( "edges": [...] 
} """ - # 获取中心节点 - core_node = self.get_node(center_id) - if not core_node: - return {"core_node": None, "neighbors": [], "edges": []} + if not 1 <= depth <= 5: + raise ValueError("depth must be 1-5") - # 检查中心节点状态 - if center_status and core_node.get("metadata", {}).get("status") != center_status: - return {"core_node": None, "neighbors": [], "edges": []} + user_name = user_name if user_name else self._get_config_value("user_name") - # 获取邻居节点(简化实现,只获取直接连接的节点) - edges = self.get_edges(center_id, direction="ANY") - neighbor_ids = set() - for edge in edges: - if edge["from"] == center_id: - neighbor_ids.add(edge["to"]) - else: - neighbor_ids.add(edge["from"]) + # 使用 cypher 查询获取子图 + query = f""" + WITH center AS ( + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (center:Memory) + WHERE center.id = '{center_id}' + AND center.status = '{center_status}' + AND center.user_name = '{user_name}' + RETURN center + $$) AS (center agtype) + ), + neighbors AS ( + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (center:Memory)-[e]->{{1,{depth}}}(neighbor:Memory) + WHERE center.id = '{center_id}' + AND center.status = '{center_status}' + AND center.user_name = '{user_name}' + AND neighbor.user_name = '{user_name}' + RETURN neighbor, e + $$) AS (neighbor agtype, e agtype) + ) + SELECT + (SELECT center FROM center) as center, + ARRAY_AGG(neighbor) as neighbors, + ARRAY_AGG(e) as edges + FROM neighbors + """ - # 获取邻居节点详情 - neighbors = [] - if neighbor_ids: - neighbors = self.get_nodes(list(neighbor_ids)) + try: + with self.connection.cursor() as cursor: + cursor.execute(query) + result = cursor.fetchone() + + if not result or not result[0]: + return {"core_node": None, "neighbors": [], "edges": []} + + # 解析中心节点 + center_agtype = result[0] + if center_agtype and hasattr(center_agtype, 'value'): + center_props = center_agtype.value + core_node = self._parse_node(center_props) + else: + return {"core_node": None, "neighbors": [], "edges": []} + + # 解析邻居节点 + neighbors = 
[] + neighbors_agtype = result[1] if result[1] else [] + for neighbor_agtype in neighbors_agtype: + if neighbor_agtype and hasattr(neighbor_agtype, 'value'): + neighbor_props = neighbor_agtype.value + neighbor_parsed = self._parse_node(neighbor_props) + neighbors.append(neighbor_parsed) + + # 解析边 + edges = [] + edges_agtype = result[2] if result[2] else [] + for edge_agtype in edges_agtype: + if edge_agtype and hasattr(edge_agtype, 'value'): + edge_data = edge_agtype.value + if isinstance(edge_data, dict): + edges.append({ + "type": edge_data.get("type", ""), + "source": edge_data.get("source", ""), + "target": edge_data.get("target", "") + }) + + return {"core_node": core_node, "neighbors": neighbors, "edges": edges} - return {"core_node": core_node, "neighbors": neighbors, "edges": edges} + except Exception as e: + logger.error(f"Failed to get subgraph: {e}", exc_info=True) + return {"core_node": None, "neighbors": [], "edges": []} def get_context_chain(self, id: str, type: str = "FOLLOWS") -> list[str]: """Get the ordered context chain starting from a node.""" From 5cf7772b4b42c6a78ff925f1c1acf50a9622db0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 14:52:36 +0800 Subject: [PATCH 025/137] update get_grouped_counts --- examples/basic_modules/polardb_search.py | 12 ++- src/memos/graph_dbs/polardb.py | 97 +++++++++++++++++------- 2 files changed, 81 insertions(+), 28 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index 042024c63..34ad3fd91 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -179,6 +179,13 @@ def get_subgraph(db_name, center_id, depth, center_status, user_name): print("get_subgraph:", subgraph) +def get_grouped_counts(db_name, user_name): + graph = getPolarDb(db_name) + grouped_counts = graph.get_grouped_counts(group_fields=["status"], where_clause="user_name = %s", + 
params=[user_name], user_name=user_name) + print("get_grouped_counts:", grouped_counts) + + if __name__ == "__main__": # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="edge") @@ -330,4 +337,7 @@ def get_subgraph(db_name, center_id, depth, center_status, user_name): # get_children_with_embeddings(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",user_name="memos07ea708ac7eb412887c5c283f874ea30") - get_subgraph(db_name="memtensor_memos", center_id="13bb9df6-0609-4442-8bed-bba77dadac92", depth=2, center_status="activated", user_name="memos07ea708ac7eb412887c5c283f874ea30") + get_subgraph(db_name="memtensor_memos", center_id="13bb9df6-0609-4442-8bed-bba77dadac92", depth=2, + center_status="activated", user_name="memos07ea708ac7eb412887c5c283f874ea30") + + get_grouped_counts(db_name="memtensor_memos", user_name="memos07ea708ac7eb412887c5c283f874ea30") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index a5eac9f61..a72610725 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1302,45 +1302,88 @@ def get_by_metadata(self, filters: list[dict[str, Any]]) -> list[str]: return [row[0] for row in results if row[0]] def get_grouped_counts( - self, - group_fields: list[str], - where_clause: str = "", - params: dict[str, Any] | None = None, + self, + group_fields: list[str], + where_clause: str = "", + params: dict[str, Any] | None = None, + user_name: str | None = None, ) -> list[dict[str, Any]]: - """Count nodes grouped by any fields.""" + """ + Count nodes grouped by any fields. + + Args: + group_fields (list[str]): Fields to group by, e.g., ["memory_type", "status"] + where_clause (str, optional): Extra WHERE condition. E.g., + "WHERE n.status = 'activated'" + params (dict, optional): Parameters for WHERE clause. 
+ user_name (str, optional): User name for filtering in non-multi-db mode + + Returns: + list[dict]: e.g., [{ 'memory_type': 'WorkingMemory', 'status': 'active', 'count': 10 }, ...] + """ if not group_fields: raise ValueError("group_fields cannot be empty") + + user_name = user_name if user_name else self._get_config_value("user_name") + + # Build user clause + user_clause = f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype" + if where_clause: + where_clause = where_clause.strip() + if where_clause.upper().startswith("WHERE"): + where_clause += f" AND {user_clause}" + else: + where_clause = f"WHERE {where_clause} AND {user_clause}" + else: + where_clause = f"WHERE {user_clause}" - final_params = params.copy() if params else {} + # Inline parameters if provided + if params: + for key, value in params.items(): + # Handle different value types appropriately + if isinstance(value, str): + value = f"'{value}'" + where_clause = where_clause.replace(f"${key}", str(value)) - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - user_name = self._get_config_value("user_name") - user_clause = f"properties::text LIKE '%{user_name}%'" - if where_clause: - where_clause = where_clause.strip() - if where_clause.upper().startswith("WHERE"): - where_clause += f" AND {user_clause}" - else: - where_clause = f"WHERE {where_clause} AND {user_clause}" - else: - where_clause = f"WHERE {user_clause}" + # Build return fields and group by fields + return_fields = [] + group_by_fields = [] - # Use text-based queries to avoid agtype issues - group_fields_sql = ", ".join([f"properties::text as {field}" for field in group_fields]) - group_by_sql = ", ".join([f"properties::text" for field in group_fields]) + for field in group_fields: + alias = field.replace(".", "_") + return_fields.append(f"ag_catalog.agtype_access_operator(properties, '\"{field}\"'::agtype) AS {alias}") + group_by_fields.append(alias) 
+ + # Full SQL query construction query = f""" - SELECT {group_fields_sql}, COUNT(*) as count + SELECT {", ".join(return_fields)}, COUNT(*) AS count FROM "{self.db_name}_graph"."Memory" {where_clause} - GROUP BY {group_by_sql} + GROUP BY {", ".join(group_by_fields)} """ - with self.connection.cursor() as cursor: - cursor.execute(query) - results = cursor.fetchall() + try: + with self.connection.cursor() as cursor: + cursor.execute(query) + results = cursor.fetchall() + + output = [] + for row in results: + group_values = {} + for i, field in enumerate(group_fields): + value = row[i] + if hasattr(value, 'value'): + group_values[field] = value.value + else: + group_values[field] = str(value) + count_value = row[-1] # Last column is count + output.append({**group_values, "count": count_value}) + + return output - # Simplified return - just return basic counts - return [{"memory_type": "LongTermMemory", "status": "activated", "count": len(results)}] + except Exception as e: + logger.error(f"Failed to get grouped counts: {e}", exc_info=True) + return [] def deduplicate_nodes(self) -> None: """Deduplicate redundant or semantically similar nodes.""" From 7f53eaf82bfa7f5c633a163c95ad8da52e5d154e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 15:27:09 +0800 Subject: [PATCH 026/137] update get_all_memory_items --- src/memos/graph_dbs/polardb.py | 102 ++++++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 27 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index a72610725..9df2ce9ec 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -151,7 +151,7 @@ def __init__(self, config: PolarDBGraphDBConfig): self._ensure_database_exists() # Create graph and tables - self._create_graph() + # self._create_graph() # Handle embedding_dimension embedding_dim = config.get("embedding_dimension", 1024) if isinstance(config,dict) else 
config.embedding_dimension @@ -1104,7 +1104,7 @@ def get_subgraph( user_name = user_name if user_name else self._get_config_value("user_name") - # 使用 cypher 查询获取子图 + # 使用简化的查询获取子图(暂时只获取直接邻居) query = f""" WITH center AS ( SELECT * FROM cypher('{self.db_name}_graph', $$ @@ -1117,7 +1117,7 @@ def get_subgraph( ), neighbors AS ( SELECT * FROM cypher('{self.db_name}_graph', $$ - MATCH (center:Memory)-[e]->{{1,{depth}}}(neighbor:Memory) + MATCH (center:Memory)-[e]->(neighbor:Memory) WHERE center.id = '{center_id}' AND center.status = '{center_status}' AND center.user_name = '{user_name}' @@ -1338,7 +1338,7 @@ def get_grouped_counts( where_clause = f"WHERE {user_clause}" # Inline parameters if provided - if params: + if params and isinstance(params, dict): for key, value in params.items(): # Handle different value types appropriately if isinstance(value, str): @@ -1364,7 +1364,11 @@ def get_grouped_counts( try: with self.connection.cursor() as cursor: - cursor.execute(query) + # 处理参数化查询 + if params and isinstance(params, list): + cursor.execute(query, params) + else: + cursor.execute(query) results = cursor.fetchall() output = [] @@ -1475,34 +1479,78 @@ def import_graph(self, data: dict[str, Any]) -> None: for edge in data.get("edges", []): self.add_edge(edge["source"], edge["target"], edge["type"]) - def get_all_memory_items(self, scope: str, **kwargs) -> list[dict]: - """Retrieve all memory items of a specific memory_type.""" + def get_all_memory_items( + self, scope: str, include_embedding: bool = False, user_name: str | None = None + ) -> list[dict]: + """ + Retrieve all memory items of a specific memory_type. + + Args: + scope (str): Must be one of 'WorkingMemory', 'LongTermMemory', or 'UserMemory'. + include_embedding: with/without embedding + user_name (str, optional): User name for filtering in non-multi-db mode + + Returns: + list[dict]: Full list of memory items under this scope. 
+ """ + user_name = user_name if user_name else self._get_config_value("user_name") if scope not in {"WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"}: raise ValueError(f"Unsupported memory type scope: {scope}") - query = f""" - SELECT id, properties - FROM "{self.db_name}_graph"."Memory" - WHERE properties->>'memory_type' = %s - """ - params = [scope] + where_clause = f"ag_catalog.agtype_access_operator(properties, '\"memory_type\"'::agtype) = '\"{scope}\"'::agtype" + where_clause += f" AND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype" - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - query += " AND properties->>'user_name' = %s" - params.append(self._get_config_value("user_name")) + # Build return fields + if include_embedding: + return_fields = "id, properties, embedding" + else: + return_fields = "id, properties" - with self.connection.cursor() as cursor: - cursor.execute(query, params) - results = cursor.fetchall() + query = f""" + SELECT {return_fields} + FROM "{self.db_name}_graph"."Memory" + WHERE {where_clause} + LIMIT 100 + """ - nodes = [] - for row in results: - node_id, properties_json = row - # properties_json is already a dict from psycopg2 - properties = properties_json if properties_json else {} - nodes.append(self._parse_node( - {"id": properties.get("id", ""), "memory": properties.get("memory", ""), "metadata": properties})) - return nodes + nodes = [] + try: + with self.connection.cursor() as cursor: + cursor.execute(query) + results = cursor.fetchall() + + for row in results: + if include_embedding: + node_id, properties_json, embedding_json = row + else: + node_id, properties_json = row + embedding_json = None + + # Parse properties from JSONB if it's a string + if isinstance(properties_json, str): + try: + properties = json.loads(properties_json) + except json.JSONDecodeError: + properties = {} + else: + properties = 
properties_json if properties_json else {} + + # Build node data + node_data = { + "id": properties.get("id", node_id), + "memory": properties.get("memory", ""), + "metadata": properties + } + + if include_embedding and embedding_json is not None: + node_data["embedding"] = embedding_json + + nodes.append(self._parse_node(node_data)) + + except Exception as e: + logger.error(f"Failed to get memories: {e}", exc_info=True) + + return nodes def get_structure_optimization_candidates(self, scope: str, **kwargs) -> list[dict]: """Find nodes that are likely candidates for structure optimization.""" From aed76a757436eeb981827a7477058c30ff5d0ede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 15:47:45 +0800 Subject: [PATCH 027/137] update export_graph --- examples/basic_modules/polardb_search.py | 14 ++- src/memos/graph_dbs/polardb.py | 118 ++++++++++++++++++----- 2 files changed, 107 insertions(+), 25 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index 34ad3fd91..20e256a97 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -186,6 +186,12 @@ def get_grouped_counts(db_name, user_name): print("get_grouped_counts:", grouped_counts) +def export_graph(db_name, include_embedding, user_name): + graph = getPolarDb(db_name) + export_graphlist = graph.export_graph(include_embedding=include_embedding, user_name=user_name) + print("export_graph:", export_graphlist) + + if __name__ == "__main__": # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="edge") @@ -337,7 +343,9 @@ def get_grouped_counts(db_name, user_name): # get_children_with_embeddings(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",user_name="memos07ea708ac7eb412887c5c283f874ea30") - 
get_subgraph(db_name="memtensor_memos", center_id="13bb9df6-0609-4442-8bed-bba77dadac92", depth=2, - center_status="activated", user_name="memos07ea708ac7eb412887c5c283f874ea30") + # get_subgraph(db_name="memtensor_memos", center_id="13bb9df6-0609-4442-8bed-bba77dadac92", depth=2, + # center_status="activated", user_name="memos07ea708ac7eb412887c5c283f874ea30") + # + # get_grouped_counts(db_name="memtensor_memos", user_name="memos07ea708ac7eb412887c5c283f874ea30") - get_grouped_counts(db_name="memtensor_memos", user_name="memos07ea708ac7eb412887c5c283f874ea30") + export_graph(db_name="memtensor_memos", include_embedding=False, user_name="memos8698ecb1f76940ff9adc12494c4d57a6") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 9df2ce9ec..595fde178 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1362,6 +1362,8 @@ def get_grouped_counts( GROUP BY {", ".join(group_by_fields)} """ + print("[get_grouped_counts] query:", query) + try: with self.connection.cursor() as cursor: # 处理参数化查询 @@ -1432,31 +1434,103 @@ def clear(self) -> None: logger.warning(f"Failed to clear graph '{self.db_name}_graph': {e}") # Don't raise the exception, just log it as a warning - def export_graph(self, **kwargs) -> dict[str, Any]: - """Export all graph nodes and edges in a structured form.""" - with self.connection.cursor() as cursor: - # Export nodes - node_query = f'SELECT id, properties FROM "{self.db_name}_graph"."Memory"' - params = [] - - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - user_name = self._get_config_value("user_name") - node_query += f" WHERE properties::text LIKE '%{user_name}%'" + def export_graph( + self, include_embedding: bool = False, user_name: str | None = None + ) -> dict[str, Any]: + """ + Export all graph nodes and edges in a structured form. + Args: + include_embedding (bool): Whether to include the large embedding field. 
+ user_name (str, optional): User name for filtering in non-multi-db mode - cursor.execute(node_query) - node_results = cursor.fetchall() - nodes = [] - for row in node_results: - node_id, properties_json = row - # properties_json is already a dict from psycopg2 - properties = properties_json if properties_json else {} - nodes.append(self._parse_node( - {"id": properties.get("id", ""), "memory": properties.get("memory", ""), "metadata": properties})) + Returns: + { + "nodes": [ { "id": ..., "memory": ..., "metadata": {...} }, ... ], + "edges": [ { "source": ..., "target": ..., "type": ... }, ... ] + } + """ + user_name = user_name if user_name else self._get_config_value("user_name") + + try: + # Export nodes + if include_embedding: + node_query = f""" + SELECT id, properties, embedding + FROM "{self.db_name}_graph"."Memory" + WHERE ag_catalog.agtype_access_operator(properties, '"user_name"'::agtype) = '\"{user_name}\"'::agtype + """ + else: + node_query = f""" + SELECT id, properties + FROM "{self.db_name}_graph"."Memory" + WHERE ag_catalog.agtype_access_operator(properties, '"user_name"'::agtype) = '\"{user_name}\"'::agtype + """ + + with self.connection.cursor() as cursor: + cursor.execute(node_query) + node_results = cursor.fetchall() + nodes = [] + + for row in node_results: + if include_embedding: + node_id, properties_json, embedding_json = row + else: + node_id, properties_json = row + embedding_json = None + + # Parse properties from JSONB if it's a string + if isinstance(properties_json, str): + try: + properties = json.loads(properties_json) + except json.JSONDecodeError: + properties = {} + else: + properties = properties_json if properties_json else {} + + # Build node data + node_data = { + "id": properties.get("id", node_id), + "memory": properties.get("memory", ""), + "metadata": properties + } + + if include_embedding and embedding_json is not None: + node_data["embedding"] = embedding_json + + nodes.append(self._parse_node(node_data)) + + except 
Exception as e: + logger.error(f"[EXPORT GRAPH - NODES] Exception: {e}", exc_info=True) + raise RuntimeError(f"[EXPORT GRAPH - NODES] Exception: {e}") from e - # Export edges (simplified - would need more complex Cypher query for full edge export) - edges = [] + try: + # Export edges using cypher query + edge_query = f""" + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (a:Memory)-[r]->(b:Memory) + WHERE a.user_name = '{user_name}' AND b.user_name = '{user_name}' + RETURN a.id AS source, b.id AS target, type(r) as edge + $$) AS (source agtype, target agtype, edge agtype) + """ + + with self.connection.cursor() as cursor: + cursor.execute(edge_query) + edge_results = cursor.fetchall() + edges = [] + + for row in edge_results: + source_agtype, target_agtype, edge_agtype = row + edges.append({ + "source": source_agtype.value if hasattr(source_agtype, 'value') else str(source_agtype), + "target": target_agtype.value if hasattr(target_agtype, 'value') else str(target_agtype), + "type": edge_agtype.value if hasattr(edge_agtype, 'value') else str(edge_agtype) + }) + + except Exception as e: + logger.error(f"[EXPORT GRAPH - EDGES] Exception: {e}", exc_info=True) + raise RuntimeError(f"[EXPORT GRAPH - EDGES] Exception: {e}") from e - return {"nodes": nodes, "edges": edges} + return {"nodes": nodes, "edges": edges} def import_graph(self, data: dict[str, Any]) -> None: """Import the entire graph from a serialized dictionary.""" From 58600a2b440573dcc6db1cbd2c5dfdd848d5fd85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 16:39:52 +0800 Subject: [PATCH 028/137] remove --- src/memos/graph_dbs/polardb.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 595fde178..360c65c05 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -146,16 +146,18 @@ def __init__(self, 
config: PolarDBGraphDBConfig): self.connection.autocommit = True # Handle auto_create - auto_create = config.get("auto_create", False) if isinstance(config, dict) else config.auto_create - if auto_create: - self._ensure_database_exists() + # auto_create = config.get("auto_create", False) if isinstance(config, dict) else config.auto_create + # if auto_create: + # self._ensure_database_exists() # Create graph and tables + # self.create_graph() + # self.create_edge() # self._create_graph() # Handle embedding_dimension - embedding_dim = config.get("embedding_dimension", 1024) if isinstance(config,dict) else config.embedding_dimension - self.create_index(dimensions=embedding_dim) + # embedding_dim = config.get("embedding_dimension", 1024) if isinstance(config,dict) else config.embedding_dimension + # self.create_index(dimensions=embedding_dim) def _get_config_value(self, key: str, default=None): """Safely get config value from either dict or object.""" @@ -473,18 +475,18 @@ def create_graph(self): with self.connection.cursor() as cursor: cursor.execute(f""" SELECT COUNT(*) FROM ag_catalog.ag_graph - WHERE name = '{self.db_name}'; + WHERE name = '{self.db_name}_graph'; """) graph_exists = cursor.fetchone()[0] > 0 if graph_exists: - print(f"ℹ️ Graph '{self.db_name}' already exists.") + print(f"ℹ️ Graph '{self.db_name}_graph' already exists.") else: - cursor.execute(f"select create_graph('{self.db_name}');") - print(f"✅ Graph database '{self.db_name}' created.") + cursor.execute(f"select create_graph('{self.db_name}_graph');") + print(f"✅ Graph database '{self.db_name}_graph' created.") except Exception as e: - print(f"⚠️ Failed to create graph '{self.db_name}': {e}") - logger.error(f"Failed to create graph '{self.db_name}': {e}", exc_info=True) + print(f"⚠️ Failed to create graph '{self.db_name}_graph': {e}") + logger.error(f"Failed to create graph '{self.db_name}_graph': {e}", exc_info=True) def create_edge(self): """创建所有有效的边类型,如果不存在的话""" @@ -501,7 +503,7 @@ def 
create_edge(self): print(f"🪶 Creating elabel: {label_name}") try: with self.connection.cursor() as cursor: - cursor.execute(f"select create_elabel('{self.db_name}', '{label_name}');") + cursor.execute(f"select create_elabel('{self.db_name}_graph', '{label_name}');") print(f"✅ Successfully created elabel: {label_name}") except Exception as e: if "already exists" in str(e): From 8579961d297a779da20e16de3ff1c9e9cc1e38e8 Mon Sep 17 00:00:00 2001 From: caocuilong <13282138256@163.com> Date: Sun, 19 Oct 2025 16:41:08 +0800 Subject: [PATCH 029/137] insert Memory --- examples/basic_modules/parseJson.py | 81 +++++ .../basic_modules/polardb_export_insert.py | 344 ++++++++++++++++++ 2 files changed, 425 insertions(+) create mode 100644 examples/basic_modules/parseJson.py create mode 100644 examples/basic_modules/polardb_export_insert.py diff --git a/examples/basic_modules/parseJson.py b/examples/basic_modules/parseJson.py new file mode 100644 index 000000000..21d290450 --- /dev/null +++ b/examples/basic_modules/parseJson.py @@ -0,0 +1,81 @@ +import os +import json +import psycopg2 +import sys + +# Add the parent directory to the path to allow imports +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from polardb_export_insert import insert_data + +DB_CONFIG = { + 'host': 'memory.pg.polardb.rds.aliyuncs.com', + 'port': 5432, + 'database': 'memtensor_memos', + 'user': 'adimin', + 'password': 'Openmem0925' +} +conn = psycopg2.connect(**DB_CONFIG) + +def insert(batch): + """ + 模拟插入函数。 + 这里你可以替换成实际数据库或API调用逻辑。 + """ + print(f"✅ 调用 insert() 插入 {len(batch)} 条记录") + insert_data(conn, batch) + # 示例:你的数据库插入逻辑写在这里 + # db.insert_many(batch) + + +def process_folder(folder_path, batch_size=1000): + """ + 遍历文件夹,按 batch_size 分批解析 JSON 并调用 insert。 + """ + batch = [] + total_count = 0 + + for root, dirs, files in os.walk(folder_path): + for file in files: + # Only process .json files + if not file.endswith('.json'): + continue + + file_path = os.path.join(root, 
file) + print(f"📄 正在读取文件: {file_path}") + + try: + with open(file_path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + # 确保解析出的对象是字典类型,并且包含必要的字段 + if isinstance(obj, dict) and "id" in obj and "memory" in obj: + batch.append(obj) + total_count += 1 + + # 每满 batch_size 条,调用 insert 并清空缓存 + if len(batch) >= batch_size: + insert(batch) + batch = [] # 清空 + else: + print(f"⚠️ 跳过无效对象(缺少必要字段): {line[:80]}...") + except json.JSONDecodeError: + print(f"⚠️ 跳过无效 JSON: {line[:80]}...") + except (UnicodeDecodeError, IOError) as e: + print(f"⚠️ 跳过无法读取的文件 {file_path}: {e}") + continue + + # 处理最后不足 batch_size 的部分 + if batch: + insert(batch) + + print(f"\n✅ 全部完成,共处理 {total_count} 条记录。") + + +if __name__ == "__main__": + folder_path = r"C:\Users\13282\Desktop\nebular\export13\Memory" + process_folder(folder_path, batch_size=1000) diff --git a/examples/basic_modules/polardb_export_insert.py b/examples/basic_modules/polardb_export_insert.py new file mode 100644 index 000000000..3ffc05119 --- /dev/null +++ b/examples/basic_modules/polardb_export_insert.py @@ -0,0 +1,344 @@ +import json +import psycopg2 +from psycopg2.extras import Json +import numpy as np +import sys +import os + +# 添加src目录到Python路径 +src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src')) +sys.path.insert(0, src_path) + +from memos.configs.graph_db import GraphDBConfigFactory +from memos.graph_dbs.factory import GraphStoreFactory + + + + +DB_CONFIG = { + 'host': 'memory.pg.polardb.rds.aliyuncs.com', + 'port': 5432, + 'database': 'test_memos', + 'user': 'adimin', + 'password': 'Openmem0925' +} + +# 图数据库配置 +GRAPH_NAME = 'test_memos_graph' + + +def create_vector_extension(conn): + with conn.cursor() as cursor: + cursor.execute("CREATE EXTENSION IF NOT EXISTS vector;") + conn.commit() + print("✅ pgvector 扩展创建成功或已存在") + + +def create_table(conn): + create_table_sql = """ + CREATE TABLE IF NOT EXISTS "Memory" ( + id 
graphid PRIMARY KEY, + properties agtype, + embedding vector(1536), + embedding_1024 vector(1024), + embedding_768 vector(768), + imported_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ); + """ + + with conn.cursor() as cursor: + cursor.execute(create_table_sql) + + # 尝试添加主键约束(如果不存在) + try: + cursor.execute("ALTER TABLE \"Memory\" ADD CONSTRAINT memory_pkey PRIMARY KEY (id);") + print("✅ 主键约束添加成功") + except Exception as e: + print(f"⚠️ 主键约束可能已存在: {e}") + + # 安全地创建索引,检查列是否存在 + try: + cursor.execute("CREATE INDEX IF NOT EXISTS idx_memory_id ON \"Memory\"(id);") + except Exception as e: + print(f"⚠️ 创建ID索引时出错: {e}") + + try: + cursor.execute("CREATE INDEX IF NOT EXISTS idx_memory_properties ON \"Memory\" USING GIN(properties);") + except Exception as e: + print(f"⚠️ 创建properties索引时出错: {e}") + + # 只为存在的embedding列创建索引 + for col in ["embedding", "embedding_1024", "embedding_768"]: + try: + cursor.execute( + f"CREATE INDEX IF NOT EXISTS idx_memory_{col} ON \"Memory\" USING ivfflat ({col} vector_cosine_ops) WITH (lists = 100);") + except Exception as e: + print(f"⚠️ 创建{col}索引时出错: {e}") + conn.commit() + print("✅ 表和索引创建成功(如果不存在)") + + +def convert_to_vector(embedding_list): + if not embedding_list: + return None + if isinstance(embedding_list, np.ndarray): + embedding_list = embedding_list.tolist() + return "[" + ",".join(str(float(x)) for x in embedding_list) + "]" + + +def detect_embedding_field(embedding_list): + if not embedding_list: + return None + dim = len(embedding_list) + if dim == 1024: + return "embedding" + elif dim == 3072: + return "embedding_3072" + else: + print(f"⚠️ 未知 embedding 维度 {dim},跳过该向量") + return None + + +def clean_properties(props): + """移除向量字段""" + vector_keys = {"embedding", "embedding_1024", "embedding_3072", "embedding_768"} + if not isinstance(props, dict): + return {} + return {k: v for k, v in props.items() if k not in vector_keys} + + +def find_embedding(item): + """在多层结构中查找 embedding 向量""" + for key in ["embedding", "embedding_1024", 
"embedding_3072", "embedding_768"]: + if key in item and isinstance(item[key], list): + return item[key] + if "metadata" in item and key in item["metadata"]: + return item["metadata"][key] + if "properties" in item and key in item["properties"]: + return item["properties"][key] + return None + + +def add_node(conn, id: str, memory: str, metadata: dict, graph_name=None): + """ + 添加单个节点到图数据库 + + Args: + conn: 数据库连接 + id: 节点ID + memory: 内存内容 + metadata: 元数据字典 + graph_name: 图名称,可选 + """ + # 使用传入的graph_name或默认值 + if graph_name is None: + graph_name = GRAPH_NAME + + try: + # 先提取 embedding(在清理properties之前) + embedding = find_embedding(metadata) + field_name = detect_embedding_field(embedding) + vector_value = convert_to_vector(embedding) if field_name else None + + # 提取 properties + properties = metadata.copy() + properties = clean_properties(properties) + properties["id"] = id + properties["memory"] = memory + + with conn.cursor() as cursor: + # 先删除现有记录(如果存在) + delete_sql = f""" + DELETE FROM "Memory" + WHERE id = ag_catalog._make_graph_id('{graph_name}'::name, 'Memory'::name, %s::text::cstring); + """ + cursor.execute(delete_sql, (id,)) + + # 然后插入新记录 + if field_name and vector_value: + insert_sql = f""" + INSERT INTO "Memory" (id, properties, {field_name}) + VALUES ( + ag_catalog._make_graph_id('{graph_name}'::name, 'Memory'::name, %s::text::cstring), + %s::text::agtype, + %s::vector + ); + """ + cursor.execute(insert_sql, (id, Json(properties), vector_value)) + print(f"✅ 成功插入/更新: {id} ({field_name})") + else: + insert_sql = f""" + INSERT INTO "Memory" (id, properties) + VALUES ( + ag_catalog._make_graph_id('{graph_name}'::name, 'Memory'::name, %s::text::cstring), + %s::text::agtype + ); + """ + cursor.execute(insert_sql, (id, Json(properties))) + print(f"✅ 成功插入/更新(无向量): {id}") + + conn.commit() + return True + + except Exception as e: + conn.rollback() + print(f"❌ 插入失败 (ID: {id}): {e}") + return False + + +def insert_data(conn, data_list, graph_name=None): + """ + 
批量插入数据,使用PolarDB的add_node方法 + + Args: + conn: 数据库连接 + data_list: 数据列表 + graph_name: 图名称,可选 + """ + # 创建PolarDB配置 + config = GraphDBConfigFactory( + backend="polardb", + config={ + "host": "memory.pg.polardb.rds.aliyuncs.com", + "port": 5432, + "user": "adimin", + "password": "Openmem0925", + "db_name": "memtensor_memos", + "user_name": 'adimin', + "use_multi_db": False, + "auto_create": True, + "embedding_dimension": 1024, + }, + ) + + # 创建PolarDB实例 + graph = GraphStoreFactory.from_config(config) + print("✅ PolarDB连接成功") + + success_count = 0 + error_count = 0 + + for item in data_list: + id_ = item.get("id") + memory_ = item.get("memory") + + # 将所有字段作为metadata,除了id、memory和embedding相关字段 + metadata = {} + for key, value in item.items(): + if key not in ["id", "memory", "embedding_1024", "embedding_768", "embedding_3072", "embedding"]: + # 类型转换 + if key == "confidence": + # confidence 应该是 float + try: + metadata[key] = float(value) + except (ValueError, TypeError): + metadata[key] = value + elif key == "sources": + # sources 应该是 List[str],每个元素是JSON字符串 + if isinstance(value, str): + try: + parsed_sources = json.loads(value) + # 将每个对象转换为JSON字符串 + if isinstance(parsed_sources, list): + metadata[key] = [json.dumps(item) for item in parsed_sources] + else: + metadata[key] = [json.dumps(parsed_sources)] + except json.JSONDecodeError: + metadata[key] = value + else: + metadata[key] = value + elif key == "usage": + # usage 应该是 List[str],每个元素是JSON字符串(和sources格式一样) + if isinstance(value, str): + try: + parsed_usage = json.loads(value) + # 将每个对象转换为JSON字符串 + if isinstance(parsed_usage, list): + metadata[key] = [json.dumps(item) for item in parsed_usage] + else: + metadata[key] = [json.dumps(parsed_usage)] + except json.JSONDecodeError: + metadata[key] = value + else: + metadata[key] = value + elif key == "tags": + # tags 应该是 List[str] + if isinstance(value, str): + # 尝试解析为列表,如果失败则保持原样 + if value.startswith('[') and value.endswith(']'): + try: + metadata[key] = json.loads(value) 
+ except json.JSONDecodeError: + # 如果不是有效的JSON,尝试按逗号分割 + metadata[key] = [tag.strip() for tag in value[1:-1].split(',')] + else: + metadata[key] = value + else: + metadata[key] = value + else: + metadata[key] = value + + # 处理embedding字段 + embedding = None + for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: + if embedding_key in item and item[embedding_key]: + embedding_value = item[embedding_key] + # 如果是字符串,尝试解析为列表 + if isinstance(embedding_value, str): + try: + embedding = json.loads(embedding_value) + except json.JSONDecodeError: + print(f"⚠️ 无法解析embedding字符串: {embedding_key}") + embedding = None + else: + embedding = embedding_value + break + + # 如果有embedding,添加到metadata中 + if embedding: + metadata["embedding"] = embedding + + try: + # 直接调用PolarDB的add_node方法 + graph.add_node(id_, memory_, metadata) + success_count += 1 + print(f"✅ 成功插入/更新: {id_}") + except Exception as e: + error_count += 1 + print(f"❌ 插入失败 (ID: {id_}): {e}") + # PolarDB的add_node方法内部已经处理了事务,不需要外部rollback + + print(f"✅ 插入完成: 成功 {success_count} 条, 失败 {error_count} 条") + + +def load_data_from_file(filename): + print("11111") + with open(filename, "r", encoding="utf-8") as f: + data = json.load(f) + print(f"📂 从文件 {filename} 加载了 {len(data)} 条记录") + return data + + +def main(): + json_file = r"/Users/zhudayang/python/1011/MemOS/examples/basic_modules/2.json" + data = load_data_from_file(json_file) + if not data: + print("⚠️ 没有数据") + return + + conn = psycopg2.connect(**DB_CONFIG) + print("✅ 数据库连接成功") + + # create_vector_extension(conn) + # create_table(conn) + + # 使用默认的图名称,或者可以传入自定义的图名称 + # insert_data(conn, data, "custom_graph_name") + insert_data(conn, data) + + conn.close() + print("🔒 数据库连接1已关闭") + + +if __name__ == "__main__": + main() From b84577a9d9acbd650933cd59a0f152c1e4e13eed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 18:35:41 +0800 Subject: [PATCH 030/137] fix add_node --- 
src/memos/graph_dbs/polardb.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 360c65c05..c21c8f103 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1659,6 +1659,7 @@ def get_structure_optimization_candidates(self, scope: str, **kwargs) -> list[di def drop_database(self) -> None: """Permanently delete the entire graph this instance is using.""" + return if self._get_config_value("use_multi_db", True): with self.connection.cursor() as cursor: cursor.execute(f"SELECT drop_graph('{self.db_name}_graph', true)") @@ -1811,17 +1812,17 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any]) -> None: with self.connection.cursor() as cursor: # 先删除现有记录(如果存在) delete_query = f""" - DELETE FROM "{self.db_name}_graph"."Memory" - WHERE id = ag_catalog._make_graph_id('"{self.db_name}_graph"'::name, 'Memory'::name, %s::text::cstring) + DELETE FROM {self.db_name}_graph."Memory" + WHERE id = ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, %s::text::cstring) """ cursor.execute(delete_query, (id,)) # 然后插入新记录 if embedding_vector: insert_query = f""" - INSERT INTO "{self.db_name}_graph"."Memory"(id, properties, {embedding_column}) + INSERT INTO {self.db_name}_graph."Memory"(id, properties, {embedding_column}) VALUES ( - ag_catalog._make_graph_id('"{self.db_name}_graph"'::name, 'Memory'::name, %s::text::cstring), + ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, %s::text::cstring), %s, %s ) @@ -1829,9 +1830,9 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any]) -> None: cursor.execute(insert_query, (id, json.dumps(properties), json.dumps(embedding_vector))) else: insert_query = f""" - INSERT INTO "{self.db_name}_graph"."Memory"(id, properties) + INSERT INTO {self.db_name}_graph."Memory"(id, properties) VALUES ( - ag_catalog._make_graph_id('"{self.db_name}_graph"'::name, 
'Memory'::name, %s::text::cstring), + ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, %s::text::cstring), %s ) """ From 59d5502053ccc9a064fb213289d209d9f0bc0123 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 18:44:45 +0800 Subject: [PATCH 031/137] fix polardb.py --- src/memos/graph_dbs/polardb.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index c21c8f103..019d090f7 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -526,16 +526,16 @@ def add_edge(self, source_id: str, target_id: str, type: str, user_name: str | N if user_name is not None: properties["user_name"] = user_name query = f""" - INSERT INTO "{self.db_name}_graph"."{type}"(id, start_id, end_id, properties) + INSERT INTO {self.db_name}_graph."{type}"(id, start_id, end_id, properties) SELECT - ag_catalog._next_graph_id('"{self.db_name}_graph"'::name, '{type}'), - ag_catalog._make_graph_id('"{self.db_name}_graph"'::name, 'Memory'::name, '{source_id}'::text::cstring), - ag_catalog._make_graph_id('"{self.db_name}_graph"'::name, 'Memory'::name, '{target_id}'::text::cstring), + ag_catalog._next_graph_id('{self.db_name}_graph'::name, '{type}'), + ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, '{source_id}'::text::cstring), + ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, '{target_id}'::text::cstring), jsonb_build_object('user_name', '{user_name}')::text::agtype WHERE NOT EXISTS ( - SELECT 1 FROM "{self.db_name}_graph"."{type}" - WHERE start_id = ag_catalog._make_graph_id('"{self.db_name}_graph"'::name, 'Memory'::name, '{source_id}'::text::cstring) - AND end_id = ag_catalog._make_graph_id('"{self.db_name}_graph"'::name, 'Memory'::name, '{target_id}'::text::cstring) + SELECT 1 FROM {self.db_name}_graph."{type}" + WHERE start_id = 
ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, '{source_id}'::text::cstring) + AND end_id = ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, '{target_id}'::text::cstring) ); """ print(f"Executing add_edge: {query}") From 6bfbe7280bf014c8079c5b962fcef732d9617ffe Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Sun, 19 Oct 2025 19:15:06 +0800 Subject: [PATCH 032/137] fix --- src/memos/graph_dbs/polardb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 019d090f7..cb7786fa8 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1259,7 +1259,7 @@ def search_by_embedding( output.append({"id": id_val, "score": score_val}) return output[:top_k] - def get_by_metadata(self, filters: list[dict[str, Any]]) -> list[str]: + def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = None) -> list[str]: """Retrieve node IDs that match given metadata filters.""" where_clauses = [] params = [] From c15230435f57449b87ca0d97b7e58e38c033b8d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 19:20:17 +0800 Subject: [PATCH 033/137] fix get_subgraph --- src/memos/graph_dbs/polardb.py | 192 ++++++++++++++++++--------------- 1 file changed, 105 insertions(+), 87 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 019d090f7..e8d009900 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1107,70 +1107,91 @@ def get_subgraph( user_name = user_name if user_name else self._get_config_value("user_name") # 使用简化的查询获取子图(暂时只获取直接邻居) + query1 = f""" + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH(center: Memory)-[r * 1..{depth}]->(neighbor:Memory) + WHERE + center.id = '{center_id}' + AND center.status = '{center_status}' + AND center.user_name = 
'{user_name}' + RETURN + collect(DISTINCT + center), collect(DISTINCT + neighbor), collect(DISTINCT + r) + $$ ) as (centers agtype, neighbors agtype, rels agtype); + """ query = f""" - WITH center AS ( - SELECT * FROM cypher('{self.db_name}_graph', $$ - MATCH (center:Memory) - WHERE center.id = '{center_id}' - AND center.status = '{center_status}' - AND center.user_name = '{user_name}' - RETURN center - $$) AS (center agtype) - ), - neighbors AS ( - SELECT * FROM cypher('{self.db_name}_graph', $$ - MATCH (center:Memory)-[e]->(neighbor:Memory) - WHERE center.id = '{center_id}' - AND center.status = '{center_status}' - AND center.user_name = '{user_name}' - AND neighbor.user_name = '{user_name}' - RETURN neighbor, e - $$) AS (neighbor agtype, e agtype) - ) - SELECT - (SELECT center FROM center) as center, - ARRAY_AGG(neighbor) as neighbors, - ARRAY_AGG(e) as edges - FROM neighbors - """ + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH(center: Memory)-[r * 1..{depth}]->(neighbor:Memory) + WHERE + center.id = '{center_id}' + RETURN + collect(DISTINCT + center), collect(DISTINCT + neighbor), collect(DISTINCT + r) + $$ ) as (centers agtype, neighbors agtype, rels agtype); + """ try: with self.connection.cursor() as cursor: cursor.execute(query) result = cursor.fetchone() + print("[get_subgraph] result:", result) if not result or not result[0]: return {"core_node": None, "neighbors": [], "edges": []} # 解析中心节点 - center_agtype = result[0] - if center_agtype and hasattr(center_agtype, 'value'): - center_props = center_agtype.value - core_node = self._parse_node(center_props) - else: + centers_data = result[0] if result[0] else "[]" + neighbors_data = result[1] if result[1] else "[]" + edges_data = result[2] if result[2] else "[]" + + # 解析 JSON 数据 + try: + # 清理数据中的 ::vertex 和 ::edge 后缀 + if isinstance(centers_data, str): + centers_data = centers_data.replace('::vertex', '') + if isinstance(neighbors_data, str): + neighbors_data = neighbors_data.replace('::vertex', 
'') + if isinstance(edges_data, str): + edges_data = edges_data.replace('::edge', '') + + centers_list = json.loads(centers_data) if isinstance(centers_data, str) else centers_data + neighbors_list = json.loads(neighbors_data) if isinstance(neighbors_data, str) else neighbors_data + edges_list = json.loads(edges_data) if isinstance(edges_data, str) else edges_data + except json.JSONDecodeError as e: + logger.error(f"Failed to parse JSON data: {e}") return {"core_node": None, "neighbors": [], "edges": []} - + + # 解析中心节点 + core_node = None + if centers_list and len(centers_list) > 0: + center_data = centers_list[0] + if isinstance(center_data, dict) and "properties" in center_data: + core_node = self._parse_node(center_data["properties"]) + # 解析邻居节点 neighbors = [] - neighbors_agtype = result[1] if result[1] else [] - for neighbor_agtype in neighbors_agtype: - if neighbor_agtype and hasattr(neighbor_agtype, 'value'): - neighbor_props = neighbor_agtype.value - neighbor_parsed = self._parse_node(neighbor_props) - neighbors.append(neighbor_parsed) + if isinstance(neighbors_list, list): + for neighbor_data in neighbors_list: + if isinstance(neighbor_data, dict) and "properties" in neighbor_data: + neighbor_parsed = self._parse_node(neighbor_data["properties"]) + neighbors.append(neighbor_parsed) # 解析边 edges = [] - edges_agtype = result[2] if result[2] else [] - for edge_agtype in edges_agtype: - if edge_agtype and hasattr(edge_agtype, 'value'): - edge_data = edge_agtype.value - if isinstance(edge_data, dict): - edges.append({ - "type": edge_data.get("type", ""), - "source": edge_data.get("source", ""), - "target": edge_data.get("target", "") - }) + if isinstance(edges_list, list): + for edge_group in edges_list: + if isinstance(edge_group, list): + for edge_data in edge_group: + if isinstance(edge_data, dict): + edges.append({ + "type": edge_data.get("label", ""), + "source": edge_data.get("start_id", ""), + "target": edge_data.get("end_id", "") + }) return {"core_node": 
core_node, "neighbors": neighbors, "edges": edges} @@ -1346,6 +1367,10 @@ def get_grouped_counts( if isinstance(value, str): value = f"'{value}'" where_clause = where_clause.replace(f"${key}", str(value)) + + # 处理 where_clause 中的 user_name 参数 + if "user_name = %s" in where_clause: + where_clause = where_clause.replace("user_name = %s", f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype") # Build return fields and group by fields return_fields = [] @@ -1573,55 +1598,48 @@ def get_all_memory_items( if scope not in {"WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"}: raise ValueError(f"Unsupported memory type scope: {scope}") - where_clause = f"ag_catalog.agtype_access_operator(properties, '\"memory_type\"'::agtype) = '\"{scope}\"'::agtype" - where_clause += f" AND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype" - - # Build return fields + # 使用 cypher 查询获取记忆项 if include_embedding: - return_fields = "id, properties, embedding" + cypher_query = f""" + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (n:Memory) + WHERE n.memory_type = '{scope}' AND n.user_name = '{user_name}' + RETURN n + LIMIT 100 + $$) AS (n agtype) + """ else: - return_fields = "id, properties" - - query = f""" - SELECT {return_fields} - FROM "{self.db_name}_graph"."Memory" - WHERE {where_clause} - LIMIT 100 - """ + cypher_query = f""" + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (n:Memory) + WHERE n.memory_type = '{scope}' AND n.user_name = '{user_name}' + RETURN n + LIMIT 100 + $$) AS (n agtype) + """ nodes = [] try: with self.connection.cursor() as cursor: - cursor.execute(query) + cursor.execute(cypher_query) results = cursor.fetchall() for row in results: - if include_embedding: - node_id, properties_json, embedding_json = row - else: - node_id, properties_json = row - embedding_json = None - - # Parse properties from JSONB if it's a string - if 
isinstance(properties_json, str): - try: - properties = json.loads(properties_json) - except json.JSONDecodeError: - properties = {} - else: - properties = properties_json if properties_json else {} - - # Build node data - node_data = { - "id": properties.get("id", node_id), - "memory": properties.get("memory", ""), - "metadata": properties - } - - if include_embedding and embedding_json is not None: - node_data["embedding"] = embedding_json - - nodes.append(self._parse_node(node_data)) + node_agtype = row[0] + if node_agtype and hasattr(node_agtype, 'value'): + node_props = node_agtype.value + if isinstance(node_props, dict): + # 解析节点属性 + node_data = { + "id": node_props.get("id", ""), + "memory": node_props.get("memory", ""), + "metadata": node_props + } + + if include_embedding and "embedding" in node_props: + node_data["embedding"] = node_props["embedding"] + + nodes.append(self._parse_node(node_data)) except Exception as e: logger.error(f"Failed to get memories: {e}", exc_info=True) From 5a6b29ddae55d9a75ca3d2f6e9786e4b66ca2b9b Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Sun, 19 Oct 2025 19:29:51 +0800 Subject: [PATCH 034/137] fix --- src/memos/graph_dbs/polardb.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 4d958ebc7..7ee0ff6eb 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1774,12 +1774,13 @@ def add_node_old(conn, id: str, memory: str, metadata: dict, graph_name=None): print(f"❌ 插入失败 (ID: {id}): {e}") return False - def add_node(self, id: str, memory: str, metadata: dict[str, Any]) -> None: + def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: str | None = None) -> None: """Add a memory node to the graph.""" # user_name 从 metadata 中获取,如果不存在则从配置中获取 - if "user_name" not in metadata: - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - 
metadata["user_name"] = self._get_config_value("user_name") + metadata["user_name"] = user_name if user_name else self.config.user_name + # if "user_name" not in metadata: + # if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): + # metadata["user_name"] = self._get_config_value("user_name") # Safely process metadata metadata = _prepare_node_metadata(metadata) From 7a8e0cb220aea0928a8b60efb254a25367eb5ba1 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Sun, 19 Oct 2025 19:48:02 +0800 Subject: [PATCH 035/137] get_grouped_counts --- src/memos/graph_dbs/polardb.py | 54 ++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 7ee0ff6eb..941708daf 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1324,6 +1324,60 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = results = cursor.fetchall() return [row[0] for row in results if row[0]] + def get_grouped_counts_ccl( + self, + group_fields: list[str], + where_clause: str = "", + params: dict[str, Any] | None = None, + user_name: str | None = None, + ) -> list[dict[str, Any]]: + """ + Count nodes grouped by any fields. + + Args: + group_fields (list[str]): Fields to group by, e.g., ["memory_type", "status"] + where_clause (str, optional): Extra WHERE condition. E.g., + "WHERE n.status = 'activated'" + params (dict, optional): Parameters for WHERE clause. + + Returns: + list[dict]: e.g., [{ 'memory_type': 'WorkingMemory', 'status': 'active', 'count': 10 }, ...] 
+ """ + user_name = user_name if user_name else self.config.user_name + if not group_fields: + raise ValueError("group_fields cannot be empty") + + final_params = params.copy() if params else {} + + if not self.config.use_multi_db and (self.config.user_name or user_name): + user_clause = "n.user_name = $user_name" + final_params["user_name"] = user_name + if where_clause: + where_clause = where_clause.strip() + if where_clause.upper().startswith("WHERE"): + where_clause += f" AND {user_clause}" + else: + where_clause = f"WHERE {where_clause} AND {user_clause}" + else: + where_clause = f"WHERE {user_clause}" + + # Force RETURN field AS field to guarantee key match + group_fields_cypher = ", ".join([f"n.{field} AS {field}" for field in group_fields]) + group_fields_cypher_polardb = ", agtype".join([f"n.{field} AS {field}" for field in group_fields]) + + query = f""" + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (n:Memory) + {where_clause} + RETURN {group_fields_cypher}, COUNT(n) AS count + $$ ) as ({group_fields_cypher_polardb}, count agtype); + """ + with self.connection.cursor() as cursor: + cursor.execute(query) + results = cursor.fetchall() + print(results) + + def get_grouped_counts( self, group_fields: list[str], From 771e771c453e320e51b5982a7c91835d4950f6ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 19:48:36 +0800 Subject: [PATCH 036/137] update get_by_metadata --- src/memos/graph_dbs/polardb.py | 104 +++++++++++++++++++++++---------- 1 file changed, 74 insertions(+), 30 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 7ee0ff6eb..eba1c8a2f 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1281,48 +1281,92 @@ def search_by_embedding( return output[:top_k] def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = None) -> list[str]: - """Retrieve node IDs that match given 
metadata filters.""" - where_clauses = [] - params = [] + """ + Retrieve node IDs that match given metadata filters. + Supports exact match. - for i, f in enumerate(filters): + Args: + filters: List of filter dicts like: + [ + {"field": "key", "op": "in", "value": ["A", "B"]}, + {"field": "confidence", "op": ">=", "value": 80}, + {"field": "tags", "op": "contains", "value": "AI"}, + ... + ] + user_name (str, optional): User name for filtering in non-multi-db mode + + Returns: + list[str]: Node IDs whose metadata match the filter conditions. (AND logic). + """ + user_name = user_name if user_name else self._get_config_value("user_name") + + # 构建 cypher 查询的 WHERE 条件 + where_conditions = [] + + for f in filters: field = f["field"] op = f.get("op", "=") value = f["value"] - + + # 格式化值 + if isinstance(value, str): + escaped_value = f"'{value}'" + elif isinstance(value, list): + # 处理列表值 + list_items = [] + for v in value: + if isinstance(v, str): + list_items.append(f"'{v}'") + else: + list_items.append(str(v)) + escaped_value = f"[{', '.join(list_items)}]" + else: + escaped_value = f"'{value}'" if isinstance(value, str) else str(value) + + # 构建 WHERE 条件 if op == "=": - where_clauses.append(f"properties->>'{field}' = %s") - params.append(value) + where_conditions.append(f"n.{field} = {escaped_value}") elif op == "in": - placeholders = ','.join(['%s'] * len(value)) - where_clauses.append(f"properties->>'{field}' IN ({placeholders})") - params.extend(value) + where_conditions.append(f"n.{field} IN {escaped_value}") elif op == "contains": - where_clauses.append(f"properties->'{field}' ? 
%s") - params.append(value) + where_conditions.append(f"size(filter(n.{field}, t -> t IN {escaped_value})) > 0") elif op == "starts_with": - where_clauses.append(f"properties->>'{field}' LIKE %s") - params.append(f"{value}%") + where_conditions.append(f"n.{field} STARTS WITH {escaped_value}") elif op == "ends_with": - where_clauses.append(f"properties->>'{field}' LIKE %s") - params.append(f"%{value}") + where_conditions.append(f"n.{field} ENDS WITH {escaped_value}") elif op in [">", ">=", "<", "<="]: - where_clauses.append(f"(properties->>'{field}')::numeric {op} %s") - params.append(value) + where_conditions.append(f"n.{field} {op} {escaped_value}") else: raise ValueError(f"Unsupported operator: {op}") - - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - where_clauses.append("properties->>'user_name' = %s") - params.append(self._get_config_value("user_name")) - - where_str = " AND ".join(where_clauses) - query = f"SELECT properties->>'id' as id FROM \"{self.db_name}_graph\".\"Memory\" WHERE {where_str}" - - with self.connection.cursor() as cursor: - cursor.execute(query, params) - results = cursor.fetchall() - return [row[0] for row in results if row[0]] + + # 添加用户名称过滤 + where_conditions.append(f"n.user_name = '{user_name}'") + + where_str = " AND ".join(where_conditions) + + # 使用 cypher 查询 + cypher_query = f""" + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (n:Memory) + WHERE {where_str} + RETURN n.id AS id + $$) AS (id agtype) + """ + + ids = [] + try: + with self.connection.cursor() as cursor: + cursor.execute(cypher_query) + results = cursor.fetchall() + for row in results: + if row[0] and hasattr(row[0], 'value'): + ids.append(row[0].value) + elif row[0]: + ids.append(str(row[0])) + except Exception as e: + logger.error(f"Failed to get metadata: {e}, query is {cypher_query}") + + return ids def get_grouped_counts( self, From 9cc3d272597a1e2a9a23a913774e0716dc23cde0 Mon Sep 17 00:00:00 2001 From: ccl 
<13282138256@163.com> Date: Sun, 19 Oct 2025 19:49:06 +0800 Subject: [PATCH 037/137] get_grouped_counts --- src/memos/graph_dbs/polardb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 941708daf..3e13e67f0 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1363,7 +1363,7 @@ def get_grouped_counts_ccl( # Force RETURN field AS field to guarantee key match group_fields_cypher = ", ".join([f"n.{field} AS {field}" for field in group_fields]) - group_fields_cypher_polardb = ", agtype".join([f"n.{field} AS {field}" for field in group_fields]) + group_fields_cypher_polardb = "agtype, ".join([f"n.{field} AS {field}" for field in group_fields]) query = f""" SELECT * FROM cypher('{self.db_name}_graph', $$ From ad2b54253f1e2d979d87d06e7a7d0d553ef94f7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 19:54:27 +0800 Subject: [PATCH 038/137] update get_grouped_counts --- src/memos/graph_dbs/polardb.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index e84378fa2..110e4029e 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1368,7 +1368,7 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = return ids - def get_grouped_counts_ccl( + def get_grouped_counts( self, group_fields: list[str], where_clause: str = "", @@ -1416,13 +1416,35 @@ def get_grouped_counts_ccl( RETURN {group_fields_cypher}, COUNT(n) AS count $$ ) as ({group_fields_cypher_polardb}, count agtype); """ - with self.connection.cursor() as cursor: - cursor.execute(query) - results = cursor.fetchall() - print(results) + try: + with self.connection.cursor() as cursor: + # 处理参数化查询 + if params and isinstance(params, list): + cursor.execute(query, 
final_params) + else: + cursor.execute(query) + results = cursor.fetchall() + + output = [] + for row in results: + group_values = {} + for i, field in enumerate(group_fields): + value = row[i] + if hasattr(value, 'value'): + group_values[field] = value.value + else: + group_values[field] = str(value) + count_value = row[-1] # Last column is count + output.append({**group_values, "count": count_value}) + return output - def get_grouped_counts( + except Exception as e: + logger.error(f"Failed to get grouped counts: {e}", exc_info=True) + return [] + + + def get_grouped_counts_old( self, group_fields: list[str], where_clause: str = "", From 825ba5b174b2f35ccdc0362e9502656d0882f07f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 20:30:12 +0800 Subject: [PATCH 039/137] update get_grouped_counts --- src/memos/graph_dbs/polardb.py | 110 +++++++++++++++++++++++++-------- 1 file changed, 84 insertions(+), 26 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 9164547b0..54372c035 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -777,6 +777,7 @@ def get_nodes(self, ids: list[str], **kwargs) -> list[dict[str, Any]]: query += " AND properties::text LIKE %s" params.append(f"%{user_name}%") + print(f"[get_nodes] query: {query}, params: {params}") with self.connection.cursor() as cursor: cursor.execute(query, params) results = cursor.fetchall() @@ -1766,34 +1767,91 @@ def get_all_memory_items( return nodes - def get_structure_optimization_candidates(self, scope: str, **kwargs) -> list[dict]: - """Find nodes that are likely candidates for structure optimization.""" - # This would require more complex graph traversal queries - # For now, return nodes without parent relationships - query = f""" - SELECT id, properties - FROM "{self.db_name}_graph"."Memory" - WHERE properties->>'memory_type' = %s - AND properties->>'status' = 'activated' + 
def get_structure_optimization_candidates( + self, scope: str, include_embedding: bool = False, user_name: str | None = None + ) -> list[dict]: """ - params = [scope] - - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - query += " AND properties->>'user_name' = %s" - params.append(self._get_config_value("user_name")) - - with self.connection.cursor() as cursor: - cursor.execute(query, params) - results = cursor.fetchall() + Find nodes that are likely candidates for structure optimization: + - Isolated nodes, nodes with empty background, or nodes with exactly one child. + - Plus: the child of any parent node that has exactly one child. + """ + user_name = user_name if user_name else self._get_config_value("user_name") + + # 构建返回字段,根据 include_embedding 参数决定是否包含 embedding + if include_embedding: + return_fields = "n" + else: + # 构建不包含 embedding 的字段列表 + return_fields = ",".join([ + "n.id AS id", + "n.memory AS memory", + "n.user_name AS user_name", + "n.user_id AS user_id", + "n.session_id AS session_id", + "n.status AS status", + "n.key AS key", + "n.confidence AS confidence", + "n.tags AS tags", + "n.created_at AS created_at", + "n.updated_at AS updated_at", + "n.memory_type AS memory_type", + "n.sources AS sources", + "n.source AS source", + "n.node_type AS node_type", + "n.visibility AS visibility", + "n.usage AS usage", + "n.background AS background" + ]) + + # 使用 OPTIONAL MATCH 来查找孤立节点(没有父节点和子节点的节点) + cypher_query = f""" + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (n:Memory) + WHERE n.memory_type = '{scope}' + AND n.status = 'activated' + AND n.user_name = '{user_name}' + OPTIONAL MATCH (n)-[:PARENT]->(c:Memory) + OPTIONAL MATCH (p:Memory)-[:PARENT]->(n) + WITH n, c, p + WHERE c IS NULL AND p IS NULL + RETURN {return_fields} + $$) AS (result agtype) + """ + print("[get_structure_optimization_candidates] query:", cypher_query) - nodes = [] - for row in results: - node_id, properties_json = row - # 
properties_json is already a dict from psycopg2 - properties = properties_json if properties_json else {} - nodes.append(self._parse_node( - {"id": properties.get("id", ""), "memory": properties.get("memory", ""), "metadata": properties})) - return nodes + candidates = [] + node_ids = set() + try: + with self.connection.cursor() as cursor: + cursor.execute(cypher_query) + results = cursor.fetchall() + + for row in results: + result_agtype = row[0] + if result_agtype and hasattr(result_agtype, 'value'): + if include_embedding: + # 当 include_embedding=True 时,返回完整的节点对象 + node_props = result_agtype.value + if isinstance(node_props, dict) and "properties" in node_props: + node = self._parse_node(node_props["properties"]) + node_id = node["id"] + if node_id not in node_ids: + candidates.append(node) + node_ids.add(node_id) + else: + # 当 include_embedding=False 时,返回字段字典 + props = result_agtype.value + if isinstance(props, dict): + node = self._parse_node(props) + node_id = node["id"] + if node_id not in node_ids: + candidates.append(node) + node_ids.add(node_id) + + except Exception as e: + logger.error(f"Failed to get structure optimization candidates: {e}", exc_info=True) + + return candidates def drop_database(self) -> None: """Permanently delete the entire graph this instance is using.""" From 06f9bfcc6dfc73314a0daebb5ff247d86a3edd1c Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Sun, 19 Oct 2025 20:38:54 +0800 Subject: [PATCH 040/137] get_grouped_counts --- src/memos/graph_dbs/polardb.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 54372c035..d31b3eca5 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -741,7 +741,7 @@ def get_node(self, id: str, include_embedding: bool = False, user_name: str | No logger.error(f"[get_node] Failed to retrieve node '{id}': {e}", exc_info=True) return None - def get_nodes(self, ids: 
list[str], **kwargs) -> list[dict[str, Any]]: + def get_nodes(self, ids: list[str], user_name: str | None = None,**kwargs) -> list[dict[str, Any]]: """ Retrieve the metadata and memory of a list of nodes. Args: @@ -771,11 +771,13 @@ def get_nodes(self, ids: list[str], **kwargs) -> list[dict[str, Any]]: FROM "{self.db_name}_graph"."Memory" WHERE ({where_clause}) """ - - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - user_name = kwargs.get("cube_name", self._get_config_value("user_name")) - query += " AND properties::text LIKE %s" - params.append(f"%{user_name}%") + user_name = user_name if user_name else self.config.user_name + query += " AND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" + params.append(f"{user_name}") + # if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): + # user_name = kwargs.get("cube_name", self._get_config_value("user_name")) + # query += " AND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" + # params.append(f"{user_name}") print(f"[get_nodes] query: {query}, params: {params}") with self.connection.cursor() as cursor: From 57dbca9085a368bf77a200a6e590aae22f5cc5b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 19 Oct 2025 21:03:42 +0800 Subject: [PATCH 041/137] update get_nodes --- src/memos/graph_dbs/polardb.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index d31b3eca5..e8e6f26cb 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -771,9 +771,11 @@ def get_nodes(self, ids: list[str], user_name: str | None = None,**kwargs) -> l FROM "{self.db_name}_graph"."Memory" WHERE ({where_clause}) """ + user_name = user_name if user_name else self.config.user_name query += " AND 
ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" - params.append(f"{user_name}") + params.append(f'"{user_name}"') + # if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): # user_name = kwargs.get("cube_name", self._get_config_value("user_name")) # query += " AND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" From d7cccd09f8ebbec9dbc11dcb10d5a186036618ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 09:45:46 +0800 Subject: [PATCH 042/137] update search_by_embedding filter user_name --- examples/basic_modules/polardb_search.py | 32 +++++++++++++++--------- src/memos/graph_dbs/polardb.py | 20 +++++++++------ 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index 20e256a97..abf65da6d 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -106,31 +106,30 @@ def getPolarDb(db_name): return graph -def searchVector(db_name: str, vectorStr: list[float]): +def searchVector(db_name: str, vectorStr: list[float], user_name: str = None): graph = getPolarDb(db_name) # 1,查询search_by_embedding - nodes = graph.search_by_embedding(vector=vectorStr, top_k=1) + nodes = graph.search_by_embedding(vector=vectorStr, top_k=1, user_name=user_name) print("search_by_embedding nodes:", len(nodes)) for node_i in nodes: print("Search result:", graph.get_node(node_i["id"][1:-1])) # 2,查询单个get_node - detail = graph.get_node(id="194f1e30-44d2-4e3f-bc58-c950343c56b7", - user_name='memos231a22c655fd4b859ca4143b97d2b808') - print("单个node:", detail) + # detail = graph.get_node(id="194f1e30-44d2-4e3f-bc58-c950343c56b7", + # user_name='memos231a22c655fd4b859ca4143b97d2b808') + # print("单个node:", detail) # # # 3,查询多个get_nodes # ids = ['bb079c5b-1937-4125-a9e5-55d4abe6c95d', 
'd66120af-992b-44c6-b261-a6ebe6bc57a5'] - # detail_list = graph.get_nodes(ids) + # ids = ['bfde036f-6276-4485-9dc6-3c64eab3e132'] + # detail_list = graph.get_nodes(ids=ids,user_name='memos7a9f9fbbb61c412f94f77fbaa8103c35') # print("1111多个node:", len(detail_list)) # # print("多个node:", detail_list) # 4,更新 update_node # graph.update_node(id="000009999ef-926f-42e2-b7b5-0224daf0abcd", fields={"name": "new_name"}) - # for node_i in nodes: - # print("Search result:", graph.get_node(node_i["id"])) # 4,查询 get_memory_count # count = graph.get_memory_count('UserMemory','memos07ba3d044650474c839e721f3a69d38a') @@ -176,7 +175,7 @@ def get_children_with_embeddings(db_name: str, id: str, user_name: str = None): def get_subgraph(db_name, center_id, depth, center_status, user_name): graph = getPolarDb(db_name) subgraph = graph.get_subgraph(center_id, depth, center_status, user_name) - print("get_subgraph:", subgraph) + print("111111get_subgraph:", subgraph) def get_grouped_counts(db_name, user_name): @@ -192,6 +191,12 @@ def export_graph(db_name, include_embedding, user_name): print("export_graph:", export_graphlist) +def get_structure_optimization_candidates(db_name, scope, include_embedding, user_name): + graph = getPolarDb(db_name) + candidates = graph.get_structure_optimization_candidates(scope=scope, include_embedding=include_embedding, user_name=user_name) + print("get_structure_optimization_candidates:", candidates) + + if __name__ == "__main__": # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="edge") @@ -333,7 +338,7 @@ def export_graph(db_name, include_embedding, user_name): -0.01335147, -0.040344328, 0.029144352, -0.04174814, 0.023315482, -0.02227788, -0.0022716573, -0.03152473, 0.0482484, -0.027038634, -0.004882823, 0.06152357, -0.003881463, -0.036041338, -0.0075645614, 0.020660445, -0.07250992, -0.024429375, -0.036377035] - 
searchVector(db_name="memtensor_memos", vectorStr=vector) + searchVector(db_name="memtensor_memos", vectorStr=vector, user_name="memos7a9f9fbbb61c412f94f77fbaa8103c35") # searchVector(db_name="test_1020_02", vectorStr=vector) # add_edge(db_name="memtensor_memos",source_id="13bb9df6-0609-4442-8bed-bba77dadac92", target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", edge_type="PARENT", user_name="memosbfb3fb32032b4077a641404dc48739cd") @@ -343,9 +348,12 @@ def export_graph(db_name, include_embedding, user_name): # get_children_with_embeddings(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",user_name="memos07ea708ac7eb412887c5c283f874ea30") - # get_subgraph(db_name="memtensor_memos", center_id="13bb9df6-0609-4442-8bed-bba77dadac92", depth=2, + # get_subgraph(db_name="memtensor_memos", center_id="13bb9df6-0609-4442-8bed-bba77dadac92", depth=1, # center_status="activated", user_name="memos07ea708ac7eb412887c5c283f874ea30") + # # get_grouped_counts(db_name="memtensor_memos", user_name="memos07ea708ac7eb412887c5c283f874ea30") - export_graph(db_name="memtensor_memos", include_embedding=False, user_name="memos8698ecb1f76940ff9adc12494c4d57a6") + # export_graph(db_name="memtensor_memos", include_embedding=False, user_name="memos8698ecb1f76940ff9adc12494c4d57a6") + + # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=True, user_name="memose62f1a2ef8c54ccfadf329d11d1e31ad") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index e8e6f26cb..b7473f4e6 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1216,6 +1216,7 @@ def search_by_embedding( status: str | None = None, threshold: float | None = None, search_filter: dict | None = None, + user_name: str | None = None, **kwargs, ) -> list[dict]: """ @@ -1231,13 +1232,15 @@ def search_by_embedding( where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = 
'\"activated\"'::agtype") where_clauses.append("embedding is not null") # Add user_name filter like nebular.py - user_name = self._get_config_value("user_name") - if not self.config.use_multi_db and user_name: - if kwargs.get("cube_name"): - where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{kwargs['cube_name']}\"'::agtype") - else: - where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype") - + + # user_name = self._get_config_value("user_name") + # if not self.config.use_multi_db and user_name: + # if kwargs.get("cube_name"): + # where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{kwargs['cube_name']}\"'::agtype") + # else: + # where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype") + if user_name: + where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype") # Add search_filter conditions like nebular.py if search_filter: for key, value in search_filter.items(): @@ -1267,7 +1270,8 @@ def search_by_embedding( WHERE scope > 0.1; """ params = [vector] - print(where_clause) + + print(f"[search_by_embedding] query: {query}, params: {params}, where_clause: {where_clause}") with self.connection.cursor() as cursor: cursor.execute(query, params) results = cursor.fetchall() From 25ad681346386af718d24b48652c35c71a5dbb68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 10:01:37 +0800 Subject: [PATCH 043/137] update search_by_embedding filter user_name --- src/memos/graph_dbs/polardb.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index b7473f4e6..14880ef88 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -826,8 +826,6 
@@ def get_edges(self, id: str, type: str = "ANY", direction: str = "ANY") -> list[ ... ] """ - # 由于PolarDB没有完整的图数据库功能,这里使用简化的实现 - # 在实际应用中,你可能需要创建专门的边表来存储关系 # 创建一个简单的边表来存储关系(如果不存在的话) try: From 16537a79f2c2c48f562f36e30d0a1002873c2c5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 10:09:06 +0800 Subject: [PATCH 044/137] add filter user_name for update_node --- src/memos/graph_dbs/polardb.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 14880ef88..413d50001 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -364,6 +364,8 @@ def update_node(self, id: str, fields: dict[str, Any], user_name: str | None = N if not fields: return + user_name = user_name if user_name else self.config.user_name + # 获取当前节点 current_node = self.get_node(id, user_name=user_name) if not current_node: From 33bd15cf9931fac5c2a89e0f47a08fb7959e6868 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Mon, 20 Oct 2025 10:21:52 +0800 Subject: [PATCH 045/137] get_structure_optimization_candidates --- src/memos/graph_dbs/polardb.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 413d50001..36a015728 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1811,6 +1811,21 @@ def get_structure_optimization_candidates( "n.background AS background" ]) + # 保留写法 + cypher_query_1 = f""" + SELECT m.* + FROM memtensor_memos_graph."Memory" m + WHERE + ag_catalog.agtype_access_operator(m.properties, '"memory_type"'::ag_catalog.agtype) = '"LongTermMemory"'::ag_catalog.agtype + AND ag_catalog.agtype_access_operator(m.properties, '"status"'::ag_catalog.agtype) = '"activated"'::ag_catalog.agtype + AND ag_catalog.agtype_access_operator(m.properties, '"user_name"'::ag_catalog.agtype) = '"activated"'::ag_catalog.agtype + 
AND NOT EXISTS ( + SELECT 1 + FROM memtensor_memos_graph."PARENT" p + WHERE m.id = p.start_id OR m.id = p.end_id + ); + """ + # 使用 OPTIONAL MATCH 来查找孤立节点(没有父节点和子节点的节点) cypher_query = f""" SELECT * FROM cypher('{self.db_name}_graph', $$ From 691c676b91ce3698ac840cfacdfcf41f82f519b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 13:48:51 +0800 Subject: [PATCH 046/137] add filter user_name for update_node --- examples/basic_modules/polardb_search.py | 8 ++++---- src/memos/graph_dbs/polardb.py | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index abf65da6d..ef50c9408 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -121,10 +121,10 @@ def searchVector(db_name: str, vectorStr: list[float], user_name: str = None): # print("单个node:", detail) # # # 3,查询多个get_nodes - # ids = ['bb079c5b-1937-4125-a9e5-55d4abe6c95d', 'd66120af-992b-44c6-b261-a6ebe6bc57a5'] - # ids = ['bfde036f-6276-4485-9dc6-3c64eab3e132'] - # detail_list = graph.get_nodes(ids=ids,user_name='memos7a9f9fbbb61c412f94f77fbaa8103c35') - # print("1111多个node:", len(detail_list)) + ids = ['bb079c5b-1937-4125-a9e5-55d4abe6c95d', 'd66120af-992b-44c6-b261-a6ebe6bc57a5'] + ids = ['"bfde036f-6276-4485-9dc6-3c64eab3e132"'] + detail_list = graph.get_nodes(ids=ids,user_name='memos7a9f9fbbb61c412f94f77fbaa8103c35') + print("1111多个node:", len(detail_list)) # # print("多个node:", detail_list) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 36a015728..b6c6c3925 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -764,7 +764,7 @@ def get_nodes(self, ids: list[str], user_name: str | None = None,**kwargs) -> l for id_val in ids: where_conditions.append("ag_catalog.agtype_access_operator(properties, '\"id\"'::agtype) = %s::agtype") - 
params.append(f'"{id_val}"') + params.append(f'{id_val}') where_clause = " OR ".join(where_conditions) @@ -1239,8 +1239,9 @@ def search_by_embedding( # where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{kwargs['cube_name']}\"'::agtype") # else: # where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype") - if user_name: - where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype") + user_name = user_name if user_name else self.config.user_name + where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype") + # Add search_filter conditions like nebular.py if search_filter: for key, value in search_filter.items(): From 6e0dc799e3ef19a515326c13bb6fd003920e8944 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 13:57:04 +0800 Subject: [PATCH 047/137] fix --- examples/basic_modules/importPolarDbEdge.py | 8 +++--- examples/basic_modules/parseJson.py | 10 +++---- examples/basic_modules/polardb_example.py | 26 +++++++++---------- .../basic_modules/polardb_export_insert.py | 22 ++++++++-------- examples/basic_modules/polardb_search.py | 16 ++++++------ 5 files changed, 41 insertions(+), 41 deletions(-) diff --git a/examples/basic_modules/importPolarDbEdge.py b/examples/basic_modules/importPolarDbEdge.py index d5ed0a225..fa50eaf98 100644 --- a/examples/basic_modules/importPolarDbEdge.py +++ b/examples/basic_modules/importPolarDbEdge.py @@ -4,11 +4,11 @@ # 数据库连接配置 DB_CONFIG = { - 'host': 'memory.pg.polardb.rds.aliyuncs.com', + 'host': 'xxxxx', 'port': 5432, - 'database': 'memtensor_memos', - 'user': 'adimin', - 'password': 'Openmem0925' + 'database': 'xxxxx', + 'user': 'xxxx', + 'password': 'xxxx' } # 顶层目录 diff --git a/examples/basic_modules/parseJson.py 
b/examples/basic_modules/parseJson.py index 21d290450..86a41d24b 100644 --- a/examples/basic_modules/parseJson.py +++ b/examples/basic_modules/parseJson.py @@ -9,11 +9,11 @@ from polardb_export_insert import insert_data DB_CONFIG = { - 'host': 'memory.pg.polardb.rds.aliyuncs.com', + 'host': 'xxxxxxx', 'port': 5432, - 'database': 'memtensor_memos', - 'user': 'adimin', - 'password': 'Openmem0925' + 'database': 'xxxxx', + 'user': 'xxxx', + 'password': 'xxxx' } conn = psycopg2.connect(**DB_CONFIG) @@ -77,5 +77,5 @@ def process_folder(folder_path, batch_size=1000): if __name__ == "__main__": - folder_path = r"C:\Users\13282\Desktop\nebular\export13\Memory" + folder_path = r"/Users/zhudayang/python/github/1/MemOS/examples/basic_modules" process_folder(folder_path, batch_size=1000) diff --git a/examples/basic_modules/polardb_example.py b/examples/basic_modules/polardb_example.py index 9e1c0c6c4..6569d01d4 100644 --- a/examples/basic_modules/polardb_example.py +++ b/examples/basic_modules/polardb_example.py @@ -38,10 +38,10 @@ def example_multi_db(db_name: str = "paper_polardb"): config = GraphDBConfigFactory( backend="polardb", config={ - "host": "139.196.232.20", + "host": "xxxxxxx", "port": 5432, - "user": "root", - "password": "123456", + "user": "xxxx", + "password": "xxxx", "db_name": db_name, "auto_create": True, "embedding_dimension": 1024, @@ -277,10 +277,10 @@ def example_shared_db(db_name: str = "shared_travel_group_polardb"): config = GraphDBConfigFactory( backend="polardb", config={ - "host": "139.196.232.20", + "host": "xxxxxxx", "port": 5432, - "user": "root", - "password": "123456", + "user": "xxxx", + "password": "xxxx", "db_name": db_name, "user_name": user_name, "use_multi_db": False, @@ -343,10 +343,10 @@ def example_shared_db(db_name: str = "shared_travel_group_polardb"): config_alice = GraphDBConfigFactory( backend="polardb", config={ - "host": "139.196.232.20", + "host": "xxxxxxx", "port": 5432, - "user": "root", - "password": "123456", + "user": 
"xxxx", + "password": "xxxx", "db_name": db_name, "user_name": user_list[0], "embedding_dimension": 1024, @@ -371,10 +371,10 @@ def run_user_session( config = GraphDBConfigFactory( backend="polardb", config={ - "host": "139.196.232.20", + "host": "xxxxxxx", "port": 5432, - "user": "root", - "password": "123456", + "user": "xxxx", + "password": "xxxx", "db_name": db_name, "user_name": user_name, "use_multi_db": False, @@ -513,7 +513,7 @@ def example_complex_shared_db(db_name: str = "poc"): # example_shared_db(db_name="shared_travel_group_polardb") print("\n=== PolarDB Example: Single-DB-Complex ===") - example_complex_shared_db(db_name="test_memos_1011") + example_complex_shared_db(db_name="memtensor_memos") except Exception as e: print(f"❌ Error running111111 PolarDB example: {e}") print("Please check:") diff --git a/examples/basic_modules/polardb_export_insert.py b/examples/basic_modules/polardb_export_insert.py index 3ffc05119..d282d5d72 100644 --- a/examples/basic_modules/polardb_export_insert.py +++ b/examples/basic_modules/polardb_export_insert.py @@ -16,15 +16,15 @@ DB_CONFIG = { - 'host': 'memory.pg.polardb.rds.aliyuncs.com', + 'host': 'xxxxxxx', 'port': 5432, - 'database': 'test_memos', - 'user': 'adimin', - 'password': 'Openmem0925' + 'database': 'xxxxx', + 'user': 'xxxx', + 'password': 'xxxx' } # 图数据库配置 -GRAPH_NAME = 'test_memos_graph' +GRAPH_NAME = 'memtensor_memos_graph' def create_vector_extension(conn): @@ -199,14 +199,14 @@ def insert_data(conn, data_list, graph_name=None): config = GraphDBConfigFactory( backend="polardb", config={ - "host": "memory.pg.polardb.rds.aliyuncs.com", + "host": "xxxxxxx", "port": 5432, - "user": "adimin", - "password": "Openmem0925", - "db_name": "memtensor_memos", - "user_name": 'adimin', + "user": "xxxx", + "password": "xxxx", + "db_name": "xxxxx", + "user_name": 'xxxx', "use_multi_db": False, - "auto_create": True, + "auto_create": False, "embedding_dimension": 1024, }, ) diff --git 
a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index ef50c9408..a19275b65 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -24,11 +24,11 @@ def handler_node_edge(db_name: str = "shared-traval-group-complex", type: str = DB_CONFIG = { - 'host': 'memory.pg.polardb.rds.aliyuncs.com', + 'host': 'xxxxxxx', 'port': 5432, - 'database': 'test_memos', - 'user': 'adimin', - 'password': 'Openmem0925' + 'database': 'xxxxx', + 'user': 'xxxx', + 'password': 'xxxx' } @@ -91,12 +91,12 @@ def getPolarDb(db_name): config = GraphDBConfigFactory( backend="polardb", config={ - "host": "memory.pg.polardb.rds.aliyuncs.com", + "host": "xxx", "port": 5432, - "user": "adimin", - "password": "Openmem0925", + "user": "xxxx", + "password": "xxxx", "db_name": db_name, - "user_name": 'adimin', + "user_name": 'xxxx', "use_multi_db": True, # 设置为True,不添加user_name过滤条件 "auto_create": True, "embedding_dimension": 1024, From 332bf8d01791c25fbd75e30f2b222bc89099cd96 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Mon, 20 Oct 2025 14:27:00 +0800 Subject: [PATCH 048/137] fix --- src/memos/graph_dbs/polardb.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index b6c6c3925..8e50e0989 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1789,6 +1789,7 @@ def get_structure_optimization_candidates( # 构建返回字段,根据 include_embedding 参数决定是否包含 embedding if include_embedding: return_fields = "n" + return_fields_agtype =" n agtype" else: # 构建不包含 embedding 的字段列表 return_fields = ",".join([ @@ -1811,6 +1812,26 @@ def get_structure_optimization_candidates( "n.usage AS usage", "n.background AS background" ]) + return_fields_agtype = " agtype,".join([ + "id ", + "memory", + "user_name", + "user_id", + "session_id", + "status", + "key", + "confidence", + "tags", + "created_at", + 
"updated_at", + "memory_type", + "sources", + "source", + "node_type", + "visibility", + "usage", + "background" + ]) # 保留写法 cypher_query_1 = f""" @@ -1839,7 +1860,7 @@ def get_structure_optimization_candidates( WITH n, c, p WHERE c IS NULL AND p IS NULL RETURN {return_fields} - $$) AS (result agtype) + $$) AS ({return_fields_agtype}) """ print("[get_structure_optimization_candidates] query:", cypher_query) From 77d6bc6ae955a2625198e03f72774ec61ac707c9 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Mon, 20 Oct 2025 14:31:39 +0800 Subject: [PATCH 049/137] fix --- src/memos/graph_dbs/polardb.py | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 8e50e0989..a83cbdb62 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1812,26 +1812,13 @@ def get_structure_optimization_candidates( "n.usage AS usage", "n.background AS background" ]) - return_fields_agtype = " agtype,".join([ - "id ", - "memory", - "user_name", - "user_id", - "session_id", - "status", - "key", - "confidence", - "tags", - "created_at", - "updated_at", - "memory_type", - "sources", - "source", - "node_type", - "visibility", - "usage", - "background" - ]) + fields = [ + "id", "memory", "user_name", "user_id", "session_id", "status", + "key", "confidence", "tags", "created_at", "updated_at", + "memory_type", "sources", "source", "node_type", "visibility", + "usage", "background" + ] + return_fields_agtype = ", ".join([f"{field} agtype" for field in fields]) # 保留写法 cypher_query_1 = f""" From 0104b6d488ac8a39e3f174b940300e449e128883 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 20 Oct 2025 14:25:44 +0800 Subject: [PATCH 050/137] =?UTF-8?q?feat:=20=E5=A2=9E=E5=8A=A0polardb?= =?UTF-8?q?=E7=9A=84=E5=90=AF=E5=8A=A8=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
src/memos/api/config.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/memos/api/config.py b/src/memos/api/config.py index 9a226cf30..972604933 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -275,6 +275,32 @@ def get_nebular_config(user_id: str | None = None) -> dict[str, Any]: "embedding_dimension": int(os.getenv("EMBEDDING_DIMENSION", 3072)), } + @staticmethod + def get_polardb_config(user_id: str | None = None) -> dict[str, Any]: + """Get PolarDB configuration.""" + use_multi_db = os.getenv("POLAR_DB_USE_MULTI_DB", "false").lower() == "true" + + if use_multi_db: + # Multi-DB mode: each user gets their own database (physical isolation) + db_name = f"memos{user_id.replace('-', '')}" if user_id else "memos_default" + user_name = None + else: + # Shared-DB mode: all users share one database with user_name tag (logical isolation) + db_name = os.getenv("POLAR_DB_DB_NAME", "shared_memos_db") + user_name = f"memos{user_id.replace('-', '')}" if user_id else "memos_default" + + return { + "host": os.getenv("POLAR_DB_HOST", "localhost"), + "port": int(os.getenv("POLAR_DB_PORT", "5432")), + "user": os.getenv("POLAR_DB_USER", "root"), + "password": os.getenv("POLAR_DB_PASSWORD", "123456"), + "db_name": db_name, + "user_name": user_name, + "use_multi_db": use_multi_db, + "auto_create": True, + "embedding_dimension": int(os.getenv("EMBEDDING_DIMENSION", 1024)), + } + @staticmethod def get_mysql_config() -> dict[str, Any]: """Get MySQL configuration.""" @@ -500,6 +526,7 @@ def create_user_config(user_name: str, user_id: str) -> tuple[MOSConfig, General neo4j_community_config = APIConfig.get_neo4j_community_config(user_id) neo4j_config = APIConfig.get_neo4j_config(user_id) nebular_config = APIConfig.get_nebular_config(user_id) + polardb_config = APIConfig.get_polardb_config(user_id) internet_config = ( APIConfig.get_internet_config() if os.getenv("ENABLE_INTERNET", "false").lower() == "true" @@ -509,6 +536,7 @@ def 
create_user_config(user_name: str, user_id: str) -> tuple[MOSConfig, General "neo4j-community": neo4j_community_config, "neo4j": neo4j_config, "nebular": nebular_config, + "polardb": polardb_config, } graph_db_backend = os.getenv("NEO4J_BACKEND", "neo4j-community").lower() if graph_db_backend in graph_db_backend_map: @@ -557,10 +585,12 @@ def get_default_cube_config() -> GeneralMemCubeConfig | None: neo4j_community_config = APIConfig.get_neo4j_community_config(user_id="default") neo4j_config = APIConfig.get_neo4j_config(user_id="default") nebular_config = APIConfig.get_nebular_config(user_id="default") + polardb_config = APIConfig.get_polardb_config(user_id="default") graph_db_backend_map = { "neo4j-community": neo4j_community_config, "neo4j": neo4j_config, "nebular": nebular_config, + "polardb": polardb_config, } internet_config = ( APIConfig.get_internet_config() From 503ec3c38ffe489377cfb53a3a50dbfe706929f5 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Mon, 20 Oct 2025 14:42:04 +0800 Subject: [PATCH 051/137] fix --- src/memos/graph_dbs/polardb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index a83cbdb62..60bd587f1 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1857,7 +1857,7 @@ def get_structure_optimization_candidates( with self.connection.cursor() as cursor: cursor.execute(cypher_query) results = cursor.fetchall() - + print("result------",len(results)) for row in results: result_agtype = row[0] if result_agtype and hasattr(result_agtype, 'value'): From 4245ee0b1adb67b125036db207ebf3e6dfeab876 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Mon, 20 Oct 2025 14:43:38 +0800 Subject: [PATCH 052/137] fix --- src/memos/graph_dbs/polardb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 60bd587f1..ff48dc952 100644 --- 
a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1823,7 +1823,7 @@ def get_structure_optimization_candidates( # 保留写法 cypher_query_1 = f""" SELECT m.* - FROM memtensor_memos_graph."Memory" m + FROM {self.db_name}_graph."Memory" m WHERE ag_catalog.agtype_access_operator(m.properties, '"memory_type"'::ag_catalog.agtype) = '"LongTermMemory"'::ag_catalog.agtype AND ag_catalog.agtype_access_operator(m.properties, '"status"'::ag_catalog.agtype) = '"activated"'::ag_catalog.agtype From 96512401f9bdcbe653f8e0abe5d6df1431a8c0fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 14:56:19 +0800 Subject: [PATCH 053/137] fix get_structure_optimization_candidates --- src/memos/graph_dbs/polardb.py | 53 +++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 60bd587f1..e62405d40 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1859,10 +1859,10 @@ def get_structure_optimization_candidates( results = cursor.fetchall() print("result------",len(results)) for row in results: - result_agtype = row[0] - if result_agtype and hasattr(result_agtype, 'value'): - if include_embedding: - # 当 include_embedding=True 时,返回完整的节点对象 + if include_embedding: + # 当 include_embedding=True 时,返回完整的节点对象 + result_agtype = row[0] + if result_agtype and hasattr(result_agtype, 'value'): node_props = result_agtype.value if isinstance(node_props, dict) and "properties" in node_props: node = self._parse_node(node_props["properties"]) @@ -1870,15 +1870,42 @@ def get_structure_optimization_candidates( if node_id not in node_ids: candidates.append(node) node_ids.add(node_id) - else: - # 当 include_embedding=False 时,返回字段字典 - props = result_agtype.value - if isinstance(props, dict): - node = self._parse_node(props) - node_id = node["id"] - if node_id not in node_ids: - 
candidates.append(node) - node_ids.add(node_id) + else: + # 当 include_embedding=False 时,返回字段字典 + # 定义字段名称(与查询中的 RETURN 字段对应) + field_names = [ + "id", "memory", "user_name", "user_id", "session_id", "status", + "key", "confidence", "tags", "created_at", "updated_at", + "memory_type", "sources", "source", "node_type", "visibility", + "usage", "background" + ] + + # 将行数据转换为字典 + node_data = {} + for i, field_name in enumerate(field_names): + if i < len(row): + value = row[i] + # 处理特殊字段 + if field_name in ["tags", "sources", "usage"] and isinstance(value, str): + try: + # 尝试解析 JSON 字符串 + node_data[field_name] = json.loads(value) + except (json.JSONDecodeError, TypeError): + node_data[field_name] = value + else: + node_data[field_name] = value + + # 使用 _parse_node 方法解析 + try: + node = self._parse_node(node_data) + node_id = node["id"] + + if node_id not in node_ids: + candidates.append(node) + node_ids.add(node_id) + print(f"✅ 成功解析节点: {node_id}") + except Exception as e: + print(f"❌ 解析节点失败: {e}") except Exception as e: logger.error(f"Failed to get structure optimization candidates: {e}", exc_info=True) From fed35de7d3424730746e46548a92963e964caf16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 15:16:32 +0800 Subject: [PATCH 054/137] fix get_all_memory_items --- src/memos/graph_dbs/polardb.py | 36 +++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 996360344..1669d3b53 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1747,16 +1747,47 @@ def get_all_memory_items( LIMIT 100 $$) AS (n agtype) """ + print("[get_all_memory_items] cypher_query:", cypher_query) nodes = [] try: with self.connection.cursor() as cursor: cursor.execute(cypher_query) results = cursor.fetchall() + print("[get_all_memory_items] results:", results) for row in results: node_agtype = row[0] - 
if node_agtype and hasattr(node_agtype, 'value'): + print(f"[get_all_memory_items] Processing row: {type(node_agtype)} = {node_agtype}") + + # 处理字符串格式的数据 + if isinstance(node_agtype, str): + try: + # 移除 ::vertex 后缀 + json_str = node_agtype.replace('::vertex', '') + node_data = json.loads(json_str) + + if isinstance(node_data, dict) and "properties" in node_data: + properties = node_data["properties"] + # 构建节点数据 + parsed_node_data = { + "id": properties.get("id", ""), + "memory": properties.get("memory", ""), + "metadata": properties + } + + if include_embedding and "embedding" in properties: + parsed_node_data["embedding"] = properties["embedding"] + + nodes.append(self._parse_node(parsed_node_data)) + print(f"[get_all_memory_items] ✅ 成功解析节点: {properties.get('id', '')}") + else: + print(f"[get_all_memory_items] ❌ 节点数据格式不正确: {node_data}") + + except (json.JSONDecodeError, TypeError) as e: + print(f"[get_all_memory_items] ❌ JSON 解析失败: {e}") + elif node_agtype and hasattr(node_agtype, 'value'): + # 处理 agtype 对象 node_props = node_agtype.value if isinstance(node_props, dict): # 解析节点属性 @@ -1770,6 +1801,9 @@ def get_all_memory_items( node_data["embedding"] = node_props["embedding"] nodes.append(self._parse_node(node_data)) + print(f"[get_all_memory_items] ✅ 成功解析 agtype 节点: {node_props.get('id', '')}") + else: + print(f"[get_all_memory_items] ❌ 未知的数据格式: {type(node_agtype)}") except Exception as e: logger.error(f"Failed to get memories: {e}", exc_info=True) From d7a7adab84fee23910ef73838f8f56e65d07e3d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 15:16:56 +0800 Subject: [PATCH 055/137] fix get_all_memory_items --- examples/basic_modules/polardb_search.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index a19275b65..b47f1e6eb 100644 --- a/examples/basic_modules/polardb_search.py 
+++ b/examples/basic_modules/polardb_search.py @@ -91,12 +91,12 @@ def getPolarDb(db_name): config = GraphDBConfigFactory( backend="polardb", config={ - "host": "xxx", + "host": "memory.pg.polardb.rds.aliyuncs.com", "port": 5432, - "user": "xxxx", - "password": "xxxx", + "user": "adimin", + "password": "Openmem0925", "db_name": db_name, - "user_name": 'xxxx', + "user_name": 'adimin', "use_multi_db": True, # 设置为True,不添加user_name过滤条件 "auto_create": True, "embedding_dimension": 1024, @@ -197,6 +197,12 @@ def get_structure_optimization_candidates(db_name, scope, include_embedding, use print("get_structure_optimization_candidates:", candidates) +def get_all_memory_items(db_name, scope, include_embedding, user_name): + graph = getPolarDb(db_name) + memory_items = graph.get_all_memory_items(scope=scope, include_embedding=include_embedding, user_name=user_name) + print("get_all_memory_items:", memory_items) + + if __name__ == "__main__": # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="edge") @@ -338,7 +344,9 @@ def get_structure_optimization_candidates(db_name, scope, include_embedding, use -0.01335147, -0.040344328, 0.029144352, -0.04174814, 0.023315482, -0.02227788, -0.0022716573, -0.03152473, 0.0482484, -0.027038634, -0.004882823, 0.06152357, -0.003881463, -0.036041338, -0.0075645614, 0.020660445, -0.07250992, -0.024429375, -0.036377035] - searchVector(db_name="memtensor_memos", vectorStr=vector, user_name="memos7a9f9fbbb61c412f94f77fbaa8103c35") + # searchVector(db_name="memtensor_memos", vectorStr=vector, user_name="memos7a9f9fbbb61c412f94f77fbaa8103c35") + + # searchVector(db_name="test_1020_02", vectorStr=vector) # add_edge(db_name="memtensor_memos",source_id="13bb9df6-0609-4442-8bed-bba77dadac92", target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", edge_type="PARENT", user_name="memosbfb3fb32032b4077a641404dc48739cd") @@ -356,4 +364,6 @@ def 
get_structure_optimization_candidates(db_name, scope, include_embedding, use # export_graph(db_name="memtensor_memos", include_embedding=False, user_name="memos8698ecb1f76940ff9adc12494c4d57a6") - # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=True, user_name="memose62f1a2ef8c54ccfadf329d11d1e31ad") + # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") + + get_all_memory_items(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") From ec43e3c6c4dcdd93fc3c7f6eafd351ccf11e9d25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 15:42:03 +0800 Subject: [PATCH 056/137] remove embedding for get_nodes --- src/memos/graph_dbs/polardb.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 1669d3b53..5da486525 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -804,8 +804,10 @@ def get_nodes(self, ids: list[str], user_name: str | None = None,**kwargs) -> l # Parse embedding from JSONB if it exists if embedding_json is not None: try: - embedding = json.loads(embedding_json) if isinstance(embedding_json, str) else embedding_json - properties["embedding"] = embedding + print("embedding_json:", embedding_json) + # remove embedding + # embedding = json.loads(embedding_json) if isinstance(embedding_json, str) else embedding_json + # properties["embedding"] = embedding except (json.JSONDecodeError, TypeError): logger.warning(f"Failed to parse embedding for node {node_id}") nodes.append(self._parse_node( @@ -1758,7 +1760,7 @@ def get_all_memory_items( for row in results: node_agtype = row[0] - print(f"[get_all_memory_items] Processing row: {type(node_agtype)} = 
{node_agtype}") + # print(f"[get_all_memory_items] Processing row: {type(node_agtype)} = {node_agtype}") # 处理字符串格式的数据 if isinstance(node_agtype, str): From b2a893f909917eaf535de3636e6fe0fc04840cb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 15:52:35 +0800 Subject: [PATCH 057/137] fix get_structure_optimization_candidates --- src/memos/graph_dbs/polardb.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 5da486525..d4af71c99 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1824,8 +1824,8 @@ def get_structure_optimization_candidates( # 构建返回字段,根据 include_embedding 参数决定是否包含 embedding if include_embedding: - return_fields = "n" - return_fields_agtype =" n agtype" + return_fields = "id(n) as id1,n" + return_fields_agtype = " id1 agtype,n agtype" else: # 构建不包含 embedding 的字段列表 return_fields = ",".join([ @@ -1885,6 +1885,18 @@ def get_structure_optimization_candidates( RETURN {return_fields} $$) AS ({return_fields_agtype}) """ + if include_embedding: + cypher_query = f""" + WITH t as ( + {cypher_query} + ) + SELECT + m.embedding, + t.n + FROM t, + {self.db_name}_graph."Memory" m + WHERE t.id1 = m.id + """ print("[get_structure_optimization_candidates] query:", cypher_query) candidates = [] From 4b849f30b24d913db6444245aeaf87f7e444b8ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 16:11:48 +0800 Subject: [PATCH 058/137] add _parse_node_new --- src/memos/graph_dbs/polardb.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index d4af71c99..36e56d338 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1913,7 +1913,7 @@ def get_structure_optimization_candidates( 
if result_agtype and hasattr(result_agtype, 'value'): node_props = result_agtype.value if isinstance(node_props, dict) and "properties" in node_props: - node = self._parse_node(node_props["properties"]) + node = self._parse_node_new(node_props["properties"]) node_id = node["id"] if node_id not in node_ids: candidates.append(node) @@ -1945,7 +1945,7 @@ def get_structure_optimization_candidates( # 使用 _parse_node 方法解析 try: - node = self._parse_node(node_data) + node = self._parse_node_new(node_data) node_id = node["id"] if node_id not in node_ids: @@ -1987,6 +1987,31 @@ def _parse_node(self, node_data: dict[str, Any]) -> dict[str, Any]: return {"id": node.pop("id"), "memory": node.pop("memory", ""), "metadata": node} + def _parse_node_new(self, node_data: dict[str, Any]) -> dict[str, Any]: + """Parse node data from database format to standard format.""" + node = node_data.copy() + + # Normalize string values that may arrive as quoted literals (e.g., '"abc"') + def _strip_wrapping_quotes(value: Any) -> Any: + if isinstance(value, str) and len(value) >= 2: + if value[0] == value[-1] and value[0] in ("'", '"'): + return value[1:-1] + return value + + for k, v in list(node.items()): + if isinstance(v, str): + node[k] = _strip_wrapping_quotes(v) + + # Convert datetime to string + for time_field in ("created_at", "updated_at"): + if time_field in node and hasattr(node[time_field], "isoformat"): + node[time_field] = node[time_field].isoformat() + + # 不再对sources和usage字段进行反序列化,保持List[str]格式 + # 不再移除user_name字段,保持所有字段 + + return {"id": node.pop("id"), "memory": node.pop("memory", ""), "metadata": node} + def __del__(self): """Close database connection when object is destroyed.""" if hasattr(self, 'connection') and self.connection: From 7c79df60220e0c770d5fa314a2b84ef61fbe4ce4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 16:22:26 +0800 Subject: [PATCH 059/137] update get_all_memory_items --- 
src/memos/graph_dbs/polardb.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 36e56d338..59eed0b42 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1733,13 +1733,21 @@ def get_all_memory_items( # 使用 cypher 查询获取记忆项 if include_embedding: cypher_query = f""" - SELECT * FROM cypher('{self.db_name}_graph', $$ - MATCH (n:Memory) - WHERE n.memory_type = '{scope}' AND n.user_name = '{user_name}' - RETURN n - LIMIT 100 - $$) AS (n agtype) - """ + WITH t as ( + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (n:Memory) + WHERE n.memory_type = '{scope}' AND n.user_name = '{user_name}' + RETURN id(n) as id1,n + LIMIT 100 + $$) AS (id1 agtype,n agtype) + ) + SELECT + m.embedding, + t.n + FROM t, + {self.db_name}_graph."Memory" m + WHERE t.id1 = m.id; + """ else: cypher_query = f""" SELECT * FROM cypher('{self.db_name}_graph', $$ From 834b0ecd33a9caeb303970e5acf10d2748f13617 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 16:27:18 +0800 Subject: [PATCH 060/137] update get_all_memory_items --- examples/basic_modules/polardb_search.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index b47f1e6eb..554912fd2 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -110,10 +110,10 @@ def searchVector(db_name: str, vectorStr: list[float], user_name: str = None): graph = getPolarDb(db_name) # 1,查询search_by_embedding - nodes = graph.search_by_embedding(vector=vectorStr, top_k=1, user_name=user_name) - print("search_by_embedding nodes:", len(nodes)) - for node_i in nodes: - print("Search result:", graph.get_node(node_i["id"][1:-1])) + # nodes = graph.search_by_embedding(vector=vectorStr, top_k=1, 
user_name=user_name) + # print("search_by_embedding nodes:", len(nodes)) + # for node_i in nodes: + # print("Search result:", graph.get_node(node_i["id"][1:-1])) # 2,查询单个get_node # detail = graph.get_node(id="194f1e30-44d2-4e3f-bc58-c950343c56b7", @@ -126,7 +126,7 @@ def searchVector(db_name: str, vectorStr: list[float], user_name: str = None): detail_list = graph.get_nodes(ids=ids,user_name='memos7a9f9fbbb61c412f94f77fbaa8103c35') print("1111多个node:", len(detail_list)) # - # print("多个node:", detail_list) + print("多个node:", detail_list) # 4,更新 update_node # graph.update_node(id="000009999ef-926f-42e2-b7b5-0224daf0abcd", fields={"name": "new_name"}) @@ -200,7 +200,7 @@ def get_structure_optimization_candidates(db_name, scope, include_embedding, use def get_all_memory_items(db_name, scope, include_embedding, user_name): graph = getPolarDb(db_name) memory_items = graph.get_all_memory_items(scope=scope, include_embedding=include_embedding, user_name=user_name) - print("get_all_memory_items:", memory_items) + # print("get_all_memory_items:", memory_items) if __name__ == "__main__": @@ -366,4 +366,4 @@ def get_all_memory_items(db_name, scope, include_embedding, user_name): # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") - get_all_memory_items(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") + get_all_memory_items(db_name="memtensor_memos", scope='UserMemory', include_embedding=True, user_name="memos8f5530534d9b413bb8981ffc3d48a495") From ee9fb717c45f4067478ab8694be8981da91f94a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 17:15:53 +0800 Subject: [PATCH 061/137] update get_all_memory_items for include_embedding --- src/memos/graph_dbs/polardb.py | 52 +++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 
10 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 59eed0b42..e8137e876 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1917,15 +1917,17 @@ def get_structure_optimization_candidates( for row in results: if include_embedding: # 当 include_embedding=True 时,返回完整的节点对象 - result_agtype = row[0] - if result_agtype and hasattr(result_agtype, 'value'): - node_props = result_agtype.value - if isinstance(node_props, dict) and "properties" in node_props: - node = self._parse_node_new(node_props["properties"]) - node_id = node["id"] - if node_id not in node_ids: - candidates.append(node) - node_ids.add(node_id) + if isinstance(row, (list, tuple)) and len(row) >= 2: + embedding_val, node_val = row[0], row[1] + else: + embedding_val, node_val = None, row[0] + + node = self._build_node_from_agtype(node_val, embedding_val) + if node: + node_id = node["id"] + if node_id not in node_ids: + candidates.append(node) + node_ids.add(node_id) else: # 当 include_embedding=False 时,返回字段字典 # 定义字段名称(与查询中的 RETURN 字段对应) @@ -2174,4 +2176,34 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: st ) """ cursor.execute(insert_query, (id, json.dumps(properties))) - logger.info(f"Added node {id} to graph '{self.db_name}_graph'.") \ No newline at end of file + logger.info(f"Added node {id} to graph '{self.db_name}_graph'.") + + def _build_node_from_agtype(self, node_agtype, embedding=None): + """ + 将 cypher 返回的 n 列(agtype 或 JSON 字符串)解析为标准节点, + 并把 embedding 合并进 properties 里。 + """ + try: + # 字符串场景: '{"id":...,"label":[...],"properties":{...}}::vertex' + if isinstance(node_agtype, str): + json_str = node_agtype.replace('::vertex', '') + obj = json.loads(json_str) + if not (isinstance(obj, dict) and "properties" in obj): + return None + props = obj["properties"] + # agtype 场景: 带 value 属性 + elif node_agtype and hasattr(node_agtype, "value"): + val = node_agtype.value + if not (isinstance(val, dict) 
and "properties" in val): + return None + props = val["properties"] + else: + return None + + if embedding is not None: + props["embedding"] = embedding + + node_data = {"id": props.get("id", ""), "memory": props.get("memory", ""), "metadata": props} + return self._parse_node_new(node_data) + except Exception: + return None \ No newline at end of file From 862aa8dd0a3bbdd10589f3ea2768304565bd145c Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 20 Oct 2025 17:47:07 +0800 Subject: [PATCH 062/137] feat: server router add polardb config --- src/memos/api/routers/server_router.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/memos/api/routers/server_router.py b/src/memos/api/routers/server_router.py index a332de583..a850d0845 100644 --- a/src/memos/api/routers/server_router.py +++ b/src/memos/api/routers/server_router.py @@ -45,6 +45,7 @@ def _build_graph_db_config(user_id: str = "default") -> dict[str, Any]: "neo4j-community": APIConfig.get_neo4j_community_config(user_id=user_id), "neo4j": APIConfig.get_neo4j_config(user_id=user_id), "nebular": APIConfig.get_nebular_config(user_id=user_id), + "polardb": APIConfig.get_polardb_config(user_id="default"), } graph_db_backend = os.getenv("NEO4J_BACKEND", "nebular").lower() From dced597b3711f3ee555ed9e2b1babf055a137e37 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 20 Oct 2025 17:48:19 +0800 Subject: [PATCH 063/137] feat: server router add polardb config --- src/memos/api/routers/server_router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/api/routers/server_router.py b/src/memos/api/routers/server_router.py index a850d0845..ad9a1cd9c 100644 --- a/src/memos/api/routers/server_router.py +++ b/src/memos/api/routers/server_router.py @@ -45,7 +45,7 @@ def _build_graph_db_config(user_id: str = "default") -> dict[str, Any]: "neo4j-community": APIConfig.get_neo4j_community_config(user_id=user_id), "neo4j": APIConfig.get_neo4j_config(user_id=user_id), 
"nebular": APIConfig.get_nebular_config(user_id=user_id), - "polardb": APIConfig.get_polardb_config(user_id="default"), + "polardb": APIConfig.get_polardb_config(user_id=user_id), } graph_db_backend = os.getenv("NEO4J_BACKEND", "nebular").lower() From c2f7ff3e9730fa6551cf12a5eb292cc8314d30e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 17:49:04 +0800 Subject: [PATCH 064/137] update get_all_memory_items for include_embedding False --- src/memos/graph_dbs/polardb.py | 120 ++++++++++++++++----------------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index e8137e876..6f788c3aa 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1757,68 +1757,68 @@ def get_all_memory_items( LIMIT 100 $$) AS (n agtype) """ - print("[get_all_memory_items] cypher_query:", cypher_query) + print("[get_all_memory_items] cypher_query:", cypher_query) - nodes = [] - try: - with self.connection.cursor() as cursor: - cursor.execute(cypher_query) - results = cursor.fetchall() - print("[get_all_memory_items] results:", results) - - for row in results: - node_agtype = row[0] - # print(f"[get_all_memory_items] Processing row: {type(node_agtype)} = {node_agtype}") - - # 处理字符串格式的数据 - if isinstance(node_agtype, str): - try: - # 移除 ::vertex 后缀 - json_str = node_agtype.replace('::vertex', '') - node_data = json.loads(json_str) - - if isinstance(node_data, dict) and "properties" in node_data: - properties = node_data["properties"] - # 构建节点数据 - parsed_node_data = { - "id": properties.get("id", ""), - "memory": properties.get("memory", ""), - "metadata": properties + nodes = [] + try: + with self.connection.cursor() as cursor: + cursor.execute(cypher_query) + results = cursor.fetchall() + print("[get_all_memory_items] results:", results) + + for row in results: + node_agtype = row[0] + # print(f"[get_all_memory_items] 
Processing row: {type(node_agtype)} = {node_agtype}") + + # 处理字符串格式的数据 + if isinstance(node_agtype, str): + try: + # 移除 ::vertex 后缀 + json_str = node_agtype.replace('::vertex', '') + node_data = json.loads(json_str) + + if isinstance(node_data, dict) and "properties" in node_data: + properties = node_data["properties"] + # 构建节点数据 + parsed_node_data = { + "id": properties.get("id", ""), + "memory": properties.get("memory", ""), + "metadata": properties + } + + if include_embedding and "embedding" in properties: + parsed_node_data["embedding"] = properties["embedding"] + + nodes.append(self._parse_node(parsed_node_data)) + print(f"[get_all_memory_items] ✅ 成功解析节点: {properties.get('id', '')}") + else: + print(f"[get_all_memory_items] ❌ 节点数据格式不正确: {node_data}") + + except (json.JSONDecodeError, TypeError) as e: + print(f"[get_all_memory_items] ❌ JSON 解析失败: {e}") + elif node_agtype and hasattr(node_agtype, 'value'): + # 处理 agtype 对象 + node_props = node_agtype.value + if isinstance(node_props, dict): + # 解析节点属性 + node_data = { + "id": node_props.get("id", ""), + "memory": node_props.get("memory", ""), + "metadata": node_props } - - if include_embedding and "embedding" in properties: - parsed_node_data["embedding"] = properties["embedding"] - - nodes.append(self._parse_node(parsed_node_data)) - print(f"[get_all_memory_items] ✅ 成功解析节点: {properties.get('id', '')}") - else: - print(f"[get_all_memory_items] ❌ 节点数据格式不正确: {node_data}") - - except (json.JSONDecodeError, TypeError) as e: - print(f"[get_all_memory_items] ❌ JSON 解析失败: {e}") - elif node_agtype and hasattr(node_agtype, 'value'): - # 处理 agtype 对象 - node_props = node_agtype.value - if isinstance(node_props, dict): - # 解析节点属性 - node_data = { - "id": node_props.get("id", ""), - "memory": node_props.get("memory", ""), - "metadata": node_props - } - - if include_embedding and "embedding" in node_props: - node_data["embedding"] = node_props["embedding"] - - nodes.append(self._parse_node(node_data)) - 
print(f"[get_all_memory_items] ✅ 成功解析 agtype 节点: {node_props.get('id', '')}") - else: - print(f"[get_all_memory_items] ❌ 未知的数据格式: {type(node_agtype)}") - - except Exception as e: - logger.error(f"Failed to get memories: {e}", exc_info=True) - - return nodes + + if include_embedding and "embedding" in node_props: + node_data["embedding"] = node_props["embedding"] + + nodes.append(self._parse_node(node_data)) + print(f"[get_all_memory_items] ✅ 成功解析 agtype 节点: {node_props.get('id', '')}") + else: + print(f"[get_all_memory_items] ❌ 未知的数据格式: {type(node_agtype)}") + + except Exception as e: + logger.error(f"Failed to get memories: {e}", exc_info=True) + + return nodes def get_structure_optimization_candidates( self, scope: str, include_embedding: bool = False, user_name: str | None = None From 563a8a1149e664572b5c8620cc2ee97e0c94f496 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 19:24:18 +0800 Subject: [PATCH 065/137] update get_all_memory_items for include_embedding False --- src/memos/graph_dbs/polardb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 6f788c3aa..94cc3c21c 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1753,9 +1753,9 @@ def get_all_memory_items( SELECT * FROM cypher('{self.db_name}_graph', $$ MATCH (n:Memory) WHERE n.memory_type = '{scope}' AND n.user_name = '{user_name}' - RETURN n + RETURN properties(n) as props LIMIT 100 - $$) AS (n agtype) + $$) AS (nprops agtype) """ print("[get_all_memory_items] cypher_query:", cypher_query) From 2d6243323efbc5924a92f9fd1be89ee02c365028 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Mon, 20 Oct 2025 19:59:07 +0800 Subject: [PATCH 066/137] fix --- src/memos/graph_dbs/polardb.py | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/memos/graph_dbs/polardb.py 
b/src/memos/graph_dbs/polardb.py index 94cc3c21c..94d00e69f 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1713,6 +1713,71 @@ def import_graph(self, data: dict[str, Any]) -> None: self.add_edge(edge["source"], edge["target"], edge["type"]) def get_all_memory_items( + self, scope: str, include_embedding: bool = False, user_name: str | None = None + ) -> list[dict]: + """ + Retrieve all memory items of a specific memory_type. + + Args: + scope (str): Must be one of 'WorkingMemory', 'LongTermMemory', or 'UserMemory'. + include_embedding: with/without embedding + user_name (str, optional): User name for filtering in non-multi-db mode + + Returns: + list[dict]: Full list of memory items under this scope. + """ + user_name = user_name if user_name else self._get_config_value("user_name") + if scope not in {"WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"}: + raise ValueError(f"Unsupported memory type scope: {scope}") + + # 使用 cypher 查询获取记忆项 + if include_embedding: + cypher_query = f""" + WITH t as ( + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (n:Memory) + WHERE n.memory_type = '{scope}' AND n.user_name = '{user_name}' + RETURN id(n) as id1,n + LIMIT 100 + $$) AS (id1 agtype,n agtype) + ) + SELECT + m.embedding, + t.n + FROM t, + {self.db_name}_graph."Memory" m + WHERE t.id1 = m.id; + """ + else: + cypher_query = f""" + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (n:Memory) + WHERE n.memory_type = '{scope}' AND n.user_name = '{user_name}' + RETURN properties(n) as props + LIMIT 100 + $$) AS (nprops agtype) + """ + print("[get_all_memory_items] cypher_query:", cypher_query) + + nodes = [] + try: + with self.connection.cursor() as cursor: + cursor.execute(cypher_query) + results = cursor.fetchall() + print("[get_all_memory_items] results:", results) + + for row in results: + print("row----------:"+row) + node_agtype = row[0] + # print(f"[get_all_memory_items] Processing row: {type(node_agtype)} = 
{node_agtype}") + + + except Exception as e: + logger.error(f"Failed to get memories: {e}", exc_info=True) + + return nodes + + def get_all_memory_items_old( self, scope: str, include_embedding: bool = False, user_name: str | None = None ) -> list[dict]: """ From 7f9b4abfc44440d3a6b475de82c84fa385960941 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Mon, 20 Oct 2025 20:17:29 +0800 Subject: [PATCH 067/137] fix get_all_memory_items --- src/memos/graph_dbs/polardb.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 94d00e69f..3a8dc8094 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1767,11 +1767,11 @@ def get_all_memory_items( print("[get_all_memory_items] results:", results) for row in results: - print("row----------:"+row) - node_agtype = row[0] - # print(f"[get_all_memory_items] Processing row: {type(node_agtype)} = {node_agtype}") - - + if isinstance(row[0], str): + memory_data = json.loads(row[0]) + else: + memory_data = row[0] # 如果已经是字典,直接使用 + nodes.append(self._parse_node(memory_data)) except Exception as e: logger.error(f"Failed to get memories: {e}", exc_info=True) From 3a8c98c8013d93371fb1b7362c364ed446236912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 20:21:35 +0800 Subject: [PATCH 068/137] update get_all_memory_items for include_embedding False --- src/memos/graph_dbs/polardb.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 94d00e69f..53e7ab7ee 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1748,6 +1748,23 @@ def get_all_memory_items( {self.db_name}_graph."Memory" m WHERE t.id1 = m.id; """ + nodes = [] + try: + with self.connection.cursor() as cursor: + cursor.execute(cypher_query) + results = cursor.fetchall() 
+ print("[get_all_memory_items] results:", results) + + for row in results: + print("row----------:" + row) + node_agtype = row[0] + # print(f"[get_all_memory_items] Processing row: {type(node_agtype)} = {node_agtype}") + + + except Exception as e: + logger.error(f"Failed to get memories: {e}", exc_info=True) + + return nodes else: cypher_query = f""" SELECT * FROM cypher('{self.db_name}_graph', $$ From 26590a95d15d349e1b224e7d6f9daa742e85cecd Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Mon, 20 Oct 2025 20:36:24 +0800 Subject: [PATCH 069/137] fix get_all_memory_items --- src/memos/graph_dbs/polardb.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 3a8dc8094..c2c1b2821 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1404,7 +1404,7 @@ def get_grouped_counts( raise ValueError("group_fields cannot be empty") final_params = params.copy() if params else {} - + print("username:"+user_name) if not self.config.use_multi_db and (self.config.user_name or user_name): user_clause = "n.user_name = $user_name" final_params["user_name"] = user_name @@ -1428,6 +1428,7 @@ def get_grouped_counts( RETURN {group_fields_cypher}, COUNT(n) AS count $$ ) as ({group_fields_cypher_polardb}, count agtype); """ + print("get_grouped_counts:"+query) try: with self.connection.cursor() as cursor: # 处理参数化查询 @@ -1764,7 +1765,6 @@ def get_all_memory_items( with self.connection.cursor() as cursor: cursor.execute(cypher_query) results = cursor.fetchall() - print("[get_all_memory_items] results:", results) for row in results: if isinstance(row[0], str): @@ -1772,6 +1772,8 @@ def get_all_memory_items( else: memory_data = row[0] # 如果已经是字典,直接使用 nodes.append(self._parse_node(memory_data)) + json_list = [json.loads(row[0]) if isinstance(row[0], str) else row[0] for row in results] + return json_list except Exception as e: logger.error(f"Failed to get memories: 
{e}", exc_info=True) From 9ecc94bda7340396fa8a4eed8a3bf4862226b424 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 20 Oct 2025 21:02:20 +0800 Subject: [PATCH 070/137] update get_all_memory_items for include_embedding False --- examples/basic_modules/polardb_search.py | 5 ++++- src/memos/graph_dbs/polardb.py | 19 ++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index 554912fd2..2d47f2bf5 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -200,7 +200,7 @@ def get_structure_optimization_candidates(db_name, scope, include_embedding, use def get_all_memory_items(db_name, scope, include_embedding, user_name): graph = getPolarDb(db_name) memory_items = graph.get_all_memory_items(scope=scope, include_embedding=include_embedding, user_name=user_name) - # print("get_all_memory_items:", memory_items) + print("get_all_memory_items:", memory_items) if __name__ == "__main__": @@ -367,3 +367,6 @@ def get_all_memory_items(db_name, scope, include_embedding, user_name): # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") get_all_memory_items(db_name="memtensor_memos", scope='UserMemory', include_embedding=True, user_name="memos8f5530534d9b413bb8981ffc3d48a495") + + # 测试 get_structure_optimization_candidates 函数 + # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index fc9fb0b17..c88e44ad7 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1750,16 +1750,25 @@ def get_all_memory_items( WHERE t.id1 = m.id; """ nodes = [] + node_ids = 
set() + print("[get_all_memory_items embedding true ] cypher_query:", cypher_query) try: with self.connection.cursor() as cursor: cursor.execute(cypher_query) results = cursor.fetchall() - print("[get_all_memory_items] results:", results) for row in results: - print("row----------:" + row) - node_agtype = row[0] - # print(f"[get_all_memory_items] Processing row: {type(node_agtype)} = {node_agtype}") + if isinstance(row, (list, tuple)) and len(row) >= 2: + embedding_val, node_val = row[0], row[1] + else: + embedding_val, node_val = None, row[0] + + node = self._build_node_from_agtype(node_val, embedding_val) + if node: + node_id = node["id"] + if node_id not in node_ids: + nodes.append(node) + node_ids.add(node_id) except Exception as e: @@ -1775,7 +1784,7 @@ def get_all_memory_items( LIMIT 100 $$) AS (nprops agtype) """ - print("[get_all_memory_items] cypher_query:", cypher_query) + print("[get_all_memory_items embedding false ] cypher_query:", cypher_query) nodes = [] try: From 932e4105a87df6665a68a70cadf94d07df0a6586 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Tue, 21 Oct 2025 10:54:20 +0800 Subject: [PATCH 071/137] update get_grouped_counts --- src/memos/graph_dbs/polardb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index c88e44ad7..53cb400ca 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1380,7 +1380,7 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = return ids - def get_grouped_counts( + def get_grouped_counts1( self, group_fields: list[str], where_clause: str = "", @@ -1457,7 +1457,7 @@ def get_grouped_counts( return [] - def get_grouped_counts_old( + def get_grouped_counts( self, group_fields: list[str], where_clause: str = "", From 6222430aa4e714e743afb26b219b366fbbc988db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= 
<714403855@qq.com> Date: Tue, 21 Oct 2025 11:03:51 +0800 Subject: [PATCH 072/137] update get_grouped_counts --- examples/basic_modules/polardb_search.py | 14 ++++++++------ src/memos/graph_dbs/polardb.py | 21 +++++++++++---------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index 2d47f2bf5..ff1ab510e 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -110,10 +110,10 @@ def searchVector(db_name: str, vectorStr: list[float], user_name: str = None): graph = getPolarDb(db_name) # 1,查询search_by_embedding - # nodes = graph.search_by_embedding(vector=vectorStr, top_k=1, user_name=user_name) - # print("search_by_embedding nodes:", len(nodes)) - # for node_i in nodes: - # print("Search result:", graph.get_node(node_i["id"][1:-1])) + nodes = graph.search_by_embedding(vector=vectorStr, top_k=1, user_name=user_name) + print("search_by_embedding nodes:", len(nodes)) + for node_i in nodes: + print("Search result:", graph.get_node(node_i["id"][1:-1])) # 2,查询单个get_node # detail = graph.get_node(id="194f1e30-44d2-4e3f-bc58-c950343c56b7", @@ -182,6 +182,8 @@ def get_grouped_counts(db_name, user_name): graph = getPolarDb(db_name) grouped_counts = graph.get_grouped_counts(group_fields=["status"], where_clause="user_name = %s", params=[user_name], user_name=user_name) + grouped_counts = graph.get_grouped_counts1(group_fields=["status"], + params=[user_name], user_name=user_name) print("get_grouped_counts:", grouped_counts) @@ -360,13 +362,13 @@ def get_all_memory_items(db_name, scope, include_embedding, user_name): # center_status="activated", user_name="memos07ea708ac7eb412887c5c283f874ea30") # - # get_grouped_counts(db_name="memtensor_memos", user_name="memos07ea708ac7eb412887c5c283f874ea30") + get_grouped_counts(db_name="memtensor_memos", user_name="memos07ea708ac7eb412887c5c283f874ea30") # 
export_graph(db_name="memtensor_memos", include_embedding=False, user_name="memos8698ecb1f76940ff9adc12494c4d57a6") # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") - get_all_memory_items(db_name="memtensor_memos", scope='UserMemory', include_embedding=True, user_name="memos8f5530534d9b413bb8981ffc3d48a495") + # get_all_memory_items(db_name="memtensor_memos", scope='UserMemory', include_embedding=True, user_name="memos8f5530534d9b413bb8981ffc3d48a495") # 测试 get_structure_optimization_candidates 函数 # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 53cb400ca..26a0ea6a5 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1416,19 +1416,20 @@ def get_grouped_counts1( where_clause = f"WHERE {where_clause} AND {user_clause}" else: where_clause = f"WHERE {user_clause}" - + print("where_clause:" + where_clause) # Force RETURN field AS field to guarantee key match group_fields_cypher = ", ".join([f"n.{field} AS {field}" for field in group_fields]) - group_fields_cypher_polardb = "agtype, ".join([f"n.{field} AS {field}" for field in group_fields]) - + # group_fields_cypher_polardb = "agtype, ".join([f"{field}" for field in group_fields]) + group_fields_cypher_polardb = ", ".join([f"{field} agtype" for field in group_fields]) + print("group_fields_cypher_polardb:" + group_fields_cypher_polardb) query = f""" - SELECT * FROM cypher('{self.db_name}_graph', $$ - MATCH (n:Memory) - {where_clause} - RETURN {group_fields_cypher}, COUNT(n) AS count - $$ ) as ({group_fields_cypher_polardb}, count agtype); - """ - print("get_grouped_counts:"+query) + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (n:Memory) + {where_clause} + RETURN 
{group_fields_cypher}, COUNT(n) AS count1 + $$ ) as ({group_fields_cypher_polardb}, count1 agtype); + """ + print("get_grouped_counts:" + query) try: with self.connection.cursor() as cursor: # 处理参数化查询 From 94a557a85e7a503d7f6ec279e1471aea7df1faa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Tue, 21 Oct 2025 11:13:11 +0800 Subject: [PATCH 073/137] add_node and graph_id --- src/memos/graph_dbs/polardb.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 26a0ea6a5..00b5ff997 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2249,6 +2249,13 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: st WHERE id = ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, %s::text::cstring) """ cursor.execute(delete_query, (id,)) + # + get_graph_id_query = f""" + SELECT ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, %s::text::cstring) + """ + cursor.execute(get_graph_id_query, (id,)) + graph_id = cursor.fetchone()[0] + properties['graph_id'] = str(graph_id) # 然后插入新记录 if embedding_vector: From e3e97d286ecdc48ecd7df8205004dcc756b4165b Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Tue, 21 Oct 2025 15:27:53 +0800 Subject: [PATCH 074/137] fix --- .../textual/tree_text_memory/retrieve/recall.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py index d4cfcf501..253d0bde0 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py @@ -56,7 +56,21 @@ def retrieve( working_memories = self.graph_store.get_all_memory_items( scope="WorkingMemory", include_embedding=False, user_name=user_name ) - return 
[TextualMemoryItem.from_dict(record) for record in working_memories] + # 过滤数据,只保留模型定义的字段 + if isinstance(working_memories, list): + valid_fields = set(TextualMemoryItem.__fields__.keys()) + + filtered_records = [] + for record in working_memories: + if isinstance(record, dict): + # 过滤每个字典中的字段 + filtered_record = {k: v for k, v in record.items() if k in valid_fields} + # 使用 from_dict 方法创建实例 + memory_item = TextualMemoryItem.from_dict(filtered_record) + filtered_records.append(memory_item) + + return filtered_records + # return [TextualMemoryItem.from_dict(record) for record in working_memories] with ContextThreadPoolExecutor(max_workers=2) as executor: # Structured graph-based retrieval From 1cc11d98b2f0006d64dde1acaaf0d744f24932af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Tue, 21 Oct 2025 16:54:05 +0800 Subject: [PATCH 075/137] fix get_all_memory_items false --- src/memos/graph_dbs/polardb.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 00b5ff997..ba78b3ecb 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1799,8 +1799,6 @@ def get_all_memory_items( else: memory_data = row[0] # 如果已经是字典,直接使用 nodes.append(self._parse_node(memory_data)) - json_list = [json.loads(row[0]) if isinstance(row[0], str) else row[0] for row in results] - return json_list except Exception as e: logger.error(f"Failed to get memories: {e}", exc_info=True) From 35254812ee22ceffb6fb0a062087756b86cb0e0e Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Tue, 21 Oct 2025 16:57:33 +0800 Subject: [PATCH 076/137] fix --- src/memos/graph_dbs/polardb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index ba78b3ecb..1ffc92e15 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1966,7 +1966,7 @@ def 
get_structure_optimization_candidates( AND ag_catalog.agtype_access_operator(m.properties, '"user_name"'::ag_catalog.agtype) = '"activated"'::ag_catalog.agtype AND NOT EXISTS ( SELECT 1 - FROM memtensor_memos_graph."PARENT" p + FROM {self.db_name}_graph."PARENT" p WHERE m.id = p.start_id OR m.id = p.end_id ); """ From 50a75bba4f831443362e02aff4c83052eb1e3d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Tue, 21 Oct 2025 17:03:44 +0800 Subject: [PATCH 077/137] fix get_all_memory_items true --- src/memos/graph_dbs/polardb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index ba78b3ecb..d99b7eba1 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2302,7 +2302,7 @@ def _build_node_from_agtype(self, node_agtype, embedding=None): if embedding is not None: props["embedding"] = embedding - node_data = {"id": props.get("id", ""), "memory": props.get("memory", ""), "metadata": props} - return self._parse_node_new(node_data) + # 直接返回标准格式,不需要再次调用 _parse_node_new + return {"id": props.get("id", ""), "memory": props.get("memory", ""), "metadata": props} except Exception: return None \ No newline at end of file From d949f9d435148a180149624a6e1b165f3e06c800 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Tue, 21 Oct 2025 17:06:35 +0800 Subject: [PATCH 078/137] fix --- .../tree_text_memory/retrieve/recall.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py index 253d0bde0..c0a98570b 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py @@ -57,20 +57,20 @@ def retrieve( scope="WorkingMemory", include_embedding=False, user_name=user_name ) # 
过滤数据,只保留模型定义的字段 - if isinstance(working_memories, list): - valid_fields = set(TextualMemoryItem.__fields__.keys()) - - filtered_records = [] - for record in working_memories: - if isinstance(record, dict): - # 过滤每个字典中的字段 - filtered_record = {k: v for k, v in record.items() if k in valid_fields} - # 使用 from_dict 方法创建实例 - memory_item = TextualMemoryItem.from_dict(filtered_record) - filtered_records.append(memory_item) - - return filtered_records - # return [TextualMemoryItem.from_dict(record) for record in working_memories] + # if isinstance(working_memories, list): + # valid_fields = set(TextualMemoryItem.__fields__.keys()) + # + # filtered_records = [] + # for record in working_memories: + # if isinstance(record, dict): + # # 过滤每个字典中的字段 + # filtered_record = {k: v for k, v in record.items() if k in valid_fields} + # # 使用 from_dict 方法创建实例 + # memory_item = TextualMemoryItem.from_dict(filtered_record) + # filtered_records.append(memory_item) + # + # return filtered_records + return [TextualMemoryItem.from_dict(record) for record in working_memories] with ContextThreadPoolExecutor(max_workers=2) as executor: # Structured graph-based retrieval From 0bd074a3acea6da502a29614277646213fac98f6 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Tue, 21 Oct 2025 17:28:22 +0800 Subject: [PATCH 079/137] fix --- src/memos/graph_dbs/polardb.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index b10d9a1fa..2180408a9 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2087,7 +2087,18 @@ def _parse_node(self, node_data: dict[str, Any]) -> dict[str, Any]: # 不再对sources和usage字段进行反序列化,保持List[str]格式 # 不再移除user_name字段,保持所有字段 - return {"id": node.pop("id"), "memory": node.pop("memory", ""), "metadata": node} + # 1 + # return {"id": node.pop("id"), "memory": node.pop("memory", ""), "metadata": node} + + # 2 + # node_id = node.pop("id") + # memory = 
node.pop("memory", "") + # # 在 metadata 中添加 id 字段 + # node["id"] = node_id + # node1 = node + # return {"id": node_id, "memory": memory, "metadata": node1} + + return {"id": node.get("id"), "memory": node.get("memory", ""), "metadata": node} def _parse_node_new(self, node_data: dict[str, Any]) -> dict[str, Any]: """Parse node data from database format to standard format.""" From e1087c1aa3f4630255a0e4f2691c5c84e2076bec Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Tue, 21 Oct 2025 18:37:11 +0800 Subject: [PATCH 080/137] fix --- src/memos/graph_dbs/polardb.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 2180408a9..f981fcceb 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1946,13 +1946,14 @@ def get_structure_optimization_candidates( "n.node_type AS node_type", "n.visibility AS visibility", "n.usage AS usage", - "n.background AS background" + "n.background AS background", + "n.graph_id as graph_id" ]) fields = [ "id", "memory", "user_name", "user_id", "session_id", "status", "key", "confidence", "tags", "created_at", "updated_at", "memory_type", "sources", "source", "node_type", "visibility", - "usage", "background" + "usage", "background","graph_id" ] return_fields_agtype = ", ".join([f"{field} agtype" for field in fields]) @@ -2027,7 +2028,7 @@ def get_structure_optimization_candidates( "id", "memory", "user_name", "user_id", "session_id", "status", "key", "confidence", "tags", "created_at", "updated_at", "memory_type", "sources", "source", "node_type", "visibility", - "usage", "background" + "usage", "background","graph_id" ] # 将行数据转换为字典 From c2eb13b373946fff7f3ca5c95a488bb51ea86c7f Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Tue, 21 Oct 2025 18:46:46 +0800 Subject: [PATCH 081/137] fix export_graph --- src/memos/graph_dbs/polardb.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 
deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index f981fcceb..f755ca291 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1628,7 +1628,7 @@ def export_graph( """ with self.connection.cursor() as cursor: - cursor.execute(node_query) + cursor.execute(node_query+"limit 5") node_results = cursor.fetchall() nodes = [] @@ -1648,17 +1648,17 @@ def export_graph( else: properties = properties_json if properties_json else {} - # Build node data - node_data = { - "id": properties.get("id", node_id), - "memory": properties.get("memory", ""), - "metadata": properties - } + # # Build node data + # node_data = { + # "id": properties.get("id", node_id), + # "memory": properties.get("memory", ""), + # "metadata": properties + # } if include_embedding and embedding_json is not None: - node_data["embedding"] = embedding_json + properties["embedding"] = embedding_json - nodes.append(self._parse_node(node_data)) + nodes.append(self._parse_node(properties)) except Exception as e: logger.error(f"[EXPORT GRAPH - NODES] Exception: {e}", exc_info=True) @@ -1670,7 +1670,7 @@ def export_graph( SELECT * FROM cypher('{self.db_name}_graph', $$ MATCH (a:Memory)-[r]->(b:Memory) WHERE a.user_name = '{user_name}' AND b.user_name = '{user_name}' - RETURN a.id AS source, b.id AS target, type(r) as edge + RETURN a.id AS source, b.id AS target, type(r) as edge limit 5 $$) AS (source agtype, target agtype, edge agtype) """ From fee4a735dfca2d44966a9dedbeecc9585a2b68c3 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Tue, 21 Oct 2025 18:50:29 +0800 Subject: [PATCH 082/137] fix export_graph --- src/memos/graph_dbs/polardb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index f755ca291..4c62f06e3 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1628,7 +1628,7 @@ def export_graph( """ with 
self.connection.cursor() as cursor: - cursor.execute(node_query+"limit 5") + cursor.execute(node_query) node_results = cursor.fetchall() nodes = [] @@ -1670,7 +1670,7 @@ def export_graph( SELECT * FROM cypher('{self.db_name}_graph', $$ MATCH (a:Memory)-[r]->(b:Memory) WHERE a.user_name = '{user_name}' AND b.user_name = '{user_name}' - RETURN a.id AS source, b.id AS target, type(r) as edge limit 5 + RETURN a.id AS source, b.id AS target, type(r) as edge $$) AS (source agtype, target agtype, edge agtype) """ From 7c59fbc959e4c6c606cdd94d4fde59cc2f99c285 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Tue, 21 Oct 2025 19:10:46 +0800 Subject: [PATCH 083/137] fix get_by_metadata --- src/memos/graph_dbs/polardb.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 4c62f06e3..443a9763b 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1339,9 +1339,11 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = if op == "=": where_conditions.append(f"n.{field} = {escaped_value}") elif op == "in": - where_conditions.append(f"n.{field} IN {escaped_value}") + # where_conditions.append(f"n.{field} IN {escaped_value}") + where_conditions.append(f"{escaped_value} IN n.{field}") elif op == "contains": - where_conditions.append(f"size(filter(n.{field}, t -> t IN {escaped_value})) > 0") + where_conditions.append(f"{escaped_value} IN n.{field}") + # where_conditions.append(f"size(filter(n.{field}, t -> t IN {escaped_value})) > 0") elif op == "starts_with": where_conditions.append(f"n.{field} STARTS WITH {escaped_value}") elif op == "ends_with": From 7957db409953aac3d9ba9e0d5893fcfea316aa39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Tue, 21 Oct 2025 19:56:14 +0800 Subject: [PATCH 084/137] update get_neighbors_by_tag --- examples/basic_modules/polardb_search.py | 30 +++-- 
src/memos/graph_dbs/polardb.py | 136 ++++++++++++++++++++++- 2 files changed, 154 insertions(+), 12 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index ff1ab510e..37823341d 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -122,11 +122,11 @@ def searchVector(db_name: str, vectorStr: list[float], user_name: str = None): # # # 3,查询多个get_nodes ids = ['bb079c5b-1937-4125-a9e5-55d4abe6c95d', 'd66120af-992b-44c6-b261-a6ebe6bc57a5'] - ids = ['"bfde036f-6276-4485-9dc6-3c64eab3e132"'] - detail_list = graph.get_nodes(ids=ids,user_name='memos7a9f9fbbb61c412f94f77fbaa8103c35') - print("1111多个node:", len(detail_list)) - # - print("多个node:", detail_list) + # ids = ['"bfde036f-6276-4485-9dc6-3c64eab3e132"'] + # detail_list = graph.get_nodes(ids=ids,user_name='memos7a9f9fbbb61c412f94f77fbaa8103c35') + # print("1111多个node:", len(detail_list)) + # # + # print("多个node:", detail_list) # 4,更新 update_node # graph.update_node(id="000009999ef-926f-42e2-b7b5-0224daf0abcd", fields={"name": "new_name"}) @@ -202,7 +202,15 @@ def get_structure_optimization_candidates(db_name, scope, include_embedding, use def get_all_memory_items(db_name, scope, include_embedding, user_name): graph = getPolarDb(db_name) memory_items = graph.get_all_memory_items(scope=scope, include_embedding=include_embedding, user_name=user_name) - print("get_all_memory_items:", memory_items) + print("11111get_all_memory_items:", memory_items) + + +def get_neighbors_by_tag(db_name, user_name): + graph = getPolarDb(db_name) + tags=['旅游建议','景点'] + ids = ['39d12b46-ebe4-4f25-b0b7-1582042049e7'] + neighbors = graph.get_neighbors_by_tag(tags=tags, exclude_ids=ids, user_name=user_name) + print("get_neighbors_by_tag:", neighbors) if __name__ == "__main__": @@ -356,19 +364,21 @@ def get_all_memory_items(db_name, scope, include_embedding, user_name): # target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", type="PARENT", 
direction="OUTGOING", # user_name="memosbfb3fb32032b4077a641404dc48739cd") - # get_children_with_embeddings(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",user_name="memos07ea708ac7eb412887c5c283f874ea30") + get_children_with_embeddings(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",user_name="memos07ea708ac7eb412887c5c283f874ea30") # get_subgraph(db_name="memtensor_memos", center_id="13bb9df6-0609-4442-8bed-bba77dadac92", depth=1, # center_status="activated", user_name="memos07ea708ac7eb412887c5c283f874ea30") # - get_grouped_counts(db_name="memtensor_memos", user_name="memos07ea708ac7eb412887c5c283f874ea30") + # get_grouped_counts(db_name="memtensor_memos", user_name="memos07ea708ac7eb412887c5c283f874ea30") - # export_graph(db_name="memtensor_memos", include_embedding=False, user_name="memos8698ecb1f76940ff9adc12494c4d57a6") + # export_graph(db_name="memtensor_memos", include_embedding=True, user_name="memos8698ecb1f76940ff9adc12494c4d57a6") # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") - # get_all_memory_items(db_name="memtensor_memos", scope='UserMemory', include_embedding=True, user_name="memos8f5530534d9b413bb8981ffc3d48a495") + # get_all_memory_items(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") # 测试 get_structure_optimization_candidates 函数 # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") + + get_neighbors_by_tag(db_name="memtensor_memos",user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 443a9763b..9b4d2393e 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -908,7 +908,7 @@ def get_neighbors( 
"""Get connected node IDs in a specific direction and relationship type.""" raise NotImplementedError - def get_neighbors_by_tag( + def get_neighbors_by_tag_old( self, tags: list[str], exclude_ids: list[str], @@ -2319,4 +2319,136 @@ def _build_node_from_agtype(self, node_agtype, embedding=None): # 直接返回标准格式,不需要再次调用 _parse_node_new return {"id": props.get("id", ""), "memory": props.get("memory", ""), "metadata": props} except Exception: - return None \ No newline at end of file + return None + + def get_neighbors_by_tag( + self, + tags: list[str], + exclude_ids: list[str], + top_k: int = 5, + min_overlap: int = 1, + include_embedding: bool = False, + user_name: str | None = None, + ) -> list[dict[str, Any]]: + """ + Find top-K neighbor nodes with maximum tag overlap. + + Args: + tags: The list of tags to match. + exclude_ids: Node IDs to exclude (e.g., local cluster). + top_k: Max number of neighbors to return. + min_overlap: Minimum number of overlapping tags required. + include_embedding: with/without embedding + user_name (str, optional): User name for filtering in non-multi-db mode + + Returns: + List of dicts with node details and overlap count. 
+ """ + if not tags: + return [] + + user_name = user_name if user_name else self._get_config_value("user_name") + + # 构建查询条件,与 nebular.py 保持一致 + where_clauses = [ + 'n.status = "activated"', + 'NOT (n.node_type = "reasoning")', + 'NOT (n.memory_type = "WorkingMemory")', + ] + + if exclude_ids: + exclude_ids_str = "[" + ", ".join(f'"{id}"' for id in exclude_ids) + "]" + where_clauses.append(f"NOT (n.id IN {exclude_ids_str})") + + where_clauses.append(f'n.user_name = "{user_name}"') + + where_clause = " AND ".join(where_clauses) + tag_list_literal = "[" + ", ".join(f'"{t}"' for t in tags) + "]" + + return_fields = [ + "n.id AS id", + "n.memory AS memory", + "n.user_name AS user_name", + "n.user_id AS user_id", + "n.session_id AS session_id", + "n.status AS status", + "n.key AS key", + "n.confidence AS confidence", + "n.tags AS tags", + "n.created_at AS created_at", + "n.updated_at AS updated_at", + "n.memory_type AS memory_type", + "n.sources AS sources", + "n.source AS source", + "n.node_type AS node_type", + "n.visibility AS visibility", + "n.background AS background" + ] + + if include_embedding: + return_fields.append("n.embedding AS embedding") + + return_fields_str = ", ".join(return_fields) + + # 使用 Cypher 查询,与 nebular.py 保持一致 + query = f""" + SELECT * FROM cypher('{self.db_name}_graph', $$ + LET tag_list = {tag_list_literal} + + MATCH (n:Memory) + WHERE {where_clause} + RETURN {return_fields_str}, + size( filter( n.tags, t -> t IN tag_list ) ) AS overlap_count + ORDER BY overlap_count DESC + LIMIT {top_k} + $$) AS ({return_fields_str.replace("n.", "").replace(" AS ", " agtype, ")} overlap_count agtype) + """ + + try: + with self.connection.cursor() as cursor: + cursor.execute(query) + results = cursor.fetchall() + + neighbors = [] + for row in results: + # 解析结果 + props = {} + overlap_count = None + + # 手动解析每个字段 + field_names = [ + "id", "memory", "user_name", "user_id", "session_id", "status", + "key", "confidence", "tags", "created_at", "updated_at", + 
"memory_type", "sources", "source", "node_type", "visibility", "background" + ] + + if include_embedding: + field_names.append("embedding") + field_names.append("overlap_count") + + for i, field in enumerate(field_names): + if field == "overlap_count": + overlap_count = row[i].value if hasattr(row[i], 'value') else row[i] + else: + props[field] = row[i].value if hasattr(row[i], 'value') else row[i] + + if overlap_count is not None and overlap_count >= min_overlap: + parsed = self._parse_node(props) + parsed["overlap_count"] = overlap_count + neighbors.append(parsed) + + # 按重叠数量排序 + neighbors.sort(key=lambda x: x["overlap_count"], reverse=True) + neighbors = neighbors[:top_k] + + # 移除 overlap_count 字段 + result = [] + for neighbor in neighbors: + neighbor.pop("overlap_count", None) + result.append(neighbor) + + return result + + except Exception as e: + logger.error(f"Failed to get neighbors by tag: {e}", exc_info=True) + return [] \ No newline at end of file From d3cf342d8b3a99e306aaccd423e27a43581ec826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Tue, 21 Oct 2025 20:20:57 +0800 Subject: [PATCH 085/137] update get_neighbors_by_tag --- src/memos/graph_dbs/polardb.py | 141 ++++++++++++++------------------- 1 file changed, 60 insertions(+), 81 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 9b4d2393e..4fe1645c4 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2349,105 +2349,84 @@ def get_neighbors_by_tag( user_name = user_name if user_name else self._get_config_value("user_name") - # 构建查询条件,与 nebular.py 保持一致 - where_clauses = [ - 'n.status = "activated"', - 'NOT (n.node_type = "reasoning")', - 'NOT (n.memory_type = "WorkingMemory")', - ] + # 构建查询条件 + where_clauses = [] + params = [] if exclude_ids: - exclude_ids_str = "[" + ", ".join(f'"{id}"' for id in exclude_ids) + "]" - where_clauses.append(f"NOT (n.id IN {exclude_ids_str})") + 
exclude_conditions = [] + for exclude_id in exclude_ids: + exclude_conditions.append( + "ag_catalog.agtype_access_operator(properties, '\"id\"'::agtype) != %s::agtype") + params.append(f'"{exclude_id}"') + where_clauses.append(f"({' AND '.join(exclude_conditions)})") - where_clauses.append(f'n.user_name = "{user_name}"') + # 状态过滤 + where_clauses.append( + "ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"activated\"'::agtype") - where_clause = " AND ".join(where_clauses) - tag_list_literal = "[" + ", ".join(f'"{t}"' for t in tags) + "]" - - return_fields = [ - "n.id AS id", - "n.memory AS memory", - "n.user_name AS user_name", - "n.user_id AS user_id", - "n.session_id AS session_id", - "n.status AS status", - "n.key AS key", - "n.confidence AS confidence", - "n.tags AS tags", - "n.created_at AS created_at", - "n.updated_at AS updated_at", - "n.memory_type AS memory_type", - "n.sources AS sources", - "n.source AS source", - "n.node_type AS node_type", - "n.visibility AS visibility", - "n.background AS background" - ] + # 类型过滤 + where_clauses.append( + "ag_catalog.agtype_access_operator(properties, '\"node_type\"'::agtype) != '\"reasoning\"'::agtype") - if include_embedding: - return_fields.append("n.embedding AS embedding") + where_clauses.append( + "ag_catalog.agtype_access_operator(properties, '\"memory_type\"'::agtype) != '\"WorkingMemory\"'::agtype") - return_fields_str = ", ".join(return_fields) + # 用户过滤 + where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype") + params.append(f'"{user_name}"') - # 使用 Cypher 查询,与 nebular.py 保持一致 - query = f""" - SELECT * FROM cypher('{self.db_name}_graph', $$ - LET tag_list = {tag_list_literal} + where_clause = " AND ".join(where_clauses) - MATCH (n:Memory) + # 获取所有候选节点 + query = f""" + SELECT id, properties, embedding + FROM "{self.db_name}_graph"."Memory" WHERE {where_clause} - RETURN {return_fields_str}, - size( filter( n.tags, t -> t IN tag_list ) ) 
AS overlap_count - ORDER BY overlap_count DESC - LIMIT {top_k} - $$) AS ({return_fields_str.replace("n.", "").replace(" AS ", " agtype, ")} overlap_count agtype) """ + print(f"[get_neighbors_by_tag] query: {query}, params: {params}") try: with self.connection.cursor() as cursor: - cursor.execute(query) + cursor.execute(query, params) results = cursor.fetchall() - neighbors = [] + nodes_with_overlap = [] for row in results: - # 解析结果 - props = {} - overlap_count = None - - # 手动解析每个字段 - field_names = [ - "id", "memory", "user_name", "user_id", "session_id", "status", - "key", "confidence", "tags", "created_at", "updated_at", - "memory_type", "sources", "source", "node_type", "visibility", "background" - ] - - if include_embedding: - field_names.append("embedding") - field_names.append("overlap_count") - - for i, field in enumerate(field_names): - if field == "overlap_count": - overlap_count = row[i].value if hasattr(row[i], 'value') else row[i] - else: - props[field] = row[i].value if hasattr(row[i], 'value') else row[i] - - if overlap_count is not None and overlap_count >= min_overlap: - parsed = self._parse_node(props) - parsed["overlap_count"] = overlap_count - neighbors.append(parsed) - - # 按重叠数量排序 - neighbors.sort(key=lambda x: x["overlap_count"], reverse=True) - neighbors = neighbors[:top_k] + node_id, properties_json, embedding_json = row + properties = properties_json if properties_json else {} - # 移除 overlap_count 字段 - result = [] - for neighbor in neighbors: - neighbor.pop("overlap_count", None) - result.append(neighbor) + # 解析embedding + if include_embedding and embedding_json is not None: + try: + embedding = json.loads(embedding_json) if isinstance(embedding_json, + str) else embedding_json + properties["embedding"] = embedding + except (json.JSONDecodeError, TypeError): + logger.warning(f"Failed to parse embedding for node {node_id}") - return result + # 计算标签重叠 + node_tags = properties.get("tags", []) + if isinstance(node_tags, str): + try: + node_tags = 
json.loads(node_tags) + except (json.JSONDecodeError, TypeError): + node_tags = [] + + overlap_tags = [tag for tag in tags if tag in node_tags] + overlap_count = len(overlap_tags) + + if overlap_count >= min_overlap: + node_data = self._parse_node({ + "id": properties.get("id", node_id), + "memory": properties.get("memory", ""), + "metadata": properties + }) + nodes_with_overlap.append((node_data, overlap_count)) + + # 按重叠数量排序并返回前top_k个 + nodes_with_overlap.sort(key=lambda x: x[1], reverse=True) + return [node for node, _ in nodes_with_overlap[:top_k]] except Exception as e: logger.error(f"Failed to get neighbors by tag: {e}", exc_info=True) From 73c7f0da190b044423ad709efada56fa2adf952c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Tue, 21 Oct 2025 20:25:25 +0800 Subject: [PATCH 086/137] update get_neighbors_by_tag --- src/memos/graph_dbs/polardb.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 4fe1645c4..e09ea4753 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2349,10 +2349,11 @@ def get_neighbors_by_tag( user_name = user_name if user_name else self._get_config_value("user_name") - # 构建查询条件 + # 构建查询条件 - 更宽松的过滤条件 where_clauses = [] params = [] + # 排除指定的ID - 使用 properties 中的 id 字段 if exclude_ids: exclude_conditions = [] for exclude_id in exclude_ids: @@ -2361,21 +2362,21 @@ def get_neighbors_by_tag( params.append(f'"{exclude_id}"') where_clauses.append(f"({' AND '.join(exclude_conditions)})") - # 状态过滤 + # 状态过滤 - 只保留 activated 状态 where_clauses.append( "ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"activated\"'::agtype") - # 类型过滤 + # 类型过滤 - 排除 reasoning 类型 where_clauses.append( "ag_catalog.agtype_access_operator(properties, '\"node_type\"'::agtype) != '\"reasoning\"'::agtype") - where_clauses.append( - "ag_catalog.agtype_access_operator(properties, 
'\"memory_type\"'::agtype) != '\"WorkingMemory\"'::agtype") - # 用户过滤 where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype") params.append(f'"{user_name}"') + # 测试无数据,需要注释 + where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"memory_type\"'::agtype) != '\"WorkingMemory\"'::agtype") + where_clause = " AND ".join(where_clauses) # 获取所有候选节点 @@ -2384,6 +2385,7 @@ def get_neighbors_by_tag( FROM "{self.db_name}_graph"."Memory" WHERE {where_clause} """ + print(f"[get_neighbors_by_tag] query: {query}, params: {params}") try: @@ -2430,4 +2432,4 @@ def get_neighbors_by_tag( except Exception as e: logger.error(f"Failed to get neighbors by tag: {e}", exc_info=True) - return [] \ No newline at end of file + return [] From 56351e5f540746095912b4f4331181d7b02fb0ff Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Tue, 21 Oct 2025 20:26:47 +0800 Subject: [PATCH 087/137] fix --- src/memos/graph_dbs/polardb.py | 145 +++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index e09ea4753..0c7645e17 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2433,3 +2433,148 @@ def get_neighbors_by_tag( except Exception as e: logger.error(f"Failed to get neighbors by tag: {e}", exc_info=True) return [] + + def get_neighbors_by_tag_ccl( + self, + tags: list[str], + exclude_ids: list[str], + top_k: int = 5, + min_overlap: int = 1, + include_embedding: bool = False, + user_name: str | None = None, + ) -> list[dict[str, Any]]: + """ + Find top-K neighbor nodes with maximum tag overlap. + + Args: + tags: The list of tags to match. + exclude_ids: Node IDs to exclude (e.g., local cluster). + top_k: Max number of neighbors to return. + min_overlap: Minimum number of overlapping tags required. 
+ include_embedding: with/without embedding + user_name (str, optional): User name for filtering in non-multi-db mode + + Returns: + List of dicts with node details and overlap count. + """ + if not tags: + return [] + + user_name = user_name if user_name else self._get_config_value("user_name") + + # 构建查询条件,与 nebular.py 保持一致 + where_clauses = [ + 'n.status = "activated"', + 'NOT (n.node_type = "reasoning")', + 'NOT (n.memory_type = "WorkingMemory")', + ] + where_clauses=[ + 'n.status = "activated"', + 'NOT (n.memory_type = "WorkingMemory")', + ] + + if exclude_ids: + exclude_ids_str = "[" + ", ".join(f'"{id}"' for id in exclude_ids) + "]" + where_clauses.append(f"NOT (n.id IN {exclude_ids_str})") + + where_clauses.append(f'n.user_name = "{user_name}"') + + where_clause = " AND ".join(where_clauses) + tag_list_literal = "[" + ", ".join(f'"{t}"' for t in tags) + "]" + + return_fields = [ + "n.id AS id", + "n.memory AS memory", + "n.user_name AS user_name", + "n.user_id AS user_id", + "n.session_id AS session_id", + "n.status AS status", + "n.key AS key", + "n.confidence AS confidence", + "n.tags AS tags", + "n.created_at AS created_at", + "n.updated_at AS updated_at", + "n.memory_type AS memory_type", + "n.sources AS sources", + "n.source AS source", + "n.node_type AS node_type", + "n.visibility AS visibility", + "n.background AS background" + ] + + if include_embedding: + return_fields.append("n.embedding AS embedding") + + return_fields_str = ", ".join(return_fields) + result_fields = [] + for field in return_fields: + # 从 "n.id AS id" 提取出字段名 "id" + field_name = field.split(" AS ")[-1] + result_fields.append(f"{field_name} agtype") + + # 添加 overlap_count + result_fields.append("overlap_count agtype") + result_fields_str = ", ".join(result_fields) + # 使用 Cypher 查询,与 nebular.py 保持一致 + query = f""" + SELECT * FROM ( + SELECT * FROM cypher('{self.db_name}_graph', $$ + WITH {tag_list_literal} AS tag_list + MATCH (n:Memory) + WHERE {where_clause} + RETURN 
{return_fields_str}, + size([tag IN n.tags WHERE tag IN tag_list]) AS overlap_count + $$) AS ({result_fields_str}) + ) AS subquery + ORDER BY (overlap_count::integer) DESC + LIMIT {top_k} + """ + print("get_neighbors_by_tag:",query) + try: + with self.connection.cursor() as cursor: + cursor.execute(query) + results = cursor.fetchall() + + neighbors = [] + for row in results: + # 解析结果 + props = {} + overlap_count = None + + # 手动解析每个字段 + field_names = [ + "id", "memory", "user_name", "user_id", "session_id", "status", + "key", "confidence", "tags", "created_at", "updated_at", + "memory_type", "sources", "source", "node_type", "visibility", "background" + ] + + if include_embedding: + field_names.append("embedding") + field_names.append("overlap_count") + + for i, field in enumerate(field_names): + if field == "overlap_count": + overlap_count = row[i].value if hasattr(row[i], 'value') else row[i] + else: + props[field] = row[i].value if hasattr(row[i], 'value') else row[i] + overlap_int = int(overlap_count) + if overlap_count is not None and overlap_int >= min_overlap: + parsed = self._parse_node(props) + parsed["overlap_count"] = overlap_int + neighbors.append(parsed) + + # 按重叠数量排序 + neighbors.sort(key=lambda x: x["overlap_count"], reverse=True) + neighbors = neighbors[:top_k] + + # 移除 overlap_count 字段 + result = [] + for neighbor in neighbors: + neighbor.pop("overlap_count", None) + result.append(neighbor) + + return result + + except Exception as e: + logger.error(f"Failed to get neighbors by tag: {e}", exc_info=True) + return [] From b363c476812522dbd77df571a72da168b0e3e4a1 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Tue, 21 Oct 2025 20:32:30 +0800 Subject: [PATCH 088/137] fix --- src/memos/graph_dbs/polardb.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 0c7645e17..e459acc5e 100644 --- a/src/memos/graph_dbs/polardb.py +++ 
b/src/memos/graph_dbs/polardb.py @@ -12,6 +12,7 @@ from memos.dependency import require_python_package from memos.graph_dbs.base import BaseGraphDB from memos.log import get_logger +from memos.utils import timed logger = get_logger(__name__) @@ -176,6 +177,7 @@ def _ensure_database_exists(self): logger.error(f"Failed to access database '{self.db_name}': {e}") raise + @timed def _create_graph(self): """Create PostgreSQL schema and table for graph storage.""" try: @@ -283,6 +285,7 @@ def get_memory_count(self, memory_type: str, user_name: str | None = None) -> in logger.error(f"[get_memory_count] Failed: {e}") return -1 + @timed def node_not_exist(self, scope: str, user_name: str | None = None) -> int: """Check if a node with given scope exists.""" user_name = user_name if user_name else self._get_config_value("user_name") @@ -307,6 +310,7 @@ def node_not_exist(self, scope: str, user_name: str | None = None) -> int: logger.error(f"[node_not_exist] Query failed: {e}", exc_info=True) raise + @timed def remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: str | None = None) -> None: """ Remove all WorkingMemory nodes except the latest `keep_latest` entries. @@ -357,6 +361,7 @@ def remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: st logger.error(f"[remove_oldest_memory] Failed: {e}", exc_info=True) raise + @timed def update_node(self, id: str, fields: dict[str, Any], user_name: str | None = None) -> None: """ Update node fields in PolarDB, auto-converting `created_at` and `updated_at` to datetime type if present. @@ -420,6 +425,7 @@ def update_node(self, id: str, fields: dict[str, Any], user_name: str | None = N logger.error(f"[update_node] Failed to update node '{id}': {e}", exc_info=True) raise + @timed def delete_node(self, id: str, user_name: str | None = None) -> None: """ Delete a node from the graph. 
@@ -446,6 +452,7 @@ def delete_node(self, id: str, user_name: str | None = None) -> None: logger.error(f"[delete_node] Failed to delete node '{id}': {e}", exc_info=True) raise + @timed def create_extension(self): extensions = [ ("polar_age", "图引擎"), @@ -472,6 +479,7 @@ def create_extension(self): print(f"⚠️ Failed to access database context: {e}") logger.error(f"Failed to access database context: {e}", exc_info=True) + @timed def create_graph(self): try: with self.connection.cursor() as cursor: @@ -490,6 +498,7 @@ def create_graph(self): print(f"⚠️ Failed to create graph '{self.db_name}_graph': {e}") logger.error(f"Failed to create graph '{self.db_name}_graph': {e}", exc_info=True) + @timed def create_edge(self): """创建所有有效的边类型,如果不存在的话""" VALID_REL_TYPES = { @@ -514,6 +523,7 @@ def create_edge(self): print(f"⚠️ Failed to create label {label_name}: {e}") logger.error(f"Failed to create elabel '{label_name}': {e}", exc_info=True) + @timed def add_edge(self, source_id: str, target_id: str, type: str, user_name: str | None = None) -> None: if not source_id or not target_id: raise ValueError("[add_edge] source_id and target_id must be provided") @@ -550,6 +560,7 @@ def add_edge(self, source_id: str, target_id: str, type: str, user_name: str | N logger.error(f"Failed to insert edge: {e}", exc_info=True) raise + @timed def delete_edge(self, source_id: str, target_id: str, type: str) -> None: """ Delete a specific edge between two nodes. 
@@ -567,6 +578,7 @@ def delete_edge(self, source_id: str, target_id: str, type: str) -> None: cursor.execute(query, (source_id, target_id, type)) logger.info(f"Edge deleted: {source_id} -[{type}]-> {target_id}") + @timed def edge_exists_old( self, source_id: str, target_id: str, type: str = "ANY", direction: str = "OUTGOING" ) -> bool: @@ -623,6 +635,7 @@ def edge_exists_old( result = cursor.fetchone() return result is not None + @timed def edge_exists( self, source_id: str, @@ -675,6 +688,7 @@ def edge_exists( result = cursor.fetchone() return result is not None and result[0] is not None + @timed def get_node(self, id: str, include_embedding: bool = False, user_name: str | None = None) -> dict[str, Any] | None: """ Retrieve a Memory node by its unique ID. @@ -743,6 +757,7 @@ def get_node(self, id: str, include_embedding: bool = False, user_name: str | No logger.error(f"[get_node] Failed to retrieve node '{id}': {e}", exc_info=True) return None + @timed def get_nodes(self, ids: list[str], user_name: str | None = None,**kwargs) -> list[dict[str, Any]]: """ Retrieve the metadata and memory of a list of nodes. @@ -814,6 +829,7 @@ def get_nodes(self, ids: list[str], user_name: str | None = None,**kwargs) -> l {"id": properties.get("id", node_id), "memory": properties.get("memory", ""), "metadata": properties})) return nodes + @timed def get_edges(self, id: str, type: str = "ANY", direction: str = "ANY") -> list[dict[str, str]]: """ Get edges connected to a node, with optional type and direction filter. 
@@ -908,6 +924,7 @@ def get_neighbors( """Get connected node IDs in a specific direction and relationship type.""" raise NotImplementedError + @timed def get_neighbors_by_tag_old( self, tags: list[str], @@ -1002,6 +1019,7 @@ def get_neighbors_by_tag_old( nodes_with_overlap.sort(key=lambda x: x[1], reverse=True) return [node for node, _ in nodes_with_overlap[:top_k]] + @timed def get_children_with_embeddings(self, id: str, user_name: str | None = None) -> list[dict[str, Any]]: """Get children nodes with their embeddings.""" user_name = user_name if user_name else self._get_config_value("user_name") @@ -1087,6 +1105,7 @@ def get_path(self, source_id: str, target_id: str, max_depth: int = 3) -> list[s """Get the path of nodes from source to target within a limited depth.""" raise NotImplementedError + @timed def get_subgraph( self, center_id: str, @@ -1210,6 +1229,7 @@ def get_context_chain(self, id: str, type: str = "FOLLOWS") -> list[str]: """Get the ordered context chain starting from a node.""" raise NotImplementedError + @timed def search_by_embedding( self, vector: list[float], @@ -1292,6 +1312,7 @@ def search_by_embedding( output.append({"id": id_val, "score": score_val}) return output[:top_k] + @timed def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = None) -> list[str]: """ Retrieve node IDs that match given metadata filters. 
@@ -1382,6 +1403,7 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = return ids + @timed def get_grouped_counts1( self, group_fields: list[str], @@ -1459,7 +1481,7 @@ def get_grouped_counts1( logger.error(f"Failed to get grouped counts: {e}", exc_info=True) return [] - + @timed def get_grouped_counts( self, group_fields: list[str], @@ -1566,6 +1588,7 @@ def merge_nodes(self, id1: str, id2: str) -> str: """Merge two similar or duplicate nodes into one.""" raise NotImplementedError + @timed def clear(self) -> None: """Clear the entire graph.""" try: @@ -1597,6 +1620,7 @@ def clear(self) -> None: logger.warning(f"Failed to clear graph '{self.db_name}_graph': {e}") # Don't raise the exception, just log it as a warning + @timed def export_graph( self, include_embedding: bool = False, user_name: str | None = None ) -> dict[str, Any]: @@ -1695,6 +1719,7 @@ def export_graph( return {"nodes": nodes, "edges": edges} + @timed def import_graph(self, data: dict[str, Any]) -> None: """Import the entire graph from a serialized dictionary.""" with self.connection.cursor() as cursor: @@ -1716,6 +1741,7 @@ def import_graph(self, data: dict[str, Any]) -> None: for edge in data.get("edges", []): self.add_edge(edge["source"], edge["target"], edge["type"]) + @timed def get_all_memory_items( self, scope: str, include_embedding: bool = False, user_name: str | None = None ) -> list[dict]: @@ -1914,6 +1940,7 @@ def get_all_memory_items_old( return nodes + @timed def get_structure_optimization_candidates( self, scope: str, include_embedding: bool = False, user_name: str | None = None ) -> list[dict]: @@ -2200,6 +2227,7 @@ def add_node_old(conn, id: str, memory: str, metadata: dict, graph_name=None): print(f"❌ 插入失败 (ID: {id}): {e}") return False + @timed def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: str | None = None) -> None: """Add a memory node to the graph.""" # user_name 从 metadata 中获取,如果不存在则从配置中获取 From 
5a448d8a53851465aa518598478acc423d95bcbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Tue, 21 Oct 2025 20:49:33 +0800 Subject: [PATCH 089/137] add import_graph --- src/memos/graph_dbs/polardb.py | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index e459acc5e..3d10f09c0 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2606,3 +2606,39 @@ def get_neighbors_by_tag_ccl( except Exception as e: logger.error(f"Failed to get neighbors by tag: {e}", exc_info=True) return [] + @timed + def import_graph(self, data: dict[str, Any], user_name: str | None = None) -> None: + """ + Import the entire graph from a serialized dictionary. + + Args: + data: A dictionary containing all nodes and edges to be loaded. + user_name (str, optional): User name for filtering in non-multi-db mode + """ + user_name = user_name if user_name else self._get_config_value("user_name") + + # Import nodes + for node in data.get("nodes", []): + try: + id, memory, metadata = _compose_node(node) + metadata["user_name"] = user_name + metadata = _prepare_node_metadata(metadata) + metadata.update({"id": id, "memory": memory}) + + # 使用 add_node 方法添加节点 + self.add_node(id, memory, metadata) + + except Exception as e: + logger.error(f"Fail to load node: {node}, error: {e}") + + # Import edges + for edge in data.get("edges", []): + try: + source_id, target_id = edge["source"], edge["target"] + edge_type = edge["type"] + + # 使用 add_edge 方法添加边 + self.add_edge(source_id, target_id, edge_type, user_name) + + except Exception as e: + logger.error(f"Fail to load edge: {edge}, error: {e}") From 0e05327d8f725ae1a681b7d75afce6c46a595276 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Tue, 21 Oct 2025 20:52:39 +0800 Subject: [PATCH 090/137] fix --- src/memos/graph_dbs/polardb.py | 16 ++++++++++++++++ 1 file changed, 16 
insertions(+) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index e459acc5e..09c64e198 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1719,6 +1719,22 @@ def export_graph( return {"nodes": nodes, "edges": edges} + @timed + def count_nodes(self, scope: str, user_name: str | None = None) -> int: + user_name = user_name if user_name else self.config.user_name + + query = f""" + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (n:Memory) + WHERE n.memory_type = '{scope}' + AND n.user_name = '{user_name}' + RETURN count(n) + $$) AS (count agtype) + """ + + result = self.execute_query(query) + return int(result.one_or_none()["count"].value) + @timed def import_graph(self, data: dict[str, Any]) -> None: """Import the entire graph from a serialized dictionary.""" From b96874f91c5ef16b2374c9efab2aa800be5f4418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Tue, 21 Oct 2025 21:01:32 +0800 Subject: [PATCH 091/137] add get_edges --- examples/basic_modules/polardb_search.py | 12 +++- src/memos/graph_dbs/polardb.py | 72 +++++++++++++++++++++++- 2 files changed, 81 insertions(+), 3 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index 37823341d..c7dd85fdb 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -213,6 +213,12 @@ def get_neighbors_by_tag(db_name, user_name): print("get_neighbors_by_tag:", neighbors) +def get_edges(db_name, id, user_name): + graph = getPolarDb(db_name) + edges = graph.get_edges(id=id, user_name=user_name) + print("get_edges:", edges) + + if __name__ == "__main__": # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="edge") @@ -364,7 +370,7 @@ def get_neighbors_by_tag(db_name, user_name): # 
target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", type="PARENT", direction="OUTGOING", # user_name="memosbfb3fb32032b4077a641404dc48739cd") - get_children_with_embeddings(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",user_name="memos07ea708ac7eb412887c5c283f874ea30") + # get_children_with_embeddings(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",user_name="memos07ea708ac7eb412887c5c283f874ea30") # get_subgraph(db_name="memtensor_memos", center_id="13bb9df6-0609-4442-8bed-bba77dadac92", depth=1, # center_status="activated", user_name="memos07ea708ac7eb412887c5c283f874ea30") @@ -381,4 +387,6 @@ def get_neighbors_by_tag(db_name, user_name): # 测试 get_structure_optimization_candidates 函数 # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") - get_neighbors_by_tag(db_name="memtensor_memos",user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") + # get_neighbors_by_tag(db_name="memtensor_memos",user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") + + get_edges(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 3d10f09c0..7843dfd4c 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -830,7 +830,7 @@ def get_nodes(self, ids: list[str], user_name: str | None = None,**kwargs) -> l return nodes @timed - def get_edges(self, id: str, type: str = "ANY", direction: str = "ANY") -> list[dict[str, str]]: + def get_edges_old(self, id: str, type: str = "ANY", direction: str = "ANY") -> list[dict[str, str]]: """ Get edges connected to a node, with optional type and direction filter. 
@@ -2642,3 +2642,73 @@ def import_graph(self, data: dict[str, Any], user_name: str | None = None) -> No except Exception as e: logger.error(f"Fail to load edge: {edge}, error: {e}") + + @timed + def get_edges(self, id: str, type: str = "ANY", direction: str = "ANY", user_name: str | None = None) -> list[ + dict[str, str]]: + """ + Get edges connected to a node, with optional type and direction filter. + + Args: + id: Node ID to retrieve edges for. + type: Relationship type to match, or 'ANY' to match all. + direction: 'OUTGOING', 'INCOMING', or 'ANY'. + user_name (str, optional): User name for filtering in non-multi-db mode + + Returns: + List of edges: + [ + {"from": "source_id", "to": "target_id", "type": "RELATE"}, + ... + ] + """ + user_name = user_name if user_name else self._get_config_value("user_name") + + if direction == "OUTGOING": + pattern = f"(a:Memory)-[r]->(b:Memory)" + where_clause = f"a.id = '{id}'" + elif direction == "INCOMING": + pattern = f"(a:Memory)<-[r]-(b:Memory)" + where_clause = f"a.id = '{id}'" + elif direction == "ANY": + pattern = f"(a:Memory)-[r]-(b:Memory)" + where_clause = f"a.id = '{id}' OR b.id = '{id}'" + else: + raise ValueError("Invalid direction. 
Must be 'OUTGOING', 'INCOMING', or 'ANY'.") + + # 添加类型过滤 + if type != "ANY": + where_clause += f" AND type(r) = '{type}'" + + # 添加用户过滤 + where_clause += f" AND a.user_name = '{user_name}' AND b.user_name = '{user_name}'" + + query = f""" + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH {pattern} + WHERE {where_clause} + RETURN a.id AS from_id, b.id AS to_id, type(r) AS edge_type + $$) AS (from_id agtype, to_id agtype, edge_type agtype) + """ + + try: + with self.connection.cursor() as cursor: + cursor.execute(query) + results = cursor.fetchall() + + edges = [] + for row in results: + from_id = row[0].value if hasattr(row[0], 'value') else row[0] + to_id = row[1].value if hasattr(row[1], 'value') else row[1] + edge_type = row[2].value if hasattr(row[2], 'value') else row[2] + + edges.append({ + "from": from_id, + "to": to_id, + "type": edge_type + }) + return edges + + except Exception as e: + logger.error(f"Failed to get edges: {e}", exc_info=True) + return [] From 1f8154ad31c286bb975dc9e1da329c5e5b9b7a05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Tue, 21 Oct 2025 21:05:52 +0800 Subject: [PATCH 092/137] add clear --- src/memos/graph_dbs/polardb.py | 46 +++++++++++++++------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 8ef0a5d30..fe32eda56 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1589,36 +1589,30 @@ def merge_nodes(self, id1: str, id2: str) -> str: raise NotImplementedError @timed - def clear(self) -> None: - """Clear the entire graph.""" - try: - with self.connection.cursor() as cursor: - # First check if the graph exists - cursor.execute(f""" - SELECT EXISTS ( - SELECT 1 FROM information_schema.tables - WHERE table_schema = '"{self.db_name}_graph"' - AND table_name = 'Memory' - ) - """) - graph_exists = cursor.fetchone()[0] + def clear(self, user_name: str | 
None = None) -> None: + """ + Clear the entire graph if the target database exists. - if not graph_exists: - logger.info(f"Graph '{self.db_name}_graph' does not exist, nothing to clear.") - return + Args: + user_name (str, optional): User name for filtering in non-multi-db mode + """ + user_name = user_name if user_name else self._get_config_value("user_name") - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - cursor.execute(f""" - DELETE FROM "{self.db_name}_graph"."Memory" - WHERE properties::text LIKE %s - """, (f"%{self._get_config_value('user_name')}%",)) - else: - cursor.execute(f'DELETE FROM "{self.db_name}_graph"."Memory"') + try: + query = f""" + SELECT * FROM cypher('{self.db_name}_graph', $$ + MATCH (n:Memory) + WHERE n.user_name = '{user_name}' + DETACH DELETE n + $$) AS (result agtype) + """ + + with self.connection.cursor() as cursor: + cursor.execute(query) + logger.info("Cleared all nodes from database.") - logger.info(f"Cleared all nodes from graph '{self.db_name}_graph'.") except Exception as e: - logger.warning(f"Failed to clear graph '{self.db_name}_graph': {e}") - # Don't raise the exception, just log it as a warning + logger.error(f"[ERROR] Failed to clear database: {e}") @timed def export_graph( From 5436b5991dd00c0487cb3323922463be51b42147 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Wed, 22 Oct 2025 09:41:38 +0800 Subject: [PATCH 093/137] get_neighbors_by_tag --- src/memos/graph_dbs/polardb.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index fe32eda56..05edbfc58 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2405,8 +2405,7 @@ def get_neighbors_by_tag( "ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"activated\"'::agtype") # 类型过滤 - 排除 reasoning 类型 - where_clauses.append( - 
"ag_catalog.agtype_access_operator(properties, '\"node_type\"'::agtype) != '\"reasoning\"'::agtype") + # where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"node_type\"'::agtype) != '\"reasoning\"'::agtype") # 用户过滤 where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype") From c7b5f7c0c0889ea595aa99e8377edd72eefa1d70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Wed, 22 Oct 2025 10:07:48 +0800 Subject: [PATCH 094/137] get_neighbors_by_tag --- src/memos/graph_dbs/polardb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 05edbfc58..29650360a 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2358,7 +2358,7 @@ def _build_node_from_agtype(self, node_agtype, embedding=None): return {"id": props.get("id", ""), "memory": props.get("memory", ""), "metadata": props} except Exception: return None - + @timed def get_neighbors_by_tag( self, tags: list[str], From 32a6298f6546c78722794d432327cb864f5703bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Wed, 22 Oct 2025 16:41:41 +0800 Subject: [PATCH 095/137] update get_by_metadata --- src/memos/graph_dbs/polardb.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 29650360a..45ee1e16c 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1387,7 +1387,8 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = RETURN n.id AS id $$) AS (id agtype) """ - + + print(f"[get_by_metadata] query: {cypher_query}, where_str: {where_str}") ids = [] try: with self.connection.cursor() as cursor: From f7ab323c0b10060898985affb03d5f242f30dbac Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Wed, 22 Oct 
2025 17:06:04 +0800 Subject: [PATCH 096/137] search_by_emdedding remove embedding --- src/memos/graph_dbs/polardb.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 05edbfc58..93de1de9d 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1280,7 +1280,6 @@ def search_by_embedding( SELECT id, properties, timeline, - embedding, ag_catalog.agtype_access_operator(properties, '"id"'::agtype) AS old_id, (1 - (embedding <=> %s::vector(1024))) AS scope FROM "{self.db_name}_graph"."Memory" @@ -1302,9 +1301,9 @@ def search_by_embedding( for row in results: polarId = row[0] # id properties = row[1] # properties - embedding = row[3] # embedding - oldId = row[4] # old_id - score = row[5] # scope + # embedding = row[3] # embedding + oldId = row[3] # old_id + score = row[4] # scope id_val = str(oldId) score_val = float(score) score_val = (score_val + 1) / 2 # align to neo4j, Normalized Cosine Score From fe1a9fd21286e1887288d8fd8dd7718e3c4ff07d Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Wed, 22 Oct 2025 17:07:09 +0800 Subject: [PATCH 097/137] fix:parseJson.py --- .../batchImport_polardbFromJson.py | 441 ++++++++++++++++++ examples/basic_modules/importPolarDbEdge.py | 29 +- examples/basic_modules/parseJson.py | 21 +- .../basic_modules/polardb_export_insert.py | 77 +-- 4 files changed, 517 insertions(+), 51 deletions(-) create mode 100644 examples/basic_modules/batchImport_polardbFromJson.py diff --git a/examples/basic_modules/batchImport_polardbFromJson.py b/examples/basic_modules/batchImport_polardbFromJson.py new file mode 100644 index 000000000..ef3af528c --- /dev/null +++ b/examples/basic_modules/batchImport_polardbFromJson.py @@ -0,0 +1,441 @@ +import json +import psycopg2 +from psycopg2.extras import Json, execute_batch +import numpy as np +import sys +import os +from datetime import datetime + +# PolarDB 配置 +POLARDB_CONFIG = { + 
"host": "memory.pg.polardb.rds.aliyuncs.com", + "port": 5432, + "user": "adimin", + "password": "Openmem0925", + # "database": "memtensor_memos", + "database": "test_zdy", + # "graph_name": "memtensor_memos_graph" + "graph_name": "test_zdy_graph" +} + + +class PolarDBGraph: + def __init__(self, config): + self.config = config + self.connection = psycopg2.connect( + host=config["host"], + port=config["port"], + user=config["user"], + password=config["password"], + database=config["database"] + ) + self.graph_name = config.get("graph_name") + # 设置自动提交为False,手动控制事务 + self.connection.autocommit = False + print("✅ PolarDB连接成功") + + def update_graph_id_in_properties(self): + """更新properties字段,添加graph_id""" + print("🔄 开始更新properties字段,添加graph_id...") + start_time = datetime.now() + + try: + with self.connection.cursor() as cursor: + # 执行UPDATE语句,将graph_id添加到properties中 + update_sql = f""" + UPDATE {self.graph_name}."Memory" + SET properties = agtype_concat(properties, agtype_build_map('graph_id', id::text)) + """ + cursor.execute(update_sql) + updated_count = cursor.rowcount + + self.connection.commit() + + elapsed = (datetime.now() - start_time).total_seconds() + print(f"✅ 成功更新 {updated_count} 条记录的properties字段,耗时: {elapsed:.2f}秒") + return updated_count + + except Exception as e: + self.connection.rollback() + print(f"❌ 更新properties字段失败: {e}") + return 0 + + def batch_add_nodes_optimized(self, nodes, batch_size=1000): + """优化版批量插入节点""" + success_count = 0 + error_count = 0 + total_nodes = len(nodes) + + print(f"🚀 开始处理 {total_nodes} 条记录,批次大小: {batch_size}") + start_time = datetime.now() + + # 按批次处理 + for batch_start in range(0, total_nodes, batch_size): + batch_end = min(batch_start + batch_size, total_nodes) + current_batch = nodes[batch_start:batch_end] + + batch_success = 0 + batch_errors = [] + + try: + with self.connection.cursor() as cursor: + + # 准备批量插入数据 + insert_data_1024 = [] + # insert_data_768 = [] + # insert_data_3072 = [] + insert_data_no_embedding = [] + + 
for node in current_batch: + try: + id_ = node["id"] + memory_ = node["memory"] + metadata = node["metadata"] + + # get_graph_id_query = f""" + # SELECT ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring) + # """ + # cursor.execute(get_graph_id_query, (id_,)) + # graph_id = cursor.fetchone()[0] + # properties['graph_id'] = str(graph_id) + + # 提取 embedding + embedding = None + for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: + if embedding_key in metadata and metadata[embedding_key]: + embedding = metadata[embedding_key] + break + + if isinstance(embedding, str): + try: + embedding = json.loads(embedding) + except json.JSONDecodeError: + print(f"⚠️ 无法解析embedding字符串: {embedding_key}") + embedding = None + # 清理 properties + properties = self.clean_properties(metadata) + properties["id"] = id_ + properties["memory"] = memory_ + + # 根据embedding维度分类 + field_name = self.detect_embedding_field(embedding) + vector_value = self.convert_to_vector(embedding) if field_name else None + + if field_name == "embedding" and vector_value: + insert_data_1024.append((id_, Json(properties), vector_value)) + # elif field_name == "embedding_768" and vector_value: + # insert_data_768.append((id_, Json(properties), vector_value)) + # elif field_name == "embedding_3072" and vector_value: + # insert_data_3072.append((id_, Json(properties), vector_value)) + else: + insert_data_no_embedding.append((id_, Json(properties))) + + except Exception as e: + batch_errors.append(f"ID: {node.get('id', 'unknown')} - {e}") + + # 批量插入不同维度的数据 + if insert_data_1024: + insert_sql_1024 = f""" + INSERT INTO "Memory" (id, properties, embedding) + VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) + """ + execute_batch(cursor, insert_sql_1024, insert_data_1024) + batch_success += len(insert_data_1024) + + # if insert_data_768: + # insert_sql_768 = f""" + # INSERT INTO "Memory" (id, 
properties, embedding_768) + # VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) + # """ + # execute_batch(cursor, insert_sql_768, insert_data_768) + # batch_success += len(insert_data_768) + # + # if insert_data_3072: + # insert_sql_3072 = f""" + # INSERT INTO "Memory" (id, properties, embedding_3072) + # VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) + # """ + # execute_batch(cursor, insert_sql_3072, insert_data_3072) + # batch_success += len(insert_data_3072) + + if insert_data_no_embedding: + insert_sql_no_embedding = f""" + INSERT INTO "Memory" (id, properties) + VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s) + """ + execute_batch(cursor, insert_sql_no_embedding, insert_data_no_embedding) + batch_success += len(insert_data_no_embedding) + + # 提交当前批次 + self.connection.commit() + success_count += batch_success + error_count += len(batch_errors) + + # 进度显示 + elapsed = (datetime.now() - start_time).total_seconds() + progress = (batch_end / total_nodes) * 100 + estimated_total = (elapsed / batch_end) * total_nodes if batch_end > 0 else 0 + remaining = estimated_total - elapsed + + print(f"📊 进度: {batch_end}/{total_nodes} ({progress:.1f}%) | " + f"成功: {success_count} | 失败: {error_count} | " + f"已用: {elapsed:.0f}s | 剩余: {remaining:.0f}s") + + # 输出批次错误 + if batch_errors: + print(f"❌ 本批次错误: {len(batch_errors)} 条") + for i, error in enumerate(batch_errors[:5]): # 只显示前5个错误 + print(f" {i + 1}. {error}") + if len(batch_errors) > 5: + print(f" ... 
还有 {len(batch_errors) - 5} 个错误") + + except Exception as e: + self.connection.rollback() + error_count += len(current_batch) + print(f"❌ 批次 {batch_start}-{batch_end} 整体失败: {e}") + + total_time = (datetime.now() - start_time).total_seconds() + print(f"✅ 批量插入完成: 成功 {success_count} 条, 失败 {error_count} 条, 总耗时: {total_time:.2f}秒") + + return success_count, error_count + + def clean_properties(self, props): + """移除向量字段""" + vector_keys = {"embedding", "embedding_1024", "embedding_3072", "embedding_768"} + if not isinstance(props, dict): + return {} + return {k: v for k, v in props.items() if k not in vector_keys} + + def detect_embedding_field(self, embedding_list): + """检测 embedding 维度并返回对应的字段名""" + if not embedding_list: + return None + dim = len(embedding_list) + # print("---------",dim) + if dim == 1024: + return "embedding" + elif dim == 768: + return "embedding_768" + elif dim == 3072: + return "embedding_3072" + else: + print(f"⚠️ 未知 embedding 维度 {dim},跳过该向量") + return None + + def convert_to_vector(self, embedding_list): + """将 embedding 列表转换为向量字符串""" + if not embedding_list: + return None + if isinstance(embedding_list, np.ndarray): + embedding_list = embedding_list.tolist() + return "[" + ",".join(str(float(x)) for x in embedding_list) + "]" + + def close(self): + """关闭数据库连接""" + if self.connection: + self.connection.close() + print("🔒 PolarDB连接已关闭") + + +def getPolarDb(): + """直接创建 PolarDB 图数据库实例""" + return PolarDBGraph(POLARDB_CONFIG) + + +def process_metadata(item): + """处理元数据,提取和转换字段""" + metadata = {} + for key, value in item.items(): + if key not in ["id", "memory"]: + # 类型转换 + if key == "confidence": + try: + metadata[key] = float(value) + except (ValueError, TypeError): + metadata[key] = value + elif key == "sources" or key == "usage": + if isinstance(value, str): + try: + parsed_value = json.loads(value) + metadata[key] = [json.dumps(item) for item in parsed_value] if isinstance(parsed_value, + list) else [ + json.dumps(parsed_value)] + except 
json.JSONDecodeError: + metadata[key] = value + else: + metadata[key] = value + elif key == "tags": + if isinstance(value, str): + if value.startswith('[') and value.endswith(']'): + try: + metadata[key] = json.loads(value) + except json.JSONDecodeError: + metadata[key] = [tag.strip() for tag in value[1:-1].split(',')] + else: + metadata[key] = value + else: + metadata[key] = value + else: + metadata[key] = value + return metadata + + +def extract_embedding(item): + """从数据项中提取embedding""" + embedding = None + for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: + if embedding_key in item and item[embedding_key]: + embedding_value = item[embedding_key] + if isinstance(embedding_value, str): + try: + embedding = json.loads(embedding_value) + except json.JSONDecodeError: + print(f"⚠️ 无法解析embedding字符串: {embedding_key}") + embedding = None + else: + embedding = embedding_value + break + return embedding + + +def prepare_nodes_for_insertion(data_list): + """准备要插入的节点数据""" + nodes_to_insert = [] + processed_count = 0 + skipped_count = 0 + + for item in data_list: + id_ = item.get("id") + memory_ = item.get("memory") + + if not id_ or not memory_: + print(f"⚠️ 跳过无效数据: ID或memory为空") + skipped_count += 1 + continue + + # 处理元数据 + metadata = process_metadata(item) + + # 处理embedding字段 + embedding = extract_embedding(item) + if embedding: + metadata["embedding"] = embedding + + # 构建插入的数据 + nodes_to_insert.append({ + "id": id_, + "memory": memory_, + "metadata": metadata + }) + processed_count += 1 + + # 显示进度 + if processed_count % 10000 == 0: + print(f"📝 已预处理 {processed_count} 条数据") + + print(f"✅ 数据预处理完成: 有效 {processed_count} 条, 跳过 {skipped_count} 条") + return nodes_to_insert + + +def insert_data_optimized(data_list, batch_size=1000): + """优化版数据插入""" + graph = getPolarDb() + + # 数据预处理 + print("🔄 开始预处理数据...") + nodes_to_insert = prepare_nodes_for_insertion(data_list) + + if not nodes_to_insert: + print("⚠️ 没有有效数据需要插入") + graph.close() + return 
0, 0 + + # 使用优化版批量插入 + print("🚀 开始批量插入数据...") + success_count, error_count = graph.batch_add_nodes_optimized(nodes_to_insert, batch_size) + + graph.close() + return success_count, error_count + + + +def load_data_from_file(filename): + """从文件加载数据""" + print(f"📂 正在加载文件: {filename}") + try: + with open(filename, "r", encoding="utf-8") as f: + data = json.load(f) + print(f"📂 从文件 {filename} 加载了 {len(data)} 条记录") + return data + except Exception as e: + print(f"❌ 加载文件失败: {e}") + return [] + +def update_graph(): + print("-----------update_graph[start]") + graph = getPolarDb() + graph.update_graph_id_in_properties() + print("---------update_graph[end]") + +def insert_data(conn, data): + # 记录总开始时间 + total_start_time = datetime.now() + + + if not data: + print("⚠️ 没有数据") + return + + print(f"🎯 总共需要处理 {len(data)} 条记录") + success_count, error_count = insert_data_optimized(data, batch_size=1000) + + # 计算总耗时 + total_time = (datetime.now() - total_start_time).total_seconds() + minutes, seconds = divmod(total_time, 60) + hours, minutes = divmod(minutes, 60) + + print(f"\n🎉 处理完成!") + print(f"📊 最终结果:") + print(f" ✅ 成功: {success_count} 条") + print(f" ❌ 失败: {error_count} 条") + print(f" ⏱️ 总耗时: {int(hours)}小时{int(minutes)}分钟{seconds:.2f}秒") + +def main(): + json_file = r"/Users/ccl/Desktop/file/export13/ceshi/ceshi.json" + + # 记录总开始时间 + total_start_time = datetime.now() + + # 加载数据 + data = load_data_from_file(json_file) + if not data: + print("⚠️ 没有数据") + return + + print(f"🎯 总共需要处理 {len(data)} 条记录") + + # 使用优化版本,设置批次大小为1000 + # 可以根据实际情况调整批次大小: + # - 网络好:1000-2000 + # - 网络一般:500-1000 + # - 内存有限:200-500 + success_count, error_count = insert_data_optimized(data, batch_size=1000) + + # 计算总耗时 + total_time = (datetime.now() - total_start_time).total_seconds() + minutes, seconds = divmod(total_time, 60) + hours, minutes = divmod(minutes, 60) + + print(f"\n🎉 处理完成!") + print(f"📊 最终结果:") + print(f" ✅ 成功: {success_count} 条") + print(f" ❌ 失败: {error_count} 条") + print(f" ⏱️ 总耗时: 
{int(hours)}小时{int(minutes)}分钟{seconds:.2f}秒") + + if success_count > 0: + records_per_second = success_count / total_time + print(f" 🚀 处理速度: {records_per_second:.2f} 条/秒") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/basic_modules/importPolarDbEdge.py b/examples/basic_modules/importPolarDbEdge.py index fa50eaf98..f1286c80c 100644 --- a/examples/basic_modules/importPolarDbEdge.py +++ b/examples/basic_modules/importPolarDbEdge.py @@ -28,19 +28,19 @@ BATCH_SIZE = 1000 -def create_elabel(conn, label_name): - """创建关系类型(若不存在)""" - with conn.cursor() as cur: - print(f"🪶 Creating elabel: {label_name}") - try: - cur.execute(f"SELECT create_elabel('memtensor_memos_graph', '{label_name}');") - conn.commit() - except Exception as e: - conn.rollback() - if "already exists" in str(e): - print(f"ℹ️ Label '{label_name}' already exists, skipping.") - else: - print(f"⚠️ Failed to create label {label_name}: {e}") +# def create_elabel(conn, label_name): +# """创建关系类型(若不存在)""" +# with conn.cursor() as cur: +# print(f"🪶 Creating elabel: {label_name}") +# try: +# cur.execute(f"SELECT create_elabel('memtensor_memos_graph', '{label_name}');") +# conn.commit() +# except Exception as e: +# conn.rollback() +# if "already exists" in str(e): +# print(f"ℹ️ Label '{label_name}' already exists, skipping.") +# else: +# print(f"⚠️ Failed to create label {label_name}: {e}") def insert_edges(conn, edges, label_name): @@ -71,7 +71,8 @@ def insert_edges(conn, edges, label_name): def process_relation_folder(conn, folder_path, label_name): """处理一个关系文件夹""" print(f"\n🔗 Processing relation: {label_name}") - create_elabel(conn, label_name) + + # create_elabel(conn, label_name) for root, _, files in os.walk(folder_path): for file in files: if not (file.endswith(".json") or file.endswith(".txt")): diff --git a/examples/basic_modules/parseJson.py b/examples/basic_modules/parseJson.py index 86a41d24b..1fa2a03a2 100644 --- a/examples/basic_modules/parseJson.py +++ 
b/examples/basic_modules/parseJson.py @@ -4,16 +4,18 @@ import sys # Add the parent directory to the path to allow imports -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src')) +sys.path.insert(0, src_path) -from polardb_export_insert import insert_data +# from polardb_export_insert_1 import insert_data +from batchImport_polardbFromJson import insert_data, update_graph DB_CONFIG = { - 'host': 'xxxxxxx', + 'host': 'memory.pg.polardb.rds.aliyuncs.com', 'port': 5432, - 'database': 'xxxxx', - 'user': 'xxxx', - 'password': 'xxxx' + 'database': 'test_zdy', + 'user': 'adimin', + 'password': 'Openmem0925' } conn = psycopg2.connect(**DB_CONFIG) @@ -72,10 +74,15 @@ def process_folder(folder_path, batch_size=1000): # 处理最后不足 batch_size 的部分 if batch: insert(batch) + update_graph() print(f"\n✅ 全部完成,共处理 {total_count} 条记录。") if __name__ == "__main__": - folder_path = r"/Users/zhudayang/python/github/1/MemOS/examples/basic_modules" + # folder_path = r"/Users/ccl/Desktop/file/export13/ceshi" + # 10W + folder_path = r"/Users/ccl/Desktop/file/export15/Memory" + # 70W + folder_path = r"/Users/ccl/Desktop/file/export13/Memory" process_folder(folder_path, batch_size=1000) diff --git a/examples/basic_modules/polardb_export_insert.py b/examples/basic_modules/polardb_export_insert.py index d282d5d72..014cfc912 100644 --- a/examples/basic_modules/polardb_export_insert.py +++ b/examples/basic_modules/polardb_export_insert.py @@ -15,17 +15,33 @@ -DB_CONFIG = { - 'host': 'xxxxxxx', - 'port': 5432, - 'database': 'xxxxx', - 'user': 'xxxx', - 'password': 'xxxx' -} - -# 图数据库配置 +# DB_CONFIG = { +# 'host': 'xxxxxxx', +# 'port': 5432, +# 'database': 'xxxxx', +# 'user': 'xxxx', +# 'password': 'xxxx' +# } +# +# # 图数据库配置 GRAPH_NAME = 'memtensor_memos_graph' - +def getPolarDb(): + config = GraphDBConfigFactory( + backend="polardb", + config={ + "host": "memory.pg.polardb.rds.aliyuncs.com", + 
"port": 5432, + "user": "adimin", + "password": "Openmem0925", + "db_name": "memtensor_memos", + "user_name": 'adimin', + "use_multi_db": True, # 设置为True,不添加user_name过滤条件 + "auto_create": True, + "embedding_dimension": 1024, + }, + ) + graph = GraphStoreFactory.from_config(config) + return graph def create_vector_extension(conn): with conn.cursor() as cursor: @@ -196,23 +212,24 @@ def insert_data(conn, data_list, graph_name=None): graph_name: 图名称,可选 """ # 创建PolarDB配置 - config = GraphDBConfigFactory( - backend="polardb", - config={ - "host": "xxxxxxx", - "port": 5432, - "user": "xxxx", - "password": "xxxx", - "db_name": "xxxxx", - "user_name": 'xxxx', - "use_multi_db": False, - "auto_create": False, - "embedding_dimension": 1024, - }, - ) - - # 创建PolarDB实例 - graph = GraphStoreFactory.from_config(config) + # config = GraphDBConfigFactory( + # backend="polardb", + # config={ + # "host": "xxxxxxx", + # "port": 5432, + # "user": "xxxx", + # "password": "xxxx", + # "db_name": "xxxxx", + # "user_name": 'xxxx', + # "use_multi_db": False, + # "auto_create": False, + # "embedding_dimension": 1024, + # }, + # ) + # + # # 创建PolarDB实例 + # graph = GraphStoreFactory.from_config(config) + graph = getPolarDb() print("✅ PolarDB连接成功") success_count = 0 @@ -326,7 +343,7 @@ def main(): print("⚠️ 没有数据") return - conn = psycopg2.connect(**DB_CONFIG) + # conn = psycopg2.connect(**DB_CONFIG) print("✅ 数据库连接成功") # create_vector_extension(conn) @@ -334,9 +351,9 @@ def main(): # 使用默认的图名称,或者可以传入自定义的图名称 # insert_data(conn, data, "custom_graph_name") - insert_data(conn, data) + insert_data(None, data) - conn.close() + # conn.close() print("🔒 数据库连接1已关闭") From 69cbcb024d66da55c97cffe9a412af1af7491ede Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Wed, 22 Oct 2025 17:46:02 +0800 Subject: [PATCH 098/137] fix:get_my_metadata --- src/memos/graph_dbs/polardb.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py 
b/src/memos/graph_dbs/polardb.py index 7fe8ccecb..71a71b2c4 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1354,13 +1354,13 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = escaped_value = f"[{', '.join(list_items)}]" else: escaped_value = f"'{value}'" if isinstance(value, str) else str(value) - + print("op=============:",op) # 构建 WHERE 条件 if op == "=": where_conditions.append(f"n.{field} = {escaped_value}") elif op == "in": - # where_conditions.append(f"n.{field} IN {escaped_value}") - where_conditions.append(f"{escaped_value} IN n.{field}") + where_conditions.append(f"n.{field} IN {escaped_value}") + # where_conditions.append(f"{escaped_value} IN n.{field}") elif op == "contains": where_conditions.append(f"{escaped_value} IN n.{field}") # where_conditions.append(f"size(filter(n.{field}, t -> t IN {escaped_value})) > 0") @@ -1393,6 +1393,7 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = with self.connection.cursor() as cursor: cursor.execute(cypher_query) results = cursor.fetchall() + print("[get_by_metadata] result:"+results) for row in results: if row[0] and hasattr(row[0], 'value'): ids.append(row[0].value) From e91d190cfae41b156b3d29fceec634337f9bf994 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Wed, 22 Oct 2025 18:04:26 +0800 Subject: [PATCH 099/137] fix --- examples/basic_modules/polardb_search.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index c7dd85fdb..ac2449951 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -213,9 +213,9 @@ def get_neighbors_by_tag(db_name, user_name): print("get_neighbors_by_tag:", neighbors) -def get_edges(db_name, id, user_name): +def get_edges(db_name: str, id: str, type: str, direction: str, user_name: str 
= None) -> None: graph = getPolarDb(db_name) - edges = graph.get_edges(id=id, user_name=user_name) + edges = graph.get_edges(id=id, type=type, direction=direction, user_name=user_name) print("get_edges:", edges) @@ -383,10 +383,10 @@ def get_edges(db_name, id, user_name): # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") # get_all_memory_items(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") - + # 测试 get_structure_optimization_candidates 函数 # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") # get_neighbors_by_tag(db_name="memtensor_memos",user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") - get_edges(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") + get_edges(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",type="PARENT",direction="OUTGOING",user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") From bd190c66235e2085012310dbb8cc4829b86eb088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Wed, 22 Oct 2025 21:06:47 +0800 Subject: [PATCH 100/137] fix get_by_metadata result --- src/memos/graph_dbs/polardb.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 71a71b2c4..acfc4e9bd 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1393,13 +1393,10 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = with self.connection.cursor() as cursor: cursor.execute(cypher_query) results = cursor.fetchall() - print("[get_by_metadata] result:"+results) - for row in results: - if row[0] and 
hasattr(row[0], 'value'): - ids.append(row[0].value) - elif row[0]: - ids.append(str(row[0])) + print("[get_by_metadata] result:",results) + ids = [str(item[0]).strip('"') for item in results] except Exception as e: + print("Failed to get metadata:", {e}) logger.error(f"Failed to get metadata: {e}, query is {cypher_query}") return ids From 779db23a23a29bbcf842687dc623b8988b623753 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Thu, 23 Oct 2025 14:07:45 +0800 Subject: [PATCH 101/137] update polardb.py --- src/memos/graph_dbs/polardb.py | 284 ++++++++++++++++----------------- 1 file changed, 142 insertions(+), 142 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index acfc4e9bd..4a40fd5d1 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -16,7 +16,7 @@ logger = get_logger(__name__) -# 图数据库配置 +# Graph database configuration GRAPH_NAME = 'test_memos_graph' @@ -55,7 +55,7 @@ def generate_vector(dim=1024, low=-0.2, high=0.2): def find_embedding(metadata): def find_embedding(item): - """在多层结构中查找 embedding 向量""" + """Find an embedding vector within nested structures""" for key in ["embedding", "embedding_1024", "embedding_3072", "embedding_768"]: if key in item and isinstance(item[key], list): return item[key] @@ -73,7 +73,7 @@ def detect_embedding_field(embedding_list): if dim == 1024: return "embedding" else: - print(f"⚠️ 未知 embedding 维度 {dim},跳过该向量") + print(f"⚠️ Unknown embedding dimension {dim}, skipping this vector") return None def convert_to_vector(embedding_list): if not embedding_list: @@ -83,7 +83,7 @@ def convert_to_vector(embedding_list): return "[" + ",".join(str(float(x)) for x in embedding_list) + "]" def clean_properties(props): - """移除向量字段""" + """Remove vector fields""" vector_keys = {"embedding", "embedding_1024", "embedding_3072", "embedding_768"} if not isinstance(props, dict): return {} @@ -322,8 +322,8 @@ def 
remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: st """ user_name = user_name if user_name else self._get_config_value("user_name") - # 使用真正的 OFFSET 逻辑,与 nebular.py 保持一致 - # 先找到要删除的节点ID,然后删除它们 + # Use actual OFFSET logic, consistent with nebular.py + # First find IDs to delete, then delete them select_query = f""" SELECT id FROM "{self.db_name}_graph"."Memory" WHERE ag_catalog.agtype_access_operator(properties, '"memory_type"'::agtype) = %s::agtype @@ -337,7 +337,7 @@ def remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: st try: with self.connection.cursor() as cursor: - # 执行查询获取要删除的ID列表 + # Execute query to get IDs to delete cursor.execute(select_query, select_params) ids_to_delete = [row[0] for row in cursor.fetchall()] @@ -345,7 +345,7 @@ def remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: st logger.info(f"No {memory_type} memories to remove for user {user_name}") return - # 构建删除查询 + # Build delete query placeholders = ','.join(['%s'] * len(ids_to_delete)) delete_query = f""" DELETE FROM "{self.db_name}_graph"."Memory" @@ -353,7 +353,7 @@ def remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: st """ delete_params = ids_to_delete - # 执行删除 + # Execute deletion cursor.execute(delete_query, delete_params) deleted_count = cursor.rowcount logger.info(f"Removed {deleted_count} oldest {memory_type} memories, keeping {keep_latest} latest for user {user_name}") @@ -371,32 +371,32 @@ def update_node(self, id: str, fields: dict[str, Any], user_name: str | None = N user_name = user_name if user_name else self.config.user_name - # 获取当前节点 + # Get the current node current_node = self.get_node(id, user_name=user_name) if not current_node: return - # 更新属性,但保留原始的id字段和memory字段 + # Update properties but keep original id and memory fields properties = current_node["metadata"].copy() - original_id = properties.get("id", id) # 保留原始ID - original_memory = current_node.get("memory", "") # 
保留原始memory + original_id = properties.get("id", id) # Preserve original ID + original_memory = current_node.get("memory", "") # Preserve original memory - # 如果fields中有memory字段,使用它;否则保留原始的memory + # If fields include memory, use it; otherwise keep original memory if "memory" in fields: original_memory = fields.pop("memory") properties.update(fields) - properties["id"] = original_id # 确保ID不被覆盖 - properties["memory"] = original_memory # 确保memory不被覆盖 + properties["id"] = original_id # Ensure ID is not overwritten + properties["memory"] = original_memory # Ensure memory is not overwritten - # 处理 embedding 字段 + # Handle embedding field embedding_vector = None if "embedding" in fields: embedding_vector = fields.pop("embedding") if not isinstance(embedding_vector, list): embedding_vector = None - # 构建更新查询 + # Build update query if embedding_vector is not None: query = f""" UPDATE "{self.db_name}_graph"."Memory" @@ -412,7 +412,7 @@ def update_node(self, id: str, fields: dict[str, Any], user_name: str | None = N """ params = [json.dumps(properties), f'"{id}"'] - # 只有在提供了 user_name 参数时才添加用户过滤 + # Only add user filter when user_name is provided if user_name is not None: query += "\nAND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" params.append(f'"{user_name}"') @@ -439,7 +439,7 @@ def delete_node(self, id: str, user_name: str | None = None) -> None: """ params = [f'"{id}"'] - # 只有在提供了 user_name 参数时才添加用户过滤 + # Only add user filter when user_name is provided if user_name is not None: query += "\nAND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" params.append(f'"{user_name}"') @@ -455,15 +455,15 @@ def delete_node(self, id: str, user_name: str | None = None) -> None: @timed def create_extension(self): extensions = [ - ("polar_age", "图引擎"), - ("vector", "向量引擎") + ("polar_age", "Graph engine"), + ("vector", "Vector engine") ] try: with self.connection.cursor() as cursor: - # 确保在正确的数据库上下文中 + # Ensure in 
the correct database context cursor.execute(f"SELECT current_database();") current_db = cursor.fetchone()[0] - print(f"当前数据库上下文: {current_db}") + print(f"Current database context: {current_db}") for ext_name, ext_desc in extensions: try: @@ -500,7 +500,7 @@ def create_graph(self): @timed def create_edge(self): - """创建所有有效的边类型,如果不存在的话""" + """Create all valid edge types if they do not exist""" VALID_REL_TYPES = { "AGGREGATE_TO", "FOLLOWS", @@ -701,7 +701,7 @@ def get_node(self, id: str, include_embedding: bool = False, user_name: str | No Returns: dict: Node properties as key-value pairs, or None if not found. """ - # 构建查询字段 + # Build select fields if include_embedding: select_fields = "id, properties, embedding" else: @@ -714,7 +714,7 @@ def get_node(self, id: str, include_embedding: bool = False, user_name: str | No """ params = [f'"{id}"'] - # 只有在提供了 user_name 参数时才添加用户过滤 + # Only add user filter when user_name is provided if user_name is not None: query += "\nAND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" params.append(f'"{user_name}"') @@ -847,10 +847,10 @@ def get_edges_old(self, id: str, type: str = "ANY", direction: str = "ANY") -> l ] """ - # 创建一个简单的边表来存储关系(如果不存在的话) + # Create a simple edge table to store relationships (if not exists) try: with self.connection.cursor() as cursor: - # 创建边表 + # Create edge table cursor.execute(f""" CREATE TABLE IF NOT EXISTS "{self.db_name}_graph"."Edges" ( id SERIAL PRIMARY KEY, @@ -864,7 +864,7 @@ def get_edges_old(self, id: str, type: str = "ANY", direction: str = "ANY") -> l ); """) - # 创建索引 + # Create indexes cursor.execute(f""" CREATE INDEX IF NOT EXISTS idx_edges_source ON "{self.db_name}_graph"."Edges" (source_id); @@ -880,7 +880,7 @@ def get_edges_old(self, id: str, type: str = "ANY", direction: str = "ANY") -> l except Exception as e: logger.warning(f"Failed to create edges table: {e}") - # 查询边 + # Query edges where_clauses = [] params = [id] @@ -894,7 +894,7 @@ def 
get_edges_old(self, id: str, type: str = "ANY", direction: str = "ANY") -> l where_clauses.append("target_id = %s") else: # ANY where_clauses.append("(source_id = %s OR target_id = %s)") - params.append(id) # 添加第二个参数用于ANY方向 + params.append(id) # Add second parameter for ANY direction where_clause = " AND ".join(where_clauses) @@ -944,35 +944,35 @@ def get_neighbors_by_tag_old( Returns: List of dicts with node details and overlap count. """ - # 构建查询条件 + # Build query conditions where_clauses = [] params = [] - # 排除指定的ID + # Exclude specified IDs if exclude_ids: placeholders = ','.join(['%s'] * len(exclude_ids)) where_clauses.append(f"id NOT IN ({placeholders})") params.extend(exclude_ids) - # 状态过滤 + # Status filter where_clauses.append("properties->>'status' = %s") params.append('activated') - # 类型过滤 + # Type filter where_clauses.append("properties->>'type' != %s") params.append('reasoning') where_clauses.append("properties->>'memory_type' != %s") params.append('WorkingMemory') - # 用户过滤 + # User filter if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): where_clauses.append("properties->>'user_name' = %s") params.append(self._get_config_value("user_name")) where_clause = " AND ".join(where_clauses) - # 获取所有候选节点 + # Get all candidate nodes query = f""" SELECT id, properties, embedding FROM "{self.db_name}_graph"."Memory" @@ -988,7 +988,7 @@ def get_neighbors_by_tag_old( node_id, properties_json, embedding_json = row properties = properties_json if properties_json else {} - # 解析embedding + # Parse embedding if embedding_json is not None: try: embedding = json.loads(embedding_json) if isinstance(embedding_json, str) else embedding_json @@ -996,7 +996,7 @@ def get_neighbors_by_tag_old( except (json.JSONDecodeError, TypeError): logger.warning(f"Failed to parse embedding for node {node_id}") - # 计算标签重叠 + # Compute tag overlap node_tags = properties.get("tags", []) if isinstance(node_tags, str): try: @@ -1015,7 +1015,7 @@ def 
get_neighbors_by_tag_old( }) nodes_with_overlap.append((node_data, overlap_count)) - # 按重叠数量排序并返回前top_k个 + # Sort by overlap count and return top_k nodes_with_overlap.sort(key=lambda x: x[1], reverse=True) return [node for node, _ in nodes_with_overlap[:top_k]] @@ -1049,10 +1049,10 @@ def get_children_with_embeddings(self, id: str, user_name: str | None = None) -> children = [] for row in results: - # 处理 child_id - 移除可能的引号 + # Handle child_id - remove possible quotes child_id_raw = row[0].value if hasattr(row[0], 'value') else str(row[0]) if isinstance(child_id_raw, str): - # 如果字符串以引号开始和结束,去掉引号 + # If string starts and ends with quotes, remove quotes if child_id_raw.startswith('"') and child_id_raw.endswith('"'): child_id = child_id_raw[1:-1] else: @@ -1060,28 +1060,28 @@ def get_children_with_embeddings(self, id: str, user_name: str | None = None) -> else: child_id = str(child_id_raw) - # 处理 embedding - 从数据库的embedding列获取 + # Handle embedding - get from database embedding column embedding_raw = row[1] embedding = [] if embedding_raw is not None: try: if isinstance(embedding_raw, str): - # 如果是JSON字符串,解析它 + # If it is a JSON string, parse it embedding = json.loads(embedding_raw) elif isinstance(embedding_raw, list): - # 如果已经是列表,直接使用 + # If already a list, use directly embedding = embedding_raw else: - # 尝试转换为列表 + # Try converting to list embedding = list(embedding_raw) except (json.JSONDecodeError, TypeError, ValueError) as e: logger.warning(f"Failed to parse embedding for child node {child_id}: {e}") embedding = [] - # 处理 memory - 移除可能的引号 + # Handle memory - remove possible quotes memory_raw = row[2].value if hasattr(row[2], 'value') else str(row[2]) if isinstance(memory_raw, str): - # 如果字符串以引号开始和结束,去掉引号 + # If string starts and ends with quotes, remove quotes if memory_raw.startswith('"') and memory_raw.endswith('"'): memory = memory_raw[1:-1] else: @@ -1132,7 +1132,7 @@ def get_subgraph( user_name = user_name if user_name else self._get_config_value("user_name") - 
# 使用简化的查询获取子图(暂时只获取直接邻居) + # Use a simplified query to get the subgraph (temporarily only direct neighbors) query1 = f""" SELECT * FROM cypher('{self.db_name}_graph', $$ MATCH(center: Memory)-[r * 1..{depth}]->(neighbor:Memory) @@ -1169,14 +1169,14 @@ def get_subgraph( if not result or not result[0]: return {"core_node": None, "neighbors": [], "edges": []} - # 解析中心节点 + # Parse center node centers_data = result[0] if result[0] else "[]" neighbors_data = result[1] if result[1] else "[]" edges_data = result[2] if result[2] else "[]" - # 解析 JSON 数据 + # Parse JSON data try: - # 清理数据中的 ::vertex 和 ::edge 后缀 + # Clean ::vertex and ::edge suffixes in data if isinstance(centers_data, str): centers_data = centers_data.replace('::vertex', '') if isinstance(neighbors_data, str): @@ -1191,14 +1191,14 @@ def get_subgraph( logger.error(f"Failed to parse JSON data: {e}") return {"core_node": None, "neighbors": [], "edges": []} - # 解析中心节点 + # Parse center node core_node = None if centers_list and len(centers_list) > 0: center_data = centers_list[0] if isinstance(center_data, dict) and "properties" in center_data: core_node = self._parse_node(center_data["properties"]) - # 解析邻居节点 + # Parse neighbor nodes neighbors = [] if isinstance(neighbors_list, list): for neighbor_data in neighbors_list: @@ -1206,7 +1206,7 @@ def get_subgraph( neighbor_parsed = self._parse_node(neighbor_data["properties"]) neighbors.append(neighbor_parsed) - # 解析边 + # Parse edges edges = [] if isinstance(edges_list, list): for edge_group in edges_list: @@ -1332,7 +1332,7 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = """ user_name = user_name if user_name else self._get_config_value("user_name") - # 构建 cypher 查询的 WHERE 条件 + # Build WHERE conditions for cypher query where_conditions = [] for f in filters: @@ -1340,11 +1340,11 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = op = f.get("op", "=") value = f["value"] - # 格式化值 + # Format value if 
isinstance(value, str): escaped_value = f"'{value}'" elif isinstance(value, list): - # 处理列表值 + # Handle list values list_items = [] for v in value: if isinstance(v, str): @@ -1355,7 +1355,7 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = else: escaped_value = f"'{value}'" if isinstance(value, str) else str(value) print("op=============:",op) - # 构建 WHERE 条件 + # Build WHERE conditions if op == "=": where_conditions.append(f"n.{field} = {escaped_value}") elif op == "in": @@ -1373,12 +1373,12 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = else: raise ValueError(f"Unsupported operator: {op}") - # 添加用户名称过滤 + # Add user_name filter where_conditions.append(f"n.user_name = '{user_name}'") where_str = " AND ".join(where_conditions) - # 使用 cypher 查询 + # Use cypher query cypher_query = f""" SELECT * FROM cypher('{self.db_name}_graph', $$ MATCH (n:Memory) @@ -1454,7 +1454,7 @@ def get_grouped_counts1( print("get_grouped_counts:" + query) try: with self.connection.cursor() as cursor: - # 处理参数化查询 + # Handle parameterized query if params and isinstance(params, list): cursor.execute(query, final_params) else: @@ -1524,7 +1524,7 @@ def get_grouped_counts( value = f"'{value}'" where_clause = where_clause.replace(f"${key}", str(value)) - # 处理 where_clause 中的 user_name 参数 + # Handle user_name parameter in where_clause if "user_name = %s" in where_clause: where_clause = where_clause.replace("user_name = %s", f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype") @@ -1549,7 +1549,7 @@ def get_grouped_counts( try: with self.connection.cursor() as cursor: - # 处理参数化查询 + # Handle parameterized query if params and isinstance(params, list): cursor.execute(query, params) else: @@ -1768,7 +1768,7 @@ def get_all_memory_items( if scope not in {"WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"}: raise ValueError(f"Unsupported memory type scope: {scope}") - # 使用 
cypher 查询获取记忆项 + # Use cypher query to retrieve memory items if include_embedding: cypher_query = f""" WITH t as ( @@ -1858,7 +1858,7 @@ def get_all_memory_items_old( if scope not in {"WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"}: raise ValueError(f"Unsupported memory type scope: {scope}") - # 使用 cypher 查询获取记忆项 + # Use cypher query to retrieve memory items if include_embedding: cypher_query = f""" WITH t as ( @@ -1898,16 +1898,16 @@ def get_all_memory_items_old( node_agtype = row[0] # print(f"[get_all_memory_items] Processing row: {type(node_agtype)} = {node_agtype}") - # 处理字符串格式的数据 + # Handle string-formatted data if isinstance(node_agtype, str): try: - # 移除 ::vertex 后缀 + # Remove ::vertex suffix json_str = node_agtype.replace('::vertex', '') node_data = json.loads(json_str) if isinstance(node_data, dict) and "properties" in node_data: properties = node_data["properties"] - # 构建节点数据 + # Build node data parsed_node_data = { "id": properties.get("id", ""), "memory": properties.get("memory", ""), @@ -1918,17 +1918,17 @@ def get_all_memory_items_old( parsed_node_data["embedding"] = properties["embedding"] nodes.append(self._parse_node(parsed_node_data)) - print(f"[get_all_memory_items] ✅ 成功解析节点: {properties.get('id', '')}") + print(f"[get_all_memory_items] ✅ Parsed node successfully: {properties.get('id', '')}") else: - print(f"[get_all_memory_items] ❌ 节点数据格式不正确: {node_data}") + print(f"[get_all_memory_items] ❌ Invalid node data format: {node_data}") except (json.JSONDecodeError, TypeError) as e: - print(f"[get_all_memory_items] ❌ JSON 解析失败: {e}") + print(f"[get_all_memory_items] ❌ JSON parsing failed: {e}") elif node_agtype and hasattr(node_agtype, 'value'): - # 处理 agtype 对象 + # Handle agtype object node_props = node_agtype.value if isinstance(node_props, dict): - # 解析节点属性 + # Parse node properties node_data = { "id": node_props.get("id", ""), "memory": node_props.get("memory", ""), @@ -1939,9 +1939,9 @@ def get_all_memory_items_old( 
node_data["embedding"] = node_props["embedding"] nodes.append(self._parse_node(node_data)) - print(f"[get_all_memory_items] ✅ 成功解析 agtype 节点: {node_props.get('id', '')}") + print(f"[get_all_memory_items] ✅ Parsed agtype node successfully: {node_props.get('id', '')}") else: - print(f"[get_all_memory_items] ❌ 未知的数据格式: {type(node_agtype)}") + print(f"[get_all_memory_items] ❌ Unknown data format: {type(node_agtype)}") except Exception as e: logger.error(f"Failed to get memories: {e}", exc_info=True) @@ -1959,12 +1959,12 @@ def get_structure_optimization_candidates( """ user_name = user_name if user_name else self._get_config_value("user_name") - # 构建返回字段,根据 include_embedding 参数决定是否包含 embedding + # Build return fields based on include_embedding flag if include_embedding: return_fields = "id(n) as id1,n" return_fields_agtype = " id1 agtype,n agtype" else: - # 构建不包含 embedding 的字段列表 + # Build field list without embedding return_fields = ",".join([ "n.id AS id", "n.memory AS memory", @@ -1994,7 +1994,7 @@ def get_structure_optimization_candidates( ] return_fields_agtype = ", ".join([f"{field} agtype" for field in fields]) - # 保留写法 + # Keep legacy query cypher_query_1 = f""" SELECT m.* FROM {self.db_name}_graph."Memory" m @@ -2009,7 +2009,7 @@ def get_structure_optimization_candidates( ); """ - # 使用 OPTIONAL MATCH 来查找孤立节点(没有父节点和子节点的节点) + # Use OPTIONAL MATCH to find isolated nodes (no parents or children) cypher_query = f""" SELECT * FROM cypher('{self.db_name}_graph', $$ MATCH (n:Memory) @@ -2046,7 +2046,7 @@ def get_structure_optimization_candidates( print("result------",len(results)) for row in results: if include_embedding: - # 当 include_embedding=True 时,返回完整的节点对象 + # When include_embedding=True, return full node object if isinstance(row, (list, tuple)) and len(row) >= 2: embedding_val, node_val = row[0], row[1] else: @@ -2059,8 +2059,8 @@ def get_structure_optimization_candidates( candidates.append(node) node_ids.add(node_id) else: - # 当 include_embedding=False 时,返回字段字典 
- # 定义字段名称(与查询中的 RETURN 字段对应) + # When include_embedding=False, return field dictionary + # Define field names matching the RETURN clause field_names = [ "id", "memory", "user_name", "user_id", "session_id", "status", "key", "confidence", "tags", "created_at", "updated_at", @@ -2068,22 +2068,22 @@ def get_structure_optimization_candidates( "usage", "background","graph_id" ] - # 将行数据转换为字典 + # Convert row to dictionary node_data = {} for i, field_name in enumerate(field_names): if i < len(row): value = row[i] - # 处理特殊字段 + # Handle special fields if field_name in ["tags", "sources", "usage"] and isinstance(value, str): try: - # 尝试解析 JSON 字符串 + # Try parsing JSON string node_data[field_name] = json.loads(value) except (json.JSONDecodeError, TypeError): node_data[field_name] = value else: node_data[field_name] = value - # 使用 _parse_node 方法解析 + # Parse node using _parse_node_new try: node = self._parse_node_new(node_data) node_id = node["id"] @@ -2091,9 +2091,9 @@ def get_structure_optimization_candidates( if node_id not in node_ids: candidates.append(node) node_ids.add(node_id) - print(f"✅ 成功解析节点: {node_id}") + print(f"✅ Parsed node successfully: {node_id}") except Exception as e: - print(f"❌ 解析节点失败: {e}") + print(f"❌ Failed to parse node: {e}") except Exception as e: logger.error(f"Failed to get structure optimization candidates: {e}", exc_info=True) @@ -2122,8 +2122,8 @@ def _parse_node(self, node_data: dict[str, Any]) -> dict[str, Any]: if time_field in node and hasattr(node[time_field], "isoformat"): node[time_field] = node[time_field].isoformat() - # 不再对sources和usage字段进行反序列化,保持List[str]格式 - # 不再移除user_name字段,保持所有字段 + # Do not deserialize sources and usage; keep List[str] format + # Do not remove user_name; keep all fields # 1 # return {"id": node.pop("id"), "memory": node.pop("memory", ""), "metadata": node} @@ -2131,7 +2131,7 @@ def _parse_node(self, node_data: dict[str, Any]) -> dict[str, Any]: # 2 # node_id = node.pop("id") # memory = node.pop("memory", "") - # 
# 在 metadata 中添加 id 字段 + # # Add id field into metadata # node["id"] = node_id # node1 = node # return {"id": node_id, "memory": memory, "metadata": node1} @@ -2158,8 +2158,8 @@ def _strip_wrapping_quotes(value: Any) -> Any: if time_field in node and hasattr(node[time_field], "isoformat"): node[time_field] = node[time_field].isoformat() - # 不再对sources和usage字段进行反序列化,保持List[str]格式 - # 不再移除user_name字段,保持所有字段 + # Do not deserialize sources and usage; keep List[str] + # Do not remove user_name; keep all fields return {"id": node.pop("id"), "memory": node.pop("memory", ""), "metadata": node} @@ -2171,16 +2171,16 @@ def __del__(self): #deprecated def add_node_old(conn, id: str, memory: str, metadata: dict, graph_name=None): """ - 添加单个节点到图数据库 + Add a single node to the graph database Args: - conn: 数据库连接 - id: 节点ID - memory: 内存内容 - metadata: 元数据字典 - graph_name: 图名称,可选 + conn: Database connection + id: Node ID + memory: Memory content + metadata: Metadata dictionary + graph_name: Graph name, optional """ - # 使用传入的graph_name或默认值 + # Use provided graph_name or default if graph_name is None: graph_name = GRAPH_NAME @@ -2197,14 +2197,14 @@ def add_node_old(conn, id: str, memory: str, metadata: dict, graph_name=None): properties["memory"] = memory with conn.cursor() as cursor: - # 先删除现有记录(如果存在) + # Delete existing record first (if any) delete_sql = f""" DELETE FROM "Memory" WHERE id = ag_catalog._make_graph_id('{graph_name}'::name, 'Memory'::name, %s::text::cstring); """ cursor.execute(delete_sql, (id,)) - # 然后插入新记录 + # Then insert new record if field_name and vector_value: insert_sql = f""" INSERT INTO "Memory" (id, properties, {field_name}) @@ -2215,7 +2215,7 @@ def add_node_old(conn, id: str, memory: str, metadata: dict, graph_name=None): ); """ cursor.execute(insert_sql, (id, Json(properties), vector_value)) - print(f"✅ 成功插入/更新: {id} ({field_name})") + print(f"✅ Insert/update succeeded: {id} ({field_name})") else: insert_sql = f""" INSERT INTO "Memory" (id, properties) @@ 
-2225,20 +2225,20 @@ def add_node_old(conn, id: str, memory: str, metadata: dict, graph_name=None): ); """ cursor.execute(insert_sql, (id, Json(properties))) - print(f"✅ 成功插入/更新(无向量): {id}") + print(f"✅ Insert/update succeeded (no vector): {id}") conn.commit() return True except Exception as e: conn.rollback() - print(f"❌ 插入失败 (ID: {id}): {e}") + print(f"❌ Insert failed (ID: {id}): {e}") return False @timed def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: str | None = None) -> None: """Add a memory node to the graph.""" - # user_name 从 metadata 中获取,如果不存在则从配置中获取 + # user_name comes from metadata; fallback to config if missing metadata["user_name"] = user_name if user_name else self.config.user_name # if "user_name" not in metadata: # if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): @@ -2264,16 +2264,16 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: st if "embedding" not in properties or not properties["embedding"]: properties["embedding"] = generate_vector(self._get_config_value("embedding_dimension", 1024)) - # serialization - 处理sources和usage字段的JSON序列化 + # serialization - JSON-serialize sources and usage fields for field_name in ["sources", "usage"]: if field_name in properties and properties[field_name]: if isinstance(properties[field_name], list): for idx in range(len(properties[field_name])): - # 只有当元素不是字符串时才进行序列化 + # Serialize only when element is not a string if not isinstance(properties[field_name][idx], str): properties[field_name][idx] = json.dumps(properties[field_name][idx]) elif isinstance(properties[field_name], str): - # 如果已经是字符串,保持不变 + # If already a string, leave as-is pass # Extract embedding for separate column @@ -2281,8 +2281,8 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: st if not isinstance(embedding_vector, list): embedding_vector = [] - # 根据embedding维度选择正确的列名 - embedding_column = "embedding" # 默认列 + # 
Select column name based on embedding dimension + embedding_column = "embedding" # default column if len(embedding_vector) == 3072: embedding_column = "embedding_3072" elif len(embedding_vector) == 1024: @@ -2291,7 +2291,7 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: st embedding_column = "embedding_768" with self.connection.cursor() as cursor: - # 先删除现有记录(如果存在) + # Delete existing record first (if any) delete_query = f""" DELETE FROM {self.db_name}_graph."Memory" WHERE id = ag_catalog._make_graph_id('{self.db_name}_graph'::name, 'Memory'::name, %s::text::cstring) @@ -2305,7 +2305,7 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: st graph_id = cursor.fetchone()[0] properties['graph_id'] = str(graph_id) - # 然后插入新记录 + # Then insert new record if embedding_vector: insert_query = f""" INSERT INTO {self.db_name}_graph."Memory"(id, properties, {embedding_column}) @@ -2329,18 +2329,18 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: st def _build_node_from_agtype(self, node_agtype, embedding=None): """ - 将 cypher 返回的 n 列(agtype 或 JSON 字符串)解析为标准节点, - 并把 embedding 合并进 properties 里。 + Parse the cypher-returned column `n` (agtype or JSON string) + into a standard node and merge embedding into properties. 
""" try: - # 字符串场景: '{"id":...,"label":[...],"properties":{...}}::vertex' + # String case: '{"id":...,"label":[...],"properties":{...}}::vertex' if isinstance(node_agtype, str): json_str = node_agtype.replace('::vertex', '') obj = json.loads(json_str) if not (isinstance(obj, dict) and "properties" in obj): return None props = obj["properties"] - # agtype 场景: 带 value 属性 + # agtype case: has `value` attribute elif node_agtype and hasattr(node_agtype, "value"): val = node_agtype.value if not (isinstance(val, dict) and "properties" in val): @@ -2352,7 +2352,7 @@ def _build_node_from_agtype(self, node_agtype, embedding=None): if embedding is not None: props["embedding"] = embedding - # 直接返回标准格式,不需要再次调用 _parse_node_new + # Return standard format directly; no need to call _parse_node_new again return {"id": props.get("id", ""), "memory": props.get("memory", ""), "metadata": props} except Exception: return None @@ -2385,11 +2385,11 @@ def get_neighbors_by_tag( user_name = user_name if user_name else self._get_config_value("user_name") - # 构建查询条件 - 更宽松的过滤条件 + # Build query conditions - more relaxed filters where_clauses = [] params = [] - # 排除指定的ID - 使用 properties 中的 id 字段 + # Exclude specified IDs - use id in properties if exclude_ids: exclude_conditions = [] for exclude_id in exclude_ids: @@ -2398,23 +2398,23 @@ def get_neighbors_by_tag( params.append(f'"{exclude_id}"') where_clauses.append(f"({' AND '.join(exclude_conditions)})") - # 状态过滤 - 只保留 activated 状态 + # Status filter - keep only 'activated' where_clauses.append( "ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"activated\"'::agtype") - # 类型过滤 - 排除 reasoning 类型 + # Type filter - exclude 'reasoning' type # where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"node_type\"'::agtype) != '\"reasoning\"'::agtype") - # 用户过滤 + # User filter where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype") params.append(f'"{user_name}"') - # 
测试无数据,需要注释 + # Testing showed no data; annotate. where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"memory_type\"'::agtype) != '\"WorkingMemory\"'::agtype") where_clause = " AND ".join(where_clauses) - # 获取所有候选节点 + # Fetch all candidate nodes query = f""" SELECT id, properties, embedding FROM "{self.db_name}_graph"."Memory" @@ -2433,7 +2433,7 @@ def get_neighbors_by_tag( node_id, properties_json, embedding_json = row properties = properties_json if properties_json else {} - # 解析embedding + # Parse embedding if include_embedding and embedding_json is not None: try: embedding = json.loads(embedding_json) if isinstance(embedding_json, @@ -2442,7 +2442,7 @@ def get_neighbors_by_tag( except (json.JSONDecodeError, TypeError): logger.warning(f"Failed to parse embedding for node {node_id}") - # 计算标签重叠 + # Compute tag overlap node_tags = properties.get("tags", []) if isinstance(node_tags, str): try: @@ -2461,7 +2461,7 @@ def get_neighbors_by_tag( }) nodes_with_overlap.append((node_data, overlap_count)) - # 按重叠数量排序并返回前top_k个 + # Sort by overlap count and return top_k items nodes_with_overlap.sort(key=lambda x: x[1], reverse=True) return [node for node, _ in nodes_with_overlap[:top_k]] @@ -2497,7 +2497,7 @@ def get_neighbors_by_tag_ccl( user_name = user_name if user_name else self._get_config_value("user_name") - # 构建查询条件,与 nebular.py 保持一致 + # Build query conditions; keep consistent with nebular.py where_clauses = [ 'n.status = "activated"', 'NOT (n.node_type = "reasoning")', @@ -2543,14 +2543,14 @@ def get_neighbors_by_tag_ccl( return_fields_str = ", ".join(return_fields) result_fields = [] for field in return_fields: - # 从 "n.id AS id" 提取出字段名 "id" + # Extract field name 'id' from 'n.id AS id' field_name = field.split(" AS ")[-1] result_fields.append(f"{field_name} agtype") - # 添加 overlap_count + # Add overlap_count result_fields.append("overlap_count agtype") result_fields_str = ", ".join(result_fields) - # 使用 Cypher 查询,与 nebular.py 保持一致 + # Use Cypher 
query; keep consistent with nebular.py query = f""" SELECT * FROM ( SELECT * FROM cypher('{self.db_name}_graph', $$ @@ -2572,11 +2572,11 @@ def get_neighbors_by_tag_ccl( neighbors = [] for row in results: - # 解析结果 + # Parse results props = {} overlap_count = None - # 手动解析每个字段 + # Manually parse each field field_names = [ "id", "memory", "user_name", "user_id", "session_id", "status", "key", "confidence", "tags", "created_at", "updated_at", @@ -2598,11 +2598,11 @@ def get_neighbors_by_tag_ccl( parsed["overlap_count"] = overlap_int neighbors.append(parsed) - # 按重叠数量排序 + # Sort by overlap count neighbors.sort(key=lambda x: x["overlap_count"], reverse=True) neighbors = neighbors[:top_k] - # 移除 overlap_count 字段 + # Remove overlap_count field result = [] for neighbor in neighbors: neighbor.pop("overlap_count", None) @@ -2632,7 +2632,7 @@ def import_graph(self, data: dict[str, Any], user_name: str | None = None) -> No metadata = _prepare_node_metadata(metadata) metadata.update({"id": id, "memory": memory}) - # 使用 add_node 方法添加节点 + # Use add_node to insert node self.add_node(id, memory, metadata) except Exception as e: @@ -2644,7 +2644,7 @@ def import_graph(self, data: dict[str, Any], user_name: str | None = None) -> No source_id, target_id = edge["source"], edge["target"] edge_type = edge["type"] - # 使用 add_edge 方法添加边 + # Use add_edge to insert edge self.add_edge(source_id, target_id, edge_type, user_name) except Exception as e: @@ -2683,11 +2683,11 @@ def get_edges(self, id: str, type: str = "ANY", direction: str = "ANY", user_nam else: raise ValueError("Invalid direction. 
Must be 'OUTGOING', 'INCOMING', or 'ANY'.") - # 添加类型过滤 + # Add type filter if type != "ANY": where_clause += f" AND type(r) = '{type}'" - # 添加用户过滤 + # Add user filter where_clause += f" AND a.user_name = '{user_name}' AND b.user_name = '{user_name}'" query = f""" From 3159dc29adb469768913935d4a54583a0935cbd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Thu, 23 Oct 2025 20:30:49 +0800 Subject: [PATCH 102/137] fix _coerce_metadata --- src/memos/memories/textual/item.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/memos/memories/textual/item.py b/src/memos/memories/textual/item.py index 2da283d47..7cdfdf738 100644 --- a/src/memos/memories/textual/item.py +++ b/src/memos/memories/textual/item.py @@ -208,6 +208,17 @@ def _coerce_metadata(cls, v: Any): ): return v if isinstance(v, dict): + if "metadata" in v and isinstance(v["metadata"], dict): + nested_metadata = v["metadata"] + nested_metadata = nested_metadata.copy() + nested_metadata.pop("id", None) + nested_metadata.pop("memory", None) + v = nested_metadata + else: + v = v.copy() + v.pop("id", None) + v.pop("memory", None) + if v.get("relativity") is not None: return SearchedTreeNodeTextualMemoryMetadata(**v) if any(k in v for k in ("sources", "memory_type", "embedding", "background", "usage")): From c66812318da137df561d8ad33054411b1a310912 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Fri, 24 Oct 2025 12:06:24 +0800 Subject: [PATCH 103/137] feat: add rerank time --- src/memos/reranker/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/memos/reranker/base.py b/src/memos/reranker/base.py index 77a24c164..632623c38 100644 --- a/src/memos/reranker/base.py +++ b/src/memos/reranker/base.py @@ -3,6 +3,7 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING +from memos.utils import timed if TYPE_CHECKING: @@ -13,6 +14,7 @@ class BaseReranker(ABC): """Abstract interface for memory rerankers.""" 
@abstractmethod + @timed def rerank( self, query: str, From 69745ab6896ab8a86fb25a26e02006685b4c0ea1 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Fri, 24 Oct 2025 12:11:32 +0800 Subject: [PATCH 104/137] feat: add rerank time --- src/memos/reranker/base.py | 2 -- src/memos/reranker/cosine_local.py | 2 ++ src/memos/reranker/http_bge.py | 2 ++ src/memos/reranker/noop.py | 3 ++- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/memos/reranker/base.py b/src/memos/reranker/base.py index 632623c38..77a24c164 100644 --- a/src/memos/reranker/base.py +++ b/src/memos/reranker/base.py @@ -3,7 +3,6 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING -from memos.utils import timed if TYPE_CHECKING: @@ -14,7 +13,6 @@ class BaseReranker(ABC): """Abstract interface for memory rerankers.""" @abstractmethod - @timed def rerank( self, query: str, diff --git a/src/memos/reranker/cosine_local.py b/src/memos/reranker/cosine_local.py index 000b64cf4..5f83e5dda 100644 --- a/src/memos/reranker/cosine_local.py +++ b/src/memos/reranker/cosine_local.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING from .base import BaseReranker +from memos.utils import timed if TYPE_CHECKING: @@ -54,6 +55,7 @@ def __init__( self.level_weights = level_weights or {"topic": 1.0, "concept": 1.0, "fact": 1.0} self.level_field = level_field + @timed def rerank( self, query: str, diff --git a/src/memos/reranker/http_bge.py b/src/memos/reranker/http_bge.py index f0f5d17a0..b8ec82ba7 100644 --- a/src/memos/reranker/http_bge.py +++ b/src/memos/reranker/http_bge.py @@ -12,6 +12,7 @@ from .base import BaseReranker from .concat import concat_original_source +from memos.utils import timed logger = get_logger(__name__) @@ -118,6 +119,7 @@ def __init__( self.warn_unknown_filter_keys = bool(warn_unknown_filter_keys) self._warned_missing_keys: set[str] = set() + @timed def rerank( self, query: str, diff --git a/src/memos/reranker/noop.py b/src/memos/reranker/noop.py index 
7a9c02f60..4f6ba0438 100644 --- a/src/memos/reranker/noop.py +++ b/src/memos/reranker/noop.py @@ -3,13 +3,14 @@ from typing import TYPE_CHECKING from .base import BaseReranker - +from memos.utils import timed if TYPE_CHECKING: from memos.memories.textual.item import TextualMemoryItem class NoopReranker(BaseReranker): + @timed def rerank( self, query: str, graph_results: list, top_k: int, **kwargs ) -> list[tuple[TextualMemoryItem, float]]: From 9c15f5b96b5b794e46143d446280ee61f5a61fda Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Fri, 24 Oct 2025 16:04:59 +0800 Subject: [PATCH 105/137] fix:node_not_exist --- src/memos/graph_dbs/polardb.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 4a40fd5d1..d571ab205 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -8,6 +8,7 @@ import psycopg2 from psycopg2.extras import Json +from examples.mem_os.locomo_shared_database_memos import result from memos.configs.graph_db import PolarDBGraphDBConfig from memos.dependency import require_python_package from memos.graph_dbs.base import BaseGraphDB @@ -305,7 +306,7 @@ def node_not_exist(self, scope: str, user_name: str | None = None) -> int: cursor.execute(query, params) result = cursor.fetchone() print(f"[node_not_exist] Query result: {result}") - return len(result) + return 1 if result else 0 except Exception as e: logger.error(f"[node_not_exist] Query failed: {e}", exc_info=True) raise From 157deff412bfda71fd5a2f2166560bdc6dee8d9d Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Fri, 24 Oct 2025 16:09:52 +0800 Subject: [PATCH 106/137] import node --- examples/basic_modules/import_polardb_incr.py | 314 ++++++++++++++++++ 1 file changed, 314 insertions(+) create mode 100644 examples/basic_modules/import_polardb_incr.py diff --git a/examples/basic_modules/import_polardb_incr.py b/examples/basic_modules/import_polardb_incr.py new 
file mode 100644 index 000000000..4b492bf63 --- /dev/null +++ b/examples/basic_modules/import_polardb_incr.py @@ -0,0 +1,314 @@ +import json +import os +from collections import Counter +from psycopg2.extras import execute_batch, Json +import psycopg2 + + +class MemoryDataProcessor: + def __init__(self, db_config): + """ + 初始化数据库连接 + + Args: + db_config: 数据库连接配置 + graph_name: 图数据库名称 + """ + self.db_config = db_config + self.graph_name = db_config.get('graph_name') + print("fff:",db_config.get('graph_name')) + self.connection = None + + def connect(self): + """连接数据库""" + try: + self.connection = psycopg2.connect( + host=self.db_config["host"], + port=self.db_config["port"], + user=self.db_config["user"], + password=self.db_config["password"], + database=self.db_config["database"] + ) + print("✅ 数据库连接成功") + return True + except Exception as e: + print(f"❌ 数据库连接失败: {e}") + return False + + def disconnect(self): + """断开数据库连接""" + if self.connection: + self.connection.close() + print("✅ 数据库连接已关闭") + + def extract_nodes_simple(self, file_path): + """从 JSON 文件提取 id 和 properties 的简洁版本""" + try: + # 检查文件是否存在 + if not os.path.exists(file_path): + print(f"❌ 错误:文件 '{file_path}' 不存在") + return [] + + # 首先尝试用 utf-8-sig 读取(处理 BOM) + try: + with open(file_path, 'r', encoding='utf-8-sig') as file: + data = json.load(file) + print("✅ 使用 utf-8-sig 编码成功读取文件") + except json.JSONDecodeError: + # 如果 utf-8-sig 失败,尝试用 utf-8 + try: + with open(file_path, 'r', encoding='utf-8') as file: + data = json.load(file) + print("✅ 使用 utf-8 编码成功读取文件") + except json.JSONDecodeError as e: + print(f"❌ JSON 解析错误:{e}") + return [] + + result = [] + tables = data.get('tables', []) + + print(f"📊 找到 {len(tables)} 个表格") + + for i, table in enumerate(tables, 1): + n_data = table.get('n', {}) + value = n_data.get('value', {}) + + # 提取 id 和 properties + # node_id = value.get('id') + properties = value.get('properties', {}) + node_id = properties.get('id', {}) + + + + if node_id is not None: + # 构建符合插入格式的数据 + 
node_data = { + "id": str(node_id), # 转换为字符串 + "memory": properties.get("memory", ""), + "metadata": properties + } + result.append(node_data) + + print(f"🎯 成功提取 {len(result)} 个节点") + return result + + except Exception as e: + print(f"❌ 读取文件时发生错误:{e}") + return [] + + def clean_properties(self, properties): + """清理 properties,移除不需要的字段""" + # 移除 embedding 相关字段,这些字段会单独处理 + exclude_fields = [ + "embedding", "embedding_1024", "embedding_768", "embedding_3072", + "embedding_1024_vector", "embedding_768_vector", "embedding_3072_vector" + ] + + cleaned = {} + for key, value in properties.items(): + if key not in exclude_fields: + cleaned[key] = value + + return cleaned + + def detect_embedding_field(self, embedding): + """检测 embedding 的维度并返回对应的字段名""" + if not embedding: + return None + + if isinstance(embedding, list): + length = len(embedding) + if length == 1024: + return "embedding" + elif length == 768: + return "embedding_768" + elif length == 3072: + return "embedding_3072" + + return None + + def convert_to_vector(self, embedding): + """将 embedding 转换为 PostgreSQL 向量格式""" + if not embedding: + return None + + try: + if isinstance(embedding, list): + # 转换为 PostgreSQL 向量字符串格式: [1,2,3] + vector_str = "[" + ",".join(map(str, embedding)) + "]" + return vector_str + else: + return None + except Exception as e: + print(f"⚠️ 转换向量时出错: {e}") + return None + + def insert_nodes_to_db(self, nodes, batch_size=1000): + """将节点数据插入到数据库""" + if not nodes: + print("❌ 没有数据可插入") + return 0, [] + + if not self.connection: + print("❌ 数据库未连接") + return 0, [] + + total_success = 0 + all_errors = [] + + # 分批处理 + for i in range(0, len(nodes), batch_size): + current_batch = nodes[i:i + batch_size] + batch_success = 0 + batch_errors = [] + + print( + f"🔄 处理批次 {i // batch_size + 1}/{(len(nodes) - 1) // batch_size + 1} ({len(current_batch)} 个节点)") + + try: + with self.connection.cursor() as cursor: + # 准备批量插入数据 + insert_data_1024 = [] + insert_data_no_embedding = [] + + for node in current_batch: 
+ try: + id_ = node["id"] + memory_ = node["memory"] + metadata = node["metadata"] + + # 提取 embedding + embedding = None + for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: + if embedding_key in metadata and metadata[embedding_key]: + embedding = metadata[embedding_key] + break + + if isinstance(embedding, str): + try: + embedding = json.loads(embedding) + except json.JSONDecodeError: + print(f"⚠️ 无法解析embedding字符串: {embedding_key}") + embedding = None + + # 清理 properties + properties = self.clean_properties(metadata) + properties["id"] = id_ + properties["memory"] = memory_ + + # 生成 graph_id 并添加到 properties + try: + get_graph_id_query = f""" + SELECT ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring) + """ + cursor.execute(get_graph_id_query, (id_,)) + graph_id = cursor.fetchone()[0] + properties['graph_id'] = str(graph_id) + except Exception as e: + print(f"⚠️ 生成 graph_id 失败: {e}") + properties['graph_id'] = str(id_) # 备用方案 + + + # 根据embedding维度分类 + field_name = self.detect_embedding_field(embedding) + vector_value = self.convert_to_vector(embedding) if field_name else None + + if field_name == "embedding" and vector_value: + insert_data_1024.append((id_, Json(properties), vector_value)) + else: + insert_data_no_embedding.append((id_, Json(properties))) + + except Exception as e: + batch_errors.append(f"ID: {node.get('id', 'unknown')} - {e}") + + # 批量插入不同维度的数据 + if insert_data_1024: + insert_sql_1024 = f""" + INSERT INTO "Memory" (id, properties, embedding) + VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) + """ + execute_batch(cursor, insert_sql_1024, insert_data_1024) + batch_success += len(insert_data_1024) + print(f" ✅ 插入 {len(insert_data_1024)} 个带 embedding 的节点") + + if insert_data_no_embedding: + insert_sql_no_embedding = f""" + INSERT INTO "Memory" (id, properties) + VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 
'Memory'::name, %s::text::cstring), %s) + """ + execute_batch(cursor, insert_sql_no_embedding, insert_data_no_embedding) + batch_success += len(insert_data_no_embedding) + print(f" ✅ 插入 {len(insert_data_no_embedding)} 个无 embedding 的节点") + + # 提交当前批次 + self.connection.commit() + total_success += batch_success + all_errors.extend(batch_errors) + + print(f" ✅ 批次完成: {batch_success} 个节点插入成功") + + except Exception as e: + self.connection.rollback() + batch_errors.append(f"批次插入失败: {e}") + all_errors.extend(batch_errors) + print(f"❌ 批次插入失败: {e}") + + return total_success, all_errors + + def process_file(self, file_path, batch_size): + """完整处理流程:提取数据并插入数据库""" + print("🚀 开始处理数据文件...") + + # 1. 提取数据 + nodes = self.extract_nodes_simple(file_path) + if not nodes: + return + + # 3. 连接数据库 + if not self.connect(): + return + + try: + # 4. 插入数据到数据库 + print(f"\n💾 开始插入数据到数据库...") + success_count, errors = self.insert_nodes_to_db(nodes, batch_size) + + # 5. 显示结果 + print(f"\n🎉 处理完成!") + print(f"✅ 成功插入: {success_count}/{len(nodes)} 个节点") + print(f"❌ 错误数量: {len(errors)}") + + if errors: + print(f"\n📋 错误详情 (前10个):") + for error in errors[:10]: + print(f" - {error}") + if len(errors) > 10: + print(f" ... 还有 {len(errors) - 10} 个错误") + + finally: + # 6. 
断开数据库连接 + self.disconnect() + + +# 使用示例 +if __name__ == "__main__": + # 数据库配置(请根据实际情况修改) + # POLARDB_CONFIG = { + # "host": "memory.pg.polardb.rds.aliyuncs.com", + # "port": 5432, + # "user": "adimin", + # "password": "Openmem0925", + # "database": "memtensor_memos", + # # "database": "test_zdy", + # "graph_name": "memtensor_memos_graph" + # # "graph_name": "test_zdy_graph" + # } + + # 文件路径 + file_path = "/Users/ccl/Desktop/file/temp/result.json" + + # 创建处理器实例 + processor = MemoryDataProcessor(POLARDB_CONFIG) + + # 处理文件 + processor.process_file(file_path, batch_size=1000) \ No newline at end of file From c8e1cead721f0fe861163f00b678135416e135f3 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Fri, 24 Oct 2025 16:10:06 +0800 Subject: [PATCH 107/137] import node --- examples/basic_modules/import_polardb_incr.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/basic_modules/import_polardb_incr.py b/examples/basic_modules/import_polardb_incr.py index 4b492bf63..743b8f781 100644 --- a/examples/basic_modules/import_polardb_incr.py +++ b/examples/basic_modules/import_polardb_incr.py @@ -293,16 +293,16 @@ def process_file(self, file_path, batch_size): # 使用示例 if __name__ == "__main__": # 数据库配置(请根据实际情况修改) - # POLARDB_CONFIG = { - # "host": "memory.pg.polardb.rds.aliyuncs.com", - # "port": 5432, - # "user": "adimin", - # "password": "Openmem0925", - # "database": "memtensor_memos", - # # "database": "test_zdy", - # "graph_name": "memtensor_memos_graph" - # # "graph_name": "test_zdy_graph" - # } + POLARDB_CONFIG = { + "host": "memory.pg.polardb.rds.aliyuncs.com", + "port": 5432, + "user": "adimin", + "password": "Openmem0925", + "database": "memtensor_memos", + # "database": "test_zdy", + "graph_name": "memtensor_memos_graph" + # "graph_name": "test_zdy_graph" + } # 文件路径 file_path = "/Users/ccl/Desktop/file/temp/result.json" From c3a23673e5a8a95fdd81a8bbd0dc5a433838b8c9 Mon Sep 17 00:00:00 2001 From: liji 
<532311301@qq.com> Date: Fri, 24 Oct 2025 16:11:16 +0800 Subject: [PATCH 108/137] feat: fix merge_config_with_default --- src/memos/mem_cube/utils.py | 103 ++++++++++++++++++++++++------------ 1 file changed, 68 insertions(+), 35 deletions(-) diff --git a/src/memos/mem_cube/utils.py b/src/memos/mem_cube/utils.py index a413ccce5..e23412bc1 100644 --- a/src/memos/mem_cube/utils.py +++ b/src/memos/mem_cube/utils.py @@ -68,44 +68,77 @@ def merge_config_with_default( if "graph_db" in existing_text_config and "graph_db" in default_text_config: existing_graph_config = existing_text_config["graph_db"]["config"] default_graph_config = default_text_config["graph_db"]["config"] - - # Define graph_db fields to preserve (user-specific) - preserve_graph_fields = { - "auto_create", - "user_name", - "use_multi_db", - } - - # Create merged graph_db config - merged_graph_config = copy.deepcopy(existing_graph_config) - for key, value in default_graph_config.items(): - if key not in preserve_graph_fields: - merged_graph_config[key] = value - logger.debug( - f"Updated graph_db field '{key}': {existing_graph_config.get(key)} -> {value}" + existing_backend = existing_text_config["graph_db"]["backend"] + default_backend = default_text_config["graph_db"]["backend"] + + # Detect backend change + backend_changed = existing_backend != default_backend + + if backend_changed: + logger.info( + f"Detected graph_db backend change: {existing_backend} -> {default_backend}. " + f"Migrating configuration..." 
+ ) + # Start with default config as base when backend changes + merged_graph_config = copy.deepcopy(default_graph_config) + + # Preserve user-specific fields if they exist in both configs + preserve_graph_fields = { + "auto_create", + "user_name", + "use_multi_db", + } + for field in preserve_graph_fields: + if field in existing_graph_config: + merged_graph_config[field] = existing_graph_config[field] + logger.debug(f"Preserved graph_db field '{field}': {existing_graph_config[field]}") + + # Clean up backend-specific fields that don't exist in the new backend + # This approach is generic: remove any field from merged config that's not in default config + # and not in the preserve list + fields_to_remove = [] + for field in list(merged_graph_config.keys()): + if field not in default_graph_config and field not in preserve_graph_fields: + fields_to_remove.append(field) + + for field in fields_to_remove: + removed_value = merged_graph_config.pop(field) + logger.info( + f"Removed {existing_backend}-specific field '{field}' (value: {removed_value}) " + f"during migration to {default_backend}" ) - if not default_graph_config.get("use_multi_db", True): - # set original use_multi_db to False if default_graph_config.use_multi_db is False - if merged_graph_config.get("use_multi_db", True): - merged_graph_config["use_multi_db"] = False - merged_graph_config["user_name"] = merged_graph_config.get("db_name") - merged_graph_config["db_name"] = default_graph_config.get("db_name") - else: - logger.info("use_multi_db is already False, no need to change") - if "neo4j" not in default_text_config["graph_db"]["backend"]: - if "db_name" in merged_graph_config: - merged_graph_config.pop("db_name") - logger.info("neo4j is not supported, remove db_name") - else: - logger.info("db_name is not in merged_graph_config, no need to remove") else: - if "space" in merged_graph_config: - merged_graph_config.pop("space") - logger.info("neo4j is not supported, remove db_name") - else: - 
logger.info("space is not in merged_graph_config, no need to remove") + # Same backend: merge configs while preserving user-specific fields + logger.debug(f"Same graph_db backend ({default_backend}), merging configurations") + preserve_graph_fields = { + "auto_create", + "user_name", + "use_multi_db", + } + + # Start with existing config as base + merged_graph_config = copy.deepcopy(existing_graph_config) + + # Update with default config except preserved fields + for key, value in default_graph_config.items(): + if key not in preserve_graph_fields: + merged_graph_config[key] = value + logger.debug( + f"Updated graph_db field '{key}': {existing_graph_config.get(key)} -> {value}" + ) + + # Handle use_multi_db transition + if not default_graph_config.get("use_multi_db", True): + if merged_graph_config.get("use_multi_db", True): + merged_graph_config["use_multi_db"] = False + # For Neo4j: db_name becomes user_name in single-db mode + if "neo4j" in default_backend and "db_name" in merged_graph_config: + merged_graph_config["user_name"] = merged_graph_config.get("db_name") + merged_graph_config["db_name"] = default_graph_config.get("db_name") + logger.info("Transitioned to single-db mode (use_multi_db=False)") + preserved_graph_db = { - "backend": default_text_config["graph_db"]["backend"], + "backend": default_backend, "config": merged_graph_config, } From 8f10b2d166a051fcd6d3f1b507ff74c9d33e42c4 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Fri, 24 Oct 2025 16:13:40 +0800 Subject: [PATCH 109/137] import node --- examples/basic_modules/import_polardb_incr.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/basic_modules/import_polardb_incr.py b/examples/basic_modules/import_polardb_incr.py index 743b8f781..f0e21a7d6 100644 --- a/examples/basic_modules/import_polardb_incr.py +++ b/examples/basic_modules/import_polardb_incr.py @@ -294,13 +294,13 @@ def process_file(self, file_path, batch_size): if __name__ == 
"__main__": # 数据库配置(请根据实际情况修改) POLARDB_CONFIG = { - "host": "memory.pg.polardb.rds.aliyuncs.com", + "host": "xxx", "port": 5432, - "user": "adimin", - "password": "Openmem0925", - "database": "memtensor_memos", + "user": "xxx", + "password": "xxx", + "database": "xxx", # "database": "test_zdy", - "graph_name": "memtensor_memos_graph" + "graph_name": "xxx" # "graph_name": "test_zdy_graph" } From b8f29c165f1d4393f29b32cde26f14ead24e3a2d Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Fri, 24 Oct 2025 16:18:47 +0800 Subject: [PATCH 110/137] fix --- examples/basic_modules/import_polardb_incr.py | 104 +++++++++--------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/examples/basic_modules/import_polardb_incr.py b/examples/basic_modules/import_polardb_incr.py index f0e21a7d6..51c9da583 100644 --- a/examples/basic_modules/import_polardb_incr.py +++ b/examples/basic_modules/import_polardb_incr.py @@ -8,11 +8,11 @@ class MemoryDataProcessor: def __init__(self, db_config): """ - 初始化数据库连接 + Initialize database connection Args: - db_config: 数据库连接配置 - graph_name: 图数据库名称 + db_config: Database connection configuration + graph_name: Graph database name """ self.db_config = db_config self.graph_name = db_config.get('graph_name') @@ -20,7 +20,7 @@ def __init__(self, db_config): self.connection = None def connect(self): - """连接数据库""" + """Connect to database""" try: self.connection = psycopg2.connect( host=self.db_config["host"], @@ -29,51 +29,51 @@ def connect(self): password=self.db_config["password"], database=self.db_config["database"] ) - print("✅ 数据库连接成功") + print("✅ Database connection successful") return True except Exception as e: - print(f"❌ 数据库连接失败: {e}") + print(f"❌ Database connection failed: {e}") return False def disconnect(self): - """断开数据库连接""" + """Disconnect database connection""" if self.connection: self.connection.close() - print("✅ 数据库连接已关闭") + print("✅ Database connection closed") def extract_nodes_simple(self, file_path): - """从 
JSON 文件提取 id 和 properties 的简洁版本""" + """Extract simplified id and properties from JSON file""" try: - # 检查文件是否存在 + # Check if file exists if not os.path.exists(file_path): - print(f"❌ 错误:文件 '{file_path}' 不存在") + print(f"❌ Error: File '{file_path}' does not exist") return [] - # 首先尝试用 utf-8-sig 读取(处理 BOM) + # First try reading with utf-8-sig (handle BOM) try: with open(file_path, 'r', encoding='utf-8-sig') as file: data = json.load(file) - print("✅ 使用 utf-8-sig 编码成功读取文件") + print("✅ Successfully read file with utf-8-sig encoding") except json.JSONDecodeError: - # 如果 utf-8-sig 失败,尝试用 utf-8 + # If utf-8-sig fails, try utf-8 try: with open(file_path, 'r', encoding='utf-8') as file: data = json.load(file) - print("✅ 使用 utf-8 编码成功读取文件") + print("✅ Successfully read file with utf-8 encoding") except json.JSONDecodeError as e: - print(f"❌ JSON 解析错误:{e}") + print(f"❌ JSON parse error: {e}") return [] result = [] tables = data.get('tables', []) - print(f"📊 找到 {len(tables)} 个表格") + print(f"📊 Found {len(tables)} tables") for i, table in enumerate(tables, 1): n_data = table.get('n', {}) value = n_data.get('value', {}) - # 提取 id 和 properties + # Extract id and properties # node_id = value.get('id') properties = value.get('properties', {}) node_id = properties.get('id', {}) @@ -81,7 +81,7 @@ def extract_nodes_simple(self, file_path): if node_id is not None: - # 构建符合插入格式的数据 + # Build data in insertion format node_data = { "id": str(node_id), # 转换为字符串 "memory": properties.get("memory", ""), @@ -89,16 +89,16 @@ def extract_nodes_simple(self, file_path): } result.append(node_data) - print(f"🎯 成功提取 {len(result)} 个节点") + print(f"🎯 Successfully extracted {len(result)} nodes") return result except Exception as e: - print(f"❌ 读取文件时发生错误:{e}") + print(f"❌ Error occurred while reading file: {e}") return [] def clean_properties(self, properties): - """清理 properties,移除不需要的字段""" - # 移除 embedding 相关字段,这些字段会单独处理 + """Clean properties and remove unnecessary fields""" + # Remove embedding-related 
fields; these will be handled separately exclude_fields = [ "embedding", "embedding_1024", "embedding_768", "embedding_3072", "embedding_1024_vector", "embedding_768_vector", "embedding_3072_vector" @@ -112,7 +112,7 @@ def clean_properties(self, properties): return cleaned def detect_embedding_field(self, embedding): - """检测 embedding 的维度并返回对应的字段名""" + """Detect embedding dimension and return corresponding field name""" if not embedding: return None @@ -128,29 +128,29 @@ def detect_embedding_field(self, embedding): return None def convert_to_vector(self, embedding): - """将 embedding 转换为 PostgreSQL 向量格式""" + """Convert embedding to PostgreSQL vector format""" if not embedding: return None try: if isinstance(embedding, list): - # 转换为 PostgreSQL 向量字符串格式: [1,2,3] + # Convert to PostgreSQL vector string format: [1,2,3] vector_str = "[" + ",".join(map(str, embedding)) + "]" return vector_str else: return None except Exception as e: - print(f"⚠️ 转换向量时出错: {e}") + print(f"⚠️ Error converting vector: {e}") return None def insert_nodes_to_db(self, nodes, batch_size=1000): - """将节点数据插入到数据库""" + """Insert node data into the database""" if not nodes: - print("❌ 没有数据可插入") + print("❌ No data to insert") return 0, [] if not self.connection: - print("❌ 数据库未连接") + print("❌ Database not connected") return 0, [] total_success = 0 @@ -163,11 +163,11 @@ def insert_nodes_to_db(self, nodes, batch_size=1000): batch_errors = [] print( - f"🔄 处理批次 {i // batch_size + 1}/{(len(nodes) - 1) // batch_size + 1} ({len(current_batch)} 个节点)") + f"🔄 Processing batch {i // batch_size + 1}/{(len(nodes) - 1) // batch_size + 1} ({len(current_batch)} nodes)") try: with self.connection.cursor() as cursor: - # 准备批量插入数据 + # Prepare batch insert data insert_data_1024 = [] insert_data_no_embedding = [] @@ -188,7 +188,7 @@ def insert_nodes_to_db(self, nodes, batch_size=1000): try: embedding = json.loads(embedding) except json.JSONDecodeError: - print(f"⚠️ 无法解析embedding字符串: {embedding_key}") + print(f"⚠️ Unable to 
parse embedding string: {embedding_key}") embedding = None # 清理 properties @@ -205,7 +205,7 @@ def insert_nodes_to_db(self, nodes, batch_size=1000): graph_id = cursor.fetchone()[0] properties['graph_id'] = str(graph_id) except Exception as e: - print(f"⚠️ 生成 graph_id 失败: {e}") + print(f"⚠️ Failed to generate graph_id: {e}") properties['graph_id'] = str(id_) # 备用方案 @@ -229,7 +229,7 @@ def insert_nodes_to_db(self, nodes, batch_size=1000): """ execute_batch(cursor, insert_sql_1024, insert_data_1024) batch_success += len(insert_data_1024) - print(f" ✅ 插入 {len(insert_data_1024)} 个带 embedding 的节点") + print(f" ✅ Inserted {len(insert_data_1024)} nodes with embedding") if insert_data_no_embedding: insert_sql_no_embedding = f""" @@ -238,55 +238,55 @@ def insert_nodes_to_db(self, nodes, batch_size=1000): """ execute_batch(cursor, insert_sql_no_embedding, insert_data_no_embedding) batch_success += len(insert_data_no_embedding) - print(f" ✅ 插入 {len(insert_data_no_embedding)} 个无 embedding 的节点") + print(f" ✅ Inserted {len(insert_data_no_embedding)} nodes without embedding") # 提交当前批次 self.connection.commit() total_success += batch_success all_errors.extend(batch_errors) - print(f" ✅ 批次完成: {batch_success} 个节点插入成功") + print(f" ✅ Batch complete: {batch_success} nodes inserted successfully") except Exception as e: self.connection.rollback() - batch_errors.append(f"批次插入失败: {e}") + batch_errors.append(f"Batch insert failed: {e}") all_errors.extend(batch_errors) - print(f"❌ 批次插入失败: {e}") + print(f"❌ Batch insertion failed: {e}") return total_success, all_errors def process_file(self, file_path, batch_size): - """完整处理流程:提取数据并插入数据库""" - print("🚀 开始处理数据文件...") + """Complete processing flow: extract data and insert into database""" + print("🚀 Starting to process data file...") - # 1. 提取数据 + # 1. Extract data nodes = self.extract_nodes_simple(file_path) if not nodes: return - # 3. 连接数据库 + # 3. Connect to database if not self.connect(): return try: - # 4. 
插入数据到数据库 - print(f"\n💾 开始插入数据到数据库...") + # 4. Insert data into database + print(f"\n💾 Starting to insert data into database...") success_count, errors = self.insert_nodes_to_db(nodes, batch_size) - # 5. 显示结果 - print(f"\n🎉 处理完成!") - print(f"✅ 成功插入: {success_count}/{len(nodes)} 个节点") - print(f"❌ 错误数量: {len(errors)}") + # 5. Display results + print(f"\n🎉 Processing complete!") + print(f"✅ Successfully inserted: {success_count}/{len(nodes)} nodes") + print(f"❌ Error count: {len(errors)}") if errors: - print(f"\n📋 错误详情 (前10个):") + print(f"\n📋 Error details (first 10):") for error in errors[:10]: print(f" - {error}") if len(errors) > 10: - print(f" ... 还有 {len(errors) - 10} 个错误") + print(f" ... {len(errors) - 10} more errors") finally: - # 6. 断开数据库连接 + # 6. Disconnect database connection self.disconnect() From 0b58d47b63264855d156e6258a49847172fb425c Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Fri, 24 Oct 2025 16:25:21 +0800 Subject: [PATCH 111/137] fix --- .../batchImport_polardbFromJson.py | 181 +++++++++--------- 1 file changed, 92 insertions(+), 89 deletions(-) diff --git a/examples/basic_modules/batchImport_polardbFromJson.py b/examples/basic_modules/batchImport_polardbFromJson.py index ef3af528c..9c2409631 100644 --- a/examples/basic_modules/batchImport_polardbFromJson.py +++ b/examples/basic_modules/batchImport_polardbFromJson.py @@ -6,16 +6,16 @@ import os from datetime import datetime -# PolarDB 配置 +# PolarDB configuration POLARDB_CONFIG = { - "host": "memory.pg.polardb.rds.aliyuncs.com", + "host": "xxx", "port": 5432, - "user": "adimin", - "password": "Openmem0925", - # "database": "memtensor_memos", - "database": "test_zdy", - # "graph_name": "memtensor_memos_graph" - "graph_name": "test_zdy_graph" + "user": "xxx", + "password": "xxx", + # "database": "xxx", + "database": "xxx", + # "graph_name": "xxx" + "graph_name": "xxx" } @@ -30,18 +30,18 @@ def __init__(self, config): database=config["database"] ) self.graph_name = 
config.get("graph_name") - # 设置自动提交为False,手动控制事务 + # Set autocommit to False to manually control transactions self.connection.autocommit = False - print("✅ PolarDB连接成功") + print("✅ PolarDB connection successful") def update_graph_id_in_properties(self): - """更新properties字段,添加graph_id""" - print("🔄 开始更新properties字段,添加graph_id...") + """Update properties field to add graph_id""" + print("🔄 Starting to update properties field, adding graph_id...") start_time = datetime.now() try: with self.connection.cursor() as cursor: - # 执行UPDATE语句,将graph_id添加到properties中 + # Execute UPDATE to add graph_id into properties update_sql = f""" UPDATE {self.graph_name}."Memory" SET properties = agtype_concat(properties, agtype_build_map('graph_id', id::text)) @@ -52,24 +52,24 @@ def update_graph_id_in_properties(self): self.connection.commit() elapsed = (datetime.now() - start_time).total_seconds() - print(f"✅ 成功更新 {updated_count} 条记录的properties字段,耗时: {elapsed:.2f}秒") + print(f"✅ Successfully updated {updated_count} records' properties, elapsed: {elapsed:.2f}s") return updated_count except Exception as e: self.connection.rollback() - print(f"❌ 更新properties字段失败: {e}") + print(f"❌ Failed to update properties field: {e}") return 0 def batch_add_nodes_optimized(self, nodes, batch_size=1000): - """优化版批量插入节点""" + """Optimized batch insertion of nodes""" success_count = 0 error_count = 0 total_nodes = len(nodes) - print(f"🚀 开始处理 {total_nodes} 条记录,批次大小: {batch_size}") + print(f"🚀 Start processing {total_nodes} records, batch size: {batch_size}") start_time = datetime.now() - # 按批次处理 + # Process in batches for batch_start in range(0, total_nodes, batch_size): batch_end = min(batch_start + batch_size, total_nodes) current_batch = nodes[batch_start:batch_end] @@ -80,7 +80,7 @@ def batch_add_nodes_optimized(self, nodes, batch_size=1000): try: with self.connection.cursor() as cursor: - # 准备批量插入数据 + # Prepare batch insert data insert_data_1024 = [] # insert_data_768 = [] # insert_data_3072 = [] @@ 
-99,7 +99,7 @@ def batch_add_nodes_optimized(self, nodes, batch_size=1000): # graph_id = cursor.fetchone()[0] # properties['graph_id'] = str(graph_id) - # 提取 embedding + # Extract embedding embedding = None for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: if embedding_key in metadata and metadata[embedding_key]: @@ -110,14 +110,14 @@ def batch_add_nodes_optimized(self, nodes, batch_size=1000): try: embedding = json.loads(embedding) except json.JSONDecodeError: - print(f"⚠️ 无法解析embedding字符串: {embedding_key}") + print(f"⚠️ Unable to parse embedding string: {embedding_key}") embedding = None - # 清理 properties + # Clean properties properties = self.clean_properties(metadata) properties["id"] = id_ properties["memory"] = memory_ - # 根据embedding维度分类 + # Classify by embedding dimension field_name = self.detect_embedding_field(embedding) vector_value = self.convert_to_vector(embedding) if field_name else None @@ -133,7 +133,7 @@ def batch_add_nodes_optimized(self, nodes, batch_size=1000): except Exception as e: batch_errors.append(f"ID: {node.get('id', 'unknown')} - {e}") - # 批量插入不同维度的数据 + # Batch insert for different dimensions if insert_data_1024: insert_sql_1024 = f""" INSERT INTO "Memory" (id, properties, embedding) @@ -166,48 +166,48 @@ def batch_add_nodes_optimized(self, nodes, batch_size=1000): execute_batch(cursor, insert_sql_no_embedding, insert_data_no_embedding) batch_success += len(insert_data_no_embedding) - # 提交当前批次 + # Commit current batch self.connection.commit() success_count += batch_success error_count += len(batch_errors) - # 进度显示 + # Progress display elapsed = (datetime.now() - start_time).total_seconds() progress = (batch_end / total_nodes) * 100 estimated_total = (elapsed / batch_end) * total_nodes if batch_end > 0 else 0 remaining = estimated_total - elapsed - print(f"📊 进度: {batch_end}/{total_nodes} ({progress:.1f}%) | " - f"成功: {success_count} | 失败: {error_count} | " - f"已用: {elapsed:.0f}s | 剩余: 
{remaining:.0f}s") + print(f"📊 Progress: {batch_end}/{total_nodes} ({progress:.1f}%) | " + f"Success: {success_count} | Failures: {error_count} | " + f"Elapsed: {elapsed:.0f}s | Remaining: {remaining:.0f}s") - # 输出批次错误 + # Output batch errors if batch_errors: - print(f"❌ 本批次错误: {len(batch_errors)} 条") - for i, error in enumerate(batch_errors[:5]): # 只显示前5个错误 + print(f"❌ Errors in this batch: {len(batch_errors)}") + for i, error in enumerate(batch_errors[:5]): # Only show first 5 errors print(f" {i + 1}. {error}") if len(batch_errors) > 5: - print(f" ... 还有 {len(batch_errors) - 5} 个错误") + print(f" ... {len(batch_errors) - 5} more errors") except Exception as e: self.connection.rollback() error_count += len(current_batch) - print(f"❌ 批次 {batch_start}-{batch_end} 整体失败: {e}") + print(f"❌ Batch {batch_start}-{batch_end} failed: {e}") total_time = (datetime.now() - start_time).total_seconds() - print(f"✅ 批量插入完成: 成功 {success_count} 条, 失败 {error_count} 条, 总耗时: {total_time:.2f}秒") + print(f"✅ Batch insertion complete: Success {success_count}, Failures {error_count}, Total time: {total_time:.2f}s") return success_count, error_count def clean_properties(self, props): - """移除向量字段""" + """Remove vector fields""" vector_keys = {"embedding", "embedding_1024", "embedding_3072", "embedding_768"} if not isinstance(props, dict): return {} return {k: v for k, v in props.items() if k not in vector_keys} def detect_embedding_field(self, embedding_list): - """检测 embedding 维度并返回对应的字段名""" + """Detect embedding dimension and return corresponding field name""" if not embedding_list: return None dim = len(embedding_list) @@ -219,11 +219,11 @@ def detect_embedding_field(self, embedding_list): elif dim == 3072: return "embedding_3072" else: - print(f"⚠️ 未知 embedding 维度 {dim},跳过该向量") + print(f"⚠️ Unknown embedding dimension {dim}, skipping vector") return None def convert_to_vector(self, embedding_list): - """将 embedding 列表转换为向量字符串""" + """Convert embedding list to vector string""" if not 
embedding_list: return None if isinstance(embedding_list, np.ndarray): @@ -231,23 +231,23 @@ def convert_to_vector(self, embedding_list): return "[" + ",".join(str(float(x)) for x in embedding_list) + "]" def close(self): - """关闭数据库连接""" + """Close database connection""" if self.connection: self.connection.close() - print("🔒 PolarDB连接已关闭") + print("🔒 PolarDB connection closed") def getPolarDb(): - """直接创建 PolarDB 图数据库实例""" + """Create PolarDB graph database instance""" return PolarDBGraph(POLARDB_CONFIG) def process_metadata(item): - """处理元数据,提取和转换字段""" + """Process metadata, extract and convert fields""" metadata = {} for key, value in item.items(): if key not in ["id", "memory"]: - # 类型转换 + # Type conversion if key == "confidence": try: metadata[key] = float(value) @@ -281,7 +281,7 @@ def process_metadata(item): def extract_embedding(item): - """从数据项中提取embedding""" + """Extract embedding from data item""" embedding = None for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: if embedding_key in item and item[embedding_key]: @@ -290,7 +290,7 @@ def extract_embedding(item): try: embedding = json.loads(embedding_value) except json.JSONDecodeError: - print(f"⚠️ 无法解析embedding字符串: {embedding_key}") + print(f"⚠️ Unable to parse embedding string: {embedding_key}") embedding = None else: embedding = embedding_value @@ -299,7 +299,7 @@ def extract_embedding(item): def prepare_nodes_for_insertion(data_list): - """准备要插入的节点数据""" + """Prepare node data for insertion""" nodes_to_insert = [] processed_count = 0 skipped_count = 0 @@ -309,19 +309,19 @@ def prepare_nodes_for_insertion(data_list): memory_ = item.get("memory") if not id_ or not memory_: - print(f"⚠️ 跳过无效数据: ID或memory为空") + print(f"⚠️ Skipping invalid data: ID or memory is empty") skipped_count += 1 continue - # 处理元数据 + # Process metadata metadata = process_metadata(item) - # 处理embedding字段 + # Handle embedding field embedding = extract_embedding(item) if embedding: 
metadata["embedding"] = embedding - # 构建插入的数据 + # Build data for insertion nodes_to_insert.append({ "id": id_, "memory": memory_, @@ -329,29 +329,32 @@ def prepare_nodes_for_insertion(data_list): }) processed_count += 1 - # 显示进度 + # Show progress if processed_count % 10000 == 0: - print(f"📝 已预处理 {processed_count} 条数据") + print(f"📝 Preprocessed {processed_count} records") - print(f"✅ 数据预处理完成: 有效 {processed_count} 条, 跳过 {skipped_count} 条") + print(f"✅ Data preprocessing complete: Valid {processed_count}, Skipped {skipped_count}") return nodes_to_insert def insert_data_optimized(data_list, batch_size=1000): - """优化版数据插入""" + """Optimized data insertion""" graph = getPolarDb() - # 数据预处理 - print("🔄 开始预处理数据...") + # Data preprocessing + print("🔄 Starting data preprocessing...") nodes_to_insert = prepare_nodes_for_insertion(data_list) if not nodes_to_insert: - print("⚠️ 没有有效数据需要插入") + print("⚠️ No valid data to insert") graph.close() return 0, 0 - # 使用优化版批量插入 - print("🚀 开始批量插入数据...") + # Use optimized version, set batch size to 1000 + # Adjust batch size based on conditions: + # - Good network: 1000-2000 + # - Average network: 500-1000 + # - Limited memory: 200-500 success_count, error_count = graph.batch_add_nodes_optimized(nodes_to_insert, batch_size) graph.close() @@ -360,15 +363,15 @@ def insert_data_optimized(data_list, batch_size=1000): def load_data_from_file(filename): - """从文件加载数据""" - print(f"📂 正在加载文件: {filename}") + """Load data from file""" + print(f"📂 Loading file: {filename}") try: with open(filename, "r", encoding="utf-8") as f: data = json.load(f) - print(f"📂 从文件 {filename} 加载了 {len(data)} 条记录") + print(f"📂 Loaded {len(data)} records from file {filename}") return data except Exception as e: - print(f"❌ 加载文件失败: {e}") + print(f"❌ Failed to load file: {e}") return [] def update_graph(): @@ -378,63 +381,63 @@ def update_graph(): print("---------update_graph[end]") def insert_data(conn, data): - # 记录总开始时间 + # Record total start time total_start_time = 
datetime.now() if not data: - print("⚠️ 没有数据") + print("⚠️ No data") return - print(f"🎯 总共需要处理 {len(data)} 条记录") + print(f"🎯 Total records to process: {len(data)}") success_count, error_count = insert_data_optimized(data, batch_size=1000) - # 计算总耗时 + # Compute total time total_time = (datetime.now() - total_start_time).total_seconds() minutes, seconds = divmod(total_time, 60) hours, minutes = divmod(minutes, 60) - print(f"\n🎉 处理完成!") - print(f"📊 最终结果:") - print(f" ✅ 成功: {success_count} 条") - print(f" ❌ 失败: {error_count} 条") - print(f" ⏱️ 总耗时: {int(hours)}小时{int(minutes)}分钟{seconds:.2f}秒") + print(f"\n🎉 Processing complete!") + print(f"📊 Final results:") + print(f" ✅ Success: {success_count}") + print(f" ❌ Failures: {error_count}") + print(f" ⏱️ Total time: {int(hours)}h {int(minutes)}m {seconds:.2f}s") def main(): json_file = r"/Users/ccl/Desktop/file/export13/ceshi/ceshi.json" - # 记录总开始时间 + # Record total start time total_start_time = datetime.now() - # 加载数据 + # Load data data = load_data_from_file(json_file) if not data: - print("⚠️ 没有数据") + print("⚠️ No data") return - print(f"🎯 总共需要处理 {len(data)} 条记录") + print(f"🎯 Total records to process: {len(data)}") - # 使用优化版本,设置批次大小为1000 - # 可以根据实际情况调整批次大小: - # - 网络好:1000-2000 - # - 网络一般:500-1000 - # - 内存有限:200-500 + # Use optimized version, set batch size to 1000 + # Adjust batch size based on conditions: + # - Good network: 1000-2000 + # - Average network: 500-1000 + # - Limited memory: 200-500 success_count, error_count = insert_data_optimized(data, batch_size=1000) - # 计算总耗时 + # Compute total time total_time = (datetime.now() - total_start_time).total_seconds() minutes, seconds = divmod(total_time, 60) hours, minutes = divmod(minutes, 60) - print(f"\n🎉 处理完成!") - print(f"📊 最终结果:") - print(f" ✅ 成功: {success_count} 条") - print(f" ❌ 失败: {error_count} 条") - print(f" ⏱️ 总耗时: {int(hours)}小时{int(minutes)}分钟{seconds:.2f}秒") + print(f"\n🎉 Processing complete!") + print(f"📊 Final results:") + print(f" ✅ Success: {success_count}") 
+ print(f" ❌ Failures: {error_count}") + print(f" ⏱️ Total time: {int(hours)}h {int(minutes)}m {seconds:.2f}s") if success_count > 0: records_per_second = success_count / total_time - print(f" 🚀 处理速度: {records_per_second:.2f} 条/秒") + print(f" 🚀 Processing speed: {records_per_second:.2f} records/sec") if __name__ == "__main__": From bfeb4dc27e25c8ba4d6e8bf02a7977b087374dc7 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Sun, 26 Oct 2025 10:42:39 +0800 Subject: [PATCH 112/137] feat: fix polardb --- src/memos/graph_dbs/polardb.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index d571ab205..9676c5a1b 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -8,7 +8,6 @@ import psycopg2 from psycopg2.extras import Json -from examples.mem_os.locomo_shared_database_memos import result from memos.configs.graph_db import PolarDBGraphDBConfig from memos.dependency import require_python_package from memos.graph_dbs.base import BaseGraphDB From ed20e79d90c4f4792ed3a4bceb6d4d8ef27ebab1 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Sun, 26 Oct 2025 12:05:23 +0800 Subject: [PATCH 113/137] feat: fix scheduler method name --- src/memos/mem_scheduler/general_scheduler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/memos/mem_scheduler/general_scheduler.py b/src/memos/mem_scheduler/general_scheduler.py index 25c7b78fd..d56d4f128 100644 --- a/src/memos/mem_scheduler/general_scheduler.py +++ b/src/memos/mem_scheduler/general_scheduler.py @@ -142,7 +142,7 @@ def _query_message_consumer(self, messages: list[ScheduleMessageItem]) -> None: logger.info(f"Messages {messages} assigned to {QUERY_LABEL} handler.") # Process the query in a session turn - grouped_messages = self.dispatcher.group_messages_by_user_and_cube(messages=messages) + grouped_messages = self.dispatcher._group_messages_by_user_and_mem_cube(messages=messages) 
self.validate_schedule_messages(messages=messages, label=QUERY_LABEL) @@ -164,7 +164,7 @@ def _answer_message_consumer(self, messages: list[ScheduleMessageItem]) -> None: """ logger.info(f"Messages {messages} assigned to {ANSWER_LABEL} handler.") # Process the query in a session turn - grouped_messages = self.dispatcher.group_messages_by_user_and_cube(messages=messages) + grouped_messages = self.dispatcher._group_messages_by_user_and_mem_cube(messages=messages) self.validate_schedule_messages(messages=messages, label=ANSWER_LABEL) @@ -180,7 +180,7 @@ def _answer_message_consumer(self, messages: list[ScheduleMessageItem]) -> None: def _add_message_consumer(self, messages: list[ScheduleMessageItem]) -> None: logger.info(f"Messages {messages} assigned to {ADD_LABEL} handler.") # Process the query in a session turn - grouped_messages = self.dispatcher.group_messages_by_user_and_cube(messages=messages) + grouped_messages = self.dispatcher._group_messages_by_user_and_mem_cube(messages=messages) self.validate_schedule_messages(messages=messages, label=ADD_LABEL) try: From b91c2033d2282495ab5864bd42983bcdadede524 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 26 Oct 2025 21:58:12 +0800 Subject: [PATCH 114/137] fix get_by_metadata for "query": "How long ago was Caroline's 18th birthday?" --- src/memos/graph_dbs/polardb.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 4a40fd5d1..8d52f5ff1 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -1331,30 +1331,34 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = list[str]: Node IDs whose metadata match the filter conditions. (AND logic). 
""" user_name = user_name if user_name else self._get_config_value("user_name") - + # Build WHERE conditions for cypher query where_conditions = [] - + for f in filters: field = f["field"] op = f.get("op", "=") value = f["value"] - + # Format value if isinstance(value, str): - escaped_value = f"'{value}'" + # Escape single quotes in string values + escaped_str = value.replace("'", "''") + escaped_value = f"'{escaped_str}'" elif isinstance(value, list): - # Handle list values + # Handle list values - use double quotes for Cypher arrays list_items = [] for v in value: if isinstance(v, str): - list_items.append(f"'{v}'") + # Escape double quotes in string values for Cypher + escaped_str = v.replace('"', '\\"') + list_items.append(f'"{escaped_str}"') else: list_items.append(str(v)) escaped_value = f"[{', '.join(list_items)}]" else: escaped_value = f"'{value}'" if isinstance(value, str) else str(value) - print("op=============:",op) + print("op=============:", op) # Build WHERE conditions if op == "=": where_conditions.append(f"n.{field} = {escaped_value}") @@ -1372,12 +1376,13 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = where_conditions.append(f"n.{field} {op} {escaped_value}") else: raise ValueError(f"Unsupported operator: {op}") - + # Add user_name filter - where_conditions.append(f"n.user_name = '{user_name}'") - + escaped_user_name = user_name.replace("'", "''") + where_conditions.append(f"n.user_name = '{escaped_user_name}'") + where_str = " AND ".join(where_conditions) - + # Use cypher query cypher_query = f""" SELECT * FROM cypher('{self.db_name}_graph', $$ @@ -1393,12 +1398,12 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = with self.connection.cursor() as cursor: cursor.execute(cypher_query) results = cursor.fetchall() - print("[get_by_metadata] result:",results) + print("[get_by_metadata] result:", results) ids = [str(item[0]).strip('"') for item in results] except Exception as e: 
print("Failed to get metadata:", {e}) logger.error(f"Failed to get metadata: {e}, query is {cypher_query}") - + return ids @timed From c1f2c830206e0714c8bdacecd0904ea2776db327 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Sun, 26 Oct 2025 21:58:52 +0800 Subject: [PATCH 115/137] fix get_by_metadata for "query": "How long ago was Caroline's 18th birthday?" --- examples/basic_modules/polardb_search.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index ac2449951..d7b98c3f2 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -219,6 +219,13 @@ def get_edges(db_name: str, id: str, type: str, direction: str, user_name: str = print("get_edges:", edges) +def get_by_metadata(db_name, filters, user_name): + graph = getPolarDb(db_name) + ids = graph.get_by_metadata(filters=filters, user_name=user_name) + print("get_by_metadata:", ids) + + + if __name__ == "__main__": # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="edge") @@ -382,11 +389,13 @@ def get_edges(db_name: str, id: str, type: str, direction: str, user_name: str = # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") - # get_all_memory_items(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") + # get_all_memory_items(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") # 测试 get_structure_optimization_candidates 函数 # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, 
user_name="memos8f5530534d9b413bb8981ffc3d48a495") # get_neighbors_by_tag(db_name="memtensor_memos",user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") - get_edges(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",type="PARENT",direction="OUTGOING",user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") + # get_edges(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",type="PARENT",direction="OUTGOING",user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") + + get_by_metadata(db_name="memtensor_memos", filters=[{"field": "tags", "op": "contains", "value": "glazes"}], user_name="memos452356faadb34b06acc7fa507023d91c") From 365c480e8a30fd46ad03cc20f817c2d0a1891829 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=A4=A7=E6=B4=8B?= <714403855@qq.com> Date: Mon, 27 Oct 2025 09:47:02 +0800 Subject: [PATCH 116/137] fix get_node format_param_value --- examples/basic_modules/polardb_search.py | 18 +++++++-------- src/memos/graph_dbs/polardb.py | 28 +++++++++++++++++------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py index d7b98c3f2..2b7653a2c 100644 --- a/examples/basic_modules/polardb_search.py +++ b/examples/basic_modules/polardb_search.py @@ -110,15 +110,15 @@ def searchVector(db_name: str, vectorStr: list[float], user_name: str = None): graph = getPolarDb(db_name) # 1,查询search_by_embedding - nodes = graph.search_by_embedding(vector=vectorStr, top_k=1, user_name=user_name) - print("search_by_embedding nodes:", len(nodes)) - for node_i in nodes: - print("Search result:", graph.get_node(node_i["id"][1:-1])) + # nodes = graph.search_by_embedding(vector=vectorStr, top_k=1, user_name=user_name) + # print("search_by_embedding nodes:", len(nodes)) + # for node_i in nodes: + # print("Search result:", graph.get_node(node_i["id"][1:-1])) # 2,查询单个get_node - # detail = graph.get_node(id="194f1e30-44d2-4e3f-bc58-c950343c56b7", - # 
user_name='memos231a22c655fd4b859ca4143b97d2b808') - # print("单个node:", detail) + detail = graph.get_node(id='"194f1e30-44d2-4e3f-bc58-c950343c56b7"', + user_name='"memos231a22c655fd4b859ca4143b97d2b808"') + print("单个node:", detail) # # # 3,查询多个get_nodes ids = ['bb079c5b-1937-4125-a9e5-55d4abe6c95d', 'd66120af-992b-44c6-b261-a6ebe6bc57a5'] @@ -367,7 +367,7 @@ def get_by_metadata(db_name, filters, user_name): -0.01335147, -0.040344328, 0.029144352, -0.04174814, 0.023315482, -0.02227788, -0.0022716573, -0.03152473, 0.0482484, -0.027038634, -0.004882823, 0.06152357, -0.003881463, -0.036041338, -0.0075645614, 0.020660445, -0.07250992, -0.024429375, -0.036377035] - # searchVector(db_name="memtensor_memos", vectorStr=vector, user_name="memos7a9f9fbbb61c412f94f77fbaa8103c35") + searchVector(db_name="memtensor_memos", vectorStr=vector, user_name="memos7a9f9fbbb61c412f94f77fbaa8103c35") # searchVector(db_name="test_1020_02", vectorStr=vector) @@ -398,4 +398,4 @@ def get_by_metadata(db_name, filters, user_name): # get_edges(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",type="PARENT",direction="OUTGOING",user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") - get_by_metadata(db_name="memtensor_memos", filters=[{"field": "tags", "op": "contains", "value": "glazes"}], user_name="memos452356faadb34b06acc7fa507023d91c") + # get_by_metadata(db_name="memtensor_memos", filters=[{"field": "tags", "op": "contains", "value": "glazes"}], user_name="memos452356faadb34b06acc7fa507023d91c") diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index f2c5278f0..e458e846f 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -706,18 +706,29 @@ def get_node(self, id: str, include_embedding: bool = False, user_name: str | No select_fields = "id, properties, embedding" else: select_fields = "id, properties" - + + # Helper function to format parameter value + def format_param_value(value: str) -> str: + """Format parameter 
value to handle both quoted and unquoted formats""" + # Remove outer quotes if they exist + if value.startswith('"') and value.endswith('"'): + # Already has double quotes, return as is + return value + else: + # Add double quotes + return f'"{value}"' + query = f""" SELECT {select_fields} FROM "{self.db_name}_graph"."Memory" WHERE ag_catalog.agtype_access_operator(properties, '"id"'::agtype) = %s::agtype """ - params = [f'"{id}"'] - + params = [format_param_value(id)] + # Only add user filter when user_name is provided if user_name is not None: query += "\nAND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" - params.append(f'"{user_name}"') + params.append(format_param_value(user_name)) print(f"[get_node] query: {query}, params: {params}") try: @@ -731,7 +742,7 @@ def get_node(self, id: str, include_embedding: bool = False, user_name: str | No else: node_id, properties_json = result embedding_json = None - + # Parse properties from JSONB if it's a string if isinstance(properties_json, str): try: @@ -741,15 +752,16 @@ def get_node(self, id: str, include_embedding: bool = False, user_name: str | No properties = {} else: properties = properties_json if properties_json else {} - + # Parse embedding from JSONB if it exists and include_embedding is True if include_embedding and embedding_json is not None: try: - embedding = json.loads(embedding_json) if isinstance(embedding_json, str) else embedding_json + embedding = json.loads(embedding_json) if isinstance(embedding_json, + str) else embedding_json properties["embedding"] = embedding except (json.JSONDecodeError, TypeError): logger.warning(f"Failed to parse embedding for node {id}") - + return self._parse_node({"id": id, "memory": properties.get("memory", ""), **properties}) return None From 1ebc66076a5313a90283ae637c434d13a02d76d6 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 27 Oct 2025 17:03:10 +0800 Subject: [PATCH 117/137] feat: fix CONFIG --- 
examples/basic_modules/parseJson.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/basic_modules/parseJson.py b/examples/basic_modules/parseJson.py index 1fa2a03a2..86e48496a 100644 --- a/examples/basic_modules/parseJson.py +++ b/examples/basic_modules/parseJson.py @@ -11,11 +11,11 @@ from batchImport_polardbFromJson import insert_data, update_graph DB_CONFIG = { - 'host': 'memory.pg.polardb.rds.aliyuncs.com', + 'host': 'xxxxx', 'port': 5432, - 'database': 'test_zdy', - 'user': 'adimin', - 'password': 'Openmem0925' + 'database': 'xxxx', + 'user': 'xxxx', + 'password': 'xxxxx' } conn = psycopg2.connect(**DB_CONFIG) From 8c6f7f41496df6a3d9b57b4d727cd0aff140cc43 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Mon, 27 Oct 2025 17:18:18 +0800 Subject: [PATCH 118/137] fix --- docker/requirements.txt | 3 ++- pyproject.toml | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/requirements.txt b/docker/requirements.txt index d20c0b36e..bb0a27cdd 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -157,4 +157,5 @@ volcengine-python-sdk==4.0.6 watchfiles==1.1.0 websockets==15.0.1 xlrd==2.0.2 -xlsxwriter==3.2.5 \ No newline at end of file +xlsxwriter==3.2.5 +nacos-sdk-python==1.0.0 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index eae2e8050..3440fd403 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ dependencies = [ "scikit-learn (>=1.7.0,<2.0.0)", # Machine learning "fastmcp (>=2.10.5,<3.0.0)", "python-dateutil (>=2.9.0.post0,<3.0.0)", + "nacos-sdk-python (>=1.0.0,<2.0.0)", # Nacos configuration client ] [project.urls] From 67bd2c886b12ba788dfaabe01669b5d20111f5fd Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 27 Oct 2025 17:53:01 +0800 Subject: [PATCH 119/137] feat: fix import --- src/memos/graph_dbs/polardb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py 
b/src/memos/graph_dbs/polardb.py index e458e846f..81ed9e479 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -5,8 +5,7 @@ from typing import Any, Literal import numpy as np -import psycopg2 -from psycopg2.extras import Json + from memos.configs.graph_db import PolarDBGraphDBConfig from memos.dependency import require_python_package @@ -2198,6 +2197,7 @@ def add_node_old(conn, id: str, memory: str, metadata: dict, graph_name=None): graph_name: Graph name, optional """ # Use provided graph_name or default + from psycopg2.extras import Json if graph_name is None: graph_name = GRAPH_NAME From 20c63da33d78a9b534c6baa5c7463f40280fb575 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 27 Oct 2025 18:06:03 +0800 Subject: [PATCH 120/137] feat: delete test file --- .../batchImport_polardbFromJson.py | 444 --------------- examples/basic_modules/importPolarDbEdge.py | 129 ----- examples/basic_modules/import_polardb_incr.py | 314 ----------- examples/basic_modules/parseJson.py | 88 --- examples/basic_modules/polardb_example.py | 525 ------------------ .../basic_modules/polardb_export_insert.py | 361 ------------ examples/basic_modules/polardb_search.py | 401 ------------- 7 files changed, 2262 deletions(-) delete mode 100644 examples/basic_modules/batchImport_polardbFromJson.py delete mode 100644 examples/basic_modules/importPolarDbEdge.py delete mode 100644 examples/basic_modules/import_polardb_incr.py delete mode 100644 examples/basic_modules/parseJson.py delete mode 100644 examples/basic_modules/polardb_example.py delete mode 100644 examples/basic_modules/polardb_export_insert.py delete mode 100644 examples/basic_modules/polardb_search.py diff --git a/examples/basic_modules/batchImport_polardbFromJson.py b/examples/basic_modules/batchImport_polardbFromJson.py deleted file mode 100644 index 9c2409631..000000000 --- a/examples/basic_modules/batchImport_polardbFromJson.py +++ /dev/null @@ -1,444 +0,0 @@ -import json -import 
psycopg2 -from psycopg2.extras import Json, execute_batch -import numpy as np -import sys -import os -from datetime import datetime - -# PolarDB configuration -POLARDB_CONFIG = { - "host": "xxx", - "port": 5432, - "user": "xxx", - "password": "xxx", - # "database": "xxx", - "database": "xxx", - # "graph_name": "xxx" - "graph_name": "xxx" -} - - -class PolarDBGraph: - def __init__(self, config): - self.config = config - self.connection = psycopg2.connect( - host=config["host"], - port=config["port"], - user=config["user"], - password=config["password"], - database=config["database"] - ) - self.graph_name = config.get("graph_name") - # Set autocommit to False to manually control transactions - self.connection.autocommit = False - print("✅ PolarDB connection successful") - - def update_graph_id_in_properties(self): - """Update properties field to add graph_id""" - print("🔄 Starting to update properties field, adding graph_id...") - start_time = datetime.now() - - try: - with self.connection.cursor() as cursor: - # Execute UPDATE to add graph_id into properties - update_sql = f""" - UPDATE {self.graph_name}."Memory" - SET properties = agtype_concat(properties, agtype_build_map('graph_id', id::text)) - """ - cursor.execute(update_sql) - updated_count = cursor.rowcount - - self.connection.commit() - - elapsed = (datetime.now() - start_time).total_seconds() - print(f"✅ Successfully updated {updated_count} records' properties, elapsed: {elapsed:.2f}s") - return updated_count - - except Exception as e: - self.connection.rollback() - print(f"❌ Failed to update properties field: {e}") - return 0 - - def batch_add_nodes_optimized(self, nodes, batch_size=1000): - """Optimized batch insertion of nodes""" - success_count = 0 - error_count = 0 - total_nodes = len(nodes) - - print(f"🚀 Start processing {total_nodes} records, batch size: {batch_size}") - start_time = datetime.now() - - # Process in batches - for batch_start in range(0, total_nodes, batch_size): - batch_end = 
min(batch_start + batch_size, total_nodes) - current_batch = nodes[batch_start:batch_end] - - batch_success = 0 - batch_errors = [] - - try: - with self.connection.cursor() as cursor: - - # Prepare batch insert data - insert_data_1024 = [] - # insert_data_768 = [] - # insert_data_3072 = [] - insert_data_no_embedding = [] - - for node in current_batch: - try: - id_ = node["id"] - memory_ = node["memory"] - metadata = node["metadata"] - - # get_graph_id_query = f""" - # SELECT ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring) - # """ - # cursor.execute(get_graph_id_query, (id_,)) - # graph_id = cursor.fetchone()[0] - # properties['graph_id'] = str(graph_id) - - # Extract embedding - embedding = None - for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: - if embedding_key in metadata and metadata[embedding_key]: - embedding = metadata[embedding_key] - break - - if isinstance(embedding, str): - try: - embedding = json.loads(embedding) - except json.JSONDecodeError: - print(f"⚠️ Unable to parse embedding string: {embedding_key}") - embedding = None - # Clean properties - properties = self.clean_properties(metadata) - properties["id"] = id_ - properties["memory"] = memory_ - - # Classify by embedding dimension - field_name = self.detect_embedding_field(embedding) - vector_value = self.convert_to_vector(embedding) if field_name else None - - if field_name == "embedding" and vector_value: - insert_data_1024.append((id_, Json(properties), vector_value)) - # elif field_name == "embedding_768" and vector_value: - # insert_data_768.append((id_, Json(properties), vector_value)) - # elif field_name == "embedding_3072" and vector_value: - # insert_data_3072.append((id_, Json(properties), vector_value)) - else: - insert_data_no_embedding.append((id_, Json(properties))) - - except Exception as e: - batch_errors.append(f"ID: {node.get('id', 'unknown')} - {e}") - - # Batch insert for different dimensions - if 
insert_data_1024: - insert_sql_1024 = f""" - INSERT INTO "Memory" (id, properties, embedding) - VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) - """ - execute_batch(cursor, insert_sql_1024, insert_data_1024) - batch_success += len(insert_data_1024) - - # if insert_data_768: - # insert_sql_768 = f""" - # INSERT INTO "Memory" (id, properties, embedding_768) - # VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) - # """ - # execute_batch(cursor, insert_sql_768, insert_data_768) - # batch_success += len(insert_data_768) - # - # if insert_data_3072: - # insert_sql_3072 = f""" - # INSERT INTO "Memory" (id, properties, embedding_3072) - # VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) - # """ - # execute_batch(cursor, insert_sql_3072, insert_data_3072) - # batch_success += len(insert_data_3072) - - if insert_data_no_embedding: - insert_sql_no_embedding = f""" - INSERT INTO "Memory" (id, properties) - VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s) - """ - execute_batch(cursor, insert_sql_no_embedding, insert_data_no_embedding) - batch_success += len(insert_data_no_embedding) - - # Commit current batch - self.connection.commit() - success_count += batch_success - error_count += len(batch_errors) - - # Progress display - elapsed = (datetime.now() - start_time).total_seconds() - progress = (batch_end / total_nodes) * 100 - estimated_total = (elapsed / batch_end) * total_nodes if batch_end > 0 else 0 - remaining = estimated_total - elapsed - - print(f"📊 Progress: {batch_end}/{total_nodes} ({progress:.1f}%) | " - f"Success: {success_count} | Failures: {error_count} | " - f"Elapsed: {elapsed:.0f}s | Remaining: {remaining:.0f}s") - - # Output batch errors - if batch_errors: - print(f"❌ Errors in this batch: {len(batch_errors)}") - for i, error in 
enumerate(batch_errors[:5]): # Only show first 5 errors - print(f" {i + 1}. {error}") - if len(batch_errors) > 5: - print(f" ... {len(batch_errors) - 5} more errors") - - except Exception as e: - self.connection.rollback() - error_count += len(current_batch) - print(f"❌ Batch {batch_start}-{batch_end} failed: {e}") - - total_time = (datetime.now() - start_time).total_seconds() - print(f"✅ Batch insertion complete: Success {success_count}, Failures {error_count}, Total time: {total_time:.2f}s") - - return success_count, error_count - - def clean_properties(self, props): - """Remove vector fields""" - vector_keys = {"embedding", "embedding_1024", "embedding_3072", "embedding_768"} - if not isinstance(props, dict): - return {} - return {k: v for k, v in props.items() if k not in vector_keys} - - def detect_embedding_field(self, embedding_list): - """Detect embedding dimension and return corresponding field name""" - if not embedding_list: - return None - dim = len(embedding_list) - # print("---------",dim) - if dim == 1024: - return "embedding" - elif dim == 768: - return "embedding_768" - elif dim == 3072: - return "embedding_3072" - else: - print(f"⚠️ Unknown embedding dimension {dim}, skipping vector") - return None - - def convert_to_vector(self, embedding_list): - """Convert embedding list to vector string""" - if not embedding_list: - return None - if isinstance(embedding_list, np.ndarray): - embedding_list = embedding_list.tolist() - return "[" + ",".join(str(float(x)) for x in embedding_list) + "]" - - def close(self): - """Close database connection""" - if self.connection: - self.connection.close() - print("🔒 PolarDB connection closed") - - -def getPolarDb(): - """Create PolarDB graph database instance""" - return PolarDBGraph(POLARDB_CONFIG) - - -def process_metadata(item): - """Process metadata, extract and convert fields""" - metadata = {} - for key, value in item.items(): - if key not in ["id", "memory"]: - # Type conversion - if key == "confidence": - 
try: - metadata[key] = float(value) - except (ValueError, TypeError): - metadata[key] = value - elif key == "sources" or key == "usage": - if isinstance(value, str): - try: - parsed_value = json.loads(value) - metadata[key] = [json.dumps(item) for item in parsed_value] if isinstance(parsed_value, - list) else [ - json.dumps(parsed_value)] - except json.JSONDecodeError: - metadata[key] = value - else: - metadata[key] = value - elif key == "tags": - if isinstance(value, str): - if value.startswith('[') and value.endswith(']'): - try: - metadata[key] = json.loads(value) - except json.JSONDecodeError: - metadata[key] = [tag.strip() for tag in value[1:-1].split(',')] - else: - metadata[key] = value - else: - metadata[key] = value - else: - metadata[key] = value - return metadata - - -def extract_embedding(item): - """Extract embedding from data item""" - embedding = None - for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: - if embedding_key in item and item[embedding_key]: - embedding_value = item[embedding_key] - if isinstance(embedding_value, str): - try: - embedding = json.loads(embedding_value) - except json.JSONDecodeError: - print(f"⚠️ Unable to parse embedding string: {embedding_key}") - embedding = None - else: - embedding = embedding_value - break - return embedding - - -def prepare_nodes_for_insertion(data_list): - """Prepare node data for insertion""" - nodes_to_insert = [] - processed_count = 0 - skipped_count = 0 - - for item in data_list: - id_ = item.get("id") - memory_ = item.get("memory") - - if not id_ or not memory_: - print(f"⚠️ Skipping invalid data: ID or memory is empty") - skipped_count += 1 - continue - - # Process metadata - metadata = process_metadata(item) - - # Handle embedding field - embedding = extract_embedding(item) - if embedding: - metadata["embedding"] = embedding - - # Build data for insertion - nodes_to_insert.append({ - "id": id_, - "memory": memory_, - "metadata": metadata - }) - 
processed_count += 1 - - # Show progress - if processed_count % 10000 == 0: - print(f"📝 Preprocessed {processed_count} records") - - print(f"✅ Data preprocessing complete: Valid {processed_count}, Skipped {skipped_count}") - return nodes_to_insert - - -def insert_data_optimized(data_list, batch_size=1000): - """Optimized data insertion""" - graph = getPolarDb() - - # Data preprocessing - print("🔄 Starting data preprocessing...") - nodes_to_insert = prepare_nodes_for_insertion(data_list) - - if not nodes_to_insert: - print("⚠️ No valid data to insert") - graph.close() - return 0, 0 - - # Use optimized version, set batch size to 1000 - # Adjust batch size based on conditions: - # - Good network: 1000-2000 - # - Average network: 500-1000 - # - Limited memory: 200-500 - success_count, error_count = graph.batch_add_nodes_optimized(nodes_to_insert, batch_size) - - graph.close() - return success_count, error_count - - - -def load_data_from_file(filename): - """Load data from file""" - print(f"📂 Loading file: {filename}") - try: - with open(filename, "r", encoding="utf-8") as f: - data = json.load(f) - print(f"📂 Loaded {len(data)} records from file {filename}") - return data - except Exception as e: - print(f"❌ Failed to load file: {e}") - return [] - -def update_graph(): - print("-----------update_graph[start]") - graph = getPolarDb() - graph.update_graph_id_in_properties() - print("---------update_graph[end]") - -def insert_data(conn, data): - # Record total start time - total_start_time = datetime.now() - - - if not data: - print("⚠️ No data") - return - - print(f"🎯 Total records to process: {len(data)}") - success_count, error_count = insert_data_optimized(data, batch_size=1000) - - # Compute total time - total_time = (datetime.now() - total_start_time).total_seconds() - minutes, seconds = divmod(total_time, 60) - hours, minutes = divmod(minutes, 60) - - print(f"\n🎉 Processing complete!") - print(f"📊 Final results:") - print(f" ✅ Success: {success_count}") - print(f" ❌ 
Failures: {error_count}") - print(f" ⏱️ Total time: {int(hours)}h {int(minutes)}m {seconds:.2f}s") - -def main(): - json_file = r"/Users/ccl/Desktop/file/export13/ceshi/ceshi.json" - - # Record total start time - total_start_time = datetime.now() - - # Load data - data = load_data_from_file(json_file) - if not data: - print("⚠️ No data") - return - - print(f"🎯 Total records to process: {len(data)}") - - # Use optimized version, set batch size to 1000 - # Adjust batch size based on conditions: - # - Good network: 1000-2000 - # - Average network: 500-1000 - # - Limited memory: 200-500 - success_count, error_count = insert_data_optimized(data, batch_size=1000) - - # Compute total time - total_time = (datetime.now() - total_start_time).total_seconds() - minutes, seconds = divmod(total_time, 60) - hours, minutes = divmod(minutes, 60) - - print(f"\n🎉 Processing complete!") - print(f"📊 Final results:") - print(f" ✅ Success: {success_count}") - print(f" ❌ Failures: {error_count}") - print(f" ⏱️ Total time: {int(hours)}h {int(minutes)}m {seconds:.2f}s") - - if success_count > 0: - records_per_second = success_count / total_time - print(f" 🚀 Processing speed: {records_per_second:.2f} records/sec") - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/examples/basic_modules/importPolarDbEdge.py b/examples/basic_modules/importPolarDbEdge.py deleted file mode 100644 index f1286c80c..000000000 --- a/examples/basic_modules/importPolarDbEdge.py +++ /dev/null @@ -1,129 +0,0 @@ -import os -import json -import psycopg2 - -# 数据库连接配置 -DB_CONFIG = { - 'host': 'xxxxx', - 'port': 5432, - 'database': 'xxxxx', - 'user': 'xxxx', - 'password': 'xxxx' -} - -# 顶层目录 -EDGE_ROOT_DIR = r"C:\Users\13282\Desktop\nebular\export13" - -# 合法的关系文件夹(白名单) -VALID_REL_TYPES = { - "AGGREGATE_TO", - "FOLLOWS", - "INFERS", - "MERGED_TO", - "RELATE_TO", - "PARENT" -} - -# 批量大小 -BATCH_SIZE = 1000 - - -# def create_elabel(conn, label_name): -# """创建关系类型(若不存在)""" -# with conn.cursor() as 
cur: -# print(f"🪶 Creating elabel: {label_name}") -# try: -# cur.execute(f"SELECT create_elabel('memtensor_memos_graph', '{label_name}');") -# conn.commit() -# except Exception as e: -# conn.rollback() -# if "already exists" in str(e): -# print(f"ℹ️ Label '{label_name}' already exists, skipping.") -# else: -# print(f"⚠️ Failed to create label {label_name}: {e}") - - -def insert_edges(conn, edges, label_name): - """批量插入边数据(若已存在则跳过)""" - with conn.cursor() as cur: - for e in edges: - src_id = e["src_id"] - dst_id = e["dst_id"] - user_name = e["user_name"] - - sql = f""" - INSERT INTO memtensor_memos_graph."{label_name}"(id, start_id, end_id, properties) - SELECT - ag_catalog._next_graph_id('memtensor_memos_graph'::name, '{label_name}'), - ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{src_id}'::text::cstring), - ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{dst_id}'::text::cstring), - jsonb_build_object('user_name', '{user_name}')::text::agtype - WHERE NOT EXISTS ( - SELECT 1 FROM memtensor_memos_graph."{label_name}" - WHERE start_id = ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{src_id}'::text::cstring) - AND end_id = ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{dst_id}'::text::cstring) - ); - """ - cur.execute(sql) - conn.commit() - - -def process_relation_folder(conn, folder_path, label_name): - """处理一个关系文件夹""" - print(f"\n🔗 Processing relation: {label_name}") - - # create_elabel(conn, label_name) - for root, _, files in os.walk(folder_path): - for file in files: - if not (file.endswith(".json") or file.endswith(".txt")): - continue - file_path = os.path.join(root, file) - print(f"📄 Reading file: {file_path}") - batch = [] - with open(file_path, "r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if not line: - continue - try: - obj = json.loads(line) - batch.append(obj) - except json.JSONDecodeError: - print(f"⚠️ JSON decode error 
in {file_path}: {line}") - continue - - if len(batch) >= BATCH_SIZE: - insert_edges(conn, batch, label_name) - print(f"✅ Inserted (or skipped) {len(batch)} edges.") - batch.clear() - - if batch: - insert_edges(conn, batch, label_name) - print(f"✅ Inserted (or skipped) {len(batch)} edges.") - - -def main(): - conn = psycopg2.connect(**DB_CONFIG) - try: - for folder_name in os.listdir(EDGE_ROOT_DIR): - folder_path = os.path.join(EDGE_ROOT_DIR, folder_name) - if not os.path.isdir(folder_path): - continue - - # 只处理白名单中的关系类型 - if folder_name.upper() not in VALID_REL_TYPES: - print(f"🚫 Skipping non-relation folder: {folder_name}") - continue - - # 保持大小写一致性 - label_name = folder_name - process_relation_folder(conn, folder_path, label_name) - - print("\n🎉 All relation folders processed successfully!") - - finally: - conn.close() - - -if __name__ == "__main__": - main() diff --git a/examples/basic_modules/import_polardb_incr.py b/examples/basic_modules/import_polardb_incr.py deleted file mode 100644 index 51c9da583..000000000 --- a/examples/basic_modules/import_polardb_incr.py +++ /dev/null @@ -1,314 +0,0 @@ -import json -import os -from collections import Counter -from psycopg2.extras import execute_batch, Json -import psycopg2 - - -class MemoryDataProcessor: - def __init__(self, db_config): - """ - Initialize database connection - - Args: - db_config: Database connection configuration - graph_name: Graph database name - """ - self.db_config = db_config - self.graph_name = db_config.get('graph_name') - print("fff:",db_config.get('graph_name')) - self.connection = None - - def connect(self): - """Connect to database""" - try: - self.connection = psycopg2.connect( - host=self.db_config["host"], - port=self.db_config["port"], - user=self.db_config["user"], - password=self.db_config["password"], - database=self.db_config["database"] - ) - print("✅ Database connection successful") - return True - except Exception as e: - print(f"❌ Database connection failed: {e}") - return 
False - - def disconnect(self): - """Disconnect database connection""" - if self.connection: - self.connection.close() - print("✅ Database connection closed") - - def extract_nodes_simple(self, file_path): - """Extract simplified id and properties from JSON file""" - try: - # Check if file exists - if not os.path.exists(file_path): - print(f"❌ Error: File '{file_path}' does not exist") - return [] - - # First try reading with utf-8-sig (handle BOM) - try: - with open(file_path, 'r', encoding='utf-8-sig') as file: - data = json.load(file) - print("✅ Successfully read file with utf-8-sig encoding") - except json.JSONDecodeError: - # If utf-8-sig fails, try utf-8 - try: - with open(file_path, 'r', encoding='utf-8') as file: - data = json.load(file) - print("✅ Successfully read file with utf-8 encoding") - except json.JSONDecodeError as e: - print(f"❌ JSON parse error: {e}") - return [] - - result = [] - tables = data.get('tables', []) - - print(f"📊 Found {len(tables)} tables") - - for i, table in enumerate(tables, 1): - n_data = table.get('n', {}) - value = n_data.get('value', {}) - - # Extract id and properties - # node_id = value.get('id') - properties = value.get('properties', {}) - node_id = properties.get('id', {}) - - - - if node_id is not None: - # Build data in insertion format - node_data = { - "id": str(node_id), # 转换为字符串 - "memory": properties.get("memory", ""), - "metadata": properties - } - result.append(node_data) - - print(f"🎯 Successfully extracted {len(result)} nodes") - return result - - except Exception as e: - print(f"❌ Error occurred while reading file: {e}") - return [] - - def clean_properties(self, properties): - """Clean properties and remove unnecessary fields""" - # Remove embedding-related fields; these will be handled separately - exclude_fields = [ - "embedding", "embedding_1024", "embedding_768", "embedding_3072", - "embedding_1024_vector", "embedding_768_vector", "embedding_3072_vector" - ] - - cleaned = {} - for key, value in 
properties.items(): - if key not in exclude_fields: - cleaned[key] = value - - return cleaned - - def detect_embedding_field(self, embedding): - """Detect embedding dimension and return corresponding field name""" - if not embedding: - return None - - if isinstance(embedding, list): - length = len(embedding) - if length == 1024: - return "embedding" - elif length == 768: - return "embedding_768" - elif length == 3072: - return "embedding_3072" - - return None - - def convert_to_vector(self, embedding): - """Convert embedding to PostgreSQL vector format""" - if not embedding: - return None - - try: - if isinstance(embedding, list): - # Convert to PostgreSQL vector string format: [1,2,3] - vector_str = "[" + ",".join(map(str, embedding)) + "]" - return vector_str - else: - return None - except Exception as e: - print(f"⚠️ Error converting vector: {e}") - return None - - def insert_nodes_to_db(self, nodes, batch_size=1000): - """Insert node data into the database""" - if not nodes: - print("❌ No data to insert") - return 0, [] - - if not self.connection: - print("❌ Database not connected") - return 0, [] - - total_success = 0 - all_errors = [] - - # 分批处理 - for i in range(0, len(nodes), batch_size): - current_batch = nodes[i:i + batch_size] - batch_success = 0 - batch_errors = [] - - print( - f"🔄 Processing batch {i // batch_size + 1}/{(len(nodes) - 1) // batch_size + 1} ({len(current_batch)} nodes)") - - try: - with self.connection.cursor() as cursor: - # Prepare batch insert data - insert_data_1024 = [] - insert_data_no_embedding = [] - - for node in current_batch: - try: - id_ = node["id"] - memory_ = node["memory"] - metadata = node["metadata"] - - # 提取 embedding - embedding = None - for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: - if embedding_key in metadata and metadata[embedding_key]: - embedding = metadata[embedding_key] - break - - if isinstance(embedding, str): - try: - embedding = json.loads(embedding) - except 
json.JSONDecodeError: - print(f"⚠️ Unable to parse embedding string: {embedding_key}") - embedding = None - - # 清理 properties - properties = self.clean_properties(metadata) - properties["id"] = id_ - properties["memory"] = memory_ - - # 生成 graph_id 并添加到 properties - try: - get_graph_id_query = f""" - SELECT ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring) - """ - cursor.execute(get_graph_id_query, (id_,)) - graph_id = cursor.fetchone()[0] - properties['graph_id'] = str(graph_id) - except Exception as e: - print(f"⚠️ Failed to generate graph_id: {e}") - properties['graph_id'] = str(id_) # 备用方案 - - - # 根据embedding维度分类 - field_name = self.detect_embedding_field(embedding) - vector_value = self.convert_to_vector(embedding) if field_name else None - - if field_name == "embedding" and vector_value: - insert_data_1024.append((id_, Json(properties), vector_value)) - else: - insert_data_no_embedding.append((id_, Json(properties))) - - except Exception as e: - batch_errors.append(f"ID: {node.get('id', 'unknown')} - {e}") - - # 批量插入不同维度的数据 - if insert_data_1024: - insert_sql_1024 = f""" - INSERT INTO "Memory" (id, properties, embedding) - VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) - """ - execute_batch(cursor, insert_sql_1024, insert_data_1024) - batch_success += len(insert_data_1024) - print(f" ✅ Inserted {len(insert_data_1024)} nodes with embedding") - - if insert_data_no_embedding: - insert_sql_no_embedding = f""" - INSERT INTO "Memory" (id, properties) - VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s) - """ - execute_batch(cursor, insert_sql_no_embedding, insert_data_no_embedding) - batch_success += len(insert_data_no_embedding) - print(f" ✅ Inserted {len(insert_data_no_embedding)} nodes without embedding") - - # 提交当前批次 - self.connection.commit() - total_success += batch_success - all_errors.extend(batch_errors) - - 
print(f" ✅ Batch complete: {batch_success} nodes inserted successfully") - - except Exception as e: - self.connection.rollback() - batch_errors.append(f"Batch insert failed: {e}") - all_errors.extend(batch_errors) - print(f"❌ Batch insertion failed: {e}") - - return total_success, all_errors - - def process_file(self, file_path, batch_size): - """Complete processing flow: extract data and insert into database""" - print("🚀 Starting to process data file...") - - # 1. Extract data - nodes = self.extract_nodes_simple(file_path) - if not nodes: - return - - # 3. Connect to database - if not self.connect(): - return - - try: - # 4. Insert data into database - print(f"\n💾 Starting to insert data into database...") - success_count, errors = self.insert_nodes_to_db(nodes, batch_size) - - # 5. Display results - print(f"\n🎉 Processing complete!") - print(f"✅ Successfully inserted: {success_count}/{len(nodes)} nodes") - print(f"❌ Error count: {len(errors)}") - - if errors: - print(f"\n📋 Error details (first 10):") - for error in errors[:10]: - print(f" - {error}") - if len(errors) > 10: - print(f" ... {len(errors) - 10} more errors") - - finally: - # 6. 
Disconnect database connection - self.disconnect() - - -# 使用示例 -if __name__ == "__main__": - # 数据库配置(请根据实际情况修改) - POLARDB_CONFIG = { - "host": "xxx", - "port": 5432, - "user": "xxx", - "password": "xxx", - "database": "xxx", - # "database": "test_zdy", - "graph_name": "xxx" - # "graph_name": "test_zdy_graph" - } - - # 文件路径 - file_path = "/Users/ccl/Desktop/file/temp/result.json" - - # 创建处理器实例 - processor = MemoryDataProcessor(POLARDB_CONFIG) - - # 处理文件 - processor.process_file(file_path, batch_size=1000) \ No newline at end of file diff --git a/examples/basic_modules/parseJson.py b/examples/basic_modules/parseJson.py deleted file mode 100644 index 86e48496a..000000000 --- a/examples/basic_modules/parseJson.py +++ /dev/null @@ -1,88 +0,0 @@ -import os -import json -import psycopg2 -import sys - -# Add the parent directory to the path to allow imports -src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src')) -sys.path.insert(0, src_path) - -# from polardb_export_insert_1 import insert_data -from batchImport_polardbFromJson import insert_data, update_graph - -DB_CONFIG = { - 'host': 'xxxxx', - 'port': 5432, - 'database': 'xxxx', - 'user': 'xxxx', - 'password': 'xxxxx' -} -conn = psycopg2.connect(**DB_CONFIG) - -def insert(batch): - """ - 模拟插入函数。 - 这里你可以替换成实际数据库或API调用逻辑。 - """ - print(f"✅ 调用 insert() 插入 {len(batch)} 条记录") - insert_data(conn, batch) - # 示例:你的数据库插入逻辑写在这里 - # db.insert_many(batch) - - -def process_folder(folder_path, batch_size=1000): - """ - 遍历文件夹,按 batch_size 分批解析 JSON 并调用 insert。 - """ - batch = [] - total_count = 0 - - for root, dirs, files in os.walk(folder_path): - for file in files: - # Only process .json files - if not file.endswith('.json'): - continue - - file_path = os.path.join(root, file) - print(f"📄 正在读取文件: {file_path}") - - try: - with open(file_path, "r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if not line: - continue - try: - obj = json.loads(line) - # 确保解析出的对象是字典类型,并且包含必要的字段 - if 
isinstance(obj, dict) and "id" in obj and "memory" in obj: - batch.append(obj) - total_count += 1 - - # 每满 batch_size 条,调用 insert 并清空缓存 - if len(batch) >= batch_size: - insert(batch) - batch = [] # 清空 - else: - print(f"⚠️ 跳过无效对象(缺少必要字段): {line[:80]}...") - except json.JSONDecodeError: - print(f"⚠️ 跳过无效 JSON: {line[:80]}...") - except (UnicodeDecodeError, IOError) as e: - print(f"⚠️ 跳过无法读取的文件 {file_path}: {e}") - continue - - # 处理最后不足 batch_size 的部分 - if batch: - insert(batch) - update_graph() - - print(f"\n✅ 全部完成,共处理 {total_count} 条记录。") - - -if __name__ == "__main__": - # folder_path = r"/Users/ccl/Desktop/file/export13/ceshi" - # 10W - folder_path = r"/Users/ccl/Desktop/file/export15/Memory" - # 70W - folder_path = r"/Users/ccl/Desktop/file/export13/Memory" - process_folder(folder_path, batch_size=1000) diff --git a/examples/basic_modules/polardb_example.py b/examples/basic_modules/polardb_example.py deleted file mode 100644 index 6569d01d4..000000000 --- a/examples/basic_modules/polardb_example.py +++ /dev/null @@ -1,525 +0,0 @@ -import os -import sys - -# Add the src directory to the Python path -src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src')) -sys.path.insert(0, src_path) - -from datetime import datetime - -from memos.configs.embedder import EmbedderConfigFactory -from memos.configs.graph_db import GraphDBConfigFactory -from memos.embedders.factory import EmbedderFactory -from memos.graph_dbs.factory import GraphStoreFactory -from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata - - -embedder_config = EmbedderConfigFactory.model_validate( - { - "backend": "universal_api", - "config": { - "provider": "openai", - "api_key": os.getenv("OPENAI_API_KEY", "sk-xxxxx"), - "model_name_or_path": "text-embedding-3-large", - "base_url": os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1"), - }, - } -) -embedder = EmbedderFactory.from_config(embedder_config) - - -def embed_memory_item(memory: 
str) -> list[float]: - return embedder.embed([memory])[0] - - -def example_multi_db(db_name: str = "paper_polardb"): - """Example using PolarDB with multi-database mode (physical isolation).""" - # Step 1: Build factory config - config = GraphDBConfigFactory( - backend="polardb", - config={ - "host": "xxxxxxx", - "port": 5432, - "user": "xxxx", - "password": "xxxx", - "db_name": db_name, - "auto_create": True, - "embedding_dimension": 1024, - "use_multi_db": True, - }, - ) - - # Step 2: Instantiate the graph store - graph = GraphStoreFactory.from_config(config) - graph.clear() - - # Step 3: Create topic node - topic = TextualMemoryItem( - memory="This research addresses long-term multi-UAV navigation for energy-efficient communication coverage.", - metadata=TreeNodeTextualMemoryMetadata( - memory_type="LongTermMemory", - key="Multi-UAV Long-Term Coverage", - hierarchy_level="topic", - type="fact", - memory_time="2024-01-01", - source="file", - sources=["paper://multi-uav-coverage/intro"], - status="activated", - confidence=95.0, - tags=["UAV", "coverage", "multi-agent"], - entities=["UAV", "coverage", "navigation"], - visibility="public", - updated_at=datetime.now().isoformat(), - embedding=embed_memory_item( - "This research addresses long-term " - "multi-UAV navigation for " - "energy-efficient communication " - "coverage." 
- ), - ), - ) - - graph.add_node( - id=topic.id, memory=topic.memory, metadata=topic.metadata.model_dump(exclude_none=True) - ) - - # Step 4: Define and write concept nodes - concepts = [ - TextualMemoryItem( - memory="The reward function combines multiple objectives: coverage maximization, energy consumption minimization, and overlap penalty.", - metadata=TreeNodeTextualMemoryMetadata( - memory_type="LongTermMemory", - key="Reward Function Design", - hierarchy_level="concept", - type="fact", - memory_time="2024-01-01", - source="file", - sources=["paper://multi-uav-coverage/reward"], - status="activated", - confidence=92.0, - tags=["reward", "DRL", "multi-objective"], - entities=["reward function"], - visibility="public", - updated_at=datetime.now().isoformat(), - embedding=embed_memory_item( - "The reward function combines " - "multiple objectives: coverage " - "maximization, energy consumption " - "minimization, and overlap penalty." - ), - ), - ), - TextualMemoryItem( - memory="The energy model considers transmission power and mechanical movement power consumption.", - metadata=TreeNodeTextualMemoryMetadata( - memory_type="LongTermMemory", - key="Energy Model", - hierarchy_level="concept", - type="fact", - memory_time="2024-01-01", - source="file", - sources=["paper://multi-uav-coverage/energy"], - status="activated", - confidence=90.0, - tags=["energy", "power model"], - entities=["energy", "power"], - visibility="public", - updated_at=datetime.now().isoformat(), - embedding=embed_memory_item( - "The energy model considers " - "transmission power and mechanical movement power consumption." 
- ), - ), - ), - TextualMemoryItem( - memory="Coverage performance is measured using CT (Coverage Time) and FT (Fairness Time) metrics.", - metadata=TreeNodeTextualMemoryMetadata( - memory_type="LongTermMemory", - key="Coverage Metrics", - hierarchy_level="concept", - type="fact", - memory_time="2024-01-01", - source="file", - sources=["paper://multi-uav-coverage/metrics"], - status="activated", - confidence=91.0, - tags=["coverage", "fairness", "metrics"], - entities=["CT", "FT"], - visibility="public", - updated_at=datetime.now().isoformat(), - embedding=embed_memory_item( - "Coverage performance is measured using CT (Coverage Time) and FT (Fairness Time) metrics." - ), - ), - ), - ] - - # Step 5: Write and link concepts to topic - for concept in concepts: - graph.add_node( - id=concept.id, - memory=concept.memory, - metadata=concept.metadata.model_dump(exclude_none=True), - ) - graph.add_edge(source_id=concept.id, target_id=topic.id, type="RELATED") - print(f"Creating edge: ({concept.id}) -[:RELATED]-> ({topic.id})") - - # Define concept → fact - fact_pairs = [ - { - "concept_key": "Reward Function Design", - "fact": TextualMemoryItem( - memory="The reward includes three parts: (1) coverage gain, (2) energy penalty, and (3) penalty for overlapping areas with other UAVs.", - metadata=TreeNodeTextualMemoryMetadata( - memory_type="WorkingMemory", - key="Reward Components", - hierarchy_level="fact", - type="fact", - memory_time="2024-01-01", - source="file", - sources=["paper://multi-uav-coverage/reward-details"], - status="activated", - confidence=90.0, - tags=["reward", "overlap", "multi-agent"], - entities=["coverage", "energy", "overlap"], - visibility="public", - updated_at=datetime.now().isoformat(), - embedding=embed_memory_item( - "The reward includes three parts: (1) coverage gain, (2) energy penalty, and (3) penalty for overlapping areas with other UAVs." 
- ), - ), - ), - }, - { - "concept_key": "Energy Model", - "fact": TextualMemoryItem( - memory="Total energy cost is calculated from both mechanical movement and communication transmission.", - metadata=TreeNodeTextualMemoryMetadata( - memory_type="LongTermMemory", - key="Energy Cost Components", - hierarchy_level="fact", - type="fact", - memory_time="2024-01-01", - source="file", - sources=["paper://multi-uav-coverage/energy-detail"], - status="activated", - confidence=89.0, - tags=["energy", "movement", "transmission"], - entities=["movement power", "transmission power"], - visibility="public", - updated_at=datetime.now().isoformat(), - embedding=embed_memory_item( - "Total energy cost is calculated from both mechanical movement and communication transmission." - ), - ), - ), - }, - { - "concept_key": "Coverage Metrics", - "fact": TextualMemoryItem( - memory="CT measures how long the area is covered; FT reflects the fairness of agent coverage distribution.", - metadata=TreeNodeTextualMemoryMetadata( - memory_type="LongTermMemory", - key="CT and FT Definition", - hierarchy_level="fact", - type="fact", - memory_time="2024-01-01", - source="file", - sources=["paper://multi-uav-coverage/metric-definitions"], - status="activated", - confidence=91.0, - tags=["CT", "FT", "fairness"], - entities=["coverage time", "fairness"], - visibility="public", - updated_at=datetime.now().isoformat(), - embedding=embed_memory_item( - "CT measures how long the area is covered; FT reflects the fairness of agent coverage distribution." 
- ), - ), - ), - }, - ] - - # Write facts and link to corresponding concept by key - concept_map = {concept.metadata.key: concept.id for concept in concepts} - - for pair in fact_pairs: - fact_item = pair["fact"] - concept_key = pair["concept_key"] - concept_id = concept_map[concept_key] - - graph.add_node( - fact_item.id, - fact_item.memory, - metadata=fact_item.metadata.model_dump(exclude_none=True), - ) - graph.add_edge(source_id=fact_item.id, target_id=concept_id, type="BELONGS_TO") - - all_graph_data = graph.export_graph() - print("Graph data:", all_graph_data) - - nodes = graph.search_by_embedding(vector=embed_memory_item("what does FT reflect?"), top_k=1) - - for node_i in nodes: - print("Search result:", graph.get_node(node_i["id"])) - - -def example_shared_db(db_name: str = "shared_travel_group_polardb"): - """ - Example: Single(Shared)-DB multi-tenant (logical isolation) - Multiple users' data in the same PolarDB with user_name as a tag. - """ - # users - user_list = ["travel_member_alice", "travel_member_bob"] - - for user_name in user_list: - # Step 1: Build factory config - config = GraphDBConfigFactory( - backend="polardb", - config={ - "host": "xxxxxxx", - "port": 5432, - "user": "xxxx", - "password": "xxxx", - "db_name": db_name, - "user_name": user_name, - "use_multi_db": False, - "auto_create": True, - "embedding_dimension": 1024, - }, - ) - # Step 2: Instantiate graph store - graph = GraphStoreFactory.from_config(config) - print(f"\n[INFO] Working in shared DB: {db_name}, for user: {user_name}") - graph.clear() - - # Step 3: Create topic node - topic = TextualMemoryItem( - memory=f"Travel notes for {user_name}", - metadata=TreeNodeTextualMemoryMetadata( - memory_type="LongTermMemory", - hierarchy_level="topic", - status="activated", - visibility="public", - embedding=embed_memory_item(f"Travel notes for {user_name}"), - ), - ) - - graph.add_node( - id=topic.id, memory=topic.memory, metadata=topic.metadata.model_dump(exclude_none=True) - ) - - # 
Step 4: Add a concept for each user - concept = TextualMemoryItem( - memory=f"Itinerary plan for {user_name}", - metadata=TreeNodeTextualMemoryMetadata( - memory_type="LongTermMemory", - hierarchy_level="concept", - status="activated", - visibility="public", - embedding=embed_memory_item(f"Itinerary plan for {user_name}"), - ), - ) - - graph.add_node( - id=concept.id, - memory=concept.memory, - metadata=concept.metadata.model_dump(exclude_none=True), - ) - - # Link concept to topic - graph.add_edge(source_id=concept.id, target_id=topic.id, type="INCLUDE") - - print(f"[INFO] Added nodes for {user_name}") - - # Step 5: Query and print ALL for verification - print("\n=== Export entire DB (for verification, includes ALL users) ===") - graph = GraphStoreFactory.from_config(config) - all_graph_data = graph.export_graph() - print(all_graph_data) - - # Step 6: Search for alice's data only - print("\n=== Search for travel_member_alice ===") - config_alice = GraphDBConfigFactory( - backend="polardb", - config={ - "host": "xxxxxxx", - "port": 5432, - "user": "xxxx", - "password": "xxxx", - "db_name": db_name, - "user_name": user_list[0], - "embedding_dimension": 1024, - }, - ) - graph_alice = GraphStoreFactory.from_config(config_alice) - nodes = graph_alice.search_by_embedding(vector=embed_memory_item("travel itinerary"), top_k=1) - for node in nodes: - print(graph_alice.get_node(node["id"])) - - -def run_user_session( - user_name: str, - db_name: str, - topic_text: str, - concept_texts: list[str], - fact_texts: list[str], -): - """Run a complete user session with PolarDB.""" - print(f"\n=== {user_name} starts building their memory graph ===") - - config = GraphDBConfigFactory( - backend="polardb", - config={ - "host": "xxxxxxx", - "port": 5432, - "user": "xxxx", - "password": "xxxx", - "db_name": db_name, - "user_name": user_name, - "use_multi_db": False, - "auto_create": True, - "embedding_dimension": 1024, - }, - ) - graph = GraphStoreFactory.from_config(config) - 
print(f"6666666:{graph}") - - # Start with a clean slate for this user - graph.clear() - - now = datetime.utcnow().isoformat() - - # === Step 1: Create a root topic node (e.g., user's research focus) === - topic = TextualMemoryItem( - memory="55555", - metadata=TreeNodeTextualMemoryMetadata( - memory_type="LongTermMemory", - key="Research Topic", - hierarchy_level="topic", - type="fact", - memory_time="2024-01-01", - status="activated", - visibility="public", - updated_at=now, - embedding=embed_memory_item(topic_text), - ), - ) - print(f"111111111opic:{topic}") - graph.add_node('45678', topic.memory, topic.metadata.model_dump(exclude_none=True)) - - # === Step 2: Create two concept nodes linked to the topic === - concept_items = [] - for i, text in enumerate(concept_texts): - concept = TextualMemoryItem( - memory=text, - metadata=TreeNodeTextualMemoryMetadata( - memory_type="LongTermMemory", - key=f"Concept {i + 1}", - hierarchy_level="concept", - type="fact", - memory_time="2024-01-01", - status="activated", - visibility="public", - updated_at=now, - embedding=embed_memory_item(text), - tags=["concept"], - confidence=90 + i, - ), - ) - graph.add_node(concept.id, concept.memory, concept.metadata.model_dump(exclude_none=True)) - graph.add_edge(topic.id, concept.id, type="PARENT") - concept_items.append(concept) - - # === Step 3: Create supporting facts under each concept === - for i, text in enumerate(fact_texts): - fact = TextualMemoryItem( - memory=text, - metadata=TreeNodeTextualMemoryMetadata( - memory_type="WorkingMemory", - key=f"Fact {i + 1}", - hierarchy_level="fact", - type="fact", - memory_time="2024-01-01", - status="activated", - visibility="public", - updated_at=now, - embedding=embed_memory_item(text), - confidence=85.0, - tags=["fact"], - ), - ) - graph.add_node(fact.id, fact.memory, fact.metadata.model_dump(exclude_none=True)) - graph.add_edge(concept_items[i % len(concept_items)].id, fact.id, type="PARENT") - - # === Step 4: Retrieve memory using 
semantic search === - vector = embed_memory_item("How is memory retrieved?") - search_result = graph.search_by_embedding(vector, top_k=2) - for r in search_result: - node = graph.get_node(r["id"]) - print("🔍 111111111Search result:", node["memory"]) - - # === Step 5: Tag-based neighborhood discovery === - neighbors = graph.get_neighbors_by_tag(["concept"], exclude_ids=[], top_k=2) - print("📎 222222Tag-related nodes:", [neighbor["memory"] for neighbor in neighbors]) - - # === Step 6: Retrieve children (facts) of first concept === - children = graph.get_children_with_embeddings(concept_items[0].id) - print("📍 333333Children of concept:", [child["memory"] for child in children]) - - # === Step 7: Export a local subgraph and grouped statistics === - subgraph = graph.get_subgraph(topic.id, depth=2) - print("📌444444 Subgraph node count:", len(subgraph["neighbors"])) - - stats = graph.get_grouped_counts(["memory_type", "status"]) - print("📊 55555Grouped counts:", stats) - - # === Step 8: Demonstrate updates and cleanup === - graph.update_node(concept_items[0].id, {"confidence": 99.0}) - graph.remove_oldest_memory("WorkingMemory", keep_latest=1) - graph.delete_edge(topic.id, concept_items[0].id, type="PARENT") - graph.delete_node(concept_items[1].id) - - # === Step 9: Export and re-import the entire graph structure === - exported = graph.export_graph() - graph.import_graph(exported) - print("📦 666666Graph exported and re-imported, total nodes:", len(exported["nodes"])) - - -def example_complex_shared_db(db_name: str = "poc"): - """Complex example with multiple users in shared database.""" - # User 1: Alice explores structured memory for LLMs - run_user_session( - user_name="adimin", - db_name=db_name, - topic_text="Alice studies structured memory and long-term memory optimization in LLMs.", - concept_texts=[ - "Short-term memory can be simulated using WorkingMemory blocks.", - "A structured memory graph improves retrieval precision for agents.", - ], - fact_texts=[ - 
"Embedding search is used to find semantically similar memory items.", - "User memories are stored as node-edge structures that support hierarchical reasoning.", - ], - ) - - -if __name__ == "__main__": - try: - # print("\n=== PolarDB Example: Multi-DB ===") - # example_multi_db(db_name="paper_polardb") - # - # print("\n=== PolarDB Example: Single-DB ===") - # example_shared_db(db_name="shared_travel_group_polardb") - - print("\n=== PolarDB Example: Single-DB-Complex ===") - example_complex_shared_db(db_name="memtensor_memos") - except Exception as e: - print(f"❌ Error running111111 PolarDB example: {e}") - print("Please check:") - print("1. Network connectivity to PolarDB server") - print("2. Database credentials and permissions") - print("3. Apache AGE extension installation") - print("4. Required Python packages (psycopg2-binary)") - import traceback - traceback.print_exc() \ No newline at end of file diff --git a/examples/basic_modules/polardb_export_insert.py b/examples/basic_modules/polardb_export_insert.py deleted file mode 100644 index 014cfc912..000000000 --- a/examples/basic_modules/polardb_export_insert.py +++ /dev/null @@ -1,361 +0,0 @@ -import json -import psycopg2 -from psycopg2.extras import Json -import numpy as np -import sys -import os - -# 添加src目录到Python路径 -src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src')) -sys.path.insert(0, src_path) - -from memos.configs.graph_db import GraphDBConfigFactory -from memos.graph_dbs.factory import GraphStoreFactory - - - - -# DB_CONFIG = { -# 'host': 'xxxxxxx', -# 'port': 5432, -# 'database': 'xxxxx', -# 'user': 'xxxx', -# 'password': 'xxxx' -# } -# -# # 图数据库配置 -GRAPH_NAME = 'memtensor_memos_graph' -def getPolarDb(): - config = GraphDBConfigFactory( - backend="polardb", - config={ - "host": "memory.pg.polardb.rds.aliyuncs.com", - "port": 5432, - "user": "adimin", - "password": "Openmem0925", - "db_name": "memtensor_memos", - "user_name": 'adimin', - "use_multi_db": True, # 
设置为True,不添加user_name过滤条件 - "auto_create": True, - "embedding_dimension": 1024, - }, - ) - graph = GraphStoreFactory.from_config(config) - return graph - -def create_vector_extension(conn): - with conn.cursor() as cursor: - cursor.execute("CREATE EXTENSION IF NOT EXISTS vector;") - conn.commit() - print("✅ pgvector 扩展创建成功或已存在") - - -def create_table(conn): - create_table_sql = """ - CREATE TABLE IF NOT EXISTS "Memory" ( - id graphid PRIMARY KEY, - properties agtype, - embedding vector(1536), - embedding_1024 vector(1024), - embedding_768 vector(768), - imported_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ); - """ - - with conn.cursor() as cursor: - cursor.execute(create_table_sql) - - # 尝试添加主键约束(如果不存在) - try: - cursor.execute("ALTER TABLE \"Memory\" ADD CONSTRAINT memory_pkey PRIMARY KEY (id);") - print("✅ 主键约束添加成功") - except Exception as e: - print(f"⚠️ 主键约束可能已存在: {e}") - - # 安全地创建索引,检查列是否存在 - try: - cursor.execute("CREATE INDEX IF NOT EXISTS idx_memory_id ON \"Memory\"(id);") - except Exception as e: - print(f"⚠️ 创建ID索引时出错: {e}") - - try: - cursor.execute("CREATE INDEX IF NOT EXISTS idx_memory_properties ON \"Memory\" USING GIN(properties);") - except Exception as e: - print(f"⚠️ 创建properties索引时出错: {e}") - - # 只为存在的embedding列创建索引 - for col in ["embedding", "embedding_1024", "embedding_768"]: - try: - cursor.execute( - f"CREATE INDEX IF NOT EXISTS idx_memory_{col} ON \"Memory\" USING ivfflat ({col} vector_cosine_ops) WITH (lists = 100);") - except Exception as e: - print(f"⚠️ 创建{col}索引时出错: {e}") - conn.commit() - print("✅ 表和索引创建成功(如果不存在)") - - -def convert_to_vector(embedding_list): - if not embedding_list: - return None - if isinstance(embedding_list, np.ndarray): - embedding_list = embedding_list.tolist() - return "[" + ",".join(str(float(x)) for x in embedding_list) + "]" - - -def detect_embedding_field(embedding_list): - if not embedding_list: - return None - dim = len(embedding_list) - if dim == 1024: - return "embedding" - elif dim == 3072: - return 
"embedding_3072" - else: - print(f"⚠️ 未知 embedding 维度 {dim},跳过该向量") - return None - - -def clean_properties(props): - """移除向量字段""" - vector_keys = {"embedding", "embedding_1024", "embedding_3072", "embedding_768"} - if not isinstance(props, dict): - return {} - return {k: v for k, v in props.items() if k not in vector_keys} - - -def find_embedding(item): - """在多层结构中查找 embedding 向量""" - for key in ["embedding", "embedding_1024", "embedding_3072", "embedding_768"]: - if key in item and isinstance(item[key], list): - return item[key] - if "metadata" in item and key in item["metadata"]: - return item["metadata"][key] - if "properties" in item and key in item["properties"]: - return item["properties"][key] - return None - - -def add_node(conn, id: str, memory: str, metadata: dict, graph_name=None): - """ - 添加单个节点到图数据库 - - Args: - conn: 数据库连接 - id: 节点ID - memory: 内存内容 - metadata: 元数据字典 - graph_name: 图名称,可选 - """ - # 使用传入的graph_name或默认值 - if graph_name is None: - graph_name = GRAPH_NAME - - try: - # 先提取 embedding(在清理properties之前) - embedding = find_embedding(metadata) - field_name = detect_embedding_field(embedding) - vector_value = convert_to_vector(embedding) if field_name else None - - # 提取 properties - properties = metadata.copy() - properties = clean_properties(properties) - properties["id"] = id - properties["memory"] = memory - - with conn.cursor() as cursor: - # 先删除现有记录(如果存在) - delete_sql = f""" - DELETE FROM "Memory" - WHERE id = ag_catalog._make_graph_id('{graph_name}'::name, 'Memory'::name, %s::text::cstring); - """ - cursor.execute(delete_sql, (id,)) - - # 然后插入新记录 - if field_name and vector_value: - insert_sql = f""" - INSERT INTO "Memory" (id, properties, {field_name}) - VALUES ( - ag_catalog._make_graph_id('{graph_name}'::name, 'Memory'::name, %s::text::cstring), - %s::text::agtype, - %s::vector - ); - """ - cursor.execute(insert_sql, (id, Json(properties), vector_value)) - print(f"✅ 成功插入/更新: {id} ({field_name})") - else: - insert_sql = f""" - INSERT INTO 
"Memory" (id, properties) - VALUES ( - ag_catalog._make_graph_id('{graph_name}'::name, 'Memory'::name, %s::text::cstring), - %s::text::agtype - ); - """ - cursor.execute(insert_sql, (id, Json(properties))) - print(f"✅ 成功插入/更新(无向量): {id}") - - conn.commit() - return True - - except Exception as e: - conn.rollback() - print(f"❌ 插入失败 (ID: {id}): {e}") - return False - - -def insert_data(conn, data_list, graph_name=None): - """ - 批量插入数据,使用PolarDB的add_node方法 - - Args: - conn: 数据库连接 - data_list: 数据列表 - graph_name: 图名称,可选 - """ - # 创建PolarDB配置 - # config = GraphDBConfigFactory( - # backend="polardb", - # config={ - # "host": "xxxxxxx", - # "port": 5432, - # "user": "xxxx", - # "password": "xxxx", - # "db_name": "xxxxx", - # "user_name": 'xxxx', - # "use_multi_db": False, - # "auto_create": False, - # "embedding_dimension": 1024, - # }, - # ) - # - # # 创建PolarDB实例 - # graph = GraphStoreFactory.from_config(config) - graph = getPolarDb() - print("✅ PolarDB连接成功") - - success_count = 0 - error_count = 0 - - for item in data_list: - id_ = item.get("id") - memory_ = item.get("memory") - - # 将所有字段作为metadata,除了id、memory和embedding相关字段 - metadata = {} - for key, value in item.items(): - if key not in ["id", "memory", "embedding_1024", "embedding_768", "embedding_3072", "embedding"]: - # 类型转换 - if key == "confidence": - # confidence 应该是 float - try: - metadata[key] = float(value) - except (ValueError, TypeError): - metadata[key] = value - elif key == "sources": - # sources 应该是 List[str],每个元素是JSON字符串 - if isinstance(value, str): - try: - parsed_sources = json.loads(value) - # 将每个对象转换为JSON字符串 - if isinstance(parsed_sources, list): - metadata[key] = [json.dumps(item) for item in parsed_sources] - else: - metadata[key] = [json.dumps(parsed_sources)] - except json.JSONDecodeError: - metadata[key] = value - else: - metadata[key] = value - elif key == "usage": - # usage 应该是 List[str],每个元素是JSON字符串(和sources格式一样) - if isinstance(value, str): - try: - parsed_usage = json.loads(value) - # 
将每个对象转换为JSON字符串 - if isinstance(parsed_usage, list): - metadata[key] = [json.dumps(item) for item in parsed_usage] - else: - metadata[key] = [json.dumps(parsed_usage)] - except json.JSONDecodeError: - metadata[key] = value - else: - metadata[key] = value - elif key == "tags": - # tags 应该是 List[str] - if isinstance(value, str): - # 尝试解析为列表,如果失败则保持原样 - if value.startswith('[') and value.endswith(']'): - try: - metadata[key] = json.loads(value) - except json.JSONDecodeError: - # 如果不是有效的JSON,尝试按逗号分割 - metadata[key] = [tag.strip() for tag in value[1:-1].split(',')] - else: - metadata[key] = value - else: - metadata[key] = value - else: - metadata[key] = value - - # 处理embedding字段 - embedding = None - for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: - if embedding_key in item and item[embedding_key]: - embedding_value = item[embedding_key] - # 如果是字符串,尝试解析为列表 - if isinstance(embedding_value, str): - try: - embedding = json.loads(embedding_value) - except json.JSONDecodeError: - print(f"⚠️ 无法解析embedding字符串: {embedding_key}") - embedding = None - else: - embedding = embedding_value - break - - # 如果有embedding,添加到metadata中 - if embedding: - metadata["embedding"] = embedding - - try: - # 直接调用PolarDB的add_node方法 - graph.add_node(id_, memory_, metadata) - success_count += 1 - print(f"✅ 成功插入/更新: {id_}") - except Exception as e: - error_count += 1 - print(f"❌ 插入失败 (ID: {id_}): {e}") - # PolarDB的add_node方法内部已经处理了事务,不需要外部rollback - - print(f"✅ 插入完成: 成功 {success_count} 条, 失败 {error_count} 条") - - -def load_data_from_file(filename): - print("11111") - with open(filename, "r", encoding="utf-8") as f: - data = json.load(f) - print(f"📂 从文件 {filename} 加载了 {len(data)} 条记录") - return data - - -def main(): - json_file = r"/Users/zhudayang/python/1011/MemOS/examples/basic_modules/2.json" - data = load_data_from_file(json_file) - if not data: - print("⚠️ 没有数据") - return - - # conn = psycopg2.connect(**DB_CONFIG) - print("✅ 数据库连接成功") - - # 
create_vector_extension(conn) - # create_table(conn) - - # 使用默认的图名称,或者可以传入自定义的图名称 - # insert_data(conn, data, "custom_graph_name") - insert_data(None, data) - - # conn.close() - print("🔒 数据库连接1已关闭") - - -if __name__ == "__main__": - main() diff --git a/examples/basic_modules/polardb_search.py b/examples/basic_modules/polardb_search.py deleted file mode 100644 index 2b7653a2c..000000000 --- a/examples/basic_modules/polardb_search.py +++ /dev/null @@ -1,401 +0,0 @@ -import json -import os -import sys - -src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src')) -sys.path.insert(0, src_path) - -from memos.configs.graph_db import GraphDBConfigFactory -from memos.graph_dbs.factory import GraphStoreFactory -import psycopg2 - -import psycopg2 - - -def handler_node_edge(db_name: str = "shared-traval-group-complex", type: str = "node"): - if type == "node": - run_user_session_node( - db_name=db_name, - ) - elif type == "edge": - run_user_session_edges( - db_name=db_name, - ) - - -DB_CONFIG = { - 'host': 'xxxxxxx', - 'port': 5432, - 'database': 'xxxxx', - 'user': 'xxxx', - 'password': 'xxxx' -} - - -def run_user_session_node(db_name: str, ): - graph = getGraph(db_name) - exported = graph.export_graph(include_embedding=True) - nodes = exported.get('nodes', []) - print(f"查询数据数量:{len(nodes)}") - list = [] - - conn = psycopg2.connect(**DB_CONFIG) - print("✅ 数据库连接成功") - - # create_vector_extension(conn) - # create_table(conn) - # insert_data(conn, nodes) - # for node in nodes: - # metadata = node.get('metadata') - # # embedding_1024 = metadata.get('embedding_1024') - # # if embedding_1024: - # # metadata['embedding_1024'] = None - # metadata['id'] = node.get('id') - # metadata['memory'] = node.get('memory') - # list.append(metadata) - print("nodes:", list) - - -def run_user_session_edges(db_name: str, ): - graph = getGraph(db_name) - exported = graph.export_graph() - nodes = exported.get('nodes', []) - list = [] - edges = exported.get('edges', []) - 
for i, edge in edges: - source = edge.get('source') - target = edge.get('target') - edge_type = edge.get('type') - print(f"剩余{i}条边") - print("edges:", list) - - -def getGraph(db_name): - config = GraphDBConfigFactory( - backend="nebular", - config={ - "uri": json.loads(os.getenv("NEBULAR_HOSTS", "localhost")), - "user": os.getenv("NEBULAR_USER", "root"), - "password": os.getenv("NEBULAR_PASSWORD", "xxxxxx"), - "space": db_name, - "use_multi_db": False, - "auto_create": True, - "embedding_dimension": 1024, - }, - ) - graph = GraphStoreFactory.from_config(config) - return graph - - -def getPolarDb(db_name): - config = GraphDBConfigFactory( - backend="polardb", - config={ - "host": "memory.pg.polardb.rds.aliyuncs.com", - "port": 5432, - "user": "adimin", - "password": "Openmem0925", - "db_name": db_name, - "user_name": 'adimin', - "use_multi_db": True, # 设置为True,不添加user_name过滤条件 - "auto_create": True, - "embedding_dimension": 1024, - }, - ) - graph = GraphStoreFactory.from_config(config) - return graph - - -def searchVector(db_name: str, vectorStr: list[float], user_name: str = None): - graph = getPolarDb(db_name) - - # 1,查询search_by_embedding - # nodes = graph.search_by_embedding(vector=vectorStr, top_k=1, user_name=user_name) - # print("search_by_embedding nodes:", len(nodes)) - # for node_i in nodes: - # print("Search result:", graph.get_node(node_i["id"][1:-1])) - - # 2,查询单个get_node - detail = graph.get_node(id='"194f1e30-44d2-4e3f-bc58-c950343c56b7"', - user_name='"memos231a22c655fd4b859ca4143b97d2b808"') - print("单个node:", detail) - # - # # 3,查询多个get_nodes - ids = ['bb079c5b-1937-4125-a9e5-55d4abe6c95d', 'd66120af-992b-44c6-b261-a6ebe6bc57a5'] - # ids = ['"bfde036f-6276-4485-9dc6-3c64eab3e132"'] - # detail_list = graph.get_nodes(ids=ids,user_name='memos7a9f9fbbb61c412f94f77fbaa8103c35') - # print("1111多个node:", len(detail_list)) - # # - # print("多个node:", detail_list) - - # 4,更新 update_node - # graph.update_node(id="000009999ef-926f-42e2-b7b5-0224daf0abcd", 
fields={"name": "new_name"}) - - # 4,查询 get_memory_count - # count = graph.get_memory_count('UserMemory','memos07ba3d044650474c839e721f3a69d38a') - # print("user count:", count) - # # - # # 4,判断node是否存在 node_not_exist 1代表存在, - # isNodeExist = graph.node_not_exist('UserMemory', 'memos07ba3d044650474c839e721f3a69d38a') - # print("user isNodeExist:", isNodeExist) - # - # # 6,删除跳过多少行之后的数据remove_oldest_memory - # remove_oldest_memory = graph.remove_oldest_memory('UserMemory', 2,'memos07ba3d044650474c839e721f3a69d38a') - # print("user remove_oldest_memory:", remove_oldest_memory) - - # 7,更新 update_node - # isNodeExist = graph.update_node(id="bb079c5b-1937-4125-a9e5-55d4abe6c95d", fields={"status": "inactived","tags": ["yoga", "travel11111111", "local studios5667888"]}) - # print("user update_node:", isNodeExist) - - # 8,删除 delete_node - # isNodeDeleted = graph.delete_node(id="bb079c5b-1937-4125-a9e5-55d4abe6c95d", user_name='memosbfb3fb32032b4077a641404dc48739cd') - # print("user isNodeDeleted:", isNodeDeleted) - - -# 9,添加边 add_edge -def add_edge(db_name: str, source_id: str, target_id: str, edge_type: str = "Memory", user_name: str = None): - graph = getPolarDb(db_name) - graph.add_edge(source_id, target_id, edge_type, user_name) - - -def edge_exists(db_name: str, source_id: str, target_id: str, type: str = "Memory", direction: str = "OUTGOING", - user_name: str = None): - graph = getPolarDb(db_name) - isEdge_exists = graph.edge_exists(source_id=source_id, target_id=target_id, type=type, user_name=user_name, - direction=direction) - print("edge_exists:", isEdge_exists) - - -def get_children_with_embeddings(db_name: str, id: str, user_name: str = None): - graph = getPolarDb(db_name) - children = graph.get_children_with_embeddings(id=id, user_name=user_name) - print("get_children_with_embedding:", children) - - -def get_subgraph(db_name, center_id, depth, center_status, user_name): - graph = getPolarDb(db_name) - subgraph = graph.get_subgraph(center_id, depth, 
center_status, user_name) - print("111111get_subgraph:", subgraph) - - -def get_grouped_counts(db_name, user_name): - graph = getPolarDb(db_name) - grouped_counts = graph.get_grouped_counts(group_fields=["status"], where_clause="user_name = %s", - params=[user_name], user_name=user_name) - grouped_counts = graph.get_grouped_counts1(group_fields=["status"], - params=[user_name], user_name=user_name) - print("get_grouped_counts:", grouped_counts) - - -def export_graph(db_name, include_embedding, user_name): - graph = getPolarDb(db_name) - export_graphlist = graph.export_graph(include_embedding=include_embedding, user_name=user_name) - print("export_graph:", export_graphlist) - - -def get_structure_optimization_candidates(db_name, scope, include_embedding, user_name): - graph = getPolarDb(db_name) - candidates = graph.get_structure_optimization_candidates(scope=scope, include_embedding=include_embedding, user_name=user_name) - print("get_structure_optimization_candidates:", candidates) - - -def get_all_memory_items(db_name, scope, include_embedding, user_name): - graph = getPolarDb(db_name) - memory_items = graph.get_all_memory_items(scope=scope, include_embedding=include_embedding, user_name=user_name) - print("11111get_all_memory_items:", memory_items) - - -def get_neighbors_by_tag(db_name, user_name): - graph = getPolarDb(db_name) - tags=['旅游建议','景点'] - ids = ['39d12b46-ebe4-4f25-b0b7-1582042049e7'] - neighbors = graph.get_neighbors_by_tag(tags=tags, exclude_ids=ids, user_name=user_name) - print("get_neighbors_by_tag:", neighbors) - - -def get_edges(db_name: str, id: str, type: str, direction: str, user_name: str = None) -> None: - graph = getPolarDb(db_name) - edges = graph.get_edges(id=id, type=type, direction=direction, user_name=user_name) - print("get_edges:", edges) - - -def get_by_metadata(db_name, filters, user_name): - graph = getPolarDb(db_name) - ids = graph.get_by_metadata(filters=filters, user_name=user_name) - print("get_by_metadata:", ids) - - - -if 
__name__ == "__main__": - # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="node") - # handler_node_edge(db_name="shared-tree-textual-memory-product-0731",type="edge") - vector = [-0.02954108, -0.024826104, -0.07641619, 0.01464847, -0.0032501293, -0.044220068, 0.024322564, 0.018768352, - -0.0030803748, -0.017776528, 0.022201587, -0.0036640249, -0.013397247, -0.02462774, 0.021743823, - 0.0115509285, -0.023223927, 0.024093682, -0.0071144262, -0.014984163, -0.013305694, -0.022064257, - 0.055786256, -0.012374905, 0.019714398, 0.008865376, -0.00803377, -0.006851211, 0.036529623, 0.042785738, - 0.05923475, 0.007347123, 0.058776986, 0.03900155, -0.05065929, -0.060730115, 0.0042343233, -0.0030670234, - -0.02864081, 0.047332868, -0.007789629, -0.011741664, 0.019638104, 0.007427232, -0.036529623, - -0.034484938, -0.046203714, 0.009078999, -0.026367245, -0.049896352, 0.027938904, -0.048950303, - 0.04299936, -0.024597222, -0.018600505, 0.026321469, -0.03240974, -0.07080094, -0.058105595, - -0.0026359616, -0.023757987, 0.014465364, -0.02682501, -0.014427217, -0.01814274, 0.07244889, - -0.0063705584, 0.019378705, -0.05078136, -0.016525306, 0.013534576, -0.01814274, -0.016098058, - 0.018295329, -0.061645642, 0.023773246, 0.03549202, -0.034820635, 0.029464787, 0.026535092, -0.024398858, - -0.004184732, 0.040374845, 0.0037460409, 0.014419587, -0.009384176, 0.010055564, 0.019546552, 0.023498587, - -0.01620487, -0.013389617, -0.016418492, 0.028198304, -0.051117055, 0.023101857, 0.0705568, -0.03991708, - -0.039428797, 0.006595626, 0.0039367764, 0.034515455, -0.02046208, -0.001202586, -0.018569987, - -0.009666464, -0.018966716, -0.0052719233, 0.0012187985, 0.044403173, -0.018295329, 0.01890568, - 0.037994467, 0.020538375, 0.012939482, -0.0036048968, -0.01968388, -0.0021209763, 0.01451114, 0.024429375, - -0.031311106, -0.017852822, -0.0057830936, 0.011459376, -0.020523116, -0.021209763, 0.0082550235, - -0.0035057145, 0.031311106, 0.0063438555, 0.012374905, 
0.028595034, 0.03552254, -0.041961763, - 0.0062675616, 0.030426092, -0.030548163, 0.058502328, 0.029205387, -0.02604681, -0.025756892, 0.004211435, - 0.05160534, 0.0092239585, -0.049987905, -0.0013847381, -0.055267457, 0.027282774, 0.012535123, - -0.009971641, -0.0040741055, -0.022613576, 0.018478435, 0.026031552, 0.02720648, -0.04782115, 0.007400529, - 0.034851152, 0.042694185, -0.031005928, -0.0019149822, 0.00042104814, -0.0049972646, -0.02539068, - 0.031738352, 0.010337852, -0.036102373, 0.014213594, 0.011878993, 0.0008330364, -0.035583574, - 0.0034465867, -0.0082550235, 0.009887717, 0.012908964, 0.018875163, 0.01814274, 0.091552936, 0.018768352, - 0.03305061, -0.054260373, -0.02682501, 0.03628548, 0.031158516, -0.048675645, -0.017288245, 0.02929694, - 0.013450652, -0.0058365, 0.032745432, -0.027160704, -0.015655553, -0.032074045, 0.020248458, 0.004760753, - 0.04730235, -0.024841363, 0.020248458, 0.013786347, -0.010810876, -0.033874586, 0.008491535, 0.023651175, - 0.016021764, 0.0039329617, -0.04626475, 0.0050621144, -0.02720648, -0.008140582, 0.017440835, - -0.029830998, 0.04861461, 0.042327974, 0.01529697, -0.027191222, -0.03515633, 0.018264811, 0.038696375, - -0.025543269, 0.018676799, 0.023757987, 0.0058670174, -0.0045013526, 0.017059363, -0.03720101, - -0.03668221, -0.0137253115, 0.024581963, 0.025589045, 0.01270297, -0.027450623, -0.035614092, 0.010299705, - 0.021041917, -0.013893158, 0.014350923, -0.032440256, -0.009338399, 0.006114973, 0.025344905, 0.03512581, - -0.02917487, -0.018432658, 0.008873005, -0.014236482, -0.027359068, -0.055572633, 0.016784705, - 0.041900728, -0.019943282, 0.020080611, 0.018112222, 0.009033223, -0.021087693, -0.05041515, 0.0352784, - 0.024642998, 0.007118241, 0.008804341, -0.022384694, -0.030471869, -0.015762364, 0.048706163, - 0.0064659263, 0.0048790085, 0.003906259, -0.02384954, -0.012771634, -0.025985776, 0.0017061271, 0.0736696, - 0.020950364, 0.013847382, 0.029785221, -0.022201587, -0.003059394, 0.038574304, -0.01647953, 
-0.022354174, - 0.0562135, 0.001626972, -0.007865923, 0.012001064, 0.043609716, -0.0274964, 0.029846257, -0.0035266953, - -0.00026106893, 0.050903432, 0.022750905, -0.037475668, -0.013244658, -0.015640294, 0.042419527, - -0.027938904, -0.008247394, -0.0047111614, -0.019088788, -0.15368687, 0.033020094, 0.028350893, - 0.02449041, 0.016830482, 0.03189094, -0.04861461, -0.024261529, -0.05029308, 0.018386882, 0.028945986, - -0.044525243, 0.009628317, 0.021667529, 0.026123105, 0.037384115, -0.007335679, -0.02149968, -0.0176392, - -0.0210114, 0.00070810475, -0.05261242, 0.061340466, 0.0037002645, -0.010910058, -0.011360194, - 0.008201617, 0.03793343, -0.0070305024, -0.0225678, -0.00945284, 0.02281194, 0.0013446837, 0.023880057, - -0.0144806225, 0.015914952, 0.018646281, 0.0032444072, 0.016494788, 0.01853947, 0.01985173, 0.03964242, - 0.015541111, 0.016494788, -0.031921457, -0.029693669, 0.020782517, -0.009796164, -0.0032234264, - -0.046478376, 0.044647314, -0.035675127, -0.01853947, 0.0069885408, -0.07635515, -0.010986352, 0.00971987, - 0.034027174, 0.0347596, 0.02812201, -0.03213508, -0.05355847, -0.00038886155, -0.014061005, -0.08074969, - 0.020584151, 0.047760114, 0.03381355, 0.012809781, -0.020904588, 0.013954193, -0.04382334, 0.01062777, - 0.004421244, 0.039337244, 0.014549287, 0.013435394, 0.021316575, -0.01957707, -0.09228536, 0.006145491, - 0.0015955006, 0.0006575599, 0.010147117, -0.03137214, -0.048919786, 0.0044517615, 0.04214487, 0.053466916, - 0.22290088, 0.019592328, 0.021041917, -0.034790117, 0.034393385, -0.016616859, -0.0748903, 0.08294696, - -0.009971641, -0.030548163, 0.010810876, 0.04147348, -0.025421198, -0.020614669, 0.019271893, 0.04562388, - -0.071838535, 0.0140152285, 0.04455576, 0.016021764, 0.0033302382, -0.03851327, -0.05197155, -0.024795586, - -0.04925548, -0.0012359646, -0.028152527, 0.020446822, -0.010955835, 0.007926959, -0.05679334, 0.04898082, - 0.02227788, 0.009887717, -0.037262045, -0.021865893, 0.024658257, -0.03305061, -0.005104076, 
0.06274428, - 0.0026741086, -0.0032806469, -0.027450623, 0.016265905, -0.008514423, -0.011116052, -0.008338947, - -0.020813035, -0.025711115, -0.021438645, -0.009872458, -0.04071054, -0.019348187, 0.0037441335, - -0.0155868875, -0.0049705617, -0.009040852, 0.007850665, 0.031463694, 0.05029308, 0.002864844, - 0.0063552996, -0.056945927, -0.046051126, 0.006042494, 0.053833127, -0.013702422, -0.03045661, - 0.048187364, 0.029068056, -0.022766164, -0.002573019, 0.012855558, 0.005336773, -0.009414693, - -0.046173196, -0.014053375, -0.0054741027, 0.001794819, 0.014472993, -0.00087928964, 0.004680644, - 0.02449041, 0.018325847, 0.054199338, -0.006156935, 0.028717104, 0.086425975, 0.02307134, 0.0060958997, - -0.0008125323, 0.018829387, -0.011825588, 0.0032806469, 0.008880635, -0.019271893, -0.015991246, - -0.008018511, -0.03149421, 0.00803377, -0.0137482, 0.0004093656, -0.049682725, -0.015518223, -0.034118727, - -0.0069542085, -0.05297863, -0.0052299616, -0.0038566676, 0.0008196849, -0.037536703, -0.02383428, - -0.033355787, -0.051239125, 0.007118241, 0.03488167, 0.028259339, 0.008842488, 0.009246847, 0.03970346, - -0.019271893, 0.038543787, -0.022659352, 0.022720387, 0.024566704, -0.056030396, -0.0026283322, - -0.009399435, 0.0077743703, -0.02191167, 0.0028667513, -0.028717104, 0.0070991674, 0.027038634, - 0.063964985, 0.0090103345, -0.0053215143, -0.022064257, -0.014091522, -0.0057983524, -0.021087693, - 0.006557479, -0.004325876, 0.045440774, 0.0065765525, 0.0015716588, -0.049804796, 0.03924569, -0.01918034, - -0.021331834, 0.039093103, 0.017395059, 0.012664823, -0.052765008, 0.021331834, -0.07537858, - -0.0061607496, -0.032043528, 0.0067978054, -0.0121917995, 0.0039978116, 0.0088196, 0.006580367, - 0.07238785, 0.0110092405, -0.0074196025, 0.009025593, 0.03085334, -0.03137214, -0.006259932, 0.011901882, - -0.040741056, -0.030242987, 0.008834858, -0.019744916, -0.009712241, -0.0040588467, 0.033172682, - 0.004276285, -0.049072374, 0.03488167, -0.0051269643, 0.007694261, 
0.005935682, 0.01788334, -0.0069542085, - 0.0085449405, -0.007194535, -0.041900728, -0.013313323, -0.0013895065, 0.07617205, 0.0037422262, - -0.025009211, 0.0051345937, 0.0066299583, 0.10388207, -0.008834858, 0.006439223, -0.021102952, - -0.03099067, -0.016555823, -0.0126571935, 0.010658287, 0.0057945377, -0.0055503966, -0.009681723, - 0.057617314, -0.017822305, -0.0034828263, 0.0005464566, 0.0043602088, -0.037109457, 0.010849023, - -0.009216329, -0.049194444, 0.01179507, 0.049469102, -0.008514423, -0.009681723, -0.01890568, 0.03500374, - -0.028228821, -0.05871595, 0.0011281992, 0.044799905, -0.0032806469, 0.009002705, 0.030120917, - 0.0073547526, -0.010025047, 0.019012494, -0.031433176, -0.02787787, 0.021621753, -0.011177087, - -0.02630621, 0.042297456, -0.041046232, -0.020919846, -0.002534872, -0.024765069, 0.01632694, - 0.0029258793, -0.0018615763, -0.026748717, -0.030273505, 0.006763473, 0.036590658, 0.027236998, - 0.02307134, 0.031829905, 0.013107329, -0.025451716, 0.040252775, 0.04214487, 0.012710599, 0.01800541, - -0.012130764, -0.056274537, 0.02009587, 0.03695687, 0.024963435, -0.030166693, 0.009002705, -0.06988541, - 0.043212987, 0.01840214, -0.01179507, -0.09484884, -0.023986869, 0.015319858, -0.023498587, -0.034790117, - 0.012176541, 0.0018901867, -0.00037646378, 0.051818963, 0.021804858, -0.05209362, -0.027710022, - 0.051391713, -0.022064257, -0.024139458, -0.018295329, -0.04092416, 0.0063667437, 0.022995045, - 0.0149460165, -0.030059882, 0.019134564, 0.017562905, -0.04962169, 0.015579258, 0.010223411, - -0.0076675583, -0.059021126, 0.04431162, -0.023315482, 0.017517129, -0.0021457719, 0.042968843, - 0.028533999, -0.029449528, -0.016769446, 0.026367245, -0.015762364, -0.01140597, 0.030059882, 0.030929634, - 0.0058250558, -0.06689468, -0.013473541, 0.009323141, 0.025299128, -0.021728564, 0.049987905, - -0.0020599412, 0.04287729, -0.022827199, 0.020828294, -0.001273158, -0.04068002, -0.013664275, - -0.0036945425, -0.019775433, -0.024642998, -0.005275738, 
-0.036407553, -0.0008239764, -0.027435362, - 0.06427016, -0.012901335, -0.02035527, 0.020614669, -0.0017051734, 0.042480562, -0.0013942749, - 0.018981975, 0.030365057, 0.0028915468, 0.052642938, 0.03408821, -0.01878361, 0.0043525794, -0.014183076, - 0.0009870551, -0.011611964, -0.030273505, -0.010635399, 0.058776986, -0.03625496, -0.008270282, - -0.03295906, 0.04794322, -0.0025119837, 0.045959573, -0.008773823, 0.048584092, 0.048828233, -0.056457642, - 0.06039442, -0.04522715, 0.015617405, 0.030960152, 0.047515973, 0.042572115, -0.069214016, 0.017959634, - -0.0090484815, 0.02073674, -0.013839752, 0.035430986, -0.041046232, -0.009887717, 0.07043473, -0.02787787, - 0.010993982, -0.0017557183, 0.0028057161, -0.031204293, -0.0059700143, 0.0054741027, -0.023666434, - -0.008903523, -0.021316575, 0.00014424356, 0.011863735, -0.0058136117, 0.004821788, -0.01710514, - 0.009384176, -0.02864081, -0.0058288705, -0.13269073, 0.019363446, -0.013923676, 0.025177058, - -0.049194444, 0.015129123, -0.02359014, -0.009155294, 0.0034294203, -0.01878361, -0.0027256072, - -0.000686647, -0.048034772, -0.018264811, 0.071228184, 0.037780844, -0.025726374, -0.028595034, - -0.011177087, 0.031463694, -0.01075747, -0.035705645, 0.097290255, 0.010475182, -0.011199976, -0.02655035, - -0.019241376, -0.021698046, -0.019638104, -0.016769446, -0.02165227, -0.06939713, 0.024658257, 0.05297863, - 0.04586802, 0.00984957, -0.009414693, 0.013458282, -0.014610323, 0.024581963, -0.023376517, 0.01269534, - 0.010284446, 0.023880057, -0.011383082, 0.101684794, -0.007423417, -0.048156846, -0.008140582, - 0.014602694, -0.0033321455, 0.019638104, 0.028976504, 0.025619563, 0.009086629, -0.007049576, 0.011596705, - 0.0047226055, -0.024215752, 0.07684343, -0.003227241, 0.016082799, -0.025207575, -0.025177058, - -0.024002127, 0.017135657, -0.01969914, 0.043212987, 0.024185235, -0.02073674, -0.033874586, - -0.0021591233, -0.045471292, -0.00071954884, -0.008132952, 0.019348187, 0.03948983, -0.033752516, - -0.084961124, 
0.0030994483, -0.041381925, -0.041015714, 0.0112839, -0.019836469, 0.032104563, 0.016098058, - 0.020080611, -0.007942217, -0.050140493, -0.034393385, -0.05297863, -0.028137268, -0.0058174264, - -0.0056114323, -0.03189094, 0.021026658, -0.011756923, -0.027267516, 0.006385817, -0.04718028, - -0.012519864, -0.035949785, 0.013076811, -0.02317815, -0.031860422, 0.044769388, -0.015480076, - -0.008018511, -0.043518163, 0.023422293, -0.036895834, -0.040100187, -0.06039442, 0.005691541, - -0.036529623, -0.018585246, 0.023635916, 0.021408128, 0.01152804, 0.013984711, 0.007965106, -0.027801575, - -0.0026226102, -0.021286057, 0.006011976, 0.027389586, 0.0840456, 0.07476823, 0.028564516, 0.015029941, - 0.029342717, -0.047760114, -0.0241242, 0.031082222, 0.017837564, 0.023346, -0.002166753, 0.046295267, - -0.033111647, -0.017715493, -0.016937293, -0.0036678396, -0.01606754, -0.010551476, -0.060730115, - -0.00067806395, 0.005714429, 0.009002705, -0.056549195, -0.053497434, 0.027160704, -0.023803763, - -0.02877814, 0.03189094, -0.015838658, -0.025878964, 0.00014889274, -0.02319341, -0.0028724733, - 0.053222775, -0.0040893643, -0.0034313279, 0.00036740385, 0.0049057114, 0.011291529, 0.056518678, - -0.007972735, -0.041381925, -0.04467783, 0.008804341, 0.026519833, 0.052337762, -0.021209763, - -0.019119306, 0.020126387, 0.00997927, -0.007755297, 0.020492598, -0.014915499, -0.038421717, 0.037353598, - -0.0050888173, 0.029708927, 0.04638682, -0.052917596, -0.0112839, 0.0038433159, -0.011001611, - -0.0023708395, 0.015991246, -0.03381355, 0.017135657, 0.016418492, -0.029449528, 0.047332868, - -0.002183919, 0.018173259, 0.0017023124, 0.01814274, 0.01153567, 0.00042152498, -0.021179246, 0.058441292, - -0.0020771073, -0.036102373, 0.007740038, -0.0042419527, -0.02839667, 0.007713335, -0.016708411, - -0.020538375, 0.0044899085, -0.011131311, 0.0032844616, -0.036468588, -0.005886091, 0.05523694, - -0.015098605, -0.03161628, 0.02462774, 0.028488223, 0.013404876, -0.012916594, -0.012420681, 
-0.036377035, - -0.01335147, -0.040344328, 0.029144352, -0.04174814, 0.023315482, -0.02227788, -0.0022716573, -0.03152473, - 0.0482484, -0.027038634, -0.004882823, 0.06152357, -0.003881463, -0.036041338, -0.0075645614, 0.020660445, - -0.07250992, -0.024429375, -0.036377035] - searchVector(db_name="memtensor_memos", vectorStr=vector, user_name="memos7a9f9fbbb61c412f94f77fbaa8103c35") - - - # searchVector(db_name="test_1020_02", vectorStr=vector) - - # add_edge(db_name="memtensor_memos",source_id="13bb9df6-0609-4442-8bed-bba77dadac92", target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", edge_type="PARENT", user_name="memosbfb3fb32032b4077a641404dc48739cd") - # edge_exists(db_name="memtensor_memos", source_id="13bb9df6-0609-4442-8bed-bba77dadac92", - # target_id="2dd03a5b-5d5f-49c9-9e0a-9a2a2899b98d", type="PARENT", direction="OUTGOING", - # user_name="memosbfb3fb32032b4077a641404dc48739cd") - - # get_children_with_embeddings(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",user_name="memos07ea708ac7eb412887c5c283f874ea30") - - # get_subgraph(db_name="memtensor_memos", center_id="13bb9df6-0609-4442-8bed-bba77dadac92", depth=1, - # center_status="activated", user_name="memos07ea708ac7eb412887c5c283f874ea30") - - # - # get_grouped_counts(db_name="memtensor_memos", user_name="memos07ea708ac7eb412887c5c283f874ea30") - - # export_graph(db_name="memtensor_memos", include_embedding=True, user_name="memos8698ecb1f76940ff9adc12494c4d57a6") - - # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memos8f5530534d9b413bb8981ffc3d48a495") - - # get_all_memory_items(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") - - # 测试 get_structure_optimization_candidates 函数 - # get_structure_optimization_candidates(db_name="memtensor_memos", scope='UserMemory', include_embedding=False, 
user_name="memos8f5530534d9b413bb8981ffc3d48a495") - - # get_neighbors_by_tag(db_name="memtensor_memos",user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") - - # get_edges(db_name="memtensor_memos", id="13bb9df6-0609-4442-8bed-bba77dadac92",type="PARENT",direction="OUTGOING",user_name="memosfeebbc2bd1744d7bb5b5ec57f38e828d") - - # get_by_metadata(db_name="memtensor_memos", filters=[{"field": "tags", "op": "contains", "value": "glazes"}], user_name="memos452356faadb34b06acc7fa507023d91c") From 41040e2c87bb92f0cdd7b3e5d4418fedb65ed158 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 27 Oct 2025 18:15:55 +0800 Subject: [PATCH 121/137] feat: fix polardb --- src/memos/graph_dbs/polardb.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index 81ed9e479..d81c894db 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2202,12 +2202,10 @@ def add_node_old(conn, id: str, memory: str, metadata: dict, graph_name=None): graph_name = GRAPH_NAME try: - # 先提取 embedding(在清理properties之前) embedding = find_embedding(metadata) field_name = detect_embedding_field(embedding) vector_value = convert_to_vector(embedding) if field_name else None - # 提取 properties properties = metadata.copy() properties = clean_properties(properties) properties["id"] = id From 4d15cd91dfcb800af4cda6550e71c0d19c81e1ca Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 27 Oct 2025 18:48:12 +0800 Subject: [PATCH 122/137] feat: fix recall --- .../textual/tree_text_memory/retrieve/recall.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py index 180798827..c1ade3021 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py @@ -56,20 +56,6 @@ def retrieve( 
working_memories = self.graph_store.get_all_memory_items( scope="WorkingMemory", include_embedding=False, user_name=user_name ) - # 过滤数据,只保留模型定义的字段 - # if isinstance(working_memories, list): - # valid_fields = set(TextualMemoryItem.__fields__.keys()) - # - # filtered_records = [] - # for record in working_memories: - # if isinstance(record, dict): - # # 过滤每个字典中的字段 - # filtered_record = {k: v for k, v in record.items() if k in valid_fields} - # # 使用 from_dict 方法创建实例 - # memory_item = TextualMemoryItem.from_dict(filtered_record) - # filtered_records.append(memory_item) - # - # return filtered_records return [TextualMemoryItem.from_dict(record) for record in working_memories] with ContextThreadPoolExecutor(max_workers=2) as executor: From f005939276193bfbd0f34650d098f7ebd3f54cdd Mon Sep 17 00:00:00 2001 From: Wustzdy <67457465+wustzdy@users.noreply.github.com> Date: Mon, 27 Oct 2025 19:02:03 +0800 Subject: [PATCH 123/137] Comment out unused configuration handling code Commented out code related to auto_create and embedding_dimension handling. 
--- src/memos/graph_dbs/polardb.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index d81c894db..eb493c698 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -145,6 +145,7 @@ def __init__(self, config: PolarDBGraphDBConfig): ) self.connection.autocommit = True + """ # Handle auto_create # auto_create = config.get("auto_create", False) if isinstance(config, dict) else config.auto_create # if auto_create: @@ -158,6 +159,7 @@ def __init__(self, config: PolarDBGraphDBConfig): # Handle embedding_dimension # embedding_dim = config.get("embedding_dimension", 1024) if isinstance(config,dict) else config.embedding_dimension # self.create_index(dimensions=embedding_dim) + """ def _get_config_value(self, key: str, default=None): """Safely get config value from either dict or object.""" From 5cf3fa632ee94f64acf8614c63490591e66a0ee1 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Mon, 27 Oct 2025 19:09:28 +0800 Subject: [PATCH 124/137] fix --- src/memos/graph_dbs/polardb.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index e458e846f..167987f69 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -146,20 +146,6 @@ def __init__(self, config: PolarDBGraphDBConfig): ) self.connection.autocommit = True - # Handle auto_create - # auto_create = config.get("auto_create", False) if isinstance(config, dict) else config.auto_create - # if auto_create: - # self._ensure_database_exists() - - # Create graph and tables - # self.create_graph() - # self.create_edge() - # self._create_graph() - - # Handle embedding_dimension - # embedding_dim = config.get("embedding_dimension", 1024) if isinstance(config,dict) else config.embedding_dimension - # self.create_index(dimensions=embedding_dim) - def _get_config_value(self, key: str, default=None): """Safely get config 
value from either dict or object.""" if isinstance(self.config, dict): From 1e0984622d39df4cbb44f3efcde7ab0da07c2c1e Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 27 Oct 2025 19:21:55 +0800 Subject: [PATCH 125/137] feat: fix polardb --- src/memos/graph_dbs/polardb.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index eb493c698..e3367719e 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2140,7 +2140,24 @@ def _parse_node(self, node_data: dict[str, Any]) -> dict[str, Any]: if time_field in node and hasattr(node[time_field], "isoformat"): node[time_field] = node[time_field].isoformat() - # Do not deserialize sources and usage; keep List[str] format + # Deserialize sources and usage if they are not lists + for field_name in ["sources", "usage", "tags"]: + if field_name in node and node[field_name] is not None: + field_value = node[field_name] + + # If it's a string, try to parse it as JSON + if isinstance(field_value, str): + try: + node[field_name] = json.loads(field_value) + except (json.JSONDecodeError, TypeError): + logger.warning(f"Failed to parse {field_name} as JSON, wrapping in list") + node[field_name] = [field_value] + + # If it's not a list, wrap it in a list + elif not isinstance(field_value, list): + logger.warning(f"{field_name} is not a list, wrapping value: {type(field_value)}") + node[field_name] = [field_value] + # Do not remove user_name; keep all fields # 1 From 9c8cc61ee6996b14ca086a63e4a385ebc5f13ca9 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Mon, 27 Oct 2025 19:36:36 +0800 Subject: [PATCH 126/137] import polardb --- .../batchImport_polardbFromJson.py | 443 ++++++++++++++++++ examples/basic_modules/importPolarDbEdge.py | 111 +++++ examples/basic_modules/import_polardb_incr.py | 308 ++++++++++++ examples/basic_modules/parseJson.py | 105 +++++ 4 files changed, 967 
insertions(+) create mode 100644 examples/basic_modules/batchImport_polardbFromJson.py create mode 100644 examples/basic_modules/importPolarDbEdge.py create mode 100644 examples/basic_modules/import_polardb_incr.py create mode 100644 examples/basic_modules/parseJson.py diff --git a/examples/basic_modules/batchImport_polardbFromJson.py b/examples/basic_modules/batchImport_polardbFromJson.py new file mode 100644 index 000000000..355d13127 --- /dev/null +++ b/examples/basic_modules/batchImport_polardbFromJson.py @@ -0,0 +1,443 @@ +import json +import psycopg2 +from psycopg2.extras import Json, execute_batch +import numpy as np +import sys +import os +from datetime import datetime + +# PolarDB configuration +POLARDB_CONFIG = { + 'host': 'memory.pg.polardb.rds.aliyuncs.com', + 'port': 5432, + 'database': 'memtensor_memos', + 'user': 'adimin', + 'password': 'Openmem0925', + + "graph_name": "memtensor_memos_graph" +} + + +class PolarDBGraph: + def __init__(self, config): + self.config = config + self.connection = psycopg2.connect( + host=config["host"], + port=config["port"], + user=config["user"], + password=config["password"], + database=config["database"] + ) + self.graph_name = config.get("graph_name") + # Set autocommit to False to manually control transactions + self.connection.autocommit = False + print("✅ PolarDB connection successful") + + def update_graph_id_in_properties(self): + """Update properties field to add graph_id""" + print("🔄 Starting to update properties field, adding graph_id...") + start_time = datetime.now() + + try: + with self.connection.cursor() as cursor: + # Execute UPDATE to add graph_id into properties + update_sql = f""" + UPDATE {self.graph_name}."Memory" + SET properties = agtype_concat(properties, agtype_build_map('graph_id', id::text)) + """ + cursor.execute(update_sql) + updated_count = cursor.rowcount + + self.connection.commit() + + elapsed = (datetime.now() - start_time).total_seconds() + print(f"✅ Successfully updated 
{updated_count} records' properties, elapsed: {elapsed:.2f}s") + return updated_count + + except Exception as e: + self.connection.rollback() + print(f"❌ Failed to update properties field: {e}") + return 0 + + def batch_add_nodes_optimized(self, nodes, batch_size=1000): + """Optimized batch insertion of nodes""" + success_count = 0 + error_count = 0 + total_nodes = len(nodes) + + print(f"🚀 Start processing {total_nodes} records, batch size: {batch_size}") + start_time = datetime.now() + + # Process in batches + for batch_start in range(0, total_nodes, batch_size): + batch_end = min(batch_start + batch_size, total_nodes) + current_batch = nodes[batch_start:batch_end] + + batch_success = 0 + batch_errors = [] + + try: + with self.connection.cursor() as cursor: + + # Prepare batch insert data + insert_data_1024 = [] + # insert_data_768 = [] + # insert_data_3072 = [] + insert_data_no_embedding = [] + + for node in current_batch: + try: + id_ = node["id"] + memory_ = node["memory"] + metadata = node["metadata"] + + # get_graph_id_query = f""" + # SELECT ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring) + # """ + # cursor.execute(get_graph_id_query, (id_,)) + # graph_id = cursor.fetchone()[0] + # properties['graph_id'] = str(graph_id) + + # Extract embedding + embedding = None + for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: + if embedding_key in metadata and metadata[embedding_key]: + embedding = metadata[embedding_key] + break + + if isinstance(embedding, str): + try: + embedding = json.loads(embedding) + except json.JSONDecodeError: + print(f"⚠️ Unable to parse embedding string: {embedding_key}") + embedding = None + # Clean properties + properties = self.clean_properties(metadata) + properties["id"] = id_ + properties["memory"] = memory_ + + # Classify by embedding dimension + field_name = self.detect_embedding_field(embedding) + vector_value = self.convert_to_vector(embedding) if 
field_name else None + + if field_name == "embedding" and vector_value: + insert_data_1024.append((id_, Json(properties), vector_value)) + # elif field_name == "embedding_768" and vector_value: + # insert_data_768.append((id_, Json(properties), vector_value)) + # elif field_name == "embedding_3072" and vector_value: + # insert_data_3072.append((id_, Json(properties), vector_value)) + else: + insert_data_no_embedding.append((id_, Json(properties))) + + except Exception as e: + batch_errors.append(f"ID: {node.get('id', 'unknown')} - {e}") + + # Batch insert for different dimensions + if insert_data_1024: + insert_sql_1024 = f""" + INSERT INTO "Memory" (id, properties, embedding) + VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) + """ + execute_batch(cursor, insert_sql_1024, insert_data_1024) + batch_success += len(insert_data_1024) + + # if insert_data_768: + # insert_sql_768 = f""" + # INSERT INTO "Memory" (id, properties, embedding_768) + # VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) + # """ + # execute_batch(cursor, insert_sql_768, insert_data_768) + # batch_success += len(insert_data_768) + # + # if insert_data_3072: + # insert_sql_3072 = f""" + # INSERT INTO "Memory" (id, properties, embedding_3072) + # VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) + # """ + # execute_batch(cursor, insert_sql_3072, insert_data_3072) + # batch_success += len(insert_data_3072) + + if insert_data_no_embedding: + insert_sql_no_embedding = f""" + INSERT INTO "Memory" (id, properties) + VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s) + """ + execute_batch(cursor, insert_sql_no_embedding, insert_data_no_embedding) + batch_success += len(insert_data_no_embedding) + + # Commit current batch + self.connection.commit() + success_count += batch_success + 
error_count += len(batch_errors) + + # Progress display + elapsed = (datetime.now() - start_time).total_seconds() + progress = (batch_end / total_nodes) * 100 + estimated_total = (elapsed / batch_end) * total_nodes if batch_end > 0 else 0 + remaining = estimated_total - elapsed + + print(f"📊 Progress: {batch_end}/{total_nodes} ({progress:.1f}%) | " + f"Success: {success_count} | Failures: {error_count} | " + f"Elapsed: {elapsed:.0f}s | Remaining: {remaining:.0f}s") + + # Output batch errors + if batch_errors: + print(f"❌ Errors in this batch: {len(batch_errors)}") + for i, error in enumerate(batch_errors[:5]): # Only show first 5 errors + print(f" {i + 1}. {error}") + if len(batch_errors) > 5: + print(f" ... {len(batch_errors) - 5} more errors") + + except Exception as e: + self.connection.rollback() + error_count += len(current_batch) + print(f"❌ Batch {batch_start}-{batch_end} failed: {e}") + + total_time = (datetime.now() - start_time).total_seconds() + print(f"✅ Batch insertion complete: Success {success_count}, Failures {error_count}, Total time: {total_time:.2f}s") + + return success_count, error_count + + def clean_properties(self, props): + """Remove vector fields""" + vector_keys = {"embedding", "embedding_1024", "embedding_3072", "embedding_768"} + if not isinstance(props, dict): + return {} + return {k: v for k, v in props.items() if k not in vector_keys} + + def detect_embedding_field(self, embedding_list): + """Detect embedding dimension and return corresponding field name""" + if not embedding_list: + return None + dim = len(embedding_list) + # print("---------",dim) + if dim == 1024: + return "embedding" + elif dim == 768: + return "embedding_768" + elif dim == 3072: + return "embedding_3072" + else: + print(f"⚠️ Unknown embedding dimension {dim}, skipping vector") + return None + + def convert_to_vector(self, embedding_list): + """Convert embedding list to vector string""" + if not embedding_list: + return None + if isinstance(embedding_list, 
np.ndarray): + embedding_list = embedding_list.tolist() + return "[" + ",".join(str(float(x)) for x in embedding_list) + "]" + + def close(self): + """Close database connection""" + if self.connection: + self.connection.close() + print("🔒 PolarDB connection closed") + + +def getPolarDb(): + """Create PolarDB graph database instance""" + return PolarDBGraph(POLARDB_CONFIG) + + +def process_metadata(item): + """Process metadata, extract and convert fields""" + metadata = {} + for key, value in item.items(): + if key not in ["id", "memory"]: + # Type conversion + if key == "confidence": + try: + metadata[key] = float(value) + except (ValueError, TypeError): + metadata[key] = value + elif key == "sources" or key == "usage": + if isinstance(value, str): + try: + parsed_value = json.loads(value) + metadata[key] = [json.dumps(item) for item in parsed_value] if isinstance(parsed_value, + list) else [ + json.dumps(parsed_value)] + except json.JSONDecodeError: + metadata[key] = value + else: + metadata[key] = value + elif key == "tags": + if isinstance(value, str): + if value.startswith('[') and value.endswith(']'): + try: + metadata[key] = json.loads(value) + except json.JSONDecodeError: + metadata[key] = [tag.strip() for tag in value[1:-1].split(',')] + else: + metadata[key] = value + else: + metadata[key] = value + else: + metadata[key] = value + return metadata + + +def extract_embedding(item): + """Extract embedding from data item""" + embedding = None + for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: + if embedding_key in item and item[embedding_key]: + embedding_value = item[embedding_key] + if isinstance(embedding_value, str): + try: + embedding = json.loads(embedding_value) + except json.JSONDecodeError: + print(f"⚠️ Unable to parse embedding string: {embedding_key}") + embedding = None + else: + embedding = embedding_value + break + return embedding + + +def prepare_nodes_for_insertion(data_list): + """Prepare node data for 
insertion""" + nodes_to_insert = [] + processed_count = 0 + skipped_count = 0 + + for item in data_list: + id_ = item.get("id") + memory_ = item.get("memory") + + if not id_ or not memory_: + print(f"⚠️ Skipping invalid data: ID or memory is empty") + skipped_count += 1 + continue + + # Process metadata + metadata = process_metadata(item) + + # Handle embedding field + embedding = extract_embedding(item) + if embedding: + metadata["embedding"] = embedding + + # Build data for insertion + nodes_to_insert.append({ + "id": id_, + "memory": memory_, + "metadata": metadata + }) + processed_count += 1 + + # Show progress + if processed_count % 10000 == 0: + print(f"📝 Preprocessed {processed_count} records") + + print(f"✅ Data preprocessing complete: Valid {processed_count}, Skipped {skipped_count}") + return nodes_to_insert + + +def insert_data_optimized(data_list, batch_size=1000): + """Optimized data insertion""" + graph = getPolarDb() + + # Data preprocessing + print("🔄 Starting data preprocessing...") + nodes_to_insert = prepare_nodes_for_insertion(data_list) + + if not nodes_to_insert: + print("⚠️ No valid data to insert") + graph.close() + return 0, 0 + + # Use optimized version, set batch size to 1000 + # Adjust batch size based on conditions: + # - Good network: 1000-2000 + # - Average network: 500-1000 + # - Limited memory: 200-500 + success_count, error_count = graph.batch_add_nodes_optimized(nodes_to_insert, batch_size) + + graph.close() + return success_count, error_count + + + +def load_data_from_file(filename): + """Load data from file""" + print(f"📂 Loading file: {filename}") + try: + with open(filename, "r", encoding="utf-8") as f: + data = json.load(f) + print(f"📂 Loaded {len(data)} records from file {filename}") + return data + except Exception as e: + print(f"❌ Failed to load file: {e}") + return [] + +def update_graph(): + print("-----------update_graph[start]") + graph = getPolarDb() + graph.update_graph_id_in_properties() + 
print("---------update_graph[end]") + +def insert_data(conn, data): + # Record total start time + total_start_time = datetime.now() + + + if not data: + print("⚠️ No data") + return + + print(f"🎯 Total records to process: {len(data)}") + success_count, error_count = insert_data_optimized(data, batch_size=1000) + + # Compute total time + total_time = (datetime.now() - total_start_time).total_seconds() + minutes, seconds = divmod(total_time, 60) + hours, minutes = divmod(minutes, 60) + + print(f"\n🎉 Processing complete!") + print(f"📊 Final results:") + print(f" ✅ Success: {success_count}") + print(f" ❌ Failures: {error_count}") + print(f" ⏱️ Total time: {int(hours)}h {int(minutes)}m {seconds:.2f}s") + +def main(): + json_file = r"/Users/ccl/Desktop/file/export13/ceshi/ceshi.json" + + # Record total start time + total_start_time = datetime.now() + + # Load data + data = load_data_from_file(json_file) + if not data: + print("⚠️ No data") + return + + print(f"🎯 Total records to process: {len(data)}") + + # Use optimized version, set batch size to 1000 + # Adjust batch size based on conditions: + # - Good network: 1000-2000 + # - Average network: 500-1000 + # - Limited memory: 200-500 + success_count, error_count = insert_data_optimized(data, batch_size=1000) + + # Compute total time + total_time = (datetime.now() - total_start_time).total_seconds() + minutes, seconds = divmod(total_time, 60) + hours, minutes = divmod(minutes, 60) + + print(f"\n🎉 Processing complete!") + print(f"📊 Final results:") + print(f" ✅ Success: {success_count}") + print(f" ❌ Failures: {error_count}") + print(f" ⏱️ Total time: {int(hours)}h {int(minutes)}m {seconds:.2f}s") + + if success_count > 0: + records_per_second = success_count / total_time + print(f" 🚀 Processing speed: {records_per_second:.2f} records/sec") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/basic_modules/importPolarDbEdge.py b/examples/basic_modules/importPolarDbEdge.py new file 
mode 100644 index 000000000..e79e12cec --- /dev/null +++ b/examples/basic_modules/importPolarDbEdge.py @@ -0,0 +1,111 @@ +import os +import json +import psycopg2 + +# 数据库连接配置 +DB_CONFIG = { + 'host': 'xxx', + 'port': 5432, + 'database': 'xxx', + 'user': 'xxx', + 'password': 'xxx' +} + +# 顶层目录 +EDGE_ROOT_DIR = r"/Users/ccl/Desktop/file/ccl/export22" + +# 合法的关系文件夹(白名单) +VALID_REL_TYPES = { + "AGGREGATE_TO", + "FOLLOWS", + "INFERS", + "MERGED_TO", + "RELATE_TO", + "PARENT" +} + +BATCH_SIZE = 1000 + + +def insert_edges(conn, edges, label_name): + with conn.cursor() as cur: + for e in edges: + src_id = e["src_id"] + dst_id = e["dst_id"] + user_name = e["user_name"] + + sql = f""" + INSERT INTO memtensor_memos_graph."{label_name}"(id, start_id, end_id, properties) + SELECT + ag_catalog._next_graph_id('memtensor_memos_graph'::name, '{label_name}'), + ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{src_id}'::text::cstring), + ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{dst_id}'::text::cstring), + jsonb_build_object('user_name', '{user_name}')::text::agtype + WHERE NOT EXISTS ( + SELECT 1 FROM memtensor_memos_graph."{label_name}" + WHERE start_id = ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{src_id}'::text::cstring) + AND end_id = ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{dst_id}'::text::cstring) + ); + """ + cur.execute(sql) + conn.commit() + + +def process_relation_folder(conn, folder_path, label_name): + print(f"\n🔗 Processing relation: {label_name}") + + # create_elabel(conn, label_name) + for root, _, files in os.walk(folder_path): + for file in files: + if not (file.endswith(".json") or file.endswith(".txt")): + continue + file_path = os.path.join(root, file) + print(f"📄 Reading file: {file_path}") + batch = [] + with open(file_path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line: + continue + try: + obj = 
json.loads(line) + batch.append(obj) + except json.JSONDecodeError: + print(f"⚠️ JSON decode error in {file_path}: {line}") + continue + + if len(batch) >= BATCH_SIZE: + insert_edges(conn, batch, label_name) + print(f"✅ Inserted (or skipped) {len(batch)} edges.") + batch.clear() + + if batch: + insert_edges(conn, batch, label_name) + print(f"✅ Inserted (or skipped) {len(batch)} edges.") + + +def main(): + conn = psycopg2.connect(**DB_CONFIG) + try: + for folder_name in os.listdir(EDGE_ROOT_DIR): + folder_path = os.path.join(EDGE_ROOT_DIR, folder_name) + if not os.path.isdir(folder_path): + continue + + + if folder_name.upper() not in VALID_REL_TYPES: + print(f"🚫 Skipping non-relation folder: {folder_name}") + continue + + + label_name = folder_name + process_relation_folder(conn, folder_path, label_name) + + print("\n🎉 All relation folders processed successfully!") + + finally: + conn.close() + + +if __name__ == "__main__": + main() diff --git a/examples/basic_modules/import_polardb_incr.py b/examples/basic_modules/import_polardb_incr.py new file mode 100644 index 000000000..60e7c2045 --- /dev/null +++ b/examples/basic_modules/import_polardb_incr.py @@ -0,0 +1,308 @@ +import json +import os +from collections import Counter +from psycopg2.extras import execute_batch, Json +import psycopg2 + + +class MemoryDataProcessor: + def __init__(self, db_config): + """ + Initialize database connection + + Args: + db_config: Database connection configuration + graph_name: Graph database name + """ + self.db_config = db_config + self.graph_name = db_config.get('graph_name') + print("fff:",db_config.get('graph_name')) + self.connection = None + + def connect(self): + """Connect to database""" + try: + self.connection = psycopg2.connect( + host=self.db_config["host"], + port=self.db_config["port"], + user=self.db_config["user"], + password=self.db_config["password"], + database=self.db_config["database"] + ) + print("✅ Database connection successful") + return True + except 
Exception as e: + print(f"❌ Database connection failed: {e}") + return False + + def disconnect(self): + """Disconnect database connection""" + if self.connection: + self.connection.close() + print("✅ Database connection closed") + + def extract_nodes_simple(self, file_path): + """Extract simplified id and properties from JSON file""" + try: + # Check if file exists + if not os.path.exists(file_path): + print(f"❌ Error: File '{file_path}' does not exist") + return [] + + # First try reading with utf-8-sig (handle BOM) + try: + with open(file_path, 'r', encoding='utf-8-sig') as file: + data = json.load(file) + print("✅ Successfully read file with utf-8-sig encoding") + except json.JSONDecodeError: + # If utf-8-sig fails, try utf-8 + try: + with open(file_path, 'r', encoding='utf-8') as file: + data = json.load(file) + print("✅ Successfully read file with utf-8 encoding") + except json.JSONDecodeError as e: + print(f"❌ JSON parse error: {e}") + return [] + + result = [] + tables = data.get('tables', []) + + print(f"📊 Found {len(tables)} tables") + + for i, table in enumerate(tables, 1): + n_data = table.get('n', {}) + value = n_data.get('value', {}) + + # Extract id and properties + # node_id = value.get('id') + properties = value.get('properties', {}) + node_id = properties.get('id', {}) + + + + if node_id is not None: + # Build data in insertion format + node_data = { + "id": str(node_id), # 转换为字符串 + "memory": properties.get("memory", ""), + "metadata": properties + } + result.append(node_data) + + print(f"🎯 Successfully extracted {len(result)} nodes") + return result + + except Exception as e: + print(f"❌ Error occurred while reading file: {e}") + return [] + + def clean_properties(self, properties): + """Clean properties and remove unnecessary fields""" + # Remove embedding-related fields; these will be handled separately + exclude_fields = [ + "embedding", "embedding_1024", "embedding_768", "embedding_3072", + "embedding_1024_vector", "embedding_768_vector", 
"embedding_3072_vector" + ] + + cleaned = {} + for key, value in properties.items(): + if key not in exclude_fields: + cleaned[key] = value + + return cleaned + + def detect_embedding_field(self, embedding): + """Detect embedding dimension and return corresponding field name""" + if not embedding: + return None + + if isinstance(embedding, list): + length = len(embedding) + if length == 1024: + return "embedding" + elif length == 768: + return "embedding_768" + elif length == 3072: + return "embedding_3072" + + return None + + def convert_to_vector(self, embedding): + """Convert embedding to PostgreSQL vector format""" + if not embedding: + return None + + try: + if isinstance(embedding, list): + # Convert to PostgreSQL vector string format: [1,2,3] + vector_str = "[" + ",".join(map(str, embedding)) + "]" + return vector_str + else: + return None + except Exception as e: + print(f"⚠️ Error converting vector: {e}") + return None + + def insert_nodes_to_db(self, nodes, batch_size=1000): + """Insert node data into the database""" + if not nodes: + print("❌ No data to insert") + return 0, [] + + if not self.connection: + print("❌ Database not connected") + return 0, [] + + total_success = 0 + all_errors = [] + + # 分批处理 + for i in range(0, len(nodes), batch_size): + current_batch = nodes[i:i + batch_size] + batch_success = 0 + batch_errors = [] + + print( + f"🔄 Processing batch {i // batch_size + 1}/{(len(nodes) - 1) // batch_size + 1} ({len(current_batch)} nodes)") + + try: + with self.connection.cursor() as cursor: + # Prepare batch insert data + insert_data_1024 = [] + insert_data_no_embedding = [] + + for node in current_batch: + try: + id_ = node["id"] + memory_ = node["memory"] + metadata = node["metadata"] + + embedding = None + for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: + if embedding_key in metadata and metadata[embedding_key]: + embedding = metadata[embedding_key] + break + + if isinstance(embedding, str): + try: + 
embedding = json.loads(embedding) + except json.JSONDecodeError: + print(f"⚠️ Unable to parse embedding string: {embedding_key}") + embedding = None + + properties = self.clean_properties(metadata) + properties["id"] = id_ + properties["memory"] = memory_ + + try: + get_graph_id_query = f""" + SELECT ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring) + """ + cursor.execute(get_graph_id_query, (id_,)) + graph_id = cursor.fetchone()[0] + properties['graph_id'] = str(graph_id) + except Exception as e: + print(f"⚠️ Failed to generate graph_id: {e}") + properties['graph_id'] = str(id_) + + + field_name = self.detect_embedding_field(embedding) + vector_value = self.convert_to_vector(embedding) if field_name else None + + if field_name == "embedding" and vector_value: + insert_data_1024.append((id_, Json(properties), vector_value)) + else: + insert_data_no_embedding.append((id_, Json(properties))) + + except Exception as e: + batch_errors.append(f"ID: {node.get('id', 'unknown')} - {e}") + + # 批量插入不同维度的数据 + if insert_data_1024: + insert_sql_1024 = f""" + INSERT INTO "Memory" (id, properties, embedding) + VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) + """ + execute_batch(cursor, insert_sql_1024, insert_data_1024) + batch_success += len(insert_data_1024) + print(f" ✅ Inserted {len(insert_data_1024)} nodes with embedding") + + if insert_data_no_embedding: + insert_sql_no_embedding = f""" + INSERT INTO "Memory" (id, properties) + VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s) + """ + execute_batch(cursor, insert_sql_no_embedding, insert_data_no_embedding) + batch_success += len(insert_data_no_embedding) + print(f" ✅ Inserted {len(insert_data_no_embedding)} nodes without embedding") + + # 提交当前批次 + self.connection.commit() + total_success += batch_success + all_errors.extend(batch_errors) + + print(f" ✅ Batch complete: 
{batch_success} nodes inserted successfully") + + except Exception as e: + self.connection.rollback() + batch_errors.append(f"Batch insert failed: {e}") + all_errors.extend(batch_errors) + print(f"❌ Batch insertion failed: {e}") + + return total_success, all_errors + + def process_file(self, file_path, batch_size): + """Complete processing flow: extract data and insert into database""" + print("🚀 Starting to process data file...") + + # 1. Extract data + nodes = self.extract_nodes_simple(file_path) + if not nodes: + return + + # 3. Connect to database + if not self.connect(): + return + + try: + # 4. Insert data into database + print(f"\n💾 Starting to insert data into database...") + success_count, errors = self.insert_nodes_to_db(nodes, batch_size) + + # 5. Display results + print(f"\n🎉 Processing complete!") + print(f"✅ Successfully inserted: {success_count}/{len(nodes)} nodes") + print(f"❌ Error count: {len(errors)}") + + if errors: + print(f"\n📋 Error details (first 10):") + for error in errors[:10]: + print(f" - {error}") + if len(errors) > 10: + print(f" ... {len(errors) - 10} more errors") + + finally: + # 6. 
Disconnect database connection + self.disconnect() + + +if __name__ == "__main__": + + POLARDB_CONFIG = { + "host": "xxx", + "port": 5432, + "user": "xxx", + "password": "xxx", + "database": "xxx", + "graph_name": "xxx" + + } + + + file_path = "/Users/ccl/Desktop/file/temp/result.json" + + + processor = MemoryDataProcessor(POLARDB_CONFIG) + + + processor.process_file(file_path, batch_size=1000) \ No newline at end of file diff --git a/examples/basic_modules/parseJson.py b/examples/basic_modules/parseJson.py new file mode 100644 index 000000000..d84abfae5 --- /dev/null +++ b/examples/basic_modules/parseJson.py @@ -0,0 +1,105 @@ +import os +import json +import psycopg2 +import sys + +# Add the parent directory to the path to allow imports +src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src')) +sys.path.insert(0, src_path) + +# from polardb_export_insert_1 import insert_data +from batchImport_polardbFromJson import insert_data, update_graph + +DB_CONFIG = { + 'host': 'xxx', + 'port': 5432, + 'database': 'xx', + 'user': 'xx', + 'password': 'xx' +} +conn = psycopg2.connect(**DB_CONFIG) + +def insert(batch): + + print(f"✅ insert() {len(batch)} records") + insert_data(conn, batch) + + + +def process_folder(folder_path, batch_size=1000): + + batch = [] + total_count = 0 + + for root, dirs, files in os.walk(folder_path): + for file in files: + # Only process .json files + if not file.endswith('.json'): + continue + + file_path = os.path.join(root, file) + print(f"📄 read file: {file_path}") + + try: + with open(file_path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + + if isinstance(obj, dict) and "id" in obj and "memory" in obj: + batch.append(obj) + total_count += 1 + + + if len(batch) >= batch_size: + insert(batch) + batch = [] + if "sources" in obj and isinstance(obj["sources"], str): + if not("user:" in obj["sources"] )and not("assistant:" in 
obj["sources"]): + continue + try: + import re + + cleaned_sources = obj["sources"].replace('\n', '').replace('\\n', '').replace('\ufeff', '') + + + if cleaned_sources.startswith('[') and cleaned_sources.endswith(']'): + inner_str = cleaned_sources[1:-1].strip() + + parts = re.split(r',\s*(?=\w+:)', inner_str) + + parts = [part.strip() for part in parts] + obj["sources"] = parts + else: + + obj["sources"] = [cleaned_sources] + except Exception as e: + print(f"⚠️ not parse sources: {e}") + print(f"⚠️ source content: {obj['sources'][:100]}...") + obj["sources"] = [] + else: + print(f"⚠️ skip: {line[:80]}...") + except json.JSONDecodeError: + print(f"⚠️ skil valid JSON: {line[:80]}...") + except (UnicodeDecodeError, IOError) as e: + print(f"⚠️ skip file {file_path}: {e}") + continue + + # 处理最后不足 batch_size 的部分 + if batch: + insert(batch) + update_graph() + + print(f"\n✅ end,total {total_count} records。") + + +if __name__ == "__main__": + # folder_path = r"/Users/ccl/Desktop/file/export13/ceshi" + # 10W + folder_path = r"/Users/ccl/Desktop/file/export15/Memory" + # 70W + folder_path = r"/Users/ccl/Desktop/file/ccl/export22/Memory" + process_folder(folder_path, batch_size=1000) From 58b4e1d23959b904494277a938b13d5d7f76ac01 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 27 Oct 2025 19:39:06 +0800 Subject: [PATCH 127/137] feat: fix polardb --- src/memos/graph_dbs/polardb.py | 39 ++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index e3367719e..e0a177626 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2193,7 +2193,24 @@ def _strip_wrapping_quotes(value: Any) -> Any: if time_field in node and hasattr(node[time_field], "isoformat"): node[time_field] = node[time_field].isoformat() - # Do not deserialize sources and usage; keep List[str] + # Deserialize sources, usage and tags if they are not lists + for 
field_name in ["sources", "usage", "tags"]: + if field_name in node and node[field_name] is not None: + field_value = node[field_name] + + # If it's a string, try to parse it as JSON + if isinstance(field_value, str): + try: + node[field_name] = json.loads(field_value) + except (json.JSONDecodeError, TypeError): + logger.warning(f"Failed to parse {field_name} as JSON, wrapping in list") + node[field_name] = [field_value] + + # If it's not a list, wrap it in a list + elif not isinstance(field_value, list): + logger.warning(f"{field_name} is not a list, wrapping value: {type(field_value)}") + node[field_name] = [field_value] + # Do not remove user_name; keep all fields return {"id": node.pop("id"), "memory": node.pop("memory", ""), "metadata": node} @@ -2386,7 +2403,25 @@ def _build_node_from_agtype(self, node_agtype, embedding=None): if embedding is not None: props["embedding"] = embedding - # Return standard format directly; no need to call _parse_node_new again + # Deserialize sources, usage and tags if they are not lists + for field_name in ["sources", "usage", "tags"]: + if field_name in props and props[field_name] is not None: + field_value = props[field_name] + + # If it's a string, try to parse it as JSON + if isinstance(field_value, str): + try: + props[field_name] = json.loads(field_value) + except (json.JSONDecodeError, TypeError): + logger.warning(f"Failed to parse {field_name} as JSON, wrapping in list") + props[field_name] = [field_value] + + # If it's not a list, wrap it in a list + elif not isinstance(field_value, list): + logger.warning(f"{field_name} is not a list, wrapping value: {type(field_value)}") + props[field_name] = [field_value] + + # Return standard format directly return {"id": props.get("id", ""), "memory": props.get("memory", ""), "metadata": props} except Exception: return None From e0c77acaaf2045d8e046ad279942f96dbdda0d60 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Mon, 27 Oct 2025 19:47:35 +0800 Subject: [PATCH 
128/137] fix --- src/memos/graph_dbs/polardb.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index e3367719e..4cc15fd81 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -502,7 +502,8 @@ def create_graph(self): @timed def create_edge(self): """Create all valid edge types if they do not exist""" - VALID_REL_TYPES = { + + valid_rel_types = { "AGGREGATE_TO", "FOLLOWS", "INFERS", @@ -511,7 +512,7 @@ def create_edge(self): "PARENT" } - for label_name in VALID_REL_TYPES: + for label_name in valid_rel_types: print(f"🪶 Creating elabel: {label_name}") try: with self.connection.cursor() as cursor: From 7ab927f3227f187dbb80f6a25291ff8b8b684ef9 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 27 Oct 2025 20:13:30 +0800 Subject: [PATCH 129/137] feat: fix polardb --- src/memos/graph_dbs/polardb.py | 51 ++++++++-------------------------- 1 file changed, 12 insertions(+), 39 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index be50be748..e0136eb6f 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2141,22 +2141,13 @@ def _parse_node(self, node_data: dict[str, Any]) -> dict[str, Any]: if time_field in node and hasattr(node[time_field], "isoformat"): node[time_field] = node[time_field].isoformat() - # Deserialize sources and usage if they are not lists + # Ensure sources, usage and tags are lists (not deserializing, just type checking) for field_name in ["sources", "usage", "tags"]: if field_name in node and node[field_name] is not None: field_value = node[field_name] - - # If it's a string, try to parse it as JSON - if isinstance(field_value, str): - try: - node[field_name] = json.loads(field_value) - except (json.JSONDecodeError, TypeError): - logger.warning(f"Failed to parse {field_name} as JSON, wrapping in list") - node[field_name] = [field_value] - - # If it's 
not a list, wrap it in a list - elif not isinstance(field_value, list): - logger.warning(f"{field_name} is not a list, wrapping value: {type(field_value)}") + # If it's not already a list, wrap it in a list + if not isinstance(field_value, list): + logger.warning(f"[_parse_node] {field_name} is not a list (type: {type(field_value).__name__}), wrapping in list") node[field_name] = [field_value] # Do not remove user_name; keep all fields @@ -2194,22 +2185,13 @@ def _strip_wrapping_quotes(value: Any) -> Any: if time_field in node and hasattr(node[time_field], "isoformat"): node[time_field] = node[time_field].isoformat() - # Deserialize sources, usage and tags if they are not lists + # Ensure sources, usage and tags are lists (not deserializing, just type checking) for field_name in ["sources", "usage", "tags"]: if field_name in node and node[field_name] is not None: field_value = node[field_name] - - # If it's a string, try to parse it as JSON - if isinstance(field_value, str): - try: - node[field_name] = json.loads(field_value) - except (json.JSONDecodeError, TypeError): - logger.warning(f"Failed to parse {field_name} as JSON, wrapping in list") - node[field_name] = [field_value] - - # If it's not a list, wrap it in a list - elif not isinstance(field_value, list): - logger.warning(f"{field_name} is not a list, wrapping value: {type(field_value)}") + # If it's not already a list, wrap it in a list + if not isinstance(field_value, list): + logger.warning(f"[_parse_node] {field_name} is not a list (type: {type(field_value).__name__}), wrapping in list") node[field_name] = [field_value] # Do not remove user_name; keep all fields @@ -2404,22 +2386,13 @@ def _build_node_from_agtype(self, node_agtype, embedding=None): if embedding is not None: props["embedding"] = embedding - # Deserialize sources, usage and tags if they are not lists + # Ensure sources, usage and tags are lists (not deserializing, just type checking) for field_name in ["sources", "usage", "tags"]: if 
field_name in props and props[field_name] is not None: field_value = props[field_name] - - # If it's a string, try to parse it as JSON - if isinstance(field_value, str): - try: - props[field_name] = json.loads(field_value) - except (json.JSONDecodeError, TypeError): - logger.warning(f"Failed to parse {field_name} as JSON, wrapping in list") - props[field_name] = [field_value] - - # If it's not a list, wrap it in a list - elif not isinstance(field_value, list): - logger.warning(f"{field_name} is not a list, wrapping value: {type(field_value)}") + # If it's not already a list, wrap it in a list + if not isinstance(field_value, list): + logger.warning(f"[_build_node_from_agtype] {field_name} is not a list (type: {type(field_value).__name__}), wrapping in list") props[field_name] = [field_value] # Return standard format directly From 8608c078434ad8024460c9e37e4a3891d7be2df5 Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Mon, 27 Oct 2025 20:14:39 +0800 Subject: [PATCH 130/137] fix --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9445084d7..3745582f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,6 @@ dependencies = [ "scikit-learn (>=1.7.0,<2.0.0)", # Machine learning "fastmcp (>=2.10.5,<3.0.0)", "python-dateutil (>=2.9.0.post0,<3.0.0)", - "nacos-sdk-python (>=1.0.0,<2.0.0)", # Nacos configuration client ] [project.urls] From 26f2b5d62b85caa660aaf6b272dd555a34e8292a Mon Sep 17 00:00:00 2001 From: ccl <13282138256@163.com> Date: Mon, 27 Oct 2025 20:16:18 +0800 Subject: [PATCH 131/137] fix --- docker/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/requirements.txt b/docker/requirements.txt index bb0a27cdd..d20c0b36e 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -157,5 +157,4 @@ volcengine-python-sdk==4.0.6 watchfiles==1.1.0 websockets==15.0.1 xlrd==2.0.2 -xlsxwriter==3.2.5 -nacos-sdk-python==1.0.0 \ No newline at end 
of file +xlsxwriter==3.2.5 \ No newline at end of file From d56659075cd050b619fa82fe17ee6fa62525d0f8 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 27 Oct 2025 20:25:04 +0800 Subject: [PATCH 132/137] feat: fix polardb --- src/memos/graph_dbs/polardb.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index e0136eb6f..d2d75f005 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2141,15 +2141,6 @@ def _parse_node(self, node_data: dict[str, Any]) -> dict[str, Any]: if time_field in node and hasattr(node[time_field], "isoformat"): node[time_field] = node[time_field].isoformat() - # Ensure sources, usage and tags are lists (not deserializing, just type checking) - for field_name in ["sources", "usage", "tags"]: - if field_name in node and node[field_name] is not None: - field_value = node[field_name] - # If it's not already a list, wrap it in a list - if not isinstance(field_value, list): - logger.warning(f"[_parse_node] {field_name} is not a list (type: {type(field_value).__name__}), wrapping in list") - node[field_name] = [field_value] - # Do not remove user_name; keep all fields # 1 @@ -2185,15 +2176,6 @@ def _strip_wrapping_quotes(value: Any) -> Any: if time_field in node and hasattr(node[time_field], "isoformat"): node[time_field] = node[time_field].isoformat() - # Ensure sources, usage and tags are lists (not deserializing, just type checking) - for field_name in ["sources", "usage", "tags"]: - if field_name in node and node[field_name] is not None: - field_value = node[field_name] - # If it's not already a list, wrap it in a list - if not isinstance(field_value, list): - logger.warning(f"[_parse_node] {field_name} is not a list (type: {type(field_value).__name__}), wrapping in list") - node[field_name] = [field_value] - # Do not remove user_name; keep all fields return {"id": node.pop("id"), "memory": node.pop("memory", 
""), "metadata": node} @@ -2386,15 +2368,6 @@ def _build_node_from_agtype(self, node_agtype, embedding=None): if embedding is not None: props["embedding"] = embedding - # Ensure sources, usage and tags are lists (not deserializing, just type checking) - for field_name in ["sources", "usage", "tags"]: - if field_name in props and props[field_name] is not None: - field_value = props[field_name] - # If it's not already a list, wrap it in a list - if not isinstance(field_value, list): - logger.warning(f"[_build_node_from_agtype] {field_name} is not a list (type: {type(field_value).__name__}), wrapping in list") - props[field_name] = [field_value] - # Return standard format directly return {"id": props.get("id", ""), "memory": props.get("memory", ""), "metadata": props} except Exception: From af929c906e4131a832dd3ffaac78f27da2660f0f Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 27 Oct 2025 20:30:02 +0800 Subject: [PATCH 133/137] feat: delete polardb --- .../batchImport_polardbFromJson.py | 443 ------------------ examples/basic_modules/importPolarDbEdge.py | 111 ----- examples/basic_modules/import_polardb_incr.py | 308 ------------ examples/basic_modules/parseJson.py | 105 ----- 4 files changed, 967 deletions(-) delete mode 100644 examples/basic_modules/batchImport_polardbFromJson.py delete mode 100644 examples/basic_modules/importPolarDbEdge.py delete mode 100644 examples/basic_modules/import_polardb_incr.py delete mode 100644 examples/basic_modules/parseJson.py diff --git a/examples/basic_modules/batchImport_polardbFromJson.py b/examples/basic_modules/batchImport_polardbFromJson.py deleted file mode 100644 index 355d13127..000000000 --- a/examples/basic_modules/batchImport_polardbFromJson.py +++ /dev/null @@ -1,443 +0,0 @@ -import json -import psycopg2 -from psycopg2.extras import Json, execute_batch -import numpy as np -import sys -import os -from datetime import datetime - -# PolarDB configuration -POLARDB_CONFIG = { - 'host': 
'memory.pg.polardb.rds.aliyuncs.com', - 'port': 5432, - 'database': 'memtensor_memos', - 'user': 'adimin', - 'password': 'Openmem0925', - - "graph_name": "memtensor_memos_graph" -} - - -class PolarDBGraph: - def __init__(self, config): - self.config = config - self.connection = psycopg2.connect( - host=config["host"], - port=config["port"], - user=config["user"], - password=config["password"], - database=config["database"] - ) - self.graph_name = config.get("graph_name") - # Set autocommit to False to manually control transactions - self.connection.autocommit = False - print("✅ PolarDB connection successful") - - def update_graph_id_in_properties(self): - """Update properties field to add graph_id""" - print("🔄 Starting to update properties field, adding graph_id...") - start_time = datetime.now() - - try: - with self.connection.cursor() as cursor: - # Execute UPDATE to add graph_id into properties - update_sql = f""" - UPDATE {self.graph_name}."Memory" - SET properties = agtype_concat(properties, agtype_build_map('graph_id', id::text)) - """ - cursor.execute(update_sql) - updated_count = cursor.rowcount - - self.connection.commit() - - elapsed = (datetime.now() - start_time).total_seconds() - print(f"✅ Successfully updated {updated_count} records' properties, elapsed: {elapsed:.2f}s") - return updated_count - - except Exception as e: - self.connection.rollback() - print(f"❌ Failed to update properties field: {e}") - return 0 - - def batch_add_nodes_optimized(self, nodes, batch_size=1000): - """Optimized batch insertion of nodes""" - success_count = 0 - error_count = 0 - total_nodes = len(nodes) - - print(f"🚀 Start processing {total_nodes} records, batch size: {batch_size}") - start_time = datetime.now() - - # Process in batches - for batch_start in range(0, total_nodes, batch_size): - batch_end = min(batch_start + batch_size, total_nodes) - current_batch = nodes[batch_start:batch_end] - - batch_success = 0 - batch_errors = [] - - try: - with 
self.connection.cursor() as cursor: - - # Prepare batch insert data - insert_data_1024 = [] - # insert_data_768 = [] - # insert_data_3072 = [] - insert_data_no_embedding = [] - - for node in current_batch: - try: - id_ = node["id"] - memory_ = node["memory"] - metadata = node["metadata"] - - # get_graph_id_query = f""" - # SELECT ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring) - # """ - # cursor.execute(get_graph_id_query, (id_,)) - # graph_id = cursor.fetchone()[0] - # properties['graph_id'] = str(graph_id) - - # Extract embedding - embedding = None - for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: - if embedding_key in metadata and metadata[embedding_key]: - embedding = metadata[embedding_key] - break - - if isinstance(embedding, str): - try: - embedding = json.loads(embedding) - except json.JSONDecodeError: - print(f"⚠️ Unable to parse embedding string: {embedding_key}") - embedding = None - # Clean properties - properties = self.clean_properties(metadata) - properties["id"] = id_ - properties["memory"] = memory_ - - # Classify by embedding dimension - field_name = self.detect_embedding_field(embedding) - vector_value = self.convert_to_vector(embedding) if field_name else None - - if field_name == "embedding" and vector_value: - insert_data_1024.append((id_, Json(properties), vector_value)) - # elif field_name == "embedding_768" and vector_value: - # insert_data_768.append((id_, Json(properties), vector_value)) - # elif field_name == "embedding_3072" and vector_value: - # insert_data_3072.append((id_, Json(properties), vector_value)) - else: - insert_data_no_embedding.append((id_, Json(properties))) - - except Exception as e: - batch_errors.append(f"ID: {node.get('id', 'unknown')} - {e}") - - # Batch insert for different dimensions - if insert_data_1024: - insert_sql_1024 = f""" - INSERT INTO "Memory" (id, properties, embedding) - VALUES 
(ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) - """ - execute_batch(cursor, insert_sql_1024, insert_data_1024) - batch_success += len(insert_data_1024) - - # if insert_data_768: - # insert_sql_768 = f""" - # INSERT INTO "Memory" (id, properties, embedding_768) - # VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) - # """ - # execute_batch(cursor, insert_sql_768, insert_data_768) - # batch_success += len(insert_data_768) - # - # if insert_data_3072: - # insert_sql_3072 = f""" - # INSERT INTO "Memory" (id, properties, embedding_3072) - # VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) - # """ - # execute_batch(cursor, insert_sql_3072, insert_data_3072) - # batch_success += len(insert_data_3072) - - if insert_data_no_embedding: - insert_sql_no_embedding = f""" - INSERT INTO "Memory" (id, properties) - VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s) - """ - execute_batch(cursor, insert_sql_no_embedding, insert_data_no_embedding) - batch_success += len(insert_data_no_embedding) - - # Commit current batch - self.connection.commit() - success_count += batch_success - error_count += len(batch_errors) - - # Progress display - elapsed = (datetime.now() - start_time).total_seconds() - progress = (batch_end / total_nodes) * 100 - estimated_total = (elapsed / batch_end) * total_nodes if batch_end > 0 else 0 - remaining = estimated_total - elapsed - - print(f"📊 Progress: {batch_end}/{total_nodes} ({progress:.1f}%) | " - f"Success: {success_count} | Failures: {error_count} | " - f"Elapsed: {elapsed:.0f}s | Remaining: {remaining:.0f}s") - - # Output batch errors - if batch_errors: - print(f"❌ Errors in this batch: {len(batch_errors)}") - for i, error in enumerate(batch_errors[:5]): # Only show first 5 errors - print(f" {i + 1}. 
{error}") - if len(batch_errors) > 5: - print(f" ... {len(batch_errors) - 5} more errors") - - except Exception as e: - self.connection.rollback() - error_count += len(current_batch) - print(f"❌ Batch {batch_start}-{batch_end} failed: {e}") - - total_time = (datetime.now() - start_time).total_seconds() - print(f"✅ Batch insertion complete: Success {success_count}, Failures {error_count}, Total time: {total_time:.2f}s") - - return success_count, error_count - - def clean_properties(self, props): - """Remove vector fields""" - vector_keys = {"embedding", "embedding_1024", "embedding_3072", "embedding_768"} - if not isinstance(props, dict): - return {} - return {k: v for k, v in props.items() if k not in vector_keys} - - def detect_embedding_field(self, embedding_list): - """Detect embedding dimension and return corresponding field name""" - if not embedding_list: - return None - dim = len(embedding_list) - # print("---------",dim) - if dim == 1024: - return "embedding" - elif dim == 768: - return "embedding_768" - elif dim == 3072: - return "embedding_3072" - else: - print(f"⚠️ Unknown embedding dimension {dim}, skipping vector") - return None - - def convert_to_vector(self, embedding_list): - """Convert embedding list to vector string""" - if not embedding_list: - return None - if isinstance(embedding_list, np.ndarray): - embedding_list = embedding_list.tolist() - return "[" + ",".join(str(float(x)) for x in embedding_list) + "]" - - def close(self): - """Close database connection""" - if self.connection: - self.connection.close() - print("🔒 PolarDB connection closed") - - -def getPolarDb(): - """Create PolarDB graph database instance""" - return PolarDBGraph(POLARDB_CONFIG) - - -def process_metadata(item): - """Process metadata, extract and convert fields""" - metadata = {} - for key, value in item.items(): - if key not in ["id", "memory"]: - # Type conversion - if key == "confidence": - try: - metadata[key] = float(value) - except (ValueError, TypeError): - 
metadata[key] = value - elif key == "sources" or key == "usage": - if isinstance(value, str): - try: - parsed_value = json.loads(value) - metadata[key] = [json.dumps(item) for item in parsed_value] if isinstance(parsed_value, - list) else [ - json.dumps(parsed_value)] - except json.JSONDecodeError: - metadata[key] = value - else: - metadata[key] = value - elif key == "tags": - if isinstance(value, str): - if value.startswith('[') and value.endswith(']'): - try: - metadata[key] = json.loads(value) - except json.JSONDecodeError: - metadata[key] = [tag.strip() for tag in value[1:-1].split(',')] - else: - metadata[key] = value - else: - metadata[key] = value - else: - metadata[key] = value - return metadata - - -def extract_embedding(item): - """Extract embedding from data item""" - embedding = None - for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: - if embedding_key in item and item[embedding_key]: - embedding_value = item[embedding_key] - if isinstance(embedding_value, str): - try: - embedding = json.loads(embedding_value) - except json.JSONDecodeError: - print(f"⚠️ Unable to parse embedding string: {embedding_key}") - embedding = None - else: - embedding = embedding_value - break - return embedding - - -def prepare_nodes_for_insertion(data_list): - """Prepare node data for insertion""" - nodes_to_insert = [] - processed_count = 0 - skipped_count = 0 - - for item in data_list: - id_ = item.get("id") - memory_ = item.get("memory") - - if not id_ or not memory_: - print(f"⚠️ Skipping invalid data: ID or memory is empty") - skipped_count += 1 - continue - - # Process metadata - metadata = process_metadata(item) - - # Handle embedding field - embedding = extract_embedding(item) - if embedding: - metadata["embedding"] = embedding - - # Build data for insertion - nodes_to_insert.append({ - "id": id_, - "memory": memory_, - "metadata": metadata - }) - processed_count += 1 - - # Show progress - if processed_count % 10000 == 0: - 
print(f"📝 Preprocessed {processed_count} records") - - print(f"✅ Data preprocessing complete: Valid {processed_count}, Skipped {skipped_count}") - return nodes_to_insert - - -def insert_data_optimized(data_list, batch_size=1000): - """Optimized data insertion""" - graph = getPolarDb() - - # Data preprocessing - print("🔄 Starting data preprocessing...") - nodes_to_insert = prepare_nodes_for_insertion(data_list) - - if not nodes_to_insert: - print("⚠️ No valid data to insert") - graph.close() - return 0, 0 - - # Use optimized version, set batch size to 1000 - # Adjust batch size based on conditions: - # - Good network: 1000-2000 - # - Average network: 500-1000 - # - Limited memory: 200-500 - success_count, error_count = graph.batch_add_nodes_optimized(nodes_to_insert, batch_size) - - graph.close() - return success_count, error_count - - - -def load_data_from_file(filename): - """Load data from file""" - print(f"📂 Loading file: {filename}") - try: - with open(filename, "r", encoding="utf-8") as f: - data = json.load(f) - print(f"📂 Loaded {len(data)} records from file {filename}") - return data - except Exception as e: - print(f"❌ Failed to load file: {e}") - return [] - -def update_graph(): - print("-----------update_graph[start]") - graph = getPolarDb() - graph.update_graph_id_in_properties() - print("---------update_graph[end]") - -def insert_data(conn, data): - # Record total start time - total_start_time = datetime.now() - - - if not data: - print("⚠️ No data") - return - - print(f"🎯 Total records to process: {len(data)}") - success_count, error_count = insert_data_optimized(data, batch_size=1000) - - # Compute total time - total_time = (datetime.now() - total_start_time).total_seconds() - minutes, seconds = divmod(total_time, 60) - hours, minutes = divmod(minutes, 60) - - print(f"\n🎉 Processing complete!") - print(f"📊 Final results:") - print(f" ✅ Success: {success_count}") - print(f" ❌ Failures: {error_count}") - print(f" ⏱️ Total time: {int(hours)}h 
{int(minutes)}m {seconds:.2f}s") - -def main(): - json_file = r"/Users/ccl/Desktop/file/export13/ceshi/ceshi.json" - - # Record total start time - total_start_time = datetime.now() - - # Load data - data = load_data_from_file(json_file) - if not data: - print("⚠️ No data") - return - - print(f"🎯 Total records to process: {len(data)}") - - # Use optimized version, set batch size to 1000 - # Adjust batch size based on conditions: - # - Good network: 1000-2000 - # - Average network: 500-1000 - # - Limited memory: 200-500 - success_count, error_count = insert_data_optimized(data, batch_size=1000) - - # Compute total time - total_time = (datetime.now() - total_start_time).total_seconds() - minutes, seconds = divmod(total_time, 60) - hours, minutes = divmod(minutes, 60) - - print(f"\n🎉 Processing complete!") - print(f"📊 Final results:") - print(f" ✅ Success: {success_count}") - print(f" ❌ Failures: {error_count}") - print(f" ⏱️ Total time: {int(hours)}h {int(minutes)}m {seconds:.2f}s") - - if success_count > 0: - records_per_second = success_count / total_time - print(f" 🚀 Processing speed: {records_per_second:.2f} records/sec") - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/examples/basic_modules/importPolarDbEdge.py b/examples/basic_modules/importPolarDbEdge.py deleted file mode 100644 index e79e12cec..000000000 --- a/examples/basic_modules/importPolarDbEdge.py +++ /dev/null @@ -1,111 +0,0 @@ -import os -import json -import psycopg2 - -# 数据库连接配置 -DB_CONFIG = { - 'host': 'xxx', - 'port': 5432, - 'database': 'xxx', - 'user': 'xxx', - 'password': 'xxx' -} - -# 顶层目录 -EDGE_ROOT_DIR = r"/Users/ccl/Desktop/file/ccl/export22" - -# 合法的关系文件夹(白名单) -VALID_REL_TYPES = { - "AGGREGATE_TO", - "FOLLOWS", - "INFERS", - "MERGED_TO", - "RELATE_TO", - "PARENT" -} - -BATCH_SIZE = 1000 - - -def insert_edges(conn, edges, label_name): - with conn.cursor() as cur: - for e in edges: - src_id = e["src_id"] - dst_id = e["dst_id"] - user_name = e["user_name"] - - 
sql = f""" - INSERT INTO memtensor_memos_graph."{label_name}"(id, start_id, end_id, properties) - SELECT - ag_catalog._next_graph_id('memtensor_memos_graph'::name, '{label_name}'), - ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{src_id}'::text::cstring), - ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{dst_id}'::text::cstring), - jsonb_build_object('user_name', '{user_name}')::text::agtype - WHERE NOT EXISTS ( - SELECT 1 FROM memtensor_memos_graph."{label_name}" - WHERE start_id = ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{src_id}'::text::cstring) - AND end_id = ag_catalog._make_graph_id('memtensor_memos_graph'::name, 'Memory'::name, '{dst_id}'::text::cstring) - ); - """ - cur.execute(sql) - conn.commit() - - -def process_relation_folder(conn, folder_path, label_name): - print(f"\n🔗 Processing relation: {label_name}") - - # create_elabel(conn, label_name) - for root, _, files in os.walk(folder_path): - for file in files: - if not (file.endswith(".json") or file.endswith(".txt")): - continue - file_path = os.path.join(root, file) - print(f"📄 Reading file: {file_path}") - batch = [] - with open(file_path, "r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if not line: - continue - try: - obj = json.loads(line) - batch.append(obj) - except json.JSONDecodeError: - print(f"⚠️ JSON decode error in {file_path}: {line}") - continue - - if len(batch) >= BATCH_SIZE: - insert_edges(conn, batch, label_name) - print(f"✅ Inserted (or skipped) {len(batch)} edges.") - batch.clear() - - if batch: - insert_edges(conn, batch, label_name) - print(f"✅ Inserted (or skipped) {len(batch)} edges.") - - -def main(): - conn = psycopg2.connect(**DB_CONFIG) - try: - for folder_name in os.listdir(EDGE_ROOT_DIR): - folder_path = os.path.join(EDGE_ROOT_DIR, folder_name) - if not os.path.isdir(folder_path): - continue - - - if folder_name.upper() not in VALID_REL_TYPES: - print(f"🚫 Skipping 
non-relation folder: {folder_name}") - continue - - - label_name = folder_name - process_relation_folder(conn, folder_path, label_name) - - print("\n🎉 All relation folders processed successfully!") - - finally: - conn.close() - - -if __name__ == "__main__": - main() diff --git a/examples/basic_modules/import_polardb_incr.py b/examples/basic_modules/import_polardb_incr.py deleted file mode 100644 index 60e7c2045..000000000 --- a/examples/basic_modules/import_polardb_incr.py +++ /dev/null @@ -1,308 +0,0 @@ -import json -import os -from collections import Counter -from psycopg2.extras import execute_batch, Json -import psycopg2 - - -class MemoryDataProcessor: - def __init__(self, db_config): - """ - Initialize database connection - - Args: - db_config: Database connection configuration - graph_name: Graph database name - """ - self.db_config = db_config - self.graph_name = db_config.get('graph_name') - print("fff:",db_config.get('graph_name')) - self.connection = None - - def connect(self): - """Connect to database""" - try: - self.connection = psycopg2.connect( - host=self.db_config["host"], - port=self.db_config["port"], - user=self.db_config["user"], - password=self.db_config["password"], - database=self.db_config["database"] - ) - print("✅ Database connection successful") - return True - except Exception as e: - print(f"❌ Database connection failed: {e}") - return False - - def disconnect(self): - """Disconnect database connection""" - if self.connection: - self.connection.close() - print("✅ Database connection closed") - - def extract_nodes_simple(self, file_path): - """Extract simplified id and properties from JSON file""" - try: - # Check if file exists - if not os.path.exists(file_path): - print(f"❌ Error: File '{file_path}' does not exist") - return [] - - # First try reading with utf-8-sig (handle BOM) - try: - with open(file_path, 'r', encoding='utf-8-sig') as file: - data = json.load(file) - print("✅ Successfully read file with utf-8-sig encoding") - 
except json.JSONDecodeError: - # If utf-8-sig fails, try utf-8 - try: - with open(file_path, 'r', encoding='utf-8') as file: - data = json.load(file) - print("✅ Successfully read file with utf-8 encoding") - except json.JSONDecodeError as e: - print(f"❌ JSON parse error: {e}") - return [] - - result = [] - tables = data.get('tables', []) - - print(f"📊 Found {len(tables)} tables") - - for i, table in enumerate(tables, 1): - n_data = table.get('n', {}) - value = n_data.get('value', {}) - - # Extract id and properties - # node_id = value.get('id') - properties = value.get('properties', {}) - node_id = properties.get('id', {}) - - - - if node_id is not None: - # Build data in insertion format - node_data = { - "id": str(node_id), # 转换为字符串 - "memory": properties.get("memory", ""), - "metadata": properties - } - result.append(node_data) - - print(f"🎯 Successfully extracted {len(result)} nodes") - return result - - except Exception as e: - print(f"❌ Error occurred while reading file: {e}") - return [] - - def clean_properties(self, properties): - """Clean properties and remove unnecessary fields""" - # Remove embedding-related fields; these will be handled separately - exclude_fields = [ - "embedding", "embedding_1024", "embedding_768", "embedding_3072", - "embedding_1024_vector", "embedding_768_vector", "embedding_3072_vector" - ] - - cleaned = {} - for key, value in properties.items(): - if key not in exclude_fields: - cleaned[key] = value - - return cleaned - - def detect_embedding_field(self, embedding): - """Detect embedding dimension and return corresponding field name""" - if not embedding: - return None - - if isinstance(embedding, list): - length = len(embedding) - if length == 1024: - return "embedding" - elif length == 768: - return "embedding_768" - elif length == 3072: - return "embedding_3072" - - return None - - def convert_to_vector(self, embedding): - """Convert embedding to PostgreSQL vector format""" - if not embedding: - return None - - try: - if 
isinstance(embedding, list): - # Convert to PostgreSQL vector string format: [1,2,3] - vector_str = "[" + ",".join(map(str, embedding)) + "]" - return vector_str - else: - return None - except Exception as e: - print(f"⚠️ Error converting vector: {e}") - return None - - def insert_nodes_to_db(self, nodes, batch_size=1000): - """Insert node data into the database""" - if not nodes: - print("❌ No data to insert") - return 0, [] - - if not self.connection: - print("❌ Database not connected") - return 0, [] - - total_success = 0 - all_errors = [] - - # 分批处理 - for i in range(0, len(nodes), batch_size): - current_batch = nodes[i:i + batch_size] - batch_success = 0 - batch_errors = [] - - print( - f"🔄 Processing batch {i // batch_size + 1}/{(len(nodes) - 1) // batch_size + 1} ({len(current_batch)} nodes)") - - try: - with self.connection.cursor() as cursor: - # Prepare batch insert data - insert_data_1024 = [] - insert_data_no_embedding = [] - - for node in current_batch: - try: - id_ = node["id"] - memory_ = node["memory"] - metadata = node["metadata"] - - embedding = None - for embedding_key in ["embedding_1024", "embedding_768", "embedding_3072", "embedding"]: - if embedding_key in metadata and metadata[embedding_key]: - embedding = metadata[embedding_key] - break - - if isinstance(embedding, str): - try: - embedding = json.loads(embedding) - except json.JSONDecodeError: - print(f"⚠️ Unable to parse embedding string: {embedding_key}") - embedding = None - - properties = self.clean_properties(metadata) - properties["id"] = id_ - properties["memory"] = memory_ - - try: - get_graph_id_query = f""" - SELECT ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring) - """ - cursor.execute(get_graph_id_query, (id_,)) - graph_id = cursor.fetchone()[0] - properties['graph_id'] = str(graph_id) - except Exception as e: - print(f"⚠️ Failed to generate graph_id: {e}") - properties['graph_id'] = str(id_) - - - field_name = 
self.detect_embedding_field(embedding) - vector_value = self.convert_to_vector(embedding) if field_name else None - - if field_name == "embedding" and vector_value: - insert_data_1024.append((id_, Json(properties), vector_value)) - else: - insert_data_no_embedding.append((id_, Json(properties))) - - except Exception as e: - batch_errors.append(f"ID: {node.get('id', 'unknown')} - {e}") - - # 批量插入不同维度的数据 - if insert_data_1024: - insert_sql_1024 = f""" - INSERT INTO "Memory" (id, properties, embedding) - VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s, %s) - """ - execute_batch(cursor, insert_sql_1024, insert_data_1024) - batch_success += len(insert_data_1024) - print(f" ✅ Inserted {len(insert_data_1024)} nodes with embedding") - - if insert_data_no_embedding: - insert_sql_no_embedding = f""" - INSERT INTO "Memory" (id, properties) - VALUES (ag_catalog._make_graph_id('{self.graph_name}'::name, 'Memory'::name, %s::text::cstring), %s) - """ - execute_batch(cursor, insert_sql_no_embedding, insert_data_no_embedding) - batch_success += len(insert_data_no_embedding) - print(f" ✅ Inserted {len(insert_data_no_embedding)} nodes without embedding") - - # 提交当前批次 - self.connection.commit() - total_success += batch_success - all_errors.extend(batch_errors) - - print(f" ✅ Batch complete: {batch_success} nodes inserted successfully") - - except Exception as e: - self.connection.rollback() - batch_errors.append(f"Batch insert failed: {e}") - all_errors.extend(batch_errors) - print(f"❌ Batch insertion failed: {e}") - - return total_success, all_errors - - def process_file(self, file_path, batch_size): - """Complete processing flow: extract data and insert into database""" - print("🚀 Starting to process data file...") - - # 1. Extract data - nodes = self.extract_nodes_simple(file_path) - if not nodes: - return - - # 3. Connect to database - if not self.connect(): - return - - try: - # 4. 
Insert data into database - print(f"\n💾 Starting to insert data into database...") - success_count, errors = self.insert_nodes_to_db(nodes, batch_size) - - # 5. Display results - print(f"\n🎉 Processing complete!") - print(f"✅ Successfully inserted: {success_count}/{len(nodes)} nodes") - print(f"❌ Error count: {len(errors)}") - - if errors: - print(f"\n📋 Error details (first 10):") - for error in errors[:10]: - print(f" - {error}") - if len(errors) > 10: - print(f" ... {len(errors) - 10} more errors") - - finally: - # 6. Disconnect database connection - self.disconnect() - - -if __name__ == "__main__": - - POLARDB_CONFIG = { - "host": "xxx", - "port": 5432, - "user": "xxx", - "password": "xxx", - "database": "xxx", - "graph_name": "xxx" - - } - - - file_path = "/Users/ccl/Desktop/file/temp/result.json" - - - processor = MemoryDataProcessor(POLARDB_CONFIG) - - - processor.process_file(file_path, batch_size=1000) \ No newline at end of file diff --git a/examples/basic_modules/parseJson.py b/examples/basic_modules/parseJson.py deleted file mode 100644 index d84abfae5..000000000 --- a/examples/basic_modules/parseJson.py +++ /dev/null @@ -1,105 +0,0 @@ -import os -import json -import psycopg2 -import sys - -# Add the parent directory to the path to allow imports -src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src')) -sys.path.insert(0, src_path) - -# from polardb_export_insert_1 import insert_data -from batchImport_polardbFromJson import insert_data, update_graph - -DB_CONFIG = { - 'host': 'xxx', - 'port': 5432, - 'database': 'xx', - 'user': 'xx', - 'password': 'xx' -} -conn = psycopg2.connect(**DB_CONFIG) - -def insert(batch): - - print(f"✅ insert() {len(batch)} records") - insert_data(conn, batch) - - - -def process_folder(folder_path, batch_size=1000): - - batch = [] - total_count = 0 - - for root, dirs, files in os.walk(folder_path): - for file in files: - # Only process .json files - if not file.endswith('.json'): - continue - - 
file_path = os.path.join(root, file) - print(f"📄 read file: {file_path}") - - try: - with open(file_path, "r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if not line: - continue - try: - obj = json.loads(line) - - if isinstance(obj, dict) and "id" in obj and "memory" in obj: - batch.append(obj) - total_count += 1 - - - if len(batch) >= batch_size: - insert(batch) - batch = [] - if "sources" in obj and isinstance(obj["sources"], str): - if not("user:" in obj["sources"] )and not("assistant:" in obj["sources"]): - continue - try: - import re - - cleaned_sources = obj["sources"].replace('\n', '').replace('\\n', '').replace('\ufeff', '') - - - if cleaned_sources.startswith('[') and cleaned_sources.endswith(']'): - inner_str = cleaned_sources[1:-1].strip() - - parts = re.split(r',\s*(?=\w+:)', inner_str) - - parts = [part.strip() for part in parts] - obj["sources"] = parts - else: - - obj["sources"] = [cleaned_sources] - except Exception as e: - print(f"⚠️ not parse sources: {e}") - print(f"⚠️ source content: {obj['sources'][:100]}...") - obj["sources"] = [] - else: - print(f"⚠️ skip: {line[:80]}...") - except json.JSONDecodeError: - print(f"⚠️ skil valid JSON: {line[:80]}...") - except (UnicodeDecodeError, IOError) as e: - print(f"⚠️ skip file {file_path}: {e}") - continue - - # 处理最后不足 batch_size 的部分 - if batch: - insert(batch) - update_graph() - - print(f"\n✅ end,total {total_count} records。") - - -if __name__ == "__main__": - # folder_path = r"/Users/ccl/Desktop/file/export13/ceshi" - # 10W - folder_path = r"/Users/ccl/Desktop/file/export15/Memory" - # 70W - folder_path = r"/Users/ccl/Desktop/file/ccl/export22/Memory" - process_folder(folder_path, batch_size=1000) From 5fef6aa0475b02d2a0620c24c6b28d79f5e31b2b Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 27 Oct 2025 21:00:53 +0800 Subject: [PATCH 134/137] feat: fix utils --- src/memos/mem_cube/utils.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff 
--git a/src/memos/mem_cube/utils.py b/src/memos/mem_cube/utils.py index e23412bc1..916efa506 100644 --- a/src/memos/mem_cube/utils.py +++ b/src/memos/mem_cube/utils.py @@ -128,14 +128,13 @@ def merge_config_with_default( ) # Handle use_multi_db transition - if not default_graph_config.get("use_multi_db", True): - if merged_graph_config.get("use_multi_db", True): - merged_graph_config["use_multi_db"] = False - # For Neo4j: db_name becomes user_name in single-db mode - if "neo4j" in default_backend and "db_name" in merged_graph_config: - merged_graph_config["user_name"] = merged_graph_config.get("db_name") - merged_graph_config["db_name"] = default_graph_config.get("db_name") - logger.info("Transitioned to single-db mode (use_multi_db=False)") + if not default_graph_config.get("use_multi_db", True) and merged_graph_config.get("use_multi_db", True): + merged_graph_config["use_multi_db"] = False + # For Neo4j: db_name becomes user_name in single-db mode + if "neo4j" in default_backend and "db_name" in merged_graph_config: + merged_graph_config["user_name"] = merged_graph_config.get("db_name") + merged_graph_config["db_name"] = default_graph_config.get("db_name") + logger.info("Transitioned to single-db mode (use_multi_db=False)") preserved_graph_db = { "backend": default_backend, From 5a707426305ab7b6daf5e167440c3ffa26a229b2 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 27 Oct 2025 21:26:27 +0800 Subject: [PATCH 135/137] feat: fix polardb --- src/memos/graph_dbs/polardb.py | 175 ++++++++------------------------- 1 file changed, 40 insertions(+), 135 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index d2d75f005..b90b2e0d7 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -703,11 +703,8 @@ def get_node(self, id: str, include_embedding: bool = False, user_name: str | No Returns: dict: Node properties as key-value pairs, or None if not found. 
""" - # Build select fields - if include_embedding: - select_fields = "id, properties, embedding" - else: - select_fields = "id, properties" + + select_fields = "id, properties, embedding" if include_embedding else "id, properties" # Helper function to format parameter value def format_param_value(value: str) -> str: @@ -807,11 +804,6 @@ def get_nodes(self, ids: list[str], user_name: str | None = None,**kwargs) -> l query += " AND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" params.append(f'"{user_name}"') - # if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - # user_name = kwargs.get("cube_name", self._get_config_value("user_name")) - # query += " AND ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" - # params.append(f"{user_name}") - print(f"[get_nodes] query: {query}, params: {params}") with self.connection.cursor() as cursor: cursor.execute(query, params) @@ -835,8 +827,10 @@ def get_nodes(self, ids: list[str], user_name: str | None = None,**kwargs) -> l try: print("embedding_json:", embedding_json) # remove embedding - # embedding = json.loads(embedding_json) if isinstance(embedding_json, str) else embedding_json + """ + embedding = json.loads(embedding_json) if isinstance(embedding_json, str) else embedding_json # properties["embedding"] = embedding + """ except (json.JSONDecodeError, TypeError): logger.warning(f"Failed to parse embedding for node {node_id}") nodes.append(self._parse_node( @@ -1147,7 +1141,7 @@ def get_subgraph( user_name = user_name if user_name else self._get_config_value("user_name") # Use a simplified query to get the subgraph (temporarily only direct neighbors) - query1 = f""" + """ SELECT * FROM cypher('{self.db_name}_graph', $$ MATCH(center: Memory)-[r * 1..{depth}]->(neighbor:Memory) WHERE @@ -1269,12 +1263,14 @@ def search_by_embedding( where_clauses.append("embedding is not null") # Add user_name filter like 
nebular.py + """ # user_name = self._get_config_value("user_name") # if not self.config.use_multi_db and user_name: # if kwargs.get("cube_name"): # where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{kwargs['cube_name']}\"'::agtype") # else: # where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype") + """ user_name = user_name if user_name else self.config.user_name where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype") @@ -1313,12 +1309,14 @@ def search_by_embedding( results = cursor.fetchall() output = [] for row in results: + """ polarId = row[0] # id properties = row[1] # properties # embedding = row[3] # embedding - oldId = row[3] # old_id + """ + oldid = row[3] # old_id score = row[4] # scope - id_val = str(oldId) + id_val = str(oldid) score_val = float(score) score_val = (score_val + 1) / 2 # align to neo4j, Normalized Cosine Score if threshold is None or score_val >= threshold: @@ -1378,10 +1376,14 @@ def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = where_conditions.append(f"n.{field} = {escaped_value}") elif op == "in": where_conditions.append(f"n.{field} IN {escaped_value}") + """ # where_conditions.append(f"{escaped_value} IN n.{field}") + """ elif op == "contains": where_conditions.append(f"{escaped_value} IN n.{field}") + """ # where_conditions.append(f"size(filter(n.{field}, t -> t IN {escaped_value})) > 0") + """ elif op == "starts_with": where_conditions.append(f"n.{field} STARTS WITH {escaped_value}") elif op == "ends_with": @@ -1460,7 +1462,9 @@ def get_grouped_counts1( print("where_clause:" + where_clause) # Force RETURN field AS field to guarantee key match group_fields_cypher = ", ".join([f"n.{field} AS {field}" for field in group_fields]) + """ # group_fields_cypher_polardb = "agtype, ".join([f"{field}" for field in group_fields]) 
+ """ group_fields_cypher_polardb = ", ".join([f"{field} agtype" for field in group_fields]) print("group_fields_cypher_polardb:" + group_fields_cypher_polardb) query = f""" @@ -1686,11 +1690,14 @@ def export_graph( properties = properties_json if properties_json else {} # # Build node data + + """ # node_data = { # "id": properties.get("id", node_id), # "memory": properties.get("memory", ""), # "metadata": properties # } + """ if include_embedding and embedding_json is not None: properties["embedding"] = embedding_json @@ -1746,28 +1753,6 @@ def count_nodes(self, scope: str, user_name: str | None = None) -> int: result = self.execute_query(query) return int(result.one_or_none()["count"].value) - @timed - def import_graph(self, data: dict[str, Any]) -> None: - """Import the entire graph from a serialized dictionary.""" - with self.connection.cursor() as cursor: - for node in data.get("nodes", []): - id, memory, metadata = _compose_node(node) - - if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - metadata["user_name"] = self._get_config_value("user_name") - - metadata = _prepare_node_metadata(metadata) - - # Generate embedding if not provided - if "embedding" not in metadata or not metadata["embedding"]: - metadata["embedding"] = generate_vector(self._get_config_value("embedding_dimension", 1024)) - - self.add_node(id, memory, metadata) - - # Import edges - for edge in data.get("edges", []): - self.add_edge(edge["source"], edge["target"], edge["type"]) - @timed def get_all_memory_items( self, scope: str, include_embedding: bool = False, user_name: str | None = None @@ -1814,7 +1799,10 @@ def get_all_memory_items( results = cursor.fetchall() for row in results: + """ if isinstance(row, (list, tuple)) and len(row) >= 2: + """ + if isinstance(row, list | tuple) and len(row) >= 2: embedding_val, node_val = row[0], row[1] else: embedding_val, node_val = None, row[0] @@ -1849,11 +1837,16 @@ def get_all_memory_items( results = 
cursor.fetchall() for row in results: + """ if isinstance(row[0], str): memory_data = json.loads(row[0]) else: memory_data = row[0] # 如果已经是字典,直接使用 nodes.append(self._parse_node(memory_data)) + """ + memory_data = json.loads(row[0]) if isinstance(row[0], str) else row[0] + nodes.append(self._parse_node(memory_data)) + except Exception as e: logger.error(f"Failed to get memories: {e}", exc_info=True) @@ -1915,7 +1908,6 @@ def get_all_memory_items_old( for row in results: node_agtype = row[0] - # print(f"[get_all_memory_items] Processing row: {type(node_agtype)} = {node_agtype}") # Handle string-formatted data if isinstance(node_agtype, str): @@ -2013,20 +2005,6 @@ def get_structure_optimization_candidates( ] return_fields_agtype = ", ".join([f"{field} agtype" for field in fields]) - # Keep legacy query - cypher_query_1 = f""" - SELECT m.* - FROM {self.db_name}_graph."Memory" m - WHERE - ag_catalog.agtype_access_operator(m.properties, '"memory_type"'::ag_catalog.agtype) = '"LongTermMemory"'::ag_catalog.agtype - AND ag_catalog.agtype_access_operator(m.properties, '"status"'::ag_catalog.agtype) = '"activated"'::ag_catalog.agtype - AND ag_catalog.agtype_access_operator(m.properties, '"user_name"'::ag_catalog.agtype) = '"activated"'::ag_catalog.agtype - AND NOT EXISTS ( - SELECT 1 - FROM {self.db_name}_graph."PARENT" p - WHERE m.id = p.start_id OR m.id = p.end_id - ); - """ # Use OPTIONAL MATCH to find isolated nodes (no parents or children) cypher_query = f""" @@ -2066,7 +2044,10 @@ def get_structure_optimization_candidates( for row in results: if include_embedding: # When include_embedding=True, return full node object + """ if isinstance(row, (list, tuple)) and len(row) >= 2: + """ + if isinstance(row, list | tuple) and len(row) >= 2: embedding_val, node_val = row[0], row[1] else: embedding_val, node_val = None, row[0] @@ -2141,19 +2122,6 @@ def _parse_node(self, node_data: dict[str, Any]) -> dict[str, Any]: if time_field in node and hasattr(node[time_field], 
"isoformat"): node[time_field] = node[time_field].isoformat() - # Do not remove user_name; keep all fields - - # 1 - # return {"id": node.pop("id"), "memory": node.pop("memory", ""), "metadata": node} - - # 2 - # node_id = node.pop("id") - # memory = node.pop("memory", "") - # # Add id field into metadata - # node["id"] = node_id - # node1 = node - # return {"id": node_id, "memory": memory, "metadata": node1} - return {"id": node.get("id"), "memory": node.get("memory", ""), "metadata": node} def _parse_node_new(self, node_data: dict[str, Any]) -> dict[str, Any]: @@ -2162,10 +2130,16 @@ def _parse_node_new(self, node_data: dict[str, Any]) -> dict[str, Any]: # Normalize string values that may arrive as quoted literals (e.g., '"abc"') def _strip_wrapping_quotes(value: Any) -> Any: + """ if isinstance(value, str) and len(value) >= 2: if value[0] == value[-1] and value[0] in ("'", '"'): return value[1:-1] return value + """ + if (isinstance(value, str) and len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"') + ): + return value[1:-1] + return value for k, v in list(node.items()): if isinstance(v, str): @@ -2185,80 +2159,11 @@ def __del__(self): if hasattr(self, 'connection') and self.connection: self.connection.close() - #deprecated - def add_node_old(conn, id: str, memory: str, metadata: dict, graph_name=None): - """ - Add a single node to the graph database - - Args: - conn: Database connection - id: Node ID - memory: Memory content - metadata: Metadata dictionary - graph_name: Graph name, optional - """ - # Use provided graph_name or default - from psycopg2.extras import Json - if graph_name is None: - graph_name = GRAPH_NAME - - try: - embedding = find_embedding(metadata) - field_name = detect_embedding_field(embedding) - vector_value = convert_to_vector(embedding) if field_name else None - - properties = metadata.copy() - properties = clean_properties(properties) - properties["id"] = id - properties["memory"] = memory - - with conn.cursor() as 
cursor: - # Delete existing record first (if any) - delete_sql = f""" - DELETE FROM "Memory" - WHERE id = ag_catalog._make_graph_id('{graph_name}'::name, 'Memory'::name, %s::text::cstring); - """ - cursor.execute(delete_sql, (id,)) - - # Then insert new record - if field_name and vector_value: - insert_sql = f""" - INSERT INTO "Memory" (id, properties, {field_name}) - VALUES ( - ag_catalog._make_graph_id('{graph_name}'::name, 'Memory'::name, %s::text::cstring), - %s::text::agtype, - %s::vector - ); - """ - cursor.execute(insert_sql, (id, Json(properties), vector_value)) - print(f"✅ Insert/update succeeded: {id} ({field_name})") - else: - insert_sql = f""" - INSERT INTO "Memory" (id, properties) - VALUES ( - ag_catalog._make_graph_id('{graph_name}'::name, 'Memory'::name, %s::text::cstring), - %s::text::agtype - ); - """ - cursor.execute(insert_sql, (id, Json(properties))) - print(f"✅ Insert/update succeeded (no vector): {id}") - - conn.commit() - return True - - except Exception as e: - conn.rollback() - print(f"❌ Insert failed (ID: {id}): {e}") - return False - @timed def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: str | None = None) -> None: """Add a memory node to the graph.""" # user_name comes from metadata; fallback to config if missing metadata["user_name"] = user_name if user_name else self.config.user_name - # if "user_name" not in metadata: - # if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): - # metadata["user_name"] = self._get_config_value("user_name") # Safely process metadata metadata = _prepare_node_metadata(metadata) @@ -2282,7 +2187,7 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: st # serialization - JSON-serialize sources and usage fields for field_name in ["sources", "usage"]: - if field_name in properties and properties[field_name]: + if properties.get(field_name): if isinstance(properties[field_name], list): for idx in 
range(len(properties[field_name])): # Serialize only when element is not a string @@ -2419,7 +2324,7 @@ def get_neighbors_by_tag( "ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"activated\"'::agtype") # Type filter - exclude 'reasoning' type - # where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"node_type\"'::agtype) != '\"reasoning\"'::agtype") + where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"node_type\"'::agtype) != '\"reasoning\"'::agtype") # User filter where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype") From 83cc7a3d45190d91a45a397b6a73d5a6e35a2dd0 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 27 Oct 2025 21:42:19 +0800 Subject: [PATCH 136/137] feat: format polardb --- src/memos/graph_dbs/polardb.py | 631 ++++++++++++++++++++------------- 1 file changed, 383 insertions(+), 248 deletions(-) diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index b90b2e0d7..38e71298f 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -16,7 +16,7 @@ logger = get_logger(__name__) # Graph database configuration -GRAPH_NAME = 'test_memos_graph' +GRAPH_NAME = "test_memos_graph" def _compose_node(item: dict[str, Any]) -> tuple[str, str, dict[str, Any]]: @@ -74,6 +74,8 @@ def detect_embedding_field(embedding_list): else: print(f"⚠️ Unknown embedding dimension {dim}, skipping this vector") return None + + def convert_to_vector(embedding_list): if not embedding_list: return None @@ -81,6 +83,7 @@ def convert_to_vector(embedding_list): embedding_list = embedding_list.tolist() return "[" + ",".join(str(float(x)) for x in embedding_list) + "]" + def clean_properties(props): """Remove vector fields""" vector_keys = {"embedding", "embedding_1024", "embedding_3072", "embedding_768"} @@ -137,11 +140,7 @@ def __init__(self, config: PolarDBGraphDBConfig): # Create connection self.connection = 
psycopg2.connect( - host=host, - port=port, - user=user, - password=password, - dbname=self.db_name + host=host, port=port, user=user, password=password, dbname=self.db_name ) self.connection.autocommit = True @@ -232,11 +231,11 @@ def _create_graph(self): raise e def create_index( - self, - label: str = "Memory", - vector_property: str = "embedding", - dimensions: int = 1024, - index_name: str = "memory_vector_index", + self, + label: str = "Memory", + vector_property: str = "embedding", + dimensions: int = 1024, + index_name: str = "memory_vector_index", ) -> None: """ Create indexes for embedding and other fields. @@ -312,7 +311,9 @@ def node_not_exist(self, scope: str, user_name: str | None = None) -> int: raise @timed - def remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: str | None = None) -> None: + def remove_oldest_memory( + self, memory_type: str, keep_latest: int, user_name: str | None = None + ) -> None: """ Remove all WorkingMemory nodes except the latest `keep_latest` entries. 
@@ -322,7 +323,7 @@ def remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: st user_name (str, optional): User name for filtering in non-multi-db mode """ user_name = user_name if user_name else self._get_config_value("user_name") - + # Use actual OFFSET logic, consistent with nebular.py # First find IDs to delete, then delete them select_query = f""" @@ -335,7 +336,7 @@ def remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: st select_params = [f'"{memory_type}"', f'"{user_name}"', keep_latest] print(f"[remove_oldest_memory] Select query: {select_query}") print(f"[remove_oldest_memory] Select params: {select_params}") - + try: with self.connection.cursor() as cursor: # Execute query to get IDs to delete @@ -347,7 +348,7 @@ def remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: st return # Build delete query - placeholders = ','.join(['%s'] * len(ids_to_delete)) + placeholders = ",".join(["%s"] * len(ids_to_delete)) delete_query = f""" DELETE FROM "{self.db_name}_graph"."Memory" WHERE id IN ({placeholders}) @@ -357,7 +358,9 @@ def remove_oldest_memory(self, memory_type: str, keep_latest: int, user_name: st # Execute deletion cursor.execute(delete_query, delete_params) deleted_count = cursor.rowcount - logger.info(f"Removed {deleted_count} oldest {memory_type} memories, keeping {keep_latest} latest for user {user_name}") + logger.info( + f"Removed {deleted_count} oldest {memory_type} memories, keeping {keep_latest} latest for user {user_name}" + ) except Exception as e: logger.error(f"[remove_oldest_memory] Failed: {e}", exc_info=True) raise @@ -381,11 +384,11 @@ def update_node(self, id: str, fields: dict[str, Any], user_name: str | None = N properties = current_node["metadata"].copy() original_id = properties.get("id", id) # Preserve original ID original_memory = current_node.get("memory", "") # Preserve original memory - + # If fields include memory, use it; otherwise keep original memory if 
"memory" in fields: original_memory = fields.pop("memory") - + properties.update(fields) properties["id"] = original_id # Ensure ID is not overwritten properties["memory"] = original_memory # Ensure memory is not overwritten @@ -455,17 +458,14 @@ def delete_node(self, id: str, user_name: str | None = None) -> None: @timed def create_extension(self): - extensions = [ - ("polar_age", "Graph engine"), - ("vector", "Vector engine") - ] + extensions = [("polar_age", "Graph engine"), ("vector", "Vector engine")] try: with self.connection.cursor() as cursor: # Ensure in the correct database context cursor.execute(f"SELECT current_database();") current_db = cursor.fetchone()[0] print(f"Current database context: {current_db}") - + for ext_name, ext_desc in extensions: try: cursor.execute(f"create extension if not exists {ext_name};") @@ -475,7 +475,9 @@ def create_extension(self): print(f"ℹ️ Extension '{ext_name}' ({ext_desc}) already exists.") else: print(f"⚠️ Failed to create extension '{ext_name}' ({ext_desc}): {e}") - logger.error(f"Failed to create extension '{ext_name}': {e}", exc_info=True) + logger.error( + f"Failed to create extension '{ext_name}': {e}", exc_info=True + ) except Exception as e: print(f"⚠️ Failed to access database context: {e}") logger.error(f"Failed to access database context: {e}", exc_info=True) @@ -489,7 +491,7 @@ def create_graph(self): WHERE name = '{self.db_name}_graph'; """) graph_exists = cursor.fetchone()[0] > 0 - + if graph_exists: print(f"ℹ️ Graph '{self.db_name}_graph' already exists.") else: @@ -503,15 +505,8 @@ def create_graph(self): def create_edge(self): """Create all valid edge types if they do not exist""" - valid_rel_types = { - "AGGREGATE_TO", - "FOLLOWS", - "INFERS", - "MERGED_TO", - "RELATE_TO", - "PARENT" - } - + valid_rel_types = {"AGGREGATE_TO", "FOLLOWS", "INFERS", "MERGED_TO", "RELATE_TO", "PARENT"} + for label_name in valid_rel_types: print(f"🪶 Creating elabel: {label_name}") try: @@ -526,7 +521,9 @@ def 
create_edge(self): logger.error(f"Failed to create elabel '{label_name}': {e}", exc_info=True) @timed - def add_edge(self, source_id: str, target_id: str, type: str, user_name: str | None = None) -> None: + def add_edge( + self, source_id: str, target_id: str, type: str, user_name: str | None = None + ) -> None: if not source_id or not target_id: raise ValueError("[add_edge] source_id and target_id must be provided") @@ -582,7 +579,7 @@ def delete_edge(self, source_id: str, target_id: str, type: str) -> None: @timed def edge_exists_old( - self, source_id: str, target_id: str, type: str = "ANY", direction: str = "OUTGOING" + self, source_id: str, target_id: str, type: str = "ANY", direction: str = "OUTGOING" ) -> bool: """ Check if an edge exists between two nodes. @@ -613,7 +610,9 @@ def edge_exists_old( where_clauses.append("source_id = %s AND target_id = %s") params.extend([target_id, source_id]) elif direction == "ANY": - where_clauses.append("((source_id = %s AND target_id = %s) OR (source_id = %s AND target_id = %s))") + where_clauses.append( + "((source_id = %s AND target_id = %s) OR (source_id = %s AND target_id = %s))" + ) params.extend([source_id, target_id, target_id, source_id]) else: raise ValueError( @@ -639,12 +638,12 @@ def edge_exists_old( @timed def edge_exists( - self, - source_id: str, - target_id: str, - type: str = "ANY", - direction: str = "OUTGOING", - user_name: str | None = None, + self, + source_id: str, + target_id: str, + type: str = "ANY", + direction: str = "OUTGOING", + user_name: str | None = None, ) -> bool: """ Check if an edge exists between two nodes. @@ -691,7 +690,9 @@ def edge_exists( return result is not None and result[0] is not None @timed - def get_node(self, id: str, include_embedding: bool = False, user_name: str | None = None) -> dict[str, Any] | None: + def get_node( + self, id: str, include_embedding: bool = False, user_name: str | None = None + ) -> dict[str, Any] | None: """ Retrieve a Memory node by its unique ID. 
@@ -755,13 +756,18 @@ def format_param_value(value: str) -> str: # Parse embedding from JSONB if it exists and include_embedding is True if include_embedding and embedding_json is not None: try: - embedding = json.loads(embedding_json) if isinstance(embedding_json, - str) else embedding_json + embedding = ( + json.loads(embedding_json) + if isinstance(embedding_json, str) + else embedding_json + ) properties["embedding"] = embedding except (json.JSONDecodeError, TypeError): logger.warning(f"Failed to parse embedding for node {id}") - return self._parse_node({"id": id, "memory": properties.get("memory", ""), **properties}) + return self._parse_node( + {"id": id, "memory": properties.get("memory", ""), **properties} + ) return None except Exception as e: @@ -769,7 +775,9 @@ def format_param_value(value: str) -> str: return None @timed - def get_nodes(self, ids: list[str], user_name: str | None = None,**kwargs) -> list[dict[str, Any]]: + def get_nodes( + self, ids: list[str], user_name: str | None = None, **kwargs + ) -> list[dict[str, Any]]: """ Retrieve the metadata and memory of a list of nodes. 
Args: @@ -787,13 +795,15 @@ def get_nodes(self, ids: list[str], user_name: str | None = None,**kwargs) -> l # Build WHERE clause using agtype_access_operator like get_node method where_conditions = [] params = [] - + for id_val in ids: - where_conditions.append("ag_catalog.agtype_access_operator(properties, '\"id\"'::agtype) = %s::agtype") - params.append(f'{id_val}') - + where_conditions.append( + "ag_catalog.agtype_access_operator(properties, '\"id\"'::agtype) = %s::agtype" + ) + params.append(f"{id_val}") + where_clause = " OR ".join(where_conditions) - + query = f""" SELECT id, properties, embedding FROM "{self.db_name}_graph"."Memory" @@ -821,7 +831,7 @@ def get_nodes(self, ids: list[str], user_name: str | None = None,**kwargs) -> l properties = {} else: properties = properties_json if properties_json else {} - + # Parse embedding from JSONB if it exists if embedding_json is not None: try: @@ -833,12 +843,21 @@ def get_nodes(self, ids: list[str], user_name: str | None = None,**kwargs) -> l """ except (json.JSONDecodeError, TypeError): logger.warning(f"Failed to parse embedding for node {node_id}") - nodes.append(self._parse_node( - {"id": properties.get("id", node_id), "memory": properties.get("memory", ""), "metadata": properties})) + nodes.append( + self._parse_node( + { + "id": properties.get("id", node_id), + "memory": properties.get("memory", ""), + "metadata": properties, + } + ) + ) return nodes @timed - def get_edges_old(self, id: str, type: str = "ANY", direction: str = "ANY") -> list[dict[str, str]]: + def get_edges_old( + self, id: str, type: str = "ANY", direction: str = "ANY" + ) -> list[dict[str, str]]: """ Get edges connected to a node, with optional type and direction filter. 
@@ -919,26 +938,22 @@ def get_edges_old(self, id: str, type: str = "ANY", direction: str = "ANY") -> l edges = [] for row in results: source_id, target_id, edge_type = row - edges.append({ - "from": source_id, - "to": target_id, - "type": edge_type - }) + edges.append({"from": source_id, "to": target_id, "type": edge_type}) return edges def get_neighbors( - self, id: str, type: str, direction: Literal["in", "out", "both"] = "out" + self, id: str, type: str, direction: Literal["in", "out", "both"] = "out" ) -> list[str]: """Get connected node IDs in a specific direction and relationship type.""" raise NotImplementedError @timed def get_neighbors_by_tag_old( - self, - tags: list[str], - exclude_ids: list[str], - top_k: int = 5, - min_overlap: int = 1, + self, + tags: list[str], + exclude_ids: list[str], + top_k: int = 5, + min_overlap: int = 1, ) -> list[dict[str, Any]]: """ Find top-K neighbor nodes with maximum tag overlap. @@ -958,20 +973,20 @@ def get_neighbors_by_tag_old( # Exclude specified IDs if exclude_ids: - placeholders = ','.join(['%s'] * len(exclude_ids)) + placeholders = ",".join(["%s"] * len(exclude_ids)) where_clauses.append(f"id NOT IN ({placeholders})") params.extend(exclude_ids) # Status filter where_clauses.append("properties->>'status' = %s") - params.append('activated') + params.append("activated") # Type filter where_clauses.append("properties->>'type' != %s") - params.append('reasoning') + params.append("reasoning") where_clauses.append("properties->>'memory_type' != %s") - params.append('WorkingMemory') + params.append("WorkingMemory") # User filter if not self._get_config_value("use_multi_db", True) and self._get_config_value("user_name"): @@ -999,7 +1014,11 @@ def get_neighbors_by_tag_old( # Parse embedding if embedding_json is not None: try: - embedding = json.loads(embedding_json) if isinstance(embedding_json, str) else embedding_json + embedding = ( + json.loads(embedding_json) + if isinstance(embedding_json, str) + else embedding_json + 
) properties["embedding"] = embedding except (json.JSONDecodeError, TypeError): logger.warning(f"Failed to parse embedding for node {node_id}") @@ -1016,11 +1035,13 @@ def get_neighbors_by_tag_old( overlap_count = len(overlap_tags) if overlap_count >= min_overlap: - node_data = self._parse_node({ - "id": properties.get("id", node_id), - "memory": properties.get("memory", ""), - "metadata": properties - }) + node_data = self._parse_node( + { + "id": properties.get("id", node_id), + "memory": properties.get("memory", ""), + "metadata": properties, + } + ) nodes_with_overlap.append((node_data, overlap_count)) # Sort by overlap count and return top_k @@ -1028,7 +1049,9 @@ def get_neighbors_by_tag_old( return [node for node, _ in nodes_with_overlap[:top_k]] @timed - def get_children_with_embeddings(self, id: str, user_name: str | None = None) -> list[dict[str, Any]]: + def get_children_with_embeddings( + self, id: str, user_name: str | None = None + ) -> list[dict[str, Any]]: """Get children nodes with their embeddings.""" user_name = user_name if user_name else self._get_config_value("user_name") where_user = f"AND p.user_name = '{user_name}' AND c.user_name = '{user_name}'" @@ -1047,7 +1070,6 @@ def get_children_with_embeddings(self, id: str, user_name: str | None = None) -> WHERE t.cid::graphid = m.id; """ - print("[get_children_with_embeddings] query:", query) try: @@ -1058,7 +1080,7 @@ def get_children_with_embeddings(self, id: str, user_name: str | None = None) -> children = [] for row in results: # Handle child_id - remove possible quotes - child_id_raw = row[0].value if hasattr(row[0], 'value') else str(row[0]) + child_id_raw = row[0].value if hasattr(row[0], "value") else str(row[0]) if isinstance(child_id_raw, str): # If string starts and ends with quotes, remove quotes if child_id_raw.startswith('"') and child_id_raw.endswith('"'): @@ -1083,11 +1105,13 @@ def get_children_with_embeddings(self, id: str, user_name: str | None = None) -> # Try converting to list 
embedding = list(embedding_raw) except (json.JSONDecodeError, TypeError, ValueError) as e: - logger.warning(f"Failed to parse embedding for child node {child_id}: {e}") + logger.warning( + f"Failed to parse embedding for child node {child_id}: {e}" + ) embedding = [] # Handle memory - remove possible quotes - memory_raw = row[2].value if hasattr(row[2], 'value') else str(row[2]) + memory_raw = row[2].value if hasattr(row[2], "value") else str(row[2]) if isinstance(memory_raw, str): # If string starts and ends with quotes, remove quotes if memory_raw.startswith('"') and memory_raw.endswith('"'): @@ -1097,11 +1121,7 @@ def get_children_with_embeddings(self, id: str, user_name: str | None = None) -> else: memory = str(memory_raw) - children.append({ - "id": child_id, - "embedding": embedding, - "memory": memory - }) + children.append({"id": child_id, "embedding": embedding, "memory": memory}) return children @@ -1173,7 +1193,7 @@ def get_subgraph( cursor.execute(query) result = cursor.fetchone() print("[get_subgraph] result:", result) - + if not result or not result[0]: return {"core_node": None, "neighbors": [], "edges": []} @@ -1181,31 +1201,39 @@ def get_subgraph( centers_data = result[0] if result[0] else "[]" neighbors_data = result[1] if result[1] else "[]" edges_data = result[2] if result[2] else "[]" - + # Parse JSON data try: # Clean ::vertex and ::edge suffixes in data if isinstance(centers_data, str): - centers_data = centers_data.replace('::vertex', '') + centers_data = centers_data.replace("::vertex", "") if isinstance(neighbors_data, str): - neighbors_data = neighbors_data.replace('::vertex', '') + neighbors_data = neighbors_data.replace("::vertex", "") if isinstance(edges_data, str): - edges_data = edges_data.replace('::edge', '') - - centers_list = json.loads(centers_data) if isinstance(centers_data, str) else centers_data - neighbors_list = json.loads(neighbors_data) if isinstance(neighbors_data, str) else neighbors_data - edges_list = 
json.loads(edges_data) if isinstance(edges_data, str) else edges_data + edges_data = edges_data.replace("::edge", "") + + centers_list = ( + json.loads(centers_data) if isinstance(centers_data, str) else centers_data + ) + neighbors_list = ( + json.loads(neighbors_data) + if isinstance(neighbors_data, str) + else neighbors_data + ) + edges_list = ( + json.loads(edges_data) if isinstance(edges_data, str) else edges_data + ) except json.JSONDecodeError as e: logger.error(f"Failed to parse JSON data: {e}") return {"core_node": None, "neighbors": [], "edges": []} - + # Parse center node core_node = None if centers_list and len(centers_list) > 0: center_data = centers_list[0] if isinstance(center_data, dict) and "properties" in center_data: core_node = self._parse_node(center_data["properties"]) - + # Parse neighbor nodes neighbors = [] if isinstance(neighbors_list, list): @@ -1221,11 +1249,13 @@ def get_subgraph( if isinstance(edge_group, list): for edge_data in edge_group: if isinstance(edge_data, dict): - edges.append({ - "type": edge_data.get("label", ""), - "source": edge_data.get("start_id", ""), - "target": edge_data.get("end_id", "") - }) + edges.append( + { + "type": edge_data.get("label", ""), + "source": edge_data.get("start_id", ""), + "target": edge_data.get("end_id", ""), + } + ) return {"core_node": core_node, "neighbors": neighbors, "edges": edges} @@ -1239,15 +1269,15 @@ def get_context_chain(self, id: str, type: str = "FOLLOWS") -> list[str]: @timed def search_by_embedding( - self, - vector: list[float], - top_k: int = 5, - scope: str | None = None, - status: str | None = None, - threshold: float | None = None, - search_filter: dict | None = None, - user_name: str | None = None, - **kwargs, + self, + vector: list[float], + top_k: int = 5, + scope: str | None = None, + status: str | None = None, + threshold: float | None = None, + search_filter: dict | None = None, + user_name: str | None = None, + **kwargs, ) -> list[dict]: """ Retrieve node IDs based 
on vector similarity using PostgreSQL vector operations. @@ -1255,11 +1285,17 @@ def search_by_embedding( # Build WHERE clause dynamically like nebular.py where_clauses = [] if scope: - where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"memory_type\"'::agtype) = '\"{scope}\"'::agtype") + where_clauses.append( + f"ag_catalog.agtype_access_operator(properties, '\"memory_type\"'::agtype) = '\"{scope}\"'::agtype" + ) if status: - where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"{status}\"'::agtype") + where_clauses.append( + f"ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"{status}\"'::agtype" + ) else: - where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"activated\"'::agtype") + where_clauses.append( + "ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"activated\"'::agtype" + ) where_clauses.append("embedding is not null") # Add user_name filter like nebular.py @@ -1272,18 +1308,24 @@ def search_by_embedding( # where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype") """ user_name = user_name if user_name else self.config.user_name - where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype") + where_clauses.append( + f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype" + ) # Add search_filter conditions like nebular.py if search_filter: for key, value in search_filter.items(): if isinstance(value, str): - where_clauses.append(f"ag_catalog.agtype_access_operator(properties, '\"{key}\"'::agtype) = '\"{value}\"'::agtype") + where_clauses.append( + f"ag_catalog.agtype_access_operator(properties, '\"{key}\"'::agtype) = '\"{value}\"'::agtype" + ) else: - where_clauses.append(f"ag_catalog.agtype_access_operator(properties, 
'\"{key}\"'::agtype) = {value}::agtype") - + where_clauses.append( + f"ag_catalog.agtype_access_operator(properties, '\"{key}\"'::agtype) = {value}::agtype" + ) + where_clause = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else "" - + # Keep original simple query structure but add dynamic WHERE clause query = f""" WITH t AS ( @@ -1303,7 +1345,9 @@ def search_by_embedding( """ params = [vector] - print(f"[search_by_embedding] query: {query}, params: {params}, where_clause: {where_clause}") + print( + f"[search_by_embedding] query: {query}, params: {params}, where_clause: {where_clause}" + ) with self.connection.cursor() as cursor: cursor.execute(query, params) results = cursor.fetchall() @@ -1324,7 +1368,9 @@ def search_by_embedding( return output[:top_k] @timed - def get_by_metadata(self, filters: list[dict[str, Any]], user_name: str | None = None) -> list[str]: + def get_by_metadata( + self, filters: list[dict[str, Any]], user_name: str | None = None + ) -> list[str]: """ Retrieve node IDs that match given metadata filters. Supports exact match. 
@@ -1447,7 +1493,7 @@ def get_grouped_counts1( raise ValueError("group_fields cannot be empty") final_params = params.copy() if params else {} - print("username:"+user_name) + print("username:" + user_name) if not self.config.use_multi_db and (self.config.user_name or user_name): user_clause = "n.user_name = $user_name" final_params["user_name"] = user_name @@ -1489,7 +1535,7 @@ def get_grouped_counts1( group_values = {} for i, field in enumerate(group_fields): value = row[i] - if hasattr(value, 'value'): + if hasattr(value, "value"): group_values[field] = value.value else: group_values[field] = str(value) @@ -1525,9 +1571,9 @@ def get_grouped_counts( """ if not group_fields: raise ValueError("group_fields cannot be empty") - + user_name = user_name if user_name else self._get_config_value("user_name") - + # Build user clause user_clause = f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype" if where_clause: @@ -1546,10 +1592,13 @@ def get_grouped_counts( if isinstance(value, str): value = f"'{value}'" where_clause = where_clause.replace(f"${key}", str(value)) - + # Handle user_name parameter in where_clause if "user_name = %s" in where_clause: - where_clause = where_clause.replace("user_name = %s", f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype") + where_clause = where_clause.replace( + "user_name = %s", + f"ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = '\"{user_name}\"'::agtype", + ) # Build return fields and group by fields return_fields = [] @@ -1557,7 +1606,9 @@ def get_grouped_counts( for field in group_fields: alias = field.replace(".", "_") - return_fields.append(f"ag_catalog.agtype_access_operator(properties, '\"{field}\"'::agtype) AS {alias}") + return_fields.append( + f"ag_catalog.agtype_access_operator(properties, '\"{field}\"'::agtype) AS {alias}" + ) group_by_fields.append(alias) # Full SQL query construction @@ -1584,7 
+1635,7 @@ def get_grouped_counts( group_values = {} for i, field in enumerate(group_fields): value = row[i] - if hasattr(value, 'value'): + if hasattr(value, "value"): group_values[field] = value.value else: group_values[field] = str(value) @@ -1652,7 +1703,7 @@ def export_graph( } """ user_name = user_name if user_name else self._get_config_value("user_name") - + try: # Export nodes if include_embedding: @@ -1667,19 +1718,19 @@ def export_graph( FROM "{self.db_name}_graph"."Memory" WHERE ag_catalog.agtype_access_operator(properties, '"user_name"'::agtype) = '\"{user_name}\"'::agtype """ - + with self.connection.cursor() as cursor: cursor.execute(node_query) node_results = cursor.fetchall() nodes = [] - + for row in node_results: if include_embedding: node_id, properties_json, embedding_json = row else: node_id, properties_json = row embedding_json = None - + # Parse properties from JSONB if it's a string if isinstance(properties_json, str): try: @@ -1688,7 +1739,7 @@ def export_graph( properties = {} else: properties = properties_json if properties_json else {} - + # # Build node data """ @@ -1698,12 +1749,12 @@ def export_graph( # "metadata": properties # } """ - + if include_embedding and embedding_json is not None: properties["embedding"] = embedding_json - + nodes.append(self._parse_node(properties)) - + except Exception as e: logger.error(f"[EXPORT GRAPH - NODES] Exception: {e}", exc_info=True) raise RuntimeError(f"[EXPORT GRAPH - NODES] Exception: {e}") from e @@ -1717,20 +1768,28 @@ def export_graph( RETURN a.id AS source, b.id AS target, type(r) as edge $$) AS (source agtype, target agtype, edge agtype) """ - + with self.connection.cursor() as cursor: cursor.execute(edge_query) edge_results = cursor.fetchall() edges = [] - + for row in edge_results: source_agtype, target_agtype, edge_agtype = row - edges.append({ - "source": source_agtype.value if hasattr(source_agtype, 'value') else str(source_agtype), - "target": target_agtype.value if 
hasattr(target_agtype, 'value') else str(target_agtype), - "type": edge_agtype.value if hasattr(edge_agtype, 'value') else str(edge_agtype) - }) - + edges.append( + { + "source": source_agtype.value + if hasattr(source_agtype, "value") + else str(source_agtype), + "target": target_agtype.value + if hasattr(target_agtype, "value") + else str(target_agtype), + "type": edge_agtype.value + if hasattr(edge_agtype, "value") + else str(edge_agtype), + } + ) + except Exception as e: logger.error(f"[EXPORT GRAPH - EDGES] Exception: {e}", exc_info=True) raise RuntimeError(f"[EXPORT GRAPH - EDGES] Exception: {e}") from e @@ -1755,7 +1814,7 @@ def count_nodes(self, scope: str, user_name: str | None = None) -> int: @timed def get_all_memory_items( - self, scope: str, include_embedding: bool = False, user_name: str | None = None + self, scope: str, include_embedding: bool = False, user_name: str | None = None ) -> list[dict]: """ Retrieve all memory items of a specific memory_type. @@ -1814,7 +1873,6 @@ def get_all_memory_items( nodes.append(node) node_ids.add(node_id) - except Exception as e: logger.error(f"Failed to get memories: {e}", exc_info=True) @@ -1913,7 +1971,7 @@ def get_all_memory_items_old( if isinstance(node_agtype, str): try: # Remove ::vertex suffix - json_str = node_agtype.replace('::vertex', '') + json_str = node_agtype.replace("::vertex", "") node_data = json.loads(json_str) if isinstance(node_data, dict) and "properties" in node_data: @@ -1922,20 +1980,24 @@ def get_all_memory_items_old( parsed_node_data = { "id": properties.get("id", ""), "memory": properties.get("memory", ""), - "metadata": properties + "metadata": properties, } if include_embedding and "embedding" in properties: parsed_node_data["embedding"] = properties["embedding"] nodes.append(self._parse_node(parsed_node_data)) - print(f"[get_all_memory_items] ✅ Parsed node successfully: {properties.get('id', '')}") + print( + f"[get_all_memory_items] ✅ Parsed node successfully: {properties.get('id', 
'')}" + ) else: - print(f"[get_all_memory_items] ❌ Invalid node data format: {node_data}") + print( + f"[get_all_memory_items] ❌ Invalid node data format: {node_data}" + ) except (json.JSONDecodeError, TypeError) as e: print(f"[get_all_memory_items] ❌ JSON parsing failed: {e}") - elif node_agtype and hasattr(node_agtype, 'value'): + elif node_agtype and hasattr(node_agtype, "value"): # Handle agtype object node_props = node_agtype.value if isinstance(node_props, dict): @@ -1943,16 +2005,20 @@ def get_all_memory_items_old( node_data = { "id": node_props.get("id", ""), "memory": node_props.get("memory", ""), - "metadata": node_props + "metadata": node_props, } if include_embedding and "embedding" in node_props: node_data["embedding"] = node_props["embedding"] nodes.append(self._parse_node(node_data)) - print(f"[get_all_memory_items] ✅ Parsed agtype node successfully: {node_props.get('id', '')}") + print( + f"[get_all_memory_items] ✅ Parsed agtype node successfully: {node_props.get('id', '')}" + ) else: - print(f"[get_all_memory_items] ❌ Unknown data format: {type(node_agtype)}") + print( + f"[get_all_memory_items] ❌ Unknown data format: {type(node_agtype)}" + ) except Exception as e: logger.error(f"Failed to get memories: {e}", exc_info=True) @@ -1969,43 +2035,59 @@ def get_structure_optimization_candidates( - Plus: the child of any parent node that has exactly one child. 
""" user_name = user_name if user_name else self._get_config_value("user_name") - + # Build return fields based on include_embedding flag if include_embedding: return_fields = "id(n) as id1,n" return_fields_agtype = " id1 agtype,n agtype" else: # Build field list without embedding - return_fields = ",".join([ - "n.id AS id", - "n.memory AS memory", - "n.user_name AS user_name", - "n.user_id AS user_id", - "n.session_id AS session_id", - "n.status AS status", - "n.key AS key", - "n.confidence AS confidence", - "n.tags AS tags", - "n.created_at AS created_at", - "n.updated_at AS updated_at", - "n.memory_type AS memory_type", - "n.sources AS sources", - "n.source AS source", - "n.node_type AS node_type", - "n.visibility AS visibility", - "n.usage AS usage", - "n.background AS background", - "n.graph_id as graph_id" - ]) + return_fields = ",".join( + [ + "n.id AS id", + "n.memory AS memory", + "n.user_name AS user_name", + "n.user_id AS user_id", + "n.session_id AS session_id", + "n.status AS status", + "n.key AS key", + "n.confidence AS confidence", + "n.tags AS tags", + "n.created_at AS created_at", + "n.updated_at AS updated_at", + "n.memory_type AS memory_type", + "n.sources AS sources", + "n.source AS source", + "n.node_type AS node_type", + "n.visibility AS visibility", + "n.usage AS usage", + "n.background AS background", + "n.graph_id as graph_id", + ] + ) fields = [ - "id", "memory", "user_name", "user_id", "session_id", "status", - "key", "confidence", "tags", "created_at", "updated_at", - "memory_type", "sources", "source", "node_type", "visibility", - "usage", "background","graph_id" + "id", + "memory", + "user_name", + "user_id", + "session_id", + "status", + "key", + "confidence", + "tags", + "created_at", + "updated_at", + "memory_type", + "sources", + "source", + "node_type", + "visibility", + "usage", + "background", + "graph_id", ] return_fields_agtype = ", ".join([f"{field} agtype" for field in fields]) - # Use OPTIONAL MATCH to find isolated nodes 
(no parents or children) cypher_query = f""" SELECT * FROM cypher('{self.db_name}_graph', $$ @@ -2040,7 +2122,7 @@ def get_structure_optimization_candidates( with self.connection.cursor() as cursor: cursor.execute(cypher_query) results = cursor.fetchall() - print("result------",len(results)) + print("result------", len(results)) for row in results: if include_embedding: # When include_embedding=True, return full node object @@ -2062,19 +2144,36 @@ def get_structure_optimization_candidates( # When include_embedding=False, return field dictionary # Define field names matching the RETURN clause field_names = [ - "id", "memory", "user_name", "user_id", "session_id", "status", - "key", "confidence", "tags", "created_at", "updated_at", - "memory_type", "sources", "source", "node_type", "visibility", - "usage", "background","graph_id" + "id", + "memory", + "user_name", + "user_id", + "session_id", + "status", + "key", + "confidence", + "tags", + "created_at", + "updated_at", + "memory_type", + "sources", + "source", + "node_type", + "visibility", + "usage", + "background", + "graph_id", ] - + # Convert row to dictionary node_data = {} for i, field_name in enumerate(field_names): if i < len(row): value = row[i] # Handle special fields - if field_name in ["tags", "sources", "usage"] and isinstance(value, str): + if field_name in ["tags", "sources", "usage"] and isinstance( + value, str + ): try: # Try parsing JSON string node_data[field_name] = json.loads(value) @@ -2082,22 +2181,22 @@ def get_structure_optimization_candidates( node_data[field_name] = value else: node_data[field_name] = value - + # Parse node using _parse_node_new try: node = self._parse_node_new(node_data) node_id = node["id"] - + if node_id not in node_ids: candidates.append(node) node_ids.add(node_id) print(f"✅ Parsed node successfully: {node_id}") except Exception as e: print(f"❌ Failed to parse node: {e}") - + except Exception as e: logger.error(f"Failed to get structure optimization candidates: {e}", 
exc_info=True) - + return candidates def drop_database(self) -> None: @@ -2136,7 +2235,11 @@ def _strip_wrapping_quotes(value: Any) -> Any: return value[1:-1] return value """ - if (isinstance(value, str) and len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"') + if ( + isinstance(value, str) + and len(value) >= 2 + and value[0] == value[-1] + and value[0] in ("'", '"') ): return value[1:-1] return value @@ -2156,11 +2259,13 @@ def _strip_wrapping_quotes(value: Any) -> Any: def __del__(self): """Close database connection when object is destroyed.""" - if hasattr(self, 'connection') and self.connection: + if hasattr(self, "connection") and self.connection: self.connection.close() @timed - def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: str | None = None) -> None: + def add_node( + self, id: str, memory: str, metadata: dict[str, Any], user_name: str | None = None + ) -> None: """Add a memory node to the graph.""" # user_name comes from metadata; fallback to config if missing metadata["user_name"] = user_name if user_name else self.config.user_name @@ -2178,12 +2283,14 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: st "memory": memory, "created_at": created_at, "updated_at": updated_at, - **metadata + **metadata, } # Generate embedding if not provided if "embedding" not in properties or not properties["embedding"]: - properties["embedding"] = generate_vector(self._get_config_value("embedding_dimension", 1024)) + properties["embedding"] = generate_vector( + self._get_config_value("embedding_dimension", 1024) + ) # serialization - JSON-serialize sources and usage fields for field_name in ["sources", "usage"]: @@ -2224,7 +2331,7 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: st """ cursor.execute(get_graph_id_query, (id,)) graph_id = cursor.fetchone()[0] - properties['graph_id'] = str(graph_id) + properties["graph_id"] = str(graph_id) # Then insert new record 
if embedding_vector: @@ -2236,7 +2343,9 @@ def add_node(self, id: str, memory: str, metadata: dict[str, Any], user_name: st %s ) """ - cursor.execute(insert_query, (id, json.dumps(properties), json.dumps(embedding_vector))) + cursor.execute( + insert_query, (id, json.dumps(properties), json.dumps(embedding_vector)) + ) else: insert_query = f""" INSERT INTO {self.db_name}_graph."Memory"(id, properties) @@ -2256,7 +2365,7 @@ def _build_node_from_agtype(self, node_agtype, embedding=None): try: # String case: '{"id":...,"label":[...],"properties":{...}}::vertex' if isinstance(node_agtype, str): - json_str = node_agtype.replace('::vertex', '') + json_str = node_agtype.replace("::vertex", "") obj = json.loads(json_str) if not (isinstance(obj, dict) and "properties" in obj): return None @@ -2277,15 +2386,16 @@ def _build_node_from_agtype(self, node_agtype, embedding=None): return {"id": props.get("id", ""), "memory": props.get("memory", ""), "metadata": props} except Exception: return None + @timed def get_neighbors_by_tag( - self, - tags: list[str], - exclude_ids: list[str], - top_k: int = 5, - min_overlap: int = 1, - include_embedding: bool = False, - user_name: str | None = None, + self, + tags: list[str], + exclude_ids: list[str], + top_k: int = 5, + min_overlap: int = 1, + include_embedding: bool = False, + user_name: str | None = None, ) -> list[dict[str, Any]]: """ Find top-K neighbor nodes with maximum tag overlap. 
@@ -2315,23 +2425,31 @@ def get_neighbors_by_tag( exclude_conditions = [] for exclude_id in exclude_ids: exclude_conditions.append( - "ag_catalog.agtype_access_operator(properties, '\"id\"'::agtype) != %s::agtype") + "ag_catalog.agtype_access_operator(properties, '\"id\"'::agtype) != %s::agtype" + ) params.append(f'"{exclude_id}"') where_clauses.append(f"({' AND '.join(exclude_conditions)})") # Status filter - keep only 'activated' where_clauses.append( - "ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"activated\"'::agtype") + "ag_catalog.agtype_access_operator(properties, '\"status\"'::agtype) = '\"activated\"'::agtype" + ) # Type filter - exclude 'reasoning' type - where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"node_type\"'::agtype) != '\"reasoning\"'::agtype") + where_clauses.append( + "ag_catalog.agtype_access_operator(properties, '\"node_type\"'::agtype) != '\"reasoning\"'::agtype" + ) # User filter - where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype") + where_clauses.append( + "ag_catalog.agtype_access_operator(properties, '\"user_name\"'::agtype) = %s::agtype" + ) params.append(f'"{user_name}"') # Testing showed no data; annotate. 
- where_clauses.append("ag_catalog.agtype_access_operator(properties, '\"memory_type\"'::agtype) != '\"WorkingMemory\"'::agtype") + where_clauses.append( + "ag_catalog.agtype_access_operator(properties, '\"memory_type\"'::agtype) != '\"WorkingMemory\"'::agtype" + ) where_clause = " AND ".join(where_clauses) @@ -2357,8 +2475,11 @@ def get_neighbors_by_tag( # Parse embedding if include_embedding and embedding_json is not None: try: - embedding = json.loads(embedding_json) if isinstance(embedding_json, - str) else embedding_json + embedding = ( + json.loads(embedding_json) + if isinstance(embedding_json, str) + else embedding_json + ) properties["embedding"] = embedding except (json.JSONDecodeError, TypeError): logger.warning(f"Failed to parse embedding for node {node_id}") @@ -2375,11 +2496,13 @@ def get_neighbors_by_tag( overlap_count = len(overlap_tags) if overlap_count >= min_overlap: - node_data = self._parse_node({ - "id": properties.get("id", node_id), - "memory": properties.get("memory", ""), - "metadata": properties - }) + node_data = self._parse_node( + { + "id": properties.get("id", node_id), + "memory": properties.get("memory", ""), + "metadata": properties, + } + ) nodes_with_overlap.append((node_data, overlap_count)) # Sort by overlap count and return top_k items @@ -2391,13 +2514,13 @@ def get_neighbors_by_tag( return [] def get_neighbors_by_tag_ccl( - self, - tags: list[str], - exclude_ids: list[str], - top_k: int = 5, - min_overlap: int = 1, - include_embedding: bool = False, - user_name: str | None = None, + self, + tags: list[str], + exclude_ids: list[str], + top_k: int = 5, + min_overlap: int = 1, + include_embedding: bool = False, + user_name: str | None = None, ) -> list[dict[str, Any]]: """ Find top-K neighbor nodes with maximum tag overlap. 
@@ -2424,7 +2547,7 @@ def get_neighbors_by_tag_ccl( 'NOT (n.node_type = "reasoning")', 'NOT (n.memory_type = "WorkingMemory")', ] - where_clauses=[ + where_clauses = [ 'n.status = "activated"', 'NOT (n.memory_type = "WorkingMemory")', ] @@ -2455,7 +2578,7 @@ def get_neighbors_by_tag_ccl( "n.source AS source", "n.node_type AS node_type", "n.visibility AS visibility", - "n.background AS background" + "n.background AS background", ] if include_embedding: @@ -2485,7 +2608,7 @@ def get_neighbors_by_tag_ccl( ORDER BY (overlap_count::integer) DESC LIMIT {top_k} """ - print("get_neighbors_by_tag:",query) + print("get_neighbors_by_tag:", query) try: with self.connection.cursor() as cursor: cursor.execute(query) @@ -2499,9 +2622,23 @@ def get_neighbors_by_tag_ccl( # Manually parse each field field_names = [ - "id", "memory", "user_name", "user_id", "session_id", "status", - "key", "confidence", "tags", "created_at", "updated_at", - "memory_type", "sources", "source", "node_type", "visibility", "background" + "id", + "memory", + "user_name", + "user_id", + "session_id", + "status", + "key", + "confidence", + "tags", + "created_at", + "updated_at", + "memory_type", + "sources", + "source", + "node_type", + "visibility", + "background", ] if include_embedding: @@ -2510,9 +2647,9 @@ def get_neighbors_by_tag_ccl( for i, field in enumerate(field_names): if field == "overlap_count": - overlap_count = row[i].value if hasattr(row[i], 'value') else row[i] + overlap_count = row[i].value if hasattr(row[i], "value") else row[i] else: - props[field] = row[i].value if hasattr(row[i], 'value') else row[i] + props[field] = row[i].value if hasattr(row[i], "value") else row[i] overlap_int = int(overlap_count) if overlap_count is not None and overlap_int >= min_overlap: parsed = self._parse_node(props) @@ -2534,6 +2671,7 @@ def get_neighbors_by_tag_ccl( except Exception as e: logger.error(f"Failed to get neighbors by tag: {e}", exc_info=True) return [] + @timed def import_graph(self, data: 
dict[str, Any], user_name: str | None = None) -> None: """ @@ -2572,8 +2710,9 @@ def import_graph(self, data: dict[str, Any], user_name: str | None = None) -> No logger.error(f"Fail to load edge: {edge}, error: {e}") @timed - def get_edges(self, id: str, type: str = "ANY", direction: str = "ANY", user_name: str | None = None) -> list[ - dict[str, str]]: + def get_edges( + self, id: str, type: str = "ANY", direction: str = "ANY", user_name: str | None = None + ) -> list[dict[str, str]]: """ Get edges connected to a node, with optional type and direction filter. @@ -2626,15 +2765,11 @@ def get_edges(self, id: str, type: str = "ANY", direction: str = "ANY", user_nam edges = [] for row in results: - from_id = row[0].value if hasattr(row[0], 'value') else row[0] - to_id = row[1].value if hasattr(row[1], 'value') else row[1] - edge_type = row[2].value if hasattr(row[2], 'value') else row[2] - - edges.append({ - "from": from_id, - "to": to_id, - "type": edge_type - }) + from_id = row[0].value if hasattr(row[0], "value") else row[0] + to_id = row[1].value if hasattr(row[1], "value") else row[1] + edge_type = row[2].value if hasattr(row[2], "value") else row[2] + + edges.append({"from": from_id, "to": to_id, "type": edge_type}) return edges except Exception as e: From 4c7f680b63aaddb81ff83361225fb7fd0cbc2932 Mon Sep 17 00:00:00 2001 From: liji <532311301@qq.com> Date: Mon, 27 Oct 2025 21:43:32 +0800 Subject: [PATCH 137/137] feat: format utils --- src/memos/mem_cube/utils.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/memos/mem_cube/utils.py b/src/memos/mem_cube/utils.py index 916efa506..24836c509 100644 --- a/src/memos/mem_cube/utils.py +++ b/src/memos/mem_cube/utils.py @@ -73,7 +73,7 @@ def merge_config_with_default( # Detect backend change backend_changed = existing_backend != default_backend - + if backend_changed: logger.info( f"Detected graph_db backend change: {existing_backend} -> {default_backend}. 
" @@ -81,18 +81,20 @@ def merge_config_with_default( ) # Start with default config as base when backend changes merged_graph_config = copy.deepcopy(default_graph_config) - + # Preserve user-specific fields if they exist in both configs preserve_graph_fields = { "auto_create", - "user_name", + "user_name", "use_multi_db", } for field in preserve_graph_fields: if field in existing_graph_config: merged_graph_config[field] = existing_graph_config[field] - logger.debug(f"Preserved graph_db field '{field}': {existing_graph_config[field]}") - + logger.debug( + f"Preserved graph_db field '{field}': {existing_graph_config[field]}" + ) + # Clean up backend-specific fields that don't exist in the new backend # This approach is generic: remove any field from merged config that's not in default config # and not in the preserve list @@ -100,7 +102,7 @@ def merge_config_with_default( for field in list(merged_graph_config.keys()): if field not in default_graph_config and field not in preserve_graph_fields: fields_to_remove.append(field) - + for field in fields_to_remove: removed_value = merged_graph_config.pop(field) logger.info( @@ -115,10 +117,10 @@ def merge_config_with_default( "user_name", "use_multi_db", } - + # Start with existing config as base merged_graph_config = copy.deepcopy(existing_graph_config) - + # Update with default config except preserved fields for key, value in default_graph_config.items(): if key not in preserve_graph_fields: @@ -126,16 +128,18 @@ def merge_config_with_default( logger.debug( f"Updated graph_db field '{key}': {existing_graph_config.get(key)} -> {value}" ) - + # Handle use_multi_db transition - if not default_graph_config.get("use_multi_db", True) and merged_graph_config.get("use_multi_db", True): + if not default_graph_config.get("use_multi_db", True) and merged_graph_config.get( + "use_multi_db", True + ): merged_graph_config["use_multi_db"] = False # For Neo4j: db_name becomes user_name in single-db mode if "neo4j" in default_backend and 
"db_name" in merged_graph_config: merged_graph_config["user_name"] = merged_graph_config.get("db_name") merged_graph_config["db_name"] = default_graph_config.get("db_name") logger.info("Transitioned to single-db mode (use_multi_db=False)") - + preserved_graph_db = { "backend": default_backend, "config": merged_graph_config,