From 174c47c3de9ce102be4ad52883162bcc8fa1d78e Mon Sep 17 00:00:00 2001 From: chenzihong-gavin Date: Mon, 26 Jan 2026 13:53:38 +0800 Subject: [PATCH 1/3] refactor: use get_neighbors in storage instead of building adj_list --- graphgen/bases/base_partitioner.py | 21 ------------------- graphgen/bases/base_storage.py | 4 ++++ graphgen/common/init_storage.py | 6 ++++++ .../models/partitioner/bfs_partitioner.py | 7 +++---- .../models/partitioner/dfs_partitioner.py | 7 +++---- .../models/partitioner/ece_partitioner.py | 3 +-- graphgen/models/storage/graph/kuzu_storage.py | 13 ++++++++++++ .../models/storage/graph/networkx_storage.py | 12 +++++++++++ 8 files changed, 42 insertions(+), 31 deletions(-) diff --git a/graphgen/bases/base_partitioner.py b/graphgen/bases/base_partitioner.py index 384c9e4e..406d296e 100644 --- a/graphgen/bases/base_partitioner.py +++ b/graphgen/bases/base_partitioner.py @@ -51,24 +51,3 @@ def community2batch( if edge_data: edges_data.append((u, v, edge_data)) return nodes_data, edges_data - - @staticmethod - def _build_adjacency_list( - nodes: List[tuple[str, dict]], edges: List[tuple[str, str, dict]] - ) -> tuple[dict[str, List[str]], set[tuple[str, str]]]: - """ - Build adjacency list and edge set from nodes and edges. 
- :param nodes - :param edges - :return: adjacency list, edge set - """ - adj: dict[str, List[str]] = {n[0]: [] for n in nodes} - edge_set: set[tuple[str, str]] = set() - for u, v, _ in edges: - if u == v: - continue - adj[u].append(v) - adj[v].append(u) - edge_set.add((u, v)) - edge_set.add((v, u)) - return adj, edge_set diff --git a/graphgen/bases/base_storage.py b/graphgen/bases/base_storage.py index dd9cf151..be50c2c7 100644 --- a/graphgen/bases/base_storage.py +++ b/graphgen/bases/base_storage.py @@ -126,6 +126,10 @@ def upsert_edge( def delete_node(self, node_id: str): raise NotImplementedError + @abstractmethod + def get_neighbors(self, node_id: str) -> List[str]: + raise NotImplementedError + @abstractmethod def reload(self): raise NotImplementedError diff --git a/graphgen/common/init_storage.py b/graphgen/common/init_storage.py index aaffb630..226aeb5c 100644 --- a/graphgen/common/init_storage.py +++ b/graphgen/common/init_storage.py @@ -129,6 +129,9 @@ def upsert_edge( def delete_node(self, node_id: str): return self.graph.delete_node(node_id) + def get_neighbors(self, node_id: str) -> List[str]: + return self.graph.get_neighbors(node_id) + def reload(self): return self.graph.reload() @@ -245,6 +248,9 @@ def upsert_edge( def delete_node(self, node_id: str): return ray.get(self.actor.delete_node.remote(node_id)) + def get_neighbors(self, node_id: str) -> List[str]: + return ray.get(self.actor.get_neighbors.remote(node_id)) + def reload(self): return ray.get(self.actor.reload.remote()) diff --git a/graphgen/models/partitioner/bfs_partitioner.py b/graphgen/models/partitioner/bfs_partitioner.py index a00ad76d..4284cf98 100644 --- a/graphgen/models/partitioner/bfs_partitioner.py +++ b/graphgen/models/partitioner/bfs_partitioner.py @@ -26,8 +26,6 @@ def partition( nodes = g.get_all_nodes() edges = g.get_all_edges() - adj, _ = self._build_adjacency_list(nodes, edges) - used_n: set[str] = set() used_e: set[frozenset[str]] = set() @@ -55,7 +53,7 @@ def partition( 
used_n.add(it) comm_n.append(it) cnt += 1 - for nei in adj[it]: + for nei in g.get_neighbors(it): e_key = frozenset((it, nei)) if e_key not in used_e: queue.append((EDGE_UNIT, e_key)) @@ -63,7 +61,8 @@ def partition( if it in used_e: continue used_e.add(it) - comm_e.append(tuple(sorted(it))) + u, v = tuple(it) + comm_e.append((u, v)) cnt += 1 # push nodes that are not visited for n in it: diff --git a/graphgen/models/partitioner/dfs_partitioner.py b/graphgen/models/partitioner/dfs_partitioner.py index fa2786e6..38508097 100644 --- a/graphgen/models/partitioner/dfs_partitioner.py +++ b/graphgen/models/partitioner/dfs_partitioner.py @@ -26,8 +26,6 @@ def partition( nodes = g.get_all_nodes() edges = g.get_all_edges() - adj, _ = self._build_adjacency_list(nodes, edges) - used_n: set[str] = set() used_e: set[frozenset[str]] = set() @@ -55,7 +53,7 @@ def partition( used_n.add(it) comm_n.append(it) cnt += 1 - for nei in adj[it]: + for nei in g.get_neighbors(it): e_key = frozenset((it, nei)) if e_key not in used_e: stack.append((EDGE_UNIT, e_key)) @@ -64,7 +62,8 @@ def partition( if it in used_e: continue used_e.add(it) - comm_e.append(tuple(sorted(it))) + u, v = sorted(it) + comm_e.append((u, v)) cnt += 1 # push neighboring nodes for n in it: diff --git a/graphgen/models/partitioner/ece_partitioner.py b/graphgen/models/partitioner/ece_partitioner.py index 733f6ea1..83e10cba 100644 --- a/graphgen/models/partitioner/ece_partitioner.py +++ b/graphgen/models/partitioner/ece_partitioner.py @@ -65,7 +65,6 @@ def partition( nodes: List[Tuple[str, dict]] = g.get_all_nodes() edges: List[Tuple[str, str, dict]] = g.get_all_edges() - adj, _ = self._build_adjacency_list(nodes, edges) node_dict = dict(nodes) edge_dict = {frozenset((u, v)): d for u, v, d in edges} @@ -118,7 +117,7 @@ def _add_unit(u): neighbors: List[Tuple[str, Any, dict]] = [] if cur_type == NODE_UNIT: - for nb_id in adj.get(cur_id, []): + for nb_id in g.get_neighbors(cur_id): e_key = frozenset((cur_id, nb_id)) if 
e_key not in used_e and e_key not in community_edges: neighbors.append((EDGE_UNIT, e_key, edge_dict[e_key])) diff --git a/graphgen/models/storage/graph/kuzu_storage.py b/graphgen/models/storage/graph/kuzu_storage.py index 00f55ac0..1288a78d 100644 --- a/graphgen/models/storage/graph/kuzu_storage.py +++ b/graphgen/models/storage/graph/kuzu_storage.py @@ -373,6 +373,19 @@ def delete_node(self, node_id: str): self._conn.execute(query, {"id": node_id}) print(f"Node {node_id} deleted from KuzuDB.") + def get_neighbors(self, node_id: str) -> List[str]: + query = """ + MATCH (a:Entity {id: $id})-[:Relation]->(b:Entity) + RETURN b.id + """ + result = self._conn.execute(query, {"id": node_id}) + neighbors = [] + while result.has_next(): + row = result.get_next() + if row and len(row) >= 1: + neighbors.append(row[0]) + return neighbors + def clear(self): """Clear all data but keep schema (or drop tables).""" self._conn.execute("MATCH (n) DETACH DELETE n") diff --git a/graphgen/models/storage/graph/networkx_storage.py b/graphgen/models/storage/graph/networkx_storage.py index 16d60a39..2aa21400 100644 --- a/graphgen/models/storage/graph/networkx_storage.py +++ b/graphgen/models/storage/graph/networkx_storage.py @@ -188,6 +188,18 @@ def delete_node(self, node_id: str): else: print(f"Node {node_id} not found in the graph for deletion.") + def get_neighbors(self, node_id: str) -> List[str]: + """ + Get the neighbors of a node based on the specified node_id. + + :param node_id: The node_id to get neighbors for + :return: List of neighbor node IDs + """ + if self._graph.has_node(node_id): + return list(self._graph.neighbors(node_id)) + print(f"Node {node_id} not found in the graph.") + return [] + def clear(self): """ Clear the graph by removing all nodes and edges. 
From 8511eb5bdeb30a1ea082f24f36f7c90ac345af73 Mon Sep 17 00:00:00 2001 From: chenzihong <58508660+ChenZiHong-Gavin@users.noreply.github.com> Date: Mon, 26 Jan 2026 14:06:19 +0800 Subject: [PATCH 2/3] Update graphgen/models/storage/graph/kuzu_storage.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- graphgen/models/storage/graph/kuzu_storage.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/graphgen/models/storage/graph/kuzu_storage.py b/graphgen/models/storage/graph/kuzu_storage.py index 1288a78d..cbd243da 100644 --- a/graphgen/models/storage/graph/kuzu_storage.py +++ b/graphgen/models/storage/graph/kuzu_storage.py @@ -375,16 +375,11 @@ def delete_node(self, node_id: str): def get_neighbors(self, node_id: str) -> List[str]: query = """ - MATCH (a:Entity {id: $id})-[:Relation]->(b:Entity) - RETURN b.id + MATCH (a:Entity {id: $id})-[:Relation]-(b:Entity) + RETURN DISTINCT b.id """ result = self._conn.execute(query, {"id": node_id}) - neighbors = [] - while result.has_next(): - row = result.get_next() - if row and len(row) >= 1: - neighbors.append(row[0]) - return neighbors + return [row[0] for row in result if row] def clear(self): """Clear all data but keep schema (or drop tables).""" From 34824317f8f7e76c2fa92a6d7d43a0395b4f4101 Mon Sep 17 00:00:00 2001 From: chenzihong <58508660+ChenZiHong-Gavin@users.noreply.github.com> Date: Mon, 26 Jan 2026 14:07:25 +0800 Subject: [PATCH 3/3] Update graphgen/models/partitioner/bfs_partitioner.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- graphgen/models/partitioner/bfs_partitioner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphgen/models/partitioner/bfs_partitioner.py b/graphgen/models/partitioner/bfs_partitioner.py index 4284cf98..e38f55e0 100644 --- a/graphgen/models/partitioner/bfs_partitioner.py +++ b/graphgen/models/partitioner/bfs_partitioner.py 
@@ -61,7 +61,7 @@ def partition( if it in used_e: continue used_e.add(it) - u, v = tuple(it) + u, v = sorted(it) comm_e.append((u, v)) cnt += 1 # push nodes that are not visited