5656"""
5757
5858import argparse
59+ import re
5960import shutil
6061import time
6162from pathlib import Path
# A column definition is a (name, type-name) pair; property lists elsewhere in
# this file are typed as List[ColumnDef] and unpacked as ``name, kind``.
ColumnDef = Tuple[str, str]

# Type names whose columns are treated as numeric: only columns of these kinds
# are selected for the per-column sum() aggregates.
NUMERIC_COLUMN_TYPES = {"INTEGER", "LONG"}
70+ SAFE_IDENTIFIER_RE = re .compile (r"^[A-Za-z_][A-Za-z0-9_]*$" )
71+
72+
73+ def _validated_identifier (identifier : str ) -> str :
74+ if not SAFE_IDENTIFIER_RE .fullmatch (identifier ):
75+ raise ValueError (f"Unsafe SQL identifier: { identifier !r} " )
76+ return identifier
77+
78+
79+ def _quote_identifier (identifier : str ) -> str :
80+ return f"`{ _validated_identifier (identifier )} `"
6981
7082
7183def result_int (row , * keys : str ) -> int :
@@ -131,9 +143,10 @@ def edge_endpoints(edge_id: int, vertex_count: int) -> Tuple[int, int]:
131143
132144
133145def build_rid_lookup_for_vertex_type (db , vertex_type : str ) -> Dict [int , str ]:
146+ safe_vertex_type = _quote_identifier (vertex_type )
134147 rows = db .query (
135148 "sql" ,
136- f"SELECT Id, @rid as rid FROM { vertex_type } " ,
149+ f"SELECT Id, @rid as rid FROM { safe_vertex_type } " , # nosec B608 - validated identifier
137150 ).to_list ()
138151 rid_lookup : Dict [int , str ] = {}
139152 for row in rows :
@@ -167,11 +180,11 @@ def query_one_or_none(result_set):
167180def collect_vertex_sample (
168181 db , vertex_type : str , vertex_id : int , props : List [ColumnDef ]
169182) -> dict :
183+ safe_vertex_type = _quote_identifier (vertex_type )
170184 row = query_one_or_none (
171185 db .query (
172186 "sql" ,
173- # vertex_type is a constant from this script; vertex_id is bound as parameter.
174- f"SELECT FROM { vertex_type } WHERE Id = ?" ,
187+ f"SELECT FROM { safe_vertex_type } WHERE Id = ?" , # nosec B608 - validated identifier
175188 vertex_id ,
176189 )
177190 )
@@ -230,6 +243,7 @@ def collect_graph_signature(
230243 str (db_path ),
231244 jvm_kwargs = {"heap_size" : heap_size } if heap_size else None ,
232245 ) as db :
246+ safe_vertex_type = _quote_identifier (vertex_type )
233247 vertex_int_props = [
234248 name for name , kind in vertex_props if kind in NUMERIC_COLUMN_TYPES
235249 ]
@@ -242,7 +256,10 @@ def collect_graph_signature(
242256 "sum(Id) AS sum_id" ,
243257 "min(Id) AS min_id" ,
244258 "max(Id) AS max_id" ,
245- ] + [f"sum({ name } ) AS sum_{ name } " for name in vertex_int_props ]
259+ ] + [
260+ f"sum({ _quote_identifier (name )} ) AS sum_{ _validated_identifier (name )} "
261+ for name in vertex_int_props
262+ ]
246263
247264 edge_match_aggregate_fields = [
248265 "count(r) AS count" ,
@@ -254,8 +271,7 @@ def collect_graph_signature(
254271 vertex_aggregate = query_one_or_none (
255272 db .query (
256273 "sql" ,
257- # vertex_aggregate_fields and vertex_type are script-local constants.
258- f"SELECT { ', ' .join (vertex_aggregate_fields )} FROM { vertex_type } " ,
274+ f"SELECT { ', ' .join (vertex_aggregate_fields )} FROM { safe_vertex_type } " , # nosec B608 - validated identifier
259275 )
260276 )
261277 edge_aggregate = query_one_or_none (
0 commit comments