Skip to content

Commit bdc8b6d

Browse files
authored
fix(python-driver): quote-escape column aliases in buildCypher() (#2373)
* fix(python-driver): quote-escape column aliases in buildCypher() Always double-quote column names in the AS (...) clause generated by buildCypher() and _validate_column(). This prevents PostgreSQL parse errors when column aliases happen to be reserved words (e.g. 'count', 'order', 'type', 'group', 'select'). Before: SELECT * from cypher(NULL,NULL) as (count agtype); After: SELECT * from cypher(NULL,NULL) as ("count" agtype); PostgreSQL always accepts double-quoted identifiers, so quoting unconditionally is safe for all names — no reserved word list needed. Closes #2370 * Address review feedback: document type quoting, improve tests - _validate_column: add comment explaining why type_name is intentionally left unquoted (PG type names are case-insensitive) - buildCypher: fix graphName == None to idiomatic 'is None' - test_reserved_word_count: replace fragile assertNotIn with regex - Add test for reserved word in "name type" pair (e.g. "order agtype") - Add comment explaining intentional _validate_column private import * Fix test_security.py for quoted column names, improve comment - Update test_security.py assertions to expect double-quoted column names (consistent with the always-quote-column-names change) - Reword type_name comment to clarify case-folding semantics Made-with: Cursor
1 parent 4f9db37 commit bdc8b6d

3 files changed

Lines changed: 95 additions & 15 deletions

File tree

drivers/python/age/age.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -216,14 +216,19 @@ def _validate_column(col: str) -> str:
216216
name, type_name = parts
217217
validate_identifier(name, "Column name")
218218
validate_identifier(type_name, "Column type")
219-
return f"{name} {type_name}"
219+
# Only the column name is double-quoted. The type name is left
220+
# unquoted so PostgreSQL applies its default identifier folding
221+
# for type names in column definitions. Double-quoting would
222+
# make the type name case-sensitive and could change type
223+
# resolution in surprising ways for user-defined types.
224+
return f'"{name}" {type_name}'
220225
else:
221226
validate_identifier(col, "Column name")
222-
return f"{col} agtype"
227+
return f'"{col}" agtype'
223228

224229

225230
def buildCypher(graphName:str, cypherStmt:str, columns:list) ->str:
226-
if graphName == None:
231+
if graphName is None:
227232
raise _EXCEPTION_GraphNotSet
228233

229234
columnExp=[]
@@ -233,16 +238,18 @@ def buildCypher(graphName:str, cypherStmt:str, columns:list) ->str:
233238
if validated:
234239
columnExp.append(validated)
235240
else:
236-
columnExp.append('v agtype')
241+
columnExp.append('"v" agtype')
237242

238243
# Design note: String concatenation is used here instead of
239244
# psycopg.sql.Identifier() because column specifications are
240-
# "name type" pairs (e.g. "v agtype") that don't map directly to
245+
# "name type" pairs (e.g. '"v" agtype') that don't map directly to
241246
# sql.Identifier(). Each component has already been validated by
242247
# _validate_column() → validate_identifier(), which restricts
243-
# names to ^[A-Za-z_][A-Za-z0-9_]*$ and max 63 chars. The
244-
# graphName and cypherStmt are NOT embedded here — this template
245-
# only contains the validated column list and static SQL keywords.
248+
# names to ^[A-Za-z_][A-Za-z0-9_]*$ and max 63 chars. Column names
249+
# are always double-quoted to avoid conflicts with PostgreSQL
250+
# reserved words (e.g. "count", "order", "type"). The graphName
251+
# and cypherStmt are NOT embedded here — this template only
252+
# contains the validated column list and static SQL keywords.
246253
stmtArr = []
247254
stmtArr.append("SELECT * from cypher(NULL,NULL) as (")
248255
stmtArr.append(','.join(columnExp))

drivers/python/test_age_py.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,18 @@
1313
# specific language governing permissions and limitations
1414
# under the License.
1515
import json
16+
import re
1617

1718
from age.models import Vertex
1819
import unittest
1920
import unittest.mock
2021
import decimal
2122
import age
23+
# _validate_column is private but tested directly because its quoting
24+
# behavior is security-relevant and the public surface (buildCypher)
25+
# makes it difficult to isolate quoting assertions.
26+
from age.age import buildCypher, _validate_column
27+
from age.exceptions import InvalidIdentifier
2228
import argparse
2329

2430
TEST_HOST = "localhost"
@@ -99,6 +105,72 @@ def test_connect_forwards_skip_load_to_setup(self):
99105
)
100106

101107

108+
class TestBuildCypher(unittest.TestCase):
109+
"""Unit tests for buildCypher() and _validate_column() — no DB required."""
110+
111+
def test_simple_column(self):
112+
result = buildCypher("g", "MATCH (n) RETURN n", ["n"])
113+
self.assertIn('"n" agtype', result)
114+
115+
def test_column_with_type(self):
116+
result = buildCypher("g", "MATCH (n) RETURN n", ["n agtype"])
117+
self.assertIn('"n" agtype', result)
118+
119+
def test_reserved_word_count(self):
120+
"""Issue #2370: 'count' is a PostgreSQL reserved word."""
121+
result = buildCypher("g", "MATCH (n) RETURN count(n)", ["count"])
122+
self.assertIn('"count" agtype', result)
123+
# Verify 'count' never appears unquoted as a column name
124+
self.assertIsNone(
125+
re.search(r'(?<!")\bcount\s+agtype\b', result),
126+
f"'count' must be quoted in: {result}"
127+
)
128+
129+
def test_reserved_word_order(self):
130+
"""Issue #2370: 'order' is a PostgreSQL reserved word."""
131+
result = buildCypher("g", "MATCH (n) RETURN n.order", ["order"])
132+
self.assertIn('"order" agtype', result)
133+
134+
def test_reserved_word_type(self):
135+
"""Issue #2370: 'type' is a PostgreSQL reserved word."""
136+
result = buildCypher("g", "MATCH ()-[r]->() RETURN type(r)", ["type"])
137+
self.assertIn('"type" agtype', result)
138+
139+
def test_reserved_word_select(self):
140+
"""Issue #2370: 'select' is a PostgreSQL reserved word."""
141+
result = buildCypher("g", "MATCH (n) RETURN n", ["select"])
142+
self.assertIn('"select" agtype', result)
143+
144+
def test_reserved_word_group(self):
145+
"""Issue #2370: 'group' is a PostgreSQL reserved word."""
146+
result = buildCypher("g", "MATCH (n) RETURN n", ["group"])
147+
self.assertIn('"group" agtype', result)
148+
149+
def test_multiple_columns(self):
150+
result = buildCypher("g", "MATCH (n) RETURN n.name, count(n)", ["name", "count"])
151+
self.assertIn('"name" agtype', result)
152+
self.assertIn('"count" agtype', result)
153+
154+
def test_default_column(self):
155+
result = buildCypher("g", "MATCH (n) RETURN n", None)
156+
self.assertIn('"v" agtype', result)
157+
158+
def test_invalid_column_rejected(self):
159+
with self.assertRaises(InvalidIdentifier):
160+
buildCypher("g", "MATCH (n) RETURN n", ["invalid;col"])
161+
162+
def test_reserved_word_in_name_type_pair(self):
163+
"""Quoting applies even when the column is specified as 'name type'."""
164+
result = buildCypher("g", "MATCH (n) RETURN n.order", ["order agtype"])
165+
self.assertIn('"order" agtype', result)
166+
167+
def test_validate_column_quoting(self):
168+
self.assertEqual(_validate_column("v"), '"v" agtype')
169+
self.assertEqual(_validate_column("v agtype"), '"v" agtype')
170+
self.assertEqual(_validate_column("count"), '"count" agtype')
171+
self.assertEqual(_validate_column("my_col"), '"my_col" agtype')
172+
173+
102174
class TestAgeBasic(unittest.TestCase):
103175
ag = None
104176
args: argparse.Namespace = argparse.Namespace(
@@ -558,6 +630,7 @@ def testSerialization(self):
558630
suite = unittest.TestSuite()
559631
loader = unittest.TestLoader()
560632
suite.addTests(loader.loadTestsFromTestCase(TestSetUpAge))
633+
suite.addTests(loader.loadTestsFromTestCase(TestBuildCypher))
561634
suite.addTest(TestAgeBasic("testExec"))
562635
suite.addTest(TestAgeBasic("testQuery"))
563636
suite.addTest(TestAgeBasic("testChangeData"))

drivers/python/test_security.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -167,10 +167,10 @@ class TestColumnValidation(unittest.TestCase):
167167
"""Test _validate_column prevents injection through column specs."""
168168

169169
def test_plain_column_name(self):
170-
self.assertEqual(_validate_column('v'), 'v agtype')
170+
self.assertEqual(_validate_column('v'), '"v" agtype')
171171

172172
def test_column_with_type(self):
173-
self.assertEqual(_validate_column('n agtype'), 'n agtype')
173+
self.assertEqual(_validate_column('n agtype'), '"n" agtype')
174174

175175
def test_empty_column(self):
176176
self.assertEqual(_validate_column(''), '')
@@ -198,20 +198,20 @@ class TestBuildCypher(unittest.TestCase):
198198

199199
def test_default_column(self):
200200
result = buildCypher('test_graph', 'MATCH (n) RETURN n', None)
201-
self.assertIn('v agtype', result)
201+
self.assertIn('"v" agtype', result)
202202

203203
def test_single_column(self):
204204
result = buildCypher('test_graph', 'MATCH (n) RETURN n', ['n'])
205-
self.assertIn('n agtype', result)
205+
self.assertIn('"n" agtype', result)
206206

207207
def test_typed_column(self):
208208
result = buildCypher('test_graph', 'MATCH (n) RETURN n', ['n agtype'])
209-
self.assertIn('n agtype', result)
209+
self.assertIn('"n" agtype', result)
210210

211211
def test_multiple_columns(self):
212212
result = buildCypher('test_graph', 'MATCH (n) RETURN n', ['a', 'b'])
213-
self.assertIn('a agtype', result)
214-
self.assertIn('b agtype', result)
213+
self.assertIn('"a" agtype', result)
214+
self.assertIn('"b" agtype', result)
215215

216216
def test_rejects_injection_in_column(self):
217217
with self.assertRaises(InvalidIdentifier):

0 commit comments

Comments
 (0)