Skip to content

Commit 5857d24

Browse files
resolve comments
1 parent ce0a9cb commit 5857d24

2 files changed

Lines changed: 27 additions & 2 deletions

File tree

src/snowflake/snowpark/_internal/type_utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1430,8 +1430,11 @@ def _scan_quoted_identifier(s: str, start: int) -> int:
14301430
and nothing else.
14311431
14321432
Raises ``ValueError`` if the closing quote is missing.
1433+
1434+
Precondition: ``s[start] == '"'``. All current callers guard on this; we
1435+
do not re-check here because asserts are stripped under ``python -O`` and
1436+
promoting to ``raise`` would be overkill for a private helper.
14331437
"""
1434-
assert s[start] == '"'
14351438
i = start + 1
14361439
while i < len(s):
14371440
if s[i] == '"':

tests/unit/test_dataframe_reader_type_parsing.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def test_simple_quoted_name(self):
106106
assert _scan_quoted_identifier(s, 0) == 5 # index just past closing "
107107

108108
def test_escaped_quote_inside(self):
109-
# "a""b" is the 7-char span 0..6 inclusive; index past it is 7
109+
# "a""b" is a 6-char span (positions 0-5); index just past it is 6
110110
s = '"a""b" rest'
111111
assert _scan_quoted_identifier(s, 0) == 6
112112

@@ -341,6 +341,28 @@ def test_quoted_name_with_array_not_null(self):
341341
assert isinstance(arr, ArrayType)
342342
assert arr.contains_null is False
343343

344+
# --- malformed inputs surface as ValueError, not silent corruption ---
345+
#
346+
# Pin the error-surfacing contract for the two adversarial shapes that can
347+
# actually reach `_split_object_field` after `split_top_level_comma_fields`
348+
# (which is greedy on `"..."` spans). The parser does *not* validate
349+
# OBJECT inputs upstream; it relies on INFER_SCHEMA emitting
350+
# grammar-compliant strings. These tests pin that both deviations raise a
351+
# clear `ValueError` from the appropriate parse step.
352+
353+
def test_quoted_name_with_garbage_type_raises_unsupported_type(self):
354+
# `OBJECT("a NUM"BER)` — `_scan_quoted_identifier` greedily matches
355+
# `"a NUM"`, leaves `BER` as the type token, and `_sf_type_to_type_object`
356+
# rejects the unknown type rather than silently producing a struct.
357+
with pytest.raises(ValueError, match="not a supported type"):
358+
_sf_type_to_type_object('OBJECT("a NUM"BER)')
359+
360+
def test_unterminated_quoted_name_raises(self):
361+
# `OBJECT("a NUMBER)` — no closing `"`. `_scan_quoted_identifier`
362+
# raises rather than silently consuming the trailing `)`.
363+
with pytest.raises(ValueError, match="Unterminated quoted identifier"):
364+
_sf_type_to_type_object('OBJECT("a NUMBER)')
365+
344366

345367
# ---------------------------------------------------------------------------
346368
# _sf_type_to_type_object

0 commit comments

Comments
 (0)