@@ -106,7 +106,7 @@ def test_simple_quoted_name(self):
106106 assert _scan_quoted_identifier (s , 0 ) == 5 # index just past closing "
107107
108108 def test_escaped_quote_inside (self ):
109- # "a""b" is the 7 -char span 0..6 inclusive ; index past it is 7
109+ # "a""b" is a 6-char span (positions 0-5); the index just past it is 6
110110 s = '"a""b" rest'
111111 assert _scan_quoted_identifier (s , 0 ) == 6
112112
@@ -341,6 +341,28 @@ def test_quoted_name_with_array_not_null(self):
341341 assert isinstance (arr , ArrayType )
342342 assert arr .contains_null is False
343343
344+ # --- malformed inputs surface as ValueError, not silent corruption ---
345+ #
346+ # Pin the error-surfacing contract for the two adversarial shapes that can
347+ # actually reach `_split_object_field` after `split_top_level_comma_fields`
348+ # (which is greedy on `"..."` spans). The parser does *not* validate
349+ # OBJECT inputs upstream; it relies on INFER_SCHEMA emitting
350+ # grammar-compliant strings. These tests pin that each of these two malformed
351+ # shapes raises a clear `ValueError` from the parse step that detects it.
352+
353+ def test_quoted_name_with_garbage_type_raises_unsupported_type (self ):
354+ # `OBJECT("a NUM"BER)` — `_scan_quoted_identifier` greedily matches
355+ # `"a NUM"`, leaves `BER` as the type token, and `_sf_type_to_type_object`
356+ # rejects the unknown type rather than silently producing a struct.
357+ with pytest .raises (ValueError , match = "not a supported type" ):
358+ _sf_type_to_type_object ('OBJECT("a NUM"BER)' )
359+
360+ def test_unterminated_quoted_name_raises (self ):
361+ # `OBJECT("a NUMBER)` — no closing `"`. `_scan_quoted_identifier`
362+ # raises rather than silently consuming the trailing `)`.
363+ with pytest .raises (ValueError , match = "Unterminated quoted identifier" ):
364+ _sf_type_to_type_object ('OBJECT("a NUMBER)' )
365+
344366
345367# ---------------------------------------------------------------------------
346368# _sf_type_to_type_object
0 commit comments