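Resolve review comments: document _scan_quoted_identifier precondition, fix test comment, pin malformed-input errors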

sfc-gh-wshangguan · sfc-gh-wshangguan · commit 5857d24b7b7f · 2026-05-01T16:48:22.000-07:00
diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py
@@ -1430,8 +1430,11 @@ def _scan_quoted_identifier(s: str, start: int) -> int:
     and nothing else.
 
     Raises ``ValueError`` if the closing quote is missing.
+
+    Precondition: ``s[start] == '"'``. All current callers guard on this, so
+    it is not re-checked here: an ``assert`` would be stripped under
+    ``python -O`` anyway, and a ``raise`` is overkill for a private helper.
     """
-    assert s[start] == '"'
     i = start + 1
     while i < len(s):
         if s[i] == '"':
diff --git a/tests/unit/test_dataframe_reader_type_parsing.py b/tests/unit/test_dataframe_reader_type_parsing.py
@@ -106,7 +106,7 @@ def test_simple_quoted_name(self):
         assert _scan_quoted_identifier(s, 0) == 5  # index just past closing "
 
     def test_escaped_quote_inside(self):
-        # "a""b" is the 7-char span 0..6 inclusive; index past it is 7
+        # "a""b" is a 6-char span (positions 0-5); index just past it is 6
         s = '"a""b" rest'
         assert _scan_quoted_identifier(s, 0) == 6
 
@@ -341,6 +341,28 @@ def test_quoted_name_with_array_not_null(self):
         assert isinstance(arr, ArrayType)
         assert arr.contains_null is False
 
+    # --- malformed inputs surface as ValueError, not silent corruption ---
+    #
+    # Pin the error-surfacing contract for the two adversarial shapes that can
+    # actually reach `_split_object_field` after `split_top_level_comma_fields`
+    # (which is greedy on `"..."` spans). The parser does *not* validate
+    # OBJECT inputs upstream; it relies on INFER_SCHEMA emitting
+    # grammar-compliant strings. These tests pin that both deviations raise a
+    # clear `ValueError` from the appropriate parse step.
+
+    def test_quoted_name_with_garbage_type_raises_unsupported_type(self):
+        # `OBJECT("a NUM"BER)` — `_scan_quoted_identifier` greedily matches
+        # `"a NUM"`, leaves `BER` as the type token, and `_sf_type_to_type_object`
+        # rejects the unknown type rather than silently producing a struct.
+        with pytest.raises(ValueError, match="not a supported type"):
+            _sf_type_to_type_object('OBJECT("a NUM"BER)')
+
+    def test_unterminated_quoted_name_raises(self):
+        # `OBJECT("a NUMBER)` — no closing `"`. `_scan_quoted_identifier`
+        # raises rather than silently consuming the trailing `)`.
+        with pytest.raises(ValueError, match="Unterminated quoted identifier"):
+            _sf_type_to_type_object('OBJECT("a NUMBER)')
+
 
 # ---------------------------------------------------------------------------
 # _sf_type_to_type_object