Skip to content

Commit 788ad8b

Browse files
SNOW-3375781: Preserve 1-pass read for SCOS XML user schema performance (#4185)
1 parent c63ad27 commit 788ad8b

2 files changed

Lines changed: 36 additions & 1 deletion

File tree

src/snowflake/snowpark/dataframe_reader.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1741,7 +1741,14 @@ def _read_semi_structured_file(self, path: str, format: str) -> DataFrame:
17411741

17421742
xml_inferred_schema = None
17431743
if format == "XML" and XML_ROW_TAG_STRING in self._cur_options:
1744-
if context._is_snowpark_connect_compatible_mode and not self._user_schema:
1744+
# Internal flag set by SCOS to skip the inference pass when a user schema
1745+
# is already present, maintaining 1-pass reading when user schema is provided.
1746+
skip_inference = self._cur_options.get("_XML_SKIP_INFERENCE", False)
1747+
if (
1748+
context._is_snowpark_connect_compatible_mode
1749+
and not self._user_schema
1750+
and not skip_inference
1751+
):
17451752
string_types_only = not self._cur_options.get("INFER_SCHEMA", True)
17461753
xml_inferred_schema = self._infer_schema_for_xml(
17471754
path, string_types_only

tests/unit/test_xml_schema_inference.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1937,3 +1937,31 @@ def test_udtf_process_string_types_only():
19371937
)[0][0]
19381938
assert "string" in schema_str
19391939
assert "bigint" not in schema_str and "date" not in schema_str
1940+
1941+
1942+
# ===========================================================================
1943+
# _XML_SKIP_INFERENCE option
1944+
# ===========================================================================
1945+
1946+
1947+
@pytest.mark.parametrize("skip_inference", [True, False])
def test_xml_skip_inference_option(skip_inference):
    """Check that the ``_XML_SKIP_INFERENCE`` option gates XML schema inference.

    With the Snowpark Connect compatible mode flag patched to ``True`` and a
    row tag configured, ``_infer_schema_for_xml`` must not be called when the
    option is set on the reader, and must be called exactly once when the
    option is absent.
    """
    xml_reader = DataFrameReader(mock.MagicMock(), _emit_ast=False)
    xml_reader._cur_options[_dr_mod.XML_ROW_TAG_STRING] = "row"
    if skip_inference:
        xml_reader._cur_options["_XML_SKIP_INFERENCE"] = True

    # Build both patchers up front so the ``with`` line stays readable.
    infer_patch = mock.patch.object(
        xml_reader, "_infer_schema_for_xml", return_value=None
    )
    compat_mode_patch = mock.patch.object(
        _dr_mod.context, "_is_snowpark_connect_compatible_mode", True
    )

    with infer_patch as infer_spy, compat_mode_patch:
        try:
            xml_reader._read_semi_structured_file("@s/f.xml", "XML")
        except Exception:
            # Only the inference call is under test; presumably the mocked
            # session cannot complete the read, so any failure is ignored.
            pass

    if not skip_inference:
        infer_spy.assert_called_once()
    else:
        infer_spy.assert_not_called()

0 commit comments

Comments
 (0)