From 60b045efd82fdebcadfab6789a5e4bbd373ed942 Mon Sep 17 00:00:00 2001 From: Jianzhun Du Date: Fri, 26 Sep 2025 14:22:50 -0700 Subject: [PATCH 1/2] d --- src/snowflake/snowpark/_internal/xml_reader.py | 3 ++- tests/integ/test_xml_reader_row_tag.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/snowflake/snowpark/_internal/xml_reader.py b/src/snowflake/snowpark/_internal/xml_reader.py index 45e7ab33af..47b9849592 100644 --- a/src/snowflake/snowpark/_internal/xml_reader.py +++ b/src/snowflake/snowpark/_internal/xml_reader.py @@ -503,7 +503,8 @@ def process_xml_range( yield {column_name_of_corrupt_record: record_str} elif mode == "FAILFAST": raise RuntimeError( - f"Malformed XML record at bytes {record_start}-{record_end}: {e}" + f"Malformed XML record at bytes {record_start}-{record_end}: {e}\n" + f"XML record string: {record_str}" ) if record_end > approx_end: diff --git a/tests/integ/test_xml_reader_row_tag.py b/tests/integ/test_xml_reader_row_tag.py index e19443280f..069fbbb41e 100644 --- a/tests/integ/test_xml_reader_row_tag.py +++ b/tests/integ/test_xml_reader_row_tag.py @@ -416,3 +416,15 @@ def test_read_xml_row_validation_xsd_path(session): assert result[0]["'price'"] == '"44.95"' assert result[0]["'publish_date'"] == '"2000-10-01"' assert result[0]["'_id'"] == '"bk101"' + + +def test_read_xml_row_validation_xsd_path_failfast(session): + row_tag = "book" + df = ( + session.read.option("rowTag", row_tag) + .option("rowValidationXSDPath", f"@{tmp_stage_name}/{test_file_books_xsd}") + .option("mode", "failfast") + .xml(f"@{tmp_stage_name}/{test_file_books_xml}") + ) + with pytest.raises(SnowparkSQLException, match="XML record string:"): + df.collect() From 06e7bece6cf41fd7e7e32948da5b4c3d62cce9e7 Mon Sep 17 00:00:00 2001 From: Jianzhun Du Date: Fri, 26 Sep 2025 15:42:18 -0700 Subject: [PATCH 2/2] d --- CHANGELOG.md | 1 + tests/integ/test_xml_reader_row_tag.py | 10 +++------- 2 files changed, 4 insertions(+), 7 deletions(-) 
diff --git a/CHANGELOG.md b/CHANGELOG.md index bbefc6fd34..273f0a2b7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -87,6 +87,7 @@ #### Improvements - Improved `DataFrameReader.dbapi` (PuPr) reading performance by setting the default `fetch_size` parameter value to 100000. +- Improved error message for XSD validation failure when reading XML files using `session.read.option('rowValidationXSDPath', <xsd_path>).xml(<stage_file_path>)`. ### Snowpark pandas API Updates diff --git a/tests/integ/test_xml_reader_row_tag.py b/tests/integ/test_xml_reader_row_tag.py index 069fbbb41e..10328205ed 100644 --- a/tests/integ/test_xml_reader_row_tag.py +++ b/tests/integ/test_xml_reader_row_tag.py @@ -420,11 +420,7 @@ def test_read_xml_row_validation_xsd_path(session): def test_read_xml_row_validation_xsd_path_failfast(session): row_tag = "book" - df = ( - session.read.option("rowTag", row_tag) - .option("rowValidationXSDPath", f"@{tmp_stage_name}/{test_file_books_xsd}") - .option("mode", "failfast") - .xml(f"@{tmp_stage_name}/{test_file_books_xml}") - ) with pytest.raises(SnowparkSQLException, match="XML record string:"): - df.collect() + session.read.option("rowTag", row_tag).option( + "rowValidationXSDPath", f"@{tmp_stage_name}/{test_file_books_xsd}" + ).option("mode", "failfast").xml(f"@{tmp_stage_name}/{test_file_books_xml}")