diff --git a/CHANGELOG.md b/CHANGELOG.md
index bbefc6fd34..273f0a2b7c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -87,6 +87,7 @@
 #### Improvements
 
 - Improved `DataFrameReader.dbapi` (PuPr) reading performance by setting the default `fetch_size` parameter value to 100000.
+- Improved error message for XSD validation failure when reading XML files using `session.read.option('rowValidationXSDPath', <xsd_path>).xml(<stage_file_path>)`.
 
 ### Snowpark pandas API Updates
 
diff --git a/src/snowflake/snowpark/_internal/xml_reader.py b/src/snowflake/snowpark/_internal/xml_reader.py
index 45e7ab33af..47b9849592 100644
--- a/src/snowflake/snowpark/_internal/xml_reader.py
+++ b/src/snowflake/snowpark/_internal/xml_reader.py
@@ -503,7 +503,8 @@ def process_xml_range(
                     yield {column_name_of_corrupt_record: record_str}
                 elif mode == "FAILFAST":
                     raise RuntimeError(
-                        f"Malformed XML record at bytes {record_start}-{record_end}: {e}"
+                        f"Malformed XML record at bytes {record_start}-{record_end}: {e}\n"
+                        f"XML record string: {record_str}"
                     )
 
             if record_end > approx_end:
diff --git a/tests/integ/test_xml_reader_row_tag.py b/tests/integ/test_xml_reader_row_tag.py
index e19443280f..10328205ed 100644
--- a/tests/integ/test_xml_reader_row_tag.py
+++ b/tests/integ/test_xml_reader_row_tag.py
@@ -416,3 +416,11 @@ def test_read_xml_row_validation_xsd_path(session):
     assert result[0]["'price'"] == '"44.95"'
     assert result[0]["'publish_date'"] == '"2000-10-01"'
     assert result[0]["'_id'"] == '"bk101"'
+
+
+def test_read_xml_row_validation_xsd_path_failfast(session):
+    row_tag = "book"
+    with pytest.raises(SnowparkSQLException, match="XML record string:"):
+        session.read.option("rowTag", row_tag).option(
+            "rowValidationXSDPath", f"@{tmp_stage_name}/{test_file_books_xsd}"
+        ).option("mode", "failfast").xml(f"@{tmp_stage_name}/{test_file_books_xml}")