|
13 | 13 | from numpy.testing import assert_equal |
14 | 14 |
|
15 | 15 | import snowflake.snowpark.modin.plugin # noqa: F401 |
16 | | -from tests.integ.utils.sql_counter import sql_count_checker |
| 16 | +from tests.integ.utils.sql_counter import sql_count_checker, SqlCounter |
17 | 17 | from tests.utils import Utils |
18 | 18 |
|
19 | 19 | temp_dir = tempfile.TemporaryDirectory() |
@@ -293,3 +293,57 @@ def test_timedeltaindex_to_csv_dataframe_local(): |
293 | 293 | pd.DataFrame(native_df).to_csv(snow_path) |
294 | 294 |
|
295 | 295 | assert_file_equal(snow_path, native_path, is_compressed=False) |
| 296 | + |
| 297 | + |
def test_to_csv_stage_path_escapes_special_characters(sf_stage, session):
    """Snowpark-pandas ``to_csv`` to a stage path must escape special characters
    in the path.

    ``DataFrame.to_csv(path_or_buf="@stage/...")`` routes server-side into
    ``snowpark_df.write.csv(location=...)`` -> ``normalize_path`` ->
    ``COPY INTO <location>``. A path containing a backslash immediately followed
    by a single quote must stay inside the stage-location string literal so the
    generated ``COPY INTO`` is valid and the path is treated as literal data.

    Args:
        sf_stage: Name of the stage the CSV files are written to; it is
            interpolated into ``@<stage>/...`` locations.
        session: Session used to ``LIST`` the stage and download the written
            file for verification.
    """
    snow_df = pd.DataFrame({"A": ["one", "two", "three"], "B": [1, 2, 3]})
    # None index name is not supported when writing to a Snowflake stage.
    snow_df.index.set_names(["X"], inplace=True)

    # (a) Stage path whose directory name contains a single quote. The quote is
    # escaped as literal data, so the write succeeds and the file lands under
    # that exact name. ``to_csv`` to a stage emits one query (the COPY INTO);
    # downloading it back confirms the path was treated as a literal file
    # name and not parsed as SQL.
    quote_name = "o'clock/mods.csv"
    quote_path = f"@{sf_stage}/{quote_name}"
    with SqlCounter(query_count=1):
        snow_df.to_csv(quote_path, index=False)
    listed = [row[0] for row in session.sql(f"LIST '@{sf_stage}'").collect()]
    assert any(name.endswith(quote_name) for name in listed), listed

    # Scope the download directory with a context manager so it is removed
    # even when one of the assertions below fails (``mkdtemp`` would leak it).
    with tempfile.TemporaryDirectory() as download_dir:
        session.file.get(quote_path, download_dir)
        downloaded = [
            entry
            for entry in os.listdir(download_dir)
            if os.path.isfile(os.path.join(download_dir, entry))
        ]
        assert len(downloaded) == 1, downloaded
        # Read with an explicit encoding so the comparison does not depend on
        # the platform's default locale.
        with open(
            os.path.join(download_dir, downloaded[0]), encoding="utf-8"
        ) as fh:
            content = fh.read()

    data_rows = [line for line in content.splitlines() if line.strip()]
    # Header ("A,B") + the DataFrame's own 3 data rows == 4 lines.
    assert len(data_rows) == 4, content
    assert content == "A,B\none,1\ntwo,2\nthree,3\n", content

    # (b) A file name mixing a backslash, a single quote, parentheses, a comma
    # and a trailing ``--`` must produce valid SQL: before the fix the
    # unescaped backslash/quote closed the location literal early. The write
    # must succeed with a single COPY INTO query -- if the path were parsed as
    # SQL the statement would error instead. Stage storage does not preserve a
    # literal backslash as a path character, so we assert the write succeeds
    # rather than reading back the exact name.
    special_name = "report\\' , (note) -- draft"
    special_path = f"@{sf_stage}/{special_name}"
    with SqlCounter(query_count=1):
        snow_df.to_csv(special_path, index=False)
0 commit comments