|
1 | 1 | """Tests for gcsutil.StorageControl validation and normalized/raw archive flow.""" |
2 | 2 |
|
| 3 | +import errno |
| 4 | +import os |
| 5 | +import tempfile |
3 | 6 | from typing import Any |
4 | 7 | from unittest.mock import MagicMock, patch |
5 | 8 |
|
@@ -363,3 +366,108 @@ def assert_csv_at_path(path: str, **kwargs: Any) -> None: |
363 | 366 | assert mock_blob.upload_from_filename.call_args.kwargs["content_type"] == ( |
364 | 367 | "text/csv; charset=utf-8" |
365 | 368 | ) |
| 369 | + |
| 370 | + |
def test_run_validation_download_oserror_unlinks_temp_and_skips_validate() -> None:
    """If GCS download fails, temp file is removed and validate_file_reader is not run.

    A real temp file is created and handed to the code under test through a
    patched ``mkstemp`` so that its removal can be observed on disk.
    """
    fd, real_path = tempfile.mkstemp(suffix=".csv", prefix="test_dl_oserr_")
    try:
        mock_blob = MagicMock()
        # Simulate a full disk while the blob is being written locally.
        mock_blob.download_to_filename.side_effect = OSError(
            errno.ENOSPC, "No space left on device"
        )

        control = StorageControl()
        mkstemp_patch = patch(
            "src.webapp.gcsutil.tempfile.mkstemp", return_value=(fd, real_path)
        )
        validate_patch = patch("src.webapp.gcsutil.validate_file_reader")
        with mkstemp_patch, validate_patch as mock_validate:
            with pytest.raises(OSError, match="No space left"):
                control._run_validation_and_get_normalized_df(
                    mock_blob,
                    "school_course.csv",
                    ["STUDENT"],
                    {},
                    None,
                    "pdp",
                    None,
                )
            # Checked while the patch is still active.
            mock_validate.assert_not_called()

        assert not os.path.exists(real_path)
        mock_blob.download_to_filename.assert_called_once_with(real_path)
    finally:
        # Never leave the fixture file behind, even when an assertion above
        # fails because the code under test did not clean up.
        # NOTE(review): `fd` is deliberately not closed here; it is presumably
        # owned (and closed) by the code under test — confirm. Closing a
        # descriptor number that may already have been reused would be unsafe.
        try:
            os.unlink(real_path)
        except FileNotFoundError:
            pass  # Expected on the passing path: the code under test removed it.
| 396 | + |
| 397 | + |
def test_run_validation_download_oserror_logs_errno() -> None:
    """OSError from download_to_filename is logged with errno before re-raise.

    The module logger is replaced with a mock so the single ``error`` call can
    be inspected: its message and the positional argument carrying the errno.
    """
    fd, real_path = tempfile.mkstemp(suffix=".csv", prefix="test_dl_log_")
    try:
        mock_blob = MagicMock()
        # Simulate a full disk while the blob is being written locally.
        mock_blob.download_to_filename.side_effect = OSError(
            errno.ENOSPC, "No space left on device"
        )

        control = StorageControl()
        mkstemp_patch = patch(
            "src.webapp.gcsutil.tempfile.mkstemp", return_value=(fd, real_path)
        )
        logger_patch = patch("src.webapp.gcsutil.logger")
        with mkstemp_patch, logger_patch as mock_logger:
            with pytest.raises(OSError):
                control._run_validation_and_get_normalized_df(
                    mock_blob,
                    "f.csv",
                    ["STUDENT"],
                    {},
                    None,
                    "pdp",
                    None,
                )
            mock_logger.error.assert_called_once()
            logged_args = mock_logger.error.call_args[0]
            assert "download_to_filename failed" in logged_args[0]
            # The errno is expected as the fourth positional logging argument.
            assert logged_args[3] == errno.ENOSPC

        assert not os.path.exists(real_path)
    finally:
        # Never leave the fixture file behind, even when an assertion above
        # fails because the code under test did not clean up.
        # NOTE(review): `fd` is deliberately not closed here; it is presumably
        # owned (and closed) by the code under test — confirm.
        try:
            os.unlink(real_path)
        except FileNotFoundError:
            pass  # Expected on the passing path: the code under test removed it.
| 425 | + |
| 426 | + |
def test_write_dataframe_to_csv_oserror_unlinks_temp() -> None:
    """If to_csv fails (e.g. disk full), temp file is removed and upload is not attempted.

    ``DataFrame.to_csv`` is patched to raise ``ENOSPC``; the test then checks
    the logged error, the absence of the temp file, and that no upload happened.
    """
    fd, real_path = tempfile.mkstemp(suffix=".csv", prefix="test_csv_oserr_")
    try:
        mock_blob = MagicMock()
        mock_bucket = MagicMock()
        mock_bucket.blob.return_value = mock_blob

        control = StorageControl()
        mkstemp_patch = patch(
            "src.webapp.gcsutil.tempfile.mkstemp", return_value=(fd, real_path)
        )
        to_csv_patch = patch.object(
            pd.DataFrame,
            "to_csv",
            side_effect=OSError(errno.ENOSPC, "No space left on device"),
        )
        logger_patch = patch("src.webapp.gcsutil.logger")
        with mkstemp_patch, to_csv_patch, logger_patch as mock_logger:
            with pytest.raises(OSError, match="No space left"):
                control._write_dataframe_to_gcs_as_csv(
                    mock_bucket,
                    "validated/out.csv",
                    pd.DataFrame({"a": [1]}),
                )
            mock_logger.error.assert_called_once()
            logged_args = mock_logger.error.call_args[0]
            assert "to_csv failed" in logged_args[0]
            # The errno is expected as the fourth positional logging argument.
            assert logged_args[3] == errno.ENOSPC

        assert not os.path.exists(real_path)
        mock_blob.upload_from_filename.assert_not_called()
    finally:
        # Never leave the fixture file behind, even when an assertion above
        # fails because the code under test did not clean up.
        # NOTE(review): `fd` is deliberately not closed here; it is presumably
        # owned (and closed) by the code under test — confirm.
        try:
            os.unlink(real_path)
        except FileNotFoundError:
            pass  # Expected on the passing path: the code under test removed it.
| 454 | + |
| 455 | + |
def test_write_dataframe_upload_failure_still_unlinks_temp() -> None:
    """If GCS upload fails after to_csv, the local temp file is still deleted.

    ``to_csv`` is left unpatched here; only ``upload_from_filename`` on the
    mocked blob raises.
    """
    fd, real_path = tempfile.mkstemp(suffix=".csv", prefix="test_upload_fail_")
    try:
        mock_blob = MagicMock()
        mock_blob.upload_from_filename.side_effect = RuntimeError("upload failed")
        mock_bucket = MagicMock()
        mock_bucket.blob.return_value = mock_blob

        control = StorageControl()
        mkstemp_patch = patch(
            "src.webapp.gcsutil.tempfile.mkstemp", return_value=(fd, real_path)
        )
        with mkstemp_patch, pytest.raises(RuntimeError, match="upload failed"):
            control._write_dataframe_to_gcs_as_csv(
                mock_bucket,
                "validated/out.csv",
                pd.DataFrame({"x": [1]}),
            )

        assert not os.path.exists(real_path)
    finally:
        # Never leave the fixture file behind, even when the assertion above
        # fails because the code under test did not clean up.
        # NOTE(review): `fd` is deliberately not closed here; it is presumably
        # owned (and closed) by the code under test — confirm.
        try:
            os.unlink(real_path)
        except FileNotFoundError:
            pass  # Expected on the passing path: the code under test removed it.
0 commit comments