|
13 | 13 | # See the License for the specific language governing permissions and |
14 | 14 | # limitations under the License. |
15 | 15 |
|
| 16 | +import importlib |
16 | 17 | from unittest import mock |
17 | 18 |
|
18 | 19 | import pytest |
@@ -579,3 +580,90 @@ def test_get_blobs_with_no_input(): |
579 | 580 | match="You must provide either `gcs_uri` or both `gcs_bucket_name` and `gcs_prefix`.", |
580 | 581 | ): |
581 | 582 | gcs_utilities.get_blobs() |
| 583 | + |
| 584 | + |
@mock.patch("google.cloud.documentai_toolbox.utilities.gcs_utilities.storage")
def test_get_blobs_with_gcs_uri(mock_storage):
    """Check `get_blobs` when only a `gcs_uri` is supplied.

    The storage module is mocked; the test expects exactly one client to be
    created and one `list_blobs` call using the bucket name and prefix taken
    from the URI.
    """
    storage_client = mock_storage.Client.return_value

    gcs_utilities.get_blobs(gcs_uri="gs://test-bucket/test-directory/1/")

    mock_storage.Client.assert_called_once()
    storage_client.list_blobs.assert_called_once_with(
        "test-bucket",
        prefix="test-directory/1/",
    )
| 594 | + |
| 595 | + |
def test_get_blobs_with_file_type_error():
    """Check that a `gcs_prefix` ending in a file extension raises ValueError."""
    expected_message = "gcs_prefix cannot contain file types"

    with pytest.raises(ValueError, match=expected_message):
        gcs_utilities.get_blobs(
            gcs_bucket_name="test-bucket",
            gcs_prefix="test.json",
        )
| 599 | + |
| 600 | + |
@mock.patch("google.cloud.documentai_toolbox.utilities.gcs_utilities.storage")
def test_get_blob_success_major_3(mock_storage):
    """Check `get_blob` when the version lookup returns "3.0.0".

    With `importlib.metadata.version` patched to return "3.0.0", the test
    expects the blob to be built through `storage.Blob.from_uri`.
    """
    storage_client = mock_storage.Client.return_value
    blob_uri = "gs://test-bucket/test.json"

    # The version patch only needs to be active while get_blob runs; the
    # storage mock from the decorator stays in place for the assertion.
    version_patch = mock.patch("importlib.metadata.version", return_value="3.0.0")
    with version_patch:
        gcs_utilities.get_blob(blob_uri)

    mock_storage.Blob.from_uri.assert_called_once_with(blob_uri, storage_client)
| 611 | + |
| 612 | + |
@mock.patch("google.cloud.documentai_toolbox.utilities.gcs_utilities.storage")
def test_get_blob_success_major_2(mock_storage):
    """Check `get_blob` when the version lookup returns "2.0.0".

    With `importlib.metadata.version` patched to return "2.0.0", the test
    expects the blob to be built through `storage.Blob.from_string`.
    """
    storage_client = mock_storage.Client.return_value
    blob_uri = "gs://test-bucket/test.json"

    # The version patch only needs to be active while get_blob runs; the
    # storage mock from the decorator stays in place for the assertion.
    version_patch = mock.patch("importlib.metadata.version", return_value="2.0.0")
    with version_patch:
        gcs_utilities.get_blob(blob_uri)

    mock_storage.Blob.from_string.assert_called_once_with(blob_uri, storage_client)
| 623 | + |
| 624 | + |
def test_get_blob_invalid_uri():
    """Check that a URI ending in "/" (no file name) makes `get_blob` raise ValueError."""
    directory_uri = "gs://test-bucket/prefix/"
    expected_message = "gcs_uri must link to a single file."

    with pytest.raises(ValueError, match=expected_message):
        gcs_utilities.get_blob(directory_uri)
| 628 | + |
| 629 | + |
def test_get_blob_import_error():
    """Check that a failed package-version lookup surfaces as ImportError.

    `importlib.metadata.version` is patched to raise `PackageNotFoundError`;
    the test expects `get_blob` to raise ImportError with the
    "google-cloud-storage is not installed." message.
    """
    # Import the submodule explicitly. A bare `import importlib` does not
    # guarantee that `importlib.metadata` is loaded, and the `side_effect`
    # argument below is evaluated before mock.patch resolves its target, so
    # the attribute access would otherwise rely on some other module having
    # imported the submodule first.
    import importlib.metadata

    with mock.patch(
        "importlib.metadata.version",
        side_effect=importlib.metadata.PackageNotFoundError,
    ):
        with pytest.raises(ImportError, match="google-cloud-storage is not installed."):
            gcs_utilities.get_blob("gs://test-bucket/test.json")
| 637 | + |
| 638 | + |
@mock.patch("google.cloud.documentai_toolbox.utilities.gcs_utilities.storage")
def test_print_gcs_document_tree_with_skipping_files(mock_storage, capfd):
    """Check the printed tree when there are more files than `files_to_display`.

    Ten blobs are returned by the mocked client and `files_to_display` is 2.
    The test expects the first three shards and the last shard to be printed,
    the fourth shard to be omitted, and a "...." placeholder line to appear.
    """
    storage_client = mock_storage.Client.return_value
    bucket_mock = mock.Mock()
    storage_client.Bucket.return_value = bucket_mock

    # Ten shard blobs: test_shard1.json .. test_shard10.json.
    shard_blobs = []
    for i in range(1, 11):
        shard_blobs.append(
            storage.Blob(
                name=f"gs://test-directory/1/test_shard{i}.json",
                bucket="gs://test-directory/1",
            )
        )
    storage_client.list_blobs.return_value = shard_blobs

    # With files_to_display = 2 and 10 files in total:
    #   idx 0, 1, 2 -> printed
    #   idx 3 .. 8  -> skipped
    #   idx 9       -> printed as the last entry
    gcs_utilities.print_gcs_document_tree(
        gcs_bucket_name="test-directory",
        gcs_prefix="/",
        files_to_display=2,
    )

    printed, _ = capfd.readouterr()

    expected_lines = (
        "├──test_shard1.json",
        "├──test_shard2.json",
        "├──test_shard3.json",
    )
    for line in expected_lines:
        assert line in printed
    assert "├──test_shard4.json" not in printed
    assert "│ ...." in printed
    assert "└──test_shard10.json" in printed
0 commit comments