|
15 | 15 | # specific language governing permissions and limitations |
16 | 16 | # under the License. |
17 | 17 | # pylint: disable=redefined-outer-name,arguments-renamed,fixme |
| 18 | +import importlib |
18 | 19 | from tempfile import TemporaryDirectory |
19 | 20 |
|
20 | 21 | import fastavro |
@@ -998,3 +999,65 @@ def test_clear_manifest_cache() -> None: |
998 | 999 | cache_after = manifest_module._manifest_cache |
999 | 1000 | assert cache_after is not None, "Cache should still be enabled after clear" |
1000 | 1001 | assert len(cache_after) == 0, "Cache should be empty after clear" |
| 1002 | + |
| 1003 | + |
def test_manifest_cache_can_be_disabled_with_zero_size(monkeypatch: pytest.MonkeyPatch) -> None:
    """Test that setting manifest-cache-size to 0 disables caching.

    The manifest module is reloaded while ``PYICEBERG_MANIFEST_CACHE_SIZE`` is
    set to ``"0"``. A one-entry manifest and a manifest list are then written to
    a temporary directory, and the list is read twice through ``_manifests``.
    Both reads must leave ``_manifest_cache`` empty and must return distinct
    objects.

    The environment override is scoped with ``monkeypatch.context()`` so that
    the variable is back at its pre-test value (set or unset) *before* the
    module is reloaded during cleanup. Unconditionally deleting the variable
    would reload the module with default settings even when the surrounding
    environment had configured a cache size, leaking that state into later
    tests.
    """
    try:
        with monkeypatch.context() as scoped_env:
            scoped_env.setenv("PYICEBERG_MANIFEST_CACHE_SIZE", "0")
            # Reload inside the ``try`` so a failure here still triggers the
            # restoring reload in ``finally``.
            importlib.reload(manifest_module)

            assert manifest_module._manifest_cache_size == 0
            assert len(manifest_module._manifest_cache) == 0

            io = PyArrowFileIO()

            with TemporaryDirectory() as tmp_dir:
                schema = Schema(NestedField(field_id=1, name="id", field_type=IntegerType(), required=True))
                spec = UNPARTITIONED_PARTITION_SPEC

                # Names are resolved through ``manifest_module`` at call time;
                # presumably this is so the freshly reloaded definitions are
                # the ones exercised.
                manifest_path = f"{tmp_dir}/manifest.avro"
                with manifest_module.write_manifest(
                    format_version=2,
                    spec=spec,
                    schema=schema,
                    output_file=io.new_output(manifest_path),
                    snapshot_id=1,
                    avro_compression="zstandard",
                ) as writer:
                    data_file = manifest_module.DataFile.from_args(
                        content=manifest_module.DataFileContent.DATA,
                        file_path=f"{tmp_dir}/data.parquet",
                        file_format=manifest_module.FileFormat.PARQUET,
                        partition=Record(),
                        record_count=100,
                        file_size_in_bytes=1000,
                    )
                    writer.add_entry(
                        manifest_module.ManifestEntry.from_args(
                            status=manifest_module.ManifestEntryStatus.ADDED,
                            snapshot_id=1,
                            data_file=data_file,
                        )
                    )
                manifest_file = writer.to_manifest_file()

                list_path = f"{tmp_dir}/manifest-list.avro"
                with manifest_module.write_manifest_list(
                    format_version=2,
                    output_file=io.new_output(list_path),
                    snapshot_id=1,
                    parent_snapshot_id=None,
                    sequence_number=1,
                    avro_compression="zstandard",
                ) as list_writer:
                    list_writer.add_manifests([manifest_file])

                # Read the same manifest list twice; with caching disabled
                # nothing may be stored and the results must not be shared.
                manifests_first_call = manifest_module._manifests(io, list_path)
                manifests_second_call = manifest_module._manifests(io, list_path)

                assert len(manifest_module._manifest_cache) == 0
                assert manifests_first_call[0] is not manifests_second_call[0]
    finally:
        # The context manager above has already restored the environment
        # variable, so this reload returns the module to the configuration it
        # had before the test started.
        importlib.reload(manifest_module)
0 commit comments