Skip to content

Commit a511f4c

Browse files
tests: assert direct parquet read fails, polish PME error message
1 parent 8427e54 commit a511f4c

2 files changed

Lines changed: 30 additions & 3 deletions

File tree

pyiceberg/io/pyarrow.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,14 +1120,20 @@ def _get_file_format(file_format: FileFormat, **kwargs: dict[str, Any]) -> ds.Fi
11201120
def _get_decryption_properties(key_metadata_bytes: bytes) -> Any:
11211121
"""Build FileDecryptionProperties from Iceberg key metadata.
11221122
1123-
Requires a custom PyArrow build with pyarrow.parquet.encryption support.
1123+
Requires PyArrow >= 25 (currently nightly-only) for the direct-key
1124+
`create_decryption_properties` API added by apache/arrow#49667.
11241125
"""
11251126
try:
11261127
import pyarrow.parquet.encryption as pe
1128+
1129+
if not hasattr(pe, "create_decryption_properties"):
1130+
raise ImportError("create_decryption_properties not available")
11271131
except ImportError as e:
11281132
raise ImportError(
1129-
"Parquet Modular Encryption requires a PyArrow build with encryption support. "
1130-
"See PYARROW_ENCRYPTION_HANDOFF.md for build instructions."
1133+
"Parquet Modular Encryption requires PyArrow >= 25 with the direct-key API "
1134+
"(apache/arrow#49667). Until it releases, install the nightly: "
1135+
"`make install-pyarrow-nightly` (or `uv pip install -i "
1136+
"https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pyarrow`)."
11311137
) from e
11321138

11331139
from pyiceberg.encryption.key_metadata import StandardKeyMetadata

tests/integration/test_encryption.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,24 @@ def test_encrypted_table_to_polars(hive_catalog_with_kms) -> None: # type: igno
110110
assert df["id"].to_list() == [1, 2, 3]
111111
assert df["data"].to_list() == ["alice", "bob", "charlie"]
112112
assert df["value"].to_list() == [1.0, 2.0, 3.0]
113+
114+
115+
@pytest.mark.integration
116+
def test_encrypted_table_direct_parquet_read_fails(hive_catalog_with_kms) -> None: # type: ignore[no-untyped-def]
117+
"""Canary: a raw PyArrow read of a data file without decryption properties must fail.
118+
119+
Mirrors iceberg-java's TestTableEncryption#testDirectDataFileRead, which proves the data
120+
files are genuinely PME-encrypted by asserting that reading them without the keys raises
121+
ParquetCryptoRuntimeException. Without this check, the read tests above could silently pass
122+
on plaintext Parquet and the POC would be meaningless.
123+
"""
124+
import pyarrow.parquet as pq
125+
126+
tbl = hive_catalog_with_kms.load_table("default.test_encrypted")
127+
128+
data_files = [task.file.file_path for task in tbl.scan().plan_files()]
129+
assert data_files, "expected at least one data file in the encrypted table"
130+
131+
for file_path in data_files:
132+
with pytest.raises(OSError, match="encrypted"), tbl.io.new_input(file_path).open() as fi:
133+
pq.read_table(fi)

0 commit comments

Comments
 (0)