Skip to content

Commit e2425a0

Browse files
committed
more removals
1 parent f370963 commit e2425a0

File tree

3 files changed

+0
-58
lines changed

3 files changed

+0
-58
lines changed

pyiceberg/io/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,6 @@
9999
GCS_VERSION_AWARE = "gcs.version-aware"
100100
HF_ENDPOINT = "hf.endpoint"
101101
HF_TOKEN = "hf.token"
102-
PYARROW_USE_LARGE_TYPES_ON_READ = "pyarrow.use-large-types-on-read"
103102

104103

105104
@runtime_checkable

pyiceberg/io/pyarrow.py

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -178,7 +178,6 @@
178178
from pyiceberg.utils.config import Config
179179
from pyiceberg.utils.datetime import millis_to_datetime
180180
from pyiceberg.utils.decimal import unscaled_to_decimal
181-
from pyiceberg.utils.deprecated import deprecation_message
182181
from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int
183182
from pyiceberg.utils.singleton import Singleton
184183
from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
@@ -1863,7 +1862,6 @@ class ArrowProjectionVisitor(SchemaWithPartnerVisitor[pa.Array, pa.Array | None]
18631862
_file_schema: Schema
18641863
_include_field_ids: bool
18651864
_downcast_ns_timestamp_to_us: bool
1866-
_use_large_types: bool | None
18671865
_projected_missing_fields: dict[int, Any]
18681866
_allow_timestamp_tz_mismatch: bool
18691867

@@ -1872,26 +1870,17 @@ def __init__(
18721870
file_schema: Schema,
18731871
downcast_ns_timestamp_to_us: bool = False,
18741872
include_field_ids: bool = False,
1875-
use_large_types: bool | None = None,
18761873
projected_missing_fields: dict[int, Any] = EMPTY_DICT,
18771874
allow_timestamp_tz_mismatch: bool = False,
18781875
) -> None:
18791876
self._file_schema = file_schema
18801877
self._include_field_ids = include_field_ids
18811878
self._downcast_ns_timestamp_to_us = downcast_ns_timestamp_to_us
1882-
self._use_large_types = use_large_types
18831879
self._projected_missing_fields = projected_missing_fields
18841880
# When True, allows projecting timestamptz (UTC) to timestamp (no tz).
18851881
# Allowed for reading (aligns with Spark); disallowed for writing to enforce Iceberg spec's strict typing.
18861882
self._allow_timestamp_tz_mismatch = allow_timestamp_tz_mismatch
18871883

1888-
if use_large_types is not None:
1889-
deprecation_message(
1890-
deprecated_in="0.10.0",
1891-
removed_in="0.11.0",
1892-
help_message="Argument `use_large_types` will be removed from ArrowProjectionVisitor",
1893-
)
1894-
18951884
def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array:
18961885
file_field = self._file_schema.find_field(field.field_id)
18971886

@@ -1940,8 +1929,6 @@ def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array:
19401929
target_schema = schema_to_pyarrow(
19411930
promote(file_field.field_type, field.field_type), include_field_ids=self._include_field_ids
19421931
)
1943-
if self._use_large_types is False:
1944-
target_schema = _pyarrow_schema_ensure_small_types(target_schema)
19451932
return values.cast(target_schema)
19461933

19471934
return values

tests/integration/test_reads.py

Lines changed: 0 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,6 @@
4545
NotNaN,
4646
NotNull,
4747
)
48-
from pyiceberg.io import PYARROW_USE_LARGE_TYPES_ON_READ
4948
from pyiceberg.io.pyarrow import (
5049
pyarrow_to_schema,
5150
)
@@ -1125,49 +1124,6 @@ def test_table_scan_keep_types(catalog: Catalog) -> None:
11251124
assert result_table.schema.equals(expected_schema)
11261125

11271126

1128-
@pytest.mark.integration
1129-
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
1130-
def test_table_scan_override_with_small_types(catalog: Catalog) -> None:
1131-
identifier = "default.test_table_scan_override_with_small_types"
1132-
arrow_table = pa.Table.from_arrays(
1133-
[
1134-
pa.array(["a", "b", "c"]),
1135-
pa.array(["a", "b", "c"]),
1136-
pa.array([b"a", b"b", b"c"]),
1137-
pa.array([["a", "b"], ["c", "d"], ["e", "f"]]),
1138-
],
1139-
names=["string", "string-to-binary", "binary", "list"],
1140-
)
1141-
1142-
try:
1143-
catalog.drop_table(identifier)
1144-
except NoSuchTableError:
1145-
pass
1146-
1147-
tbl = catalog.create_table(
1148-
identifier,
1149-
schema=arrow_table.schema,
1150-
)
1151-
1152-
tbl.append(arrow_table)
1153-
1154-
with tbl.update_schema() as update_schema:
1155-
update_schema.update_column("string-to-binary", BinaryType())
1156-
1157-
tbl.io.properties[PYARROW_USE_LARGE_TYPES_ON_READ] = "False"
1158-
result_table = tbl.scan().to_arrow()
1159-
1160-
expected_schema = pa.schema(
1161-
[
1162-
pa.field("string", pa.string()),
1163-
pa.field("string-to-binary", pa.large_binary()),
1164-
pa.field("binary", pa.binary()),
1165-
pa.field("list", pa.list_(pa.string())),
1166-
]
1167-
)
1168-
assert result_table.schema.equals(expected_schema)
1169-
1170-
11711127
@pytest.mark.integration
11721128
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
11731129
def test_empty_scan_ordered_str(catalog: Catalog) -> None:

0 commit comments

Comments
 (0)