Skip to content

Commit 6ac4868

Browse files
committed
generalize InMemoryCatalog
1 parent 37918eb commit 6ac4868

File tree

2 files changed

43 additions and 23 deletions

pyiceberg/catalog/memory.py

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import uuid
12
from typing import (
23
Dict,
34
List,
@@ -28,13 +29,13 @@
2829
CommitTableResponse,
2930
Table,
3031
)
31-
from pyiceberg.table.metadata import TableMetadata, TableMetadataV1, new_table_metadata
32+
from pyiceberg.table.metadata import TableMetadata, new_table_metadata
3233
from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder
3334
from pyiceberg.typedef import EMPTY_DICT
3435

3536

3637
class InMemoryCatalog(Catalog):
37-
"""An in-memory catalog implementation for testing purposes."""
38+
"""An in-memory catalog implementation."""
3839

3940
__tables: Dict[Identifier, Table]
4041
__namespaces: Dict[Identifier, Properties]
@@ -52,6 +53,7 @@ def create_table(
5253
partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
5354
sort_order: SortOrder = UNSORTED_SORT_ORDER,
5455
properties: Properties = EMPTY_DICT,
56+
table_uuid: Optional[uuid.UUID] = None,
5557
) -> Table:
5658
identifier = Catalog.identifier_to_tuple(identifier)
5759
namespace = Catalog.namespace_from(identifier)
@@ -62,24 +64,24 @@ def create_table(
6264
if namespace not in self.__namespaces:
6365
self.__namespaces[namespace] = {}
6466

65-
new_location = location or f's3://warehouse/{"/".join(identifier)}/data'
66-
metadata = TableMetadataV1(**{
67-
"format-version": 1,
68-
"table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
69-
"location": new_location,
70-
"last-updated-ms": 1602638573874,
71-
"last-column-id": schema.highest_field_id,
72-
"schema": schema.model_dump(),
73-
"partition-spec": partition_spec.model_dump()["fields"],
74-
"properties": properties,
75-
"current-snapshot-id": -1,
76-
"snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822}],
77-
})
67+
if not location:
68+
location = f's3://warehouse/{"/".join(identifier)}/data'
69+
70+
metadata_location = f's3://warehouse/{"/".join(identifier)}/metadata/metadata.json'
71+
72+
metadata = new_table_metadata(
73+
schema=schema,
74+
partition_spec=partition_spec,
75+
sort_order=sort_order,
76+
location=location,
77+
properties=properties,
78+
table_uuid=table_uuid,
79+
)
7880
table = Table(
7981
identifier=identifier,
8082
metadata=metadata,
81-
metadata_location=f's3://warehouse/{"/".join(identifier)}/metadata/metadata.json',
82-
io=load_file_io(),
83+
metadata_location=metadata_location,
84+
io=self._load_file_io(properties=metadata.properties, location=metadata_location),
8385
catalog=self,
8486
)
8587
self.__tables[identifier] = table
@@ -109,7 +111,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
109111
identifier=identifier,
110112
metadata=new_metadata,
111113
metadata_location=f's3://warehouse/{"/".join(identifier)}/metadata/metadata.json',
112-
io=load_file_io(),
114+
io=self._load_file_io(properties=new_metadata.properties, location=metadata_location),
113115
catalog=self,
114116
)
115117

@@ -154,7 +156,7 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
154156
identifier=to_identifier,
155157
metadata=table.metadata,
156158
metadata_location=table.metadata_location,
157-
io=load_file_io(),
159+
io=self._load_file_io(properties=table.metadata.properties, location=table.metadata_location),
158160
catalog=self,
159161
)
160162
return self.__tables[to_identifier]

tests/cli/test_console.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@
1414
# KIND, either express or implied. See the License for the
1515
# specific language governing permissions and limitations
1616
# under the License.
17+
import datetime
1718
import os
19+
import uuid
20+
from unittest.mock import MagicMock
1821

1922
import pytest
2023
from click.testing import CliRunner
@@ -59,6 +62,13 @@ def fixture_namespace_properties() -> Properties:
5962
return TEST_NAMESPACE_PROPERTIES.copy()
6063

6164

65+
@pytest.fixture()
66+
def mock_datetime_now(monkeypatch: pytest.MonkeyPatch) -> None:
67+
datetime_mock = MagicMock(wraps=datetime.datetime)
68+
datetime_mock.now.return_value = datetime.datetime.fromtimestamp(TEST_TIMESTAMP / 1000.0).astimezone()
69+
monkeypatch.setattr(datetime, "datetime", datetime_mock)
70+
71+
6272
TEST_TABLE_IDENTIFIER = ("default", "my_table")
6373
TEST_TABLE_NAMESPACE = "default"
6474
TEST_NAMESPACE_PROPERTIES = {"location": "s3://warehouse/database/location"}
@@ -71,6 +81,8 @@ def fixture_namespace_properties() -> Properties:
7181
TEST_TABLE_LOCATION = "s3://bucket/test/location"
7282
TEST_TABLE_PARTITION_SPEC = PartitionSpec(PartitionField(name="x", transform=IdentityTransform(), source_id=1, field_id=1000))
7383
TEST_TABLE_PROPERTIES = {"read.split.target.size": "134217728"}
84+
TEST_TABLE_UUID = uuid.UUID("d20125c8-7284-442c-9aea-15fee620737c")
85+
TEST_TIMESTAMP = 1602638573874
7486
MOCK_ENVIRONMENT = {"PYICEBERG_CATALOG__PRODUCTION__URI": "test://doesnotexist"}
7587

7688

@@ -120,12 +132,14 @@ def test_describe_namespace_does_not_exists(catalog: InMemoryCatalog) -> None:
120132
assert result.output == "Namespace does not exist: ('doesnotexist',)\n"
121133

122134

123-
def test_describe_table(catalog: InMemoryCatalog) -> None:
135+
@pytest.fixture()
136+
def test_describe_table(catalog: InMemoryCatalog, mock_datetime_now: None) -> None:
124137
catalog.create_table(
125138
identifier=TEST_TABLE_IDENTIFIER,
126139
schema=TEST_TABLE_SCHEMA,
127140
location=TEST_TABLE_LOCATION,
128141
partition_spec=TEST_TABLE_PARTITION_SPEC,
142+
table_uuid=TEST_TABLE_UUID,
129143
)
130144

131145
runner = CliRunner()
@@ -134,7 +148,7 @@ def test_describe_table(catalog: InMemoryCatalog) -> None:
134148
assert (
135149
# Strip the whitespace on the end
136150
"\n".join([line.rstrip() for line in result.output.split("\n")])
137-
== """Table format version 1
151+
== """Table format version 2
138152
Metadata location s3://warehouse/default/my_table/metadata/metadata.json
139153
Table UUID d20125c8-7284-442c-9aea-15fee620737c
140154
Last Updated 1602638573874
@@ -227,6 +241,7 @@ def test_uuid(catalog: InMemoryCatalog) -> None:
227241
schema=TEST_TABLE_SCHEMA,
228242
location=TEST_TABLE_LOCATION,
229243
partition_spec=TEST_TABLE_PARTITION_SPEC,
244+
table_uuid=TEST_TABLE_UUID,
230245
)
231246

232247
runner = CliRunner()
@@ -550,20 +565,22 @@ def test_json_describe_namespace_does_not_exists(catalog: InMemoryCatalog) -> No
550565
assert result.output == """{"type": "NoSuchNamespaceError", "message": "Namespace does not exist: ('doesnotexist',)"}\n"""
551566

552567

553-
def test_json_describe_table(catalog: InMemoryCatalog) -> None:
568+
@pytest.fixture()
569+
def test_json_describe_table(catalog: InMemoryCatalog, mock_datetime_now: None) -> None:
554570
catalog.create_table(
555571
identifier=TEST_TABLE_IDENTIFIER,
556572
schema=TEST_TABLE_SCHEMA,
557573
location=TEST_TABLE_LOCATION,
558574
partition_spec=TEST_TABLE_PARTITION_SPEC,
575+
table_uuid=TEST_TABLE_UUID,
559576
)
560577

561578
runner = CliRunner()
562579
result = runner.invoke(run, ["--output=json", "describe", "default.my_table"])
563580
assert result.exit_code == 0
564581
assert (
565582
result.output
566-
== """{"identifier":["default","my_table"],"metadata_location":"s3://warehouse/default/my_table/metadata/metadata.json","metadata":{"location":"s3://bucket/test/location","table-uuid":"d20125c8-7284-442c-9aea-15fee620737c","last-updated-ms":1602638573874,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}],"default-spec-id":0,"last-partition-id":1000,"properties":{},"snapshots":[{"snapshot-id":1925,"timestamp-ms":1602638573822}],"snapshot-log":[],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{},"format-version":1,"schema":{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]},"partition-spec":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}}\n"""
583+
== """{"identifier":["default","my_table"],"metadata_location":"s3://warehouse/default/my_table/metadata/metadata.json","metadata":{"location":"s3://bucket/test/location","table-uuid":"d20125c8-7284-442c-9aea-15fee620737c","last-updated-ms":1602638573874,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}],"default-spec-id":0,"last-partition-id":1000,"properties":{},"snapshots":[{"snapshot-id":1925,"timestamp-ms":1602638573822}],"snapshot-log":[],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{},"format-version":2,"schema":{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]},"partition-spec":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}}\n"""
567584
)
568585

569586

@@ -634,6 +651,7 @@ def test_json_uuid(catalog: InMemoryCatalog) -> None:
634651
schema=TEST_TABLE_SCHEMA,
635652
location=TEST_TABLE_LOCATION,
636653
partition_spec=TEST_TABLE_PARTITION_SPEC,
654+
table_uuid=TEST_TABLE_UUID,
637655
)
638656

639657
runner = CliRunner()

0 commit comments

Comments
 (0)