feat: Refresh method support for table#152
Conversation
test/in_memory_catalog_test.cc
Outdated
| ASSERT_TRUE(table.value()->current_snapshot().has_value()); | ||
| ASSERT_EQ(table.value()->current_snapshot().value()->snapshot_id, 3055729675574597004); | ||
|
|
||
| // Now we don't support commit method in catalog, so here only test refresh with the |
There was a problem hiding this comment.
I think we can add a test/mock_catalog.h to use gmock to write a MockCatalog so that we can inject table metadata at any time by mocking the call of Catalog::LoadTable. In the future, we may need to mock the catalog a lot of times, so I think it deserves to be added.
8e29a16 to
323aebe
Compare
test/mock_catalog.h
Outdated
|
|
||
| namespace iceberg { | ||
|
|
||
| class ICEBERG_EXPORT MockInMemoryCatalog : public InMemoryCatalog { |
There was a problem hiding this comment.
| class ICEBERG_EXPORT MockInMemoryCatalog : public InMemoryCatalog { | |
| class MockInMemoryCatalog : public InMemoryCatalog { |
Test files do not need to add this.
There was a problem hiding this comment.
BTW, should we simply use class MockCatalog : public Catalog instead of extending InMemoryCatalog? Usually we mock everything of the catalog so we don't need anything from InMemoryCatalog.
There was a problem hiding this comment.
Some tests rely on InMemoryCatalog's other methods. Mocking Catalog directly would require mocking all methods the tests touch, which could be more cumbersome.
test/in_memory_catalog_test.cc
Outdated
| int64_t snapshot_id, int64_t version) -> std::unique_ptr<Table> { | ||
| std::unique_ptr<TableMetadata> metadata; | ||
|
|
||
| ReadTableMetadata("TableMetadataV2Valid.json", &metadata); |
There was a problem hiding this comment.
We can use simple steps for the test, for example:
auto metadata_v0 = std::make_shared<TableMetadata>(
{.format_version = 1,
.table_uuid = "1234567890",
...
});
auto metadata_v1 = ...;
// Use created two metadatas to create two tables
auto table_v0 = ...
auto table_v1 = ...
// Mock LoadTable like you did
EXPECT_CALL(*mock_catalog, LoadTable(::testing::_))
.WillOnce(::testing::Return(
::testing::ByMove(Result<std::unique_ptr<Table>>(std::move(table_v0)))))
.WillOnce(::testing::Return(
::testing::ByMove(Result<std::unique_ptr<Table>>(std::move(table_v1)))));
...
There was a problem hiding this comment.
I simplified the buildTable lambda logic — the steps are simple too.
There was a problem hiding this comment.
I would recommend to do the following:
class MockCatalog : public Catalog {
public:
MockCatalog() = default;
~MockCatalog() override = default;
MOCK_METHOD(std::string_view, name, (), (const, override));
MOCK_METHOD(Status, CreateNamespace,
(const Namespace&, (const std::unordered_map<std::string, std::string>&)),
(override));
MOCK_METHOD((Result<std::vector<Namespace>>), ListNamespaces, (const Namespace&),
(const, override));
MOCK_METHOD((Result<std::unordered_map<std::string, std::string>>),
GetNamespaceProperties, (const Namespace&), (const, override));
MOCK_METHOD(Status, UpdateNamespaceProperties,
(const Namespace&, (const std::unordered_map<std::string, std::string>&),
(const std::unordered_set<std::string>&)),
(override));
MOCK_METHOD(Status, DropNamespace, (const Namespace&), (override));
MOCK_METHOD(Result<bool>, NamespaceExists, (const Namespace&), (const, override));
MOCK_METHOD((Result<std::vector<TableIdentifier>>), ListTables, (const Namespace&),
(const, override));
MOCK_METHOD((Result<std::unique_ptr<Table>>), LoadTable, (const TableIdentifier&),
(override));
MOCK_METHOD(Status, DropTable, (const TableIdentifier&, bool), (override));
MOCK_METHOD(Result<bool>, TableExists, (const TableIdentifier&), (const, override));
MOCK_METHOD((Result<std::shared_ptr<Table>>), RegisterTable,
(const TableIdentifier&, const std::string&), (override));
MOCK_METHOD((std::unique_ptr<Catalog::TableBuilder>), BuildTable,
(const TableIdentifier&, const Schema&), (const, override));
MOCK_METHOD((Result<std::unique_ptr<Table>>), CreateTable,
(const TableIdentifier&, const Schema&, const PartitionSpec&,
const std::string&, (const std::unordered_map<std::string, std::string>&)),
(override));
MOCK_METHOD((Result<std::unique_ptr<Table>>), UpdateTable,
(const TableIdentifier&,
(const std::vector<std::unique_ptr<UpdateRequirement>>&),
(const std::vector<std::unique_ptr<MetadataUpdate>>&)),
(override));
MOCK_METHOD((Result<std::shared_ptr<Transaction>>), StageCreateTable,
(const TableIdentifier&, const Schema&, const PartitionSpec&,
const std::string&, (const std::unordered_map<std::string, std::string>&)),
(override));
};
TEST(CatalogTest, MockCatalog) {
TableIdentifier table_ident{.ns = {}, .name = "t1"};
auto schema = std::make_shared<Schema>(
std::vector<SchemaField>{SchemaField::MakeRequired(1, "x", int64())},
/*schema_id=*/1);
std::shared_ptr<FileIO> io;
auto catalog = std::make_shared<MockCatalog>();
EXPECT_CALL(*catalog, name()).WillOnce(::testing::Return("test_catalog"));
EXPECT_EQ(catalog->name(), "test_catalog");
// Mock 1st call to LoadTable
EXPECT_CALL(*catalog, LoadTable(::testing::_))
.WillOnce(::testing::Return(
std::make_unique<Table>(table_ident,
std::make_shared<TableMetadata>(TableMetadata{
.schemas = {schema},
.current_schema_id = 1,
.current_snapshot_id = 1,
.snapshots = {std::make_shared<Snapshot>(Snapshot{
.snapshot_id = 1,
.sequence_number = 1,
})}}),
"s3://location", io, catalog)));
auto load_table_result = catalog->LoadTable(table_ident);
ASSERT_THAT(load_table_result, IsOk());
auto loaded_table = std::move(load_table_result.value());
ASSERT_EQ(loaded_table->current_snapshot().value()->snapshot_id, 1);
// Mock 2nd call to LoadTable
EXPECT_CALL(*catalog, LoadTable(::testing::_))
.WillOnce(::testing::Return(
std::make_unique<Table>(table_ident,
std::make_shared<TableMetadata>(TableMetadata{
.schemas = {schema},
.current_schema_id = 1,
.current_snapshot_id = 2,
.snapshots = {std::make_shared<Snapshot>(Snapshot{
.snapshot_id = 1,
.sequence_number = 1,
}),
std::make_shared<Snapshot>(Snapshot{
.snapshot_id = 2,
.sequence_number = 2,
})}}),
"s3://location", io, catalog)));
auto refreshed_result = loaded_table->Refresh();
ASSERT_THAT(refreshed_result, IsOk());
ASSERT_EQ(refreshed_result.value()->current_snapshot().value()->snapshot_id, 2);
}eb0f821 to
d8ee5ec
Compare
|
@Fokko @zeroshade Could you help review this? Thanks! |
| schemas_map_.reset(); | ||
| partition_spec_map_.reset(); | ||
| sort_orders_map_.reset(); |
There was a problem hiding this comment.
shouldn't these get updated from the refreshed table?
There was a problem hiding this comment.
Because this information is lazily initialized in the table, and the refreshed table hasn’t initialized these objects either, so a reset is sufficient.
|
This looks good, thanks @lishuxu for working on this, and thanks @wgtmac and @zeroshade for the review 🚀 |
No description provided.