From 3f94baeacadebcb7df03074f517b82cf36c034a7 Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Tue, 3 Dec 2024 15:33:45 +0100 Subject: [PATCH 001/195] feat: emulator skeleton --- google/cloud/bigtable/CMakeLists.txt | 3 + google/cloud/bigtable/emulator/CMakeLists.txt | 94 +++++ .../emulator/bigtable_emulator_common.bzl | 31 ++ .../emulator/bigtable_emulator_programs.bzl | 21 ++ .../emulator/bigtable_emulator_unit_tests.bzl | 21 ++ google/cloud/bigtable/emulator/cluster.cc | 193 ++++++++++ google/cloud/bigtable/emulator/cluster.h | 72 ++++ google/cloud/bigtable/emulator/emulator.cc | 33 ++ google/cloud/bigtable/emulator/server.cc | 269 ++++++++++++++ google/cloud/bigtable/emulator/server.h | 44 +++ google/cloud/bigtable/emulator/server_test.cc | 24 ++ google/cloud/bigtable/emulator/table.cc | 350 ++++++++++++++++++ google/cloud/bigtable/emulator/table.h | 102 +++++ .../cloud/bigtable/emulator/to_grpc_status.cc | 90 +++++ .../cloud/bigtable/emulator/to_grpc_status.h | 34 ++ 15 files changed, 1381 insertions(+) create mode 100644 google/cloud/bigtable/emulator/CMakeLists.txt create mode 100644 google/cloud/bigtable/emulator/bigtable_emulator_common.bzl create mode 100644 google/cloud/bigtable/emulator/bigtable_emulator_programs.bzl create mode 100644 google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl create mode 100644 google/cloud/bigtable/emulator/cluster.cc create mode 100644 google/cloud/bigtable/emulator/cluster.h create mode 100644 google/cloud/bigtable/emulator/emulator.cc create mode 100644 google/cloud/bigtable/emulator/server.cc create mode 100644 google/cloud/bigtable/emulator/server.h create mode 100644 google/cloud/bigtable/emulator/server_test.cc create mode 100644 google/cloud/bigtable/emulator/table.cc create mode 100644 google/cloud/bigtable/emulator/table.h create mode 100644 google/cloud/bigtable/emulator/to_grpc_status.cc create mode 100644 google/cloud/bigtable/emulator/to_grpc_status.h diff --git 
a/google/cloud/bigtable/CMakeLists.txt b/google/cloud/bigtable/CMakeLists.txt index c46a509fb8c5f..fb0c02230a9b3 100644 --- a/google/cloud/bigtable/CMakeLists.txt +++ b/google/cloud/bigtable/CMakeLists.txt @@ -26,6 +26,7 @@ set(DOXYGEN_EXCLUDE_SYMBOLS "benchmarks" "bigtable_admin_internal" "bigtable_internal" + "emulator" "internal" "testing" "examples" @@ -512,6 +513,8 @@ if (BUILD_TESTING) add_subdirectory(tests) endif () +add_subdirectory(emulator) + # Examples are enabled if possible, but package maintainers may want to disable # compilation to speed up their builds. if (GOOGLE_CLOUD_CPP_ENABLE_EXAMPLES) diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt new file mode 100644 index 0000000000000..a696b9e57437d --- /dev/null +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -0,0 +1,94 @@ +# ~~~ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ~~~
+
+# Library shared by the emulator binary and its unit tests.
+add_library(
+    bigtable_emulator_common # cmake-format: sort
+    cluster.cc
+    cluster.h
+    table.cc
+    table.h
+    server.cc
+    server.h
+    to_grpc_status.cc
+    to_grpc_status.h)
+target_link_libraries(
+    bigtable_emulator_common
+    google-cloud-cpp::bigtable
+    google-cloud-cpp::bigtable_protos
+    google-cloud-cpp::common
+    google-cloud-cpp::grpc_utils
+    gRPC::grpc++
+    gRPC::grpc
+    protobuf::libprotobuf)
+google_cloud_cpp_add_common_options(bigtable_emulator_common)
+
+include(CreateBazelConfig)
+create_bazel_config(bigtable_emulator_common YEAR 2024)
+
+if (BUILD_TESTING)
+    # List the unit tests, then setup the targets and dependencies.
+    set(bigtable_emulator_unit_tests
+        # cmake-format: sort
+        server_test.cc)
+    export_list_to_bazel("bigtable_emulator_unit_tests.bzl"
+                         "bigtable_emulator_unit_tests" YEAR "2024")
+
+    # Iterate over the list defined above. The variable name must match the
+    # `set()` call exactly, an unset variable expands to nothing and no test
+    # would ever be built.
+    foreach (fname ${bigtable_emulator_unit_tests})
+        google_cloud_cpp_add_executable(target "bigtable" "${fname}")
+        target_link_libraries(
+            ${target}
+            PRIVATE bigtable_emulator_common
+                    bigtable_client_testing
+                    google_cloud_cpp_testing
+                    google_cloud_cpp_testing_grpc
+                    google-cloud-cpp::bigtable
+                    google-cloud-cpp::bigtable_protos
+                    google-cloud-cpp::common
+                    google-cloud-cpp::grpc_utils
+                    GTest::gmock_main
+                    GTest::gmock
+                    GTest::gtest
+                    gRPC::grpc++
+                    gRPC::grpc
+                    protobuf::libprotobuf)
+        google_cloud_cpp_add_common_options(${target})
+        add_test(NAME ${target} COMMAND ${target})
+    endforeach ()
+endif ()
+
+set(bigtable_emulator_programs
+    # cmake-format: sort
+    emulator.cc)
+export_list_to_bazel("bigtable_emulator_programs.bzl"
+                     "bigtable_emulator_programs" YEAR "2024")
+
+# The programs are long-running servers (`emulator.cc` blocks in
+# `server->Wait()`), so they are built but deliberately NOT registered with
+# `add_test()`: such a test could never terminate.
+foreach (fname ${bigtable_emulator_programs})
+    google_cloud_cpp_add_executable(target "bigtable" "${fname}")
+    target_link_libraries(
+        ${target}
+        PRIVATE bigtable_emulator_common
+                google-cloud-cpp::bigtable
+                google-cloud-cpp::bigtable_protos
+                google-cloud-cpp::grpc_utils
+                google_cloud_cpp_testing
+                gRPC::grpc++
+                gRPC::grpc
+                protobuf::libprotobuf)
+    google_cloud_cpp_add_common_options(${target})
+endforeach ()
diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl
new file mode 100644
index 0000000000000..6ab48e081f561
--- /dev/null
+++ b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl
@@ -0,0 +1,31 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# DO NOT EDIT -- GENERATED BY CMake -- Change the CMakeLists.txt file if needed
+
+"""Automatically generated source lists for bigtable_emulator_common - DO NOT EDIT."""
+
+bigtable_emulator_common_hdrs = [
+    "cluster.h",
+    "table.h",
+    "server.h",
+    "to_grpc_status.h",
+]
+
+bigtable_emulator_common_srcs = [
+    "cluster.cc",
+    "table.cc",
+    "server.cc",
+    "to_grpc_status.cc",
+]
diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_programs.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_programs.bzl
new file mode 100644
index 0000000000000..f0260e80cf394
--- /dev/null
+++ b/google/cloud/bigtable/emulator/bigtable_emulator_programs.bzl
@@ -0,0 +1,21 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# DO NOT EDIT -- GENERATED BY CMake -- Change the CMakeLists.txt file if needed + +"""Automatically generated unit tests list - DO NOT EDIT.""" + +bigtable_emulator_programs = [ + "emulator.cc", +] diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl new file mode 100644 index 0000000000000..c80fa828dc78c --- /dev/null +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -0,0 +1,21 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# DO NOT EDIT -- GENERATED BY CMake -- Change the CMakeLists.txt file if needed + +"""Automatically generated unit tests list - DO NOT EDIT.""" + +bigtable_emulator_unit_tests = [ + "server_test.cc", +] diff --git a/google/cloud/bigtable/emulator/cluster.cc b/google/cloud/bigtable/emulator/cluster.cc new file mode 100644 index 0000000000000..2177089086cd0 --- /dev/null +++ b/google/cloud/bigtable/emulator/cluster.cc @@ -0,0 +1,193 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "google/cloud/bigtable/emulator/cluster.h"
+#include "google/cloud/internal/make_status.h"
+#include "absl/strings/match.h"
+#include <iostream>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace google {
+namespace cloud {
+namespace bigtable {
+namespace emulator {
+namespace {
+
+namespace btadmin = google::bigtable::admin::v2;
+
+/**
+ * Builds the representation of `table` that corresponds to `view`.
+ *
+ * If `view` is `VIEW_UNSPECIFIED`, `default_view` is used instead.
+ *
+ * - `NAME_ONLY`: only the name is set.
+ * - `SCHEMA_VIEW`: name, column families and granularity.
+ * - `REPLICATION_VIEW` / `ENCRYPTION_VIEW`: name and cluster states.
+ * - `FULL`: the schema exactly as returned by `Table::GetSchema()`.
+ *
+ * @return the projected table, an internal error if `default_view` is itself
+ *     `VIEW_UNSPECIFIED`, or an unimplemented error for any other view.
+ */
+StatusOr<btadmin::Table> ApplyView(std::string const& table_name,
+                                   Table const& table,
+                                   btadmin::Table_View view,
+                                   btadmin::Table_View default_view) {
+  if (view == btadmin::Table::VIEW_UNSPECIFIED) {
+    view = default_view;
+  }
+  switch (view) {
+    case btadmin::Table::VIEW_UNSPECIFIED:
+      return internal::InternalError(
+          "VIEW_UNSPECIFIED cannot be the default view");
+    case btadmin::Table::NAME_ONLY: {
+      btadmin::Table res;
+      res.set_name(table_name);
+      return res;
+    }
+    case btadmin::Table::SCHEMA_VIEW: {
+      btadmin::Table res;
+      res.set_name(table_name);
+      // `GetSchema()` hands out a copy, so its parts can be moved from.
+      auto before_view = table.GetSchema();
+      *res.mutable_column_families() =
+          std::move(*before_view.mutable_column_families());
+      res.set_granularity(before_view.granularity());
+      return res;
+    }
+    case btadmin::Table::REPLICATION_VIEW:
+    case btadmin::Table::ENCRYPTION_VIEW: {
+      btadmin::Table res;
+      res.set_name(table_name);
+      auto before_view = table.GetSchema();
+      *res.mutable_cluster_states() =
+          std::move(*before_view.mutable_cluster_states());
+      return res;
+    }
+    case btadmin::Table::FULL:
+      return table.GetSchema();
+    default:
+      return internal::UnimplementedError(
+          "Unsupported view.",
+          GCP_ERROR_INFO().WithMetadata("view", Table_View_Name(view)));
+  }
+}
+
+}  // anonymous namespace
+
+/**
+ * Creates a table called `table_name` from `schema`.
+ *
+ * The name stored in `schema` is overwritten with `table_name`. The table is
+ * constructed outside of the cluster lock and only inserted under it.
+ *
+ * @return the schema of the new table, the error reported by
+ *     `Table::Construct()`, or an already-exists error if the name is taken.
+ */
+StatusOr<btadmin::Table> Cluster::CreateTable(std::string const& table_name,
+                                              btadmin::Table schema) {
+  schema.set_name(table_name);
+  std::cout << "Creating table " << table_name << std::endl;
+  auto to_insert = std::make_shared<Table>();
+  auto status = to_insert->Construct(std::move(schema));
+  if (!status.ok()) {
+    return status;
+  }
+  {
+    std::lock_guard<std::mutex> lock(mu_);
+    if (!table_by_name_.emplace(table_name, to_insert).second) {
+      return internal::AlreadyExistsError(
+          "Table already exists.",
+          GCP_ERROR_INFO().WithMetadata("table_name", table_name));
+    }
+  }
+  return to_insert->GetSchema();
+}
+
+/**
+ * Lists the tables whose name starts with `instance_name + "/tables/"`.
+ *
+ * The name-to-table map is copied under the lock and the views are computed
+ * from the copy. `VIEW_UNSPECIFIED` is treated as `NAME_ONLY`.
+ *
+ * @return the tables projected onto `view`, or the first error reported while
+ *     computing a view.
+ */
+StatusOr<std::vector<btadmin::Table>> Cluster::ListTables(
+    std::string const& instance_name, btadmin::Table_View view) const {
+  std::map<std::string, std::shared_ptr<Table>> table_by_name_copy;
+  {
+    std::lock_guard<std::mutex> lock(mu_);
+    table_by_name_copy = table_by_name_;
+  }
+  std::vector<btadmin::Table> res;
+  std::string const prefix = instance_name + "/tables/";
+  std::cout << "Listing tables with prefix " << prefix << std::endl;
+  // `lower_bound()` (not `upper_bound()`) so that a key equal to `prefix` is
+  // part of the scan; every key sharing the prefix sorts at or after it.
+  for (auto name_and_table_it = table_by_name_copy.lower_bound(prefix);
+       name_and_table_it != table_by_name_copy.end() &&
+       absl::StartsWith(name_and_table_it->first, prefix);
+       ++name_and_table_it) {
+    auto maybe_view =
+        ApplyView(name_and_table_it->first, *name_and_table_it->second, view,
+                  btadmin::Table::NAME_ONLY);
+    if (!maybe_view) {
+      return maybe_view.status();
+    }
+    res.emplace_back(*std::move(maybe_view));
+  }
+  return res;
+}
+
+/**
+ * Returns the table called `table_name` projected onto `view`.
+ *
+ * `VIEW_UNSPECIFIED` is treated as `SCHEMA_VIEW`. The lock is only held while
+ * looking the table up, the view is computed afterwards.
+ *
+ * @return the projected table or a not-found error.
+ */
+StatusOr<btadmin::Table> Cluster::GetTable(std::string const& table_name,
+                                           btadmin::Table_View view) const {
+  std::shared_ptr<Table> found_table;
+  {
+    std::lock_guard<std::mutex> lock(mu_);
+    auto it = table_by_name_.find(table_name);
+    if (it == table_by_name_.end()) {
+      return NotFoundError("No such table.", GCP_ERROR_INFO().WithMetadata(
+                                                 "table_name", table_name));
+    }
+    found_table = it->second;
+  }
+  return ApplyView(table_name, *found_table, view, btadmin::Table::SCHEMA_VIEW);
+}
+
+/**
+ * Removes the table called `table_name`.
+ *
+ * @return OK, a not-found error, or a failed-precondition error if the table
+ *     reports `IsDeleteProtected()`.
+ */
+Status Cluster::DeleteTable(std::string const& table_name) {
+  {
+    std::lock_guard<std::mutex> lock(mu_);
+    auto it = table_by_name_.find(table_name);
+    if (it == table_by_name_.end()) {
+      return NotFoundError(
+          "No such table.",
+          GCP_ERROR_INFO().WithMetadata("table_name", table_name));
+    }
+    if (it->second->IsDeleteProtected()) {
+      return FailedPreconditionError(
+          "The table has deletion protection.",
+          GCP_ERROR_INFO().WithMetadata("table_name", table_name));
+    }
+    table_by_name_.erase(it);
+  }
+  return Status();
+}
+
+/**
+ * Forwards `new_schema` and `to_update` to `Table::Update()` of the table
+ * named `new_schema.name()`.
+ *
+ * @return OK once the update was forwarded, or a not-found error.
+ */
+Status Cluster::UpdateTable(btadmin::Table const& new_schema,
+                            google::protobuf::FieldMask const& to_update) {
+  auto maybe_table = FindTable(new_schema.name());
+  if (!maybe_table) {
+    return maybe_table.status();
+  }
+  (*maybe_table)->Update(new_schema, to_update);
+  return Status();
+}
+
+/**
+ * Forwards `request` to `Table::ModifyColumnFamilies()` of the table named
+ * `request.name()`.
+ *
+ * @return whatever the table returns, or a not-found error.
+ */
+StatusOr<btadmin::Table> Cluster::ModifyColumnFamilies(
+    btadmin::ModifyColumnFamiliesRequest const& request) {
+  auto maybe_table = FindTable(request.name());
+  if (!maybe_table) {
+    return maybe_table.status();
+  }
+  return (*maybe_table)->ModifyColumnFamilies(request);
+}
+
+/// Returns true if a table called `table_name` is currently registered.
+bool Cluster::HasTable(std::string const& table_name) const {
+  std::lock_guard<std::mutex> lock(mu_);
+  return table_by_name_.find(table_name) != table_by_name_.end();
+}
+
+/**
+ * Looks up the table called `table_name`.
+ *
+ * @return a copy of the shared pointer stored in the cluster (so the caller
+ *     can use the table without holding the cluster lock) or a not-found
+ *     error.
+ */
+StatusOr<std::shared_ptr<Table>> Cluster::FindTable(
+    std::string const& table_name) {
+  std::lock_guard<std::mutex> lock(mu_);
+  auto it = table_by_name_.find(table_name);
+  if (it == table_by_name_.end()) {
+    return NotFoundError(
+        "No such table.",
+        GCP_ERROR_INFO().WithMetadata("table_name", table_name));
+  }
+  return it->second;
+}
+
+}  // namespace emulator
+}  // namespace bigtable
+}  // namespace cloud
+}  // namespace
google diff --git a/google/cloud/bigtable/emulator/cluster.h b/google/cloud/bigtable/emulator/cluster.h new file mode 100644 index 0000000000000..f9baaac3207e5 --- /dev/null +++ b/google/cloud/bigtable/emulator/cluster.h @@ -0,0 +1,72 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CLUSTER_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CLUSTER_H + +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/status_or.h" + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +/** + * An emulated cluster, which manages the lifecycle of all tables. + * + * This emulated cluster holds tables from all projects and instances - they are + * merely a component of table names. 
+ */ +class Cluster { + public: + StatusOr CreateTable( + std::string const& table_name, + google::bigtable::admin::v2::Table schema); + + StatusOr> ListTables( + std::string const& instance_name, + google::bigtable::admin::v2::Table_View view) const; + + StatusOr GetTable( + std::string const& table_name, + google::bigtable::admin::v2::Table_View view) const; + + Status DeleteTable(std::string const &table_name); + + Status UpdateTable(google::bigtable::admin::v2::Table const& new_schema, + google::protobuf::FieldMask const& to_update); + + StatusOr ModifyColumnFamilies( + google::bigtable::admin::v2::ModifyColumnFamiliesRequest const& request); + + bool HasTable(std::string const &table_name) const; + + StatusOr> FindTable(std::string const& table_name); + + private: + + mutable std::mutex mu_; + // All the tables indexed by their names (i.e. + // projects/{}/instances/{}/tables/{}). We're holding the tables by + // `shared_ptr`s in order to be able to allow for more concurrency - every + // access to a table should start with creating a copy of the shared pointer. + std::map> table_by_name_; +}; + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CLUSTER_H diff --git a/google/cloud/bigtable/emulator/emulator.cc b/google/cloud/bigtable/emulator/emulator.cc new file mode 100644 index 0000000000000..ff6cb1d0a4060 --- /dev/null +++ b/google/cloud/bigtable/emulator/emulator.cc @@ -0,0 +1,33 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/emulator/server.h" +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +int main() { + using namespace google::cloud::bigtable::emulator; + auto server = CreateDefaultEmulatorServer("[::]", 8888); + std::cout << "Server running on port " << server->bound_port() << "\n"; + server->Wait(); + return 0; +} diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc new file mode 100644 index 0000000000000..e61bd386edb7b --- /dev/null +++ b/google/cloud/bigtable/emulator/server.cc @@ -0,0 +1,269 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "google/cloud/bigtable/emulator/cluster.h" +#include "google/cloud/bigtable/emulator/server.h" +#include "google/cloud/bigtable/emulator/to_grpc_status.h" +#include "google/cloud/internal/make_status.h" +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +namespace btproto = ::google::bigtable::v2; +namespace btadmin = ::google::bigtable::admin::v2; + +class EmulatorService final : public btproto::Bigtable::Service { + public: + EmulatorService(std::shared_ptr cluster) + : cluster_(std::move(cluster)) {} + grpc::Status ReadRows( + grpc::ServerContext* /* context */, + btproto::ReadRowsRequest const* /* request */, + grpc::ServerWriter* writer) override { + btproto::ReadRowsResponse msg; + writer->WriteLast(msg, grpc::WriteOptions()); + return grpc::Status::OK; + } + grpc::Status SampleRowKeys( + grpc::ServerContext* /* context */, + btproto::SampleRowKeysRequest const* /* request */, + grpc::ServerWriter* /* writer */) + override { + return grpc::Status::OK; + } + grpc::Status MutateRow(grpc::ServerContext* /* context */, + btproto::MutateRowRequest const* request, + btproto::MutateRowResponse* /* response */) override { + auto maybe_table = cluster_->FindTable(request->table_name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + return ToGrpcStatus((*maybe_table)->MutateRow(*request)); + } + grpc::Status MutateRows( + grpc::ServerContext* /* context */, + btproto::MutateRowsRequest const* /* request */, + grpc::ServerWriter* /* writer */) override { + return grpc::Status::OK; + } + grpc::Status CheckAndMutateRow( + grpc::ServerContext* /* context */, + btproto::CheckAndMutateRowRequest const* /* request */, + btproto::CheckAndMutateRowResponse* /* response */) override { + return grpc::Status::OK; + } + grpc::Status PingAndWarm( + grpc::ServerContext* /* context */, + btproto::PingAndWarmRequest const* /* request */, + 
btproto::PingAndWarmResponse* /* response */) override { + return grpc::Status::OK; + } + grpc::Status ReadModifyWriteRow( + grpc::ServerContext* /* context */, + btproto::ReadModifyWriteRowRequest const* /* request */, + btproto::ReadModifyWriteRowResponse* /* response */) override { + return grpc::Status::OK; + } + std::shared_ptr cluster_; +}; + +class EmulatorTableService final : public btadmin::BigtableTableAdmin::Service { + public: + EmulatorTableService(std::shared_ptr cluster) + : cluster_(std::move(cluster)) {} + grpc::Status CreateTable(grpc::ServerContext* /* context */, + btadmin::CreateTableRequest const* request, + btadmin::Table* response) override { + auto table_name = request->parent() + "/tables/" + request->table_id(); + auto maybe_table = + cluster_->CreateTable(table_name, request->table()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + *response = *std::move(maybe_table); + return grpc::Status::OK; + } + + grpc::Status ListTables( + grpc::ServerContext* /* context */, + btadmin::ListTablesRequest const* request, + btadmin::ListTablesResponse* response) override { + + if (!request->page_token().empty()) { + return ToGrpcStatus(UnimplementedError( + "Pagination is not supported.", + GCP_ERROR_INFO().WithMetadata("page_token", request->page_token()))); + } + auto maybe_tables = cluster_->ListTables(request->parent(), request->view()); + if (!maybe_tables) { + return ToGrpcStatus(maybe_tables.status()); + } + if (request->page_size() < 0) { + return ToGrpcStatus(InvalidArgumentError( + "Negative page size.", + GCP_ERROR_INFO().WithMetadata("page_size", + std::to_string(request->page_size())))); + } + if (request->page_size() > 0 && + maybe_tables->size() > static_cast(request->page_size())) { + response->set_next_page_token("unsupported"); + maybe_tables->resize(request->page_size()); + } + for (auto &table : *maybe_tables) { + *response->add_tables() = std::move(table); + } + return grpc::Status::OK; + } + + 
grpc::Status GetTable(grpc::ServerContext* /* context */, + btadmin::GetTableRequest const* request, + btadmin::Table* response) override { + auto maybe_table = cluster_->GetTable(request->name(), request->view()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + *response = *std::move(maybe_table); + return grpc::Status::OK; + } + + grpc::Status UpdateTable( + grpc::ServerContext* /* context */, + btadmin::UpdateTableRequest const* request, + google::longrunning::Operation* response) override { + auto status = + cluster_->UpdateTable(request->table(), request->update_mask()); + if (!status.ok()) { + return ToGrpcStatus(status); + } + btadmin::UpdateTableMetadata res_md; + res_md.set_name(request->table().name()); + *res_md.mutable_start_time() = + google::protobuf::util::TimeUtil::GetCurrentTime(); + *res_md.mutable_end_time() = + google::protobuf::util::TimeUtil::GetCurrentTime(); + response->set_name("UpdateTable"); + response->mutable_metadata()->PackFrom(std::move(res_md)); + response->set_done(true); + google::protobuf::Empty empty_response; + response->mutable_response()->PackFrom(std::move(empty_response)); + return grpc::Status::OK; + } + + grpc::Status DeleteTable(grpc::ServerContext* /* context */, + btadmin::DeleteTableRequest const* request, + google::protobuf::Empty* /* response */) override { + return ToGrpcStatus(cluster_->DeleteTable(request->name())); + } + + grpc::Status ModifyColumnFamilies( + grpc::ServerContext* /* context */, + btadmin::ModifyColumnFamiliesRequest const* request, + btadmin::Table* response) override { + auto maybe_table = cluster_->ModifyColumnFamilies(*request); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + *response = *std::move(maybe_table); + + return grpc::Status::OK; + } + + grpc::Status DropRowRange(grpc::ServerContext* /* context */, + btadmin::DropRowRangeRequest const* /* request */, + google::protobuf::Empty* /* response */) override { + // FIXME + return 
grpc::Status::OK; + } + + grpc::Status GenerateConsistencyToken( + grpc::ServerContext* /* context */, + btadmin::GenerateConsistencyTokenRequest const* request, + btadmin::GenerateConsistencyTokenResponse* response) override { + if (!cluster_->HasTable(request->name())) { + return ToGrpcStatus(NotFoundError( + "Table does not exist.", + GCP_ERROR_INFO().WithMetadata("table_name", request->name()))); + } + response->set_consistency_token("some fake token"); + return grpc::Status::OK; + } + + grpc::Status CheckConsistency( + grpc::ServerContext* /* context */, + btadmin::CheckConsistencyRequest const* request, + btadmin::CheckConsistencyResponse* response) override { + if (!cluster_->HasTable(request->name())) { + return ToGrpcStatus(NotFoundError( + "Table does not exist.", + GCP_ERROR_INFO().WithMetadata("table_name", request->name()))); + } + if (request->consistency_token() != "some fake token") { + return ToGrpcStatus(NotFoundError( + "Unknown consistency token.", + GCP_ERROR_INFO().WithMetadata("consistency_token", + request->consistency_token()))); + } + // Emulator is always consistent. 
+    response->set_consistent(true);
+    return grpc::Status::OK;
+  }
+
+ private:
+  std::shared_ptr<Cluster> cluster_;
+};
+
+/**
+ * The default `EmulatorServer`: one gRPC server exposing the data and the
+ * table admin services, both backed by the same `Cluster`.
+ */
+class DefaultEmulatorServer : public EmulatorServer {
+ public:
+  /**
+   * Registers both services and starts listening on `host:port` using
+   * insecure credentials.
+   *
+   * `ServerBuilder::BuildAndStart()` may return a null pointer when the server
+   * cannot be started; `Shutdown()` and `Wait()` are no-ops in that case.
+   */
+  DefaultEmulatorServer(std::string const& host, std::uint16_t port)
+      : bound_port_(port),
+        cluster_(std::make_shared<Cluster>()),
+        bt_service_(cluster_),
+        table_service_(cluster_) {
+    builder_.AddListeningPort(host + ":" + std::to_string(port),
+                              grpc::InsecureServerCredentials(), &bound_port_);
+    builder_.RegisterService(&bt_service_);
+    builder_.RegisterService(&table_service_);
+    server_ = builder_.BuildAndStart();
+  }
+  ~DefaultEmulatorServer() override = default;
+
+  /// The port selected by gRPC, written through `AddListeningPort()`.
+  int bound_port() override { return bound_port_; }
+
+  /// Shuts the server down; does nothing if the server failed to start.
+  void Shutdown() override {
+    if (server_) server_->Shutdown();
+  }
+
+  /// Blocks in `grpc::Server::Wait()`; returns immediately if the server
+  /// failed to start.
+  void Wait() override {
+    if (server_) server_->Wait();
+  }
+
+ private:
+  int bound_port_;
+  std::shared_ptr<Cluster> cluster_;
+  EmulatorService bt_service_;
+  EmulatorTableService table_service_;
+  grpc::ServerBuilder builder_;
+  // Declared last so it is destroyed before the services it references.
+  std::unique_ptr<grpc::Server> server_;
+};
+
+/// Creates a `DefaultEmulatorServer` listening on `host:port`.
+std::unique_ptr<EmulatorServer> CreateDefaultEmulatorServer(
+    std::string const& host, std::uint16_t port) {
+  return std::unique_ptr<EmulatorServer>(
+      new DefaultEmulatorServer(host, port));
+}
+
+}  // namespace emulator
+}  // namespace bigtable
+}  // namespace cloud
+}  // namespace google
diff --git a/google/cloud/bigtable/emulator/server.h b/google/cloud/bigtable/emulator/server.h
new file mode 100644
index 0000000000000..8e7659cc25fad
--- /dev/null
+++ b/google/cloud/bigtable/emulator/server.h
@@ -0,0 +1,44 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_SERVER_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_SERVER_H + +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +class EmulatorServer { + public: + virtual ~EmulatorServer() = default; + + virtual int bound_port() = 0; + virtual void Shutdown() = 0; + virtual void Wait() = 0; +}; + +std::unique_ptr CreateDefaultEmulatorServer( + std::string const& host, std::uint16_t port); + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_SERVER_H diff --git a/google/cloud/bigtable/emulator/server_test.cc b/google/cloud/bigtable/emulator/server_test.cc new file mode 100644 index 0000000000000..3312f88f4536d --- /dev/null +++ b/google/cloud/bigtable/emulator/server_test.cc @@ -0,0 +1,24 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "google/cloud/bigtable/emulator/server.h" + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc new file mode 100644 index 0000000000000..ab19442390d98 --- /dev/null +++ b/google/cloud/bigtable/emulator/table.cc @@ -0,0 +1,350 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "google/cloud/bigtable/emulator/table.h" +#include "google/protobuf/util/field_mask_util.h" +#include "google/cloud/internal/make_status.h" + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +namespace btadmin = ::google::bigtable::admin::v2; +namespace btproto = ::google::bigtable::v2; + +void ColumnRow::SetCell(std::int64_t timestamp_micros, std::string const& value) { + if (timestamp_micros == -1) { + // Time since epoch expressed in microseconds but rounded to milliseconds. 
+    timestamp_micros = std::chrono::duration_cast<std::chrono::milliseconds>(
+                           std::chrono::system_clock::now().time_since_epoch())
+                           .count() *
+                       1000LL;
+  }
+  // `value` is a const reference, so this is (and always was) a copy.
+  cells_[timestamp_micros] = value;
+}
+
+/**
+ * Erases the cells whose timestamp is in `time_range`.
+ *
+ * The range starts at `start_timestamp_micros()` (inclusive) and stops at
+ * `end_timestamp_micros()` (exclusive); an end of 0 means "no upper bound".
+ *
+ * @return the number of erased cells.
+ */
+std::size_t ColumnRow::DeleteTimeRange(
+    ::google::bigtable::v2::TimestampRange const& time_range) {
+  std::size_t num_erased = 0;
+  for (auto cell_it = cells_.lower_bound(time_range.start_timestamp_micros());
+       cell_it != cells_.end() &&
+       (time_range.end_timestamp_micros() == 0 ||
+        cell_it->first < time_range.end_timestamp_micros());) {
+    cells_.erase(cell_it++);
+    ++num_erased;
+  }
+  return num_erased;
+}
+
+/// Sets a cell in `column_qualifier`, creating the column if it is missing.
+void ColumnFamilyRow::SetCell(std::string const& column_qualifier,
+                              std::int64_t timestamp_micros,
+                              std::string const& value) {
+  columns_[column_qualifier].SetCell(timestamp_micros, value);
+}
+
+/**
+ * Erases the cells of `column_qualifier` whose timestamp is in `time_range`.
+ *
+ * A column left without cells is removed from this row, mirroring what
+ * `ColumnFamily::DeleteColumn()` does with rows left without columns.
+ *
+ * @return the number of erased cells; 0 if there is no such column.
+ */
+std::size_t ColumnFamilyRow::DeleteColumn(
+    std::string const& column_qualifier,
+    ::google::bigtable::v2::TimestampRange const& time_range) {
+  auto column_it = columns_.find(column_qualifier);
+  if (column_it == columns_.end()) {
+    // Nothing to delete; in particular, never touch the end iterator.
+    return 0;
+  }
+  auto const num_erased_cells = column_it->second.DeleteTimeRange(time_range);
+  if (!column_it->second.HasCells()) {
+    columns_.erase(column_it);
+  }
+  return num_erased_cells;
+}
+
+/// Sets a cell in `row_key`, creating the row if it is missing.
+void ColumnFamily::SetCell(std::string const& row_key,
+                           std::string const& column_qualifier,
+                           std::int64_t timestamp_micros,
+                           std::string const& value) {
+  rows_[row_key].SetCell(column_qualifier, timestamp_micros, value);
+}
+
+/// Removes `row_key` from this column family; returns true if it existed.
+bool ColumnFamily::DeleteRow(std::string const& row_key) {
+  return rows_.erase(row_key) > 0;
+}
+
+/**
+ * Erases the cells of `column_qualifier` in `row_key` whose timestamp is in
+ * `time_range`, dropping the row if it is left without columns.
+ *
+ * @return the number of erased cells; 0 if there is no such row.
+ */
+std::size_t ColumnFamily::DeleteColumn(
+    std::string const& row_key, std::string const& column_qualifier,
+    ::google::bigtable::v2::TimestampRange const& time_range) {
+  auto row_it = rows_.find(row_key);
+  if (row_it != rows_.end()) {
+    auto num_erased_cells =
+        row_it->second.DeleteColumn(column_qualifier, time_range);
+    if (!row_it->second.HasColumns()) {
+      rows_.erase(row_it);
+    }
+    return num_erased_cells;
+  }
+  return 0;
+}
+
+Status
Table::Construct(google::bigtable::admin::v2::Table schema) { + // Normally the constructor acts as a synchronization point. We don't have + // that luxury here, so we need to make sure that the changes performed in + // this member function are reflected in other threads. The simplest way to do + // this is the mutex. + std::lock_guard lock(mu_); + schema_ = std::move(schema); + if (schema_.granularity() == + btadmin::Table::TIMESTAMP_GRANULARITY_UNSPECIFIED) { + schema_.set_granularity(btadmin::Table::MILLIS); + } + if (schema_.cluster_states_size() > 0) { + return InvalidArgumentError( + "`cluster_states` not empty.", + GCP_ERROR_INFO().WithMetadata("schema", schema.DebugString())); + } + if (schema_.has_restore_info()) { + return InvalidArgumentError( + "`restore_info` not empty.", + GCP_ERROR_INFO().WithMetadata("schema", schema.DebugString())); + } + if (schema_.has_change_stream_config()) { + return UnimplementedError( + "`change_stream_config` not empty.", + GCP_ERROR_INFO().WithMetadata( + "schema", schema.DebugString())); + } + if (schema_.has_automated_backup_policy()) { + return UnimplementedError( + "`automated_backup_policy` not empty.", + GCP_ERROR_INFO().WithMetadata( + "schema", schema.DebugString())); + } + for (auto const &column_family_def : schema_.column_families()) { + column_families_.emplace( + column_family_def.first, + std::make_shared()); + } + return Status(); +} + +StatusOr Table::ModifyColumnFamilies( + btadmin::ModifyColumnFamiliesRequest const& request) { + std::cout << "Modify column families: " << request.DebugString() + << std::endl; + std::unique_lock lock(mu_); + auto new_schema = schema_; + auto new_column_families = column_families_; + for (auto const& modification : request.modifications()) { + if (modification.drop()) { + if (IsDeleteProtectedNoLock()) { + return FailedPreconditionError( + "The table has deletion protection.", + GCP_ERROR_INFO().WithMetadata("modification", + modification.DebugString())); + } + if 
(new_column_families.erase(modification.id()) == 0) { + return NotFoundError( + "No such column family.", + GCP_ERROR_INFO().WithMetadata("modification", + modification.DebugString())); + } + if (new_schema.mutable_column_families()->erase(modification.id()) == 0) { + return InternalError( + "Column family with no schema.", + GCP_ERROR_INFO().WithMetadata("modification", + modification.DebugString())); + } + } else if (modification.has_update()) { + auto& cfs = *new_schema.mutable_column_families(); + auto cf_it = cfs.find(modification.id()); + if (cf_it == cfs.end()) { + return NotFoundError( + "No such column family.", + GCP_ERROR_INFO().WithMetadata("modification", + modification.DebugString())); + } + using google::protobuf::util::FieldMaskUtil; + + using google::protobuf::util::FieldMaskUtil; + google::protobuf::FieldMask effective_mask; + if (modification.has_update_mask()) { + effective_mask = modification.update_mask(); + if (!FieldMaskUtil::IsValidFieldMask< + google::bigtable::admin::v2::ColumnFamily>(effective_mask)) { + return InvalidArgumentError( + "Update mask is invalid.", + GCP_ERROR_INFO().WithMetadata("modification", + modification.DebugString())); + } + } else { + FieldMaskUtil::FromString("gc_rule", &effective_mask); + if (!FieldMaskUtil::IsValidFieldMask< + google::bigtable::admin::v2::ColumnFamily>(effective_mask)) { + return InternalError("Default update mask is invalid.", + GCP_ERROR_INFO().WithMetadata( + "mask", effective_mask.DebugString())); + } + } + FieldMaskUtil::MergeMessageTo(modification.update(), effective_mask, + FieldMaskUtil::MergeOptions(), + &(cf_it->second)); + } else if (modification.has_create()) { + if (!new_column_families + .emplace(modification.id(), std::make_shared()) + .second) { + return AlreadyExistsError( + "Column family already exists.", + GCP_ERROR_INFO().WithMetadata("modification", + modification.DebugString())); + } + if (!new_schema.mutable_column_families() + ->emplace(modification.id(), 
modification.create()) + .second) { + return InternalError( + "Column family with schema but no data.", + GCP_ERROR_INFO().WithMetadata("modification", + modification.DebugString())); + } + } else { + return UnimplementedError( + "Unsupported modification.", + GCP_ERROR_INFO().WithMetadata("modification", + modification.DebugString())); + } + } + // Defer destorying potentially large objects to after releasing the lock. + column_families_.swap(new_column_families); + schema_ = new_schema; + lock.unlock(); + return new_schema; +} + +google::bigtable::admin::v2::Table Table::GetSchema() const { + std::lock_guard lock(mu_); + return schema_; +} + +Status Table::Update(google::bigtable::admin::v2::Table const& new_schema, + google::protobuf::FieldMask const& to_update) { + std::cout << "Update schema: " << new_schema.DebugString() + << " mask: " << to_update.DebugString() << std::endl; + using google::protobuf::util::FieldMaskUtil; + google::protobuf::FieldMask allowed_mask; + FieldMaskUtil::FromString( + "change_stream_config," + "change_stream_config.retention_period," + "deletion_protection", + &allowed_mask); + if (!FieldMaskUtil::IsValidFieldMask( + to_update)) { + return InvalidArgumentError( + "Update mask is invalid.", + GCP_ERROR_INFO().WithMetadata( + "mask", to_update.DebugString())); + } + google::protobuf::FieldMask disallowed_mask; + FieldMaskUtil::Subtract( + to_update, allowed_mask, &disallowed_mask); + if (disallowed_mask.paths_size() > 0) { + return UnimplementedError( + "Update mask contains disallowed fields.", + GCP_ERROR_INFO().WithMetadata( + "mask", disallowed_mask.DebugString())); + } + std::lock_guard lock(mu_); + FieldMaskUtil::MergeMessageTo(new_schema, to_update, + FieldMaskUtil::MergeOptions(), &schema_); + return Status(); +} + +template +StatusOr> Table::FindColumnFamily( + MESSAGE const& message) const { + auto column_family_it = column_families_.find(message.family_name()); + if (column_family_it == column_families_.end()) { + return 
NotFoundError( + "No such column family.", + GCP_ERROR_INFO().WithMetadata("mutation", message.DebugString())); + } + return std::ref(*column_family_it->second); +} + +Status Table::MutateRow( + google::bigtable::v2::MutateRowRequest const &request) { + // FIXME - add atomicity + // FIXME - determine what happens when row/column family/column does not exist + std::lock_guard lock(mu_); + assert(request.table_name() == schema_.name()); + for (auto mutation : request.mutations()) { + if (mutation.has_set_cell()) { + auto const & set_cell = mutation.set_cell(); + auto maybe_column_family = FindColumnFamily(set_cell); + if (!maybe_column_family) { + return maybe_column_family.status(); + } + maybe_column_family->get().SetCell( + request.row_key(), set_cell.column_qualifier(), + set_cell.timestamp_micros(), set_cell.value()); + } else if (mutation.has_add_to_cell()) { + // FIXME + } else if (mutation.has_merge_to_cell()) { + // FIXME + } else if (mutation.has_delete_from_column()) { + auto const & delete_from_column = mutation.delete_from_column(); + auto maybe_column_family = + FindColumnFamily(delete_from_column); + if (!maybe_column_family) { + return maybe_column_family.status(); + } + if (maybe_column_family->get().DeleteColumn( + request.row_key(), delete_from_column.column_qualifier(), + delete_from_column.time_range()) == 0) { + // FIXME no such row or column + } + } else if (mutation.has_delete_from_family()) { + auto maybe_column_family = + FindColumnFamily(mutation.delete_from_family()); + if (!maybe_column_family) { + return maybe_column_family.status(); + } + if (maybe_column_family->get().DeleteRow(request.row_key())) { + // FIXME no such row existed in that column family + } + } else if (mutation.has_delete_from_row()) { + bool row_existed = false; + for (auto& column_family : column_families_) { + row_existed |= column_family.second->DeleteRow(request.row_key()); + } + if (!row_existed) { + // FIXME no such row existed + } + } else { + return 
UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", mutation.DebugString())); + } + } + return Status(); +} + +bool Table::IsDeleteProtected() const { + std::lock_guard lock(mu_); + return IsDeleteProtectedNoLock(); +} + +bool Table::IsDeleteProtectedNoLock() const { + return schema_.deletion_protection(); +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h new file mode 100644 index 0000000000000..d56c2c7f808cf --- /dev/null +++ b/google/cloud/bigtable/emulator/table.h @@ -0,0 +1,102 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TABLE_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TABLE_H + +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +class ColumnRow { + public: + void SetCell(std::int64_t timestamp_micros, std::string const& value); + std::size_t DeleteTimeRange( + ::google::bigtable::v2::TimestampRange const& time_range); + + bool HasCells() const { return !cells_.empty(); } + + private: + std::map cells_; +}; + +class ColumnFamilyRow { + public: + void SetCell(std::string const& column_qualifier, + std::int64_t timestamp_micros, std::string const& value); + std::size_t DeleteColumn( + std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range); + bool HasColumns() { return !columns_.empty(); } + + private: + std::map columns_; +}; + +class ColumnFamily { + public: + void SetCell(std::string const& row_key, std::string const& column_qualifier, + std::int64_t timestamp_micros, std::string const& value); + bool DeleteRow(std::string const& row_key); + std::size_t DeleteColumn( + std::string const& row_key, std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range); + + private: + std::map rows_; +}; + +class Table { + public: + Status Construct(google::bigtable::admin::v2::Table schema); + + google::bigtable::admin::v2::Table GetSchema() const; + + Status Update(google::bigtable::admin::v2::Table const& new_schema, + google::protobuf::FieldMask const& to_update); + + StatusOr ModifyColumnFamilies( + google::bigtable::admin::v2::ModifyColumnFamiliesRequest const& request); + + bool IsDeleteProtected() const; + + Status MutateRow(google::bigtable::v2::MutateRowRequest const & request); + + private: + template + StatusOr> FindColumnFamily( + MESSAGE const& message) const; + bool 
IsDeleteProtectedNoLock() const; + + mutable std::mutex mu_; + google::bigtable::admin::v2::Table schema_; + std::map> column_families_; +}; + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TABLE_H diff --git a/google/cloud/bigtable/emulator/to_grpc_status.cc b/google/cloud/bigtable/emulator/to_grpc_status.cc new file mode 100644 index 0000000000000..09a843df2185c --- /dev/null +++ b/google/cloud/bigtable/emulator/to_grpc_status.cc @@ -0,0 +1,90 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "google/cloud/bigtable/emulator/to_grpc_status.h" +#include "google/rpc/status.pb.h" +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +grpc::StatusCode MapStatusCode(google::cloud::StatusCode code) { + switch (code) { + case google::cloud::StatusCode::kOk: + return grpc::StatusCode::OK; + case google::cloud::StatusCode::kCancelled: + return grpc::StatusCode::CANCELLED; + case google::cloud::StatusCode::kUnknown: + return grpc::StatusCode::UNKNOWN; + case google::cloud::StatusCode::kInvalidArgument: + return grpc::StatusCode::INVALID_ARGUMENT; + case google::cloud::StatusCode::kDeadlineExceeded: + return grpc::StatusCode::DEADLINE_EXCEEDED; + case google::cloud::StatusCode::kNotFound: + return grpc::StatusCode::NOT_FOUND; + case google::cloud::StatusCode::kAlreadyExists: + return grpc::StatusCode::ALREADY_EXISTS; + case google::cloud::StatusCode::kPermissionDenied: + return grpc::StatusCode::PERMISSION_DENIED; + case google::cloud::StatusCode::kUnauthenticated: + return grpc::StatusCode::UNAUTHENTICATED; + case google::cloud::StatusCode::kResourceExhausted: + return grpc::StatusCode::RESOURCE_EXHAUSTED; + case google::cloud::StatusCode::kFailedPrecondition: + return grpc::StatusCode::FAILED_PRECONDITION; + case google::cloud::StatusCode::kAborted: + return grpc::StatusCode::ABORTED; + case google::cloud::StatusCode::kOutOfRange: + return grpc::StatusCode::OUT_OF_RANGE; + case google::cloud::StatusCode::kUnimplemented: + return grpc::StatusCode::UNIMPLEMENTED; + case google::cloud::StatusCode::kInternal: + return grpc::StatusCode::INTERNAL; + case google::cloud::StatusCode::kUnavailable: + return grpc::StatusCode::UNAVAILABLE; + case google::cloud::StatusCode::kDataLoss: + return grpc::StatusCode::DATA_LOSS; + default: + return grpc::StatusCode::UNKNOWN; + } +} + +::grpc::Status ToGrpcStatus(Status const& to_convert) { + google::rpc::ErrorInfo error_info; + 
error_info.set_reason(to_convert.error_info().reason()); + error_info.set_domain(to_convert.error_info().domain()); + for (auto const& md_name_value : to_convert.error_info().metadata()) { + (*error_info.mutable_metadata())[md_name_value.first] = + md_name_value.second; + } + + google::rpc::Status rpc_status; + rpc_status.set_code(static_cast(to_convert.code())); + rpc_status.set_message(to_convert.message()); + auto& rpc_status_details = *rpc_status.add_details(); + rpc_status_details.PackFrom(std::move(error_info)); + + std::string serialized_rpc_status; + rpc_status.SerializeToString(&serialized_rpc_status); + return ::grpc::Status(MapStatusCode(to_convert.code()), to_convert.message(), + std::move(serialized_rpc_status)); +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/to_grpc_status.h b/google/cloud/bigtable/emulator/to_grpc_status.h new file mode 100644 index 0000000000000..14355438149ed --- /dev/null +++ b/google/cloud/bigtable/emulator/to_grpc_status.h @@ -0,0 +1,34 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TO_GRPC_STATUS_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TO_GRPC_STATUS_H + +#include "google/cloud/status.h" +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +::grpc::Status ToGrpcStatus(Status const& to_convert); + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TO_GRPC_STATUS_H + From ef4b99a03dd069f34ada719a3edb04f30dd173c2 Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Tue, 17 Dec 2024 14:53:13 +0100 Subject: [PATCH 002/195] Working read rows --- google/cloud/bigtable/CMakeLists.txt | 2 + google/cloud/bigtable/emulator/CMakeLists.txt | 17 +- .../emulator/bigtable_emulator_common.bzl | 8 + .../emulator/bigtable_emulator_unit_tests.bzl | 3 + google/cloud/bigtable/emulator/cluster.cc | 41 +-- google/cloud/bigtable/emulator/cluster.h | 6 - .../cloud/bigtable/emulator/column_family.cc | 191 +++++++++++++ .../cloud/bigtable/emulator/column_family.h | 125 +++++++++ .../bigtable/emulator/column_family_test.cc | 54 ++++ .../cloud/bigtable/emulator/row_iterators.cc | 28 ++ .../cloud/bigtable/emulator/row_iterators.h | 220 +++++++++++++++ .../bigtable/emulator/row_iterators_test.cc | 47 ++++ .../cloud/bigtable/emulator/row_streamer.cc | 99 +++++++ google/cloud/bigtable/emulator/row_streamer.h | 54 ++++ google/cloud/bigtable/emulator/server.cc | 27 +- .../cloud/bigtable/emulator/sorted_row_set.cc | 133 +++++++++ .../cloud/bigtable/emulator/sorted_row_set.h | 47 ++++ .../bigtable/emulator/sorted_row_set_test.cc | 68 +++++ google/cloud/bigtable/emulator/table.cc | 221 ++++++++++----- google/cloud/bigtable/emulator/table.h | 51 +--- .../bigtable/google_cloud_cpp_bigtable.bzl | 2 + .../bigtable/internal/row_range_helpers.cc | 261 ++++++++++++++++++ .../bigtable/internal/row_range_helpers.h | 66 +++++ google/cloud/bigtable/row_range.cc | 157 
+---------- google/cloud/bigtable/row_range.h | 9 +- 25 files changed, 1634 insertions(+), 303 deletions(-) create mode 100644 google/cloud/bigtable/emulator/column_family.cc create mode 100644 google/cloud/bigtable/emulator/column_family.h create mode 100644 google/cloud/bigtable/emulator/column_family_test.cc create mode 100644 google/cloud/bigtable/emulator/row_iterators.cc create mode 100644 google/cloud/bigtable/emulator/row_iterators.h create mode 100644 google/cloud/bigtable/emulator/row_iterators_test.cc create mode 100644 google/cloud/bigtable/emulator/row_streamer.cc create mode 100644 google/cloud/bigtable/emulator/row_streamer.h create mode 100644 google/cloud/bigtable/emulator/sorted_row_set.cc create mode 100644 google/cloud/bigtable/emulator/sorted_row_set.h create mode 100644 google/cloud/bigtable/emulator/sorted_row_set_test.cc create mode 100644 google/cloud/bigtable/internal/row_range_helpers.cc create mode 100644 google/cloud/bigtable/internal/row_range_helpers.h diff --git a/google/cloud/bigtable/CMakeLists.txt b/google/cloud/bigtable/CMakeLists.txt index fb0c02230a9b3..430ab2284b051 100644 --- a/google/cloud/bigtable/CMakeLists.txt +++ b/google/cloud/bigtable/CMakeLists.txt @@ -206,6 +206,8 @@ add_library( internal/retry_context.h internal/retry_traits.h internal/row_reader_impl.h + internal/row_range_helpers.cc + internal/row_range_helpers.h internal/rpc_policy_parameters.h internal/rpc_policy_parameters.inc internal/traced_row_reader.cc diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index a696b9e57437d..4da665c7297f9 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -18,10 +18,18 @@ add_library( bigtable_emulator_common # cmake-format: sort cluster.cc cluster.h + column_family.cc + column_family.h table.cc table.h server.cc server.h + sorted_row_set.cc + sorted_row_set.h + row_iterators.cc + row_iterators.h + 
row_streamer.cc + row_streamer.h to_grpc_status.cc to_grpc_status.h) target_link_libraries( @@ -42,12 +50,15 @@ if (BUILD_TESTING) # List the unit tests, then setup the targets and dependencies. set(bigtable_emulator_unit_tests # cmake-format: sort - server_test.cc) + column_family_test.cc + row_iterators_test.cc + server_test.cc + sorted_row_set_test.cc) export_list_to_bazel("bigtable_emulator_unit_tests.bzl" "bigtable_emulator_unit_tests" YEAR "2024") - foreach (fname ${bigtable_emulators_unit_tests}) - google_cloud_cpp_add_executable(target "bigtable" "${fname}") + foreach (fname ${bigtable_emulator_unit_tests}) + google_cloud_cpp_add_executable(target "bigtable_emulator" "${fname}") target_link_libraries( ${target} PRIVATE bigtable_emulator_common diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl index 6ab48e081f561..02f827b6c4036 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl @@ -18,14 +18,22 @@ bigtable_emulator_common_hdrs = [ "cluster.h", + "column_family.h", "table.h", "server.h", + "sorted_row_set.h", + "row_iterators.h", + "row_streamer.h", "to_grpc_status.h", ] bigtable_emulator_common_srcs = [ "cluster.cc", + "column_family.cc", "table.cc", "server.cc", + "sorted_row_set.cc", + "row_iterators.cc", + "row_streamer.cc", "to_grpc_status.cc", ] diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl index c80fa828dc78c..135bacf54e7e8 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -17,5 +17,8 @@ """Automatically generated unit tests list - DO NOT EDIT.""" bigtable_emulator_unit_tests = [ + "column_family_test.cc", + "row_iterators_test.cc", "server_test.cc", + "sorted_row_set_test.cc", ] diff 
--git a/google/cloud/bigtable/emulator/cluster.cc b/google/cloud/bigtable/emulator/cluster.cc index 2177089086cd0..c327457a2fb1a 100644 --- a/google/cloud/bigtable/emulator/cluster.cc +++ b/google/cloud/bigtable/emulator/cluster.cc @@ -13,6 +13,8 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/cluster.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" #include "google/cloud/internal/make_status.h" #include "absl/strings/match.h" @@ -20,7 +22,7 @@ namespace google { namespace cloud { namespace bigtable { namespace emulator { -namespace{ +namespace { namespace btadmin = google::bigtable::admin::v2; @@ -33,7 +35,7 @@ StatusOr ApplyView(std::string const& table_name, } switch (view) { case btadmin::Table::VIEW_UNSPECIFIED: - return internal::InternalError( + return google::cloud::internal::InternalError( "VIEW_UNSPECIFIED cannot be the default view"); case btadmin::Table::NAME_ONLY: { btadmin::Table res; @@ -61,7 +63,7 @@ StatusOr ApplyView(std::string const& table_name, case btadmin::Table::FULL: return table.GetSchema(); default: - return internal::UnimplementedError( + return google::cloud::internal::UnimplementedError( "Unsupported view.", GCP_ERROR_INFO().WithMetadata("view", Table_View_Name(view))); } @@ -73,20 +75,19 @@ StatusOr Cluster::CreateTable(std::string const& table_name, btadmin::Table schema) { schema.set_name(table_name); std::cout << "Creating table " << table_name << std::endl; - auto to_insert = std::make_shared
(); - auto status = to_insert->Construct(std::move(schema)); - if (!status.ok()) { - return status; + auto maybe_table = Table::Create(std::move(schema)); + if (!maybe_table) { + return maybe_table.status(); } { std::lock_guard lock(mu_); - if (!table_by_name_.emplace(table_name, to_insert).second) { - return internal::AlreadyExistsError( + if (!table_by_name_.emplace(table_name, *maybe_table).second) { + return google::cloud::internal::AlreadyExistsError( "Table already exists.", GCP_ERROR_INFO().WithMetadata("table_name", table_name)); } } - return to_insert->GetSchema(); + return (*maybe_table)->GetSchema(); } StatusOr> Cluster::ListTables( @@ -148,26 +149,6 @@ Status Cluster::DeleteTable(std::string const& table_name) { return Status(); } -Status Cluster::UpdateTable(btadmin::Table const& new_schema, - google::protobuf::FieldMask const& to_update) { - auto maybe_table = FindTable(new_schema.name()); - if (!maybe_table) { - return maybe_table.status(); - } - (*maybe_table)->Update(new_schema, to_update); - return Status(); -} - -StatusOr Cluster::ModifyColumnFamilies( - btadmin::ModifyColumnFamiliesRequest const& request) { - auto maybe_table = FindTable(request.name()); - if (!maybe_table) { - return maybe_table.status(); - } - return (*maybe_table)->ModifyColumnFamilies(request); -} - - bool Cluster::HasTable(std::string const& table_name) const { std::lock_guard lock(mu_); return table_by_name_.find(table_name) != table_by_name_.end(); diff --git a/google/cloud/bigtable/emulator/cluster.h b/google/cloud/bigtable/emulator/cluster.h index f9baaac3207e5..73bbbc8cab44c 100644 --- a/google/cloud/bigtable/emulator/cluster.h +++ b/google/cloud/bigtable/emulator/cluster.h @@ -44,12 +44,6 @@ class Cluster { Status DeleteTable(std::string const &table_name); - Status UpdateTable(google::bigtable::admin::v2::Table const& new_schema, - google::protobuf::FieldMask const& to_update); - - StatusOr ModifyColumnFamilies( - 
google::bigtable::admin::v2::ModifyColumnFamiliesRequest const& request); - bool HasTable(std::string const &table_name) const; StatusOr> FindTable(std::string const& table_name); diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc new file mode 100644 index 0000000000000..58c5bee629901 --- /dev/null +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -0,0 +1,191 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "google/cloud/bigtable/emulator/column_family.h" + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +ColumnFamily::iterator::iterator( + ColumnFamily const& column_family, std::shared_ptr row_set) + : column_family_(std::cref(column_family)), row_set_(std::move(row_set)) { + if (row_set_) { + std::cout << "ColumnFamily::iterator::iterator():" << std::endl; + for (auto const& range : row_set_->disjoint_ranges()) { + std::cout << " "; + if (range.has_start_key_closed()) { + std::cout << "[" << range.start_key_closed(); + } else if (range.has_start_key_open()) { + std::cout << "(" << range.start_key_open(); + } else { + std::cout << "(inf"; + } + std::cout << ":"; + if (range.has_end_key_closed()) { + std::cout << range.end_key_closed() << "]"; + } else if (range.has_end_key_open()) { + std::cout << range.end_key_open() << ")"; + } else { + std::cout << "inf)"; + } + std::cout << std::endl; + } + + row_set_pos_ = row_set_->disjoint_ranges().begin(); + row_pos_ = column_family_.get().rows_.begin(); + + AdvanceToNextRange(); + EnsureIteratorValid(); + } else { + row_pos_ = column_family_.get().rows_.end(); + } +} + +void ColumnFamily::iterator::AdvanceToNextRange() { + if (row_set_pos_ == row_set_->disjoint_ranges().end()) { + // We've reached the end. + row_pos_ = column_family_.get().rows_.end(); + return; + } + if (row_pos_ == column_family_.get().rows_.end()) { + // row_pos_ is already pointing far enough. + return; + } + if (!internal::RowRangeHelpers::BelowStart(*row_set_pos_, row_pos_->first)) { + // row_pos_ is already pointing far enough. 
+ return; + } + if (row_set_pos_->has_start_key_closed()) { + row_pos_ = column_family_.get().rows_.lower_bound( + row_set_pos_->start_key_closed()); + } else if (row_set_pos_->has_start_key_open()) { + row_pos_ = + column_family_.get().rows_.upper_bound(row_set_pos_->start_key_open()); + } else { + // Range open on the left + row_pos_ = column_family_.get().rows_.begin(); + } +} + +void ColumnFamily::iterator::EnsureIteratorValid() { + // `row_pos_` may point to a row which is past the end of the range pointed by + // row_set_pos_. Make sure this only happens when the iteration reaches its + // end. + while (row_pos_ != column_family_.get().rows_.end() && + row_set_pos_ != row_set_->disjoint_ranges().end() && + internal::RowRangeHelpers::AboveEnd(*row_set_pos_, row_pos_->first)) { + ++row_set_pos_; + AdvanceToNextRange(); + } + // This situation indicates that there are no rows which start after + // current (as pointed by `row_set_pos_`) range's start. Given that we're + // traversing `row_set_` in order, there will be no such rows for + // following ranges, i.e. we've reached the end. +} + +ColumnFamily::iterator& ColumnFamily::iterator::operator++() { + std::cout << "ColumnFamily::iterator::operator++ this=" + << reinterpret_cast(this) << " val before: " + << (row_pos_ == column_family_.get().rows_.end() + ? std::string("end") + : row_pos_->first) + << std::endl; + ++row_pos_; + EnsureIteratorValid(); + return *this; +} + +ColumnFamily::iterator ColumnFamily::iterator::operator++(int) { + ColumnFamily::iterator retval = *this; + ++(*this); + return retval; +} + +void ColumnRow::SetCell(std::int64_t timestamp_micros, std::string const& value) { + if (timestamp_micros < 0) { + // Time since epoch expressed in microseconds but rounded to milliseconds. 
+ timestamp_micros = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count() * + 1000LL; + } + cells_[timestamp_micros] = std::move(value); +} + +std::size_t ColumnRow::DeleteTimeRange( + ::google::bigtable::v2::TimestampRange const& time_range) { + std::size_t num_erased = 0; + for (auto cell_it = cells_.lower_bound(time_range.start_timestamp_micros()); + cell_it != cells_.end() && + (time_range.end_timestamp_micros() == 0 || + cell_it->first < time_range.end_timestamp_micros());) { + cells_.erase(cell_it++); + ++num_erased; + } + return num_erased; +} + +void ColumnFamilyRow::SetCell(std::string const& column_qualifier, + std::int64_t timestamp_micros, + std::string const& value) { + columns_[column_qualifier].SetCell(timestamp_micros, value); +} + +std::size_t ColumnFamilyRow::DeleteColumn( + std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range) { + auto column_it = columns_.find(column_qualifier); + if (column_it != columns_.end()) { + return column_it->second.DeleteTimeRange(time_range); + } + if (!column_it->second.HasCells()) { + columns_.erase(column_it); + } + return 0; +} + +void ColumnFamily::SetCell(std::string const& row_key, + std::string const& column_qualifier, + std::int64_t timestamp_micros, + std::string const& value) { + rows_[row_key].SetCell(column_qualifier, timestamp_micros, value); +} + +bool ColumnFamily::DeleteRow(std::string const& row_key) { + return rows_.erase(row_key) > 0; +} + +std::size_t ColumnFamily::DeleteColumn( + std::string const& row_key, std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range) { + auto row_it = rows_.find(row_key); + if (row_it != rows_.end()) { + auto num_erased_cells = + row_it->second.DeleteColumn(column_qualifier, time_range); + if (!row_it->second.HasColumns()) { + rows_.erase(row_it); + } + return num_erased_cells; + } + return 0; +} + + + +} // namespace emulator +} // namespace 
bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h new file mode 100644 index 0000000000000..9a93e6aecd1fc --- /dev/null +++ b/google/cloud/bigtable/emulator/column_family.h @@ -0,0 +1,125 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H + +#include +#include +#include "google/cloud/bigtable/emulator/sorted_row_set.h" +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + + +class ColumnRow { + public: + void SetCell(std::int64_t timestamp_micros, std::string const& value); + std::size_t DeleteTimeRange( + ::google::bigtable::v2::TimestampRange const& time_range); + + bool HasCells() const { return !cells_.empty(); } + using iterator = std::map::const_iterator; + iterator begin() const { return cells_.begin(); } + iterator end() const { return cells_.end(); } + + private: + std::map cells_; +}; + +class ColumnFamilyRow { + public: + void SetCell(std::string const& column_qualifier, + std::int64_t timestamp_micros, std::string const& value); + std::size_t DeleteColumn( + std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range); + bool HasColumns() { return !columns_.empty(); } + using 
iterator = std::map::const_iterator; + iterator begin() const { return columns_.begin(); } + iterator end() const { return columns_.end(); } + + private: + std::map columns_; +}; + +class ColumnFamily { + public: + class iterator; + + void SetCell(std::string const& row_key, std::string const& column_qualifier, + std::int64_t timestamp_micros, std::string const& value); + bool DeleteRow(std::string const& row_key); + std::size_t DeleteColumn( + std::string const& row_key, std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range); + + iterator begin(std::shared_ptr row_set) const { + return iterator(*this, std::move(row_set)); + } + iterator end() const { return iterator(*this, {}); } + + class iterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = std::pair; + using difference_type = std::size_t; + using reference = value_type&; + using pointer = value_type*; + using const_reference = value_type const&; + using const_pointer = value_type const*; + + iterator& operator++(); + iterator operator++(int); + bool operator==(iterator const& other) const { + return row_pos_ == other.row_pos_; + } + + bool operator!=(iterator const& other) const { + return !(*this == other); + } + + const_reference operator*() const { return *row_pos_; } + + friend iterator ColumnFamily::begin(std::shared_ptr) const; + friend iterator ColumnFamily::end() const; + private: + iterator(ColumnFamily const& column_family, + std::shared_ptr row_set); + + void AdvanceToNextRange(); + void EnsureIteratorValid(); + + std::reference_wrapper column_family_; + std::shared_ptr row_set_; + std::set::const_iterator row_set_pos_; + std::map::const_iterator row_pos_; + }; + + private: + friend class iterator; + std::map rows_; +}; + + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H diff --git 
a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc new file mode 100644 index 0000000000000..6b821938641cd --- /dev/null +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -0,0 +1,54 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/emulator/row_iterators.h" +#include "google/cloud/bigtable/row_range.h" +#include "google/cloud/testing_util/is_proto_equal.h" +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +namespace { + +TEST(ColumnFamilyIterator, Simple) { + ColumnFamily fam; + fam.SetCell("row1", "col1", 123, "foo"); + fam.SetCell("row1", "col1", 124, "fo"); + fam.SetCell("row1", "col2", 123, "bar"); + fam.SetCell("row2", "col1", 123, "foo"); + fam.SetCell("row2", "col3", 120, "baz"); + fam.SetCell("row2", "col3", 120, "baz"); + std::vector rows; + std::transform( + fam.begin(std::shared_ptr( + new SortedRowSet(SortedRowSet::AllRows()))), + fam.end(), + std::back_inserter(rows), + [](std::pair const& val) { + return val.first; + }); + std::vector expected{"row1", "row2"}; + EXPECT_EQ(expected, rows); +} + +} // anonymous namespace +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + + diff --git a/google/cloud/bigtable/emulator/row_iterators.cc b/google/cloud/bigtable/emulator/row_iterators.cc new file mode 100644 index 
0000000000000..ba6a224012255 --- /dev/null +++ b/google/cloud/bigtable/emulator/row_iterators.cc @@ -0,0 +1,28 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/emulator/row_iterators.h" +#include "google/cloud/bigtable/internal/google_bytes_traits.h" +#include "google/cloud/bigtable/internal/row_range_helpers.h" + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + diff --git a/google/cloud/bigtable/emulator/row_iterators.h b/google/cloud/bigtable/emulator/row_iterators.h new file mode 100644 index 0000000000000..5578dc0a0636b --- /dev/null +++ b/google/cloud/bigtable/emulator/row_iterators.h @@ -0,0 +1,220 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Merges several individually sorted ranges into a single sorted stream.
//
// `Iterator` is the iterator type of the input ranges and `IteratorLess` a
// default-constructible strict weak ordering on the dereferenced values. The
// ranges are kept in a min-heap keyed by their current head; dereferencing
// yields the smallest head, incrementing advances the range that supplied it.
// A default-constructed object is the end() iterator.
template <typename Iterator, typename IteratorLess>
class MergedSortedIterator {
 public:
  using iterator_category = std::input_iterator_tag;
  using value_type = typename std::iterator_traits<Iterator>::value_type;
  using difference_type = std::size_t;
  using reference = value_type&;
  using pointer = value_type*;
  using const_reference = value_type const&;
  using const_pointer = value_type const*;

  // end() iterator.
  MergedSortedIterator() = default;
  MergedSortedIterator(MergedSortedIterator const& other) = default;
  MergedSortedIterator(MergedSortedIterator&& other) = default;
  // Declaring the move constructor suppresses the implicit assignment
  // operators, so they have to be requested explicitly.
  MergedSortedIterator& operator=(MergedSortedIterator const& other) = default;
  MergedSortedIterator& operator=(MergedSortedIterator&& other) = default;

  // `ranges` holds [begin, end) pairs; empty ranges are ignored.
  MergedSortedIterator(std::vector<std::pair<Iterator, Iterator>> ranges) {
    for (auto& range : ranges) {
      if (range.first != range.second) {
        ranges_.emplace(std::move(range));
      }
    }
  }

  // Precondition: this is not the end() iterator.
  value_type operator*() const { return *ranges_.top().first; }

  MergedSortedIterator& operator++() {
    auto prev_top = ranges_.top();
    // We need to remove it from the priority queue because we're likely to
    // change the order.
    ranges_.pop();

    ++prev_top.first;

    if (prev_top.first != prev_top.second) {
      ranges_.emplace(std::move(prev_top));
    }

    return *this;
  }

  MergedSortedIterator operator++(int) {
    MergedSortedIterator tmp = *this;
    ++(*this);
    return tmp;
  }

  // Exhausted iterators compare equal to each other (and to end()); otherwise
  // the ranges currently at the top of both heaps are compared.
  bool operator==(MergedSortedIterator const& other) const {
    if (ranges_.empty() || other.ranges_.empty()) {
      return ranges_.empty() == other.ranges_.empty();
    }
    return ranges_.top() == other.ranges_.top();
  }

  bool operator!=(MergedSortedIterator const& other) const {
    return !(*this == other);
  }

 private:
  // std::priority_queue is a max-heap, so the comparison is inverted to keep
  // the range with the smallest head on top.
  struct InternalGreater {
    bool operator()(std::pair<Iterator, Iterator> const& lhs,
                    std::pair<Iterator, Iterator> const& rhs) const {
      return IteratorLess()(*rhs.first, *lhs.first);
    }
  };

  std::priority_queue<std::pair<Iterator, Iterator>,
                      std::vector<std::pair<Iterator, Iterator>>,
                      InternalGreater>
      ranges_;
};

// Flattens a range of collections into a range of their elements.
//
// `DescendFunctor()(outer_value)` must return a collection exposing a nested
// `iterator` type plus `begin()` / `end()`;
// `ValueCombineFunctor()(outer_value, inner_value)` builds the value produced
// by dereferencing. Both functors are default-constructed on every use.
// Outer elements whose collection is empty are skipped.
template <typename OuterIterator, typename DescendFunctor,
          typename ValueCombineFunctor>
class FlattenedIterator {
 public:
  // Spelled with decltype rather than std::result_of, which is deprecated in
  // C++17 and removed in C++20.
  using InnerCollection = decltype(std::declval<DescendFunctor>()(
      std::declval<
          typename std::iterator_traits<OuterIterator>::value_type const&>()));
  using InnerIterator = typename std::decay_t<InnerCollection>::iterator;
  using iterator_category = std::input_iterator_tag;
  using value_type = decltype(std::declval<ValueCombineFunctor>()(
      std::declval<
          typename std::iterator_traits<OuterIterator>::value_type const&>(),
      std::declval<
          typename std::iterator_traits<InnerIterator>::value_type const&>()));
  using difference_type =
      typename std::iterator_traits<InnerIterator>::difference_type;
  using pointer = value_type const*;
  using reference = value_type const&;

  // Passing `begin == end` creates the past-the-end iterator.
  FlattenedIterator(OuterIterator begin, OuterIterator end)
      : outer_pos_(std::move(begin)), outer_end_(std::move(end)) {
    if (outer_pos_ != outer_end_) {
      inner_pos_ = DescendFunctor()(*outer_pos_).begin();
      EnsureIteratorValid();
    }
  }

  value_type operator*() const {
    assert(inner_pos_ != DescendFunctor()(*outer_pos_).end());
    return ValueCombineFunctor()(*outer_pos_, *inner_pos_);
  }

  FlattenedIterator& operator++() {
    ++inner_pos_;
    EnsureIteratorValid();
    return *this;
  }

  FlattenedIterator operator++(int) {
    FlattenedIterator tmp = *this;
    ++(*this);
    return tmp;
  }

  // The inner position only matters while the outer one is dereferenceable.
  bool operator==(FlattenedIterator const& other) const {
    return outer_pos_ == other.outer_pos_ &&
           (outer_pos_ == outer_end_ || inner_pos_ == other.inner_pos_);
  }

  bool operator!=(FlattenedIterator const& other) const {
    return !(*this == other);
  }

 private:
  OuterIterator outer_pos_;
  OuterIterator outer_end_;
  // Value-initialized so that copying a past-the-end iterator never reads an
  // indeterminate value.
  InnerIterator inner_pos_{};

  // Moves forward until the iterator either points at an existing inner
  // element or reaches the end of the outer range.
  void EnsureIteratorValid() {
    while (outer_pos_ != outer_end_ &&
           inner_pos_ == DescendFunctor()(*outer_pos_).end()) {
      ++outer_pos_;
      if (outer_pos_ != outer_end_) {
        inner_pos_ = DescendFunctor()(*outer_pos_).begin();
      }
    }
  }
};

// Applies `Functor` to every element of the underlying range on dereference.
template <typename InputIterator, typename Functor>
class TransformIterator {
 public:
  using iterator_category = std::input_iterator_tag;
  using value_type = decltype(std::declval<Functor>()(
      std::declval<
          typename std::iterator_traits<InputIterator>::value_type>()));
  using difference_type =
      typename std::iterator_traits<InputIterator>::difference_type;
  using pointer = value_type*;
  using reference = value_type&;

  TransformIterator(InputIterator it, Functor func)
      : current_(std::move(it)), transformer_(std::move(func)) {}

  value_type operator*() const { return transformer_(*current_); }

  TransformIterator& operator++() {
    ++current_;
    return *this;
  }

  TransformIterator operator++(int) {
    TransformIterator tmp = *this;
    ++(*this);
    return tmp;
  }

  // Only the positions are compared; the functors do not take part.
  bool operator==(TransformIterator const& other) const {
    return current_ == other.current_;
  }

  bool operator!=(TransformIterator const& other) const {
    return current_ != other.current_;
  }

 private:
  InputIterator current_;
  Functor transformer_;
};

// Helper function to create a [begin, end) pair of TransformIterators which
// share (copies of) the same functor.
template <typename InputIterator, typename Functor>
std::pair<TransformIterator<InputIterator, Functor>,
          TransformIterator<InputIterator, Functor>>
TransformIteratorRange(InputIterator begin, InputIterator end, Functor func) {
  // Each iterator needs its own functor: copy once, then move both.
  Functor func_copy(func);
  return std::make_pair(
      TransformIterator<InputIterator, Functor>(std::move(begin),
                                                std::move(func)),
      TransformIterator<InputIterator, Functor>(std::move(end),
                                                std::move(func_copy)));
}
b/google/cloud/bigtable/emulator/row_iterators_test.cc new file mode 100644 index 0000000000000..32ee60572ac68 --- /dev/null +++ b/google/cloud/bigtable/emulator/row_iterators_test.cc @@ -0,0 +1,47 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/emulator/row_iterators.h" +#include "google/cloud/bigtable/row_range.h" +#include "google/cloud/testing_util/is_proto_equal.h" +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +namespace { + +TEST(MergedSortedIterator, Simple) { + std::vector a{4, 5, 6, 6, 9, 20}; + std::vector b{1, 2, 3, 4, 7, 20}; + std::vector expected{1, 2, 3, 4, 4, 5, 6, 6, 7, 9, 20, 20}; + std::vector merged; + using MSI = MergedSortedIterator::iterator, std::less>; + for (MSI it(std::vector::iterator, + std::vector::iterator>>{ + {a.begin(), a.end()}, {b.begin(), b.end()}}); + it != MSI(); ++it) { + merged.push_back(*it); + } + EXPECT_EQ(expected, merged); +} + +} // anonymous namespace +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + diff --git a/google/cloud/bigtable/emulator/row_streamer.cc b/google/cloud/bigtable/emulator/row_streamer.cc new file mode 100644 index 0000000000000..7902cfea4375e --- /dev/null +++ b/google/cloud/bigtable/emulator/row_streamer.cc @@ -0,0 +1,99 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the 
"License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/emulator/row_streamer.h" +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +namespace btproto = ::google::bigtable::v2; + +RowStreamer::RowStreamer(grpc::ServerWriter& writer) + : writer_(writer) {} + +bool RowStreamer::Stream( + std::tuple const& cell) { + std::cout << "Attempting to stream" << std::endl; + btproto::ReadRowsResponse::CellChunk chunk; + if (!current_row_key_ || (¤t_row_key_->get() != &std::get<0>(cell) && + current_row_key_->get() != std::get<0>(cell))) { + if (!pending_chunks_.empty()) { + pending_chunks_.back().set_commit_row(true); + } + current_row_key_ = std::cref(std::get<0>(cell)); + current_column_family_ = std::cref(std::get<1>(cell)); + current_column_qualifier_ = std::cref(std::get<2>(cell)); + chunk.set_row_key(std::get<0>(cell)); + chunk.mutable_family_name()->set_value(std::get<1>(cell)); + chunk.mutable_qualifier()->set_value(std::get<2>(cell)); + } + if (¤t_column_family_->get() != &std::get<1>(cell) && + current_row_key_->get() != std::get<1>(cell)) { + current_column_family_ = std::cref(std::get<1>(cell)); + current_column_qualifier_ = std::cref(std::get<2>(cell)); + chunk.mutable_family_name()->set_value(std::get<1>(cell)); + chunk.mutable_qualifier()->set_value(std::get<2>(cell)); + } + if (¤t_column_qualifier_->get() != &std::get<2>(cell) && + current_row_key_->get() != std::get<2>(cell)) { + current_column_qualifier_ = std::cref(std::get<2>(cell)); 
+ chunk.mutable_qualifier()->set_value(std::get<2>(cell)); + } + chunk.set_timestamp_micros(std::get<3>(cell)); + chunk.set_value(std::get<4>(cell)); + pending_chunks_.emplace_back(std::move(chunk)); + if (pending_chunks_.size() > 200) { + return Flush(false); + } + std::cout << "Not flushing" << std::endl; + return true; +} + +bool RowStreamer::Flush(bool stream_finished) { + std::cout << "Flushing" << std::endl; + absl::optional dont_flush_this; + if (stream_finished) { + if (!pending_chunks_.empty()) { + pending_chunks_.back().set_commit_row(true); + } + current_row_key_.reset(); + current_column_family_.reset(); + current_column_qualifier_.reset(); + } else { + if (!pending_chunks_.empty()) { + dont_flush_this = std::move(pending_chunks_.back()); + pending_chunks_.resize(pending_chunks_.size() - 1); + } + } + btproto::ReadRowsResponse resp; + for (auto &chunk : pending_chunks_) { + *resp.add_chunks() = std::move(chunk); + } + pending_chunks_.resize(0); + if (dont_flush_this) { + pending_chunks_.emplace_back(*std::move(dont_flush_this)); + } + std::cout << "Writing: " << resp.DebugString() << std::endl; + return writer_.Write(resp); +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + diff --git a/google/cloud/bigtable/emulator/row_streamer.h b/google/cloud/bigtable/emulator/row_streamer.h new file mode 100644 index 0000000000000..558d8dd266a00 --- /dev/null +++ b/google/cloud/bigtable/emulator/row_streamer.h @@ -0,0 +1,54 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_STREAMER_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_STREAMER_H + +#include +#include +#include "absl/types/optional.h" + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +class RowStreamer { + public: + RowStreamer( + grpc::ServerWriter& writer); + bool Stream( + std::tuple const&); + + bool Flush(bool stream_finished); + + private: + grpc::ServerWriter& writer_; + absl::optional> current_row_key_; + absl::optional> + current_column_family_; + absl::optional> + current_column_qualifier_; + std::vector + pending_chunks_; +}; + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_STREAMER_H + diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index e61bd386edb7b..015203c1b7f84 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -34,14 +34,19 @@ class EmulatorService final : public btproto::Bigtable::Service { public: EmulatorService(std::shared_ptr cluster) : cluster_(std::move(cluster)) {} + grpc::Status ReadRows( grpc::ServerContext* /* context */, - btproto::ReadRowsRequest const* /* request */, + btproto::ReadRowsRequest const* request, grpc::ServerWriter* writer) override { - btproto::ReadRowsResponse msg; - writer->WriteLast(msg, grpc::WriteOptions()); - return grpc::Status::OK; + auto maybe_table = cluster_->FindTable(request->table_name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + RowStreamer row_streamer(*writer); + return ToGrpcStatus((*maybe_table)->ReadRows(*request, row_streamer)); } + grpc::Status SampleRowKeys( grpc::ServerContext* /* context */, btproto::SampleRowKeysRequest const* /* request */, 
@@ -148,8 +153,12 @@ class EmulatorTableService final : public btadmin::BigtableTableAdmin::Service { grpc::ServerContext* /* context */, btadmin::UpdateTableRequest const* request, google::longrunning::Operation* response) override { + auto maybe_table = cluster_->FindTable(request->table().name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } auto status = - cluster_->UpdateTable(request->table(), request->update_mask()); + (*maybe_table)->Update(request->table(), request->update_mask()); if (!status.ok()) { return ToGrpcStatus(status); } @@ -177,11 +186,15 @@ class EmulatorTableService final : public btadmin::BigtableTableAdmin::Service { grpc::ServerContext* /* context */, btadmin::ModifyColumnFamiliesRequest const* request, btadmin::Table* response) override { - auto maybe_table = cluster_->ModifyColumnFamilies(*request); + auto maybe_table = cluster_->FindTable(request->name()); if (!maybe_table) { return ToGrpcStatus(maybe_table.status()); } - *response = *std::move(maybe_table); + auto maybe_table_res = (*maybe_table)->ModifyColumnFamilies(*request); + if (!maybe_table_res) { + return ToGrpcStatus(maybe_table_res.status()); + } + *response = *std::move(maybe_table_res); return grpc::Status::OK; } diff --git a/google/cloud/bigtable/emulator/sorted_row_set.cc b/google/cloud/bigtable/emulator/sorted_row_set.cc new file mode 100644 index 0000000000000..152acee36ab0e --- /dev/null +++ b/google/cloud/bigtable/emulator/sorted_row_set.cc @@ -0,0 +1,133 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/emulator/sorted_row_set.h" +#include "google/cloud/bigtable/internal/row_range_helpers.h" +#include "google/cloud/bigtable/row_range.h" + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +namespace { + +namespace btproto = google::bigtable::v2; + +bool HasOverlap(btproto::RowRange const& lhs, btproto::RowRange const& rhs) { + return internal::RowRangeHelpers::Intersect(lhs, rhs).first; +} + +bool DisjointRangesAdjacent(btproto::RowRange const& left, + btproto::RowRange const& right) { + assert(internal::RowRangeHelpers::StartLess()(left, right)); + if (left.has_end_key_closed() && + right.has_start_key_open() && + left.end_key_closed() == right.start_key_open()) { + return true; + } + if (left.has_end_key_open() && + right.has_start_key_closed() && + left.end_key_open() == right.start_key_closed()) { + return true; + } + if (left.has_end_key_closed() && + right.has_start_key_closed() && + internal::ConsecutiveRowKeys(left.end_key_closed(), + right.start_key_closed())) { + return true; + } + return false; +} + +} // anonymous namespace + +StatusOr SortedRowSet::Create( + google::bigtable::v2::RowSet const& row_set) { + SortedRowSet res; + for (auto const& row_key : row_set.row_keys()) { + if (row_key.empty()) { + return InvalidArgumentError( + "`row_key` empty", + GCP_ERROR_INFO().WithMetadata("row_set", row_set.DebugString())); + } + btproto::RowRange to_insert; + to_insert.set_start_key_closed(row_key); + to_insert.set_end_key_closed(row_key); + res.Insert(std::move(to_insert)); + } + for (auto const& row_range : row_set.row_ranges()) { + btproto::RowRange to_insert(row_range); + internal::RowRangeHelpers::SanitizeEmptyEndKeys(to_insert); + if (internal::RowRangeHelpers::IsEmpty(to_insert)) { + continue; + } + res.Insert(row_range); + } + return res; +} + +SortedRowSet 
SortedRowSet::AllRows() { + SortedRowSet res; + res.Insert(btproto::RowRange()); + return res; +} + +void SortedRowSet::Insert(btproto::RowRange inserted_range) { + // Remove all ranges which either have an overlap with `inserted_range` or are + // adjacent to it. Then add `inserted_range` with `start` and `end` + // adjusted to cover what the removed ranges used to cover. + + auto first_to_remove = disjoint_ranges_.upper_bound(inserted_range); + // `*first_to_remove` starts strictly after `inserted_range`'s start. + // The previous range is the first to have a chance for an overlap - it is the + // last one, which starts at or before `inserted_range` start. + if (first_to_remove != disjoint_ranges_.begin() && + HasOverlap(*std::prev(first_to_remove), inserted_range)) { + std::advance(first_to_remove, -1); + } + // The range preceeding `first_to_remove` for sure has no overlap with + // `inserted_range` but it may be adjacent to it. In that case we should also + // remove it. + if (first_to_remove != disjoint_ranges_.begin() && + DisjointRangesAdjacent(*std::prev(first_to_remove), inserted_range)) { + std::advance(first_to_remove, -1); + } + if (first_to_remove != disjoint_ranges_.end()) { + if (internal::RowRangeHelpers::StartLess()(*first_to_remove, + inserted_range)) { + *inserted_range.mutable_start_key_open() = + first_to_remove->start_key_open(); + *inserted_range.mutable_start_key_closed() = + first_to_remove->start_key_closed(); + } + do { + if (internal::RowRangeHelpers::EndLess()(inserted_range, + *first_to_remove)) { + *inserted_range.mutable_end_key_open() = + first_to_remove->end_key_open(); + *inserted_range.mutable_end_key_closed() = + first_to_remove->end_key_closed(); + } + disjoint_ranges_.erase(first_to_remove++); + } while (first_to_remove != disjoint_ranges_.end() && + (HasOverlap(*first_to_remove, inserted_range) || + DisjointRangesAdjacent(inserted_range, *first_to_remove))); + } + disjoint_ranges_.insert(std::move(inserted_range)); +} + +} 
// namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/sorted_row_set.h b/google/cloud/bigtable/emulator/sorted_row_set.h new file mode 100644 index 0000000000000..84ee741ee01ce --- /dev/null +++ b/google/cloud/bigtable/emulator/sorted_row_set.h @@ -0,0 +1,47 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_SORTED_ROW_SET_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_SORTED_ROW_SET_H + +#include +#include "google/cloud/bigtable/internal/row_range_helpers.h" + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +class SortedRowSet { + public: + static StatusOr Create( + google::bigtable::v2::RowSet const& row_set); + static SortedRowSet AllRows(); + + void Insert(google::bigtable::v2::RowRange inserted_range); + std::set const& + disjoint_ranges() const { + return disjoint_ranges_; + }; + + private: + std::set disjoint_ranges_; +}; + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_SORTED_ROW_SET_H diff --git a/google/cloud/bigtable/emulator/sorted_row_set_test.cc b/google/cloud/bigtable/emulator/sorted_row_set_test.cc new file mode 100644 index 0000000000000..bca9f7c642466 --- /dev/null +++ 
b/google/cloud/bigtable/emulator/sorted_row_set_test.cc @@ -0,0 +1,68 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/emulator/sorted_row_set.h" +#include "google/cloud/bigtable/row_range.h" +#include "google/cloud/testing_util/is_proto_equal.h" +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +namespace { + +namespace btproto = ::google::bigtable::v2; +using ::google::cloud::testing_util::IsProtoEqual; + +TEST(SortedRowSet, SingleRange) { + SortedRowSet srs; + std::string expected_text = R"""( +start_key_closed: 'a' +end_key_closed: 'b' +)"""; + btproto::RowRange expected; + ASSERT_TRUE( + google::protobuf::TextFormat::ParseFromString(expected_text, &expected)); + + srs.Insert(RowRange::Closed("a", "b").as_proto()); + ASSERT_EQ(1, srs.disjoint_ranges().size()); + ASSERT_THAT(expected, IsProtoEqual(*srs.disjoint_ranges().begin())); +} + +TEST(StartLess, Order) { + using StartLess = internal::RowRangeHelpers::StartLess; + + ASSERT_FALSE(StartLess()(RowRange::Closed("a", "").as_proto(), + RowRange::Closed("a", "").as_proto())); + ASSERT_TRUE(StartLess()(RowRange::Closed("a", "").as_proto(), + RowRange::Open("a", "").as_proto())); + ASSERT_FALSE(StartLess()(RowRange::Open("a", "").as_proto(), + RowRange::Closed("a", "").as_proto())); + ASSERT_TRUE(StartLess()(RowRange::Closed("a", "").as_proto(), + RowRange::Closed("b", "").as_proto())); 
+ ASSERT_TRUE(StartLess()(RowRange::InfiniteRange().as_proto(), + RowRange::Closed("a", "").as_proto())); + ASSERT_TRUE(StartLess()(RowRange::InfiniteRange().as_proto(), + RowRange::Open("a", "").as_proto())); + ASSERT_FALSE(StartLess()(RowRange::InfiniteRange().as_proto(), + RowRange::InfiniteRange().as_proto())); +} + +} // anonymous namespace +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index ab19442390d98..2a15c83366cd9 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -14,6 +14,7 @@ #include #include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/emulator/row_iterators.h" #include "google/protobuf/util/field_mask_util.h" #include "google/cloud/internal/make_status.h" @@ -25,73 +26,14 @@ namespace emulator { namespace btadmin = ::google::bigtable::admin::v2; namespace btproto = ::google::bigtable::v2; -void ColumnRow::SetCell(std::int64_t timestamp_micros, std::string const& value) { - if (timestamp_micros == -1) { - // Time since epoch expressed in microseconds but rounded to milliseconds. - timestamp_micros = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count() * - 1000LL; +StatusOr> Table::Create( + google::bigtable::admin::v2::Table schema) { + std::shared_ptr
res(new Table); + auto status = res->Construct(std::move(schema)); + if (!status.ok()) { + return status; } - cells_[timestamp_micros] = std::move(value); -} - -std::size_t ColumnRow::DeleteTimeRange( - ::google::bigtable::v2::TimestampRange const& time_range) { - std::size_t num_erased = 0; - for (auto cell_it = cells_.lower_bound(time_range.start_timestamp_micros()); - cell_it != cells_.end() && - (time_range.end_timestamp_micros() == 0 || - cell_it->first < time_range.end_timestamp_micros());) { - cells_.erase(cell_it++); - ++num_erased; - } - return num_erased; -} - -void ColumnFamilyRow::SetCell(std::string const& column_qualifier, - std::int64_t timestamp_micros, - std::string const& value) { - columns_[column_qualifier].SetCell(timestamp_micros, value); -} - -std::size_t ColumnFamilyRow::DeleteColumn( - std::string const& column_qualifier, - ::google::bigtable::v2::TimestampRange const& time_range) { - auto column_it = columns_.find(column_qualifier); - if (column_it != columns_.end()) { - return column_it->second.DeleteTimeRange(time_range); - } - if (!column_it->second.HasCells()) { - columns_.erase(column_it); - } - return 0; -} - -void ColumnFamily::SetCell(std::string const& row_key, - std::string const& column_qualifier, - std::int64_t timestamp_micros, - std::string const& value) { - rows_[row_key].SetCell(column_qualifier, timestamp_micros, value); -} - -bool ColumnFamily::DeleteRow(std::string const& row_key) { - return rows_.erase(row_key) > 0; -} - -std::size_t ColumnFamily::DeleteColumn( - std::string const& row_key, std::string const& column_qualifier, - ::google::bigtable::v2::TimestampRange const& time_range) { - auto row_it = rows_.find(row_key); - if (row_it != rows_.end()) { - auto num_erased_cells = - row_it->second.DeleteColumn(column_qualifier, time_range); - if (!row_it->second.HasColumns()) { - rows_.erase(row_it); - } - return num_erased_cells; - } - return 0; + return res; } Status Table::Construct(google::bigtable::admin::v2::Table 
schema) {
@@ -335,6 +277,153 @@ Status Table::MutateRow(
   return Status();
 }
 
+class ExtendWithColumnFamilyName {  // Functor: turns a (row key, row) pair into a tuple that also carries the column family name.
+ public:
+  using ExtendedType = std::tuple const;  // NOTE(review): template arguments look stripped in this copy of the patch - confirm against the repository.
+
+  explicit ExtendWithColumnFamilyName(std::string const& column_family_name)
+      : column_family_name_(std::cref(column_family_name)) {}  // Keeps a reference only: the name must outlive this functor.
+  ExtendWithColumnFamilyName(ExtendWithColumnFamilyName const&) = default;
+  ExtendWithColumnFamilyName(ExtendWithColumnFamilyName&&) = default;
+  ExtendWithColumnFamilyName& operator=(ExtendWithColumnFamilyName const&) =
+      default;
+  ExtendWithColumnFamilyName& operator=(ExtendWithColumnFamilyName&&) = default;  // Move assignment; previously declared with a non-const lvalue reference.
+
+  ExtendedType operator()(  // Returns (pair.first, column family name, pair.second).
+      std::iterator_traits::value_type const&
+          row_key_and_column) const {
+    return ExtendedType(row_key_and_column.first, column_family_name_.get(),
+                        row_key_and_column.second);
+  }
+
+ private:
+  std::reference_wrapper column_family_name_;
+};
+
+struct RowKeyLess {  // Less-than comparator: element 0 via CompareRowKey, ties broken on element 1 via CompareColumnQualifiers.
+  bool operator()(
+      TransformIterator::value_type const& lhs,
+      TransformIterator::value_type const& rhs)
+      const {
+    auto row_key_cmp =
+        internal::CompareRowKey(std::get<0>(lhs), std::get<0>(rhs));
+    if (row_key_cmp == 0) {
+      return internal::CompareColumnQualifiers(std::get<1>(lhs),
+                                               std::get<1>(rhs)) < 0;  // Element 1 is presumably the column family name added by ExtendWithColumnFamilyName - verify.
+    }
+    return row_key_cmp < 0;
+  }
+};
+
+struct DescendToColumn {  // Functor: extracts element 2 (the ColumnFamilyRow) of the tuple.
+  ColumnFamilyRow const& operator()(
+      std::tuple const& column_family_row) const {
+    return std::get<2>(column_family_row);
+  }
+};
+
+struct CombineColumnIterators {  // Functor: appends a column's (first, second) pair to elements 0 and 1 of the outer tuple.
+  using ReturnType =
+      std::tuple const;
+  ReturnType operator()(
+      std::tuple const& column_family_row,
+      std::pair const& column_row) const {  // const added: the functor holds no state, matching its siblings.
+    return ReturnType(std::get<0>(column_family_row),
+                      std::get<1>(column_family_row), column_row.first,
+                      column_row.second);
+  }
+};
+
+
+struct DescendToCell {  // Functor: extracts element 3 (the ColumnRow) of the tuple.
+  ColumnRow const& operator()(
+      CombineColumnIterators::ReturnType const &column_row) const {
+    return std::get<3>(column_row);
+  }
+};
+
+struct CombineCellIterators {  // Functor: appends a cell's (first, second) pair to elements 0..2 of the outer tuple.
+  using ReturnType =
+      std::tuple const;
+  ReturnType operator()(
+      CombineColumnIterators::ReturnType const &column_row,
+      std::pair const& cell) const {  // const added: the functor holds no state, matching its siblings.
+    static_assert(std::is_same,
+        ColumnRow::iterator::value_type>::value);
+    return ReturnType(std::get<0>(column_row),
+                      std::get<1>(column_row),
+                      std::get<2>(column_row),
+                      cell.first,
+                      cell.second);
+  }
+};
+
+Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request,  // Streams every cell of the requested rows (all rows when `rows` is unset) to row_streamer.
+                       RowStreamer& row_streamer) const {  // Returns the SortedRowSet::Create error, or AbortedError when Stream()/Flush() returns false.
+  std::shared_ptr row_set;
+  if (request.has_rows()) {
+    auto maybe_row_set = SortedRowSet::Create(request.rows());
+    if (!maybe_row_set) {
+      return maybe_row_set.status();
+    }
+    row_set = std::make_shared(*std::move(maybe_row_set));
+  } else {
+    row_set = std::make_shared(SortedRowSet::AllRows());
+  }
+  std::lock_guard lock(mu_);  // Held until return, i.e. for the whole scan including the streaming calls.
+  std::vector,
+                              TransformIterator>>
+      cf_ranges;
+  for (auto const &column_family : column_families_) {  // One range per column family, each tagged with the family name.
+    cf_ranges.emplace_back(
+        TransformIteratorRange(
+            column_family.second->begin(row_set), column_family.second->end(),
+            ExtendWithColumnFamilyName(column_family.first)));
+  }
+  using CFRowsIt = MergedSortedIterator<
+      TransformIterator,
+      RowKeyLess>;
+  CFRowsIt cfrows_begin(std::move(cf_ranges));  // Merges the per-family ranges using RowKeyLess.
+  CFRowsIt cfrows_end;
+
+  using ColRowsIt =
+      FlattenedIterator;
+  ColRowsIt colrows_begin(std::move(cfrows_begin), cfrows_end);
+  ColRowsIt colrows_end(cfrows_end, cfrows_end);
+
+  using CellRowsIt =
+      FlattenedIterator;
+  CellRowsIt cellrows_begin(std::move(colrows_begin), colrows_end);
+  CellRowsIt cellrows_end(colrows_end, colrows_end);
+  std::cout << "Print start" << std::endl;  // NOTE(review): this and the std::cout lines below look like debug output left in - consider removing.
+
+  for (; cellrows_begin != cellrows_end; ++cellrows_begin) {
+    std::cout << "Row: " << std::get<0>(*cellrows_begin)
+              << " column_family: " << std::get<1>(*cellrows_begin)
+              << " column_qualifier: " << std::get<2>(*cellrows_begin)
+              << " column_timestamp: " << std::get<3>(*cellrows_begin)
+              << " column_value: " << std::get<4>(*cellrows_begin)
+              << std::endl;
+    if (!row_streamer.Stream(*cellrows_begin)) {
+      std::cout << "HOW?" << std::endl;
+      return AbortedError("Stream closed by the client.", GCP_ERROR_INFO());
+    }
+  }
+  if (!row_streamer.Flush(true)) {
+    return AbortedError("Stream closed by the client.", GCP_ERROR_INFO());
+  }
+  std::cout << "Print stop" << std::endl;
+  return Status();
+}
+
 bool Table::IsDeleteProtected() const {
   std::lock_guard lock(mu_);
   return IsDeleteProtectedNoLock();
diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h
index d56c2c7f808cf..c8df8d6179bd3 100644
--- a/google/cloud/bigtable/emulator/table.h
+++ b/google/cloud/bigtable/emulator/table.h
@@ -22,6 +22,8 @@
 #include
 #include
 #include
+#include "google/cloud/bigtable/emulator/column_family.h"
+#include "google/cloud/bigtable/emulator/row_streamer.h"
 #include
 
 namespace google {
@@ -29,47 +31,15 @@ namespace cloud {
 namespace bigtable {
 namespace emulator {
 
-class ColumnRow {
- public:
-  void SetCell(std::int64_t timestamp_micros, std::string const& value);
-  std::size_t DeleteTimeRange(
-      ::google::bigtable::v2::TimestampRange const& time_range);
-
-  bool HasCells() const { return !cells_.empty(); }
-
- private:
-  std::map cells_;
-};
-
-class ColumnFamilyRow {
- public:
-  void SetCell(std::string const& column_qualifier,
-               std::int64_t timestamp_micros, std::string const& value);
-  std::size_t DeleteColumn(
-      std::string const& column_qualifier,
-      ::google::bigtable::v2::TimestampRange const& time_range);
-  bool HasColumns() { return !columns_.empty(); }
-
- private:
-  std::map columns_;
-};
-
-class ColumnFamily {
- public:
-  void SetCell(std::string const& row_key, std::string const& column_qualifier,
-               std::int64_t timestamp_micros, std::string const& value);
-  bool DeleteRow(std::string const& row_key);
-  std::size_t DeleteColumn(
-      std::string const& row_key, std::string const& column_qualifier,
-      ::google::bigtable::v2::TimestampRange const& time_range);
-
- private:
-  std::map rows_;
+struct CellView {
+  std::int64_t timestamp;
+  std::string const& value;
}; class Table { public: - Status Construct(google::bigtable::admin::v2::Table schema); + static StatusOr> Create( + google::bigtable::admin::v2::Table schema); google::bigtable::admin::v2::Table GetSchema() const; @@ -83,11 +53,18 @@ class Table { Status MutateRow(google::bigtable::v2::MutateRowRequest const & request); + Status ReadRows(google::bigtable::v2::ReadRowsRequest const& request, + RowStreamer& row_streamer) const; + private: + Table() = default; + friend class RowSetIterator; + template StatusOr> FindColumnFamily( MESSAGE const& message) const; bool IsDeleteProtectedNoLock() const; + Status Construct(google::bigtable::admin::v2::Table schema); mutable std::mutex mu_; google::bigtable::admin::v2::Table schema_; diff --git a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl index cb7b8e9635df1..49f4cd94525db 100644 --- a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl +++ b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl @@ -103,6 +103,7 @@ google_cloud_cpp_bigtable_hdrs = [ "internal/retry_context.h", "internal/retry_traits.h", "internal/row_reader_impl.h", + "internal/row_range_helpers.h", "internal/rpc_policy_parameters.h", "internal/rpc_policy_parameters.inc", "internal/traced_row_reader.h", @@ -203,6 +204,7 @@ google_cloud_cpp_bigtable_srcs = [ "internal/rate_limiter.cc", "internal/readrowsparser.cc", "internal/retry_context.cc", + "internal/row_range_helpers.cc", "internal/traced_row_reader.cc", "metadata_update_policy.cc", "mutation_batcher.cc", diff --git a/google/cloud/bigtable/internal/row_range_helpers.cc b/google/cloud/bigtable/internal/row_range_helpers.cc new file mode 100644 index 0000000000000..069afb0e7223c --- /dev/null +++ b/google/cloud/bigtable/internal/row_range_helpers.cc @@ -0,0 +1,261 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "google/cloud/bigtable/internal/row_range_helpers.h"
+#include "google/cloud/bigtable/internal/google_bytes_traits.h"
+
+namespace google {
+namespace cloud {
+namespace bigtable {
+GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN
+namespace internal {
+
+namespace btproto = ::google::bigtable::v2;
+
+btproto::RowRange RowRangeHelpers::Empty() {  // Builds the canonical empty range: open start "" and open end "\0".
+  btproto::RowRange result;
+  // Return an open interval that contains no key, using "\0" for the end key.
+  // We can't use "", because when appearing as the end it means 'infinity'.
+  result.set_start_key_open("");
+  result.set_end_key_open(std::string("\0", 1));
+  return result;
+}
+
+bool RowRangeHelpers::IsEmpty(btproto::RowRange const &row_range) {  // True when no key can lie in row_range; a range without an end key is never empty.
+  RowKeyType unused;
+  // We do not want to copy the strings unnecessarily, so initialize a pointer
+  // to *_key_closed() or *_key_open(), as needed (or to `unused` if not set).
+  auto const* start = &unused;
+  bool start_open = false;
+  switch (row_range.start_key_case()) {
+    case btproto::RowRange::kStartKeyClosed:
+      start = &row_range.start_key_closed();
+      break;
+    case btproto::RowRange::kStartKeyOpen:
+      start = &row_range.start_key_open();
+      start_open = true;
+      break;
+    case btproto::RowRange::START_KEY_NOT_SET:
+      break;
+  }
+  // We need to initialize this to something to make g++ happy, but it cannot
+  // be a value that is discarded in all switch() cases to make Clang happy.
+  auto const* end = &row_range.end_key_closed();
+  bool end_open = false;
+  switch (row_range.end_key_case()) {
+    case btproto::RowRange::kEndKeyClosed:
+      // Already initialized.
+      break;
+    case btproto::RowRange::kEndKeyOpen:
+      end = &row_range.end_key_open();
+      end_open = true;
+      break;
+    case btproto::RowRange::END_KEY_NOT_SET:
+      // A range ending at +infinity is never empty.
+      return false;
+  }
+
+  // Special case of an open interval of two consecutive strings.
+  if (start_open && end_open && internal::ConsecutiveRowKeys(*start, *end)) {
+    return true;
+  }
+
+  // Compare the strings as byte vectors (careful with unsigned chars).
+  int cmp = internal::CompareRowKey(*start, *end);
+  if (cmp == 0) {
+    return start_open || end_open;  // Equal bounds: non-empty only for the closed-closed single-key range.
+  }
+  return cmp > 0;  // Start after end means empty.
+}
+
+bool RowRangeHelpers::BelowStart(btproto::RowRange const& row_range,  // True when `key` is before the start bound; always false when no start key is set.
+                                 RowKeyType const& key) {
+  switch (row_range.start_key_case()) {
+    case btproto::RowRange::START_KEY_NOT_SET:
+      break;
+    case btproto::RowRange::kStartKeyClosed:
+      return key < row_range.start_key_closed();
+    case btproto::RowRange::kStartKeyOpen:
+      return key <= row_range.start_key_open();  // An open start excludes the bound itself.
+  }
+  return false;
+}
+
+bool RowRangeHelpers::AboveEnd(btproto::RowRange const& row_range,  // True when `key` is past the end bound; always false when no end key is set.
+                               RowKeyType const& key) {
+  switch (row_range.end_key_case()) {
+    case btproto::RowRange::END_KEY_NOT_SET:
+      break;
+    case btproto::RowRange::kEndKeyClosed:
+      return key > row_range.end_key_closed();
+    case btproto::RowRange::kEndKeyOpen:
+      return key >= row_range.end_key_open();  // An open end excludes the bound itself.
+  }
+  return false;
+}
+
+std::pair RowRangeHelpers::Intersect(  // Returns (true, intersection) when lhs and rhs share keys, otherwise a pair whose first member is false.
+    btproto::RowRange const& lhs, btproto::RowRange const& rhs) {
+  if (IsEmpty(rhs)) {
+    return std::make_pair(false, Empty());
+  }
+  std::string empty;  // NOTE(review): this local is never read in the visible code - candidate for removal.
+
+  // The algorithm is simple: start with `lhs` as the resulting range. Update
+  // both endpoints based on the value of `rhs`. If the resulting range is
+  // empty there is no intersection.
+  btproto::RowRange intersection(lhs);
+
+  switch (rhs.start_key_case()) {
+    case btproto::RowRange::START_KEY_NOT_SET:
+      break;
+    case btproto::RowRange::kStartKeyClosed: {
+      auto const& start = rhs.start_key_closed();
+      // If `rhs` starts above the current intersection then there is no
+      // intersection.
+      if (AboveEnd(intersection, start)) {
+        return std::make_pair(false, Empty());
+      }
+      // If `start` is inside the intersection (as computed so far), then the
+      // intersection must start at `start`, and it would be closed if `rhs`
+      // is closed at the start.
+      if (Contains(intersection, start)) {
+        intersection.set_start_key_closed(start);
+      }
+      break;
+    }
+    case btproto::RowRange::kStartKeyOpen: {
+      // The case where `rhs` is open on the start point is analogous.
+      auto const& start = rhs.start_key_open();
+      if (AboveEnd(intersection, start)) {
+        return std::make_pair(false, Empty());
+      }
+      if (Contains(intersection, start)) {
+        intersection.set_start_key_open(start);
+      }
+    } break;
+  }
+
+  // Then check if the end limit of `rhs` is below the intersection so far.
+  switch (rhs.end_key_case()) {
+    case btproto::RowRange::END_KEY_NOT_SET:
+      break;
+    case btproto::RowRange::kEndKeyClosed: {
+      // If `rhs` ends before the start of the intersection there is no
+      // intersection and we can return immediately.
+      auto const& end = rhs.end_key_closed();
+      if (BelowStart(intersection, end)) {
+        return std::make_pair(false, Empty());
+      }
+      // If `end` is inside the intersection as computed so far, then the
+      // intersection must end at `end` and it is closed if `rhs` is closed
+      // at the end.
+      if (Contains(intersection, end)) {
+        intersection.set_end_key_closed(end);
+      }
+    } break;
+    case btproto::RowRange::kEndKeyOpen: {
+      // Do the analogous thing for `end` being an open endpoint.
+      auto const& end = rhs.end_key_open();
+      if (BelowStart(intersection, end)) {
+        return std::make_pair(false, Empty());
+      }
+      if (Contains(intersection, end)) {
+        intersection.set_end_key_open(end);
+      }
+    } break;
+  }
+
+  bool is_empty = IsEmpty(intersection);  // Adjusting the bounds can still leave an empty range, e.g. equal bounds with one side open.
+  return std::make_pair(!is_empty, std::move(intersection));
+}
+
+void RowRangeHelpers::SanitizeEmptyEndKeys(  // Clears an end key that is set but empty, so the range reads as unbounded at the end.
+    google::bigtable::v2::RowRange& row_range) {
+  // The service treats an empty end key as end of table. Some of our
+  // intersection logic does not, though. So we are best off sanitizing the
+  // input, by clearing the end key if it is empty.
+  if (row_range.has_end_key_closed()) {
+    if (IsEmptyRowKey(row_range.end_key_closed())) {
+      row_range.clear_end_key_closed();
+    }
+  }
+  if (row_range.has_end_key_open()) {
+    if (IsEmptyRowKey(row_range.end_key_open())) {
+      row_range.clear_end_key_open();
+    }
+  }
+}
+
+bool RowRangeHelpers::StartLess::operator()(  // Orders ranges by start bound: no start key first, then by key, closed before open.
+    btproto::RowRange const& left, btproto::RowRange const& right) const {
+  if (!left.has_start_key_open() &&
+      !left.has_start_key_closed()) {
+    // left has no start key (unbounded below)
+    return right.has_start_key_open() ||
+        right.has_start_key_closed();
+  }
+  // left has a start key
+  if (!right.has_start_key_open() &&
+      !right.has_start_key_closed()) {
+    return false;
+  }
+  // both have a start key
+  auto const& left_start = left.has_start_key_closed()
+                               ? left.start_key_closed()
+                               : left.start_key_open();
+  auto const& right_start = right.has_start_key_closed()
+                               ? right.start_key_closed()
+                               : right.start_key_open();
+
+  auto cmp = internal::CompareRowKey(left_start, right_start);
+  if (cmp != 0) {
+    return cmp < 0;
+  }
+  // same start key in both: a closed start sorts before an open one
+  return left.has_start_key_closed() &&
+      right.has_start_key_open();
+}
+
+bool RowRangeHelpers::EndLess::operator()(  // Orders ranges by end bound: by key, open before closed, no end key last.
+    btproto::RowRange const& left, btproto::RowRange const& right) const {
+  if (!right.has_end_key_open() && !right.has_end_key_closed()) {
+    // right is infinite
+    return left.has_end_key_open() || left.has_end_key_closed();
+  }
+  // right is finite
+  if (!left.has_end_key_open() && !left.has_end_key_closed()) {
+    return false;
+  }
+  // both are finite
+  auto const& left_end =
+      left.has_end_key_closed() ? left.end_key_closed() : left.end_key_open();
+  auto const& right_end = right.has_end_key_closed() ? right.end_key_closed()
+                                                     : right.end_key_open();
+
+  auto cmp = internal::CompareRowKey(left_end, right_end);
+  if (cmp != 0) {
+    return cmp < 0;
+  }
+  // same end key in both: an open end sorts before a closed one
+  return left.has_end_key_open() && right.has_end_key_closed();
+}
+
+}  // namespace internal
+GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
+}  // namespace bigtable
+}  // namespace cloud
+}  // namespace google
+
+
diff --git a/google/cloud/bigtable/internal/row_range_helpers.h b/google/cloud/bigtable/internal/row_range_helpers.h
new file mode 100644
index 0000000000000..4f49b246aac02
--- /dev/null
+++ b/google/cloud/bigtable/internal/row_range_helpers.h
@@ -0,0 +1,66 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_ROW_RANGE_HELPERS_H
+#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_ROW_RANGE_HELPERS_H
+
+#include "google/cloud/bigtable/row_key.h"
+#include "google/cloud/bigtable/row_range.h"
+#include
+
+namespace google {
+namespace cloud {
+namespace bigtable {
+GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN
+namespace internal {
+
+class RowRangeHelpers {  // Static helpers that operate directly on the google::bigtable::v2::RowRange proto.
+ public:
+  static google::bigtable::v2::RowRange Empty();  // A range that contains no key.
+  static bool IsEmpty(google::bigtable::v2::RowRange const& row_range);  // True when row_range contains no key.
+  static bool BelowStart(google::bigtable::v2::RowRange const& row_range,  // True when `key` is before the start bound.
+                         RowKeyType const& key);
+  static bool AboveEnd(google::bigtable::v2::RowRange const& row_range,  // True when `key` is past the end bound.
+                       RowKeyType const& key);
+  static std::pair Intersect(  // NOTE(review): the std::pair template arguments look stripped in this copy - confirm.
+      google::bigtable::v2::RowRange const& lhs,
+      google::bigtable::v2::RowRange const& rhs);  // `.first` tells whether the ranges intersect; `.second` is the resulting range.
+  /// Return true if @p key is in the range.
+  template
+  static bool Contains(google::bigtable::v2::RowRange const& row_range,
+                       T const& key) {
+    return !BelowStart(row_range, key) && !AboveEnd(row_range, key);
+  }
+  static void SanitizeEmptyEndKeys(google::bigtable::v2::RowRange &row_range);  // Clears an end key that is set but empty (modifies row_range in place).
+
+  /// A Functor describing the order on range starts.
+  struct StartLess {
+    bool operator()(google::bigtable::v2::RowRange const& left,
+                    google::bigtable::v2::RowRange const& right) const;
+  };
+
+  /// A Functor describing the order on range ends.
+  struct EndLess {
+    bool operator()(google::bigtable::v2::RowRange const& left,
+                    google::bigtable::v2::RowRange const& right) const;
+  };
+};
+
+}  // namespace internal
+GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
+}  // namespace bigtable
+}  // namespace cloud
+}  // namespace google
+
+#endif  // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_ROW_RANGE_HELPERS_H
diff --git a/google/cloud/bigtable/row_range.cc b/google/cloud/bigtable/row_range.cc
index f92cafd4870a3..304dd5981952a 100644
--- a/google/cloud/bigtable/row_range.cc
+++ b/google/cloud/bigtable/row_range.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "google/cloud/bigtable/row_range.h"
+#include "google/cloud/bigtable/internal/row_range_helpers.h"
 
 namespace google {
 namespace cloud {
@@ -22,164 +23,28 @@ namespace btproto = ::google::bigtable::v2;
 
 RowRange::RowRange(::google::bigtable::v2::RowRange rhs)
     : row_range_(std::move(rhs)) {
-  // The service treats an empty end key as end of table. Some of our
-  // intersection logic does not, though. So we are best off sanitizing the
-  // input, by clearing the end key if it is empty.
-  if (row_range_.has_end_key_closed()) {
-    if (internal::IsEmptyRowKey(row_range_.end_key_closed())) {
-      row_range_.clear_end_key_closed();
-    }
-  }
-  if (row_range_.has_end_key_open()) {
-    if (internal::IsEmptyRowKey(row_range_.end_key_open())) {
-      row_range_.clear_end_key_open();
-    }
-  }
+  internal::RowRangeHelpers::SanitizeEmptyEndKeys(row_range_);  // Same end-key sanitizing as the removed lines above, now shared via the helper.
 }
 
-bool RowRange::IsEmpty() const {
-  RowKeyType unused;
-  // We do not want to copy the strings unnecessarily, so initialize a reference
- auto const* start = &unused; - bool start_open = false; - switch (row_range_.start_key_case()) { - case btproto::RowRange::kStartKeyClosed: - start = &row_range_.start_key_closed(); - break; - case btproto::RowRange::kStartKeyOpen: - start = &row_range_.start_key_open(); - start_open = true; - break; - case btproto::RowRange::START_KEY_NOT_SET: - break; - } - // We need to initialize this to something to make g++ happy, but it cannot - // be a value that is discarded in all switch() cases to make Clang happy. - auto const* end = &row_range_.end_key_closed(); - bool end_open = false; - switch (row_range_.end_key_case()) { - case btproto::RowRange::kEndKeyClosed: - // Already initialized. - break; - case btproto::RowRange::kEndKeyOpen: - end = &row_range_.end_key_open(); - end_open = true; - break; - case btproto::RowRange::END_KEY_NOT_SET: - // A range ending at +infinity is never empty. - return false; - } - - // Special case of an open interval of two consecutive strings. - if (start_open && end_open && internal::ConsecutiveRowKeys(*start, *end)) { - return true; - } +RowRange RowRange::Empty() { + return RowRange(internal::RowRangeHelpers::Empty()); +} - // Compare the strings as byte vectors (careful with unsigned chars). 
- int cmp = internal::CompareRowKey(*start, *end); - if (cmp == 0) { - return start_open || end_open; - } - return cmp > 0; +bool RowRange::IsEmpty() const { + return internal::RowRangeHelpers::IsEmpty(row_range_); } bool RowRange::BelowStart(RowKeyType const& key) const { - switch (row_range_.start_key_case()) { - case btproto::RowRange::START_KEY_NOT_SET: - break; - case btproto::RowRange::kStartKeyClosed: - return key < row_range_.start_key_closed(); - case btproto::RowRange::kStartKeyOpen: - return key <= row_range_.start_key_open(); - } - return false; + return internal::RowRangeHelpers::BelowStart(row_range_, key); } bool RowRange::AboveEnd(RowKeyType const& key) const { - switch (row_range_.end_key_case()) { - case btproto::RowRange::END_KEY_NOT_SET: - break; - case btproto::RowRange::kEndKeyClosed: - return key > row_range_.end_key_closed(); - case btproto::RowRange::kEndKeyOpen: - return key >= row_range_.end_key_open(); - } - return false; + return internal::RowRangeHelpers::AboveEnd(row_range_, key); } std::pair RowRange::Intersect(RowRange const& range) const { - if (range.IsEmpty()) { - return std::make_pair(false, RowRange::Empty()); - } - std::string empty; - - // The algorithm is simple: start with *this as a the resulting range. Update - // both endpoints based on the value of @p range. If the resulting range is - // empty there is no intersection. - RowRange intersection(*this); - - switch (range.row_range_.start_key_case()) { - case btproto::RowRange::START_KEY_NOT_SET: - break; - case btproto::RowRange::kStartKeyClosed: { - auto const& start = range.row_range_.start_key_closed(); - // If `range` starts above the current range then there is no - // intersection. - if (intersection.AboveEnd(start)) { - return std::make_pair(false, Empty()); - } - // If `start` is inside the intersection (as computed so far), then the - // intersection must start at `start`, and it would be closed if `range` - // is closed at the start. 
- if (intersection.Contains(start)) { - intersection.row_range_.set_start_key_closed(start); - } - } break; - case btproto::RowRange::kStartKeyOpen: { - // The case where `range` is open on the start point is analogous. - auto const& start = range.row_range_.start_key_open(); - if (intersection.AboveEnd(start)) { - return std::make_pair(false, Empty()); - } - if (intersection.Contains(start)) { - intersection.row_range_.set_start_key_open(start); - } - } break; - } - - // Then check if the end limit of @p range is below *this. - switch (range.row_range_.end_key_case()) { - case btproto::RowRange::END_KEY_NOT_SET: - break; - case btproto::RowRange::kEndKeyClosed: { - // If `range` ends before the start of the intersection there is no - // intersection and we can return immediately. - auto const& end = range.row_range_.end_key_closed(); - if (intersection.BelowStart(end)) { - return std::make_pair(false, Empty()); - } - // If `end` is inside the intersection as computed so far, then the - // intersection must end at `end` and it is closed if `range` is closed - // at the end. - if (intersection.Contains(end)) { - intersection.row_range_.set_end_key_closed(end); - } - } break; - case btproto::RowRange::kEndKeyOpen: { - // Do the analogous thing for `end` being a open endpoint. 
- auto const& end = range.row_range_.end_key_open(); - if (intersection.BelowStart(end)) { - return std::make_pair(false, Empty()); - } - if (intersection.Contains(end)) { - intersection.row_range_.set_end_key_open(end); - } - } break; - } - - bool is_empty = intersection.IsEmpty(); - return std::make_pair(!is_empty, std::move(intersection)); + auto res = internal::RowRangeHelpers::Intersect(row_range_, range.row_range_); + return std::make_pair(res.first, RowRange(std::move(res.second))); } bool operator==(RowRange const& lhs, RowRange const& rhs) { diff --git a/google/cloud/bigtable/row_range.h b/google/cloud/bigtable/row_range.h index e39ae676c3703..47f43d9402138 100644 --- a/google/cloud/bigtable/row_range.h +++ b/google/cloud/bigtable/row_range.h @@ -65,14 +65,7 @@ class RowRange { } /// Return an empty range. - static RowRange Empty() { - RowRange result; - // Return an open interval that contains no key, using "\0" for the end key. - // We can't use "", because when appearing as the end it means 'infinity'. - result.row_range_.set_start_key_open(""); - result.row_range_.set_end_key_open(std::string("\0", 1)); - return result; - } + static RowRange Empty(); /// Return the range representing the interval [@p begin, @p end). template From f1e4740a2f64cafec059c9279878ba4d59e37c6c Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Sat, 28 Dec 2024 02:31:37 +0100 Subject: [PATCH 003/195] Tidy up iterators. 
---
 google/cloud/bigtable/emulator/cell_view.h | 65 ++++++++++
 .../cloud/bigtable/emulator/column_family.cc | 47 ++++----
 .../cloud/bigtable/emulator/column_family.h | 55 +++++----
 .../bigtable/emulator/column_family_test.cc | 24 ++--
 .../cloud/bigtable/emulator/row_iterators.h | 112 +++++++++++++-----
 .../cloud/bigtable/emulator/row_streamer.cc | 46 +++----
 google/cloud/bigtable/emulator/row_streamer.h | 5 +-
 google/cloud/bigtable/emulator/table.cc | 50 ++++----
 google/cloud/bigtable/emulator/table.h | 5 -
 9 files changed, 266 insertions(+), 143 deletions(-)
 create mode 100644 google/cloud/bigtable/emulator/cell_view.h

diff --git a/google/cloud/bigtable/emulator/cell_view.h b/google/cloud/bigtable/emulator/cell_view.h
new file mode 100644
index 0000000000000..819464c3b8a21
--- /dev/null
+++ b/google/cloud/bigtable/emulator/cell_view.h
@@ -0,0 +1,65 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CELL_VIEW_H
+#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CELL_VIEW_H
+
+#include
+#include
+#include
+
+namespace google {
+namespace cloud {
+namespace bigtable {
+namespace emulator {
+
+/**
+ * A class used to represent values when scanning a table.
+ *
+ * It is transient - it should never be stored as it only contains references to
+ * data which will likely become invalidated on first update. Only the
+ * timestamp is held by value; every string is referenced, not copied.
+ */
+class CellView {
+ public:
+  CellView(std::string const& row_key, std::string const& column_family,  // Binds to the caller's strings: they must outlive this view, so do not pass temporaries.
+           std::string const& column_qualifier,
+           std::chrono::milliseconds timestamp, std::string const& value)
+      : row_key_(row_key),
+        column_family_(column_family),
+        column_qualifier_(column_qualifier),
+        timestamp_(timestamp),
+        value_(value) {}
+
+  std::string const& row_key() const { return row_key_.get(); }  // Each accessor returns the referenced string without copying.
+  std::string const& column_family() const { return column_family_.get(); }
+  std::string const& column_qualifier() const {
+    return column_qualifier_.get();
+  }
+  std::chrono::milliseconds timestamp() const { return timestamp_; }
+  std::string const& value() const { return value_.get(); }
+
+ private:
+  std::reference_wrapper row_key_;  // NOTE(review): template arguments look stripped in this copy of the patch - confirm against the repository.
+  std::reference_wrapper column_family_;
+  std::reference_wrapper column_qualifier_;
+  std::chrono::milliseconds timestamp_;
+  std::reference_wrapper value_;
+};
+
+}  // namespace emulator
+}  // namespace bigtable
+}  // namespace cloud
+}  // namespace google
+
+#endif  // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CELL_VIEW_H
diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc
index 58c5bee629901..358fff1209ded 100644
--- a/google/cloud/bigtable/emulator/column_family.cc
+++ b/google/cloud/bigtable/emulator/column_family.cc
@@ -13,17 +13,18 @@
 // limitations under the License.
#include "google/cloud/bigtable/emulator/column_family.h" +#include namespace google { namespace cloud { namespace bigtable { namespace emulator { -ColumnFamily::iterator::iterator( +ColumnFamily::const_iterator::const_iterator( ColumnFamily const& column_family, std::shared_ptr row_set) : column_family_(std::cref(column_family)), row_set_(std::move(row_set)) { if (row_set_) { - std::cout << "ColumnFamily::iterator::iterator():" << std::endl; + std::cout << "ColumnFamily::const_iterator::const_iterator():" << std::endl; for (auto const& range : row_set_->disjoint_ranges()) { std::cout << " "; if (range.has_start_key_closed()) { @@ -54,7 +55,7 @@ ColumnFamily::iterator::iterator( } } -void ColumnFamily::iterator::AdvanceToNextRange() { +void ColumnFamily::const_iterator::AdvanceToNextRange() { if (row_set_pos_ == row_set_->disjoint_ranges().end()) { // We've reached the end. row_pos_ = column_family_.get().rows_.end(); @@ -80,7 +81,7 @@ void ColumnFamily::iterator::AdvanceToNextRange() { } } -void ColumnFamily::iterator::EnsureIteratorValid() { +void ColumnFamily::const_iterator::EnsureIteratorValid() { // `row_pos_` may point to a row which is past the end of the range pointed by // row_set_pos_. Make sure this only happens when the iteration reaches its // end. @@ -96,8 +97,8 @@ void ColumnFamily::iterator::EnsureIteratorValid() { // following ranges, i.e. we've reached the end. } -ColumnFamily::iterator& ColumnFamily::iterator::operator++() { - std::cout << "ColumnFamily::iterator::operator++ this=" +ColumnFamily::const_iterator& ColumnFamily::const_iterator::operator++() { + std::cout << "ColumnFamily::const_iterator::operator++ this=" << reinterpret_cast(this) << " val before: " << (row_pos_ == column_family_.get().rows_.end() ? 
std::string("end") @@ -108,30 +109,32 @@ ColumnFamily::iterator& ColumnFamily::iterator::operator++() { return *this; } -ColumnFamily::iterator ColumnFamily::iterator::operator++(int) { - ColumnFamily::iterator retval = *this; +ColumnFamily::const_iterator ColumnFamily::const_iterator::operator++(int) { + ColumnFamily::const_iterator retval = *this; ++(*this); return retval; } -void ColumnRow::SetCell(std::int64_t timestamp_micros, std::string const& value) { - if (timestamp_micros < 0) { - // Time since epoch expressed in microseconds but rounded to milliseconds. - timestamp_micros = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count() * - 1000LL; +void ColumnRow::SetCell(std::chrono::milliseconds timestamp, + std::string const& value) { + if (timestamp <= std::chrono::milliseconds::zero()) { + timestamp = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); } - cells_[timestamp_micros] = std::move(value); + cells_[timestamp] = std::move(value); } std::size_t ColumnRow::DeleteTimeRange( ::google::bigtable::v2::TimestampRange const& time_range) { std::size_t num_erased = 0; - for (auto cell_it = cells_.lower_bound(time_range.start_timestamp_micros()); + for (auto cell_it = cells_.lower_bound( + std::chrono::duration_cast( + std::chrono::microseconds(time_range.start_timestamp_micros()))); cell_it != cells_.end() && (time_range.end_timestamp_micros() == 0 || - cell_it->first < time_range.end_timestamp_micros());) { + cell_it->first < std::chrono::duration_cast( + std::chrono::microseconds( + time_range.end_timestamp_micros())));) { cells_.erase(cell_it++); ++num_erased; } @@ -139,9 +142,9 @@ std::size_t ColumnRow::DeleteTimeRange( } void ColumnFamilyRow::SetCell(std::string const& column_qualifier, - std::int64_t timestamp_micros, + std::chrono::milliseconds timestamp, std::string const& value) { - columns_[column_qualifier].SetCell(timestamp_micros, value); + 
columns_[column_qualifier].SetCell(timestamp, value); } std::size_t ColumnFamilyRow::DeleteColumn( @@ -159,9 +162,9 @@ std::size_t ColumnFamilyRow::DeleteColumn( void ColumnFamily::SetCell(std::string const& row_key, std::string const& column_qualifier, - std::int64_t timestamp_micros, + std::chrono::milliseconds timestamp, std::string const& value) { - rows_[row_key].SetCell(column_qualifier, timestamp_micros, value); + rows_[row_key].SetCell(column_qualifier, timestamp, value); } bool ColumnFamily::DeleteRow(std::string const& row_key) { diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 9a93e6aecd1fc..6653ed6d32786 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -28,30 +28,31 @@ namespace emulator { class ColumnRow { public: - void SetCell(std::int64_t timestamp_micros, std::string const& value); + void SetCell(std::chrono::milliseconds timestamp, std::string const& value); std::size_t DeleteTimeRange( ::google::bigtable::v2::TimestampRange const& time_range); bool HasCells() const { return !cells_.empty(); } - using iterator = std::map::const_iterator; - iterator begin() const { return cells_.begin(); } - iterator end() const { return cells_.end(); } + using const_iterator = + std::map::const_iterator; + const_iterator begin() const { return cells_.begin(); } + const_iterator end() const { return cells_.end(); } private: - std::map cells_; + std::map cells_; }; class ColumnFamilyRow { public: void SetCell(std::string const& column_qualifier, - std::int64_t timestamp_micros, std::string const& value); + std::chrono::milliseconds timestamp, std::string const& value); std::size_t DeleteColumn( std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); bool HasColumns() { return !columns_.empty(); } - using iterator = std::map::const_iterator; - iterator begin() const { return columns_.begin(); } - 
iterator end() const { return columns_.end(); } + using const_iterator = std::map::const_iterator; + const_iterator begin() const { return columns_.begin(); } + const_iterator end() const { return columns_.end(); } private: std::map columns_; @@ -59,46 +60,44 @@ class ColumnFamilyRow { class ColumnFamily { public: - class iterator; + class const_iterator; void SetCell(std::string const& row_key, std::string const& column_qualifier, - std::int64_t timestamp_micros, std::string const& value); + std::chrono::milliseconds timestamp, std::string const& value); bool DeleteRow(std::string const& row_key); std::size_t DeleteColumn( std::string const& row_key, std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); - iterator begin(std::shared_ptr row_set) const { - return iterator(*this, std::move(row_set)); + const_iterator FindRows(std::shared_ptr row_set) const { + return const_iterator(*this, std::move(row_set)); } - iterator end() const { return iterator(*this, {}); } + const_iterator end() const { return const_iterator(*this, {}); } - class iterator { + class const_iterator { public: using iterator_category = std::input_iterator_tag; - using value_type = std::pair; - using difference_type = std::size_t; + using value_type = std::pair const; + using difference_type = std::ptrdiff_t; using reference = value_type&; using pointer = value_type*; - using const_reference = value_type const&; - using const_pointer = value_type const*; - iterator& operator++(); - iterator operator++(int); - bool operator==(iterator const& other) const { + const_iterator& operator++(); + const_iterator operator++(int); + bool operator==(const_iterator const& other) const { return row_pos_ == other.row_pos_; } - bool operator!=(iterator const& other) const { + bool operator!=(const_iterator const& other) const { return !(*this == other); } - const_reference operator*() const { return *row_pos_; } + reference operator*() const { return *row_pos_; } - 
friend iterator ColumnFamily::begin(std::shared_ptr) const; - friend iterator ColumnFamily::end() const; + friend const_iterator ColumnFamily::FindRows(std::shared_ptr) const; + friend const_iterator ColumnFamily::end() const; private: - iterator(ColumnFamily const& column_family, + const_iterator(ColumnFamily const& column_family, std::shared_ptr row_set); void AdvanceToNextRange(); @@ -112,7 +111,7 @@ class ColumnFamily { }; private: - friend class iterator; + friend class const_iterator; std::map rows_; }; diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index 6b821938641cd..0e750741a4bb5 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -24,17 +24,19 @@ namespace bigtable { namespace emulator { namespace { +using namespace std::chrono_literals; + TEST(ColumnFamilyIterator, Simple) { ColumnFamily fam; - fam.SetCell("row1", "col1", 123, "foo"); - fam.SetCell("row1", "col1", 124, "fo"); - fam.SetCell("row1", "col2", 123, "bar"); - fam.SetCell("row2", "col1", 123, "foo"); - fam.SetCell("row2", "col3", 120, "baz"); - fam.SetCell("row2", "col3", 120, "baz"); + fam.SetCell("row1", "col1", 123ms, "foo"); + fam.SetCell("row1", "col1", 124ms, "fo"); + fam.SetCell("row1", "col2", 123ms, "bar"); + fam.SetCell("row2", "col1", 123ms, "foo"); + fam.SetCell("row2", "col3", 120ms, "baz"); + fam.SetCell("row2", "col3", 120ms, "baz"); std::vector rows; std::transform( - fam.begin(std::shared_ptr( + fam.FindRows(std::shared_ptr( new SortedRowSet(SortedRowSet::AllRows()))), fam.end(), std::back_inserter(rows), @@ -45,6 +47,14 @@ TEST(ColumnFamilyIterator, Simple) { EXPECT_EQ(expected, rows); } +class Foo { + public: + Foo(std::string const& foo) : foo_(foo) {} + + private: + std::reference_wrapper foo_; +}; + } // anonymous namespace } // namespace emulator } // namespace bigtable diff --git 
a/google/cloud/bigtable/emulator/row_iterators.h b/google/cloud/bigtable/emulator/row_iterators.h index 5578dc0a0636b..eaaacf143a5e9 100644 --- a/google/cloud/bigtable/emulator/row_iterators.h +++ b/google/cloud/bigtable/emulator/row_iterators.h @@ -15,6 +15,8 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_ITERATORS_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_ITERATORS_H +#include +#include "google/cloud/bigtable/emulator/cell_view.h" #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/bigtable/emulator/sorted_row_set.h" #include @@ -31,17 +33,12 @@ class MergedSortedIterator { public: using iterator_category = std::input_iterator_tag; using value_type = typename std::iterator_traits::value_type; - using difference_type = std::size_t; - using reference = value_type&; - using pointer = value_type*; - using const_reference = value_type const&; - using const_pointer = value_type const*; - - // end() iterator. - MergedSortedIterator() = default; - MergedSortedIterator(MergedSortedIterator const& other) = default; - MergedSortedIterator(MergedSortedIterator&& other) = default; + using difference_type = + typename std::iterator_traits::difference_type; + using reference = typename std::iterator_traits::reference; + using pointer = typename std::iterator_traits::pointer; + MergedSortedIterator() = default; // end() MergedSortedIterator(std::vector> ranges) { for (auto & range : ranges) { if (range.first != range.second) { @@ -54,6 +51,10 @@ class MergedSortedIterator { return *ranges_.top().first; } + pointer operator->() const { + return ranges_.top().first; + } + MergedSortedIterator& operator++() { auto prev_top = ranges_.top();; // We need to remove it from the priority queue because we're likely to @@ -98,17 +99,18 @@ template class FlattenedIterator { public: - using InnerCollection = typename std::result_of::value_type const&)>::type; - using InnerIterator = typename std::decay_t::iterator; + using 
InnerCollection = std::decay_t::reference)>::type>; + using InnerIterator = typename InnerCollection::const_iterator; + using iterator_category = std::input_iterator_tag; - using value_type = typename std::result_of::value_type const&, - typename std::iterator_traits::value_type const&)>::type; + using value_type = std::decay_t::reference, + typename std::iterator_traits::reference)>::type> const; using difference_type = typename std::iterator_traits::difference_type; - using pointer = value_type const*; - using reference = value_type const&; + using pointer = value_type*; + using reference = value_type&; FlattenedIterator(OuterIterator begin, OuterIterator end) : outer_pos_(std::move(begin)), outer_end_(std::move(end)) { @@ -120,7 +122,12 @@ class FlattenedIterator { value_type operator*() const { assert(inner_pos_ != DescendFunctor()(*outer_pos_).end()); - return ValueCombineFunctor()(*outer_pos_, *inner_pos_); + return GetCachedValue(); + } + + pointer operator->() const { + assert(inner_pos_ != DescendFunctor()(*outer_pos_).end()); + return &GetCachedValue(); } FlattenedIterator& operator++() { @@ -148,8 +155,10 @@ class FlattenedIterator { OuterIterator outer_pos_; OuterIterator outer_end_; InnerIterator inner_pos_; + mutable absl::optional> cached_value_; void EnsureIteratorValid() { + cached_value_.reset(); while (outer_pos_ != outer_end_ && inner_pos_ == DescendFunctor()(*outer_pos_).end()) { ++outer_pos_; @@ -158,26 +167,37 @@ class FlattenedIterator { } } } + + reference GetCachedValue() const { + if (!cached_value_) { + cached_value_.emplace(ValueCombineFunctor()(*outer_pos_, *inner_pos_)); + } + return *cached_value_; + } }; template class TransformIterator { public: using iterator_category = std::input_iterator_tag; - using value_type = typename std::result_of::value_type)>::type; + using value_type = std::decay_t::value_type)>::type> const; using difference_type = typename std::iterator_traits::difference_type; using pointer = value_type*; using 
reference = value_type&; TransformIterator(InputIterator it, Functor func) - : current(std::move(it)), transformer(std::move(func)) {} + : current_(std::move(it)), transformer_(std::move(func)) {} + TransformIterator(TransformIterator const& other) = default; + TransformIterator(TransformIterator&& other) = default; - value_type operator*() const { return transformer(*current); } + value_type operator*() const { return GetCachedValue(); } + pointer operator->() const { return &GetCachedValue(); } TransformIterator& operator++() { - ++current; + cached_value_.reset(); + ++current_; return *this; } @@ -188,16 +208,52 @@ class TransformIterator { } bool operator==(TransformIterator const& other) const { - return current == other.current; + return current_ == other.current_; } bool operator!=(TransformIterator const& other) const { - return current != other.current; + return current_ != other.current_; + } + + TransformIterator &operator=(TransformIterator const &other) { + if (this == &other) { + return *this; + } + current_ = other.current_; + transformer_ = other.transformer_; + if (other.cached_value_) { + cached_value_.emplace(other.cached_value_.get()); + } else { + cached_value_.reset(); + } + return this; + } + + TransformIterator &operator=(TransformIterator &&other) { + if (this == &other) { + return *this; + } + current_ = std::move(other.current_); + transformer_ = std::move(other.transformer_); + if (other.cached_value_) { + cached_value_.emplace(*std::move(other.cached_value_)); + } else { + cached_value_.reset(); + } + return *this; + } + + reference GetCachedValue() const { + if (!cached_value_) { + cached_value_.emplace(transformer_(*current_)); + } + return *cached_value_; } private: - InputIterator current; - Functor transformer; + InputIterator current_; + Functor transformer_; + mutable absl::optional> cached_value_; }; // Helper function to create a TransformIterator diff --git a/google/cloud/bigtable/emulator/row_streamer.cc 
b/google/cloud/bigtable/emulator/row_streamer.cc index 7902cfea4375e..6a0d9f0a0ce9f 100644 --- a/google/cloud/bigtable/emulator/row_streamer.cc +++ b/google/cloud/bigtable/emulator/row_streamer.cc @@ -25,37 +25,37 @@ namespace btproto = ::google::bigtable::v2; RowStreamer::RowStreamer(grpc::ServerWriter& writer) : writer_(writer) {} -bool RowStreamer::Stream( - std::tuple const& cell) { +bool RowStreamer::Stream(CellView const& cell) { std::cout << "Attempting to stream" << std::endl; btproto::ReadRowsResponse::CellChunk chunk; - if (!current_row_key_ || (&current_row_key_->get() != &std::get<0>(cell) && - current_row_key_->get() != std::get<0>(cell))) { + if (!current_row_key_ || (&current_row_key_->get() != &cell.row_key() && + current_row_key_->get() != cell.row_key())) { if (!pending_chunks_.empty()) { pending_chunks_.back().set_commit_row(true); } - current_row_key_ = std::cref(std::get<0>(cell)); - current_column_family_ = std::cref(std::get<1>(cell)); - current_column_qualifier_ = std::cref(std::get<2>(cell)); - chunk.set_row_key(std::get<0>(cell)); - chunk.mutable_family_name()->set_value(std::get<1>(cell)); - chunk.mutable_qualifier()->set_value(std::get<2>(cell)); + current_row_key_ = std::cref(cell.row_key()); + current_column_family_ = std::cref(cell.column_family()); + current_column_qualifier_ = std::cref(cell.column_qualifier()); + chunk.set_row_key(cell.row_key()); + chunk.mutable_family_name()->set_value(cell.column_family()); + chunk.mutable_qualifier()->set_value(cell.column_qualifier()); } - if (&current_column_family_->get() != &std::get<1>(cell) && - current_row_key_->get() != std::get<1>(cell)) { - current_column_family_ = std::cref(std::get<1>(cell)); - current_column_qualifier_ = std::cref(std::get<2>(cell)); - chunk.mutable_family_name()->set_value(std::get<1>(cell)); - chunk.mutable_qualifier()->set_value(std::get<2>(cell)); + if (&current_column_family_->get() != &cell.column_family() && + current_row_key_->get() != cell.column_family()) { +
current_column_family_ = std::cref(cell.column_family()); + current_column_qualifier_ = std::cref(cell.column_qualifier()); + chunk.mutable_family_name()->set_value(cell.column_family()); + chunk.mutable_qualifier()->set_value(cell.column_qualifier()); } - if (&current_column_qualifier_->get() != &std::get<2>(cell) && - current_row_key_->get() != std::get<2>(cell)) { - current_column_qualifier_ = std::cref(std::get<2>(cell)); - chunk.mutable_qualifier()->set_value(std::get<2>(cell)); + if (&current_column_qualifier_->get() != &cell.column_qualifier() && + current_row_key_->get() != cell.column_qualifier()) { + current_column_qualifier_ = std::cref(cell.column_qualifier()); + chunk.mutable_qualifier()->set_value(cell.column_qualifier()); } - chunk.set_timestamp_micros(std::get<3>(cell)); - chunk.set_value(std::get<4>(cell)); + chunk.set_timestamp_micros( + std::chrono::duration_cast(cell.timestamp()) + .count()); + chunk.set_value(cell.value()); pending_chunks_.emplace_back(std::move(chunk)); if (pending_chunks_.size() > 200) { return Flush(false); diff --git a/google/cloud/bigtable/emulator/row_streamer.h b/google/cloud/bigtable/emulator/row_streamer.h index 558d8dd266a00..3a44152aed687 100644 --- a/google/cloud/bigtable/emulator/row_streamer.h +++ b/google/cloud/bigtable/emulator/row_streamer.h @@ -16,6 +16,7 @@ #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_STREAMER_H #include +#include "google/cloud/bigtable/emulator/cell_view.h" #include #include "absl/types/optional.h" @@ -28,9 +29,7 @@ class RowStreamer { public: RowStreamer( grpc::ServerWriter& writer); - bool Stream( - std::tuple const&); + bool Stream(CellView const& cell_view); bool Flush(bool stream_finished); diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 2a15c83366cd9..56dcb6e888f46 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -234,7 +234,9 @@ Status Table::MutateRow( }
maybe_column_family->get().SetCell( request.row_key(), set_cell.column_qualifier(), - set_cell.timestamp_micros(), set_cell.value()); + std::chrono::duration_cast( + std::chrono::microseconds(set_cell.timestamp_micros())), + set_cell.value()); } else if (mutation.has_add_to_cell()) { // FIXME } else if (mutation.has_merge_to_cell()) { @@ -284,14 +286,9 @@ class ExtendWithColumnFamilyName { explicit ExtendWithColumnFamilyName(std::string const& column_family_name) : column_family_name_(std::cref(column_family_name)) {} - ExtendWithColumnFamilyName(ExtendWithColumnFamilyName const&) = default; - ExtendWithColumnFamilyName(ExtendWithColumnFamilyName&&) = default; - ExtendWithColumnFamilyName& operator=(ExtendWithColumnFamilyName const&) = - default; - ExtendWithColumnFamilyName& operator=(ExtendWithColumnFamilyName&) = default; ExtendedType operator()( - std::iterator_traits::value_type const& + std::iterator_traits::reference row_key_and_column) const { return ExtendedType(row_key_and_column.first, column_family_name_.get(), row_key_and_column.second); @@ -303,9 +300,9 @@ class ExtendWithColumnFamilyName { struct RowKeyLess { bool operator()( - TransformIterator::value_type const& lhs, - TransformIterator::value_type const& rhs) const { auto row_key_cmp = @@ -333,7 +330,7 @@ struct CombineColumnIterators { ReturnType operator()( std::tuple const& column_family_row, - std::pair const& column_row) { + std::pair const& column_row) const { return ReturnType(std::get<0>(column_family_row), std::get<1>(column_family_row), column_row.first, column_row.second); @@ -349,14 +346,13 @@ struct DescendToCell { }; struct CombineCellIterators { - using ReturnType = - std::tuple const; - ReturnType operator()( - CombineColumnIterators::ReturnType const &column_row, - std::pair const& cell) { - static_assert(std::is_same, - ColumnRow::iterator::value_type>::value); + using ReturnType = CellView; + ReturnType operator()(CombineColumnIterators::ReturnType const& column_row, + std::pair 
const& cell) const { + static_assert( + std::is_same, + ColumnRow::const_iterator::value_type>::value); return ReturnType(std::get<0>(column_row), std::get<1>(column_row), std::get<2>(column_row), @@ -379,17 +375,17 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, } std::lock_guard lock(mu_); std::vector, - TransformIterator>> + TransformIterator, + TransformIterator>> cf_ranges; for (auto const &column_family : column_families_) { cf_ranges.emplace_back( TransformIteratorRange( - column_family.second->begin(row_set), column_family.second->end(), + column_family.second->FindRows(row_set), column_family.second->end(), ExtendWithColumnFamilyName(column_family.first))); } using CFRowsIt = MergedSortedIterator< - TransformIterator, + TransformIterator, RowKeyLess>; CFRowsIt cfrows_begin(std::move(cf_ranges)); CFRowsIt cfrows_end; @@ -406,11 +402,11 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, std::cout << "Print start" << std::endl; for (; cellrows_begin != cellrows_end; ++cellrows_begin) { - std::cout << "Row: " << std::get<0>(*cellrows_begin) - << " column_family: " << std::get<1>(*cellrows_begin) - << " column_qualifier: " << std::get<2>(*cellrows_begin) - << " column_timestamp: " << std::get<3>(*cellrows_begin) - << " column_value: " << std::get<4>(*cellrows_begin) + std::cout << "Row: " << (*cellrows_begin).row_key() + << " column_family: " << (*cellrows_begin).column_family() + << " column_qualifier: " << (*cellrows_begin).column_qualifier() + << " column_timestamp: " << (*cellrows_begin).timestamp().count() + << " column_value: " << (*cellrows_begin).value() << std::endl; if (!row_streamer.Stream(*cellrows_begin)) { std::cout << "HOW?" 
<< std::endl; diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index c8df8d6179bd3..3c8786efdbb7e 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -31,11 +31,6 @@ namespace cloud { namespace bigtable { namespace emulator { -struct CellView { - std::int64_t timestamp; - std::string const& value; -}; - class Table { public: static StatusOr> Create( From 58cdbb151a03435338555eebf2365c72ed0da770 Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Sun, 29 Dec 2024 13:32:43 +0100 Subject: [PATCH 004/195] Mostly working filters. --- google/cloud/bigtable/emulator/CMakeLists.txt | 19 +- .../emulator/bigtable_emulator_common.bzl | 16 +- .../emulator/bigtable_emulator_unit_tests.bzl | 1 + google/cloud/bigtable/emulator/filter.cc | 230 ++++++++++++++++++ google/cloud/bigtable/emulator/filter.h | 55 +++++ google/cloud/bigtable/emulator/filter_test.cc | 49 ++++ .../cloud/bigtable/emulator/row_iterators.h | 12 - google/cloud/bigtable/emulator/table.cc | 88 ++++--- google/cloud/bigtable/emulator/table.h | 2 + 9 files changed, 415 insertions(+), 57 deletions(-) create mode 100644 google/cloud/bigtable/emulator/filter.cc create mode 100644 google/cloud/bigtable/emulator/filter.h create mode 100644 google/cloud/bigtable/emulator/filter_test.cc diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index 4da665c7297f9..17532da45722c 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -16,22 +16,28 @@ add_library( bigtable_emulator_common # cmake-format: sort + cell_view.h cluster.cc cluster.h column_family.cc column_family.h - table.cc - table.h - server.cc - server.h - sorted_row_set.cc - sorted_row_set.h + column_family_test.cc + filter.cc + filter.h row_iterators.cc row_iterators.h row_streamer.cc row_streamer.h + server.cc + server.h + sorted_row_set.cc + sorted_row_set.h + 
table.cc + table.h to_grpc_status.cc to_grpc_status.h) + + target_link_libraries( bigtable_emulator_common google-cloud-cpp::bigtable @@ -51,6 +57,7 @@ if (BUILD_TESTING) set(bigtable_emulator_unit_tests # cmake-format: sort column_family_test.cc + filter_test.cc row_iterators_test.cc server_test.cc sorted_row_set_test.cc) diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl index 02f827b6c4036..f798585e6f11b 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl @@ -17,23 +17,27 @@ """Automatically generated source lists for bigtable_emulator_common - DO NOT EDIT.""" bigtable_emulator_common_hdrs = [ + "cell_view.h", "cluster.h", "column_family.h", - "table.h", - "server.h", - "sorted_row_set.h", + "filter.h", "row_iterators.h", "row_streamer.h", + "server.h", + "sorted_row_set.h", + "table.h", "to_grpc_status.h", ] bigtable_emulator_common_srcs = [ "cluster.cc", "column_family.cc", - "table.cc", - "server.cc", - "sorted_row_set.cc", + "column_family_test.cc", + "filter.cc", "row_iterators.cc", "row_streamer.cc", + "server.cc", + "sorted_row_set.cc", + "table.cc", "to_grpc_status.cc", ] diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl index 135bacf54e7e8..b860ad9410c51 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -18,6 +18,7 @@ bigtable_emulator_unit_tests = [ "column_family_test.cc", + "filter_test.cc", "row_iterators_test.cc", "server_test.cc", "sorted_row_set_test.cc", diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc new file mode 100644 index 0000000000000..05b157a937d3e --- /dev/null +++ b/google/cloud/bigtable/emulator/filter.cc @@ -0,0 +1,230 @@ +// 
Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/status_or.h" +#include "google/cloud/internal/make_status.h" +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +CellView const& CellStream::operator++() { + Next(); + return Value(); +} + +CellView CellStream::operator++(int) { + CellView tmp = Value(); + Next(); + return tmp; +} + +template +class PerRowStatusFilter { + public: + PerRowStatusFilter(CellStream source, FilterFunctor filter, + StateResetFunctor reset) + : filter_(std::move(filter)), + reset_(std::move(reset)), + source_(std::move(source)) {} + + absl::optional operator()() { + for (; source_; ++source_) { + if (!prev_row_ || + !(&prev_row_.value().get() == &source_->row_key() || + prev_row_.value().get() == source_->row_key())) { + state_ = reset_(); + prev_row_ = source_->row_key(); + } + if (filter_(state_, source_.Value())) { + return source_++; + } + } + return {}; + } + private: + absl::optional> prev_row_; + State state_; + FilterFunctor filter_; + StateResetFunctor reset_; + CellStream source_; +}; + + +template +CellStream MakeTrivialFilter(CellStream source, Filter filter) { + return CellStream( + [source = std::move(source), + filter = std::move(filter)]() mutable -> absl::optional { + for (; source && !filter(*source); ++source); + if (!source) { + return {}; + } + return 
source++; + }); +} + +StatusOr CreateFilter( + ::google::bigtable::v2::RowFilter const& filter, CellStream source) { + if (filter.has_pass_all_filter()) { + if (!filter.pass_all_filter()) { + return InvalidArgumentError( + "`pass_all_filter` explicitly set to `false`.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + return source; + } + if (filter.has_block_all_filter()) { + if (!filter.block_all_filter()) { + return InvalidArgumentError( + "`block_all_filter` explicitly set to `false`.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + return CellStream([]() -> absl::optional { return {}; }); + } + if (filter.has_row_key_regex_filter()) { + std::cout << "Regex filter: " << filter.row_key_regex_filter() << std::endl; + auto pattern = std::make_shared(filter.row_key_regex_filter()); + if (!pattern->ok()) { + return InvalidArgumentError( + "`row_key_regex_filter` is not a valid RE2 regex.", + GCP_ERROR_INFO() + .WithMetadata("filter", filter.DebugString()) + .WithMetadata("description", pattern->error())); + } + return MakeTrivialFilter( + std::move(source), + [pattern = std::move(pattern)](CellView const& cell_view) mutable { + return re2::RE2::PartialMatch(cell_view.row_key(), *pattern); + }); + } + if (filter.has_row_sample_filter()) { + double pass_prob = filter.row_sample_filter(); + if (pass_prob + std::numeric_limits::epsilon() < 0 + || pass_prob - std::numeric_limits::epsilon() > 1) { + return InvalidArgumentError( + "`row_sample_filter` is not a valid probability.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + auto per_row_filter = [] (bool skip, CellView const &) { + return !skip; + }; + std::mt19937 gen; + auto reset_state = [gen = std::move(gen), pass_prob]() mutable { + std::uniform_real_distribution dis(0.0, 1.0); + return dis(gen) > pass_prob; + }; + return CellStream(PerRowStatusFilter( + std::move(source), std::move(per_row_filter), std::move(reset_state))); + } + if 
(filter.has_family_name_regex_filter()) { + auto pattern = std::make_shared(filter.family_name_regex_filter()); + if (!pattern->ok()) { + return InvalidArgumentError( + "`family_name_regex_filter` is not a valid RE2 regex.", + GCP_ERROR_INFO() + .WithMetadata("filter", filter.DebugString()) + .WithMetadata("description", pattern->error())); + } + return CellStream( + [pattern = std::move(pattern), + source = std::move(source)]() mutable -> absl::optional { + for (; source && + !re2::RE2::PartialMatch(source->column_family(), *pattern); + ++source) { + } + if (!source) { + return {}; + } + return source++; + }); + } + if (filter.has_column_qualifier_regex_filter()) { + auto pattern = + std::make_shared(filter.column_qualifier_regex_filter()); + if (!pattern->ok()) { + return InvalidArgumentError( + "`column_qualifier_regex_filter` is not a valid RE2 regex.", + GCP_ERROR_INFO() + .WithMetadata("filter", filter.DebugString()) + .WithMetadata("description", pattern->error())); + } + return CellStream( + [pattern = std::move(pattern), + source = std::move(source)]() mutable -> absl::optional { + for (; source && + !re2::RE2::PartialMatch(source->column_qualifier(), *pattern); + ++source) { + } + if (!source) { + return {}; + } + return source++; + }); + } + if (filter.has_cells_per_row_offset_filter()) { + std::int64_t cells_per_row_offset = filter.cells_per_row_offset_filter(); + if (cells_per_row_offset < 0) { + return InvalidArgumentError( + "`cells_per_row_offset_filter` is negative.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + auto per_row_filter = [] (std::int64_t &skip, CellView const &) { + return skip-- <= 0; + }; + auto reset_state = [cells_per_row_offset]() { + return cells_per_row_offset; + }; + return CellStream(PerRowStatusFilter( + std::move(source), std::move(per_row_filter), std::move(reset_state))); + } + // ColumnRange column_range_filter = 7; + // TimestampRange timestamp_range_filter = 8; + // bytes value_regex_filter = 
9; + // ValueRange value_range_filter = 15; + // int32 cells_per_row_offset_filter = 10; + // int32 cells_per_row_limit_filter = 11; + // int32 cells_per_column_limit_filter = 12; + // bool strip_value_transformer = 13; + // string apply_label_transformer = 19; + if (filter.has_chain()) { + CellStream res = std::move(source); + for (auto const &subfilter : filter.chain().filters()) { + auto maybe_res = CreateFilter(subfilter, std::move(res)); + if (!maybe_res) { + return maybe_res.status(); + } + res = *std::move(maybe_res); + } + return res; + } + // Interleave interleave = 2; + // Condition condition = 3; + // bool sink = 16; + return UnimplementedError( + "Unsupported filter.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/filter.h b/google/cloud/bigtable/emulator/filter.h new file mode 100644 index 0000000000000..7e816e39a25e1 --- /dev/null +++ b/google/cloud/bigtable/emulator/filter.h @@ -0,0 +1,55 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTER_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTER_H + +#include "google/cloud/bigtable/emulator/cell_view.h" +#include "google/cloud/stream_range.h" +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +class CellStream { + public: + CellStream(std::function()> impl) + : impl_(std::move(impl)), current_(impl_()) {} + + bool HasValue() const { return current_.has_value(); } + CellView const & Value() const { return *current_; } + void Next() { current_ = impl_(); } + CellView const &operator++(); + CellView operator++(int); + CellView operator*() const { return Value(); } + CellView const* operator->() const { return &Value(); } + explicit operator bool() const { return HasValue(); } + + private: + std::function()> impl_; + absl::optional current_; +}; + +StatusOr CreateFilter( + ::google::bigtable::v2::RowFilter const& filter, CellStream source); + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTER_H + diff --git a/google/cloud/bigtable/emulator/filter_test.cc b/google/cloud/bigtable/emulator/filter_test.cc new file mode 100644 index 0000000000000..5f38b3c44ae6e --- /dev/null +++ b/google/cloud/bigtable/emulator/filter_test.cc @@ -0,0 +1,49 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "google/cloud/testing_util/is_proto_equal.h" +#include +#include "google/cloud/bigtable/data_connection.h" +#include "google/cloud/bigtable/table.h" +#include "google/cloud/testing_util/status_matchers.h" + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +namespace { + +TEST(DummyFilter, Simple) { + google::cloud::bigtable::Table table(MakeDataConnection(), + TableResource("fake", "baz", "ft")); + Filter filter = + Filter::Chain(Filter::FamilyRegex("fam1"), Filter::CellsRowOffset(2)); + for (StatusOr& row : + table.ReadRows(RowSet(RowRange::InfiniteRange()), filter)) { + ASSERT_STATUS_OK(row); + std::cout << row->row_key() << ":\n"; + for (auto const& cell : row->cells()) { + std::cout << "\t" << cell.family_name() << ":" << cell.column_qualifier() + << " @ " << cell.timestamp().count() << "us\n" + << "\t\"" << cell.value() << '"' << "\n"; + } + } +} + +} // anonymous namespace +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + diff --git a/google/cloud/bigtable/emulator/row_iterators.h b/google/cloud/bigtable/emulator/row_iterators.h index eaaacf143a5e9..036a8d9ff0c6d 100644 --- a/google/cloud/bigtable/emulator/row_iterators.h +++ b/google/cloud/bigtable/emulator/row_iterators.h @@ -256,18 +256,6 @@ class TransformIterator { mutable absl::optional> cached_value_; }; -// Helper function to create a TransformIterator -template -std::pair, - TransformIterator> -TransformIteratorRange(InputIterator begin, InputIterator end, Functor func) { - Functor func_copy(func); // avoid two copies - return std::make_pair(TransformIterator( - std::move(begin), std::move(func)), - TransformIterator( - std::move(end), std::move(func_copy))); -} - } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 56dcb6e888f46..f543bfff954a3 100644 --- 
a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -14,6 +14,7 @@ #include #include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/row_iterators.h" #include "google/protobuf/util/field_mask_util.h" #include "google/cloud/internal/make_status.h" @@ -361,32 +362,22 @@ struct CombineCellIterators { } }; -Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, - RowStreamer& row_streamer) const { - std::shared_ptr row_set; - if (request.has_rows()) { - auto maybe_row_set = SortedRowSet::Create(request.rows()); - if (!maybe_row_set) { - return maybe_row_set.status(); - } - row_set = std::make_shared(*std::move(maybe_row_set)); - } else { - row_set = std::make_shared(SortedRowSet::AllRows()); - } - std::lock_guard lock(mu_); - std::vector, - TransformIterator>> - cf_ranges; - for (auto const &column_family : column_families_) { - cf_ranges.emplace_back( - TransformIteratorRange( - column_family.second->FindRows(row_set), column_family.second->end(), - ExtendWithColumnFamilyName(column_family.first))); - } - using CFRowsIt = MergedSortedIterator< - TransformIterator, - RowKeyLess>; +CellStream Table::ReadRowsInternal( + std::shared_ptr row_set) const { + using CFWithNameIt = TransformIterator; + std::vector> cf_ranges; + + std::transform( + column_families_.begin(), column_families_.end(), + std::back_inserter(cf_ranges), [&](auto const& column_family) { + ExtendWithColumnFamilyName transformer(column_family.first); + return std::make_pair( + CFWithNameIt(column_family.second->FindRows(row_set), transformer), + CFWithNameIt(column_family.second->end(), transformer)); + }); + + using CFRowsIt = MergedSortedIterator; CFRowsIt cfrows_begin(std::move(cf_ranges)); CFRowsIt cfrows_end; @@ -401,19 +392,50 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, CellRowsIt cellrows_end(colrows_end, colrows_end); 
std::cout << "Print start" << std::endl; - for (; cellrows_begin != cellrows_end; ++cellrows_begin) { - std::cout << "Row: " << (*cellrows_begin).row_key() - << " column_family: " << (*cellrows_begin).column_family() - << " column_qualifier: " << (*cellrows_begin).column_qualifier() - << " column_timestamp: " << (*cellrows_begin).timestamp().count() - << " column_value: " << (*cellrows_begin).value() + return CellStream ([cellrows_begin, cellrows_end]() mutable + -> absl::optional { + if (cellrows_begin == cellrows_end) { + return {}; + } + return *cellrows_begin++; + }); +} + +Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, + RowStreamer& row_streamer) const { + std::shared_ptr row_set; + if (request.has_rows()) { + auto maybe_row_set = SortedRowSet::Create(request.rows()); + if (!maybe_row_set) { + return maybe_row_set.status(); + } + row_set = std::make_shared(*std::move(maybe_row_set)); + } else { + row_set = std::make_shared(SortedRowSet::AllRows()); + } + std::lock_guard lock(mu_); + auto stream = ReadRowsInternal(std::move(row_set)); + if (request.has_filter()) { + auto maybe_stream = CreateFilter(request.filter(), std::move(stream)); + if (!maybe_stream) { + return maybe_stream.status(); + } + stream = *maybe_stream; + } + for (; stream; ++stream) { + std::cout << "Row: " << stream->row_key() + << " column_family: " << stream->column_family() + << " column_qualifier: " << stream->column_qualifier() + << " column_timestamp: " << stream->timestamp().count() + << " column_value: " << stream->value() << std::endl; - if (!row_streamer.Stream(*cellrows_begin)) { + if (!row_streamer.Stream(*stream)) { std::cout << "HOW?" << std::endl; return AbortedError("Stream closed by the client.", GCP_ERROR_INFO()); } } if (!row_streamer.Flush(true)) { + std::cout << "Flush failed?" 
<< std::endl; return AbortedError("Stream closed by the client.", GCP_ERROR_INFO()); } std::cout << "Print stop" << std::endl; diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 3c8786efdbb7e..3552eeefb7216 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -22,6 +22,7 @@ #include #include #include +#include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/row_streamer.h" #include @@ -59,6 +60,7 @@ class Table { StatusOr> FindColumnFamily( MESSAGE const& message) const; bool IsDeleteProtectedNoLock() const; + CellStream ReadRowsInternal(std::shared_ptr row_set) const; Status Construct(google::bigtable::admin::v2::Table schema); mutable std::mutex mu_; From f37f61eb25b258d7ac44c14ff9aea3e756631cfe Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Sat, 11 Jan 2025 20:07:55 +0100 Subject: [PATCH 005/195] working read filtering --- google/cloud/bigtable/emulator/cell_view.h | 6 + google/cloud/bigtable/emulator/filter.cc | 634 +++++++++++++++--- google/cloud/bigtable/emulator/filter.h | 16 +- google/cloud/bigtable/emulator/filter_test.cc | 16 +- .../cloud/bigtable/emulator/row_streamer.cc | 3 + google/cloud/bigtable/emulator/table.cc | 6 +- 6 files changed, 595 insertions(+), 86 deletions(-) diff --git a/google/cloud/bigtable/emulator/cell_view.h b/google/cloud/bigtable/emulator/cell_view.h index 819464c3b8a21..dc0f8002a2ac7 100644 --- a/google/cloud/bigtable/emulator/cell_view.h +++ b/google/cloud/bigtable/emulator/cell_view.h @@ -18,6 +18,7 @@ #include #include #include +#include namespace google { namespace cloud { @@ -48,6 +49,10 @@ class CellView { } std::chrono::milliseconds timestamp() const { return timestamp_; } std::string const& value() const { return value_.get(); } + bool HasLabel() const { return label_.has_value(); } + std::string const& label() const { return 
label_.value().get(); } + void SetLabel(std::string const& label) { label_ = label; } + void SetValue(std::string const& value) { value_ = value; } private: std::reference_wrapper row_key_; @@ -55,6 +60,7 @@ class CellView { std::reference_wrapper column_qualifier_; std::chrono::milliseconds timestamp_; std::reference_wrapper value_; + absl::optional> label_; }; } // namespace emulator diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index 05b157a937d3e..a1e9cb0fd124a 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ b/google/cloud/bigtable/emulator/filter.cc @@ -15,17 +15,28 @@ #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/status_or.h" #include "google/cloud/internal/make_status.h" +#include "google/cloud/bigtable/internal/google_bytes_traits.h" #include #include +#include namespace google { namespace cloud { namespace bigtable { namespace emulator { +namespace { +bool StringRefEq(std::string const &s1, std::string const &s2) { + return &s1 == &s2 || s1 == s2; +} +} // namespace + +FilterContext& FilterContext::DisallowApplyLabel() { + allow_apply_label_ = false; + return *this; +} -CellView const& CellStream::operator++() { +void CellStream::operator++() { Next(); - return Value(); } CellView CellStream::operator++(int) { @@ -34,37 +45,90 @@ CellView CellStream::operator++(int) { return tmp; } -template -class PerRowStatusFilter { +template +class PerRowStateFilter { + static_assert(std::is_invocable_v, + "StateResetFunctor must be invocable with no arguments"); + using State = std::decay_t>; + static_assert(std::is_default_constructible_v, + "State must be default constructible"); + static_assert(std::is_assignable_v, + "State must assignable"); + static_assert( + std::is_same_v< + std::result_of_t, + bool>, + "The result of FilterFunctor invocation must be a `bool`"); + public: - PerRowStatusFilter(CellStream source, FilterFunctor filter, - StateResetFunctor reset) - : 
filter_(std::move(filter)), - reset_(std::move(reset)), - source_(std::move(source)) {} + PerRowStateFilter(FilterFunctor filter, StateResetFunctor reset) + : filter_(std::move(filter)), reset_(std::move(reset)) {} - absl::optional operator()() { - for (; source_; ++source_) { - if (!prev_row_ || - !(&prev_row_.value().get() == &source_->row_key() || - prev_row_.value().get() == source_->row_key())) { - state_ = reset_(); - prev_row_ = source_->row_key(); - } - if (filter_(state_, source_.Value())) { - return source_++; - } + bool operator()(CellView const &cell_view) { + if (!prev_row_ || + !StringRefEq(prev_row_.value().get(), cell_view.row_key())) { + state_ = reset_(); + prev_row_ = cell_view.row_key(); } - return {}; + return filter_(state_, cell_view); } private: absl::optional> prev_row_; State state_; FilterFunctor filter_; StateResetFunctor reset_; - CellStream source_; }; +template +class PerColumnStateFilter { + static_assert(std::is_invocable_v, + "StateResetFunctor must be invocable with no arguments"); + using State = std::decay_t>; + static_assert(std::is_default_constructible_v, + "State must be default constructible"); + static_assert(std::is_assignable_v, + "State must assignable"); + static_assert( + std::is_same_v< + std::result_of_t, + bool>, + "The result of FilterFunctor invocation must be a `bool`"); + + public: + PerColumnStateFilter(FilterFunctor filter, StateResetFunctor reset) + : filter_(std::move(filter)), reset_(std::move(reset)) {} + + bool operator()(CellView const &cell_view) { + if (!prev_|| !prev_->Matches(cell_view)) { + state_ = reset_(); + prev_ = Prev(cell_view); + } + return filter_(state_, cell_view); + } + private: + class Prev { + public: + Prev(CellView const& cell_view) + : row_key_(cell_view.row_key()), + column_family_(cell_view.column_family()), + column_qualifier_(cell_view.column_qualifier()) {} + + bool Matches(CellView const &cell_view) { + return StringRefEq(row_key_.get(), cell_view.row_key()) && + 
StringRefEq(column_family_.get(), cell_view.column_family()) && + StringRefEq(column_qualifier_, cell_view.column_qualifier()); + } + + private: + std::reference_wrapper row_key_; + std::reference_wrapper column_family_; + std::reference_wrapper column_qualifier_; + }; + absl::optional prev_; + State state_; + FilterFunctor filter_; + StateResetFunctor reset_; +}; template CellStream MakeTrivialFilter(CellStream source, Filter filter) { @@ -79,8 +143,242 @@ CellStream MakeTrivialFilter(CellStream source, Filter filter) { }); } -StatusOr CreateFilter( - ::google::bigtable::v2::RowFilter const& filter, CellStream source) { +template +CellStream MakePerRowStateFilter(CellStream source, FilterFunctor filter, + StateResetFunctor state_reset) { + return MakeTrivialFilter(std::move(source), + PerRowStateFilter( + std::move(filter), std::move(state_reset))); +} + +template +CellStream MakePerColumnStateFilter(CellStream source, FilterFunctor filter, + StateResetFunctor state_reset) { + return MakeTrivialFilter( + std::move(source), PerColumnStateFilter( + std::move(filter), std::move(state_reset))); +} + +class ValueRangeFilter { + public: + ValueRangeFilter(::google::bigtable::v2::ColumnRange const &column_range) : + string_cmp_(internal::CompareColumnQualifiers) + { + if (column_range.has_start_qualifier_closed()) { + start_ = column_range.start_qualifier_closed(); + start_closed_ = true; + } else if (column_range.has_start_qualifier_open()) { + start_ = column_range.start_qualifier_open(); + start_closed_ = false; + } else { + start_closed_ = true; + } + if (column_range.has_end_qualifier_closed()) { + end_ = column_range.end_qualifier_closed(); + end_closed_ = true; + has_end_ = true; + } else if (column_range.has_end_qualifier_open()) { + end_ = column_range.end_qualifier_open(); + end_closed_ = false; + has_end_ = true; + } else { + has_end_ = false; + } + } + + ValueRangeFilter(::google::bigtable::v2::ValueRange const& value_range) + : 
string_cmp_(internal::CompareCellValues) { + if (value_range.has_start_value_closed()) { + start_ = value_range.start_value_closed(); + start_closed_ = true; + } else if (value_range.has_start_value_open()) { + start_ = value_range.start_value_open(); + start_closed_ = false; + } else { + start_closed_ = true; + } + if (value_range.has_end_value_closed()) { + end_ = value_range.end_value_closed(); + end_closed_ = true; + has_end_ = true; + } else if (value_range.has_end_value_open()) { + end_ = value_range.end_value_open(); + end_closed_ = false; + has_end_ = true; + } else { + has_end_ = false; + } + } + + bool WithinRange(std::string const &val) const { + if (start_closed_) { + if (string_cmp_(start_, val) > 0) { + return false; + } + } else { + if (string_cmp_(start_, val) >= 0) { + return false; + } + } + if (!has_end_) { + return true; + } + if (end_closed_) { + if (string_cmp_(val, end_) > 0) { + return false; + } + } else { + if (string_cmp_(val, end_) >= 0) { + return false; + } + } + return true; + } + + private: + std::function string_cmp_; + std::string start_; + std::string end_; + bool start_closed_; + bool has_end_; + bool end_closed_; +}; + + +class MergeCellStreams { + public: + class CellStreamGreater { + public: + bool operator()(std::shared_ptr const& lhs, + std::shared_ptr const& rhs) const { + auto row_key_cmp = + internal::CompareRowKey((*lhs)->row_key(), (*rhs)->row_key()); + if (row_key_cmp != 0) { + return row_key_cmp > 0; + } + auto cf_cmp = internal::CompareColumnQualifiers((*lhs)->column_family(), + (*rhs)->column_family()); + if (cf_cmp != 0) { + return cf_cmp > 0; + } + auto col_cmp = internal::CompareColumnQualifiers( + (*lhs)->column_qualifier(), (*rhs)->column_qualifier()); + if (col_cmp != 0) { + return col_cmp > 0; + } + return (*lhs)->timestamp() > (*rhs)->timestamp(); + } + }; + + MergeCellStreams(std::vector streams) { + for (auto &stream : streams) { + if (stream.HasValue()) { + unfinished_streams_.emplace( + 
std::make_shared(std::move(stream))); + } + } + } + + absl::optional operator()() { + if (unfinished_streams_.empty()) { + return {}; + } + auto stream_to_advance = unfinished_streams_.top(); + unfinished_streams_.pop(); + CellView res = stream_to_advance->Value(); + stream_to_advance->Next(); + if (stream_to_advance->HasValue()) { + unfinished_streams_.emplace(std::move(stream_to_advance)); + } + return res; + } + + std::priority_queue, + std::vector>, + CellStreamGreater> + unfinished_streams_; +}; + +class ConditionStream { + public: + ConditionStream(CellStream source, CellStream predicate, + CellStream true_stream, CellStream false_stream) + : source_(std::move(source)), + predicate_stream_(std::move(predicate)), + true_stream_(std::move(true_stream)), + false_stream_(std::move(false_stream)) {} + + absl::optional operator()() { + while (true) { + auto cell_view = *source_; + + if (!prev_row_ || + !StringRefEq(prev_row_.value().get(), cell_view.row_key())) { + prev_row_ = cell_view.row_key(); + condition_true_.reset(); + } + if (!condition_true_.has_value()) { + // Let's test if the predicate stream returned something for this row. + for (; predicate_stream_ && + internal::CompareRowKey(predicate_stream_->row_key(), + cell_view.row_key()) < 0; + predicate_stream_.Next()); + if (predicate_stream_ && + internal::CompareRowKey(predicate_stream_->row_key(), + cell_view.row_key()) == 0) { + // Predicate stream did return something for this row. + condition_true_ = true; + // Fast-forward the true stream to start at current row. + for (; + true_stream_ && internal::CompareRowKey(true_stream_->row_key(), + cell_view.row_key()) < 0; + true_stream_.Next()); + } else { + // Predicate stream did not return anything for this row. + condition_true_ = false; + // Fast-forward the false stream to start at current row. 
+ for (; false_stream_ && + internal::CompareRowKey(false_stream_->row_key(), + cell_view.row_key()) < 0; + false_stream_.Next()); + } + } + if (*condition_true_) { + if (true_stream_ && internal::CompareRowKey(true_stream_->row_key(), + cell_view.row_key()) == 0) { + return true_stream_++; + } + } else { + if (false_stream_ && + internal::CompareRowKey(false_stream_->row_key(), + cell_view.row_key()) == 0) { + return false_stream_++; + } + } + // True/false stream exhausted, reset state and fast-forward source. + condition_true_.reset(); + for (; + source_ && internal::CompareRowKey(source_->row_key(), + prev_row_->get()) == 0; + source_.Next()); + if (!source_) { + return {}; + } + } + } + + private: + CellStream source_; + CellStream predicate_stream_; + CellStream true_stream_; + CellStream false_stream_; + absl::optional> prev_row_; + absl::optional condition_true_; +}; + +StatusOr CreateFilterImpl( + ::google::bigtable::v2::RowFilter const& filter, CellStream source, + FilterContext const& ctx, std::vector &direct_sinks) { if (filter.has_pass_all_filter()) { if (!filter.pass_all_filter()) { return InvalidArgumentError( @@ -113,6 +411,21 @@ StatusOr CreateFilter( return re2::RE2::PartialMatch(cell_view.row_key(), *pattern); }); } + if (filter.has_value_regex_filter()) { + auto pattern = std::make_shared(filter.value_regex_filter()); + if (!pattern->ok()) { + return InvalidArgumentError( + "`value_regex_filter` is not a valid RE2 regex.", + GCP_ERROR_INFO() + .WithMetadata("filter", filter.DebugString()) + .WithMetadata("description", pattern->error())); + } + return MakeTrivialFilter( + std::move(source), + [pattern = std::move(pattern)](CellView const& cell_view) mutable { + return re2::RE2::PartialMatch(cell_view.value(), *pattern); + }); + } if (filter.has_row_sample_filter()) { double pass_prob = filter.row_sample_filter(); if (pass_prob + std::numeric_limits::epsilon() < 0 @@ -121,17 +434,13 @@ StatusOr CreateFilter( "`row_sample_filter` is not a valid 
probability.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - auto per_row_filter = [] (bool skip, CellView const &) { - return !skip; - }; - std::mt19937 gen; - auto reset_state = [gen = std::move(gen), pass_prob]() mutable { - std::uniform_real_distribution dis(0.0, 1.0); - return dis(gen) > pass_prob; - }; - return CellStream(PerRowStatusFilter( - std::move(source), std::move(per_row_filter), std::move(reset_state))); + return MakePerRowStateFilter( + std::move(source), + [](bool& should_pass, CellView const&) { return should_pass; }, + [gen = std::mt19937(), pass_prob]() mutable { + std::uniform_real_distribution dis(0.0, 1.0); + return dis(gen) < pass_prob; + }); } if (filter.has_family_name_regex_filter()) { auto pattern = std::make_shared(filter.family_name_regex_filter()); @@ -142,17 +451,10 @@ StatusOr CreateFilter( .WithMetadata("filter", filter.DebugString()) .WithMetadata("description", pattern->error())); } - return CellStream( - [pattern = std::move(pattern), - source = std::move(source)]() mutable -> absl::optional { - for (; source && - !re2::RE2::PartialMatch(source->column_family(), *pattern); - ++source) { - } - if (!source) { - return {}; - } - return source++; + return MakeTrivialFilter( + std::move(source), + [pattern = std::move(pattern)](CellView const& cell_view) mutable { + return re2::RE2::PartialMatch(cell_view.column_family(), *pattern); }); } if (filter.has_column_qualifier_regex_filter()) { @@ -165,17 +467,28 @@ StatusOr CreateFilter( .WithMetadata("filter", filter.DebugString()) .WithMetadata("description", pattern->error())); } - return CellStream( - [pattern = std::move(pattern), - source = std::move(source)]() mutable -> absl::optional { - for (; source && - !re2::RE2::PartialMatch(source->column_qualifier(), *pattern); - ++source) { - } - if (!source) { - return {}; - } - return source++; + return MakeTrivialFilter( + std::move(source), + [pattern = std::move(pattern)](CellView const& cell_view) mutable { + 
return re2::RE2::PartialMatch(cell_view.column_qualifier(), *pattern); + }); + } + if (filter.has_column_range_filter()) { + return MakeTrivialFilter( + std::move(source), + [qualifier_filter = ValueRangeFilter(filter.column_range_filter()), + column_family = filter.column_range_filter().family_name()]( + CellView const& cell_view) { + return cell_view.column_family() == column_family && + qualifier_filter.WithinRange(cell_view.column_qualifier()); + }); + } + if (filter.has_value_range_filter()) { + return MakeTrivialFilter( + std::move(source), + [value_filter = ValueRangeFilter(filter.value_range_filter())]( + CellView const& cell_view) { + return value_filter.WithinRange(cell_view.value()); }); } if (filter.has_cells_per_row_offset_filter()) { @@ -185,29 +498,110 @@ StatusOr CreateFilter( "`cells_per_row_offset_filter` is negative.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - auto per_row_filter = [] (std::int64_t &skip, CellView const &) { - return skip-- <= 0; - }; - auto reset_state = [cells_per_row_offset]() { - return cells_per_row_offset; - }; - return CellStream(PerRowStatusFilter( - std::move(source), std::move(per_row_filter), std::move(reset_state))); - } - // ColumnRange column_range_filter = 7; - // TimestampRange timestamp_range_filter = 8; - // bytes value_regex_filter = 9; - // ValueRange value_range_filter = 15; - // int32 cells_per_row_offset_filter = 10; - // int32 cells_per_row_limit_filter = 11; - // int32 cells_per_column_limit_filter = 12; - // bool strip_value_transformer = 13; - // string apply_label_transformer = 19; + return MakePerRowStateFilter( + std::move(source), + [](std::int64_t& per_row_state, CellView const&) { + return per_row_state-- <= 0; + }, + [cells_per_row_offset]() { return cells_per_row_offset; }); + } + if (filter.has_cells_per_row_limit_filter()) { + std::int64_t cells_per_row_limit = filter.cells_per_row_limit_filter(); + if (cells_per_row_limit < 0) { + return InvalidArgumentError( + 
"`cells_per_row_limit_filter` is negative.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + return MakePerRowStateFilter( + std::move(source), + [cells_per_row_limit](std::int64_t& per_row_state, CellView const&) { + return per_row_state++ < cells_per_row_limit; + }, + []() -> std::int64_t { return 0; }); + } + if (filter.has_cells_per_column_limit_filter()) { + std::int64_t cells_per_column_limit = filter.cells_per_column_limit_filter(); + if (cells_per_column_limit < 0) { + return InvalidArgumentError( + "`cells_per_column_limit_filter` is negative.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + return MakePerColumnStateFilter( + std::move(source), + [cells_per_column_limit](std::int64_t& per_column_state, CellView const&) { + return per_column_state++ < cells_per_column_limit; + }, + []() -> std::int64_t { return 0; }); + } + if (filter.has_timestamp_range_filter()) { + auto const & ts_filter = filter.timestamp_range_filter(); + return MakeTrivialFilter( + std::move(source), + [start = ts_filter.start_timestamp_micros(), + end = ts_filter.end_timestamp_micros()](CellView const& cell_view) { + auto timestamp_micros = + std::chrono::duration_cast( + cell_view.timestamp()) + .count(); + + return timestamp_micros >= start && + (end == 0 || timestamp_micros < end); + }); + } + if (filter.has_apply_label_transformer()) { + if (!ctx.IsApplyLabelAllowed()) { + return InvalidArgumentError( + "Two `apply_label_transformer`s cannot coexist in one chain.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + return CellStream([source = std::move(source), + label = std::make_shared( + filter.apply_label_transformer())]() mutable + -> absl::optional { + if (!source) { + return {}; + } + CellView res = source++; + std::cout << "Label " << label + << " being set on cell value: " << res.value() << std::endl; + res.SetLabel(*label); + return res; + }); + } + if (filter.has_strip_value_transformer()) { + if 
(!filter.strip_value_transformer()) { + return InvalidArgumentError( + "`strip_value_transformer` explicitly set to `false`.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + return CellStream( + [source = std::move(source), + empty = std::string()]() mutable -> absl::optional { + // We want `empty` to explicitly live for as long as the filter so + // that the values returned by the filter are valid. + if (!source) { + return {}; + } + auto res = source++; + res.SetValue(empty); + return res; + }); + } if (filter.has_chain()) { CellStream res = std::move(source); + // FIXME handle the contexts properly for (auto const &subfilter : filter.chain().filters()) { - auto maybe_res = CreateFilter(subfilter, std::move(res)); + if (subfilter.has_sink()) { + if (!subfilter.sink()) { + return InvalidArgumentError( + "`sink` explicitly set to `false`.", + GCP_ERROR_INFO().WithMetadata("filter", subfilter.DebugString())); + } + direct_sinks.emplace_back(std::move(res)); + return CellStream([]() -> absl::optional { return {}; }); + } + auto maybe_res = + CreateFilterImpl(subfilter, std::move(res), ctx, direct_sinks); if (!maybe_res) { return maybe_res.status(); } @@ -215,13 +609,93 @@ StatusOr CreateFilter( } return res; } - // Interleave interleave = 2; - // Condition condition = 3; - // bool sink = 16; + if (filter.has_interleave()) { + std::vector parallel_streams; + for (auto const & subfilter : filter.interleave().filters()) { + if (subfilter.has_sink()) { + if (!subfilter.sink()) { + return InvalidArgumentError( + "`sink` explicitly set to `false`.", + GCP_ERROR_INFO().WithMetadata("filter", subfilter.DebugString())); + } + direct_sinks.emplace_back(source); + continue; + } + auto maybe_filter = + CreateFilterImpl(subfilter, source, ctx, direct_sinks); + if (!maybe_filter) { + return maybe_filter.status(); + } + parallel_streams.emplace_back(*maybe_filter); + } + if (parallel_streams.empty()) { + return CellStream([]() -> absl::optional { return {}; 
}); + } + return CellStream(MergeCellStreams(parallel_streams)); + } + if (filter.has_condition()) { + if (!filter.condition().has_predicate_filter()){ + return InvalidArgumentError( + "`condition` must have a `predicate_filter` set.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + auto maybe_predicate_stream = CreateFilterImpl( + filter.condition().predicate_filter(), source, ctx, direct_sinks); + if (!maybe_predicate_stream) { + return maybe_predicate_stream.status(); + } + auto maybe_true_stream = + filter.condition().has_true_filter() + ? CreateFilterImpl(filter.condition().true_filter(), source, ctx, + direct_sinks) + : StatusOr( + CellStream([]() -> absl::optional { return {}; })); + if (!maybe_true_stream) { + return maybe_true_stream.status(); + } + auto maybe_false_stream = + filter.condition().has_false_filter() + ? CreateFilterImpl(filter.condition().false_filter(), source, ctx, + direct_sinks) + : StatusOr( + CellStream([]() -> absl::optional { return {}; })); + if (!maybe_false_stream) { + return maybe_false_stream.status(); + } + + return CellStream(ConditionStream( + std::move(source), *std::move(maybe_predicate_stream), + *std::move(maybe_true_stream), *std::move(maybe_false_stream))); + } return UnimplementedError( "Unsupported filter.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); +} +StatusOr CreateFilter( + ::google::bigtable::v2::RowFilter const& filter, CellStream source, + FilterContext const& ctx) { + std::cout << "Creating a filter structure for: " << std::endl + << filter.DebugString() << std::endl; + std::vector direct_sinks; + if (filter.has_sink()) { + if (!filter.sink()) { + return InvalidArgumentError( + "`sink` explicitly set to `false`.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + return source; + } + auto maybe_filter = + CreateFilterImpl(filter, std::move(source), ctx, direct_sinks); + if (!maybe_filter) { + return maybe_filter.status(); + } + if 
(!direct_sinks.empty()) { + direct_sinks.emplace_back(*std::move(maybe_filter)); + return CellStream(MergeCellStreams(std::move(direct_sinks))); + } + return maybe_filter; } } // namespace emulator diff --git a/google/cloud/bigtable/emulator/filter.h b/google/cloud/bigtable/emulator/filter.h index 7e816e39a25e1..646143f573d40 100644 --- a/google/cloud/bigtable/emulator/filter.h +++ b/google/cloud/bigtable/emulator/filter.h @@ -32,7 +32,7 @@ class CellStream { bool HasValue() const { return current_.has_value(); } CellView const & Value() const { return *current_; } void Next() { current_ = impl_(); } - CellView const &operator++(); + void operator++(); CellView operator++(int); CellView operator*() const { return Value(); } CellView const* operator->() const { return &Value(); } @@ -43,8 +43,20 @@ class CellStream { absl::optional current_; }; +class FilterContext { + public: + FilterContext() : allow_apply_label_(true) {} + + FilterContext& DisallowApplyLabel(); + + bool IsApplyLabelAllowed() const { return allow_apply_label_; } + private: + bool allow_apply_label_; +}; + StatusOr CreateFilter( - ::google::bigtable::v2::RowFilter const& filter, CellStream source); + ::google::bigtable::v2::RowFilter const& filter, CellStream source, + FilterContext const& ctx); } // namespace emulator } // namespace bigtable diff --git a/google/cloud/bigtable/emulator/filter_test.cc b/google/cloud/bigtable/emulator/filter_test.cc index 5f38b3c44ae6e..70c6f9439e4e0 100644 --- a/google/cloud/bigtable/emulator/filter_test.cc +++ b/google/cloud/bigtable/emulator/filter_test.cc @@ -27,8 +27,13 @@ namespace { TEST(DummyFilter, Simple) { google::cloud::bigtable::Table table(MakeDataConnection(), TableResource("fake", "baz", "ft")); - Filter filter = - Filter::Chain(Filter::FamilyRegex("fam1"), Filter::CellsRowOffset(2)); + Filter filter = Filter::Chain( + Filter::Interleave(Filter::Chain(Filter::ColumnRegex("[0-1]"), + Filter::ApplyLabelTransformer("L01")), + 
Filter::Chain(Filter::ColumnRegex("[1-2]"), + Filter::ApplyLabelTransformer("L12")), + Filter::Sink()), + Filter::ColumnRegex("[0-2]")); for (StatusOr& row : table.ReadRows(RowSet(RowRange::InfiniteRange()), filter)) { ASSERT_STATUS_OK(row); @@ -37,6 +42,13 @@ TEST(DummyFilter, Simple) { std::cout << "\t" << cell.family_name() << ":" << cell.column_qualifier() << " @ " << cell.timestamp().count() << "us\n" << "\t\"" << cell.value() << '"' << "\n"; + if (!cell.labels().empty()) { + std::cout << "\tlabelled:"; + for (auto const& label : cell.labels()) { + std::cout << " " << label; + } + std::cout << "\n\n"; + } } } } diff --git a/google/cloud/bigtable/emulator/row_streamer.cc b/google/cloud/bigtable/emulator/row_streamer.cc index 6a0d9f0a0ce9f..5c4b582f484da 100644 --- a/google/cloud/bigtable/emulator/row_streamer.cc +++ b/google/cloud/bigtable/emulator/row_streamer.cc @@ -56,6 +56,9 @@ bool RowStreamer::Stream(CellView const& cell) { std::chrono::duration_cast(cell.timestamp()) .count()); chunk.set_value(cell.value()); + if (cell.HasLabel()) { + *chunk.add_labels() = cell.label(); + } pending_chunks_.emplace_back(std::move(chunk)); if (pending_chunks_.size() > 200) { return Flush(false); diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index f543bfff954a3..a391d87f730c1 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -415,8 +415,9 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, } std::lock_guard lock(mu_); auto stream = ReadRowsInternal(std::move(row_set)); + FilterContext ctx; if (request.has_filter()) { - auto maybe_stream = CreateFilter(request.filter(), std::move(stream)); + auto maybe_stream = CreateFilter(request.filter(), std::move(stream), ctx); if (!maybe_stream) { return maybe_stream.status(); } @@ -427,7 +428,8 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, << " column_family: " << 
stream->column_family() << " column_qualifier: " << stream->column_qualifier() << " column_timestamp: " << stream->timestamp().count() - << " column_value: " << stream->value() + << " column_value: " << stream->value() << " label: " + << (stream->HasLabel() ? stream->label() : std::string("unset")) << std::endl; if (!row_streamer.Stream(*stream)) { std::cout << "HOW?" << std::endl; From 054631620c3af0491e381b9ecde6be3c8dd9778a Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Sun, 26 Jan 2025 16:34:18 +0100 Subject: [PATCH 006/195] Refactored filtering. --- google/cloud/bigtable/emulator/CMakeLists.txt | 7 +- .../emulator/bigtable_emulator_common.bzl | 5 +- .../emulator/bigtable_emulator_unit_tests.bzl | 2 +- .../cloud/bigtable/emulator/column_family.cc | 95 ------- .../cloud/bigtable/emulator/column_family.h | 55 +--- .../bigtable/emulator/column_family_test.cc | 36 +-- google/cloud/bigtable/emulator/filtered_map.h | 150 +++++++++++ .../cloud/bigtable/emulator/row_iterators.h | 86 +++++- .../cloud/bigtable/emulator/sorted_row_set.cc | 133 --------- .../cloud/bigtable/emulator/sorted_row_set.h | 47 ---- .../bigtable/emulator/sorted_row_set_test.cc | 68 ----- .../bigtable/emulator/string_range_set.cc | 252 ++++++++++++++++++ .../bigtable/emulator/string_range_set.h | 105 ++++++++ .../emulator/string_range_set_test.cc | 61 +++++ google/cloud/bigtable/emulator/table.cc | 83 +++++- google/cloud/bigtable/emulator/table.h | 2 +- 16 files changed, 760 insertions(+), 427 deletions(-) create mode 100644 google/cloud/bigtable/emulator/filtered_map.h delete mode 100644 google/cloud/bigtable/emulator/sorted_row_set.cc delete mode 100644 google/cloud/bigtable/emulator/sorted_row_set.h delete mode 100644 google/cloud/bigtable/emulator/sorted_row_set_test.cc create mode 100644 google/cloud/bigtable/emulator/string_range_set.cc create mode 100644 google/cloud/bigtable/emulator/string_range_set.h create mode 100644 google/cloud/bigtable/emulator/string_range_set_test.cc diff --git 
a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index 17532da45722c..c201433e186f4 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -24,14 +24,15 @@ add_library( column_family_test.cc filter.cc filter.h + filtered_map.h row_iterators.cc row_iterators.h row_streamer.cc row_streamer.h server.cc server.h - sorted_row_set.cc - sorted_row_set.h + string_range_set.cc + string_range_set.h table.cc table.h to_grpc_status.cc @@ -60,7 +61,7 @@ if (BUILD_TESTING) filter_test.cc row_iterators_test.cc server_test.cc - sorted_row_set_test.cc) + string_range_set_test.cc) export_list_to_bazel("bigtable_emulator_unit_tests.bzl" "bigtable_emulator_unit_tests" YEAR "2024") diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl index f798585e6f11b..ce9a8be571b39 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl @@ -21,10 +21,11 @@ bigtable_emulator_common_hdrs = [ "cluster.h", "column_family.h", "filter.h", + "filtered_map.h", "row_iterators.h", "row_streamer.h", "server.h", - "sorted_row_set.h", + "string_range_set.h", "table.h", "to_grpc_status.h", ] @@ -37,7 +38,7 @@ bigtable_emulator_common_srcs = [ "row_iterators.cc", "row_streamer.cc", "server.cc", - "sorted_row_set.cc", + "string_range_set.cc", "table.cc", "to_grpc_status.cc", ] diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl index b860ad9410c51..240296e87891a 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -21,5 +21,5 @@ bigtable_emulator_unit_tests = [ "filter_test.cc", "row_iterators_test.cc", "server_test.cc", - "sorted_row_set_test.cc", + 
"string_range_set_test.cc", ] diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 358fff1209ded..ee5e410a0ca27 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -20,101 +20,6 @@ namespace cloud { namespace bigtable { namespace emulator { -ColumnFamily::const_iterator::const_iterator( - ColumnFamily const& column_family, std::shared_ptr row_set) - : column_family_(std::cref(column_family)), row_set_(std::move(row_set)) { - if (row_set_) { - std::cout << "ColumnFamily::const_iterator::const_iterator():" << std::endl; - for (auto const& range : row_set_->disjoint_ranges()) { - std::cout << " "; - if (range.has_start_key_closed()) { - std::cout << "[" << range.start_key_closed(); - } else if (range.has_start_key_open()) { - std::cout << "(" << range.start_key_open(); - } else { - std::cout << "(inf"; - } - std::cout << ":"; - if (range.has_end_key_closed()) { - std::cout << range.end_key_closed() << "]"; - } else if (range.has_end_key_open()) { - std::cout << range.end_key_open() << ")"; - } else { - std::cout << "inf)"; - } - std::cout << std::endl; - } - - row_set_pos_ = row_set_->disjoint_ranges().begin(); - row_pos_ = column_family_.get().rows_.begin(); - - AdvanceToNextRange(); - EnsureIteratorValid(); - } else { - row_pos_ = column_family_.get().rows_.end(); - } -} - -void ColumnFamily::const_iterator::AdvanceToNextRange() { - if (row_set_pos_ == row_set_->disjoint_ranges().end()) { - // We've reached the end. - row_pos_ = column_family_.get().rows_.end(); - return; - } - if (row_pos_ == column_family_.get().rows_.end()) { - // row_pos_ is already pointing far enough. - return; - } - if (!internal::RowRangeHelpers::BelowStart(*row_set_pos_, row_pos_->first)) { - // row_pos_ is already pointing far enough. 
- return; - } - if (row_set_pos_->has_start_key_closed()) { - row_pos_ = column_family_.get().rows_.lower_bound( - row_set_pos_->start_key_closed()); - } else if (row_set_pos_->has_start_key_open()) { - row_pos_ = - column_family_.get().rows_.upper_bound(row_set_pos_->start_key_open()); - } else { - // Range open on the left - row_pos_ = column_family_.get().rows_.begin(); - } -} - -void ColumnFamily::const_iterator::EnsureIteratorValid() { - // `row_pos_` may point to a row which is past the end of the range pointed by - // row_set_pos_. Make sure this only happens when the iteration reaches its - // end. - while (row_pos_ != column_family_.get().rows_.end() && - row_set_pos_ != row_set_->disjoint_ranges().end() && - internal::RowRangeHelpers::AboveEnd(*row_set_pos_, row_pos_->first)) { - ++row_set_pos_; - AdvanceToNextRange(); - } - // This situation indicates that there are no rows which start after - // current (as pointed by `row_set_pos_`) range's start. Given that we're - // traversing `row_set_` in order, there will be no such rows for - // following ranges, i.e. we've reached the end. -} - -ColumnFamily::const_iterator& ColumnFamily::const_iterator::operator++() { - std::cout << "ColumnFamily::const_iterator::operator++ this=" - << reinterpret_cast(this) << " val before: " - << (row_pos_ == column_family_.get().rows_.end() - ? 
std::string("end") - : row_pos_->first) - << std::endl; - ++row_pos_; - EnsureIteratorValid(); - return *this; -} - -ColumnFamily::const_iterator ColumnFamily::const_iterator::operator++(int) { - ColumnFamily::const_iterator retval = *this; - ++(*this); - return retval; -} - void ColumnRow::SetCell(std::chrono::milliseconds timestamp, std::string const& value) { if (timestamp <= std::chrono::milliseconds::zero()) { diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 6653ed6d32786..edde3bad06089 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -17,7 +17,7 @@ #include #include -#include "google/cloud/bigtable/emulator/sorted_row_set.h" +#include "google/cloud/bigtable/emulator/string_range_set.h" #include namespace google { @@ -60,7 +60,7 @@ class ColumnFamilyRow { class ColumnFamily { public: - class const_iterator; + using const_iterator = std::map::const_iterator; void SetCell(std::string const& row_key, std::string const& column_qualifier, std::chrono::milliseconds timestamp, std::string const& value); @@ -69,49 +69,20 @@ class ColumnFamily { std::string const& row_key, std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); - const_iterator FindRows(std::shared_ptr row_set) const { - return const_iterator(*this, std::move(row_set)); + const_iterator begin() const { + return rows_.begin(); + } + const_iterator end() const { + return rows_.end(); + } + const_iterator lower_bound(std::string const& row_key) const { + return rows_.lower_bound(row_key); + } + const_iterator upper_bound(std::string const& row_key) const { + return rows_.lower_bound(row_key); } - const_iterator end() const { return const_iterator(*this, {}); } - - class const_iterator { - public: - using iterator_category = std::input_iterator_tag; - using value_type = std::pair const; - using difference_type = std::ptrdiff_t; - using 
reference = value_type&; - using pointer = value_type*; - - const_iterator& operator++(); - const_iterator operator++(int); - bool operator==(const_iterator const& other) const { - return row_pos_ == other.row_pos_; - } - - bool operator!=(const_iterator const& other) const { - return !(*this == other); - } - - reference operator*() const { return *row_pos_; } - - friend const_iterator ColumnFamily::FindRows(std::shared_ptr) const; - friend const_iterator ColumnFamily::end() const; - private: - const_iterator(ColumnFamily const& column_family, - std::shared_ptr row_set); - - void AdvanceToNextRange(); - void EnsureIteratorValid(); - - std::reference_wrapper column_family_; - std::shared_ptr row_set_; - std::set::const_iterator row_set_pos_; - std::map::const_iterator row_pos_; - }; private: - friend class const_iterator; std::map rows_; }; diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index 0e750741a4bb5..21fa50b57b924 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -27,24 +27,24 @@ namespace { using namespace std::chrono_literals; TEST(ColumnFamilyIterator, Simple) { - ColumnFamily fam; - fam.SetCell("row1", "col1", 123ms, "foo"); - fam.SetCell("row1", "col1", 124ms, "fo"); - fam.SetCell("row1", "col2", 123ms, "bar"); - fam.SetCell("row2", "col1", 123ms, "foo"); - fam.SetCell("row2", "col3", 120ms, "baz"); - fam.SetCell("row2", "col3", 120ms, "baz"); - std::vector rows; - std::transform( - fam.FindRows(std::shared_ptr( - new SortedRowSet(SortedRowSet::AllRows()))), - fam.end(), - std::back_inserter(rows), - [](std::pair const& val) { - return val.first; - }); - std::vector expected{"row1", "row2"}; - EXPECT_EQ(expected, rows); +// ColumnFamily fam; +// fam.SetCell("row1", "col1", 123ms, "foo"); +// fam.SetCell("row1", "col1", 124ms, "fo"); +// fam.SetCell("row1", "col2", 123ms, "bar"); +// fam.SetCell("row2", 
"col1", 123ms, "foo"); +// fam.SetCell("row2", "col3", 120ms, "baz"); +// fam.SetCell("row2", "col3", 120ms, "baz"); +// std::vector rows; +// std::transform( +// fam.FindRows(std::shared_ptr( +// new StringRangeSet(StringRangeSet::All()))), +// fam.end(), +// std::back_inserter(rows), +// [](std::pair const& val) { +// return val.first; +// }); +// std::vector expected{"row1", "row2"}; +// EXPECT_EQ(expected, rows); } class Foo { diff --git a/google/cloud/bigtable/emulator/filtered_map.h b/google/cloud/bigtable/emulator/filtered_map.h new file mode 100644 index 0000000000000..c7742ee88b972 --- /dev/null +++ b/google/cloud/bigtable/emulator/filtered_map.h @@ -0,0 +1,150 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTERED_MAP_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTERED_MAP_H + +#include "google/cloud/bigtable/emulator/string_range_set.h" +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +template +class FilteredMap { + public: + FilteredMap(StringKeyedMap const& unfiltered, + std::shared_ptr filter) + : unfiltered_(std::cref(unfiltered)), filter_(std::move(filter)) {} + class const_iterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = typename std::iterator_traits< + typename StringKeyedMap::const_iterator>::value_type; + using difference_type = typename std::iterator_traits< + typename StringKeyedMap::const_iterator>::difference_type; + using reference = value_type const&; + using pointer = value_type*; + + const_iterator( + FilteredMap const& parent, + typename StringKeyedMap::const_iterator unfiltered_pos, + std::set::const_iterator filter_pos) + : parent_(std::cref(parent)), + unfiltered_pos_(std::move(unfiltered_pos)), + filter_pos_(std::move(filter_pos)) { + AdvanceToNextRange(); + EnsureIteratorValid(); + } + + const_iterator& operator++() { + ++unfiltered_pos_; + EnsureIteratorValid(); + return *this; + } + + const_iterator operator++(int) { + const_iterator retval = *this; + ++(*this); + return retval; + } + + bool operator==(const_iterator const& other) const { + return unfiltered_pos_ == other.unfiltered_pos_; + } + + bool operator!=(const_iterator const& other) const { + return !(*this == other); + } + + reference operator*() const { return *unfiltered_pos_; } + + private: + void AdvanceToNextRange() { + if (filter_pos_ == parent_.get().filter_->disjoint_ranges().end()) { + // We've reached the end. 
+ unfiltered_pos_ = parent_.get().unfiltered_.get().end(); + return; + } + if (unfiltered_pos_ == parent_.get().unfiltered_.get().end()) { + // unfiltered_pos_ is already pointing far enough. + return; + } + if (!filter_pos_->IsBelowStart(unfiltered_pos_->first)) { + // unfiltered_pos_ is already pointing far enough. + return; + } + + if (absl::holds_alternative( + filter_pos_->start())) { + // Defensive programming - this should be dead code - it means we've got + // a range which starts at infinity. + unfiltered_pos_ = parent_.get().unfiltered_.get().end(); + return; + } + + if (filter_pos_->start_closed()) { + unfiltered_pos_ = parent_.get().unfiltered_.get().lower_bound( + absl::get(filter_pos_->start())); + } else { + unfiltered_pos_ = parent_.get().unfiltered_.get().upper_bound( + absl::get(filter_pos_->start())); + } + } + + void EnsureIteratorValid() { + // `unfiltered_pos_` may point to a row which is past the end of the range + // pointed by filter_pos_. Make sure this only happens when the iteration + // reaches its end. + while (unfiltered_pos_ != parent_.get().unfiltered_.get().end() && + filter_pos_ != parent_.get().filter_->disjoint_ranges().end() && + filter_pos_->IsAboveEnd(unfiltered_pos_->first)) { + ++filter_pos_; + AdvanceToNextRange(); + } + // This situation indicates that there are no rows which start after + // current (as pointed by `filter_pos_`) range's start. Given that we're + // traversing `filter_` in order, there will be no such rows for + // following ranges, i.e. we've reached the end. 
+ } + + std::reference_wrapper parent_; + typename StringKeyedMap::const_iterator unfiltered_pos_; + std::set::const_iterator filter_pos_; + }; + + const_iterator begin() const { + return const_iterator(*this, unfiltered_.get().begin(), + filter_->disjoint_ranges().begin()); + } + const_iterator end() const { + return const_iterator(*this, unfiltered_.get().end(), + filter_->disjoint_ranges().end()); + } + private: + std::reference_wrapper unfiltered_; + std::shared_ptr filter_; +}; + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTERED_MAP_H diff --git a/google/cloud/bigtable/emulator/row_iterators.h b/google/cloud/bigtable/emulator/row_iterators.h index 036a8d9ff0c6d..38225993d48e4 100644 --- a/google/cloud/bigtable/emulator/row_iterators.h +++ b/google/cloud/bigtable/emulator/row_iterators.h @@ -18,7 +18,8 @@ #include #include "google/cloud/bigtable/emulator/cell_view.h" #include "google/cloud/bigtable/emulator/table.h" -#include "google/cloud/bigtable/emulator/sorted_row_set.h" +#include "google/cloud/bigtable/emulator/string_range_set.h" +#include "google/cloud/internal/invoke_result.h" #include #include #include @@ -28,6 +29,84 @@ namespace cloud { namespace bigtable { namespace emulator { +struct RowKeyRegex { + std::string regex; +}; +struct RowSample { + double pass_prob; +}; +struct FamilyNameRegex { + std::string regex; +}; +struct ColumnRegex { + std::string regex; +}; +struct PerRowOffset { + std::int32_t offset; +}; + +using InternalFilter = + absl::variant; + +class FilteredColumnFamilyStream { + absl::optional operator()() { + return {}; + } + + bool ApplyFilter(InternalFilter const& internal_filter) { + return absl::visit(FilterApply(*this), internal_filter); + } + + class FilterApply { + public: + FilterApply(FilteredColumnFamilyStream& parent) : parent_(parent) {} + bool operator()(google::bigtable::v2::RowSet const& ) { + 
assert(!parent_.row_ranges_); +// parent_.row_ranges_ = StringRangeSet::Create(row_set); + return false; + } + + bool operator()(google::bigtable::v2::ColumnRange const&) { + return false; + } + + bool operator()(google::bigtable::v2::TimestampRange const&) { + return false; + } + + bool operator()(RowKeyRegex const&) { + return false; + } + + bool operator()(RowSample const&) { + return false; + } + + bool operator()(FamilyNameRegex const&) { + return false; + } + + bool operator()(ColumnRegex const&) { + return false; + } + + bool operator()(PerRowOffset const&) { + return false; + } + + private: + FilteredColumnFamilyStream& parent_; + }; + + std::shared_ptr row_ranges_; + ColumnRow::const_iterator cell_it_; + ColumnFamilyRow::const_iterator column_it_; + ColumnFamily::const_iterator row_it_; +}; + template class MergedSortedIterator { public: @@ -180,8 +259,9 @@ template class TransformIterator { public: using iterator_category = std::input_iterator_tag; - using value_type = std::decay_t::value_type)>::type> const; + using value_type = std::decay_t::value_type>::type> const; using difference_type = typename std::iterator_traits::difference_type; using pointer = value_type*; diff --git a/google/cloud/bigtable/emulator/sorted_row_set.cc b/google/cloud/bigtable/emulator/sorted_row_set.cc deleted file mode 100644 index 152acee36ab0e..0000000000000 --- a/google/cloud/bigtable/emulator/sorted_row_set.cc +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright 2024 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include "google/cloud/bigtable/emulator/sorted_row_set.h" -#include "google/cloud/bigtable/internal/row_range_helpers.h" -#include "google/cloud/bigtable/row_range.h" - -namespace google { -namespace cloud { -namespace bigtable { -namespace emulator { -namespace { - -namespace btproto = google::bigtable::v2; - -bool HasOverlap(btproto::RowRange const& lhs, btproto::RowRange const& rhs) { - return internal::RowRangeHelpers::Intersect(lhs, rhs).first; -} - -bool DisjointRangesAdjacent(btproto::RowRange const& left, - btproto::RowRange const& right) { - assert(internal::RowRangeHelpers::StartLess()(left, right)); - if (left.has_end_key_closed() && - right.has_start_key_open() && - left.end_key_closed() == right.start_key_open()) { - return true; - } - if (left.has_end_key_open() && - right.has_start_key_closed() && - left.end_key_open() == right.start_key_closed()) { - return true; - } - if (left.has_end_key_closed() && - right.has_start_key_closed() && - internal::ConsecutiveRowKeys(left.end_key_closed(), - right.start_key_closed())) { - return true; - } - return false; -} - -} // anonymous namespace - -StatusOr SortedRowSet::Create( - google::bigtable::v2::RowSet const& row_set) { - SortedRowSet res; - for (auto const& row_key : row_set.row_keys()) { - if (row_key.empty()) { - return InvalidArgumentError( - "`row_key` empty", - GCP_ERROR_INFO().WithMetadata("row_set", row_set.DebugString())); - } - btproto::RowRange to_insert; - to_insert.set_start_key_closed(row_key); - to_insert.set_end_key_closed(row_key); - res.Insert(std::move(to_insert)); - } - for (auto const& row_range : row_set.row_ranges()) { - btproto::RowRange to_insert(row_range); - internal::RowRangeHelpers::SanitizeEmptyEndKeys(to_insert); - if (internal::RowRangeHelpers::IsEmpty(to_insert)) { - continue; - } - res.Insert(row_range); - } - return res; -} - -SortedRowSet 
SortedRowSet::AllRows() { - SortedRowSet res; - res.Insert(btproto::RowRange()); - return res; -} - -void SortedRowSet::Insert(btproto::RowRange inserted_range) { - // Remove all ranges which either have an overlap with `inserted_range` or are - // adjacent to it. Then add `inserted_range` with `start` and `end` - // adjusted to cover what the removed ranges used to cover. - - auto first_to_remove = disjoint_ranges_.upper_bound(inserted_range); - // `*first_to_remove` starts strictly after `inserted_range`'s start. - // The previous range is the first to have a chance for an overlap - it is the - // last one, which starts at or before `inserted_range` start. - if (first_to_remove != disjoint_ranges_.begin() && - HasOverlap(*std::prev(first_to_remove), inserted_range)) { - std::advance(first_to_remove, -1); - } - // The range preceeding `first_to_remove` for sure has no overlap with - // `inserted_range` but it may be adjacent to it. In that case we should also - // remove it. - if (first_to_remove != disjoint_ranges_.begin() && - DisjointRangesAdjacent(*std::prev(first_to_remove), inserted_range)) { - std::advance(first_to_remove, -1); - } - if (first_to_remove != disjoint_ranges_.end()) { - if (internal::RowRangeHelpers::StartLess()(*first_to_remove, - inserted_range)) { - *inserted_range.mutable_start_key_open() = - first_to_remove->start_key_open(); - *inserted_range.mutable_start_key_closed() = - first_to_remove->start_key_closed(); - } - do { - if (internal::RowRangeHelpers::EndLess()(inserted_range, - *first_to_remove)) { - *inserted_range.mutable_end_key_open() = - first_to_remove->end_key_open(); - *inserted_range.mutable_end_key_closed() = - first_to_remove->end_key_closed(); - } - disjoint_ranges_.erase(first_to_remove++); - } while (first_to_remove != disjoint_ranges_.end() && - (HasOverlap(*first_to_remove, inserted_range) || - DisjointRangesAdjacent(inserted_range, *first_to_remove))); - } - disjoint_ranges_.insert(std::move(inserted_range)); -} - -} 
// namespace emulator -} // namespace bigtable -} // namespace cloud -} // namespace google diff --git a/google/cloud/bigtable/emulator/sorted_row_set.h b/google/cloud/bigtable/emulator/sorted_row_set.h deleted file mode 100644 index 84ee741ee01ce..0000000000000 --- a/google/cloud/bigtable/emulator/sorted_row_set.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2024 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_SORTED_ROW_SET_H -#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_SORTED_ROW_SET_H - -#include -#include "google/cloud/bigtable/internal/row_range_helpers.h" - -namespace google { -namespace cloud { -namespace bigtable { -namespace emulator { - -class SortedRowSet { - public: - static StatusOr Create( - google::bigtable::v2::RowSet const& row_set); - static SortedRowSet AllRows(); - - void Insert(google::bigtable::v2::RowRange inserted_range); - std::set const& - disjoint_ranges() const { - return disjoint_ranges_; - }; - - private: - std::set disjoint_ranges_; -}; - -} // namespace emulator -} // namespace bigtable -} // namespace cloud -} // namespace google - -#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_SORTED_ROW_SET_H diff --git a/google/cloud/bigtable/emulator/sorted_row_set_test.cc b/google/cloud/bigtable/emulator/sorted_row_set_test.cc deleted file mode 100644 index bca9f7c642466..0000000000000 --- 
a/google/cloud/bigtable/emulator/sorted_row_set_test.cc +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2024 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "google/cloud/bigtable/emulator/sorted_row_set.h" -#include "google/cloud/bigtable/row_range.h" -#include "google/cloud/testing_util/is_proto_equal.h" -#include -#include - -namespace google { -namespace cloud { -namespace bigtable { -namespace emulator { -namespace { - -namespace btproto = ::google::bigtable::v2; -using ::google::cloud::testing_util::IsProtoEqual; - -TEST(SortedRowSet, SingleRange) { - SortedRowSet srs; - std::string expected_text = R"""( -start_key_closed: 'a' -end_key_closed: 'b' -)"""; - btproto::RowRange expected; - ASSERT_TRUE( - google::protobuf::TextFormat::ParseFromString(expected_text, &expected)); - - srs.Insert(RowRange::Closed("a", "b").as_proto()); - ASSERT_EQ(1, srs.disjoint_ranges().size()); - ASSERT_THAT(expected, IsProtoEqual(*srs.disjoint_ranges().begin())); -} - -TEST(StartLess, Order) { - using StartLess = internal::RowRangeHelpers::StartLess; - - ASSERT_FALSE(StartLess()(RowRange::Closed("a", "").as_proto(), - RowRange::Closed("a", "").as_proto())); - ASSERT_TRUE(StartLess()(RowRange::Closed("a", "").as_proto(), - RowRange::Open("a", "").as_proto())); - ASSERT_FALSE(StartLess()(RowRange::Open("a", "").as_proto(), - RowRange::Closed("a", "").as_proto())); - ASSERT_TRUE(StartLess()(RowRange::Closed("a", "").as_proto(), - RowRange::Closed("b", 
"").as_proto())); - ASSERT_TRUE(StartLess()(RowRange::InfiniteRange().as_proto(), - RowRange::Closed("a", "").as_proto())); - ASSERT_TRUE(StartLess()(RowRange::InfiniteRange().as_proto(), - RowRange::Open("a", "").as_proto())); - ASSERT_FALSE(StartLess()(RowRange::InfiniteRange().as_proto(), - RowRange::InfiniteRange().as_proto())); -} - -} // anonymous namespace -} // namespace emulator -} // namespace bigtable -} // namespace cloud -} // namespace google diff --git a/google/cloud/bigtable/emulator/string_range_set.cc b/google/cloud/bigtable/emulator/string_range_set.cc new file mode 100644 index 0000000000000..b785f3ccf24c7 --- /dev/null +++ b/google/cloud/bigtable/emulator/string_range_set.cc @@ -0,0 +1,252 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/emulator/string_range_set.h" +#include "google/cloud/bigtable/internal/google_bytes_traits.h" + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +namespace { + +int CompareRangeValues(StringRangeSet::Range::Value const& lhs, + StringRangeSet::Range::Value const& rhs) { + if (absl::holds_alternative(lhs)) { + return absl::holds_alternative(rhs) ? 
0 : 1; + } + if (absl::holds_alternative(rhs)) { + return -1; + } + return internal::CompareRowKey(absl::get(lhs), + absl::get(rhs)); +} + +bool ConsecutiveRowKeys(StringRangeSet::Range::Value const& lhs, + StringRangeSet::Range::Value const& rhs) { + if (absl::holds_alternative(lhs) || + absl::holds_alternative(rhs)) { + return false; + } + return internal::ConsecutiveRowKeys(absl::get(lhs), + absl::get(rhs)); +} + +bool HasOverlap(StringRangeSet::Range const& lhs, + StringRangeSet::Range const& rhs) { + auto const start_cmp = CompareRangeValues(lhs.start(), rhs.start()); + StringRangeSet::Range const& intersect_start = + (start_cmp == 0) ? (lhs.start_open() ? lhs : rhs) + : ((start_cmp > 0) ? lhs : rhs); + auto const end_cmp = CompareRangeValues(lhs.end(), rhs.end()); + StringRangeSet::Range const& intersect_end = (end_cmp == 0) + ? (lhs.end_open() ? lhs : rhs) + : ((end_cmp < 0) ? lhs : rhs); + return !StringRangeSet::Range::IsEmpty( + intersect_start.start(), intersect_start.start_open(), + intersect_end.end(), intersect_end.end_open()); +} + +bool DisjointAndSortedRangesAdjacent(StringRangeSet::Range const& lhs, + StringRangeSet::Range const& rhs) { + assert(!HasOverlap(lhs, rhs)); + assert(StringRangeSet::RangeStartLess()(lhs, rhs)); + if (lhs.end_closed() && rhs.start_open() && lhs.end() == rhs.start()) { + return true; + } + if (lhs.end_open() && rhs.start_closed() && lhs.end() == rhs.start()) { + return true; + } + // FIXME - ConsecutiveRowKeys should somehow take into account the allowed + // length of the strings. 
+ if (lhs.end_closed() && rhs.start_closed() && + ConsecutiveRowKeys(lhs.end(), rhs.start())) { + return true; + } + return false; +} + +} // anonymous namespace + +StringRangeSet::Range::Range(Value start, bool start_open, Value end, + bool end_open) + : start_(std::move(start)), + start_open_(start_open), + end_(std::move(end)), + end_open_(end_open) { + assert(!RangeValueLess()(end_, start_)); + assert(!absl::holds_alternative(start_) || + start_open_); + assert(!absl::holds_alternative(end_) || + end_open_); + assert(!absl::holds_alternative(start_) || + StringRangeSet::Range::IsEmpty(start_, start_open_, end_, end_open_)); +} + +void StringRangeSet::Range::set_start(Value start, bool start_open) { + start_ = std::move(start); + start_open_ = start_open; +} + +void StringRangeSet::Range::set_end(Value end, bool end_open) { + end_ = std::move(end); + end_open_ = end_open; +} + +bool StringRangeSet::Range::IsBelowStart(Value const &value) const { + auto const cmp = CompareRangeValues(value, start_); + if (cmp != 0) { + return cmp < 0; + } + return start_open_; +} + +bool StringRangeSet::Range::IsEmpty(StringRangeSet::Range::Value const& start, + bool start_open, + StringRangeSet::Range::Value const& end, + bool end_open) { + auto const res_cmp = CompareRangeValues(start, end); + if (res_cmp > 0) { + return true; + } + if (res_cmp == 0) { + return start_open || end_open; + } + if (start_open && end_open) { + // FIXME - ConsecutiveRowKeys should somehow take into account the allowed + // length of the strings. 
+ return ConsecutiveRowKeys(start, end); + } + return false; +} + + +bool StringRangeSet::Range::IsAboveEnd(Value const &value) const { + auto const cmp = CompareRangeValues(value, end_); + if (cmp != 0) { + return cmp > 0; + } + return end_open_; +} + +bool StringRangeSet::RangeValueLess::operator()(Range::Value const& lhs, + Range::Value const& rhs) const { + return CompareRangeValues(lhs, rhs) < 0; +} + +bool StringRangeSet::RangeStartLess::operator()(Range const& lhs, + Range const& rhs) const { + auto res = CompareRangeValues(lhs.start(), rhs.start()); + if (res == 0) { + return lhs.start_closed() && rhs.start_open(); + } + return res < 0; +} + +bool StringRangeSet::RangeEndLess::operator()(Range const& lhs, + Range const& rhs) const { + auto res = CompareRangeValues(lhs.end(), rhs.end()); + if (res == 0) { + return lhs.end_open() && rhs.end_closed(); + } + return res < 0; +} + +StringRangeSet StringRangeSet::All() { + StringRangeSet res; + res.Insert(Range("", false, StringRangeSet::Range::Infinity{}, true)); + return res; +} + +StringRangeSet StringRangeSet::Empty() { + return StringRangeSet{}; +} + +void StringRangeSet::Insert(StringRangeSet::Range inserted_range) { + // Remove all ranges which either have an overlap with `inserted_range` or are + // adjacent to it. Then add `inserted_range` with `start` and `end` + // adjusted to cover what the removed ranges used to cover. + + auto first_to_remove = disjoint_ranges_.upper_bound(inserted_range); + // `*first_to_remove` starts strictly after `inserted_range`'s start. + // The previous range is the first to have a chance for an overlap - it is the + // last one, which starts at or before `inserted_range` start. + if (first_to_remove != disjoint_ranges_.begin() && + HasOverlap(*std::prev(first_to_remove), inserted_range)) { + std::advance(first_to_remove, -1); + } + // The range preceeding `first_to_remove` for sure has no overlap with + // `inserted_range` but it may be adjacent to it. 
In that case we should also + // remove it. + if (first_to_remove != disjoint_ranges_.begin() && + DisjointAndSortedRangesAdjacent(*std::prev(first_to_remove), + inserted_range)) { + std::advance(first_to_remove, -1); + } + if (first_to_remove != disjoint_ranges_.end()) { + if (RangeStartLess()(*first_to_remove, inserted_range)) { + inserted_range.set_start(first_to_remove->start(), + first_to_remove->start_open()); + } + while ( + first_to_remove != disjoint_ranges_.end() && + (HasOverlap(*first_to_remove, inserted_range) || + DisjointAndSortedRangesAdjacent(inserted_range, *first_to_remove))) { + if (RangeEndLess()(inserted_range, *first_to_remove)) { + inserted_range.set_end(first_to_remove->end(), + first_to_remove->end_open()); + } + disjoint_ranges_.erase(first_to_remove++); + } + } + disjoint_ranges_.insert(std::move(inserted_range)); +} + +bool operator==(StringRangeSet::Range::Value const& lhs, + StringRangeSet::Range::Value const& rhs) { + if (absl::holds_alternative(lhs)) { + return absl::holds_alternative(rhs); + } + if (absl::holds_alternative(rhs)) { + return false; + } + return absl::get(lhs) == absl::get(rhs); +} + +std::ostream& operator<<(std::ostream& os, + StringRangeSet::Range::Value const& value) { + if (absl::holds_alternative(value)) { + return os << "inf"; + } + os << absl::get<std::string>(value); + return os; +} + +bool operator==(StringRangeSet::Range const& lhs, + StringRangeSet::Range const& rhs) { + return lhs.start() == rhs.start() && lhs.start_open() == rhs.start_open() && + lhs.end() == rhs.end() && lhs.end_open() == rhs.end_open(); +} + +std::ostream& operator<<(std::ostream& os, + StringRangeSet::Range const& range) { + os << (range.start_closed() ? "[" : "(") << range.start() << "," + << range.end() << (range.end_closed() ? 
"]" : ")"); + return os; +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/string_range_set.h b/google/cloud/bigtable/emulator/string_range_set.h new file mode 100644 index 0000000000000..d28f47e8bb25e --- /dev/null +++ b/google/cloud/bigtable/emulator/string_range_set.h @@ -0,0 +1,105 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_STRING_RANGE_SET_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_STRING_RANGE_SET_H + +#include "absl/types/variant.h" +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +class StringRangeSet { + public: + class Range { + public: + struct Infinity {}; + using Value = absl::variant; + + Range(Value start, bool start_open, Value end, bool end_open); + + Value const& start() const & { return start_; } + Value&& start() && { return std::move(start_); } + bool start_open() const { return start_open_; } + bool start_closed() const { return !start_open_; } + void set_start(Value start, bool start_open); + + Value const& end() const & { return end_; } + Value&& end() && { return std::move(end_); } + void set_end(Value end, bool end_open); + bool end_open() const { return end_open_; } + bool end_closed() const { return !end_open_; } + + bool IsBelowStart(Value const &value) 
const; + bool IsAboveEnd(Value const &value) const; + + static bool IsEmpty(StringRangeSet::Range::Value const& start, + bool start_open, + StringRangeSet::Range::Value const& end, bool end_open); + + private: + Value start_; + bool start_open_; + Value end_; + bool end_open_; + }; + + struct RangeValueLess { + bool operator()(Range::Value const& lhs, Range::Value const& rhs) const; + }; + + struct RangeStartLess { + bool operator()(Range const& lhs, Range const& rhs) const; + }; + + struct RangeEndLess { + bool operator()(Range const& lhs, Range const& rhs) const; + }; + + static StringRangeSet All(); + static StringRangeSet Empty(); + void Insert(Range inserted_range); + + std::set const& disjoint_ranges() const { + return disjoint_ranges_; + }; + + + private: + std::set disjoint_ranges_; +}; + +bool operator==(StringRangeSet::Range::Value const& lhs, + StringRangeSet::Range::Value const& rhs); + +std::ostream& operator<<(std::ostream& os, + StringRangeSet::Range::Value const& value); + +bool operator==(StringRangeSet::Range const& lhs, + StringRangeSet::Range const& rhs); + +std::ostream& operator<<(std::ostream& os, + StringRangeSet::Range const& range); + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_STRING_RANGE_SET_H diff --git a/google/cloud/bigtable/emulator/string_range_set_test.cc b/google/cloud/bigtable/emulator/string_range_set_test.cc new file mode 100644 index 0000000000000..5b596e776f26c --- /dev/null +++ b/google/cloud/bigtable/emulator/string_range_set_test.cc @@ -0,0 +1,61 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/emulator/string_range_set.h" +#include "google/cloud/bigtable/row_range.h" +#include "google/cloud/testing_util/is_proto_equal.h" +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +namespace { + +namespace btproto = ::google::bigtable::v2; +using ::google::cloud::testing_util::IsProtoEqual; + +TEST(StringRangeSet, SingleRange) { + StringRangeSet srs; + srs.Insert(StringRangeSet::Range("a", false, "b", false)); + ASSERT_EQ(1, srs.disjoint_ranges().size()); + ASSERT_EQ(StringRangeSet::Range("a", false, "b", false), + *srs.disjoint_ranges().begin()); +} + +TEST(StartLess, Order) { +// using StartLess = internal::RowRangeHelpers::StartLess; +// +// ASSERT_FALSE(StartLess()(RowRange::Closed("a", "").as_proto(), +// RowRange::Closed("a", "").as_proto())); +// ASSERT_TRUE(StartLess()(RowRange::Closed("a", "").as_proto(), +// RowRange::Open("a", "").as_proto())); +// ASSERT_FALSE(StartLess()(RowRange::Open("a", "").as_proto(), +// RowRange::Closed("a", "").as_proto())); +// ASSERT_TRUE(StartLess()(RowRange::Closed("a", "").as_proto(), +// RowRange::Closed("b", "").as_proto())); +// ASSERT_TRUE(StartLess()(RowRange::InfiniteRange().as_proto(), +// RowRange::Closed("a", "").as_proto())); +// ASSERT_TRUE(StartLess()(RowRange::InfiniteRange().as_proto(), +// RowRange::Open("a", "").as_proto())); +// ASSERT_FALSE(StartLess()(RowRange::InfiniteRange().as_proto(), +// RowRange::InfiniteRange().as_proto())); +} + +} // anonymous namespace +} // namespace emulator +} // 
namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index a391d87f730c1..f4945f573fc42 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -15,7 +15,9 @@ #include #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/bigtable/emulator/row_iterators.h" +#include "google/cloud/bigtable/internal/google_bytes_traits.h" #include "google/protobuf/util/field_mask_util.h" #include "google/cloud/internal/make_status.h" @@ -363,19 +365,21 @@ struct CombineCellIterators { }; CellStream Table::ReadRowsInternal( - std::shared_ptr row_set) const { - using CFWithNameIt = TransformIterator row_set) const { + using FilteredColumnFamily = FilteredMap; + using CFWithNameIt = TransformIterator; std::vector> cf_ranges; - std::transform( - column_families_.begin(), column_families_.end(), - std::back_inserter(cf_ranges), [&](auto const& column_family) { - ExtendWithColumnFamilyName transformer(column_family.first); - return std::make_pair( - CFWithNameIt(column_family.second->FindRows(row_set), transformer), - CFWithNameIt(column_family.second->end(), transformer)); - }); + std::vector filtered_cfs; + for (auto const &column_family : column_families_) { + filtered_cfs.emplace_back(*column_family.second, row_set); + + ExtendWithColumnFamilyName transformer(column_family.first); + cf_ranges.emplace_back( + CFWithNameIt(filtered_cfs.back().begin(), transformer), + CFWithNameIt(filtered_cfs.back().end(), transformer)); + } using CFRowsIt = MergedSortedIterator; CFRowsIt cfrows_begin(std::move(cf_ranges)); @@ -401,17 +405,68 @@ CellStream Table::ReadRowsInternal( }); } +StatusOr CreateStringRangeSet( + google::bigtable::v2::RowSet const& row_set) { + StringRangeSet res; + for (auto const& row_key : 
row_set.row_keys()) { + if (row_key.empty()) { + return InvalidArgumentError( + "`row_key` empty", + GCP_ERROR_INFO().WithMetadata("row_set", row_set.DebugString())); + } + res.Insert(StringRangeSet::Range(row_key, false, row_key, false)); + } + for (auto const& row_range : row_set.row_ranges()) { + StringRangeSet::Range::Value start; + bool start_open; + if (row_range.has_start_key_open() && !row_range.start_key_open().empty()) { + start = StringRangeSet::Range::Value(row_range.start_key_open()); + start_open = true; + } else if (row_range.has_start_key_closed() && + !row_range.start_key_closed().empty()) { + start = StringRangeSet::Range::Value(row_range.start_key_closed()); + start_open = false; + } else { + start = StringRangeSet::Range::Value(""); + start_open = false; + } + StringRangeSet::Range::Value end; + bool end_open; + if (row_range.has_end_key_open() && !row_range.end_key_open().empty()) { + end = StringRangeSet::Range::Value(row_range.end_key_open()); + end_open = true; + } else if (row_range.has_end_key_closed() && + !row_range.end_key_closed().empty()) { + end = StringRangeSet::Range::Value(row_range.end_key_closed()); + end_open = false; + } else { + end = StringRangeSet::Range::Value(StringRangeSet::Range::Infinity{}); + end_open = true; + } + if (StringRangeSet::RangeValueLess()(end, start)) { + return InvalidArgumentError( + "reversed `row_range`", + GCP_ERROR_INFO().WithMetadata("row_range", row_range.DebugString())); + } + if (StringRangeSet::Range::IsEmpty(start, start_open, end, end_open)) { + continue; + } + res.Insert(StringRangeSet::Range(std::move(start), start_open, std::move(end), end_open)); + } + return res; +} + Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, RowStreamer& row_streamer) const { - std::shared_ptr row_set; + std::shared_ptr row_set; if (request.has_rows()) { - auto maybe_row_set = SortedRowSet::Create(request.rows()); + auto maybe_row_set = CreateStringRangeSet(request.rows()); if 
(!maybe_row_set) { return maybe_row_set.status(); } - row_set = std::make_shared(*std::move(maybe_row_set)); + row_set = std::make_shared(*std::move(maybe_row_set)); } else { - row_set = std::make_shared(SortedRowSet::AllRows()); + row_set = std::make_shared(StringRangeSet::All()); } std::lock_guard lock(mu_); auto stream = ReadRowsInternal(std::move(row_set)); diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 3552eeefb7216..f751ee3aae1ca 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -60,7 +60,7 @@ class Table { StatusOr> FindColumnFamily( MESSAGE const& message) const; bool IsDeleteProtectedNoLock() const; - CellStream ReadRowsInternal(std::shared_ptr row_set) const; + CellStream ReadRowsInternal(std::shared_ptr row_set) const; Status Construct(google::bigtable::admin::v2::Table schema); mutable std::mutex mu_; From 7a82f268349b9e03e34466d6696f4900c42bb52d Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Tue, 28 Jan 2025 13:47:14 +0100 Subject: [PATCH 007/195] Refactor to use a column stream directly. 
--- google/cloud/bigtable/emulator/CMakeLists.txt | 6 +- .../emulator/bigtable_emulator_common.bzl | 4 +- .../emulator/bigtable_emulator_unit_tests.bzl | 2 +- .../cloud/bigtable/emulator/column_family.cc | 149 +++++++++ .../cloud/bigtable/emulator/column_family.h | 74 ++++- google/cloud/bigtable/emulator/filter.cc | 4 + google/cloud/bigtable/emulator/filter.h | 11 + google/cloud/bigtable/emulator/filtered_map.h | 62 ++-- .../{string_range_set.cc => range_set.cc} | 119 ++++++- .../{string_range_set.h => range_set.h} | 68 +++- ...ng_range_set_test.cc => range_set_test.cc} | 2 +- .../cloud/bigtable/emulator/row_iterators.h | 306 ------------------ .../bigtable/emulator/row_iterators_test.cc | 12 - google/cloud/bigtable/emulator/table.cc | 130 +------- google/cloud/bigtable/emulator/table.h | 1 - 15 files changed, 462 insertions(+), 488 deletions(-) rename google/cloud/bigtable/emulator/{string_range_set.cc => range_set.cc} (67%) rename google/cloud/bigtable/emulator/{string_range_set.h => range_set.h} (61%) rename google/cloud/bigtable/emulator/{string_range_set_test.cc => range_set_test.cc} (97%) diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index c201433e186f4..82cb278de48b0 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -25,14 +25,14 @@ add_library( filter.cc filter.h filtered_map.h + range_set.cc + range_set.h row_iterators.cc row_iterators.h row_streamer.cc row_streamer.h server.cc server.h - string_range_set.cc - string_range_set.h table.cc table.h to_grpc_status.cc @@ -61,7 +61,7 @@ if (BUILD_TESTING) filter_test.cc row_iterators_test.cc server_test.cc - string_range_set_test.cc) + range_set_test.cc) export_list_to_bazel("bigtable_emulator_unit_tests.bzl" "bigtable_emulator_unit_tests" YEAR "2024") diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl 
index ce9a8be571b39..ae95dda8c484f 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl @@ -22,10 +22,10 @@ bigtable_emulator_common_hdrs = [ "column_family.h", "filter.h", "filtered_map.h", + "range_set.h", "row_iterators.h", "row_streamer.h", "server.h", - "string_range_set.h", "table.h", "to_grpc_status.h", ] @@ -35,10 +35,10 @@ bigtable_emulator_common_srcs = [ "column_family.cc", "column_family_test.cc", "filter.cc", + "range_set.cc", "row_iterators.cc", "row_streamer.cc", "server.cc", - "string_range_set.cc", "table.cc", "to_grpc_status.cc", ] diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl index 240296e87891a..5f55adb9cafaf 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -21,5 +21,5 @@ bigtable_emulator_unit_tests = [ "filter_test.cc", "row_iterators_test.cc", "server_test.cc", - "string_range_set_test.cc", + "range_set_test.cc", ] diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index ee5e410a0ca27..93a262a8cd22f 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -19,6 +19,38 @@ namespace google { namespace cloud { namespace bigtable { namespace emulator { +namespace { + +StringRangeSet::Range CreateColumnRange( + ::google::bigtable::v2::ColumnRange const& column_range) { + StringRangeSet::Range::Value start; + bool start_open; + StringRangeSet::Range::Value end; + bool end_open; + if (column_range.has_start_qualifier_closed()) { + start = StringRangeSet::Range::Value(column_range.start_qualifier_closed()); + start_open = false; + } else if (column_range.has_start_qualifier_open()) { + start = 
StringRangeSet::Range::Value(column_range.start_qualifier_open()); + start_open = true; + } else { + start_open = false; + start = StringRangeSet::Range::Value(""); + } + if (column_range.has_end_qualifier_closed()) { + end = StringRangeSet::Range::Value(column_range.end_qualifier_closed()); + end_open = false; + } else if (column_range.has_end_qualifier_open()) { + end = StringRangeSet::Range::Value(column_range.end_qualifier_open()); + end_open = true; + } else { + end = StringRangeSet::Range::Infinity{}; + end_open = true; + } + return StringRangeSet::Range(std::move(start), start_open, std::move(end), + end_open); +} +} // anonymous namespace void ColumnRow::SetCell(std::chrono::milliseconds timestamp, std::string const& value) { @@ -91,7 +123,124 @@ std::size_t ColumnFamily::DeleteColumn( return 0; } +class FilteredColumnFamilyStream::FilterApply { + public: + FilterApply(FilteredColumnFamilyStream& parent) : parent_(parent) {} + + bool operator()(google::bigtable::v2::ColumnRange const& column_range) { + parent_.column_ranges_->Insert(CreateColumnRange(column_range)); + return true; + } + + bool operator()(google::bigtable::v2::TimestampRange const& timestamp_range) { + parent_.timestamp_ranges_->Insert(TimestampRangeSet::Range( + std::chrono::duration_cast( + std::chrono::microseconds( + timestamp_range.start_timestamp_micros())), + std::chrono::duration_cast( + std::chrono::microseconds( + timestamp_range.end_timestamp_micros())))); + return true; + } + + bool operator()(RowKeyRegex const& row_key_regex) { + parent_.row_regexes_.emplace_back(row_key_regex.regex); + return true; + } + + bool operator()(FamilyNameRegex const&) { return false; } + + bool operator()(ColumnRegex const& column_regex) { + parent_.column_regexes_.emplace_back(column_regex.regex); + return true; + } + + private: + FilteredColumnFamilyStream& parent_; +}; + +FilteredColumnFamilyStream::FilteredColumnFamilyStream( + ColumnFamily const& column_family, std::string column_family_name, + 
std::shared_ptr row_set) + : column_family_name_(std::move(column_family_name)), + row_ranges_(std::move(row_set)), + rows_(column_family, *row_ranges_), + row_it_(rows_.begin()), + initialized_(false) {} + +absl::optional FilteredColumnFamilyStream::operator()() { + InitializeIfNeeded(); + if (row_it_ == rows_.end()) { + return {}; + } + return CellView(row_it_->first, column_family_name_, + column_it_.value()->first, cell_it_.value()->first, + cell_it_.value()->second); +} + +bool FilteredColumnFamilyStream::ApplyFilter( + InternalFilter const& internal_filter) { + return absl::visit(FilterApply(*this), internal_filter); +} + +void FilteredColumnFamilyStream::SkipCurrentColumn() { + ++(column_it_.value()); + if (PointToFirstCellAfterColumnChange()) { + return; + } + // no more cells in this row + ++row_it_; + PointToFirstCellAfterRowChange(); +} + +void FilteredColumnFamilyStream::SkipCurrentRow() { + ++row_it_; + PointToFirstCellAfterRowChange(); +} + +void FilteredColumnFamilyStream::InitializeIfNeeded() { + if (!initialized_) { + PointToFirstCellAfterRowChange(); + initialized_ = true; + } +} + +void FilteredColumnFamilyStream::Next() { + assert(row_it_ != rows_.end()); + assert(column_it_.value() != columns_.value().end()); + assert(cell_it_.value() != cells_.value().end()); + ++(cell_it_.value()); + if (cell_it_.value() != cells_.value().end()) { + return; + } + SkipCurrentColumn(); +} +// Returns whether we've managed to find another cell in currently pointed row +bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() { + for (; column_it_.value() != columns_.value().end(); ++(column_it_.value())) { + cells_ = FilteredMapView( + column_it_.value()->second, *timestamp_ranges_); + cell_it_ = cells_.value().begin(); + if (cell_it_.value() != cells_.value().end()) { + return true; + } + } + return false; +} + +// Returns whether we've managed to find another cell +bool FilteredColumnFamilyStream::PointToFirstCellAfterRowChange() { + for (; row_it_ 
!= rows_.end(); ++row_it_) { + columns_ = FilteredMapView( + row_it_->second, *column_ranges_); + column_it_ = columns_.value().begin(); + if (PointToFirstCellAfterColumnChange()) { + return true; + } + } + return false; +} } // namespace emulator } // namespace bigtable diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index edde3bad06089..5e58f3918a1bb 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -17,7 +17,10 @@ #include #include -#include "google/cloud/bigtable/emulator/string_range_set.h" +#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/bigtable/emulator/filtered_map.h" +#include "google/cloud/bigtable/emulator/cell_view.h" +#include "absl/types/optional.h" #include namespace google { @@ -37,6 +40,12 @@ class ColumnRow { std::map::const_iterator; const_iterator begin() const { return cells_.begin(); } const_iterator end() const { return cells_.end(); } + const_iterator lower_bound(std::chrono::milliseconds timestamp) const { + return cells_.lower_bound(timestamp); + } + const_iterator upper_bound(std::chrono::milliseconds timestamp) const { + return cells_.upper_bound(timestamp); + } private: std::map cells_; @@ -53,6 +62,12 @@ class ColumnFamilyRow { using const_iterator = std::map::const_iterator; const_iterator begin() const { return columns_.begin(); } const_iterator end() const { return columns_.end(); } + const_iterator lower_bound(std::string const& column_qualifier) const { + return columns_.lower_bound(column_qualifier); + } + const_iterator upper_bound(std::string const& column_qualifier) const { + return columns_.upper_bound(column_qualifier); + } private: std::map columns_; @@ -86,6 +101,63 @@ class ColumnFamily { std::map rows_; }; +struct RowKeyRegex { + std::string regex; +}; +struct FamilyNameRegex { + std::string regex; +}; +struct ColumnRegex { + std::string regex; +}; + +using 
InternalFilter = absl::variant; + +class FilteredColumnFamilyStream { + public: + FilteredColumnFamilyStream(ColumnFamily const& column_family, + std::string column_family_name, + std::shared_ptr row_set); + absl::optional operator()(); + bool ApplyFilter(InternalFilter const& internal_filter); + void SkipCurrentColumn(); + void SkipCurrentRow(); + + private: + class FilterApply; + + void Next(); + void InitializeIfNeeded(); + // Returns whether we've managed to find another cell in currently pointed row + bool PointToFirstCellAfterColumnChange(); + // Returns whether we've managed to find another cell + bool PointToFirstCellAfterRowChange(); + + std::string column_family_name_; + + std::shared_ptr row_ranges_; + std::vector row_regexes_; + std::shared_ptr column_ranges_; + std::vector column_regexes_; + std::shared_ptr timestamp_ranges_; + + FilteredMapView rows_; + absl::optional> columns_; + absl::optional> cells_; + + // If row_it_ == rows_.end() we've reached the end. + // We keep the invariant that if (row_it_ != rows_.end()) then + // cell_it_ != cells.end() && column_it_ != columns_.end() + FilteredMapView::const_iterator row_it_; + absl::optional< + FilteredMapView::const_iterator> + column_it_; + absl::optional::const_iterator> + cell_it_; + bool initialized_; +}; } // namespace emulator } // namespace bigtable diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index a1e9cb0fd124a..a3be80129d1c2 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ b/google/cloud/bigtable/emulator/filter.cc @@ -672,6 +672,10 @@ StatusOr CreateFilterImpl( GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } +CellStream JoinCellStreams(std::vector cell_streams) { + return CellStream(MergeCellStreams(std::move(cell_streams))); +} + StatusOr CreateFilter( ::google::bigtable::v2::RowFilter const& filter, CellStream source, FilterContext const& ctx) { diff --git a/google/cloud/bigtable/emulator/filter.h 
b/google/cloud/bigtable/emulator/filter.h index 646143f573d40..e5167a0e7db46 100644 --- a/google/cloud/bigtable/emulator/filter.h +++ b/google/cloud/bigtable/emulator/filter.h @@ -43,6 +43,15 @@ class CellStream { absl::optional current_; }; +class AbstractCellStreamImpl { + public: + virtual ~AbstractCellStreamImpl() = default; + + virtual absl::optional Next() = 0; + bool SkipColumn(); + bool SkipRow(); +}; + class FilterContext { public: FilterContext() : allow_apply_label_(true) {} @@ -54,6 +63,8 @@ class FilterContext { bool allow_apply_label_; }; +CellStream JoinCellStreams(std::vector cell_streams); + StatusOr CreateFilter( ::google::bigtable::v2::RowFilter const& filter, CellStream source, FilterContext const& ctx); diff --git a/google/cloud/bigtable/emulator/filtered_map.h b/google/cloud/bigtable/emulator/filtered_map.h index c7742ee88b972..b6e7be5ee6805 100644 --- a/google/cloud/bigtable/emulator/filtered_map.h +++ b/google/cloud/bigtable/emulator/filtered_map.h @@ -15,7 +15,7 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTERED_MAP_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTERED_MAP_H -#include "google/cloud/bigtable/emulator/string_range_set.h" +#include "google/cloud/bigtable/emulator/range_set.h" #include #include @@ -24,27 +24,24 @@ namespace cloud { namespace bigtable { namespace emulator { -template -class FilteredMap { +template +class FilteredMapView { public: - FilteredMap(StringKeyedMap const& unfiltered, - std::shared_ptr filter) - : unfiltered_(std::cref(unfiltered)), filter_(std::move(filter)) {} class const_iterator { public: using iterator_category = std::input_iterator_tag; using value_type = typename std::iterator_traits< - typename StringKeyedMap::const_iterator>::value_type; + typename Map::const_iterator>::value_type; using difference_type = typename std::iterator_traits< - typename StringKeyedMap::const_iterator>::difference_type; + typename Map::const_iterator>::difference_type; using reference = 
value_type const&; - using pointer = value_type*; + using pointer = value_type const*; const_iterator( - FilteredMap const& parent, - typename StringKeyedMap::const_iterator unfiltered_pos, - std::set::const_iterator filter_pos) + FilteredMapView const& parent, typename Map::const_iterator unfiltered_pos, + typename std::set< + typename ExcludedRanges::Range, + typename ExcludedRanges::RangeStartLess>::const_iterator filter_pos) : parent_(std::cref(parent)), unfiltered_pos_(std::move(unfiltered_pos)), filter_pos_(std::move(filter_pos)) { @@ -73,10 +70,11 @@ class FilteredMap { } reference operator*() const { return *unfiltered_pos_; } + pointer operator->() const { return &*unfiltered_pos_; } private: void AdvanceToNextRange() { - if (filter_pos_ == parent_.get().filter_->disjoint_ranges().end()) { + if (filter_pos_ == parent_.get().filter_.get().disjoint_ranges().end()) { // We've reached the end. unfiltered_pos_ = parent_.get().unfiltered_.get().end(); return; @@ -90,20 +88,12 @@ class FilteredMap { return; } - if (absl::holds_alternative( - filter_pos_->start())) { - // Defensive programming - this should be dead code - it means we've got - // a range which starts at infinity. - unfiltered_pos_ = parent_.get().unfiltered_.get().end(); - return; - } - if (filter_pos_->start_closed()) { unfiltered_pos_ = parent_.get().unfiltered_.get().lower_bound( - absl::get(filter_pos_->start())); + filter_pos_->start_finite()); } else { unfiltered_pos_ = parent_.get().unfiltered_.get().upper_bound( - absl::get(filter_pos_->start())); + filter_pos_->start_finite()); } } @@ -112,7 +102,7 @@ class FilteredMap { // pointed by filter_pos_. Make sure this only happens when the iteration // reaches its end. 
while (unfiltered_pos_ != parent_.get().unfiltered_.get().end() && - filter_pos_ != parent_.get().filter_->disjoint_ranges().end() && + filter_pos_ != parent_.get().filter_.get().disjoint_ranges().end() && filter_pos_->IsAboveEnd(unfiltered_pos_->first)) { ++filter_pos_; AdvanceToNextRange(); @@ -123,23 +113,29 @@ class FilteredMap { // following ranges, i.e. we've reached the end. } - std::reference_wrapper parent_; - typename StringKeyedMap::const_iterator unfiltered_pos_; - std::set::const_iterator filter_pos_; + std::reference_wrapper parent_; + typename Map::const_iterator unfiltered_pos_; + typename std::set::const_iterator + filter_pos_; }; + FilteredMapView(Map const& unfiltered, + ExcludedRanges const& filter) + : unfiltered_(std::cref(unfiltered)), filter_(std::cref(filter)) {} + + const_iterator begin() const { return const_iterator(*this, unfiltered_.get().begin(), - filter_->disjoint_ranges().begin()); + filter_.get().disjoint_ranges().begin()); } const_iterator end() const { return const_iterator(*this, unfiltered_.get().end(), - filter_->disjoint_ranges().end()); + filter_.get().disjoint_ranges().end()); } private: - std::reference_wrapper unfiltered_; - std::shared_ptr filter_; + std::reference_wrapper unfiltered_; + std::reference_wrapper filter_; }; } // namespace emulator diff --git a/google/cloud/bigtable/emulator/string_range_set.cc b/google/cloud/bigtable/emulator/range_set.cc similarity index 67% rename from google/cloud/bigtable/emulator/string_range_set.cc rename to google/cloud/bigtable/emulator/range_set.cc index b785f3ccf24c7..018fd91146e43 100644 --- a/google/cloud/bigtable/emulator/string_range_set.cc +++ b/google/cloud/bigtable/emulator/range_set.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "google/cloud/bigtable/emulator/string_range_set.h" +#include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/bigtable/internal/google_bytes_traits.h" namespace google { @@ -58,6 +58,15 @@ bool HasOverlap(StringRangeSet::Range const& lhs, intersect_end.end(), intersect_end.end_open()); } +bool HasOverlap(TimestampRangeSet::Range const& lhs, + TimestampRangeSet::Range const& rhs) { + TimestampRangeSet::Range::Value overlap_start = + std::max(lhs.start(), rhs.start()); + TimestampRangeSet::Range::Value overlap_end = + TimestampRangeSet::RangeEndLess()(lhs, rhs) ? lhs.end() : rhs.end(); + return TimestampRangeSet::Range::IsEmpty(overlap_start, overlap_end); +} + bool DisjointAndSortedRangesAdjacent(StringRangeSet::Range const& lhs, StringRangeSet::Range const& rhs) { assert(!HasOverlap(lhs, rhs)); @@ -77,6 +86,13 @@ bool DisjointAndSortedRangesAdjacent(StringRangeSet::Range const& lhs, return false; } +bool DisjointAndSortedRangesAdjacent(TimestampRangeSet::Range const& lhs, + TimestampRangeSet::Range const& rhs) { + assert(!HasOverlap(lhs, rhs)); + assert(TimestampRangeSet::RangeStartLess()(lhs, rhs)); + return lhs.end() == rhs.start(); +} + } // anonymous namespace StringRangeSet::Range::Range(Value start, bool start_open, Value end, @@ -246,6 +262,107 @@ std::ostream& operator<<(std::ostream& os, return os; } +TimestampRangeSet::Range::Range(Value start, Value end) + : start_(std::move(start)), end_(std::move(end)) { + assert(end == std::chrono::milliseconds::zero() || start <= end); +} + +bool TimestampRangeSet::Range::IsAboveEnd(Value value) const { + return end_ != std::chrono::milliseconds::zero() && value >= end_; +} + +bool TimestampRangeSet::Range::IsEmpty(TimestampRangeSet::Range::Value start, + TimestampRangeSet::Range::Value end) { + if (end == std::chrono::milliseconds::zero()) { + return false; + } + return start < end; +} + +bool TimestampRangeSet::RangeStartLess::operator()(Range const& lhs, + Range const& rhs) const { + 
return lhs.start() < rhs.start(); +} + +bool TimestampRangeSet::RangeEndLess::operator()(Range const& lhs, + Range const& rhs) const { + if (lhs.end() == std::chrono::milliseconds::zero()) { + return false; + } + if (rhs.end() == std::chrono::milliseconds::zero()) { + return true; + } + return lhs.end() < rhs.end(); +} + +TimestampRangeSet TimestampRangeSet::All() { + TimestampRangeSet res; + res.Insert(Range(std::chrono::milliseconds(0), std::chrono::milliseconds(0))); + return res; +} + +TimestampRangeSet TimestampRangeSet::Empty() { + return TimestampRangeSet{}; +} + +// FIXME - share this with StringRangeSet +void TimestampRangeSet::Insert(TimestampRangeSet::Range inserted_range) { + // Remove all ranges which either have an overlap with `inserted_range` or are + // adjacent to it. Then add `inserted_range` with `start` and `end` + // adjusted to cover what the removed ranges used to cover. + + auto first_to_remove = disjoint_ranges_.upper_bound(inserted_range); + // `*first_to_remove` starts strictly after `inserted_range`'s start. + // The previous range is the first to have a chance for an overlap - it is the + // last one, which starts at or before `inserted_range` start. + if (first_to_remove != disjoint_ranges_.begin() && + HasOverlap(*std::prev(first_to_remove), inserted_range)) { + std::advance(first_to_remove, -1); + } + // The range preceeding `first_to_remove` for sure has no overlap with + // `inserted_range` but it may be adjacent to it. In that case we should also + // remove it. 
+ if (first_to_remove != disjoint_ranges_.begin() && + DisjointAndSortedRangesAdjacent(*std::prev(first_to_remove), + inserted_range)) { + std::advance(first_to_remove, -1); + } + if (first_to_remove != disjoint_ranges_.end()) { + if (RangeStartLess()(*first_to_remove, inserted_range)) { + inserted_range.set_start(first_to_remove->start()); + + } + do { + if (RangeEndLess()(inserted_range, *first_to_remove)) { + inserted_range.set_end(first_to_remove->end()); + } + disjoint_ranges_.erase(first_to_remove++); + } while ( + first_to_remove != disjoint_ranges_.end() && + (HasOverlap(*first_to_remove, inserted_range) || + DisjointAndSortedRangesAdjacent(inserted_range, *first_to_remove))); + } + disjoint_ranges_.insert(std::move(inserted_range)); +} + +bool operator==(TimestampRangeSet::Range const& lhs, + TimestampRangeSet::Range const& rhs) { + return lhs.start() == rhs.start() && lhs.end() == rhs.end(); +} + +std::ostream& operator<<(std::ostream& os, + TimestampRangeSet::Range const& range) { + os << "[" << range.start().count() << "ms-"; + if (range.end() == std::chrono::milliseconds::zero()) { + os << "INF"; + } else { + os << range.end().count() << "ms"; + } + os << ")"; + return os; +} + + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/string_range_set.h b/google/cloud/bigtable/emulator/range_set.h similarity index 61% rename from google/cloud/bigtable/emulator/string_range_set.h rename to google/cloud/bigtable/emulator/range_set.h index d28f47e8bb25e..c4cfef4876758 100644 --- a/google/cloud/bigtable/emulator/string_range_set.h +++ b/google/cloud/bigtable/emulator/range_set.h @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_STRING_RANGE_SET_H -#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_STRING_RANGE_SET_H +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_RANGE_SET_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_RANGE_SET_H #include "absl/types/variant.h" +#include #include #include #include @@ -35,6 +36,9 @@ class StringRangeSet { Range(Value start, bool start_open, Value end, bool end_open); Value const& start() const & { return start_; } + std::string const& start_finite() const& { + return absl::get(start_); + } Value&& start() && { return std::move(start_); } bool start_open() const { return start_open_; } bool start_closed() const { return !start_open_; } @@ -97,9 +101,67 @@ bool operator==(StringRangeSet::Range const& lhs, std::ostream& operator<<(std::ostream& os, StringRangeSet::Range const& range); + +class TimestampRangeSet { + public: + class Range { + public: + using Value = std::chrono::milliseconds; + + Range(Value start, Value end); + + Value start() const { return start_; } + Value start_finite() const { return start_; } + bool start_open() const { return false; } + bool start_closed() const { return true; } + void set_start(Value start) { start_ = start; } + + Value end() const { return end_; } + bool end_open() const { return true; } + bool end_closed() const { return false; } + void set_end(Value end) { end_ = end; } + + bool IsBelowStart(Value value) const { return value < start_; } + bool IsAboveEnd(Value value) const; + + static bool IsEmpty(TimestampRangeSet::Range::Value start, + TimestampRangeSet::Range::Value end); + + private: + Value start_; + Value end_; + }; + + struct RangeStartLess { + bool operator()(Range const& lhs, Range const& rhs) const; + }; + + struct RangeEndLess { + bool operator()(Range const& lhs, Range const& rhs) const; + }; + + static TimestampRangeSet All(); + static TimestampRangeSet Empty(); + void Insert(Range inserted_range); + + std::set const& 
disjoint_ranges() const { + return disjoint_ranges_; + }; + + + private: + std::set disjoint_ranges_; +}; + +bool operator==(TimestampRangeSet::Range const& lhs, + TimestampRangeSet::Range const& rhs); + +std::ostream& operator<<(std::ostream& os, + TimestampRangeSet::Range const& range); + } // namespace emulator } // namespace bigtable } // namespace cloud } // namespace google -#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_STRING_RANGE_SET_H +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_RANGE_SET_H diff --git a/google/cloud/bigtable/emulator/string_range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc similarity index 97% rename from google/cloud/bigtable/emulator/string_range_set_test.cc rename to google/cloud/bigtable/emulator/range_set_test.cc index 5b596e776f26c..af8f14142bccc 100644 --- a/google/cloud/bigtable/emulator/string_range_set_test.cc +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "google/cloud/bigtable/emulator/string_range_set.h" +#include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/bigtable/row_range.h" #include "google/cloud/testing_util/is_proto_equal.h" #include diff --git a/google/cloud/bigtable/emulator/row_iterators.h b/google/cloud/bigtable/emulator/row_iterators.h index 38225993d48e4..e7c2d60930bcf 100644 --- a/google/cloud/bigtable/emulator/row_iterators.h +++ b/google/cloud/bigtable/emulator/row_iterators.h @@ -18,7 +18,6 @@ #include #include "google/cloud/bigtable/emulator/cell_view.h" #include "google/cloud/bigtable/emulator/table.h" -#include "google/cloud/bigtable/emulator/string_range_set.h" #include "google/cloud/internal/invoke_result.h" #include #include @@ -29,312 +28,7 @@ namespace cloud { namespace bigtable { namespace emulator { -struct RowKeyRegex { - std::string regex; -}; -struct RowSample { - double pass_prob; -}; -struct FamilyNameRegex { - std::string regex; -}; -struct ColumnRegex { - std::string regex; -}; -struct PerRowOffset { - std::int32_t offset; -}; -using InternalFilter = - absl::variant; - -class FilteredColumnFamilyStream { - absl::optional operator()() { - return {}; - } - - bool ApplyFilter(InternalFilter const& internal_filter) { - return absl::visit(FilterApply(*this), internal_filter); - } - - class FilterApply { - public: - FilterApply(FilteredColumnFamilyStream& parent) : parent_(parent) {} - bool operator()(google::bigtable::v2::RowSet const& ) { - assert(!parent_.row_ranges_); -// parent_.row_ranges_ = StringRangeSet::Create(row_set); - return false; - } - - bool operator()(google::bigtable::v2::ColumnRange const&) { - return false; - } - - bool operator()(google::bigtable::v2::TimestampRange const&) { - return false; - } - - bool operator()(RowKeyRegex const&) { - return false; - } - - bool operator()(RowSample const&) { - return false; - } - - bool operator()(FamilyNameRegex const&) { - return false; - } - - bool operator()(ColumnRegex const&) { - return 
false; - } - - bool operator()(PerRowOffset const&) { - return false; - } - - private: - FilteredColumnFamilyStream& parent_; - }; - - std::shared_ptr row_ranges_; - ColumnRow::const_iterator cell_it_; - ColumnFamilyRow::const_iterator column_it_; - ColumnFamily::const_iterator row_it_; -}; - -template -class MergedSortedIterator { - public: - using iterator_category = std::input_iterator_tag; - using value_type = typename std::iterator_traits::value_type; - using difference_type = - typename std::iterator_traits::difference_type; - using reference = typename std::iterator_traits::reference; - using pointer = typename std::iterator_traits::pointer; - - MergedSortedIterator() = default; // end() - MergedSortedIterator(std::vector> ranges) { - for (auto & range : ranges) { - if (range.first != range.second) { - ranges_.emplace(std::move(range)); - } - } - } - - value_type operator*() const { - return *ranges_.top().first; - } - - pointer operator->() const { - return ranges_.top().first; - } - - MergedSortedIterator& operator++() { - auto prev_top = ranges_.top();; - // We need to remove it from the priority queue because we're likely to - // change the order. 
- ranges_.pop(); - - ++prev_top.first; - - if (prev_top.first != prev_top.second) { - ranges_.emplace(std::move(prev_top)); - } - - return *this; - } - - bool operator==(MergedSortedIterator const& other) const { - if (ranges_.empty() || other.ranges_.empty()) { - return ranges_.empty() == other.ranges_.empty(); - } - return ranges_.top() == other.ranges_.top(); - } - - bool operator!=(MergedSortedIterator const& other) const { - return !(*this == other); - } - - private: - struct InternalGreater { - bool operator()(std::pair const &lhs, - std::pair const &rhs) const { - return IteratorLess()(*rhs.first, *lhs.first); - } - }; - - std::priority_queue, - std::vector>, - InternalGreater> - ranges_; -}; - -template -class FlattenedIterator { - public: - using InnerCollection = std::decay_t::reference)>::type>; - using InnerIterator = typename InnerCollection::const_iterator; - - using iterator_category = std::input_iterator_tag; - using value_type = std::decay_t::reference, - typename std::iterator_traits::reference)>::type> const; - using difference_type = - typename std::iterator_traits::difference_type; - using pointer = value_type*; - using reference = value_type&; - - FlattenedIterator(OuterIterator begin, OuterIterator end) - : outer_pos_(std::move(begin)), outer_end_(std::move(end)) { - if (outer_pos_ != outer_end_) { - inner_pos_ = DescendFunctor()(*outer_pos_).begin(); - EnsureIteratorValid(); - } - } - - value_type operator*() const { - assert(inner_pos_ != DescendFunctor()(*outer_pos_).end()); - return GetCachedValue(); - } - - pointer operator->() const { - assert(inner_pos_ != DescendFunctor()(*outer_pos_).end()); - return &GetCachedValue(); - } - - FlattenedIterator& operator++() { - ++inner_pos_; - EnsureIteratorValid(); - return *this; - } - - FlattenedIterator operator++(int) { - FlattenedIterator tmp = *this; - ++(*this); - return tmp; - } - - bool operator==(FlattenedIterator const& other) const { - return outer_pos_ == other.outer_pos_ && - 
(outer_pos_ == outer_end_ || inner_pos_ == other.inner_pos_); - } - - bool operator!=(FlattenedIterator const& other) const { - return !(*this == other); - } - - private: - OuterIterator outer_pos_; - OuterIterator outer_end_; - InnerIterator inner_pos_; - mutable absl::optional> cached_value_; - - void EnsureIteratorValid() { - cached_value_.reset(); - while (outer_pos_ != outer_end_ && - inner_pos_ == DescendFunctor()(*outer_pos_).end()) { - ++outer_pos_; - if (outer_pos_ != outer_end_) { - inner_pos_ = DescendFunctor()(*outer_pos_).begin(); - } - } - } - - reference GetCachedValue() const { - if (!cached_value_) { - cached_value_.emplace(ValueCombineFunctor()(*outer_pos_, *inner_pos_)); - } - return *cached_value_; - } -}; - -template -class TransformIterator { - public: - using iterator_category = std::input_iterator_tag; - using value_type = std::decay_t::value_type>::type> const; - using difference_type = - typename std::iterator_traits::difference_type; - using pointer = value_type*; - using reference = value_type&; - - TransformIterator(InputIterator it, Functor func) - : current_(std::move(it)), transformer_(std::move(func)) {} - TransformIterator(TransformIterator const& other) = default; - TransformIterator(TransformIterator&& other) = default; - - value_type operator*() const { return GetCachedValue(); } - pointer operator->() const { return &GetCachedValue(); } - - TransformIterator& operator++() { - cached_value_.reset(); - ++current_; - return *this; - } - - TransformIterator operator++(int) { - TransformIterator tmp = *this; - ++(*this); - return tmp; - } - - bool operator==(TransformIterator const& other) const { - return current_ == other.current_; - } - - bool operator!=(TransformIterator const& other) const { - return current_ != other.current_; - } - - TransformIterator &operator=(TransformIterator const &other) { - if (this == &other) { - return *this; - } - current_ = other.current_; - transformer_ = other.transformer_; - if 
(other.cached_value_) { - cached_value_.emplace(other.cached_value_.get()); - } else { - cached_value_.reset(); - } - return this; - } - - TransformIterator &operator=(TransformIterator &&other) { - if (this == &other) { - return *this; - } - current_ = std::move(other.current_); - transformer_ = std::move(other.transformer_); - if (other.cached_value_) { - cached_value_.emplace(*std::move(other.cached_value_)); - } else { - cached_value_.reset(); - } - return *this; - } - - reference GetCachedValue() const { - if (!cached_value_) { - cached_value_.emplace(transformer_(*current_)); - } - return *cached_value_; - } - - private: - InputIterator current_; - Functor transformer_; - mutable absl::optional> cached_value_; -}; } // namespace emulator } // namespace bigtable diff --git a/google/cloud/bigtable/emulator/row_iterators_test.cc b/google/cloud/bigtable/emulator/row_iterators_test.cc index 32ee60572ac68..80eeb2a09aec4 100644 --- a/google/cloud/bigtable/emulator/row_iterators_test.cc +++ b/google/cloud/bigtable/emulator/row_iterators_test.cc @@ -25,18 +25,6 @@ namespace emulator { namespace { TEST(MergedSortedIterator, Simple) { - std::vector a{4, 5, 6, 6, 9, 20}; - std::vector b{1, 2, 3, 4, 7, 20}; - std::vector expected{1, 2, 3, 4, 4, 5, 6, 6, 7, 9, 20, 20}; - std::vector merged; - using MSI = MergedSortedIterator::iterator, std::less>; - for (MSI it(std::vector::iterator, - std::vector::iterator>>{ - {a.begin(), a.end()}, {b.begin(), b.end()}}); - it != MSI(); ++it) { - merged.push_back(*it); - } - EXPECT_EQ(expected, merged); } } // anonymous namespace diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index f4945f573fc42..3789b612dd7c1 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -282,129 +282,6 @@ Status Table::MutateRow( return Status(); } -class ExtendWithColumnFamilyName { - public: - using ExtendedType = std::tuple const; - - explicit 
ExtendWithColumnFamilyName(std::string const& column_family_name) - : column_family_name_(std::cref(column_family_name)) {} - - ExtendedType operator()( - std::iterator_traits::reference - row_key_and_column) const { - return ExtendedType(row_key_and_column.first, column_family_name_.get(), - row_key_and_column.second); - } - - private: - std::reference_wrapper column_family_name_; -}; - -struct RowKeyLess { - bool operator()( - TransformIterator::value_type const& lhs, - TransformIterator::value_type const& rhs) - const { - auto row_key_cmp = - internal::CompareRowKey(std::get<0>(lhs), std::get<0>(rhs)); - if (row_key_cmp == 0) { - return internal::CompareColumnQualifiers(std::get<1>(lhs), - std::get<1>(rhs)) < 0; - } - return row_key_cmp < 0; - } -}; - -struct DescendToColumn { - ColumnFamilyRow const& operator()( - std::tuple const& column_family_row) const { - return std::get<2>(column_family_row); - } -}; - -struct CombineColumnIterators { - using ReturnType = - std::tuple const; - ReturnType operator()( - std::tuple const& column_family_row, - std::pair const& column_row) const { - return ReturnType(std::get<0>(column_family_row), - std::get<1>(column_family_row), column_row.first, - column_row.second); - } -}; - - -struct DescendToCell { - ColumnRow const& operator()( - CombineColumnIterators::ReturnType const &column_row) const { - return std::get<3>(column_row); - } -}; - -struct CombineCellIterators { - using ReturnType = CellView; - ReturnType operator()(CombineColumnIterators::ReturnType const& column_row, - std::pair const& cell) const { - static_assert( - std::is_same, - ColumnRow::const_iterator::value_type>::value); - return ReturnType(std::get<0>(column_row), - std::get<1>(column_row), - std::get<2>(column_row), - cell.first, - cell.second); - } -}; - -CellStream Table::ReadRowsInternal( - std::shared_ptr row_set) const { - using FilteredColumnFamily = FilteredMap; - using CFWithNameIt = TransformIterator; - std::vector> cf_ranges; - - std::vector 
filtered_cfs; - for (auto const &column_family : column_families_) { - filtered_cfs.emplace_back(*column_family.second, row_set); - - ExtendWithColumnFamilyName transformer(column_family.first); - cf_ranges.emplace_back( - CFWithNameIt(filtered_cfs.back().begin(), transformer), - CFWithNameIt(filtered_cfs.back().end(), transformer)); - } - - using CFRowsIt = MergedSortedIterator; - CFRowsIt cfrows_begin(std::move(cf_ranges)); - CFRowsIt cfrows_end; - - using ColRowsIt = - FlattenedIterator; - ColRowsIt colrows_begin(std::move(cfrows_begin), cfrows_end); - ColRowsIt colrows_end(cfrows_end, cfrows_end); - - using CellRowsIt = - FlattenedIterator; - CellRowsIt cellrows_begin(std::move(colrows_begin), colrows_end); - CellRowsIt cellrows_end(colrows_end, colrows_end); - std::cout << "Print start" << std::endl; - - return CellStream ([cellrows_begin, cellrows_end]() mutable - -> absl::optional { - if (cellrows_begin == cellrows_end) { - return {}; - } - return *cellrows_begin++; - }); -} - StatusOr CreateStringRangeSet( google::bigtable::v2::RowSet const& row_set) { StringRangeSet res; @@ -469,7 +346,12 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, row_set = std::make_shared(StringRangeSet::All()); } std::lock_guard lock(mu_); - auto stream = ReadRowsInternal(std::move(row_set)); + std::vector per_cf_streams; + for (auto const & column_family: column_families_) { + per_cf_streams.emplace_back(FilteredColumnFamilyStream( + *column_family.second, column_family.first, row_set)); + } + auto stream = JoinCellStreams(std::move(per_cf_streams)); FilterContext ctx; if (request.has_filter()) { auto maybe_stream = CreateFilter(request.filter(), std::move(stream), ctx); diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index f751ee3aae1ca..5d1983abf0b27 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -60,7 +60,6 @@ class Table { StatusOr> 
FindColumnFamily( MESSAGE const& message) const; bool IsDeleteProtectedNoLock() const; - CellStream ReadRowsInternal(std::shared_ptr row_set) const; Status Construct(google::bigtable::admin::v2::Table schema); mutable std::mutex mu_; From cc02728693b5d7e31e0642eb462ac21bb47a7b72 Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Tue, 28 Jan 2025 15:57:54 +0100 Subject: [PATCH 008/195] Refactor --- .../cloud/bigtable/emulator/column_family.cc | 20 ++- .../cloud/bigtable/emulator/column_family.h | 28 +--- google/cloud/bigtable/emulator/filter.cc | 140 ++++++++++++------ google/cloud/bigtable/emulator/filter.h | 48 ++++-- google/cloud/bigtable/emulator/table.cc | 2 +- 5 files changed, 153 insertions(+), 85 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 93a262a8cd22f..d721965899fd4 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -168,14 +168,16 @@ FilteredColumnFamilyStream::FilteredColumnFamilyStream( row_it_(rows_.begin()), initialized_(false) {} -absl::optional FilteredColumnFamilyStream::operator()() { +absl::optional FilteredColumnFamilyStream::Next() { InitializeIfNeeded(); if (row_it_ == rows_.end()) { return {}; } - return CellView(row_it_->first, column_family_name_, - column_it_.value()->first, cell_it_.value()->first, - cell_it_.value()->second); + auto res = + CellView(row_it_->first, column_family_name_, column_it_.value()->first, + cell_it_.value()->first, cell_it_.value()->second); + Advance(); + return res; } bool FilteredColumnFamilyStream::ApplyFilter( @@ -183,7 +185,7 @@ bool FilteredColumnFamilyStream::ApplyFilter( return absl::visit(FilterApply(*this), internal_filter); } -void FilteredColumnFamilyStream::SkipCurrentColumn() { +bool FilteredColumnFamilyStream::SkipColumn() { ++(column_it_.value()); if (PointToFirstCellAfterColumnChange()) { return; @@ -191,11 +193,13 @@ void 
FilteredColumnFamilyStream::SkipCurrentColumn() { // no more cells in this row ++row_it_; PointToFirstCellAfterRowChange(); + return true; } -void FilteredColumnFamilyStream::SkipCurrentRow() { +bool FilteredColumnFamilyStream::SkipRow() { ++row_it_; PointToFirstCellAfterRowChange(); + return true; } void FilteredColumnFamilyStream::InitializeIfNeeded() { @@ -205,7 +209,7 @@ void FilteredColumnFamilyStream::InitializeIfNeeded() { } } -void FilteredColumnFamilyStream::Next() { +void FilteredColumnFamilyStream::Advance() { assert(row_it_ != rows_.end()); assert(column_it_.value() != columns_.value().end()); assert(cell_it_.value() != cells_.value().end()); @@ -213,7 +217,7 @@ void FilteredColumnFamilyStream::Next() { if (cell_it_.value() != cells_.value().end()) { return; } - SkipCurrentColumn(); + SkipColumn(); } // Returns whether we've managed to find another cell in currently pointed row diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 5e58f3918a1bb..b5ccad34fd8a2 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -18,6 +18,7 @@ #include #include #include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/bigtable/emulator/cell_view.h" #include "absl/types/optional.h" @@ -100,35 +101,20 @@ class ColumnFamily { private: std::map rows_; }; - -struct RowKeyRegex { - std::string regex; -}; -struct FamilyNameRegex { - std::string regex; -}; -struct ColumnRegex { - std::string regex; -}; - -using InternalFilter = absl::variant; - -class FilteredColumnFamilyStream { +class FilteredColumnFamilyStream : public AbstractCellStreamImpl { public: FilteredColumnFamilyStream(ColumnFamily const& column_family, std::string column_family_name, std::shared_ptr row_set); - absl::optional operator()(); - bool 
ApplyFilter(InternalFilter const& internal_filter); - void SkipCurrentColumn(); - void SkipCurrentRow(); + absl::optional Next() override; + bool ApplyFilter(InternalFilter const& internal_filter) override; + bool SkipColumn() override; + bool SkipRow() override; private: class FilterApply; - void Next(); + void Advance(); void InitializeIfNeeded(); // Returns whether we've managed to find another cell in currently pointed row bool PointToFirstCellAfterColumnChange(); diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index a3be80129d1c2..04a531f0f407a 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ b/google/cloud/bigtable/emulator/filter.cc @@ -130,17 +130,55 @@ class PerColumnStateFilter { StateResetFunctor reset_; }; +template +class TrivialTransformer : public AbstractCellStreamImpl { + TrivialTransformer(CellStream source, Transformer transformer) + : source_(std::move(source)), transformer_(std::move(transformer)) {} + + absl::optional Next() override { + auto res = source_.Next(); + if (!res) { + return res; + } + return transformer_(res); + } + + bool SkipRow() override { return source_->SkipRow(); } + bool SkipColumn() override { return source_->SkipColumn(); } + + private: + CellStream source_; + Transformer transformer_; +}; +template +CellStream MakeTrivialTransformer(CellStream source, Transformer transformer) { + return CellStream(TrivialTransformer(std::move(source), std::move(transformer))); +} + +template +class TrivialFilter : public AbstractCellStreamImpl { + TrivialFilter(CellStream source, Filter filter) + : source_(std::move(source)), filter_(std::move(filter)) {} + + absl::optional Next() override { + for (; source_ && !filter_(*source_); ++source_); + if (!source_) { + return {}; + } + return source_++; + } + + bool SkipRow() override { return source_->SkipRow(); } + bool SkipColumn() override { return source_->SkipColumn(); } + + private: + CellStream source_; + Filter filter_; +}; + 
template CellStream MakeTrivialFilter(CellStream source, Filter filter) { - return CellStream( - [source = std::move(source), - filter = std::move(filter)]() mutable -> absl::optional { - for (; source && !filter(*source); ++source); - if (!source) { - return {}; - } - return source++; - }); + return CellStream(TrivialFilter(std::move(source), std::move(filter))); } template @@ -244,8 +282,7 @@ class ValueRangeFilter { bool end_closed_; }; - -class MergeCellStreams { +class MergeCellStreams : public AbstractCellStreamImpl { public: class CellStreamGreater { public: @@ -279,7 +316,31 @@ class MergeCellStreams { } } - absl::optional operator()() { + bool ApplyFilter(InternalFilter const& internal_filter) override { + bool res = true; + for (auto & stream : unfinished_streams_) { + res &&= stream.ApplyFilter(unfinished_streams); + } + return res; + } + + bool SkipRow() override { + bool res = true; + for (auto & stream : unfinished_streams) { + res &&= stream.SkipRow(); + } + return res; + } + + bool SkipColumn() override { + bool res = true; + for (auto & stream : unfinished_streams) { + res &&= stream.SkipColumn(); + } + return res; + } + + absl::optional Next() override { if (unfinished_streams_.empty()) { return {}; } @@ -376,6 +437,15 @@ class ConditionStream { absl::optional condition_true_; }; +class EmptyCellStreamImpl : public AbstractCellStreamImpl { + virtual bool ApplyFilter(InternalFilter const& ) { + return true; + } + virtual absl::optional Next() { return {}; } + virtual bool SkipRow() { return true; } + virtual bool SkipColumn() { return true; } +}; + StatusOr CreateFilterImpl( ::google::bigtable::v2::RowFilter const& filter, CellStream source, FilterContext const& ctx, std::vector &direct_sinks) { @@ -393,7 +463,7 @@ StatusOr CreateFilterImpl( "`block_all_filter` explicitly set to `false`.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - return CellStream([]() -> absl::optional { return {}; }); + return 
CellStream(std::make_shared()); } if (filter.has_row_key_regex_filter()) { std::cout << "Regex filter: " << filter.row_key_regex_filter() << std::endl; @@ -474,6 +544,9 @@ StatusOr CreateFilterImpl( }); } if (filter.has_column_range_filter()) { + if (source.ApplyFilter(filter.column_range_filter())) { + return source; + } return MakeTrivialFilter( std::move(source), [qualifier_filter = ValueRangeFilter(filter.column_range_filter()), @@ -554,19 +627,12 @@ StatusOr CreateFilterImpl( "Two `apply_label_transformer`s cannot coexist in one chain.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - return CellStream([source = std::move(source), - label = std::make_shared( - filter.apply_label_transformer())]() mutable - -> absl::optional { - if (!source) { - return {}; - } - CellView res = source++; - std::cout << "Label " << label - << " being set on cell value: " << res.value() << std::endl; - res.SetLabel(*label); - return res; - }); + return MakeTrivialTransformer( + std::move(source), + [label = std::make_shared( + filter.apply_label_transformer())](CellView cell_view) { + cell_view.SetLabel(*label); + }); } if (filter.has_strip_value_transformer()) { if (!filter.strip_value_transformer()) { @@ -574,18 +640,8 @@ StatusOr CreateFilterImpl( "`strip_value_transformer` explicitly set to `false`.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - return CellStream( - [source = std::move(source), - empty = std::string()]() mutable -> absl::optional { - // We want `empty` to explicitly live for as long as the filter so - // that the values returned by the filter are valid. 
- if (!source) { - return {}; - } - auto res = source++; - res.SetValue(empty); - return res; - }); + return MakeTrivialTransformer( + std::move(source), [](CellView cell_view) { cell_view.SetValue(""); }); } if (filter.has_chain()) { CellStream res = std::move(source); @@ -598,7 +654,7 @@ StatusOr CreateFilterImpl( GCP_ERROR_INFO().WithMetadata("filter", subfilter.DebugString())); } direct_sinks.emplace_back(std::move(res)); - return CellStream([]() -> absl::optional { return {}; }); + return CellStream(std::make_shared()); } auto maybe_res = CreateFilterImpl(subfilter, std::move(res), ctx, direct_sinks); @@ -629,7 +685,7 @@ StatusOr CreateFilterImpl( parallel_streams.emplace_back(*maybe_filter); } if (parallel_streams.empty()) { - return CellStream([]() -> absl::optional { return {}; }); + return CellStream(std::make_shared()); } return CellStream(MergeCellStreams(parallel_streams)); } @@ -649,7 +705,7 @@ StatusOr CreateFilterImpl( ? CreateFilterImpl(filter.condition().true_filter(), source, ctx, direct_sinks) : StatusOr( - CellStream([]() -> absl::optional { return {}; })); + CellStream(std::make_shared()); if (!maybe_true_stream) { return maybe_true_stream.status(); } @@ -658,7 +714,7 @@ StatusOr CreateFilterImpl( ? 
CreateFilterImpl(filter.condition().false_filter(), source, ctx, direct_sinks) : StatusOr( - CellStream([]() -> absl::optional { return {}; })); + CellStream(std::make_shared())); if (!maybe_false_stream) { return maybe_true_stream.status(); } diff --git a/google/cloud/bigtable/emulator/filter.h b/google/cloud/bigtable/emulator/filter.h index e5167a0e7db46..6d5e69e9c423b 100644 --- a/google/cloud/bigtable/emulator/filter.h +++ b/google/cloud/bigtable/emulator/filter.h @@ -24,14 +24,45 @@ namespace cloud { namespace bigtable { namespace emulator { + +struct RowKeyRegex { + std::string regex; +}; +struct FamilyNameRegex { + std::string regex; +}; +struct ColumnRegex { + std::string regex; +}; + +using InternalFilter = absl::variant; + +class AbstractCellStreamImpl { + public: + virtual ~AbstractCellStreamImpl() = default; + + virtual bool ApplyFilter(InternalFilter const& internal_filter) = 0; + virtual absl::optional Next() = 0; + // Make sure Next() returns a different row than on last invocation. Noop in + // Next() was never called before. + virtual bool SkipRow() = 0; + // Make sure Next() returns a different (row, column) pair than on last + // invocation. Noop in Next() was never called before. 
+ virtual bool SkipColumn() = 0; +}; + class CellStream { public: - CellStream(std::function()> impl) - : impl_(std::move(impl)), current_(impl_()) {} + CellStream(std::shared_ptr impl) + : impl_(std::move(impl)), current_(impl_->Next()) {} bool HasValue() const { return current_.has_value(); } CellView const & Value() const { return *current_; } - void Next() { current_ = impl_(); } + void Next() { current_ = impl_->Next(); } + bool SkipColumn() { return impl_->SkipColumn(); } + bool SkipRow() { return impl_->SkipRow(); } void operator++(); CellView operator++(int); CellView operator*() const { return Value(); } @@ -39,19 +70,10 @@ class CellStream { explicit operator bool() const { return HasValue(); } private: - std::function()> impl_; + std::shared_ptr impl_; absl::optional current_; }; -class AbstractCellStreamImpl { - public: - virtual ~AbstractCellStreamImpl() = default; - - virtual absl::optional Next() = 0; - bool SkipColumn(); - bool SkipRow(); -}; - class FilterContext { public: FilterContext() : allow_apply_label_(true) {} diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 3789b612dd7c1..f7b6542556012 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -348,7 +348,7 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, std::lock_guard lock(mu_); std::vector per_cf_streams; for (auto const & column_family: column_families_) { - per_cf_streams.emplace_back(FilteredColumnFamilyStream( + per_cf_streams.emplace_back(std::make_unique( *column_family.second, column_family.first, row_set)); } auto stream = JoinCellStreams(std::move(per_cf_streams)); From ff49c5d2291bb92a5b77186fcf965579c9d695fd Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Mon, 3 Feb 2025 19:49:26 +0100 Subject: [PATCH 009/195] Further fixes after refactor. 
--- .../cloud/bigtable/emulator/column_family.cc | 115 +-- .../cloud/bigtable/emulator/column_family.h | 33 +- google/cloud/bigtable/emulator/filter.cc | 655 +++++++++++------- google/cloud/bigtable/emulator/filter.h | 83 ++- google/cloud/bigtable/emulator/range_set.cc | 144 ++++ google/cloud/bigtable/emulator/range_set.h | 20 + google/cloud/bigtable/emulator/table.cc | 86 ++- 7 files changed, 724 insertions(+), 412 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index d721965899fd4..5ed4c9063b751 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -19,38 +19,6 @@ namespace google { namespace cloud { namespace bigtable { namespace emulator { -namespace { - -StringRangeSet::Range CreateColumnRange( - ::google::bigtable::v2::ColumnRange const& column_range) { - StringRangeSet::Range::Value start; - bool start_open; - StringRangeSet::Range::Value end; - bool end_open; - if (column_range.has_start_qualifier_closed()) { - start = StringRangeSet::Range::Value(column_range.start_qualifier_closed()); - start_open = false; - } else if (column_range.has_start_qualifier_open()) { - start = StringRangeSet::Range::Value(column_range.start_qualifier_open()); - start_open = true; - } else { - start_open = false; - start = StringRangeSet::Range::Value(""); - } - if (column_range.has_end_qualifier_closed()) { - end = StringRangeSet::Range::Value(column_range.end_qualifier_closed()); - end_open = false; - } else if (column_range.has_end_qualifier_open()) { - end = StringRangeSet::Range::Value(column_range.end_qualifier_open()); - end_open = true; - } else { - end = StringRangeSet::Range::Infinity{}; - end_open = true; - } - return StringRangeSet::Range(std::move(start), start_open, std::move(end), - end_open); -} -} // anonymous namespace void ColumnRow::SetCell(std::chrono::milliseconds timestamp, std::string const& value) { @@ -127,19 +95,13 
@@ class FilteredColumnFamilyStream::FilterApply { public: FilterApply(FilteredColumnFamilyStream& parent) : parent_(parent) {} - bool operator()(google::bigtable::v2::ColumnRange const& column_range) { - parent_.column_ranges_->Insert(CreateColumnRange(column_range)); + bool operator()(ColumnRange const& column_range) { + parent_.column_ranges_->Insert(column_range.range); return true; } - bool operator()(google::bigtable::v2::TimestampRange const& timestamp_range) { - parent_.timestamp_ranges_->Insert(TimestampRangeSet::Range( - std::chrono::duration_cast( - std::chrono::microseconds( - timestamp_range.start_timestamp_micros())), - std::chrono::duration_cast( - std::chrono::microseconds( - timestamp_range.end_timestamp_micros())))); + bool operator()(TimestampRange const& timestamp_range) { + parent_.timestamp_ranges_->Insert(timestamp_range.range); return true; } @@ -168,60 +130,59 @@ FilteredColumnFamilyStream::FilteredColumnFamilyStream( row_it_(rows_.begin()), initialized_(false) {} -absl::optional FilteredColumnFamilyStream::Next() { - InitializeIfNeeded(); - if (row_it_ == rows_.end()) { - return {}; - } - auto res = - CellView(row_it_->first, column_family_name_, column_it_.value()->first, - cell_it_.value()->first, cell_it_.value()->second); - Advance(); - return res; -} - bool FilteredColumnFamilyStream::ApplyFilter( InternalFilter const& internal_filter) { + assert(!initialized_); return absl::visit(FilterApply(*this), internal_filter); } -bool FilteredColumnFamilyStream::SkipColumn() { - ++(column_it_.value()); - if (PointToFirstCellAfterColumnChange()) { - return; +bool FilteredColumnFamilyStream::HasValue() const { + InitializeIfNeeded(); + return row_it_ != rows_.end(); +} +CellView const& FilteredColumnFamilyStream::Value() const { + InitializeIfNeeded(); + if (!cur_value_) { + cur_value_ = + CellView(row_it_->first, column_family_name_, column_it_.value()->first, + cell_it_.value()->first, cell_it_.value()->second); } - // no more cells in this 
row - ++row_it_; - PointToFirstCellAfterRowChange(); - return true; + return cur_value_.value(); } -bool FilteredColumnFamilyStream::SkipRow() { +bool FilteredColumnFamilyStream::Next(NextMode mode) { + InitializeIfNeeded(); + cur_value_.reset(); + assert(row_it_ != rows_.end()); + assert(column_it_.value() != columns_.value().end()); + assert(cell_it_.value() != cells_.value().end()); + + if (mode == NextMode::kCell) { + ++(cell_it_.value()); + if (cell_it_.value() != cells_.value().end()) { + return true; + } + } + if (mode == NextMode::kCell || mode == NextMode::kColumn) { + ++(column_it_.value()); + if (PointToFirstCellAfterColumnChange()) { + return true; + } + } ++row_it_; PointToFirstCellAfterRowChange(); return true; } -void FilteredColumnFamilyStream::InitializeIfNeeded() { +void FilteredColumnFamilyStream::InitializeIfNeeded() const { if (!initialized_) { PointToFirstCellAfterRowChange(); initialized_ = true; } } -void FilteredColumnFamilyStream::Advance() { - assert(row_it_ != rows_.end()); - assert(column_it_.value() != columns_.value().end()); - assert(cell_it_.value() != cells_.value().end()); - ++(cell_it_.value()); - if (cell_it_.value() != cells_.value().end()) { - return; - } - SkipColumn(); -} - // Returns whether we've managed to find another cell in currently pointed row -bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() { +bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() const { for (; column_it_.value() != columns_.value().end(); ++(column_it_.value())) { cells_ = FilteredMapView( column_it_.value()->second, *timestamp_ranges_); @@ -234,7 +195,7 @@ bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() { } // Returns whether we've managed to find another cell -bool FilteredColumnFamilyStream::PointToFirstCellAfterRowChange() { +bool FilteredColumnFamilyStream::PointToFirstCellAfterRowChange() const { for (; row_it_ != rows_.end(); ++row_it_) { columns_ = FilteredMapView( row_it_->second, 
*column_ranges_); diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index b5ccad34fd8a2..3cfcbfb9e23b1 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -106,43 +106,46 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { FilteredColumnFamilyStream(ColumnFamily const& column_family, std::string column_family_name, std::shared_ptr row_set); - absl::optional Next() override; bool ApplyFilter(InternalFilter const& internal_filter) override; - bool SkipColumn() override; - bool SkipRow() override; + bool HasValue() const override; + CellView const &Value() const override; + bool Next(NextMode mode) override; + std::string const& column_family_name() const { return column_family_name_; } private: class FilterApply; - void Advance(); - void InitializeIfNeeded(); + void InitializeIfNeeded() const; // Returns whether we've managed to find another cell in currently pointed row - bool PointToFirstCellAfterColumnChange(); + bool PointToFirstCellAfterColumnChange() const; // Returns whether we've managed to find another cell - bool PointToFirstCellAfterRowChange(); + bool PointToFirstCellAfterRowChange() const; std::string column_family_name_; std::shared_ptr row_ranges_; - std::vector row_regexes_; + std::vector> row_regexes_; std::shared_ptr column_ranges_; - std::vector column_regexes_; + std::vector> column_regexes_; std::shared_ptr timestamp_ranges_; FilteredMapView rows_; - absl::optional> columns_; - absl::optional> cells_; + mutable absl::optional> + columns_; + mutable absl::optional> cells_; // If row_it_ == rows_.end() we've reached the end. 
// We keep the invariant that if (row_it_ != rows_.end()) then // cell_it_ != cells.end() && column_it_ != columns_.end() - FilteredMapView::const_iterator row_it_; - absl::optional< + mutable FilteredMapView::const_iterator row_it_; + mutable absl::optional< FilteredMapView::const_iterator> column_it_; - absl::optional::const_iterator> + mutable absl::optional< + FilteredMapView::const_iterator> cell_it_; - bool initialized_; + mutable absl::optional cur_value_; + mutable bool initialized_; }; } // namespace emulator diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index 04a531f0f407a..8d8c235cbdfae 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ b/google/cloud/bigtable/emulator/filter.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/status_or.h" +#include "google/cloud/internal/invoke_result.h" #include "google/cloud/internal/make_status.h" #include "google/cloud/bigtable/internal/google_bytes_traits.h" #include @@ -25,9 +27,13 @@ namespace cloud { namespace bigtable { namespace emulator { namespace { + bool StringRefEq(std::string const &s1, std::string const &s2) { return &s1 == &s2 || s1 == s2; } + +bool PassAllFilters(InternalFilter const&) { return true; } + } // namespace FilterContext& FilterContext::DisallowApplyLabel() { @@ -35,36 +41,49 @@ FilterContext& FilterContext::DisallowApplyLabel() { return *this; } -void CellStream::operator++() { - Next(); -} - -CellView CellStream::operator++(int) { - CellView tmp = Value(); - Next(); - return tmp; +void CellStream::Next(NextMode mode) { + if (impl_->Next(mode)) { + return; + } + if (mode == NextMode::kColumn) { + std::string cur_row_key = impl_->Value().row_key(); + std::string cur_column_family = impl_->Value().column_family(); + std::string cur_column_qualifier = impl_->Value().column_qualifier(); + for 
(impl_->Next(); + impl_->HasValue() && cur_row_key == impl_->Value().row_key() && + cur_column_family == impl_->Value().column_family() && + cur_column_qualifier == impl_->Value().column_qualifier(); + impl_->Next()); + return; + } + assert(mode == NextMode::kRow); + std::string cur_row_key = impl_->Value().row_key(); + for (Next(NextMode::kColumn); + impl_->HasValue() && cur_row_key == impl_->Value().row_key(); + impl_->Next(NextMode::kColumn)); } template class PerRowStateFilter { - static_assert(std::is_invocable_v, - "StateResetFunctor must be invocable with no arguments"); - using State = std::decay_t>; + static_assert( + google::cloud::internal::is_invocable::value, + "StateResetFunctor must be invocable with no arguments"); + using State = + std::decay_t>; static_assert(std::is_default_constructible_v, "State must be default constructible"); static_assert(std::is_assignable_v, "State must assignable"); - static_assert( - std::is_same_v< - std::result_of_t, - bool>, - "The result of FilterFunctor invocation must be a `bool`"); + static_assert(std::is_same_v, + absl::optional>, + "Invalid result of `FilterFunctor` invocation."); public: PerRowStateFilter(FilterFunctor filter, StateResetFunctor reset) : filter_(std::move(filter)), reset_(std::move(reset)) {} - bool operator()(CellView const &cell_view) { + absl::optional operator()(CellView const &cell_view) { if (!prev_row_ || !StringRefEq(prev_row_.value().get(), cell_view.row_key())) { state_ = reset_(); @@ -81,24 +100,25 @@ class PerRowStateFilter { template class PerColumnStateFilter { - static_assert(std::is_invocable_v, - "StateResetFunctor must be invocable with no arguments"); - using State = std::decay_t>; + static_assert( + google::cloud::internal::is_invocable::value, + "StateResetFunctor must be invocable with no arguments"); + using State = + std::decay_t>; static_assert(std::is_default_constructible_v, "State must be default constructible"); static_assert(std::is_assignable_v, "State must 
assignable"); - static_assert( - std::is_same_v< - std::result_of_t, - bool>, - "The result of FilterFunctor invocation must be a `bool`"); + static_assert(std::is_same_v, + absl::optional>, + "Invali result of `FilterFunctor` invocation."); public: PerColumnStateFilter(FilterFunctor filter, StateResetFunctor reset) : filter_(std::move(filter)), reset_(std::move(reset)) {} - bool operator()(CellView const &cell_view) { + absl::optional operator()(CellView const &cell_view) { if (!prev_|| !prev_->Matches(cell_view)) { state_ = reset_(); prev_ = Prev(cell_view); @@ -132,235 +152,244 @@ class PerColumnStateFilter { template class TrivialTransformer : public AbstractCellStreamImpl { + public: TrivialTransformer(CellStream source, Transformer transformer) : source_(std::move(source)), transformer_(std::move(transformer)) {} - absl::optional Next() override { - auto res = source_.Next(); - if (!res) { - return res; + bool ApplyFilter(InternalFilter const& ) override { + return false; + } + + bool HasValue() const override { return source_.HasValue(); } + + CellView const& Value() const override { + if (!transformed_) { + transformed_ = absl::optional(transformer_(source_.Value())); } - return transformer_(res); + return transformed_.value(); } - bool SkipRow() override { return source_->SkipRow(); } - bool SkipColumn() override { return source_->SkipColumn(); } + bool Next(NextMode mode) override { + source_.Next(mode); + transformed_.reset(); + return true; + } private: CellStream source_; Transformer transformer_; + mutable absl::optional transformed_; }; + template CellStream MakeTrivialTransformer(CellStream source, Transformer transformer) { - return CellStream(TrivialTransformer(std::move(source), std::move(transformer))); + return CellStream(std::make_shared>( + std::move(source), std::move(transformer))); } -template +template class TrivialFilter : public AbstractCellStreamImpl { - TrivialFilter(CellStream source, Filter filter) - : source_(std::move(source)), 
filter_(std::move(filter)) {} + static_assert( + std::is_same_v< + google::cloud::internal::invoke_result_t, + absl::optional>, + "Invalid filter return type"); - absl::optional Next() override { - for (; source_ && !filter_(*source_); ++source_); - if (!source_) { - return {}; + public: + TrivialFilter(CellStream source, Filter filter, + std::function filter_filter) + : initialized_(false), + source_(std::move(source)), + filter_(std::move(filter)), + filter_filter_(std::move(filter_filter)) {} + + bool ApplyFilter(InternalFilter const& filter) override { + if (filter_filter_(filter)) { + return source_.ApplyFilter(filter); } - return source_++; + return false; + } + + bool HasValue() const override { + InitializeIfNeeded(); + return source_.HasValue(); + } + + CellView const& Value() const override { + InitializeIfNeeded(); + return source_.Value(); } - bool SkipRow() override { return source_->SkipRow(); } - bool SkipColumn() override { return source_->SkipColumn(); } + bool Next(NextMode mode) override { + source_.Next(mode); + EnsureCurrentNotFiltered(); + return true; + } private: - CellStream source_; - Filter filter_; + void EnsureCurrentNotFiltered() const { + while (source_.HasValue()) { + auto maybe_next_mode = filter_(*source_); + if (!maybe_next_mode) { + return; + } + source_.Next(*maybe_next_mode); + } + } + + void InitializeIfNeeded() const { + if (!initialized_) { + EnsureCurrentNotFiltered(); + initialized_ = true; + } + } + + mutable bool initialized_; + mutable CellStream source_; + mutable Filter filter_; + std::function filter_filter_; }; template -CellStream MakeTrivialFilter(CellStream source, Filter filter) { - return CellStream(TrivialFilter(std::move(source), std::move(filter))); +CellStream MakeTrivialFilter( + CellStream source, Filter filter, + std::function filter_filter = PassAllFilters) { + return CellStream(std::make_shared>( + std::move(source), std::move(filter), std::move(filter_filter))); } template -CellStream 
MakePerRowStateFilter(CellStream source, FilterFunctor filter, - StateResetFunctor state_reset) { +CellStream MakePerRowStateFilter( + CellStream source, FilterFunctor filter, StateResetFunctor state_reset, + std::function filter_filter = PassAllFilters) { return MakeTrivialFilter(std::move(source), PerRowStateFilter( - std::move(filter), std::move(state_reset))); + std::move(filter), std::move(state_reset)), + std::move(filter_filter)); } template -CellStream MakePerColumnStateFilter(CellStream source, FilterFunctor filter, - StateResetFunctor state_reset) { +CellStream MakePerColumnStateFilter( + CellStream source, FilterFunctor filter, StateResetFunctor state_reset, + std::function filter_filter = PassAllFilters) { return MakeTrivialFilter( - std::move(source), PerColumnStateFilter( - std::move(filter), std::move(state_reset))); + std::move(source), + PerColumnStateFilter( + std::move(filter), std::move(state_reset)), + std::move(filter_filter)); } -class ValueRangeFilter { - public: - ValueRangeFilter(::google::bigtable::v2::ColumnRange const &column_range) : - string_cmp_(internal::CompareColumnQualifiers) - { - if (column_range.has_start_qualifier_closed()) { - start_ = column_range.start_qualifier_closed(); - start_closed_ = true; - } else if (column_range.has_start_qualifier_open()) { - start_ = column_range.start_qualifier_open(); - start_closed_ = false; - } else { - start_closed_ = true; - } - if (column_range.has_end_qualifier_closed()) { - end_ = column_range.end_qualifier_closed(); - end_closed_ = true; - has_end_ = true; - } else if (column_range.has_end_qualifier_open()) { - end_ = column_range.end_qualifier_open(); - end_closed_ = false; - has_end_ = true; - } else { - has_end_ = false; - } - } - - ValueRangeFilter(::google::bigtable::v2::ValueRange const& value_range) - : string_cmp_(internal::CompareCellValues) { - if (value_range.has_start_value_closed()) { - start_ = value_range.start_value_closed(); - start_closed_ = true; - } else if 
(value_range.has_start_value_open()) { - start_ = value_range.start_value_open(); - start_closed_ = false; - } else { - start_closed_ = true; - } - if (value_range.has_end_value_closed()) { - end_ = value_range.end_value_closed(); - end_closed_ = true; - has_end_ = true; - } else if (value_range.has_end_value_open()) { - end_ = value_range.end_value_open(); - end_closed_ = false; - has_end_ = true; - } else { - has_end_ = false; - } - } - - bool WithinRange(std::string const &val) const { - if (start_closed_) { - if (string_cmp_(start_, val) > 0) { - return false; - } - } else { - if (string_cmp_(start_, val) >= 0) { - return false; - } - } - if (!has_end_) { - return true; - } - if (end_closed_) { - if (string_cmp_(val, end_) > 0) { - return false; - } - } else { - if (string_cmp_(val, end_) >= 0) { - return false; - } - } - return true; +bool MergeCellStreams::CellStreamGreater::operator()( + std::unique_ptr const& lhs, + std::unique_ptr const& rhs) const { + auto row_key_cmp = + internal::CompareRowKey((*lhs)->row_key(), (*rhs)->row_key()); + if (row_key_cmp != 0) { + return row_key_cmp > 0; + } + auto cf_cmp = internal::CompareColumnQualifiers((*lhs)->column_family(), + (*rhs)->column_family()); + if (cf_cmp != 0) { + return cf_cmp > 0; + } + auto col_cmp = internal::CompareColumnQualifiers((*lhs)->column_qualifier(), + (*rhs)->column_qualifier()); + if (col_cmp != 0) { + return col_cmp > 0; + } + return (*lhs)->timestamp() > (*rhs)->timestamp(); +} + +MergeCellStreams::MergeCellStreams(std::vector streams) { + for (auto& stream : streams) { + unfinished_streams_.emplace_back( + std::make_unique(std::move(stream))); } +} - private: - std::function string_cmp_; - std::string start_; - std::string end_; - bool start_closed_; - bool has_end_; - bool end_closed_; -}; +bool MergeCellStreams::ApplyFilter(InternalFilter const& internal_filter) { + assert(!initialized_); + bool res = true; + for (auto& stream : unfinished_streams_) { + res = res && 
stream->ApplyFilter(internal_filter); + } + return res; +} -class MergeCellStreams : public AbstractCellStreamImpl { - public: - class CellStreamGreater { - public: - bool operator()(std::shared_ptr const& lhs, - std::shared_ptr const& rhs) const { - auto row_key_cmp = - internal::CompareRowKey((*lhs)->row_key(), (*rhs)->row_key()); - if (row_key_cmp != 0) { - return row_key_cmp > 0; - } - auto cf_cmp = internal::CompareColumnQualifiers((*lhs)->column_family(), - (*rhs)->column_family()); - if (cf_cmp != 0) { - return cf_cmp > 0; - } - auto col_cmp = internal::CompareColumnQualifiers( - (*lhs)->column_qualifier(), (*rhs)->column_qualifier()); - if (col_cmp != 0) { - return col_cmp > 0; - } - return (*lhs)->timestamp() > (*rhs)->timestamp(); - } - }; +bool MergeCellStreams::HasValue() const { + InitializeIfNeeded(); + return !unfinished_streams_.empty(); +} - MergeCellStreams(std::vector streams) { - for (auto &stream : streams) { - if (stream.HasValue()) { - unfinished_streams_.emplace( - std::make_shared(std::move(stream))); - } - } - } +CellView const& MergeCellStreams::Value() const { + InitializeIfNeeded(); + return unfinished_streams_.front()->Value(); +} - bool ApplyFilter(InternalFilter const& internal_filter) override { - bool res = true; - for (auto & stream : unfinished_streams_) { - res &&= stream.ApplyFilter(unfinished_streams); - } - return res; +bool MergeCellStreams::Next(NextMode mode) { + InitializeIfNeeded(); + if (unfinished_streams_.empty()) { + return true; } - - bool SkipRow() override { - bool res = true; - for (auto & stream : unfinished_streams) { - res &&= stream.SkipRow(); - } - return res; + if (mode != NextMode::kCell) { + SkipRowOrColumn(mode); + return true; } + std::pop_heap(unfinished_streams_.begin(), unfinished_streams_.end(), + CellStreamGreater()); + auto& stream_to_advance = unfinished_streams_.back(); + stream_to_advance->Next(); + if (stream_to_advance->HasValue()) { + std::push_heap(unfinished_streams_.begin(), 
unfinished_streams_.end(), + CellStreamGreater()); + } else { + unfinished_streams_.pop_back(); + } + return true; +} - bool SkipColumn() override { - bool res = true; - for (auto & stream : unfinished_streams) { - res &&= stream.SkipColumn(); - } - return res; +void MergeCellStreams::InitializeIfNeeded() const { + if (!initialized_) { + ReassesStreams(); + initialized_ = true; } +} - absl::optional Next() override { - if (unfinished_streams_.empty()) { - return {}; - } - auto stream_to_advance = unfinished_streams_.top(); - unfinished_streams_.pop(); - CellView res = stream_to_advance->Value(); - stream_to_advance->Next(); - if (stream_to_advance->HasValue()) { - unfinished_streams_.emplace(std::move(stream_to_advance)); +void MergeCellStreams::ReassesStreams() const { + for (auto stream_it = unfinished_streams_.begin(); + stream_it != unfinished_streams_.end(); ++stream_it) { + if (!(*stream_it)->HasValue()) { + stream_it->swap(unfinished_streams_.back()); + unfinished_streams_.pop_back(); } - return res; } + std::make_heap(unfinished_streams_.begin(), unfinished_streams_.end(), + CellStreamGreater()); +} - std::priority_queue, - std::vector>, - CellStreamGreater> - unfinished_streams_; -}; +bool MergeCellStreams::SkipRowOrColumn(NextMode mode) { + assert(mode != NextMode::kCell); + // The first element in `unfinished_streams_` is the stream beginning with the + // smallest Cell - the one we would normally return. Before we alter this + // stream alter all others which point to the same column/row. 
+ for (auto stream_it = std::next(unfinished_streams_.begin()); + stream_it != unfinished_streams_.end(); ++stream_it) { + if ((mode == NextMode::kRow || + ((*stream_it)->Value().column_qualifier() == + unfinished_streams_.front()->Value().column_qualifier() && + (*stream_it)->Value().column_family() == + unfinished_streams_.front()->Value().column_family())) && + (*stream_it)->Value().row_key() == + unfinished_streams_.front()->Value().row_key()) { + (*stream_it)->Next(mode); + } + } + unfinished_streams_.front()->Next(mode); + ReassesStreams(); + return true; +} -class ConditionStream { +class ConditionStream : public AbstractCellStreamImpl { public: ConditionStream(CellStream source, CellStream predicate, CellStream true_stream, CellStream false_stream) @@ -369,7 +398,23 @@ class ConditionStream { true_stream_(std::move(true_stream)), false_stream_(std::move(false_stream)) {} - absl::optional operator()() { + bool ApplyFilter(InternalFilter const& ) override { + return false; + } + + bool HasValue() const override { + return true; // FIXME + } + + CellView const &Value() const override { + return *source_; // FIXME + } + + bool Next(NextMode mode) override { + if (mode != NextMode::kCell) { + // FIXME - we can be smarter than that. + return false; + } while (true) { auto cell_view = *source_; @@ -393,7 +438,7 @@ class ConditionStream { for (; true_stream_ && internal::CompareRowKey(true_stream_->row_key(), cell_view.row_key()) < 0; - true_stream_.Next()); + true_stream_.Next(NextMode::kRow)); } else { // Predicate stream did not return anything for this row. 
condition_true_ = false; @@ -401,19 +446,19 @@ class ConditionStream { for (; false_stream_ && internal::CompareRowKey(false_stream_->row_key(), cell_view.row_key()) < 0; - false_stream_.Next()); + false_stream_.Next(NextMode::kRow)); } } if (*condition_true_) { if (true_stream_ && internal::CompareRowKey(true_stream_->row_key(), cell_view.row_key()) == 0) { - return true_stream_++; + return true; } } else { if (false_stream_ && internal::CompareRowKey(false_stream_->row_key(), cell_view.row_key()) == 0) { - return false_stream_++; + return true; } } // True/false stream exhausted, reset state and fast-forward source. @@ -421,7 +466,7 @@ class ConditionStream { for (; source_ && internal::CompareRowKey(source_->row_key(), prev_row_->get()) == 0; - source_.Next()); + source_.Next(NextMode::kRow)); if (!source_) { return {}; } @@ -438,12 +483,16 @@ class ConditionStream { }; class EmptyCellStreamImpl : public AbstractCellStreamImpl { - virtual bool ApplyFilter(InternalFilter const& ) { - return true; - } - virtual absl::optional Next() { return {}; } - virtual bool SkipRow() { return true; } - virtual bool SkipColumn() { return true; } + bool ApplyFilter(InternalFilter const&) override { return true; } + bool HasValue() const override { return false; } + CellView const& Value() const override { + assert(false); + // The code below makes no sense but it should be dead. 
+ static CellView dummy{"row", "cf", "col", std::chrono::milliseconds(0), + "val"}; + return dummy; + } + bool Next(NextMode) override { return true; } }; StatusOr CreateFilterImpl( @@ -466,7 +515,6 @@ StatusOr CreateFilterImpl( return CellStream(std::make_shared()); } if (filter.has_row_key_regex_filter()) { - std::cout << "Regex filter: " << filter.row_key_regex_filter() << std::endl; auto pattern = std::make_shared(filter.row_key_regex_filter()); if (!pattern->ok()) { return InvalidArgumentError( @@ -475,10 +523,17 @@ StatusOr CreateFilterImpl( .WithMetadata("filter", filter.DebugString()) .WithMetadata("description", pattern->error())); } + if (source.ApplyFilter(RowKeyRegex{pattern})) { + return source; + } return MakeTrivialFilter( std::move(source), - [pattern = std::move(pattern)](CellView const& cell_view) mutable { - return re2::RE2::PartialMatch(cell_view.row_key(), *pattern); + [pattern = std::move(pattern)]( + CellView const& cell_view) mutable -> absl::optional { + if (re2::RE2::PartialMatch(cell_view.row_key(), *pattern)) { + return {}; + } + return NextMode::kCell; }); } if (filter.has_value_regex_filter()) { @@ -492,8 +547,12 @@ StatusOr CreateFilterImpl( } return MakeTrivialFilter( std::move(source), - [pattern = std::move(pattern)](CellView const& cell_view) mutable { - return re2::RE2::PartialMatch(cell_view.value(), *pattern); + [pattern = std::move(pattern)]( + CellView const& cell_view) mutable -> absl::optional { + if (re2::RE2::PartialMatch(cell_view.value(), *pattern)) { + return {}; + } + return NextMode::kCell; }); } if (filter.has_row_sample_filter()) { @@ -506,14 +565,20 @@ StatusOr CreateFilterImpl( } return MakePerRowStateFilter( std::move(source), - [](bool& should_pass, CellView const&) { return should_pass; }, + [](bool& should_pass, CellView const&) -> absl::optional { + if (should_pass) { + return {}; + } + return NextMode::kRow; + }, [gen = std::mt19937(), pass_prob]() mutable { std::uniform_real_distribution dis(0.0, 1.0); 
return dis(gen) < pass_prob; }); } if (filter.has_family_name_regex_filter()) { - auto pattern = std::make_shared(filter.family_name_regex_filter()); + auto pattern = + std::make_shared(filter.family_name_regex_filter()); if (!pattern->ok()) { return InvalidArgumentError( "`family_name_regex_filter` is not a valid RE2 regex.", @@ -521,10 +586,18 @@ StatusOr CreateFilterImpl( .WithMetadata("filter", filter.DebugString()) .WithMetadata("description", pattern->error())); } + if (source.ApplyFilter(FamilyNameRegex{pattern})) { + return source; + } return MakeTrivialFilter( std::move(source), - [pattern = std::move(pattern)](CellView const& cell_view) mutable { - return re2::RE2::PartialMatch(cell_view.column_family(), *pattern); + [pattern = std::move(pattern)]( + CellView const& cell_view) mutable -> absl::optional { + if (re2::RE2::PartialMatch(cell_view.column_family(), *pattern)) { + return {}; + } + // FIXME we could introduce even column family skipping + return NextMode::kColumn; }); } if (filter.has_column_qualifier_regex_filter()) { @@ -537,31 +610,55 @@ StatusOr CreateFilterImpl( .WithMetadata("filter", filter.DebugString()) .WithMetadata("description", pattern->error())); } + if (source.ApplyFilter(ColumnRegex{pattern})) { + return source; + } return MakeTrivialFilter( std::move(source), - [pattern = std::move(pattern)](CellView const& cell_view) mutable { - return re2::RE2::PartialMatch(cell_view.column_qualifier(), *pattern); + [pattern = std::move(pattern)]( + CellView const& cell_view) mutable -> absl::optional { + if (re2::RE2::PartialMatch(cell_view.column_qualifier(), *pattern)) { + return {}; + } + return NextMode::kColumn; }); } if (filter.has_column_range_filter()) { - if (source.ApplyFilter(filter.column_range_filter())) { + auto maybe_range = + StringRangeSet::Range::FromColumnRange(filter.column_range_filter()); + if (!maybe_range) { + return maybe_range.status(); + } + if (source.ApplyFilter(ColumnRange{*maybe_range})) { return source; } return 
MakeTrivialFilter( std::move(source), - [qualifier_filter = ValueRangeFilter(filter.column_range_filter()), + [range = *std::move(maybe_range), column_family = filter.column_range_filter().family_name()]( - CellView const& cell_view) { - return cell_view.column_family() == column_family && - qualifier_filter.WithinRange(cell_view.column_qualifier()); + CellView const& cell_view) -> absl::optional { + if ( cell_view.column_family() == column_family && + range.IsWithin(cell_view.column_qualifier())) { + return {}; + } + // FIXME - we might know that we should skip the whole column family + return NextMode::kColumn; }); } if (filter.has_value_range_filter()) { + auto maybe_range = + StringRangeSet::Range::FromValueRange(filter.value_range_filter()); + if (!maybe_range) { + return maybe_range.status(); + } return MakeTrivialFilter( std::move(source), - [value_filter = ValueRangeFilter(filter.value_range_filter())]( - CellView const& cell_view) { - return value_filter.WithinRange(cell_view.value()); + [range = *std::move(maybe_range)]( + CellView const& cell_view) -> absl::optional { + if (range.IsWithin(cell_view.value())) { + return {}; + } + return NextMode::kCell; }); } if (filter.has_cells_per_row_offset_filter()) { @@ -573,10 +670,17 @@ StatusOr CreateFilterImpl( } return MakePerRowStateFilter( std::move(source), - [](std::int64_t& per_row_state, CellView const&) { - return per_row_state-- <= 0; + [](std::int64_t& per_row_state, + CellView const&) -> absl::optional { + if (per_row_state-- <= 0) { + return {}; + } + return NextMode::kRow; }, - [cells_per_row_offset]() { return cells_per_row_offset; }); + [cells_per_row_offset]() { return cells_per_row_offset; }, + [](InternalFilter const& internal_filter) { + return absl::holds_alternative(internal_filter); + }); } if (filter.has_cells_per_row_limit_filter()) { std::int64_t cells_per_row_limit = filter.cells_per_row_limit_filter(); @@ -587,10 +691,17 @@ StatusOr CreateFilterImpl( } return MakePerRowStateFilter( 
std::move(source), - [cells_per_row_limit](std::int64_t& per_row_state, CellView const&) { - return per_row_state++ < cells_per_row_limit; + [cells_per_row_limit](std::int64_t& per_row_state, + CellView const&) -> absl::optional { + if (per_row_state++ < cells_per_row_limit) { + return {}; + } + return NextMode::kRow; }, - []() -> std::int64_t { return 0; }); + []() -> std::int64_t { return 0; }, + [](InternalFilter const& internal_filter) { + return absl::holds_alternative(internal_filter); + }); } if (filter.has_cells_per_column_limit_filter()) { std::int64_t cells_per_column_limit = filter.cells_per_column_limit_filter(); @@ -601,24 +712,38 @@ StatusOr CreateFilterImpl( } return MakePerColumnStateFilter( std::move(source), - [cells_per_column_limit](std::int64_t& per_column_state, CellView const&) { - return per_column_state++ < cells_per_column_limit; + [cells_per_column_limit](std::int64_t& per_column_state, + CellView const&) -> absl::optional { + if (per_column_state++ < cells_per_column_limit) { + return {}; + } + return NextMode::kColumn; }, - []() -> std::int64_t { return 0; }); + []() -> std::int64_t { return 0; }, + [](InternalFilter const& internal_filter) { + return !absl::holds_alternative(internal_filter); + }); } if (filter.has_timestamp_range_filter()) { - auto const & ts_filter = filter.timestamp_range_filter(); + auto maybe_range = TimestampRangeSet::Range::FromTimestampRange( + filter.timestamp_range_filter()); + if (!maybe_range) { + return maybe_range.status(); + } + if (source.ApplyFilter(TimestampRange{*maybe_range})) { + return source; + } return MakeTrivialFilter( std::move(source), - [start = ts_filter.start_timestamp_micros(), - end = ts_filter.end_timestamp_micros()](CellView const& cell_view) { - auto timestamp_micros = - std::chrono::duration_cast( - cell_view.timestamp()) - .count(); - - return timestamp_micros >= start && - (end == 0 || timestamp_micros < end); + [range = *std::move(maybe_range)]( + CellView const& cell_view) -> 
absl::optional { + if (range.IsBelowStart(cell_view.timestamp())) { + return NextMode::kCell; + } + if (range.IsAboveEnd(cell_view.timestamp())) { + return NextMode::kColumn; + } + return {}; }); } if (filter.has_apply_label_transformer()) { @@ -632,6 +757,7 @@ StatusOr CreateFilterImpl( [label = std::make_shared( filter.apply_label_transformer())](CellView cell_view) { cell_view.SetLabel(*label); + return cell_view; }); } if (filter.has_strip_value_transformer()) { @@ -640,8 +766,10 @@ StatusOr CreateFilterImpl( "`strip_value_transformer` explicitly set to `false`.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - return MakeTrivialTransformer( - std::move(source), [](CellView cell_view) { cell_view.SetValue(""); }); + return MakeTrivialTransformer(std::move(source), [](CellView cell_view) { + cell_view.SetValue(""); + return cell_view; + }); } if (filter.has_chain()) { CellStream res = std::move(source); @@ -687,7 +815,7 @@ StatusOr CreateFilterImpl( if (parallel_streams.empty()) { return CellStream(std::make_shared()); } - return CellStream(MergeCellStreams(parallel_streams)); + return CellStream(std::make_shared(parallel_streams)); } if (filter.has_condition()) { if (!filter.condition().has_predicate_filter()){ @@ -695,6 +823,7 @@ StatusOr CreateFilterImpl( "`condition` must have a `predicate_filter` set.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } + // FIXME stream must be deep-copied auto maybe_predicate_stream = CreateFilterImpl( filter.condition().predicate_filter(), source, ctx, direct_sinks); if (!maybe_predicate_stream) { @@ -705,7 +834,7 @@ StatusOr CreateFilterImpl( ? 
CreateFilterImpl(filter.condition().true_filter(), source, ctx, direct_sinks) : StatusOr( - CellStream(std::make_shared()); + CellStream(std::make_shared())); if (!maybe_true_stream) { return maybe_true_stream.status(); } @@ -719,7 +848,7 @@ StatusOr CreateFilterImpl( return maybe_true_stream.status(); } - return CellStream(ConditionStream( + return CellStream(std::make_shared( std::move(source), *std::move(maybe_predicate_stream), *std::move(maybe_true_stream), *std::move(maybe_false_stream))); } @@ -729,7 +858,8 @@ StatusOr CreateFilterImpl( } CellStream JoinCellStreams(std::vector cell_streams) { - return CellStream(MergeCellStreams(std::move(cell_streams))); + return CellStream( + std::make_shared(std::move(cell_streams))); } StatusOr CreateFilter( @@ -753,7 +883,8 @@ StatusOr CreateFilter( } if (!direct_sinks.empty()) { direct_sinks.emplace_back(*std::move(maybe_filter)); - return CellStream(MergeCellStreams(std::move(direct_sinks))); + return CellStream( + std::make_shared(std::move(direct_sinks))); } return maybe_filter; } diff --git a/google/cloud/bigtable/emulator/filter.h b/google/cloud/bigtable/emulator/filter.h index 6d5e69e9c423b..91c55f578bac9 100644 --- a/google/cloud/bigtable/emulator/filter.h +++ b/google/cloud/bigtable/emulator/filter.h @@ -16,9 +16,13 @@ #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTER_H #include "google/cloud/bigtable/emulator/cell_view.h" -#include "google/cloud/stream_range.h" +#include "google/cloud/bigtable/emulator/range_set.h" #include +namespace re2 { +class RE2; +} // namespace re2 + namespace google { namespace cloud { namespace bigtable { @@ -26,52 +30,59 @@ namespace emulator { struct RowKeyRegex { - std::string regex; + std::shared_ptr regex; }; struct FamilyNameRegex { - std::string regex; + std::shared_ptr regex; }; struct ColumnRegex { - std::string regex; + std::shared_ptr regex; +}; +struct ColumnRange { + StringRangeSet::Range range; +}; +struct TimestampRange { + TimestampRangeSet::Range range; 
}; -using InternalFilter = absl::variant; +using InternalFilter = absl::variant; +enum class NextMode { + kCell = 0, + kColumn, + kRow, +}; class AbstractCellStreamImpl { public: virtual ~AbstractCellStreamImpl() = default; virtual bool ApplyFilter(InternalFilter const& internal_filter) = 0; - virtual absl::optional Next() = 0; - // Make sure Next() returns a different row than on last invocation. Noop in - // Next() was never called before. - virtual bool SkipRow() = 0; - // Make sure Next() returns a different (row, column) pair than on last - // invocation. Noop in Next() was never called before. - virtual bool SkipColumn() = 0; + virtual bool HasValue() const = 0; + virtual CellView const &Value() const = 0; + virtual bool Next(NextMode mode = NextMode::kCell) = 0; }; class CellStream { public: CellStream(std::shared_ptr impl) - : impl_(std::move(impl)), current_(impl_->Next()) {} - - bool HasValue() const { return current_.has_value(); } - CellView const & Value() const { return *current_; } - void Next() { current_ = impl_->Next(); } - bool SkipColumn() { return impl_->SkipColumn(); } - bool SkipRow() { return impl_->SkipRow(); } - void operator++(); + : impl_(std::move(impl)) {} + + bool ApplyFilter(InternalFilter const& internal_filter) { + return impl_->ApplyFilter(internal_filter); + } + bool HasValue() const { return impl_->HasValue(); } + CellView const & Value() const { return impl_->Value(); } + void Next(NextMode mode = NextMode::kCell); + void operator++() { Next(); } CellView operator++(int); CellView operator*() const { return Value(); } CellView const* operator->() const { return &Value(); } explicit operator bool() const { return HasValue(); } + AbstractCellStreamImpl const &impl() const { return *impl_; } private: std::shared_ptr impl_; - absl::optional current_; }; class FilterContext { @@ -85,6 +96,32 @@ class FilterContext { bool allow_apply_label_; }; +class MergeCellStreams : public AbstractCellStreamImpl { + public: + class 
CellStreamGreater { + public: + bool operator()(std::unique_ptr const& lhs, + std::unique_ptr const& rhs) const; + }; + + MergeCellStreams(std::vector streams); + bool ApplyFilter(InternalFilter const& internal_filter) override; + bool HasValue() const override; + CellView const& Value() const override; + bool Next(NextMode mode) override; + + private: + void InitializeIfNeeded() const; + void ReassesStreams() const; + bool SkipRowOrColumn(NextMode mode); + + mutable bool initialized_; + protected: + // A priority queue of streams which still have data. + // `std::priority_queue` can't be used because it cannot be iterated over. + mutable std::vector> unfinished_streams_; +}; + CellStream JoinCellStreams(std::vector cell_streams); StatusOr CreateFilter( diff --git a/google/cloud/bigtable/emulator/range_set.cc b/google/cloud/bigtable/emulator/range_set.cc index 018fd91146e43..9641758b48c41 100644 --- a/google/cloud/bigtable/emulator/range_set.cc +++ b/google/cloud/bigtable/emulator/range_set.cc @@ -14,6 +14,7 @@ #include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/bigtable/internal/google_bytes_traits.h" +#include namespace google { namespace cloud { @@ -110,6 +111,122 @@ StringRangeSet::Range::Range(Value start, bool start_open, Value end, StringRangeSet::Range::IsEmpty(start_, start_open_, end_, end_open_)); } +StatusOr StringRangeSet::Range::FromRowRange( + google::bigtable::v2::RowRange const& row_range) { + StringRangeSet::Range::Value start; + bool start_open; + if (row_range.has_start_key_open() && !row_range.start_key_open().empty()) { + start = StringRangeSet::Range::Value(row_range.start_key_open()); + start_open = true; + } else if (row_range.has_start_key_closed() && + !row_range.start_key_closed().empty()) { + start = StringRangeSet::Range::Value(row_range.start_key_closed()); + start_open = false; + } else { + start = StringRangeSet::Range::Value(""); + start_open = false; + } + StringRangeSet::Range::Value end; + bool end_open; 
+ if (row_range.has_end_key_open() && !row_range.end_key_open().empty()) { + end = StringRangeSet::Range::Value(row_range.end_key_open()); + end_open = true; + } else if (row_range.has_end_key_closed() && + !row_range.end_key_closed().empty()) { + end = StringRangeSet::Range::Value(row_range.end_key_closed()); + end_open = false; + } else { + end = StringRangeSet::Range::Value(StringRangeSet::Range::Infinity{}); + end_open = true; + } + if (StringRangeSet::RangeValueLess()(end, start)) { + return InvalidArgumentError( + "reversed `row_range`", + GCP_ERROR_INFO().WithMetadata("row_range", row_range.DebugString())); + } + return StringRangeSet::Range(std::move(start), start_open, std::move(end), + end_open); +} + +StatusOr StringRangeSet::Range::FromValueRange( + google::bigtable::v2::ValueRange const& value_range) { + StringRangeSet::Range::Value start; + bool start_open; + if (value_range.has_start_value_open() && + !value_range.start_value_open().empty()) { + start = StringRangeSet::Range::Value(value_range.start_value_open()); + start_open = true; + } else if (value_range.has_start_value_closed() && + !value_range.start_value_closed().empty()) { + start = StringRangeSet::Range::Value(value_range.start_value_closed()); + start_open = false; + } else { + start = StringRangeSet::Range::Value(""); + start_open = false; + } + StringRangeSet::Range::Value end; + bool end_open; + if (value_range.has_end_value_open() && + !value_range.end_value_open().empty()) { + end = StringRangeSet::Range::Value(value_range.end_value_open()); + end_open = true; + } else if (value_range.has_end_value_closed() && + !value_range.end_value_closed().empty()) { + end = StringRangeSet::Range::Value(value_range.end_value_closed()); + end_open = false; + } else { + end = StringRangeSet::Range::Value(StringRangeSet::Range::Infinity{}); + end_open = true; + } + if (StringRangeSet::RangeValueLess()(end, start)) { + return InvalidArgumentError("reversed `value_range`", + 
GCP_ERROR_INFO().WithMetadata( + "value_range", value_range.DebugString())); + } + return StringRangeSet::Range(std::move(start), start_open, std::move(end), + end_open); +} + +StatusOr StringRangeSet::Range::FromColumnRange( + google::bigtable::v2::ColumnRange const& column_range) { + StringRangeSet::Range::Value start; + bool start_open; + if (column_range.has_start_qualifier_open() && + !column_range.start_qualifier_open().empty()) { + start = StringRangeSet::Range::Value(column_range.start_qualifier_open()); + start_open = true; + } else if (column_range.has_start_qualifier_closed() && + !column_range.start_qualifier_closed().empty()) { + start = StringRangeSet::Range::Value(column_range.start_qualifier_closed()); + start_open = false; + } else { + start = StringRangeSet::Range::Value(""); + start_open = false; + } + StringRangeSet::Range::Value end; + bool end_open; + if (column_range.has_end_qualifier_open() && + !column_range.end_qualifier_open().empty()) { + end = StringRangeSet::Range::Value(column_range.end_qualifier_open()); + end_open = true; + } else if (column_range.has_end_qualifier_closed() && + !column_range.end_qualifier_closed().empty()) { + end = StringRangeSet::Range::Value(column_range.end_qualifier_closed()); + end_open = false; + } else { + end = StringRangeSet::Range::Value(StringRangeSet::Range::Infinity{}); + end_open = true; + } + if (StringRangeSet::RangeValueLess()(end, start)) { + return InvalidArgumentError( + "reversed `column_range`", + GCP_ERROR_INFO().WithMetadata("column_range", + column_range.DebugString())); + } + return StringRangeSet::Range(std::move(start), start_open, std::move(end), + end_open); +} + void StringRangeSet::Range::set_start(Value start, bool start_open) { start_ = std::move(start); start_open_ = start_open; @@ -156,6 +273,14 @@ bool StringRangeSet::Range::IsAboveEnd(Value const &value) const { return end_open_; } +bool StringRangeSet::Range::IsWithin(Value const &value) const { + return !IsAboveEnd(value) && 
!IsBelowStart(value); +} + +bool StringRangeSet::Range::IsEmpty() const { + return Range::IsEmpty(start_, start_open_, end_, end_open_); +} + bool StringRangeSet::RangeValueLess::operator()(Range::Value const& lhs, Range::Value const& rhs) const { return CompareRangeValues(lhs, rhs) < 0; @@ -267,10 +392,29 @@ TimestampRangeSet::Range::Range(Value start, Value end) assert(end == std::chrono::milliseconds::zero() || start <= end); } +StatusOr TimestampRangeSet::Range::FromTimestampRange( + google::bigtable::v2::TimestampRange const& timestamp_range) { + auto start = std::chrono::duration_cast( + std::chrono::microseconds(timestamp_range.start_timestamp_micros())); + auto end = std::chrono::duration_cast( + std::chrono::microseconds(timestamp_range.end_timestamp_micros())); + if (end != std::chrono::milliseconds::zero() && start > end) { + return InvalidArgumentError( + "reversed `timestamp_range`", + GCP_ERROR_INFO().WithMetadata("timestamp_range", + timestamp_range.DebugString())); + } + return Range(start, end); +} + bool TimestampRangeSet::Range::IsAboveEnd(Value value) const { return end_ != std::chrono::milliseconds::zero() && value >= end_; } +bool TimestampRangeSet::Range::IsWithin(Value value) const { + return !IsAboveEnd(value) && !IsBelowStart(value); +} + bool TimestampRangeSet::Range::IsEmpty(TimestampRangeSet::Range::Value start, TimestampRangeSet::Range::Value end) { if (end == std::chrono::milliseconds::zero()) { diff --git a/google/cloud/bigtable/emulator/range_set.h b/google/cloud/bigtable/emulator/range_set.h index c4cfef4876758..3bd7674fe1d9e 100644 --- a/google/cloud/bigtable/emulator/range_set.h +++ b/google/cloud/bigtable/emulator/range_set.h @@ -16,12 +16,21 @@ #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_RANGE_SET_H #include "absl/types/variant.h" +#include "google/cloud/status_or.h" #include #include #include #include namespace google { +namespace bigtable { +namespace v2 { +class RowRange; +class ValueRange; +class ColumnRange; 
+class TimestampRange; +} // namespace v2 +} // namespace bigtable namespace cloud { namespace bigtable { namespace emulator { @@ -34,6 +43,12 @@ class StringRangeSet { using Value = absl::variant; Range(Value start, bool start_open, Value end, bool end_open); + static StatusOr FromRowRange( + google::bigtable::v2::RowRange const& row_range); + static StatusOr FromValueRange( + google::bigtable::v2::ValueRange const& value_range); + static StatusOr FromColumnRange( + google::bigtable::v2::ColumnRange const& column_range); Value const& start() const & { return start_; } std::string const& start_finite() const& { @@ -52,6 +67,8 @@ class StringRangeSet { bool IsBelowStart(Value const &value) const; bool IsAboveEnd(Value const &value) const; + bool IsWithin(Value const &value) const; + bool IsEmpty() const; static bool IsEmpty(StringRangeSet::Range::Value const& start, bool start_open, @@ -109,6 +126,8 @@ class TimestampRangeSet { using Value = std::chrono::milliseconds; Range(Value start, Value end); + static StatusOr FromTimestampRange( + google::bigtable::v2::TimestampRange const& timestamp_range); Value start() const { return start_; } Value start_finite() const { return start_; } @@ -123,6 +142,7 @@ class TimestampRangeSet { bool IsBelowStart(Value value) const { return value < start_; } bool IsAboveEnd(Value value) const; + bool IsWithin(Value value) const; static bool IsEmpty(TimestampRangeSet::Range::Value start, TimestampRangeSet::Range::Value end); diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index f7b6542556012..bd477b274ebbd 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include +#include #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" @@ -282,6 +283,47 @@ Status Table::MutateRow( return Status(); } +class FilteredTableStream : public MergeCellStreams { + public: + FilteredTableStream( + std::vector> cf_streams) + : MergeCellStreams(CreateCellStreams(std::move(cf_streams))) {} + + bool ApplyFilter(InternalFilter const& internal_filter) override { + if (!absl::holds_alternative(internal_filter)) { + return MergeCellStreams::ApplyFilter(internal_filter); + } + for (auto stream_it = unfinished_streams_.begin(); + stream_it != unfinished_streams_.end(); ++stream_it) { + auto* cf_stream = + dynamic_cast(&(*stream_it)->impl()); + assert(cf_stream); + if (re2::RE2::PartialMatch( + cf_stream->column_family_name(), + *absl::get(internal_filter).regex)) { + auto last_it = std::prev(unfinished_streams_.end()); + if (stream_it == last_it) { + unfinished_streams_.pop_back(); + break; + } + stream_it->swap(unfinished_streams_.back()); + unfinished_streams_.pop_back(); + } + } + return true; + } + private: + static std::vector CreateCellStreams( + std::vector> cf_streams) { + std::vector res; + res.reserve(cf_streams.size()); + for (auto& stream : cf_streams) { + res.emplace_back(std::move(stream)); + } + return res; + } +}; + StatusOr CreateStringRangeSet( google::bigtable::v2::RowSet const& row_set) { StringRangeSet res; @@ -294,41 +336,14 @@ StatusOr CreateStringRangeSet( res.Insert(StringRangeSet::Range(row_key, false, row_key, false)); } for (auto const& row_range : row_set.row_ranges()) { - StringRangeSet::Range::Value start; - bool start_open; - if (row_range.has_start_key_open() && !row_range.start_key_open().empty()) { - start = StringRangeSet::Range::Value(row_range.start_key_open()); - start_open = true; - } else if (row_range.has_start_key_closed() && - !row_range.start_key_closed().empty()) { - start = 
StringRangeSet::Range::Value(row_range.start_key_closed()); - start_open = false; - } else { - start = StringRangeSet::Range::Value(""); - start_open = false; - } - StringRangeSet::Range::Value end; - bool end_open; - if (row_range.has_end_key_open() && !row_range.end_key_open().empty()) { - end = StringRangeSet::Range::Value(row_range.end_key_open()); - end_open = true; - } else if (row_range.has_end_key_closed() && - !row_range.end_key_closed().empty()) { - end = StringRangeSet::Range::Value(row_range.end_key_closed()); - end_open = false; - } else { - end = StringRangeSet::Range::Value(StringRangeSet::Range::Infinity{}); - end_open = true; - } - if (StringRangeSet::RangeValueLess()(end, start)) { - return InvalidArgumentError( - "reversed `row_range`", - GCP_ERROR_INFO().WithMetadata("row_range", row_range.DebugString())); + auto maybe_range = StringRangeSet::Range::FromRowRange(row_range); + if (!maybe_range) { + return maybe_range.status(); } - if (StringRangeSet::Range::IsEmpty(start, start_open, end, end_open)) { + if (maybe_range->IsEmpty()) { continue; } - res.Insert(StringRangeSet::Range(std::move(start), start_open, std::move(end), end_open)); + res.Insert(*std::move(maybe_range)); } return res; } @@ -346,12 +361,13 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, row_set = std::make_shared(StringRangeSet::All()); } std::lock_guard lock(mu_); - std::vector per_cf_streams; + std::vector> per_cf_streams; for (auto const & column_family: column_families_) { - per_cf_streams.emplace_back(std::make_unique( + per_cf_streams.emplace_back(std::make_shared( *column_family.second, column_family.first, row_set)); } - auto stream = JoinCellStreams(std::move(per_cf_streams)); + auto stream = CellStream( + std::make_shared(std::move(per_cf_streams))); FilterContext ctx; if (request.has_filter()) { auto maybe_stream = CreateFilter(request.filter(), std::move(stream), ctx); From 9b524021a6c3dc2d0a25592e000f79e30cdec3ef Mon Sep 17 00:00:00 
2001 From: Marek Dopiera Date: Mon, 10 Feb 2025 23:29:56 +0100 Subject: [PATCH 010/195] StringRangeSet::Range tested. --- google/cloud/bigtable/emulator/range_set.cc | 70 +-- google/cloud/bigtable/emulator/range_set.h | 18 +- .../cloud/bigtable/emulator/range_set_test.cc | 520 +++++++++++++++++- 3 files changed, 552 insertions(+), 56 deletions(-) diff --git a/google/cloud/bigtable/emulator/range_set.cc b/google/cloud/bigtable/emulator/range_set.cc index 9641758b48c41..40a59f37bf338 100644 --- a/google/cloud/bigtable/emulator/range_set.cc +++ b/google/cloud/bigtable/emulator/range_set.cc @@ -20,12 +20,13 @@ namespace google { namespace cloud { namespace bigtable { namespace emulator { -namespace { +namespace detail { int CompareRangeValues(StringRangeSet::Range::Value const& lhs, StringRangeSet::Range::Value const& rhs) { if (absl::holds_alternative(lhs)) { - return absl::holds_alternative(rhs) ? 0 : 1; + return absl::holds_alternative(rhs) ? 0 + : 1; } if (absl::holds_alternative(rhs)) { return -1; @@ -81,7 +82,7 @@ bool DisjointAndSortedRangesAdjacent(StringRangeSet::Range const& lhs, // FIXME - ConsecutiveRowKeys should somehow take into account the allowed // length of the strings. 
if (lhs.end_closed() && rhs.start_closed() && - ConsecutiveRowKeys(lhs.end(), rhs.start())) { + detail::ConsecutiveRowKeys(lhs.end(), rhs.start())) { return true; } return false; @@ -94,7 +95,7 @@ bool DisjointAndSortedRangesAdjacent(TimestampRangeSet::Range const& lhs, return lhs.end() == rhs.start(); } -} // anonymous namespace +} // namespace detail StringRangeSet::Range::Range(Value start, bool start_open, Value end, bool end_open) @@ -104,11 +105,11 @@ StringRangeSet::Range::Range(Value start, bool start_open, Value end, end_open_(end_open) { assert(!RangeValueLess()(end, start)); assert(!absl::holds_alternative(start) || - start_open_); + !start_open_); assert(!absl::holds_alternative(end) || - end_open_); + !end_open_); assert(!absl::holds_alternative(start) || - StringRangeSet::Range::IsEmpty(start_, start_open_, end_, end_open_)); + absl::holds_alternative(end)); } StatusOr StringRangeSet::Range::FromRowRange( @@ -137,7 +138,7 @@ StatusOr StringRangeSet::Range::FromRowRange( end_open = false; } else { end = StringRangeSet::Range::Value(StringRangeSet::Range::Infinity{}); - end_open = true; + end_open = false; } if (StringRangeSet::RangeValueLess()(end, start)) { return InvalidArgumentError( @@ -176,7 +177,7 @@ StatusOr StringRangeSet::Range::FromValueRange( end_open = false; } else { end = StringRangeSet::Range::Value(StringRangeSet::Range::Infinity{}); - end_open = true; + end_open = false; } if (StringRangeSet::RangeValueLess()(end, start)) { return InvalidArgumentError("reversed `value_range`", @@ -215,7 +216,7 @@ StatusOr StringRangeSet::Range::FromColumnRange( end_open = false; } else { end = StringRangeSet::Range::Value(StringRangeSet::Range::Infinity{}); - end_open = true; + end_open = false; } if (StringRangeSet::RangeValueLess()(end, start)) { return InvalidArgumentError( @@ -238,7 +239,7 @@ void StringRangeSet::Range::set_end(Value end, bool end_open) { } bool StringRangeSet::Range::IsBelowStart(Value const &value) const { - auto const cmp = 
CompareRangeValues(start_, value); + auto const cmp = detail::CompareRangeValues(value, start_); if (cmp != 0) { return cmp < 0; } @@ -249,24 +250,24 @@ bool StringRangeSet::Range::IsEmpty(StringRangeSet::Range::Value const& start, bool start_open, StringRangeSet::Range::Value const& end, bool end_open) { - auto const res_cmp = CompareRangeValues(start, end); + auto const res_cmp = detail::CompareRangeValues(start, end); if (res_cmp > 0) { return true; } if (res_cmp == 0) { - return start_open || end_open; + return start_open || end_open || + absl::holds_alternative(start); } if (start_open && end_open) { // FIXME - ConsecutiveRowKeys should somehow take into account the allowed // length of the strings. - return ConsecutiveRowKeys(start, end); + return detail::ConsecutiveRowKeys(start, end); } return false; } - bool StringRangeSet::Range::IsAboveEnd(Value const &value) const { - auto const cmp = CompareRangeValues(value, end_); + auto const cmp = detail::CompareRangeValues(value, end_); if (cmp != 0) { return cmp > 0; } @@ -283,12 +284,12 @@ bool StringRangeSet::Range::IsEmpty() const { bool StringRangeSet::RangeValueLess::operator()(Range::Value const& lhs, Range::Value const& rhs) const { - return CompareRangeValues(lhs, rhs) < 0; + return detail::CompareRangeValues(lhs, rhs) < 0; } bool StringRangeSet::RangeStartLess::operator()(Range const& lhs, Range const& rhs) const { - auto res = CompareRangeValues(lhs.start(), rhs.start()); + auto res = detail::CompareRangeValues(lhs.start(), rhs.start()); if (res == 0) { return lhs.start_closed() && rhs.start_open(); } @@ -297,7 +298,7 @@ bool StringRangeSet::RangeStartLess::operator()(Range const& lhs, bool StringRangeSet::RangeEndLess::operator()(Range const& lhs, Range const& rhs) const { - auto res = CompareRangeValues(lhs.end(), rhs.end()); + auto res = detail::CompareRangeValues(lhs.end(), rhs.end()); if (res == 0) { return lhs.end_open() && rhs.end_closed(); } @@ -324,15 +325,15 @@ void 
StringRangeSet::Insert(StringRangeSet::Range inserted_range) { // The previous range is the first to have a chance for an overlap - it is the // last one, which starts at or before `inserted_range` start. if (first_to_remove != disjoint_ranges_.begin() && - HasOverlap(*std::prev(first_to_remove), inserted_range)) { + detail::HasOverlap(*std::prev(first_to_remove), inserted_range)) { std::advance(first_to_remove, -1); } // The range preceeding `first_to_remove` for sure has no overlap with // `inserted_range` but it may be adjacent to it. In that case we should also // remove it. if (first_to_remove != disjoint_ranges_.begin() && - DisjointAndSortedRangesAdjacent(*std::prev(first_to_remove), - inserted_range)) { + detail::DisjointAndSortedRangesAdjacent(*std::prev(first_to_remove), + inserted_range)) { std::advance(first_to_remove, -1); } if (first_to_remove != disjoint_ranges_.end()) { @@ -346,10 +347,10 @@ void StringRangeSet::Insert(StringRangeSet::Range inserted_range) { first_to_remove->end_open()); } disjoint_ranges_.erase(first_to_remove++); - } while ( - first_to_remove != disjoint_ranges_.end() && - (HasOverlap(*first_to_remove, inserted_range) || - DisjointAndSortedRangesAdjacent(inserted_range, *first_to_remove))); + } while (first_to_remove != disjoint_ranges_.end() && + (detail::HasOverlap(*first_to_remove, inserted_range) || + detail::DisjointAndSortedRangesAdjacent(inserted_range, + *first_to_remove))); } disjoint_ranges_.insert(std::move(inserted_range)); } @@ -369,8 +370,9 @@ std::ostream& operator<<(std::ostream& os, StringRangeSet::Range::Value const& value) { if (absl::holds_alternative(value)) { os << "inf"; + return os; } - os << absl::holds_alternative(value); + os << absl::get(value); return os; } @@ -460,15 +462,15 @@ void TimestampRangeSet::Insert(TimestampRangeSet::Range inserted_range) { // The previous range is the first to have a chance for an overlap - it is the // last one, which starts at or before `inserted_range` start. 
if (first_to_remove != disjoint_ranges_.begin() && - HasOverlap(*std::prev(first_to_remove), inserted_range)) { + detail::HasOverlap(*std::prev(first_to_remove), inserted_range)) { std::advance(first_to_remove, -1); } // The range preceeding `first_to_remove` for sure has no overlap with // `inserted_range` but it may be adjacent to it. In that case we should also // remove it. if (first_to_remove != disjoint_ranges_.begin() && - DisjointAndSortedRangesAdjacent(*std::prev(first_to_remove), - inserted_range)) { + detail::DisjointAndSortedRangesAdjacent(*std::prev(first_to_remove), + inserted_range)) { std::advance(first_to_remove, -1); } if (first_to_remove != disjoint_ranges_.end()) { @@ -481,10 +483,10 @@ void TimestampRangeSet::Insert(TimestampRangeSet::Range inserted_range) { inserted_range.set_end(first_to_remove->end()); } disjoint_ranges_.erase(first_to_remove++); - } while ( - first_to_remove != disjoint_ranges_.end() && - (HasOverlap(*first_to_remove, inserted_range) || - DisjointAndSortedRangesAdjacent(inserted_range, *first_to_remove))); + } while (first_to_remove != disjoint_ranges_.end() && + (detail::HasOverlap(*first_to_remove, inserted_range) || + detail::DisjointAndSortedRangesAdjacent(inserted_range, + *first_to_remove))); } disjoint_ranges_.insert(std::move(inserted_range)); } diff --git a/google/cloud/bigtable/emulator/range_set.h b/google/cloud/bigtable/emulator/range_set.h index 3bd7674fe1d9e..810507fa8886d 100644 --- a/google/cloud/bigtable/emulator/range_set.h +++ b/google/cloud/bigtable/emulator/range_set.h @@ -168,7 +168,6 @@ class TimestampRangeSet { return disjoint_ranges_; }; - private: std::set disjoint_ranges_; }; @@ -179,6 +178,23 @@ bool operator==(TimestampRangeSet::Range const& lhs, std::ostream& operator<<(std::ostream& os, TimestampRangeSet::Range const& range); +// For testing only. 
+namespace detail { + +int CompareRangeValues(StringRangeSet::Range::Value const& lhs, + StringRangeSet::Range::Value const& rhs); +bool ConsecutiveRowKeys(StringRangeSet::Range::Value const& lhs, + StringRangeSet::Range::Value const& rhs); +bool HasOverlap(StringRangeSet::Range const& lhs, + StringRangeSet::Range const& rhs); +bool HasOverlap(TimestampRangeSet::Range const& lhs, + TimestampRangeSet::Range const& rhs); +bool DisjointAndSortedRangesAdjacent(StringRangeSet::Range const& lhs, + StringRangeSet::Range const& rhs); +bool DisjointAndSortedRangesAdjacent(TimestampRangeSet::Range const& lhs, + TimestampRangeSet::Range const& rhs); + +} // namespace detail } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc index af8f14142bccc..e7a3453a1c1c0 100644 --- a/google/cloud/bigtable/emulator/range_set_test.cc +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -15,6 +15,7 @@ #include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/bigtable/row_range.h" #include "google/cloud/testing_util/is_proto_equal.h" +#include "google/cloud/testing_util/status_matchers.h" #include #include @@ -24,36 +25,513 @@ namespace bigtable { namespace emulator { namespace { +bool const kOpen = true; +bool const kClosed = false; +bool const kWhatever = true; // to indicate it's unimportant in the test + namespace btproto = ::google::bigtable::v2; using ::google::cloud::testing_util::IsProtoEqual; +TEST(StringRangeValueOrder, Simple) { + EXPECT_EQ(-1, detail::CompareRangeValues("A", "B")); + EXPECT_EQ(0, detail::CompareRangeValues("A", "A")); + EXPECT_EQ(1, detail::CompareRangeValues("B", "A")); +} + +TEST(StringRangeValueOrder, Empty) { + EXPECT_EQ(-1, detail::CompareRangeValues("", "A")); + EXPECT_EQ(0, detail::CompareRangeValues("", "")); + EXPECT_EQ(1, detail::CompareRangeValues("A", "")); +} + +TEST(StringRangeValueOrder, Infinite) 
{ + EXPECT_EQ(-1, + detail::CompareRangeValues("A", StringRangeSet::Range::Infinity{})); + EXPECT_EQ(0, detail::CompareRangeValues(StringRangeSet::Range::Infinity{}, + StringRangeSet::Range::Infinity{})); + EXPECT_EQ(1, + detail::CompareRangeValues(StringRangeSet::Range::Infinity{}, "A")); + + EXPECT_EQ(-1, + detail::CompareRangeValues("", StringRangeSet::Range::Infinity{})); + EXPECT_EQ(1, + detail::CompareRangeValues(StringRangeSet::Range::Infinity{}, "")); +} + +TEST(StringRangeSet, FromRowRangeClosed) { + auto closed = StringRangeSet::Range::FromRowRange( + RowRange::Closed("A", "B").as_proto()); + EXPECT_STATUS_OK(closed); + EXPECT_EQ("A", closed->start()); + EXPECT_EQ("B", closed->end()); + EXPECT_TRUE(closed->start_closed()); + EXPECT_TRUE(closed->end_closed()); + EXPECT_FALSE(closed->start_open()); + EXPECT_FALSE(closed->end_open()); +} + +TEST(StringRangeSet, FromRowRangeOpen) { + auto open = StringRangeSet::Range::FromRowRange( + RowRange::Open("A", "B").as_proto()); + EXPECT_STATUS_OK(open); + EXPECT_EQ("A", open->start()); + EXPECT_EQ("B", open->end()); + EXPECT_FALSE(open->start_closed()); + EXPECT_FALSE(open->end_closed()); + EXPECT_TRUE(open->start_open()); + EXPECT_TRUE(open->end_open()); +} + +TEST(StringRangeSet, FromRowRangeImplicitlyInfinite) { + auto range = + StringRangeSet::Range::FromRowRange(google::bigtable::v2::RowRange{}); + + EXPECT_STATUS_OK(range); + EXPECT_EQ("", range->start()); + EXPECT_EQ(StringRangeSet::Range::Infinity{}, range->end()); + EXPECT_TRUE(range->start_closed()); + EXPECT_TRUE(range->end_closed()); + EXPECT_FALSE(range->start_open()); + EXPECT_FALSE(range->end_open()); +} + +TEST(StringRangeSet, FromRowRangeExplicitlyInfinite) { + for (bool end_open : {true, false}) { + google::bigtable::v2::RowRange proto_range; + proto_range.set_start_key_closed(""); + if (end_open) { + proto_range.set_end_key_open(""); + } else { + proto_range.set_end_key_closed(""); + } + + auto range = 
StringRangeSet::Range::FromRowRange(proto_range); + EXPECT_STATUS_OK(range); + EXPECT_EQ("", range->start()); + EXPECT_EQ(StringRangeSet::Range::Infinity{}, range->end()); + EXPECT_TRUE(range->start_closed()); + EXPECT_TRUE(range->end_closed()); + EXPECT_FALSE(range->start_open()); + EXPECT_FALSE(range->end_open()); + } +} + +TEST(StringRangeSet, FromColumnRangeClosed) { + google::bigtable::v2::ColumnRange proto_range; + proto_range.set_start_qualifier_closed("A"); + proto_range.set_end_qualifier_closed("B"); + auto closed = StringRangeSet::Range::FromColumnRange(proto_range); + EXPECT_STATUS_OK(closed); + EXPECT_EQ("A", closed->start()); + EXPECT_EQ("B", closed->end()); + EXPECT_TRUE(closed->start_closed()); + EXPECT_TRUE(closed->end_closed()); + EXPECT_FALSE(closed->start_open()); + EXPECT_FALSE(closed->end_open()); +} + +TEST(StringRangeSet, FromColumnRangeOpen) { + google::bigtable::v2::ColumnRange proto_range; + proto_range.set_start_qualifier_open("A"); + proto_range.set_end_qualifier_open("B"); + auto open = StringRangeSet::Range::FromColumnRange(proto_range); + EXPECT_STATUS_OK(open); + EXPECT_EQ("A", open->start()); + EXPECT_EQ("B", open->end()); + EXPECT_FALSE(open->start_closed()); + EXPECT_FALSE(open->end_closed()); + EXPECT_TRUE(open->start_open()); + EXPECT_TRUE(open->end_open()); +} + +TEST(StringRangeSet, FromColumnRangeImplicitlyInfinite) { + auto range = + StringRangeSet::Range::FromColumnRange(google::bigtable::v2::ColumnRange{}); + + EXPECT_STATUS_OK(range); + EXPECT_EQ("", range->start()); + EXPECT_EQ(StringRangeSet::Range::Infinity{}, range->end()); + EXPECT_TRUE(range->start_closed()); + EXPECT_TRUE(range->end_closed()); + EXPECT_FALSE(range->start_open()); + EXPECT_FALSE(range->end_open()); +} + +TEST(StringRangeSet, FromColumnRangeExplicitlyInfinite) { + for (bool end_open : {true, false}) { + google::bigtable::v2::ColumnRange proto_range; + proto_range.set_start_qualifier_closed(""); + if (end_open) { + 
proto_range.set_end_qualifier_open(""); + } else { + proto_range.set_end_qualifier_closed(""); + } + + auto range = StringRangeSet::Range::FromColumnRange(proto_range); + EXPECT_STATUS_OK(range); + EXPECT_EQ("", range->start()); + EXPECT_EQ(StringRangeSet::Range::Infinity{}, range->end()); + EXPECT_TRUE(range->start_closed()); + EXPECT_TRUE(range->end_closed()); + EXPECT_FALSE(range->start_open()); + EXPECT_FALSE(range->end_open()); + } +} + +TEST(StringRangeSet, FromValueRangeClosed) { + google::bigtable::v2::ValueRange proto_range; + proto_range.set_start_value_closed("A"); + proto_range.set_end_value_closed("B"); + auto closed = StringRangeSet::Range::FromValueRange(proto_range); + EXPECT_STATUS_OK(closed); + EXPECT_EQ("A", closed->start()); + EXPECT_EQ("B", closed->end()); + EXPECT_TRUE(closed->start_closed()); + EXPECT_TRUE(closed->end_closed()); + EXPECT_FALSE(closed->start_open()); + EXPECT_FALSE(closed->end_open()); +} + +TEST(StringRangeSet, FromValueRangeOpen) { + google::bigtable::v2::ValueRange proto_range; + proto_range.set_start_value_open("A"); + proto_range.set_end_value_open("B"); + auto open = StringRangeSet::Range::FromValueRange(proto_range); + EXPECT_STATUS_OK(open); + EXPECT_EQ("A", open->start()); + EXPECT_EQ("B", open->end()); + EXPECT_FALSE(open->start_closed()); + EXPECT_FALSE(open->end_closed()); + EXPECT_TRUE(open->start_open()); + EXPECT_TRUE(open->end_open()); +} + +TEST(StringRangeSet, FromValueRangeImplicitlyInfinite) { + auto range = + StringRangeSet::Range::FromValueRange(google::bigtable::v2::ValueRange{}); + + EXPECT_STATUS_OK(range); + EXPECT_EQ("", range->start()); + EXPECT_EQ(StringRangeSet::Range::Infinity{}, range->end()); + EXPECT_TRUE(range->start_closed()); + EXPECT_TRUE(range->end_closed()); + EXPECT_FALSE(range->start_open()); + EXPECT_FALSE(range->end_open()); +} + +TEST(StringRangeSet, FromValueRangeExplicitlyInfinite) { + for (bool end_open : {true, false}) { + google::bigtable::v2::ValueRange proto_range; + 
proto_range.set_start_value_closed(""); + if (end_open) { + proto_range.set_end_value_open(""); + } else { + proto_range.set_end_value_closed(""); + } + + auto range = StringRangeSet::Range::FromValueRange(proto_range); + EXPECT_STATUS_OK(range); + EXPECT_EQ("", range->start()); + EXPECT_EQ(StringRangeSet::Range::Infinity{}, range->end()); + EXPECT_TRUE(range->start_closed()); + EXPECT_TRUE(range->end_closed()); + EXPECT_FALSE(range->start_open()); + EXPECT_FALSE(range->end_open()); + } +} + +TEST(StringRangeSet, RangeValueLess) { + EXPECT_TRUE(StringRangeSet::RangeValueLess()("A", "B")); + EXPECT_FALSE(StringRangeSet::RangeValueLess()("A", "A")); + EXPECT_FALSE(StringRangeSet::RangeValueLess()("B", "A")); +} + +TEST(StringRangeSet, RangeStartLess) { + EXPECT_TRUE(StringRangeSet::RangeStartLess()( + StringRangeSet::Range("A", kOpen, "unimportant", kWhatever), + StringRangeSet::Range("B", kOpen, "unimportant", kWhatever))); + EXPECT_FALSE(StringRangeSet::RangeStartLess()( + StringRangeSet::Range("B", kOpen, "unimportant", kWhatever), + StringRangeSet::Range("A", kOpen, "unimportant", kWhatever))); + EXPECT_FALSE(StringRangeSet::RangeStartLess()( + StringRangeSet::Range("A", kOpen, "unimportant", kWhatever), + StringRangeSet::Range("A", kOpen, "unimportant", kWhatever))); + + EXPECT_TRUE(StringRangeSet::RangeStartLess()( + StringRangeSet::Range("A", kClosed, "unimportant", kWhatever), + StringRangeSet::Range("B", kClosed, "unimportant", kWhatever))); + EXPECT_FALSE(StringRangeSet::RangeStartLess()( + StringRangeSet::Range("B", kClosed, "unimportant", kWhatever), + StringRangeSet::Range("A", kClosed, "unimportant", kWhatever))); + EXPECT_FALSE(StringRangeSet::RangeStartLess()( + StringRangeSet::Range("A", kClosed, "unimportant", kWhatever), + StringRangeSet::Range("A", kClosed, "unimportant", kWhatever))); + + EXPECT_FALSE(StringRangeSet::RangeStartLess()( + StringRangeSet::Range("A", kOpen, "unimportant", kWhatever), + StringRangeSet::Range("A", kClosed, 
"unimportant", kWhatever))); + EXPECT_TRUE(StringRangeSet::RangeStartLess()( + StringRangeSet::Range("A", kClosed, "unimportant", kWhatever), + StringRangeSet::Range("A", kOpen, "unimportant", kWhatever))); +} + +TEST(StringRangeSet, RangeEndLess) { + EXPECT_TRUE(StringRangeSet::RangeEndLess()( + StringRangeSet::Range("unimportant", kWhatever, "A", kOpen), + StringRangeSet::Range("unimportant", kWhatever, "B", kOpen))); + EXPECT_FALSE(StringRangeSet::RangeEndLess()( + StringRangeSet::Range("unimportant", kWhatever, "B", kOpen), + StringRangeSet::Range("unimportant", kWhatever, "A", kOpen))); + EXPECT_FALSE(StringRangeSet::RangeEndLess()( + StringRangeSet::Range("unimportant", kWhatever, "A", kOpen), + StringRangeSet::Range("unimportant", kWhatever, "A", kOpen))); + + EXPECT_TRUE(StringRangeSet::RangeEndLess()( + StringRangeSet::Range("unimportant", kWhatever, "A", kClosed), + StringRangeSet::Range("unimportant", kWhatever, "B", kClosed))); + EXPECT_FALSE(StringRangeSet::RangeEndLess()( + StringRangeSet::Range("unimportant", kWhatever, "B", kClosed), + StringRangeSet::Range("unimportant", kWhatever, "A", kClosed))); + EXPECT_FALSE(StringRangeSet::RangeEndLess()( + StringRangeSet::Range("unimportant", kWhatever, "A", kClosed), + StringRangeSet::Range("unimportant", kWhatever, "A", kClosed))); + + EXPECT_FALSE(StringRangeSet::RangeEndLess()( + StringRangeSet::Range("unimportant", kWhatever, "A", kClosed), + StringRangeSet::Range("unimportant", kWhatever, "A", kOpen))); + EXPECT_TRUE(StringRangeSet::RangeEndLess()( + StringRangeSet::Range("unimportant", kWhatever, "A", kOpen), + StringRangeSet::Range("unimportant", kWhatever, "A", kClosed))); +} + +TEST(StringRangeSet, BelowStart) { + StringRangeSet::Range const open("B", kOpen, "unimportant", kWhatever); + StringRangeSet::Range const closed("B", kClosed, "unimportant", kWhatever); + StringRangeSet::Range const infinite(StringRangeSet::Range::Infinity{}, kClosed, + StringRangeSet::Range::Infinity{}, + kClosed); + + 
EXPECT_TRUE(open.IsBelowStart("A")); + EXPECT_TRUE(closed.IsBelowStart("A")); + EXPECT_TRUE(open.IsBelowStart("B")); + EXPECT_FALSE(closed.IsBelowStart("B")); + EXPECT_FALSE(open.IsBelowStart("C")); + EXPECT_FALSE(closed.IsBelowStart("C")); + EXPECT_FALSE(open.IsBelowStart(StringRangeSet::Range::Infinity{})); + EXPECT_FALSE(closed.IsBelowStart(StringRangeSet::Range::Infinity{})); + EXPECT_TRUE(infinite.IsBelowStart("whatever_string")); + EXPECT_FALSE(infinite.IsBelowStart(StringRangeSet::Range::Infinity{})); +} + +TEST(StringRangeSet, AboveEnd) { + StringRangeSet::Range const open("unimportant", kWhatever, "B", kOpen); + StringRangeSet::Range const closed("unimportant", kWhatever, "B", kClosed); + StringRangeSet::Range const infinite( + "unimportant", kWhatever, StringRangeSet::Range::Infinity{}, kClosed); + + EXPECT_FALSE(open.IsAboveEnd("A")); + EXPECT_FALSE(closed.IsAboveEnd("A")); + EXPECT_TRUE(open.IsAboveEnd("B")); + EXPECT_FALSE(closed.IsAboveEnd("B")); + EXPECT_TRUE(open.IsAboveEnd("C")); + EXPECT_TRUE(closed.IsAboveEnd("C")); + EXPECT_FALSE(infinite.IsAboveEnd("whatever_string")); + EXPECT_FALSE(infinite.IsAboveEnd(StringRangeSet::Range::Infinity{})); +} + + +TEST(StringRangeSet, IsWithin) { + StringRangeSet::Range const closed("A", kClosed, "C", kClosed); + EXPECT_FALSE(closed.IsWithin("")); + EXPECT_TRUE(closed.IsWithin("A")); + EXPECT_TRUE(closed.IsWithin("B")); + EXPECT_TRUE(closed.IsWithin("C")); + EXPECT_FALSE(closed.IsWithin("D")); + EXPECT_FALSE(closed.IsWithin(StringRangeSet::Range::Infinity{})); + + StringRangeSet::Range const open("A", kOpen, "C", kOpen); + EXPECT_FALSE(open.IsWithin("")); + EXPECT_FALSE(open.IsWithin("A")); + EXPECT_TRUE(open.IsWithin("B")); + EXPECT_FALSE(open.IsWithin("C")); + EXPECT_FALSE(open.IsWithin("D")); + EXPECT_FALSE(open.IsWithin(StringRangeSet::Range::Infinity{})); +} + +TEST(StringRangeSet, RangeEqality) { + EXPECT_EQ(StringRangeSet::Range("A", kClosed, "B", kOpen), + StringRangeSet::Range("A", kClosed, "B", 
kOpen)); + + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, "B", kOpen) == + StringRangeSet::Range("B", kClosed, "B", kOpen)); + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, "B", kOpen) == + StringRangeSet::Range("A", kOpen, "B", kOpen)); + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, "B", kOpen) == + StringRangeSet::Range("A", kClosed, "C", kOpen)); + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, "B", kOpen) == + StringRangeSet::Range("A", kClosed, "B", kClosed)); +} + +TEST(StringRangeSet, RangePrint) { + { + std::stringstream os; + os << StringRangeSet::Range("A", kClosed, "B", kOpen); + EXPECT_EQ("[A,B)", os.str()); + } + { + std::stringstream os; + os << StringRangeSet::Range("A", kOpen, "B", kClosed); + EXPECT_EQ("(A,B]", os.str()); + } + { + std::stringstream os; + os << StringRangeSet::Range("", kOpen, "", kClosed); + EXPECT_EQ("(,]", os.str()); + } + { + std::stringstream os; + os << StringRangeSet::Range(StringRangeSet::Range::Infinity{}, kClosed, + StringRangeSet::Range::Infinity{}, kClosed); + EXPECT_EQ("[inf,inf]", os.str()); + } +} + +// FIXME - test ConsecutiveRowKeys + +TEST(StringRangeSet, IsEmpty) { + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, "A", kClosed).IsEmpty()); + EXPECT_TRUE(StringRangeSet::Range("A", kClosed, "A", kOpen).IsEmpty()); + EXPECT_TRUE(StringRangeSet::Range("A", kOpen, "A", kClosed).IsEmpty()); + EXPECT_TRUE(StringRangeSet::Range("A", kOpen, "A", kOpen).IsEmpty()); + + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, "C", kClosed).IsEmpty()); + EXPECT_FALSE(StringRangeSet::Range("A", kOpen, "C", kClosed).IsEmpty()); + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, "C", kOpen).IsEmpty()); + EXPECT_FALSE(StringRangeSet::Range("A", kOpen, "C", kOpen).IsEmpty()); + + EXPECT_FALSE( + StringRangeSet::Range("A", kClosed, std::string("A\0", 2), kClosed) + .IsEmpty()); + EXPECT_FALSE(StringRangeSet::Range("A", kOpen, std::string("A\0", 2), kClosed) + .IsEmpty()); + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, 
std::string("A\0", 2), kOpen) + .IsEmpty()); + EXPECT_TRUE(StringRangeSet::Range("A", kOpen, std::string("A\0", 2), kOpen) + .IsEmpty()); + + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, + StringRangeSet::Range::Infinity{}, kClosed) + .IsEmpty()); + EXPECT_FALSE(StringRangeSet::Range("", kClosed, + StringRangeSet::Range::Infinity{}, kClosed) + .IsEmpty()); + EXPECT_TRUE(StringRangeSet::Range(StringRangeSet::Range::Infinity{}, kClosed, + StringRangeSet::Range::Infinity{}, kClosed) + .IsEmpty()); +} + +TEST(StringRangeSet, HasOverlap) { + EXPECT_FALSE( + detail::HasOverlap(StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("A", kClosed, "A", kClosed))); + EXPECT_FALSE( + detail::HasOverlap(StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("A", kClosed, "B", kOpen))); + EXPECT_FALSE( + detail::HasOverlap(StringRangeSet::Range("B", kOpen, "D", kClosed), + StringRangeSet::Range("A", kClosed, "B", kClosed))); + EXPECT_FALSE(detail::HasOverlap( + StringRangeSet::Range("B", kOpen, "D", kClosed), + StringRangeSet::Range("A", kClosed, std::string("B\0", 2), kOpen))); + EXPECT_TRUE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("A", kClosed, std::string("B\0", 2), kOpen))); + EXPECT_TRUE(detail::HasOverlap( + StringRangeSet::Range("B", kOpen, "D", kClosed), + StringRangeSet::Range("A", kClosed, std::string("B\0", 2), kClosed))); + EXPECT_TRUE( + detail::HasOverlap(StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("A", kClosed, "B", kClosed))); + EXPECT_TRUE( + detail::HasOverlap(StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("A", kClosed, "C", kOpen))); + EXPECT_TRUE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("A", kClosed, StringRangeSet::Range::Infinity{}, + kClosed))); + + EXPECT_FALSE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, "D", kOpen), + 
StringRangeSet::Range("D", kClosed, "E", kOpen))); + EXPECT_FALSE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, std::string("D\0", 2), kOpen), + StringRangeSet::Range("D", kOpen, "E", kOpen))); + EXPECT_TRUE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, std::string("D\0", 2), kClosed), + StringRangeSet::Range("D", kOpen, "E", kOpen))); + EXPECT_TRUE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, std::string("D\0", 2), kOpen), + StringRangeSet::Range("D", kClosed, "E", kOpen))); + EXPECT_TRUE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("D", kClosed, StringRangeSet::Range::Infinity{}, + kClosed))); + EXPECT_FALSE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("D", kOpen, StringRangeSet::Range::Infinity{}, + kClosed))); + EXPECT_FALSE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, "D", kOpen), + StringRangeSet::Range("D", kClosed, StringRangeSet::Range::Infinity{}, + kClosed))); + EXPECT_FALSE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, "D", kOpen), + StringRangeSet::Range("D", kOpen, StringRangeSet::Range::Infinity{}, + kClosed))); + EXPECT_FALSE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("E", kClosed, StringRangeSet::Range::Infinity{}, + kClosed))); + EXPECT_FALSE( + detail::HasOverlap(StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("D", kOpen, "E", kOpen))); +} + +TEST(StringRangeSet, DisjointAdjacent) { + EXPECT_FALSE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "B", kOpen), + StringRangeSet::Range("C", kOpen, "D", kWhatever))); + EXPECT_FALSE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "C", kOpen), + StringRangeSet::Range("C", kOpen, "D", kWhatever))); + EXPECT_TRUE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "C", 
kClosed), + StringRangeSet::Range("C", kOpen, "D", kWhatever))); + EXPECT_TRUE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "C", kOpen), + StringRangeSet::Range("C", kClosed, "D", kWhatever))); + EXPECT_FALSE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "C", kOpen), + StringRangeSet::Range(std::string("C\0", 2), kOpen, "D", kWhatever))); + EXPECT_FALSE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "C", kClosed), + StringRangeSet::Range(std::string("C\0", 2), kOpen, "D", kWhatever))); + EXPECT_FALSE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "C", kOpen), + StringRangeSet::Range(std::string("C\0", 2), kClosed, "D", kWhatever))); + EXPECT_TRUE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "C", kClosed), + StringRangeSet::Range(std::string("C\0", 2), kClosed, "D", kWhatever))); +} + TEST(StringRangeSet, SingleRange) { StringRangeSet srs; - srs.Insert(StringRangeSet::Range("a", false, "b", false)); + srs.Insert(StringRangeSet::Range("a", kClosed, "b", kClosed)); ASSERT_EQ(1, srs.disjoint_ranges().size()); - ASSERT_EQ(StringRangeSet::Range("a", false, "b", false), + ASSERT_EQ(StringRangeSet::Range("a", kClosed, "b", kClosed), *srs.disjoint_ranges().begin()); } -TEST(StartLess, Order) { -// using StartLess = internal::RowRangeHelpers::StartLess; -// -// ASSERT_FALSE(StartLess()(RowRange::Closed("a", "").as_proto(), -// RowRange::Closed("a", "").as_proto())); -// ASSERT_TRUE(StartLess()(RowRange::Closed("a", "").as_proto(), -// RowRange::Open("a", "").as_proto())); -// ASSERT_FALSE(StartLess()(RowRange::Open("a", "").as_proto(), -// RowRange::Closed("a", "").as_proto())); -// ASSERT_TRUE(StartLess()(RowRange::Closed("a", "").as_proto(), -// RowRange::Closed("b", "").as_proto())); -// ASSERT_TRUE(StartLess()(RowRange::InfiniteRange().as_proto(), -// RowRange::Closed("a", "").as_proto())); -// 
ASSERT_TRUE(StartLess()(RowRange::InfiniteRange().as_proto(), -// RowRange::Open("a", "").as_proto())); -// ASSERT_FALSE(StartLess()(RowRange::InfiniteRange().as_proto(), -// RowRange::InfiniteRange().as_proto())); -} - } // anonymous namespace } // namespace emulator } // namespace bigtable From d9f61827d5ea93b831fb74a949a819385c396650 Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Tue, 11 Feb 2025 00:43:23 +0100 Subject: [PATCH 011/195] RangeSets tested. --- google/cloud/bigtable/emulator/filtered_map.h | 11 +- google/cloud/bigtable/emulator/range_set.cc | 160 +++++-------- google/cloud/bigtable/emulator/range_set.h | 59 +++-- .../cloud/bigtable/emulator/range_set_test.cc | 222 +++++++++++++++--- 4 files changed, 291 insertions(+), 161 deletions(-) diff --git a/google/cloud/bigtable/emulator/filtered_map.h b/google/cloud/bigtable/emulator/filtered_map.h index b6e7be5ee6805..b3dc30ba334f0 100644 --- a/google/cloud/bigtable/emulator/filtered_map.h +++ b/google/cloud/bigtable/emulator/filtered_map.h @@ -38,10 +38,11 @@ class FilteredMapView { using pointer = value_type const*; const_iterator( - FilteredMapView const& parent, typename Map::const_iterator unfiltered_pos, - typename std::set< - typename ExcludedRanges::Range, - typename ExcludedRanges::RangeStartLess>::const_iterator filter_pos) + FilteredMapView const& parent, + typename Map::const_iterator unfiltered_pos, + typename std::set:: + const_iterator filter_pos) : parent_(std::cref(parent)), unfiltered_pos_(std::move(unfiltered_pos)), filter_pos_(std::move(filter_pos)) { @@ -116,7 +117,7 @@ class FilteredMapView { std::reference_wrapper parent_; typename Map::const_iterator unfiltered_pos_; typename std::set::const_iterator + typename ExcludedRanges::Range::StartLess>::const_iterator filter_pos_; }; diff --git a/google/cloud/bigtable/emulator/range_set.cc b/google/cloud/bigtable/emulator/range_set.cc index 40a59f37bf338..e77cf0cf3286a 100644 --- a/google/cloud/bigtable/emulator/range_set.cc +++ 
b/google/cloud/bigtable/emulator/range_set.cc @@ -65,14 +65,14 @@ bool HasOverlap(TimestampRangeSet::Range const& lhs, TimestampRangeSet::Range::Value overlap_start = std::max(lhs.start(), rhs.start()); TimestampRangeSet::Range::Value overlap_end = - TimestampRangeSet::RangeEndLess()(lhs, rhs) ? lhs.end() : rhs.end(); - return TimestampRangeSet::Range::IsEmpty(overlap_start, overlap_end); + TimestampRangeSet::Range::EndLess()(lhs, rhs) ? lhs.end() : rhs.end(); + return !TimestampRangeSet::Range::IsEmpty(overlap_start, overlap_end); } bool DisjointAndSortedRangesAdjacent(StringRangeSet::Range const& lhs, StringRangeSet::Range const& rhs) { assert(!HasOverlap(lhs, rhs)); - assert(StringRangeSet::RangeStartLess()(lhs, rhs)); + assert(StringRangeSet::Range::StartLess()(lhs, rhs)); if (lhs.end_closed() && rhs.start_open() && lhs.end() == rhs.start()) { return true; } @@ -91,10 +91,50 @@ bool DisjointAndSortedRangesAdjacent(StringRangeSet::Range const& lhs, bool DisjointAndSortedRangesAdjacent(TimestampRangeSet::Range const& lhs, TimestampRangeSet::Range const& rhs) { assert(!HasOverlap(lhs, rhs)); - assert(TimestampRangeSet::RangeStartLess()(lhs, rhs)); + assert(TimestampRangeSet::Range::StartLess()(lhs, rhs)); return lhs.end() == rhs.start(); } +template +void RangeSetInsertImpl(RangeSetType& disjoint_ranges, + RangeType inserted_range) { + // Remove all ranges which either have an overlap with `inserted_range` or are + // adjacent to it. Then add `inserted_range` with `start` and `end` + // adjusted to cover what the removed ranges used to cover. + + auto first_to_remove = disjoint_ranges.upper_bound(inserted_range); + // `*first_to_remove` starts strictly after `inserted_range`'s start. + // The previous range is the first to have a chance for an overlap - it is the + // last one, which starts at or before `inserted_range` start. 
+ if (first_to_remove != disjoint_ranges.begin() && + detail::HasOverlap(*std::prev(first_to_remove), inserted_range)) { + std::advance(first_to_remove, -1); + } + // The range preceeding `first_to_remove` for sure has no overlap with + // `inserted_range` but it may be adjacent to it. In that case we should also + // remove it. + if (first_to_remove != disjoint_ranges.begin() && + detail::DisjointAndSortedRangesAdjacent(*std::prev(first_to_remove), + inserted_range)) { + std::advance(first_to_remove, -1); + } + if (first_to_remove != disjoint_ranges.end()) { + if (typename RangeType::StartLess()(*first_to_remove, inserted_range)) { + inserted_range.set_start(*first_to_remove); + } + do { + if (typename RangeType::EndLess()(inserted_range, *first_to_remove)) { + inserted_range.set_end(*first_to_remove); + } + disjoint_ranges.erase(first_to_remove++); + } while (first_to_remove != disjoint_ranges.end() && + (detail::HasOverlap(*first_to_remove, inserted_range) || + detail::DisjointAndSortedRangesAdjacent(inserted_range, + *first_to_remove))); + } + disjoint_ranges.insert(std::move(inserted_range)); +} + } // namespace detail StringRangeSet::Range::Range(Value start, bool start_open, Value end, @@ -103,7 +143,7 @@ StringRangeSet::Range::Range(Value start, bool start_open, Value end, start_open_(start_open), end_(std::move(end)), end_open_(end_open) { - assert(!RangeValueLess()(end, start)); + assert(!Range::ValueLess()(end, start)); assert(!absl::holds_alternative(start) || !start_open_); assert(!absl::holds_alternative(end) || @@ -140,7 +180,7 @@ StatusOr StringRangeSet::Range::FromRowRange( end = StringRangeSet::Range::Value(StringRangeSet::Range::Infinity{}); end_open = false; } - if (StringRangeSet::RangeValueLess()(end, start)) { + if (StringRangeSet::Range::ValueLess()(end, start)) { return InvalidArgumentError( "reversed `row_range`", GCP_ERROR_INFO().WithMetadata("row_range", row_range.DebugString())); @@ -179,7 +219,7 @@ StatusOr 
StringRangeSet::Range::FromValueRange( end = StringRangeSet::Range::Value(StringRangeSet::Range::Infinity{}); end_open = false; } - if (StringRangeSet::RangeValueLess()(end, start)) { + if (StringRangeSet::Range::ValueLess()(end, start)) { return InvalidArgumentError("reversed `value_range`", GCP_ERROR_INFO().WithMetadata( "value_range", value_range.DebugString())); @@ -218,7 +258,7 @@ StatusOr StringRangeSet::Range::FromColumnRange( end = StringRangeSet::Range::Value(StringRangeSet::Range::Infinity{}); end_open = false; } - if (StringRangeSet::RangeValueLess()(end, start)) { + if (StringRangeSet::Range::ValueLess()(end, start)) { return InvalidArgumentError( "reversed `column_range`", GCP_ERROR_INFO().WithMetadata("column_range", @@ -228,14 +268,14 @@ StatusOr StringRangeSet::Range::FromColumnRange( end_open); } -void StringRangeSet::Range::set_start(Value start, bool start_open) { - start_ = std::move(start); - start_open_ = start_open; +void StringRangeSet::Range::set_start(Range const& source) { + start_ = source.start(); + start_open_ = source.start_open(); } -void StringRangeSet::Range::set_end(Value end, bool end_open) { - end_ = std::move(end); - end_open_ = end_open; +void StringRangeSet::Range::set_end(Range const& source) { + end_ = source.end(); + end_open_ = source.end_open(); } bool StringRangeSet::Range::IsBelowStart(Value const &value) const { @@ -282,12 +322,12 @@ bool StringRangeSet::Range::IsEmpty() const { return Range::IsEmpty(start_, start_open_, end_, end_open_); } -bool StringRangeSet::RangeValueLess::operator()(Range::Value const& lhs, +bool StringRangeSet::Range::ValueLess::operator()(Range::Value const& lhs, Range::Value const& rhs) const { return detail::CompareRangeValues(lhs, rhs) < 0; } -bool StringRangeSet::RangeStartLess::operator()(Range const& lhs, +bool StringRangeSet::Range::StartLess::operator()(Range const& lhs, Range const& rhs) const { auto res = detail::CompareRangeValues(lhs.start(), rhs.start()); if (res == 0) { @@ -296,7 
+336,7 @@ bool StringRangeSet::RangeStartLess::operator()(Range const& lhs, return res < 0; } -bool StringRangeSet::RangeEndLess::operator()(Range const& lhs, +bool StringRangeSet::Range::EndLess::operator()(Range const& lhs, Range const& rhs) const { auto res = detail::CompareRangeValues(lhs.end(), rhs.end()); if (res == 0) { @@ -316,43 +356,7 @@ StringRangeSet StringRangeSet::Empty() { } void StringRangeSet::Insert(StringRangeSet::Range inserted_range) { - // Remove all ranges which either have an overlap with `inserted_range` or are - // adjacent to it. Then add `inserted_range` with `start` and `end` - // adjusted to cover what the removed ranges used to cover. - - auto first_to_remove = disjoint_ranges_.upper_bound(inserted_range); - // `*first_to_remove` starts strictly after `inserted_range`'s start. - // The previous range is the first to have a chance for an overlap - it is the - // last one, which starts at or before `inserted_range` start. - if (first_to_remove != disjoint_ranges_.begin() && - detail::HasOverlap(*std::prev(first_to_remove), inserted_range)) { - std::advance(first_to_remove, -1); - } - // The range preceeding `first_to_remove` for sure has no overlap with - // `inserted_range` but it may be adjacent to it. In that case we should also - // remove it. 
- if (first_to_remove != disjoint_ranges_.begin() && - detail::DisjointAndSortedRangesAdjacent(*std::prev(first_to_remove), - inserted_range)) { - std::advance(first_to_remove, -1); - } - if (first_to_remove != disjoint_ranges_.end()) { - if (RangeStartLess()(*first_to_remove, inserted_range)) { - inserted_range.set_start(std::move(first_to_remove)->start(), - first_to_remove->start_open()); - } - do { - if (RangeEndLess()(inserted_range, *first_to_remove)) { - inserted_range.set_end(std::move(first_to_remove)->end(), - first_to_remove->end_open()); - } - disjoint_ranges_.erase(first_to_remove++); - } while (first_to_remove != disjoint_ranges_.end() && - (detail::HasOverlap(*first_to_remove, inserted_range) || - detail::DisjointAndSortedRangesAdjacent(inserted_range, - *first_to_remove))); - } - disjoint_ranges_.insert(std::move(inserted_range)); + detail::RangeSetInsertImpl(disjoint_ranges_, std::move(inserted_range)); } bool operator==(StringRangeSet::Range::Value const& lhs, @@ -422,15 +426,15 @@ bool TimestampRangeSet::Range::IsEmpty(TimestampRangeSet::Range::Value start, if (end == std::chrono::milliseconds::zero()) { return false; } - return start < end; + return start >= end; } -bool TimestampRangeSet::RangeStartLess::operator()(Range const& lhs, +bool TimestampRangeSet::Range::StartLess::operator()(Range const& lhs, Range const& rhs) const { return lhs.start() < rhs.start(); } -bool TimestampRangeSet::RangeEndLess::operator()(Range const& lhs, +bool TimestampRangeSet::Range::EndLess::operator()(Range const& lhs, Range const& rhs) const { if (lhs.end() == std::chrono::milliseconds::zero()) { return false; @@ -451,44 +455,8 @@ TimestampRangeSet TimestampRangeSet::Empty() { return TimestampRangeSet{}; } -// FIXME - share this with StringRangeSet void TimestampRangeSet::Insert(TimestampRangeSet::Range inserted_range) { - // Remove all ranges which either have an overlap with `inserted_range` or are - // adjacent to it. 
Then add `inserted_range` with `start` and `end` - // adjusted to cover what the removed ranges used to cover. - - auto first_to_remove = disjoint_ranges_.upper_bound(inserted_range); - // `*first_to_remove` starts strictly after `inserted_range`'s start. - // The previous range is the first to have a chance for an overlap - it is the - // last one, which starts at or before `inserted_range` start. - if (first_to_remove != disjoint_ranges_.begin() && - detail::HasOverlap(*std::prev(first_to_remove), inserted_range)) { - std::advance(first_to_remove, -1); - } - // The range preceeding `first_to_remove` for sure has no overlap with - // `inserted_range` but it may be adjacent to it. In that case we should also - // remove it. - if (first_to_remove != disjoint_ranges_.begin() && - detail::DisjointAndSortedRangesAdjacent(*std::prev(first_to_remove), - inserted_range)) { - std::advance(first_to_remove, -1); - } - if (first_to_remove != disjoint_ranges_.end()) { - if (RangeStartLess()(*first_to_remove, inserted_range)) { - inserted_range.set_start(first_to_remove->start()); - - } - do { - if (RangeEndLess()(inserted_range, *first_to_remove)) { - inserted_range.set_end(first_to_remove->end()); - } - disjoint_ranges_.erase(first_to_remove++); - } while (first_to_remove != disjoint_ranges_.end() && - (detail::HasOverlap(*first_to_remove, inserted_range) || - detail::DisjointAndSortedRangesAdjacent(inserted_range, - *first_to_remove))); - } - disjoint_ranges_.insert(std::move(inserted_range)); + detail::RangeSetInsertImpl(disjoint_ranges_, std::move(inserted_range)); } bool operator==(TimestampRangeSet::Range const& lhs, @@ -498,9 +466,9 @@ bool operator==(TimestampRangeSet::Range const& lhs, std::ostream& operator<<(std::ostream& os, TimestampRangeSet::Range const& range) { - os << "[" << range.start().count() << "ms-"; + os << "[" << range.start().count() << "ms,"; if (range.end() == std::chrono::milliseconds::zero()) { - os << "INF"; + os << "inf"; } else { os << 
range.end().count() << "ms"; } diff --git a/google/cloud/bigtable/emulator/range_set.h b/google/cloud/bigtable/emulator/range_set.h index 810507fa8886d..019e95a418abd 100644 --- a/google/cloud/bigtable/emulator/range_set.h +++ b/google/cloud/bigtable/emulator/range_set.h @@ -54,16 +54,14 @@ class StringRangeSet { std::string const& start_finite() const& { return absl::get(start_); } - Value&& start() && { return std::move(start_); } bool start_open() const { return start_open_; } bool start_closed() const { return !start_open_; } - void set_start(Value start, bool start_open); + void set_start(Range const& source); Value const& end() const & { return end_; } - Value&& end() && { return std::move(end_); } - void set_end(Value end, bool end_open); bool end_open() const { return end_open_; } bool end_closed() const { return !end_open_; } + void set_end(Range const& source); bool IsBelowStart(Value const &value) const; bool IsAboveEnd(Value const &value) const; @@ -74,6 +72,18 @@ class StringRangeSet { bool start_open, StringRangeSet::Range::Value const& end, bool end_open); + struct ValueLess { + bool operator()(Range::Value const& lhs, Range::Value const& rhs) const; + }; + + struct StartLess { + bool operator()(Range const& lhs, Range const& rhs) const; + }; + + struct EndLess { + bool operator()(Range const& lhs, Range const& rhs) const; + }; + private: Value start_; bool start_open_; @@ -81,29 +91,17 @@ class StringRangeSet { bool end_open_; }; - struct RangeValueLess { - bool operator()(Range::Value const& lhs, Range::Value const& rhs) const; - }; - - struct RangeStartLess { - bool operator()(Range const& lhs, Range const& rhs) const; - }; - - struct RangeEndLess { - bool operator()(Range const& lhs, Range const& rhs) const; - }; - static StringRangeSet All(); static StringRangeSet Empty(); void Insert(Range inserted_range); - std::set const& disjoint_ranges() const { + std::set const& disjoint_ranges() const { return disjoint_ranges_; }; private: - std::set 
disjoint_ranges_; + std::set disjoint_ranges_; }; bool operator==(StringRangeSet::Range::Value const& lhs, @@ -133,12 +131,12 @@ class TimestampRangeSet { Value start_finite() const { return start_; } bool start_open() const { return false; } bool start_closed() const { return true; } - void set_start(Value start) { start_ = start; } + void set_start(Range const& source) { start_ = source.start_; } Value end() const { return end_; } bool end_open() const { return true; } bool end_closed() const { return false; } - void set_end(Value end) { end_ = end; } + void set_end(Range const& source) { end_ = source.end_; } bool IsBelowStart(Value value) const { return value < start_; } bool IsAboveEnd(Value value) const; @@ -146,30 +144,31 @@ class TimestampRangeSet { static bool IsEmpty(TimestampRangeSet::Range::Value start, TimestampRangeSet::Range::Value end); + bool IsEmpty() const { return IsEmpty(start_, end_); } + + struct StartLess { + bool operator()(Range const& lhs, Range const& rhs) const; + }; + + struct EndLess { + bool operator()(Range const& lhs, Range const& rhs) const; + }; private: Value start_; Value end_; }; - struct RangeStartLess { - bool operator()(Range const& lhs, Range const& rhs) const; - }; - - struct RangeEndLess { - bool operator()(Range const& lhs, Range const& rhs) const; - }; - static TimestampRangeSet All(); static TimestampRangeSet Empty(); void Insert(Range inserted_range); - std::set const& disjoint_ranges() const { + std::set const& disjoint_ranges() const { return disjoint_ranges_; }; private: - std::set disjoint_ranges_; + std::set disjoint_ranges_; }; bool operator==(TimestampRangeSet::Range const& lhs, diff --git a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc index e7a3453a1c1c0..fcdff0f28d9f5 100644 --- a/google/cloud/bigtable/emulator/range_set_test.cc +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -16,6 +16,7 @@ #include "google/cloud/bigtable/row_range.h" #include 
"google/cloud/testing_util/is_proto_equal.h" #include "google/cloud/testing_util/status_matchers.h" +#include "google/cloud/testing_util/chrono_literals.h" #include #include @@ -58,6 +59,7 @@ TEST(StringRangeValueOrder, Infinite) { detail::CompareRangeValues(StringRangeSet::Range::Infinity{}, "")); } +// FIXME add invalid data tests TEST(StringRangeSet, FromRowRangeClosed) { auto closed = StringRangeSet::Range::FromRowRange( RowRange::Closed("A", "B").as_proto()); @@ -71,8 +73,8 @@ TEST(StringRangeSet, FromRowRangeClosed) { } TEST(StringRangeSet, FromRowRangeOpen) { - auto open = StringRangeSet::Range::FromRowRange( - RowRange::Open("A", "B").as_proto()); + auto open = + StringRangeSet::Range::FromRowRange(RowRange::Open("A", "B").as_proto()); EXPECT_STATUS_OK(open); EXPECT_EQ("A", open->start()); EXPECT_EQ("B", open->end()); @@ -145,8 +147,8 @@ TEST(StringRangeSet, FromColumnRangeOpen) { } TEST(StringRangeSet, FromColumnRangeImplicitlyInfinite) { - auto range = - StringRangeSet::Range::FromColumnRange(google::bigtable::v2::ColumnRange{}); + auto range = StringRangeSet::Range::FromColumnRange( + google::bigtable::v2::ColumnRange{}); EXPECT_STATUS_OK(range); EXPECT_EQ("", range->start()); @@ -241,65 +243,65 @@ TEST(StringRangeSet, FromValueRangeExplicitlyInfinite) { } TEST(StringRangeSet, RangeValueLess) { - EXPECT_TRUE(StringRangeSet::RangeValueLess()("A", "B")); - EXPECT_FALSE(StringRangeSet::RangeValueLess()("A", "A")); - EXPECT_FALSE(StringRangeSet::RangeValueLess()("B", "A")); + EXPECT_TRUE(StringRangeSet::Range::ValueLess()("A", "B")); + EXPECT_FALSE(StringRangeSet::Range::ValueLess()("A", "A")); + EXPECT_FALSE(StringRangeSet::Range::ValueLess()("B", "A")); } TEST(StringRangeSet, RangeStartLess) { - EXPECT_TRUE(StringRangeSet::RangeStartLess()( + EXPECT_TRUE(StringRangeSet::Range::StartLess()( StringRangeSet::Range("A", kOpen, "unimportant", kWhatever), StringRangeSet::Range("B", kOpen, "unimportant", kWhatever))); - 
EXPECT_FALSE(StringRangeSet::RangeStartLess()( + EXPECT_FALSE(StringRangeSet::Range::StartLess()( StringRangeSet::Range("B", kOpen, "unimportant", kWhatever), StringRangeSet::Range("A", kOpen, "unimportant", kWhatever))); - EXPECT_FALSE(StringRangeSet::RangeStartLess()( + EXPECT_FALSE(StringRangeSet::Range::StartLess()( StringRangeSet::Range("A", kOpen, "unimportant", kWhatever), StringRangeSet::Range("A", kOpen, "unimportant", kWhatever))); - EXPECT_TRUE(StringRangeSet::RangeStartLess()( + EXPECT_TRUE(StringRangeSet::Range::StartLess()( StringRangeSet::Range("A", kClosed, "unimportant", kWhatever), StringRangeSet::Range("B", kClosed, "unimportant", kWhatever))); - EXPECT_FALSE(StringRangeSet::RangeStartLess()( + EXPECT_FALSE(StringRangeSet::Range::StartLess()( StringRangeSet::Range("B", kClosed, "unimportant", kWhatever), StringRangeSet::Range("A", kClosed, "unimportant", kWhatever))); - EXPECT_FALSE(StringRangeSet::RangeStartLess()( + EXPECT_FALSE(StringRangeSet::Range::StartLess()( StringRangeSet::Range("A", kClosed, "unimportant", kWhatever), StringRangeSet::Range("A", kClosed, "unimportant", kWhatever))); - EXPECT_FALSE(StringRangeSet::RangeStartLess()( + EXPECT_FALSE(StringRangeSet::Range::StartLess()( StringRangeSet::Range("A", kOpen, "unimportant", kWhatever), StringRangeSet::Range("A", kClosed, "unimportant", kWhatever))); - EXPECT_TRUE(StringRangeSet::RangeStartLess()( + EXPECT_TRUE(StringRangeSet::Range::StartLess()( StringRangeSet::Range("A", kClosed, "unimportant", kWhatever), StringRangeSet::Range("A", kOpen, "unimportant", kWhatever))); } TEST(StringRangeSet, RangeEndLess) { - EXPECT_TRUE(StringRangeSet::RangeEndLess()( + EXPECT_TRUE(StringRangeSet::Range::EndLess()( StringRangeSet::Range("unimportant", kWhatever, "A", kOpen), StringRangeSet::Range("unimportant", kWhatever, "B", kOpen))); - EXPECT_FALSE(StringRangeSet::RangeEndLess()( + EXPECT_FALSE(StringRangeSet::Range::EndLess()( StringRangeSet::Range("unimportant", kWhatever, "B", kOpen), 
StringRangeSet::Range("unimportant", kWhatever, "A", kOpen))); - EXPECT_FALSE(StringRangeSet::RangeEndLess()( + EXPECT_FALSE(StringRangeSet::Range::EndLess()( StringRangeSet::Range("unimportant", kWhatever, "A", kOpen), StringRangeSet::Range("unimportant", kWhatever, "A", kOpen))); - EXPECT_TRUE(StringRangeSet::RangeEndLess()( + EXPECT_TRUE(StringRangeSet::Range::EndLess()( StringRangeSet::Range("unimportant", kWhatever, "A", kClosed), StringRangeSet::Range("unimportant", kWhatever, "B", kClosed))); - EXPECT_FALSE(StringRangeSet::RangeEndLess()( + EXPECT_FALSE(StringRangeSet::Range::EndLess()( StringRangeSet::Range("unimportant", kWhatever, "B", kClosed), StringRangeSet::Range("unimportant", kWhatever, "A", kClosed))); - EXPECT_FALSE(StringRangeSet::RangeEndLess()( + EXPECT_FALSE(StringRangeSet::Range::EndLess()( StringRangeSet::Range("unimportant", kWhatever, "A", kClosed), StringRangeSet::Range("unimportant", kWhatever, "A", kClosed))); - EXPECT_FALSE(StringRangeSet::RangeEndLess()( + EXPECT_FALSE(StringRangeSet::Range::EndLess()( StringRangeSet::Range("unimportant", kWhatever, "A", kClosed), StringRangeSet::Range("unimportant", kWhatever, "A", kOpen))); - EXPECT_TRUE(StringRangeSet::RangeEndLess()( + EXPECT_TRUE(StringRangeSet::Range::EndLess()( StringRangeSet::Range("unimportant", kWhatever, "A", kOpen), StringRangeSet::Range("unimportant", kWhatever, "A", kClosed))); } @@ -307,9 +309,9 @@ TEST(StringRangeSet, RangeEndLess) { TEST(StringRangeSet, BelowStart) { StringRangeSet::Range const open("B", kOpen, "unimportant", kWhatever); StringRangeSet::Range const closed("B", kClosed, "unimportant", kWhatever); - StringRangeSet::Range const infinite(StringRangeSet::Range::Infinity{}, kClosed, - StringRangeSet::Range::Infinity{}, - kClosed); + StringRangeSet::Range const infinite( + StringRangeSet::Range::Infinity{}, kClosed, + StringRangeSet::Range::Infinity{}, kClosed); EXPECT_TRUE(open.IsBelowStart("A")); EXPECT_TRUE(closed.IsBelowStart("A")); @@ -339,7 +341,6 @@ 
TEST(StringRangeSet, AboveEnd) { EXPECT_FALSE(infinite.IsAboveEnd(StringRangeSet::Range::Infinity{})); } - TEST(StringRangeSet, IsWithin) { StringRangeSet::Range const closed("A", kClosed, "C", kClosed); EXPECT_FALSE(closed.IsWithin("")); @@ -460,9 +461,9 @@ TEST(StringRangeSet, HasOverlap) { StringRangeSet::Range("A", kClosed, StringRangeSet::Range::Infinity{}, kClosed))); - EXPECT_FALSE(detail::HasOverlap( - StringRangeSet::Range("B", kClosed, "D", kOpen), - StringRangeSet::Range("D", kClosed, "E", kOpen))); + EXPECT_FALSE( + detail::HasOverlap(StringRangeSet::Range("B", kClosed, "D", kOpen), + StringRangeSet::Range("D", kClosed, "E", kOpen))); EXPECT_FALSE(detail::HasOverlap( StringRangeSet::Range("B", kClosed, std::string("D\0", 2), kOpen), StringRangeSet::Range("D", kOpen, "E", kOpen))); @@ -524,6 +525,167 @@ TEST(StringRangeSet, DisjointAdjacent) { StringRangeSet::Range(std::string("C\0", 2), kClosed, "D", kWhatever))); } +// FIXME test invalid data +TEST(TimestampRangeSet, FromInfiniteTimstampRange) { + using testing_util::chrono_literals::operator""_ms; + auto infinite = TimestampRangeSet::Range::FromTimestampRange( + google::bigtable::v2::TimestampRange{}); + ASSERT_STATUS_OK(infinite); + EXPECT_EQ(0_ms, infinite->start()); + EXPECT_EQ(0_ms, infinite->start_finite()); + EXPECT_EQ(0_ms, infinite->end()); + EXPECT_TRUE(infinite->start_closed()); + EXPECT_TRUE(infinite->end_open()); + EXPECT_FALSE(infinite->start_open()); + EXPECT_FALSE(infinite->end_closed()); +} + +TEST(TimestampRangeSet, FromFiniteTimstampRange) { + using testing_util::chrono_literals::operator""_ms; + google::bigtable::v2::TimestampRange proto; + proto.set_start_timestamp_micros(1234); + proto.set_end_timestamp_micros(123456789); + auto finite = TimestampRangeSet::Range::FromTimestampRange(proto); + ASSERT_STATUS_OK(finite); + EXPECT_EQ(1_ms, finite->start()); + EXPECT_EQ(1_ms, finite->start_finite()); + EXPECT_EQ(123456_ms, finite->end()); + EXPECT_TRUE(finite->start_closed()); + 
EXPECT_TRUE(finite->end_open()); + EXPECT_FALSE(finite->start_open()); + EXPECT_FALSE(finite->end_closed()); +} + +TEST(TimestampRangeSet, RangeStartLess) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_TRUE(TimestampRangeSet::Range::StartLess()( + TimestampRangeSet::Range(3_ms, 7_ms), + TimestampRangeSet::Range(4_ms, 7_ms))); + EXPECT_FALSE(TimestampRangeSet::Range::StartLess()( + TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(4_ms, 7_ms))); + EXPECT_FALSE(TimestampRangeSet::Range::StartLess()( + TimestampRangeSet::Range(5_ms, 7_ms), + TimestampRangeSet::Range(4_ms, 7_ms))); +} + +TEST(TimestampRangeSet, RangeEndLess) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_TRUE(TimestampRangeSet::Range::EndLess()( + TimestampRangeSet::Range(3_ms, 7_ms), + TimestampRangeSet::Range(4_ms, 8_ms))); + EXPECT_FALSE(TimestampRangeSet::Range::EndLess()( + TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(4_ms, 7_ms))); + EXPECT_FALSE(TimestampRangeSet::Range::EndLess()( + TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(4_ms, 6_ms))); + EXPECT_TRUE(TimestampRangeSet::Range::EndLess()( + TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(4_ms, 0_ms))); +} + +TEST(TimestampRangeSet, BelowStart) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 7_ms).IsBelowStart(0_ms)); + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 7_ms).IsBelowStart(2_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 7_ms).IsBelowStart(3_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 7_ms).IsBelowStart(4_ms)); +} + +TEST(TimestampRangeSet, AboveEnd) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 7_ms).IsAboveEnd(0_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 7_ms).IsAboveEnd(6_ms)); + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 7_ms).IsAboveEnd(7_ms)); + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 
7_ms).IsAboveEnd(8_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 0_ms).IsAboveEnd(4_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 0_ms).IsAboveEnd(0_ms)); +} + +TEST(TimestampRangeSet, IsWithin) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 5_ms).IsWithin(0_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 5_ms).IsWithin(2_ms)); + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 5_ms).IsWithin(3_ms)); + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 5_ms).IsWithin(4_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 5_ms).IsWithin(2_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 5_ms).IsWithin(2_ms)); + + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 0_ms).IsWithin(0_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 0_ms).IsWithin(2_ms)); + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 0_ms).IsWithin(3_ms)); + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 0_ms).IsWithin(4_ms)); +} + +TEST(TimestampRangeSet, RangeEqality) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_EQ(TimestampRangeSet::Range(3_ms, 5_ms), + TimestampRangeSet::Range(3_ms, 5_ms)); + EXPECT_EQ(TimestampRangeSet::Range(3_ms, 0_ms), + TimestampRangeSet::Range(3_ms, 0_ms)); + + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 5_ms) == + TimestampRangeSet::Range(4_ms, 5_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 5_ms) == + TimestampRangeSet::Range(3_ms, 6_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 0_ms) == + TimestampRangeSet::Range(4_ms, 0_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 0_ms) == + TimestampRangeSet::Range(3_ms, 10_ms)); +} + +TEST(TimestampRangeSet, RangePrint) { + using testing_util::chrono_literals::operator""_ms; + { + std::stringstream os; + os << TimestampRangeSet::Range(1_ms, 3_ms); + EXPECT_EQ("[1ms,3ms)", os.str()); + } + { + std::stringstream os; + os << TimestampRangeSet::Range(1_ms, 0_ms); + EXPECT_EQ("[1ms,inf)", os.str()); + } +} + +TEST(TimestampRangeSet, IsEmpty) { + using 
testing_util::chrono_literals::operator""_ms; + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 3_ms).IsEmpty()); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 0_ms).IsEmpty()); + EXPECT_FALSE(TimestampRangeSet::Range(0_ms, 0_ms).IsEmpty()); + EXPECT_FALSE(TimestampRangeSet::Range(1_ms, 0_ms).IsEmpty()); + EXPECT_FALSE(TimestampRangeSet::Range(1_ms, 2_ms).IsEmpty()); +} + +TEST(TimestampRangeSet, HasOverlap) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_FALSE(detail::HasOverlap(TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(0_ms, 4_ms))); + EXPECT_TRUE(detail::HasOverlap(TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(0_ms, 5_ms))); + EXPECT_TRUE(detail::HasOverlap(TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(6_ms, 9_ms))); + EXPECT_FALSE(detail::HasOverlap(TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(7_ms, 9_ms))); + EXPECT_TRUE(detail::HasOverlap(TimestampRangeSet::Range(4_ms, 0_ms), + TimestampRangeSet::Range(7_ms, 9_ms))); + EXPECT_FALSE(detail::HasOverlap(TimestampRangeSet::Range(4_ms, 0_ms), + TimestampRangeSet::Range(3_ms, 4_ms))); + EXPECT_TRUE(detail::HasOverlap(TimestampRangeSet::Range(4_ms, 0_ms), + TimestampRangeSet::Range(3_ms, 5_ms))); +} + +TEST(TimestampRangeSet, DisjointAdjacent) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_TRUE(detail::DisjointAndSortedRangesAdjacent( + TimestampRangeSet::Range(0_ms, 1_ms), + TimestampRangeSet::Range(1_ms, 2_ms))); + EXPECT_FALSE(detail::DisjointAndSortedRangesAdjacent( + TimestampRangeSet::Range(0_ms, 1_ms), + TimestampRangeSet::Range(2_ms, 2_ms))); +} + TEST(StringRangeSet, SingleRange) { StringRangeSet srs; srs.Insert(StringRangeSet::Range("a", kClosed, "b", kClosed)); From 4c549391152b14ef0b7301f82a02b074b7820e16 Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Tue, 11 Feb 2025 21:33:49 +0100 Subject: [PATCH 012/195] FilteredMapView tested. 
--- google/cloud/bigtable/emulator/CMakeLists.txt | 1 + .../emulator/bigtable_emulator_unit_tests.bzl | 1 + google/cloud/bigtable/emulator/filtered_map.h | 4 - .../bigtable/emulator/filtered_map_test.cc | 144 ++++++++++++++++++ google/cloud/bigtable/emulator/range_set.cc | 19 +-- .../cloud/bigtable/emulator/range_set_test.cc | 87 +++++++++++ 6 files changed, 240 insertions(+), 16 deletions(-) create mode 100644 google/cloud/bigtable/emulator/filtered_map_test.cc diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index 82cb278de48b0..c6cfdbb5cb161 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -59,6 +59,7 @@ if (BUILD_TESTING) # cmake-format: sort column_family_test.cc filter_test.cc + filtered_map_test.cc row_iterators_test.cc server_test.cc range_set_test.cc) diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl index 5f55adb9cafaf..99e3ba082a1db 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -19,6 +19,7 @@ bigtable_emulator_unit_tests = [ "column_family_test.cc", "filter_test.cc", + "filtered_map_test.cc", "row_iterators_test.cc", "server_test.cc", "range_set_test.cc", diff --git a/google/cloud/bigtable/emulator/filtered_map.h b/google/cloud/bigtable/emulator/filtered_map.h index b3dc30ba334f0..aed96939f5473 100644 --- a/google/cloud/bigtable/emulator/filtered_map.h +++ b/google/cloud/bigtable/emulator/filtered_map.h @@ -108,10 +108,6 @@ class FilteredMapView { ++filter_pos_; AdvanceToNextRange(); } - // This situation indicates that there are no rows which start after - // current (as pointed by `filter_pos_`) range's start. Given that we're - // traversing `filter_` in order, there will be no such rows for - // following ranges, i.e. 
we've reached the end. } std::reference_wrapper parent_; diff --git a/google/cloud/bigtable/emulator/filtered_map_test.cc b/google/cloud/bigtable/emulator/filtered_map_test.cc new file mode 100644 index 0000000000000..67632e318a175 --- /dev/null +++ b/google/cloud/bigtable/emulator/filtered_map_test.cc @@ -0,0 +1,144 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/emulator/filtered_map.h" +#include "google/cloud/bigtable/row_range.h" +#include "google/cloud/testing_util/is_proto_equal.h" +#include "google/cloud/testing_util/status_matchers.h" +#include "google/cloud/testing_util/chrono_literals.h" +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +bool const kOpen = true; +bool const kClosed = false; + +template +std::vector Keys(Map const& map) { + std::vector res; + std::transform(map.begin(), map.end(), std::back_inserter(res), + [](typename Map::const_iterator::value_type const& elem) { + return elem.first; + }); + return res; +} + +std::vector Vec(std::initializer_list const &v) { + std::vector res; + std::transform(v.begin(), v.end(), std::back_inserter(res), + [](char const* s) { return std::string(s); }); + std::sort(res.begin(), res.end()); + return res; +} + +TEST(FilteredMap, NoFilter) { + std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; + auto filter = StringRangeSet::All(); + FilteredMapView filtered(unfiltered, 
+ filter); + EXPECT_EQ(Vec({"zero", "one", "two"}), Keys(filtered)); +} + +TEST(FilteredMap, EmptyFilter) { + std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; + auto filter = StringRangeSet::Empty(); + FilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(Vec({}), Keys(filtered)); +} + +TEST(FilteredMap, OneOpen) { + std::map unfiltered{{"AA", 0}, {"AAA", 0}, {"AAAa", 0}, + {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; + auto filter = StringRangeSet::Empty(); + filter.Insert(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); + FilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); +} + +TEST(FilteredMap, OneClosed) { + std::map unfiltered{{"AA", 0}, {"AAA", 0}, {"AAAa", 0}, + {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; + auto filter = StringRangeSet::Empty(); + filter.Insert(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); + FilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb", "AAB"}), Keys(filtered)); +} + +TEST(FilteredMap, NoEntriesAfterClosedFilter) { + std::map unfiltered{{"AA", 0}, {"AAA", 0}, {"AAAa", 0}, + {"AAAb", 0}}; + auto filter = StringRangeSet::Empty(); + filter.Insert(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); + FilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb"}), Keys(filtered)); +} + +TEST(FilteredMap, NoEntriesAfterOpenFilter) { + std::map unfiltered{{"AA", 0}, {"AAA", 0}, {"AAAa", 0}, + {"AAAb", 0}}; + auto filter = StringRangeSet::Empty(); + filter.Insert(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); + FilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); +} + +TEST(FilteredMap, NoEntriesBeforeClosedFilter) { + std::map unfiltered{{"AAA", 0}, {"AAAa", 0}, + {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; + auto filter = StringRangeSet::Empty(); + filter.Insert(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); + FilteredMapView filtered(unfiltered, + filter); + 
EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb", "AAB"}), Keys(filtered)); +} + +TEST(FilteredMap, NoEntriesBeforeOpenFilter) { + std::map unfiltered{{"AAAa", 0}, + {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; + auto filter = StringRangeSet::Empty(); + filter.Insert(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); + FilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); +} + +TEST(FilteredMap, MultipleFilters) { + std::map unfiltered{ + {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, + {"AAC", 0}, {"BB", 0}, {"BBB", 0}, {"BBBa", 0}, {"BBBb", 0}, + {"CCCa", 0}, {"CCCb", 0}, {"CCD", 0}, {"CCE", 0}}; + auto filter = StringRangeSet::Empty(); + filter.Insert(StringRangeSet::Range("AAA", kOpen, "AAB", kClosed)); + filter.Insert(StringRangeSet::Range("BBB", kClosed, "BBC", kOpen)); + filter.Insert(StringRangeSet::Range("CCC", kClosed, "CCD", kOpen)); + FilteredMapView filtered(unfiltered, + filter); + + EXPECT_EQ(Vec({"AAAa", "AAAb", "AAB", "BBB", "BBBa", "BBBb", "CCCa", "CCCb"}), + Keys(filtered)); +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/range_set.cc b/google/cloud/bigtable/emulator/range_set.cc index e77cf0cf3286a..df5e5fcc6bfc3 100644 --- a/google/cloud/bigtable/emulator/range_set.cc +++ b/google/cloud/bigtable/emulator/range_set.cc @@ -104,18 +104,13 @@ void RangeSetInsertImpl(RangeSetType& disjoint_ranges, auto first_to_remove = disjoint_ranges.upper_bound(inserted_range); // `*first_to_remove` starts strictly after `inserted_range`'s start. - // The previous range is the first to have a chance for an overlap - it is the - // last one, which starts at or before `inserted_range` start. + // The previous range is the first to have a chance for an overlap (or being + // adjacent) - it is the last one, which starts at or before `inserted_range` + // start. 
if (first_to_remove != disjoint_ranges.begin() && - detail::HasOverlap(*std::prev(first_to_remove), inserted_range)) { - std::advance(first_to_remove, -1); - } - // The range preceeding `first_to_remove` for sure has no overlap with - // `inserted_range` but it may be adjacent to it. In that case we should also - // remove it. - if (first_to_remove != disjoint_ranges.begin() && - detail::DisjointAndSortedRangesAdjacent(*std::prev(first_to_remove), - inserted_range)) { + (detail::HasOverlap(*std::prev(first_to_remove), inserted_range) || + detail::DisjointAndSortedRangesAdjacent(*std::prev(first_to_remove), + inserted_range))) { std::advance(first_to_remove, -1); } if (first_to_remove != disjoint_ranges.end()) { @@ -347,7 +342,7 @@ bool StringRangeSet::Range::EndLess::operator()(Range const& lhs, StringRangeSet StringRangeSet::All() { StringRangeSet res; - res.Insert(Range("", false, StringRangeSet::Range::Infinity{}, true)); + res.Insert(Range("", false, StringRangeSet::Range::Infinity{}, false)); return res; } diff --git a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc index fcdff0f28d9f5..1632190b127d2 100644 --- a/google/cloud/bigtable/emulator/range_set_test.cc +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -694,6 +694,93 @@ TEST(StringRangeSet, SingleRange) { *srs.disjoint_ranges().begin()); } +std::set +TSRanges(std::vector> const& ranges) { + std::set res; + std::transform(ranges.begin(), ranges.end(), std::inserter(res, res.begin()), + [](std::pair const& range) { + return TimestampRangeSet::Range(range.first, range.second); + }); + return res; +} + +TEST(TimestampRangeSet, ThreeDisjointIntervals) { + using testing_util::chrono_literals::operator""_ms; + TimestampRangeSet trs; + trs.Insert(TimestampRangeSet::Range(1_ms, 2_ms)); + trs.Insert(TimestampRangeSet::Range(3_ms, 5_ms)); + trs.Insert(TimestampRangeSet::Range(6_ms, 8_ms)); + ASSERT_EQ(TSRanges({{1_ms, 2_ms}, {3_ms, 5_ms}, {6_ms, 8_ms}}), 
+ trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, MergingAdjacentPreceeding) { + using testing_util::chrono_literals::operator""_ms; + TimestampRangeSet trs; + trs.Insert(TimestampRangeSet::Range(7_ms, 8_ms)); + trs.Insert(TimestampRangeSet::Range(8_ms, 9_ms)); + ASSERT_EQ(TSRanges({{7_ms, 9_ms}}), + trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, MergingOverlappingPreceeding) { + using testing_util::chrono_literals::operator""_ms; + TimestampRangeSet trs; + trs.Insert(TimestampRangeSet::Range(7_ms, 9_ms)); + trs.Insert(TimestampRangeSet::Range(8_ms, 10_ms)); + ASSERT_EQ(TSRanges({{7_ms, 10_ms}}), + trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, RemovingOvelapping) { + using testing_util::chrono_literals::operator""_ms; + TimestampRangeSet trs; + trs.Insert(TimestampRangeSet::Range(1_ms, 2_ms)); + trs.Insert(TimestampRangeSet::Range(3_ms, 4_ms)); + trs.Insert(TimestampRangeSet::Range(5_ms, 6_ms)); + trs.Insert(TimestampRangeSet::Range(7_ms, 8_ms)); + trs.Insert(TimestampRangeSet::Range(1_ms, 8_ms)); + ASSERT_EQ(TSRanges({{1_ms, 8_ms}}), + trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, RemovingOvelappingExtendEnd) { + using testing_util::chrono_literals::operator""_ms; + TimestampRangeSet trs; + trs.Insert(TimestampRangeSet::Range(1_ms, 2_ms)); + trs.Insert(TimestampRangeSet::Range(3_ms, 4_ms)); + trs.Insert(TimestampRangeSet::Range(5_ms, 6_ms)); + trs.Insert(TimestampRangeSet::Range(7_ms, 8_ms)); + trs.Insert(TimestampRangeSet::Range(1_ms, 9_ms)); + ASSERT_EQ(TSRanges({{1_ms, 9_ms}}), + trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, RemovingOvelappingEarlyEnd) { + using testing_util::chrono_literals::operator""_ms; + TimestampRangeSet trs; + trs.Insert(TimestampRangeSet::Range(1_ms, 2_ms)); + trs.Insert(TimestampRangeSet::Range(3_ms, 4_ms)); + trs.Insert(TimestampRangeSet::Range(5_ms, 6_ms)); + trs.Insert(TimestampRangeSet::Range(7_ms, 9_ms)); + trs.Insert(TimestampRangeSet::Range(1_ms, 8_ms)); + ASSERT_EQ(TSRanges({{1_ms, 9_ms}}), 
+ trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, PluggingGap) { + using testing_util::chrono_literals::operator""_ms; + TimestampRangeSet trs; + trs.Insert(TimestampRangeSet::Range(1_ms, 2_ms)); + trs.Insert(TimestampRangeSet::Range(3_ms, 5_ms)); + ASSERT_EQ(TSRanges({{1_ms, 2_ms}, {3_ms, 5_ms}}), trs.disjoint_ranges()); + trs.Insert(TimestampRangeSet::Range(2_ms, 3_ms)); + ASSERT_EQ(TSRanges({{1_ms, 5_ms}}), trs.disjoint_ranges()); +} + + } // anonymous namespace } // namespace emulator } // namespace bigtable From a35b300f10a0f1f4929c0c5dfec925573fd62844 Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Tue, 11 Feb 2025 22:03:06 +0100 Subject: [PATCH 013/195] Some random fixes. --- google/cloud/bigtable/emulator/column_family.cc | 15 +++++++++++---- google/cloud/bigtable/emulator/column_family.h | 6 +++--- google/cloud/bigtable/emulator/filter.cc | 3 ++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 5ed4c9063b751..a10327a8aa8ea 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -96,12 +96,12 @@ class FilteredColumnFamilyStream::FilterApply { FilterApply(FilteredColumnFamilyStream& parent) : parent_(parent) {} bool operator()(ColumnRange const& column_range) { - parent_.column_ranges_->Insert(column_range.range); + parent_.column_ranges_.Insert(column_range.range); return true; } bool operator()(TimestampRange const& timestamp_range) { - parent_.timestamp_ranges_->Insert(timestamp_range.range); + parent_.timestamp_ranges_.Insert(timestamp_range.range); return true; } @@ -176,6 +176,13 @@ bool FilteredColumnFamilyStream::Next(NextMode mode) { void FilteredColumnFamilyStream::InitializeIfNeeded() const { if (!initialized_) { + if (column_ranges_.disjoint_ranges().empty()) { + column_ranges_.Insert(*StringRangeSet::All().disjoint_ranges().begin()); + } + if 
(timestamp_ranges_.disjoint_ranges().empty()) { + timestamp_ranges_.Insert(* + TimestampRangeSet::All().disjoint_ranges().begin()); + } PointToFirstCellAfterRowChange(); initialized_ = true; } @@ -185,7 +192,7 @@ void FilteredColumnFamilyStream::InitializeIfNeeded() const { bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() const { for (; column_it_.value() != columns_.value().end(); ++(column_it_.value())) { cells_ = FilteredMapView( - column_it_.value()->second, *timestamp_ranges_); + column_it_.value()->second, timestamp_ranges_); cell_it_ = cells_.value().begin(); if (cell_it_.value() != cells_.value().end()) { return true; @@ -198,7 +205,7 @@ bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() const { bool FilteredColumnFamilyStream::PointToFirstCellAfterRowChange() const { for (; row_it_ != rows_.end(); ++row_it_) { columns_ = FilteredMapView( - row_it_->second, *column_ranges_); + row_it_->second, column_ranges_); column_it_.value() = columns_.value().begin(); if (PointToFirstCellAfterColumnChange()) { return true; diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 3cfcbfb9e23b1..e47dd9f43bcde 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -123,11 +123,11 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { std::string column_family_name_; - std::shared_ptr row_ranges_; + std::shared_ptr row_ranges_; std::vector> row_regexes_; - std::shared_ptr column_ranges_; + mutable StringRangeSet column_ranges_; std::vector> column_regexes_; - std::shared_ptr timestamp_ranges_; + mutable TimestampRangeSet timestamp_ranges_; FilteredMapView rows_; mutable absl::optional> diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index 8d8c235cbdfae..60a20533c102d 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ 
b/google/cloud/bigtable/emulator/filter.cc @@ -300,7 +300,8 @@ bool MergeCellStreams::CellStreamGreater::operator()( return (*lhs)->timestamp() > (*rhs)->timestamp(); } -MergeCellStreams::MergeCellStreams(std::vector streams) { +MergeCellStreams::MergeCellStreams(std::vector streams) + : initialized_(false) { for (auto& stream : streams) { unfinished_streams_.emplace_back( std::make_unique(std::move(stream))); From 02338d4b92bbc203567c20ff98631c6435f4065b Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 17 Feb 2025 16:28:56 +0300 Subject: [PATCH 014/195] emulator: Start RowTransaction class. Logic from Table::MutateRow() will be moved here and this commit proposes the general structure of this class, for early review. --- .gitignore | 1 + google/cloud/bigtable/emulator/table.h | 48 ++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/.gitignore b/.gitignore index b8e44a997ef9b..1d012e3845f3d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Common build output directory names .build/ _build/ +build/ build-output/ build-out/ cmake-out/ diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 5d1983abf0b27..8dd10a79583f9 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -20,12 +20,15 @@ #include #include #include +#include #include #include #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/row_streamer.h" +#include "absl/types/variant.h" #include +#include namespace google { namespace cloud { @@ -67,6 +70,51 @@ class Table { std::map> column_families_; }; + +struct RestoreValue { + // The iterator to the `columns_` member of a relevant `ColumnFamilyRow` where + // we should reinsert the value. 
+ std::map::iterator column_row_it_; + std::chrono::milliseconds timestamp_; + std::string value_; +}; + +struct DeleteValue { + // The iterator to the `columns_` member of a relevant `ColumnFamilyRow` where + // we should delete value. + std::map::iterator column_row_it_; + std::chrono::milliseconds timestamp_; +}; + + +class RowTransaction { + public: + explicit RowTransaction(google::bigtable::v2::MutateRowRequest const &request); + void commit() { + committed_ = true; + } + + Status SetCell(std::string const& row_key, std::string const& column_qualifier, + std::chrono::milliseconds timestamp, std::string const& value); + Status DeleteRow(std::string const& row_key); + Status DeleteColumn( + std::string const& row_key, std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range); + + + private: + void Undo(); + bool committed_; + std::stack> undo_; + ~RowTransaction() { + if (!committed_) { + Undo(); + } + }; + +}; + + } // namespace emulator } // namespace bigtable } // namespace cloud From a3bb308c377abc96e7d3e3efbe686ceb77b9f281 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 17 Feb 2025 20:19:34 +0300 Subject: [PATCH 015/195] emulator: Properly declare all possible row mutations. And unconditionally return an error status for the aggregation mutations, which we don't currently support.
--- google/cloud/bigtable/emulator/table.cc | 14 +++++++++++++- google/cloud/bigtable/emulator/table.h | 13 ++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index bd477b274ebbd..520bba614a8ba 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -333,7 +333,7 @@ StatusOr CreateStringRangeSet( "`row_key` empty", GCP_ERROR_INFO().WithMetadata("row_set", row_set.DebugString())); } - res.Insert(StringRangeSet::Range(row_key, false, row_key, false)); + res.Insert(StringRangeSet::Range(row_key, false, row_key, false)); } for (auto const& row_range : row_set.row_ranges()) { auto maybe_range = StringRangeSet::Range::FromRowRange(row_range); @@ -406,6 +406,18 @@ bool Table::IsDeleteProtectedNoLock() const { return schema_.deletion_protection(); } +Status RowTransaction::AddToCell(::google::bigtable::v2::Mutation_AddToCell const &add_to_cell) { + return UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", add_to_cell.DebugString())); +} + +Status MergeToCell(::google::bigtable::v2::Mutation_MergeToCell const &merge_to_cell) { + return UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", merge_to_cell.DebugString())); +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 8dd10a79583f9..23e70a3eee399 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -94,13 +94,12 @@ class RowTransaction { committed_ = true; } - Status SetCell(std::string const& row_key, std::string const& column_qualifier, - std::chrono::milliseconds timestamp, std::string const& value); - Status DeleteRow(std::string const& row_key); - Status DeleteColumn( - std::string const& row_key, std::string const& 
column_qualifier, - ::google::bigtable::v2::TimestampRange const& time_range); - + Status SetCell(::google::bigtable::v2::Mutation_SetCell const &set_cell); + Status AddToCell(::google::bigtable::v2::Mutation_AddToCell const &add_to_cell); + Status MergeToCell(::google::bigtable::v2::Mutation_MergeToCell const &merge_to_cell); + Status DeleteFromColumn(::google::bigtable::v2::Mutation_DeleteFromColumn const &delete_from_column); + Status DeleteFromFamily(::google::bigtable::v2::Mutation_DeleteFromFamily const &delete_from_family); + Status DeleteFromRow(::google::bigtable::v2::Mutation_DeleteFromRow const &delete_from_row); private: void Undo(); From 9bb68e48f7ba7e617e434358354bb0dc881357ab Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 20 Feb 2025 14:56:24 +0300 Subject: [PATCH 016/195] emulator: Implement a SetCell that records an undo log. --- .../cloud/bigtable/emulator/column_family.h | 17 ++++- google/cloud/bigtable/emulator/table.cc | 74 ++++++++++++++++++- google/cloud/bigtable/emulator/table.h | 25 ++++++- 3 files changed, 110 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index e47dd9f43bcde..d487602da7ec0 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -48,10 +48,14 @@ class ColumnRow { return cells_.lower_bound(timestamp); } + std::map::iterator find(std::chrono::milliseconds const & timestamp) { + return cells_.find(timestamp); + } + private: std::map cells_; }; - + class ColumnFamilyRow { public: void SetCell(std::string const& column_qualifier, @@ -70,6 +74,11 @@ class ColumnFamilyRow { return columns_.lower_bound(column_qualifier); } + std::map::iterator find(std::string const & column_qualifier) { + return columns_.find(column_qualifier); + } + + private: std::map columns_; }; @@ -88,7 +97,7 @@ class ColumnFamily { const_iterator begin() const { return rows_.begin(); } - 
const_iterator end() const { + const_iterator end() const { return rows_.end(); } const_iterator lower_bound(std::string const& row_key) const { @@ -98,6 +107,10 @@ class ColumnFamily { return rows_.lower_bound(row_key); } + std::map::iterator find(std::string const & row_key) { + return rows_.find(row_key); + } + private: std::map rows_; }; diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 520bba614a8ba..b558428acf06a 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -13,8 +13,10 @@ // limitations under the License. #include +#include #include #include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/bigtable/emulator/row_iterators.h" @@ -412,12 +414,82 @@ Status RowTransaction::AddToCell(::google::bigtable::v2::Mutation_AddToCell cons GCP_ERROR_INFO().WithMetadata("mutation", add_to_cell.DebugString())); } -Status MergeToCell(::google::bigtable::v2::Mutation_MergeToCell const &merge_to_cell) { +Status RowTransaction::MergeToCell(::google::bigtable::v2::Mutation_MergeToCell const &merge_to_cell) { return UnimplementedError( "Unsupported mutation type.", GCP_ERROR_INFO().WithMetadata("mutation", merge_to_cell.DebugString())); } + +Status RowTransaction::SetCell(::google::bigtable::v2::Mutation_SetCell const &set_cell) { + auto maybe_column_family = table_->FindColumnFamily(set_cell); + if (!maybe_column_family) { + return maybe_column_family.status(); + } + + auto column_family = maybe_column_family->get(); + + bool row_existed = true; + // First if the key introduces a new ColumnFamilyRow, we need to + // arrange for the entire ColumnFamilyrow to go when we revert + // the transaction. 
+ auto row_key_it = column_family.find(request_.row_key()); + if (row_key_it == column_family.end()) { + row_existed = false; + } + + ::google::cloud::bigtable::emulator::ColumnFamilyRow column_family_row; + if (row_existed) { + column_family_row = row_key_it->second; + } + + bool column_existed = true; + auto column_row_it = column_family_row.find(set_cell.column_qualifier()); + if (column_row_it == column_family_row.end()) { + column_existed = false; + } + + bool cell_existed = true; + if (!column_existed) { + cell_existed = false; + } else { + auto timestamp_it = column_row_it->second.find(std::chrono::duration_cast(std::chrono::microseconds(set_cell.timestamp_micros()))); + if (timestamp_it == column_row_it->second.end()) { + cell_existed = false; + } + } + + column_family.SetCell( + request_.row_key(), set_cell.column_qualifier(), + std::chrono::duration_cast( + std::chrono::microseconds(set_cell.timestamp_micros())), + set_cell.value()); + + if (!row_existed) { + row_key_it = column_family.find(request_.row_key()); + DeleteRow delete_row = {row_key_it}; + undo_.emplace(delete_row); + } + + if (!column_existed) { + column_row_it = column_family_row.find(set_cell.column_qualifier()); + DeleteColumn delete_column_row = {column_row_it}; + undo_.emplace(delete_column_row); + } + + auto timestamp_it = column_row_it->second.find(std::chrono::duration_cast(std::chrono::microseconds(set_cell.timestamp_micros()))); + if (!cell_existed) { + DeleteValue delete_value = {column_row_it, timestamp_it->first}; + undo_.emplace(delete_value); + } else { + RestoreValue restore_value = {column_row_it, timestamp_it->first, std::move(timestamp_it->second)}; + undo_.emplace(restore_value); + } + + return Status(); + +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 23e70a3eee399..34c2d9b03bace 100644 --- a/google/cloud/bigtable/emulator/table.h +++ 
b/google/cloud/bigtable/emulator/table.h @@ -28,6 +28,7 @@ #include "google/cloud/bigtable/emulator/row_streamer.h" #include "absl/types/variant.h" #include +#include #include namespace google { @@ -58,6 +59,7 @@ class Table { private: Table() = default; friend class RowSetIterator; + friend class RowTransaction; template StatusOr> FindColumnFamily( @@ -86,10 +88,24 @@ struct DeleteValue { std::chrono::milliseconds timestamp_; }; +struct DeleteRow { + // The iterator to the `rows_` member of a relavant ColumnFamily + // which we should delete the row if the ColumnfamilyRow has been + // introduced by the mutation (i.e. it did not exist previously). + std::map::iterator row_it; +}; + +struct DeleteColumn { + // The iterator to the `columns_` member of the relevant + // ColumnFamilyRow which we should delete if the ColumnRow has been + // introduced in the mutation (i.e. did not exist previously). + std::map::iterator column_row_it; +}; + class RowTransaction { public: - explicit RowTransaction(google::bigtable::v2::MutateRowRequest const &request); + explicit RowTransaction(Table table, ::google::bigtable::v2::MutateRowRequest const &request); void commit() { committed_ = true; } @@ -103,14 +119,17 @@ class RowTransaction { private: void Undo(); - bool committed_; - std::stack> undo_; ~RowTransaction() { if (!committed_) { Undo(); } }; + bool committed_; + std::shared_ptr
table_; + std::stack> undo_; + ::google::bigtable::v2::MutateRowRequest request_; + }; From 84e18e4d9184e8dd129259af7449a19a08eac093 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 20 Feb 2025 19:23:52 +0300 Subject: [PATCH 017/195] Complete implementation of Undo() for SetCell mutations. --- .../cloud/bigtable/emulator/column_family.h | 13 +++++++ google/cloud/bigtable/emulator/table.cc | 37 ++++++++++++++++++- google/cloud/bigtable/emulator/table.h | 2 + 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index d487602da7ec0..5a112c8925388 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -22,6 +22,7 @@ #include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/bigtable/emulator/cell_view.h" #include "absl/types/optional.h" +#include #include namespace google { @@ -52,6 +53,10 @@ class ColumnRow { return cells_.find(timestamp); } + void erase(std::map::iterator timestamp_it) { + cells_.erase(timestamp_it); + } + private: std::map cells_; }; @@ -78,6 +83,10 @@ class ColumnFamilyRow { return columns_.find(column_qualifier); } + void erase(std::map::iterator column_it) { + columns_.erase(column_it); + } + private: std::map columns_; @@ -111,6 +120,10 @@ class ColumnFamily { return rows_.find(row_key); } + void erase(std::map::iterator row_it) { + rows_.erase(row_it); + } + private: std::map rows_; }; diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index b558428acf06a..71704d3f95730 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -467,13 +467,13 @@ Status RowTransaction::SetCell(::google::bigtable::v2::Mutation_SetCell const &s if (!row_existed) { row_key_it = column_family.find(request_.row_key()); - DeleteRow delete_row = {row_key_it}; + DeleteRow delete_row = 
{row_key_it, column_family}; undo_.emplace(delete_row); } if (!column_existed) { column_row_it = column_family_row.find(set_cell.column_qualifier()); - DeleteColumn delete_column_row = {column_row_it}; + DeleteColumn delete_column_row = {column_row_it, column_family_row}; undo_.emplace(delete_column_row); } @@ -487,9 +487,42 @@ Status RowTransaction::SetCell(::google::bigtable::v2::Mutation_SetCell const &s } return Status(); +} + +void RowTransaction::Undo() { + while (!undo_.empty()) { + auto op = undo_.top(); + undo_.pop(); + + if (auto *restore_value = absl::get_if(&op)) { + auto column_row = restore_value->column_row_it_->second; + column_row.find(restore_value->timestamp_)->second = std::move(restore_value->value_); + continue; + } + + if (auto *delete_value = absl::get_if(&op)) { + auto column_row = delete_value->column_row_it_->second; + auto timestamp_it = column_row.find(delete_value->timestamp_); + column_row.erase(timestamp_it); + continue; + } + if (auto *delete_row = absl::get_if(&op)) { + delete_row->column_family.erase(delete_row->row_it); + continue; + } + + if (auto *delete_column = absl::get_if(&op)) { + delete_column->column_family_row.erase(delete_column->column_row_it); + continue; + } + + // If we get here, there is an type of undo log that has not been implemented! + std::abort(); + } } + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 34c2d9b03bace..2d8e453942129 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -93,6 +93,7 @@ struct DeleteRow { // which we should delete the row if the ColumnfamilyRow has been // introduced by the mutation (i.e. it did not exist previously). 
std::map::iterator row_it; + ::google::cloud::bigtable::emulator::ColumnFamily &column_family; }; struct DeleteColumn { @@ -100,6 +101,7 @@ struct DeleteColumn { // ColumnFamilyRow which we should delete if the ColumnRow has been // introduced in the mutation (i.e. did not exist previously). std::map::iterator column_row_it; + ::google::cloud::bigtable::emulator::ColumnFamilyRow &column_family_row; }; From 613ade9938faa47754f69a6b60784cecbda9ec53 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 20 Feb 2025 20:10:37 +0300 Subject: [PATCH 018/195] emulator: Implement a constructor for the RowTransaction class. --- google/cloud/bigtable/emulator/table.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 2d8e453942129..07bb729a448fe 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -107,7 +107,12 @@ struct DeleteColumn { class RowTransaction { public: - explicit RowTransaction(Table table, ::google::bigtable::v2::MutateRowRequest const &request); + explicit RowTransaction(const Table *table, ::google::bigtable::v2::MutateRowRequest request) + { + table_ = table; + request_ = request; + }; + void commit() { committed_ = true; } @@ -128,7 +133,7 @@ class RowTransaction { }; bool committed_; - std::shared_ptr
table_; + const Table *table_; std::stack> undo_; ::google::bigtable::v2::MutateRowRequest request_; From 49a3e3376a0a7e2de5b3433ba73782e07d82068a Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 20 Feb 2025 20:16:17 +0300 Subject: [PATCH 019/195] emulator: Pass RowTransaction a constant reference to the mutation request. --- google/cloud/bigtable/emulator/table.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 07bb729a448fe..f7eb5a9edc1fd 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -107,10 +107,10 @@ struct DeleteColumn { class RowTransaction { public: - explicit RowTransaction(const Table *table, ::google::bigtable::v2::MutateRowRequest request) + explicit RowTransaction(const Table *table, const ::google::bigtable::v2::MutateRowRequest &request) + : request_(request) { table_ = table; - request_ = request; }; void commit() { @@ -135,7 +135,7 @@ class RowTransaction { bool committed_; const Table *table_; std::stack> undo_; - ::google::bigtable::v2::MutateRowRequest request_; + const ::google::bigtable::v2::MutateRowRequest &request_; }; From 9071761a1e17ff83f9532d5a2af2c2ce4d5c73c9 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 20 Feb 2025 20:43:28 +0300 Subject: [PATCH 020/195] emulator: Introduce the use of a RowTransaction in Table::MutateRow. Currently in use only for SetCell. 
--- google/cloud/bigtable/emulator/table.cc | 20 ++++++++++++-------- google/cloud/bigtable/emulator/table.h | 11 ++++++----- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 71704d3f95730..4f11537e116c6 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -231,18 +231,16 @@ Status Table::MutateRow( // FIXME - determine what happens when row/column family/column does not exist std::lock_guard lock(mu_); assert(request.table_name() == schema_.name()); + + RowTransaction row_transaction(this, request); + for (auto mutation : request.mutations()) { if (mutation.has_set_cell()) { auto const & set_cell = mutation.set_cell(); - auto maybe_column_family = FindColumnFamily(set_cell); - if (!maybe_column_family) { - return maybe_column_family.status(); + auto status = row_transaction.SetCell(set_cell); + if (!status.ok()) { + return status; } - maybe_column_family->get().SetCell( - request.row_key(), set_cell.column_qualifier(), - std::chrono::duration_cast( - std::chrono::microseconds(set_cell.timestamp_micros())), - set_cell.value()); } else if (mutation.has_add_to_cell()) { // FIXME } else if (mutation.has_merge_to_cell()) { @@ -282,6 +280,12 @@ Status Table::MutateRow( GCP_ERROR_INFO().WithMetadata("mutation", mutation.DebugString())); } } + + // If we get here, all mutations on the row have succeeded. We can + // commit and return which will prevent the destructor from undoing + // the transaction. 
+ row_transaction.commit(); + return Status(); } diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index f7eb5a9edc1fd..66830cf6c8f43 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -113,6 +113,12 @@ class RowTransaction { table_ = table; }; + ~RowTransaction() { + if (!committed_) { + Undo(); + } + }; + void commit() { committed_ = true; } @@ -126,11 +132,6 @@ class RowTransaction { private: void Undo(); - ~RowTransaction() { - if (!committed_) { - Undo(); - } - }; bool committed_; const Table *table_; From 7b1ebe448793199a6f32d286d1f77adbbf4f48e9 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 20 Feb 2025 21:09:34 +0300 Subject: [PATCH 021/195] emulator: Run clang-format on the subset of the files we have been changing. --- .../cloud/bigtable/emulator/column_family.h | 32 ++- google/cloud/bigtable/emulator/table.cc | 227 +++++++++--------- google/cloud/bigtable/emulator/table.h | 57 ++--- 3 files changed, 157 insertions(+), 159 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 5a112c8925388..345ce744f6784 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -15,13 +15,13 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H -#include -#include -#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/bigtable/emulator/cell_view.h" #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" -#include "google/cloud/bigtable/emulator/cell_view.h" +#include "google/cloud/bigtable/emulator/range_set.h" #include "absl/types/optional.h" +#include +#include #include #include @@ -30,7 +30,6 @@ namespace cloud { namespace bigtable { namespace emulator { - class 
ColumnRow { public: void SetCell(std::chrono::milliseconds timestamp, std::string const& value); @@ -49,11 +48,13 @@ class ColumnRow { return cells_.lower_bound(timestamp); } - std::map::iterator find(std::chrono::milliseconds const & timestamp) { + std::map::iterator find( + std::chrono::milliseconds const& timestamp) { return cells_.find(timestamp); } - void erase(std::map::iterator timestamp_it) { + void erase( + std::map::iterator timestamp_it) { cells_.erase(timestamp_it); } @@ -79,7 +80,8 @@ class ColumnFamilyRow { return columns_.lower_bound(column_qualifier); } - std::map::iterator find(std::string const & column_qualifier) { + std::map::iterator find( + std::string const& column_qualifier) { return columns_.find(column_qualifier); } @@ -87,7 +89,6 @@ class ColumnFamilyRow { columns_.erase(column_it); } - private: std::map columns_; }; @@ -103,12 +104,8 @@ class ColumnFamily { std::string const& row_key, std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); - const_iterator begin() const { - return rows_.begin(); - } - const_iterator end() const { - return rows_.end(); - } + const_iterator begin() const { return rows_.begin(); } + const_iterator end() const { return rows_.end(); } const_iterator lower_bound(std::string const& row_key) const { return rows_.lower_bound(row_key); } @@ -116,7 +113,8 @@ class ColumnFamily { return rows_.lower_bound(row_key); } - std::map::iterator find(std::string const & row_key) { + std::map::iterator find( + std::string const& row_key) { return rows_.find(row_key); } @@ -134,7 +132,7 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { std::shared_ptr row_set); bool ApplyFilter(InternalFilter const& internal_filter) override; bool HasValue() const override; - CellView const &Value() const override; + CellView const& Value() const override; bool Next(NextMode mode) override; std::string const& column_family_name() const { return column_family_name_; } diff --git 
a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 4f11537e116c6..a289de8548fd1 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -12,17 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include -#include #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/bigtable/emulator/row_iterators.h" #include "google/cloud/bigtable/internal/google_bytes_traits.h" -#include "google/protobuf/util/field_mask_util.h" #include "google/cloud/internal/make_status.h" +#include "google/protobuf/util/field_mask_util.h" +#include +#include +#include namespace google { namespace cloud { @@ -66,27 +66,23 @@ Status Table::Construct(google::bigtable::admin::v2::Table schema) { if (schema_.has_change_stream_config()) { return UnimplementedError( "`change_stream_config` not empty.", - GCP_ERROR_INFO().WithMetadata( - "schema", schema.DebugString())); + GCP_ERROR_INFO().WithMetadata("schema", schema.DebugString())); } if (schema_.has_automated_backup_policy()) { return UnimplementedError( "`automated_backup_policy` not empty.", - GCP_ERROR_INFO().WithMetadata( - "schema", schema.DebugString())); + GCP_ERROR_INFO().WithMetadata("schema", schema.DebugString())); } - for (auto const &column_family_def : schema_.column_families()) { - column_families_.emplace( - column_family_def.first, - std::make_shared()); + for (auto const& column_family_def : schema_.column_families()) { + column_families_.emplace(column_family_def.first, + std::make_shared()); } return Status(); } StatusOr Table::ModifyColumnFamilies( btadmin::ModifyColumnFamiliesRequest const& request) { - std::cout << "Modify column families: " << request.DebugString() - << std::endl; + 
std::cout << "Modify column families: " << request.DebugString() << std::endl; std::unique_lock lock(mu_); auto new_schema = schema_; auto new_column_families = column_families_; @@ -99,25 +95,22 @@ StatusOr Table::ModifyColumnFamilies( modification.DebugString())); } if (new_column_families.erase(modification.id()) == 0) { - return NotFoundError( - "No such column family.", - GCP_ERROR_INFO().WithMetadata("modification", - modification.DebugString())); + return NotFoundError("No such column family.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); } if (new_schema.mutable_column_families()->erase(modification.id()) == 0) { - return InternalError( - "Column family with no schema.", - GCP_ERROR_INFO().WithMetadata("modification", - modification.DebugString())); + return InternalError("Column family with no schema.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); } } else if (modification.has_update()) { auto& cfs = *new_schema.mutable_column_families(); auto cf_it = cfs.find(modification.id()); if (cf_it == cfs.end()) { - return NotFoundError( - "No such column family.", - GCP_ERROR_INFO().WithMetadata("modification", - modification.DebugString())); + return NotFoundError("No such column family.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); } using google::protobuf::util::FieldMaskUtil; @@ -156,10 +149,9 @@ StatusOr Table::ModifyColumnFamilies( if (!new_schema.mutable_column_families() ->emplace(modification.id(), modification.create()) .second) { - return InternalError( - "Column family with schema but no data.", - GCP_ERROR_INFO().WithMetadata("modification", - modification.DebugString())); + return InternalError("Column family with schema but no data.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); } } else { return UnimplementedError( @@ -195,8 +187,7 @@ Status Table::Update(google::bigtable::admin::v2::Table const& 
new_schema, to_update)) { return InvalidArgumentError( "Update mask is invalid.", - GCP_ERROR_INFO().WithMetadata( - "mask", to_update.DebugString())); + GCP_ERROR_INFO().WithMetadata("mask", to_update.DebugString())); } google::protobuf::FieldMask disallowed_mask; FieldMaskUtil::Subtract( @@ -204,8 +195,7 @@ Status Table::Update(google::bigtable::admin::v2::Table const& new_schema, if (disallowed_mask.paths_size() > 0) { return UnimplementedError( "Update mask contains disallowed fields.", - GCP_ERROR_INFO().WithMetadata( - "mask", disallowed_mask.DebugString())); + GCP_ERROR_INFO().WithMetadata("mask", disallowed_mask.DebugString())); } std::lock_guard lock(mu_); FieldMaskUtil::MergeMessageTo(new_schema, to_update, @@ -225,8 +215,7 @@ StatusOr> Table::FindColumnFamily( return std::ref(*column_family_it->second); } -Status Table::MutateRow( - google::bigtable::v2::MutateRowRequest const &request) { +Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { // FIXME - add atomicity // FIXME - determine what happens when row/column family/column does not exist std::lock_guard lock(mu_); @@ -236,7 +225,7 @@ Status Table::MutateRow( for (auto mutation : request.mutations()) { if (mutation.has_set_cell()) { - auto const & set_cell = mutation.set_cell(); + auto const& set_cell = mutation.set_cell(); auto status = row_transaction.SetCell(set_cell); if (!status.ok()) { return status; @@ -246,9 +235,8 @@ Status Table::MutateRow( } else if (mutation.has_merge_to_cell()) { // FIXME } else if (mutation.has_delete_from_column()) { - auto const & delete_from_column = mutation.delete_from_column(); - auto maybe_column_family = - FindColumnFamily(delete_from_column); + auto const& delete_from_column = mutation.delete_from_column(); + auto maybe_column_family = FindColumnFamily(delete_from_column); if (!maybe_column_family) { return maybe_column_family.status(); } @@ -289,7 +277,7 @@ Status Table::MutateRow( return Status(); } -class FilteredTableStream : 
public MergeCellStreams { +class FilteredTableStream : public MergeCellStreams { public: FilteredTableStream( std::vector> cf_streams) @@ -301,8 +289,8 @@ class FilteredTableStream : public MergeCellStreams { } for (auto stream_it = unfinished_streams_.begin(); stream_it != unfinished_streams_.end(); ++stream_it) { - auto* cf_stream = - dynamic_cast(&(*stream_it)->impl()); + auto* cf_stream = dynamic_cast( + &(*stream_it)->impl()); assert(cf_stream); if (re2::RE2::PartialMatch( cf_stream->column_family_name(), @@ -318,6 +306,7 @@ class FilteredTableStream : public MergeCellStreams { } return true; } + private: static std::vector CreateCellStreams( std::vector> cf_streams) { @@ -368,7 +357,7 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, } std::lock_guard lock(mu_); std::vector> per_cf_streams; - for (auto const & column_family: column_families_) { + for (auto const& column_family : column_families_) { per_cf_streams.emplace_back(std::make_shared( *column_family.second, column_family.first, row_set)); } @@ -412,85 +401,92 @@ bool Table::IsDeleteProtectedNoLock() const { return schema_.deletion_protection(); } -Status RowTransaction::AddToCell(::google::bigtable::v2::Mutation_AddToCell const &add_to_cell) { - return UnimplementedError( - "Unsupported mutation type.", - GCP_ERROR_INFO().WithMetadata("mutation", add_to_cell.DebugString())); +Status RowTransaction::AddToCell( + ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell) { + return UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", add_to_cell.DebugString())); } -Status RowTransaction::MergeToCell(::google::bigtable::v2::Mutation_MergeToCell const &merge_to_cell) { - return UnimplementedError( - "Unsupported mutation type.", - GCP_ERROR_INFO().WithMetadata("mutation", merge_to_cell.DebugString())); +Status RowTransaction::MergeToCell( + ::google::bigtable::v2::Mutation_MergeToCell const& merge_to_cell) { + return 
UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", merge_to_cell.DebugString())); } - -Status RowTransaction::SetCell(::google::bigtable::v2::Mutation_SetCell const &set_cell) { +Status RowTransaction::SetCell( + ::google::bigtable::v2::Mutation_SetCell const& set_cell) { auto maybe_column_family = table_->FindColumnFamily(set_cell); - if (!maybe_column_family) { - return maybe_column_family.status(); - } + if (!maybe_column_family) { + return maybe_column_family.status(); + } - auto column_family = maybe_column_family->get(); + auto column_family = maybe_column_family->get(); - bool row_existed = true; - // First if the key introduces a new ColumnFamilyRow, we need to - // arrange for the entire ColumnFamilyrow to go when we revert - // the transaction. - auto row_key_it = column_family.find(request_.row_key()); - if (row_key_it == column_family.end()) { - row_existed = false; - } + bool row_existed = true; + // First if the key introduces a new ColumnFamilyRow, we need to + // arrange for the entire ColumnFamilyrow to go when we revert + // the transaction. 
+ auto row_key_it = column_family.find(request_.row_key()); + if (row_key_it == column_family.end()) { + row_existed = false; + } - ::google::cloud::bigtable::emulator::ColumnFamilyRow column_family_row; - if (row_existed) { - column_family_row = row_key_it->second; - } + ::google::cloud::bigtable::emulator::ColumnFamilyRow column_family_row; + if (row_existed) { + column_family_row = row_key_it->second; + } - bool column_existed = true; - auto column_row_it = column_family_row.find(set_cell.column_qualifier()); - if (column_row_it == column_family_row.end()) { - column_existed = false; - } + bool column_existed = true; + auto column_row_it = column_family_row.find(set_cell.column_qualifier()); + if (column_row_it == column_family_row.end()) { + column_existed = false; + } - bool cell_existed = true; - if (!column_existed) { - cell_existed = false; - } else { - auto timestamp_it = column_row_it->second.find(std::chrono::duration_cast(std::chrono::microseconds(set_cell.timestamp_micros()))); - if (timestamp_it == column_row_it->second.end()) { - cell_existed = false; - } - } + bool cell_existed = true; + if (!column_existed) { + cell_existed = false; + } else { + auto timestamp_it = column_row_it->second.find( + std::chrono::duration_cast( + std::chrono::microseconds(set_cell.timestamp_micros()))); + if (timestamp_it == column_row_it->second.end()) { + cell_existed = false; + } + } - column_family.SetCell( - request_.row_key(), set_cell.column_qualifier(), - std::chrono::duration_cast( - std::chrono::microseconds(set_cell.timestamp_micros())), - set_cell.value()); + column_family.SetCell( + request_.row_key(), set_cell.column_qualifier(), + std::chrono::duration_cast( + std::chrono::microseconds(set_cell.timestamp_micros())), + set_cell.value()); - if (!row_existed) { - row_key_it = column_family.find(request_.row_key()); - DeleteRow delete_row = {row_key_it, column_family}; - undo_.emplace(delete_row); - } + if (!row_existed) { + row_key_it = 
column_family.find(request_.row_key()); + DeleteRow delete_row = {row_key_it, column_family}; + undo_.emplace(delete_row); + } - if (!column_existed) { - column_row_it = column_family_row.find(set_cell.column_qualifier()); - DeleteColumn delete_column_row = {column_row_it, column_family_row}; - undo_.emplace(delete_column_row); - } + if (!column_existed) { + column_row_it = column_family_row.find(set_cell.column_qualifier()); + DeleteColumn delete_column_row = {column_row_it, column_family_row}; + undo_.emplace(delete_column_row); + } - auto timestamp_it = column_row_it->second.find(std::chrono::duration_cast(std::chrono::microseconds(set_cell.timestamp_micros()))); - if (!cell_existed) { - DeleteValue delete_value = {column_row_it, timestamp_it->first}; - undo_.emplace(delete_value); - } else { - RestoreValue restore_value = {column_row_it, timestamp_it->first, std::move(timestamp_it->second)}; - undo_.emplace(restore_value); - } + auto timestamp_it = column_row_it->second.find( + std::chrono::duration_cast( + std::chrono::microseconds(set_cell.timestamp_micros()))); + if (!cell_existed) { + DeleteValue delete_value = {column_row_it, timestamp_it->first}; + undo_.emplace(delete_value); + } else { + RestoreValue restore_value = {column_row_it, timestamp_it->first, + std::move(timestamp_it->second)}; + undo_.emplace(restore_value); + } - return Status(); + return Status(); } void RowTransaction::Undo() { @@ -498,35 +494,36 @@ void RowTransaction::Undo() { auto op = undo_.top(); undo_.pop(); - if (auto *restore_value = absl::get_if(&op)) { + if (auto* restore_value = absl::get_if(&op)) { auto column_row = restore_value->column_row_it_->second; - column_row.find(restore_value->timestamp_)->second = std::move(restore_value->value_); + column_row.find(restore_value->timestamp_)->second = + std::move(restore_value->value_); continue; } - if (auto *delete_value = absl::get_if(&op)) { + if (auto* delete_value = absl::get_if(&op)) { auto column_row = 
delete_value->column_row_it_->second; auto timestamp_it = column_row.find(delete_value->timestamp_); column_row.erase(timestamp_it); continue; } - if (auto *delete_row = absl::get_if(&op)) { + if (auto* delete_row = absl::get_if(&op)) { delete_row->column_family.erase(delete_row->row_it); continue; } - if (auto *delete_column = absl::get_if(&op)) { + if (auto* delete_column = absl::get_if(&op)) { delete_column->column_family_row.erase(delete_column->column_row_it); continue; } - // If we get here, there is an type of undo log that has not been implemented! + // If we get here, there is an type of undo log that has not been + // implemented! std::abort(); } } - } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 66830cf6c8f43..192199de829de 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -15,18 +15,18 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TABLE_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TABLE_H +#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/row_streamer.h" #include "google/cloud/status.h" #include "google/cloud/status_or.h" +#include "absl/types/variant.h" #include #include #include #include #include #include -#include "google/cloud/bigtable/emulator/filter.h" -#include "google/cloud/bigtable/emulator/column_family.h" -#include "google/cloud/bigtable/emulator/row_streamer.h" -#include "absl/types/variant.h" #include #include #include @@ -51,7 +51,7 @@ class Table { bool IsDeleteProtected() const; - Status MutateRow(google::bigtable::v2::MutateRowRequest const & request); + Status MutateRow(google::bigtable::v2::MutateRowRequest const& request); Status ReadRows(google::bigtable::v2::ReadRowsRequest const& request, RowStreamer& row_streamer) const; @@ -72,7 +72,6 
@@ class Table { std::map> column_families_; }; - struct RestoreValue { // The iterator to the `columns_` member of a relevant `ColumnFamilyRow` where // we should reinsert the value. @@ -93,7 +92,7 @@ struct DeleteRow { // which we should delete the row if the ColumnfamilyRow has been // introduced by the mutation (i.e. it did not exist previously). std::map::iterator row_it; - ::google::cloud::bigtable::emulator::ColumnFamily &column_family; + ::google::cloud::bigtable::emulator::ColumnFamily& column_family; }; struct DeleteColumn { @@ -101,15 +100,15 @@ struct DeleteColumn { // ColumnFamilyRow which we should delete if the ColumnRow has been // introduced in the mutation (i.e. did not exist previously). std::map::iterator column_row_it; - ::google::cloud::bigtable::emulator::ColumnFamilyRow &column_family_row; + ::google::cloud::bigtable::emulator::ColumnFamilyRow& column_family_row; }; - class RowTransaction { public: - explicit RowTransaction(const Table *table, const ::google::bigtable::v2::MutateRowRequest &request) - : request_(request) - { + explicit RowTransaction( + Table const* table, + ::google::bigtable::v2::MutateRowRequest const& request) + : request_(request) { table_ = table; }; @@ -119,28 +118,32 @@ class RowTransaction { } }; - void commit() { - committed_ = true; - } - - Status SetCell(::google::bigtable::v2::Mutation_SetCell const &set_cell); - Status AddToCell(::google::bigtable::v2::Mutation_AddToCell const &add_to_cell); - Status MergeToCell(::google::bigtable::v2::Mutation_MergeToCell const &merge_to_cell); - Status DeleteFromColumn(::google::bigtable::v2::Mutation_DeleteFromColumn const &delete_from_column); - Status DeleteFromFamily(::google::bigtable::v2::Mutation_DeleteFromFamily const &delete_from_family); - Status DeleteFromRow(::google::bigtable::v2::Mutation_DeleteFromRow const &delete_from_row); + void commit() { committed_ = true; } + + Status SetCell(::google::bigtable::v2::Mutation_SetCell const& set_cell); + Status AddToCell( 
+ ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell); + Status MergeToCell( + ::google::bigtable::v2::Mutation_MergeToCell const& merge_to_cell); + Status DeleteFromColumn( + ::google::bigtable::v2::Mutation_DeleteFromColumn const& + delete_from_column); + Status DeleteFromFamily( + ::google::bigtable::v2::Mutation_DeleteFromFamily const& + delete_from_family); + Status DeleteFromRow( + ::google::bigtable::v2::Mutation_DeleteFromRow const& delete_from_row); private: void Undo(); bool committed_; - const Table *table_; - std::stack> undo_; - const ::google::bigtable::v2::MutateRowRequest &request_; - + Table const* table_; + std::stack> + undo_; + ::google::bigtable::v2::MutateRowRequest const& request_; }; - } // namespace emulator } // namespace bigtable } // namespace cloud From 8146b73f1638986b49f1b3f177f51d0f73e044d9 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 17 Feb 2025 16:28:56 +0300 Subject: [PATCH 022/195] emulator: Implement undo log and wire-up rewind for the SetCell mutation. - Introduces a RowTransaction class to proxy mutations and record undo logs in case we need to rewind. - Re-implements the SetCell mutation in the RowTransaction class. - Introduces all Undo types/actions for SetCell and records them in an undo stack. - For SetCell only and to facilitate testing with only SetCell mutations, introduces the use of RowTransaction in doing the mutation. - clang-format is run on all affected files (which has removed some headers, etc.). A test for SetCell is coming up and is not currently included here. 
--- .gitignore | 1 + .../cloud/bigtable/emulator/column_family.h | 50 +++- google/cloud/bigtable/emulator/table.cc | 222 ++++++++++++++---- google/cloud/bigtable/emulator/table.h | 85 ++++++- 4 files changed, 289 insertions(+), 69 deletions(-) diff --git a/.gitignore b/.gitignore index b8e44a997ef9b..1d012e3845f3d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Common build output directory names .build/ _build/ +build/ build-output/ build-out/ cmake-out/ diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index e47dd9f43bcde..345ce744f6784 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -15,13 +15,14 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H -#include -#include -#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/bigtable/emulator/cell_view.h" #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" -#include "google/cloud/bigtable/emulator/cell_view.h" +#include "google/cloud/bigtable/emulator/range_set.h" #include "absl/types/optional.h" +#include +#include +#include #include namespace google { @@ -29,7 +30,6 @@ namespace cloud { namespace bigtable { namespace emulator { - class ColumnRow { public: void SetCell(std::chrono::milliseconds timestamp, std::string const& value); @@ -48,10 +48,20 @@ class ColumnRow { return cells_.lower_bound(timestamp); } + std::map::iterator find( + std::chrono::milliseconds const& timestamp) { + return cells_.find(timestamp); + } + + void erase( + std::map::iterator timestamp_it) { + cells_.erase(timestamp_it); + } + private: std::map cells_; }; - + class ColumnFamilyRow { public: void SetCell(std::string const& column_qualifier, @@ -70,6 +80,15 @@ class ColumnFamilyRow { return columns_.lower_bound(column_qualifier); } 
+ std::map::iterator find( + std::string const& column_qualifier) { + return columns_.find(column_qualifier); + } + + void erase(std::map::iterator column_it) { + columns_.erase(column_it); + } + private: std::map columns_; }; @@ -85,12 +104,8 @@ class ColumnFamily { std::string const& row_key, std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); - const_iterator begin() const { - return rows_.begin(); - } - const_iterator end() const { - return rows_.end(); - } + const_iterator begin() const { return rows_.begin(); } + const_iterator end() const { return rows_.end(); } const_iterator lower_bound(std::string const& row_key) const { return rows_.lower_bound(row_key); } @@ -98,6 +113,15 @@ class ColumnFamily { return rows_.lower_bound(row_key); } + std::map::iterator find( + std::string const& row_key) { + return rows_.find(row_key); + } + + void erase(std::map::iterator row_it) { + rows_.erase(row_it); + } + private: std::map rows_; }; @@ -108,7 +132,7 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { std::shared_ptr row_set); bool ApplyFilter(InternalFilter const& internal_filter) override; bool HasValue() const override; - CellView const &Value() const override; + CellView const& Value() const override; bool Next(NextMode mode) override; std::string const& column_family_name() const { return column_family_name_; } diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index bd477b274ebbd..a289de8548fd1 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -12,15 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include -#include #include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/bigtable/emulator/row_iterators.h" #include "google/cloud/bigtable/internal/google_bytes_traits.h" -#include "google/protobuf/util/field_mask_util.h" #include "google/cloud/internal/make_status.h" +#include "google/protobuf/util/field_mask_util.h" +#include +#include +#include namespace google { namespace cloud { @@ -64,27 +66,23 @@ Status Table::Construct(google::bigtable::admin::v2::Table schema) { if (schema_.has_change_stream_config()) { return UnimplementedError( "`change_stream_config` not empty.", - GCP_ERROR_INFO().WithMetadata( - "schema", schema.DebugString())); + GCP_ERROR_INFO().WithMetadata("schema", schema.DebugString())); } if (schema_.has_automated_backup_policy()) { return UnimplementedError( "`automated_backup_policy` not empty.", - GCP_ERROR_INFO().WithMetadata( - "schema", schema.DebugString())); + GCP_ERROR_INFO().WithMetadata("schema", schema.DebugString())); } - for (auto const &column_family_def : schema_.column_families()) { - column_families_.emplace( - column_family_def.first, - std::make_shared()); + for (auto const& column_family_def : schema_.column_families()) { + column_families_.emplace(column_family_def.first, + std::make_shared()); } return Status(); } StatusOr Table::ModifyColumnFamilies( btadmin::ModifyColumnFamiliesRequest const& request) { - std::cout << "Modify column families: " << request.DebugString() - << std::endl; + std::cout << "Modify column families: " << request.DebugString() << std::endl; std::unique_lock lock(mu_); auto new_schema = schema_; auto new_column_families = column_families_; @@ -97,25 +95,22 @@ StatusOr Table::ModifyColumnFamilies( modification.DebugString())); } if (new_column_families.erase(modification.id()) == 0) { - return NotFoundError( - "No 
such column family.", - GCP_ERROR_INFO().WithMetadata("modification", - modification.DebugString())); + return NotFoundError("No such column family.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); } if (new_schema.mutable_column_families()->erase(modification.id()) == 0) { - return InternalError( - "Column family with no schema.", - GCP_ERROR_INFO().WithMetadata("modification", - modification.DebugString())); + return InternalError("Column family with no schema.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); } } else if (modification.has_update()) { auto& cfs = *new_schema.mutable_column_families(); auto cf_it = cfs.find(modification.id()); if (cf_it == cfs.end()) { - return NotFoundError( - "No such column family.", - GCP_ERROR_INFO().WithMetadata("modification", - modification.DebugString())); + return NotFoundError("No such column family.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); } using google::protobuf::util::FieldMaskUtil; @@ -154,10 +149,9 @@ StatusOr Table::ModifyColumnFamilies( if (!new_schema.mutable_column_families() ->emplace(modification.id(), modification.create()) .second) { - return InternalError( - "Column family with schema but no data.", - GCP_ERROR_INFO().WithMetadata("modification", - modification.DebugString())); + return InternalError("Column family with schema but no data.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); } } else { return UnimplementedError( @@ -193,8 +187,7 @@ Status Table::Update(google::bigtable::admin::v2::Table const& new_schema, to_update)) { return InvalidArgumentError( "Update mask is invalid.", - GCP_ERROR_INFO().WithMetadata( - "mask", to_update.DebugString())); + GCP_ERROR_INFO().WithMetadata("mask", to_update.DebugString())); } google::protobuf::FieldMask disallowed_mask; FieldMaskUtil::Subtract( @@ -202,8 +195,7 @@ Status 
Table::Update(google::bigtable::admin::v2::Table const& new_schema, if (disallowed_mask.paths_size() > 0) { return UnimplementedError( "Update mask contains disallowed fields.", - GCP_ERROR_INFO().WithMetadata( - "mask", disallowed_mask.DebugString())); + GCP_ERROR_INFO().WithMetadata("mask", disallowed_mask.DebugString())); } std::lock_guard lock(mu_); FieldMaskUtil::MergeMessageTo(new_schema, to_update, @@ -223,32 +215,28 @@ StatusOr> Table::FindColumnFamily( return std::ref(*column_family_it->second); } -Status Table::MutateRow( - google::bigtable::v2::MutateRowRequest const &request) { +Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { // FIXME - add atomicity // FIXME - determine what happens when row/column family/column does not exist std::lock_guard lock(mu_); assert(request.table_name() == schema_.name()); + + RowTransaction row_transaction(this, request); + for (auto mutation : request.mutations()) { if (mutation.has_set_cell()) { - auto const & set_cell = mutation.set_cell(); - auto maybe_column_family = FindColumnFamily(set_cell); - if (!maybe_column_family) { - return maybe_column_family.status(); + auto const& set_cell = mutation.set_cell(); + auto status = row_transaction.SetCell(set_cell); + if (!status.ok()) { + return status; } - maybe_column_family->get().SetCell( - request.row_key(), set_cell.column_qualifier(), - std::chrono::duration_cast( - std::chrono::microseconds(set_cell.timestamp_micros())), - set_cell.value()); } else if (mutation.has_add_to_cell()) { // FIXME } else if (mutation.has_merge_to_cell()) { // FIXME } else if (mutation.has_delete_from_column()) { - auto const & delete_from_column = mutation.delete_from_column(); - auto maybe_column_family = - FindColumnFamily(delete_from_column); + auto const& delete_from_column = mutation.delete_from_column(); + auto maybe_column_family = FindColumnFamily(delete_from_column); if (!maybe_column_family) { return maybe_column_family.status(); } @@ -280,10 +268,16 
@@ Status Table::MutateRow( GCP_ERROR_INFO().WithMetadata("mutation", mutation.DebugString())); } } + + // If we get here, all mutations on the row have succeeded. We can + // commit and return which will prevent the destructor from undoing + // the transaction. + row_transaction.commit(); + return Status(); } -class FilteredTableStream : public MergeCellStreams { +class FilteredTableStream : public MergeCellStreams { public: FilteredTableStream( std::vector> cf_streams) @@ -295,8 +289,8 @@ class FilteredTableStream : public MergeCellStreams { } for (auto stream_it = unfinished_streams_.begin(); stream_it != unfinished_streams_.end(); ++stream_it) { - auto* cf_stream = - dynamic_cast(&(*stream_it)->impl()); + auto* cf_stream = dynamic_cast( + &(*stream_it)->impl()); assert(cf_stream); if (re2::RE2::PartialMatch( cf_stream->column_family_name(), @@ -312,6 +306,7 @@ class FilteredTableStream : public MergeCellStreams { } return true; } + private: static std::vector CreateCellStreams( std::vector> cf_streams) { @@ -333,7 +328,7 @@ StatusOr CreateStringRangeSet( "`row_key` empty", GCP_ERROR_INFO().WithMetadata("row_set", row_set.DebugString())); } - res.Insert(StringRangeSet::Range(row_key, false, row_key, false)); + res.Insert(StringRangeSet::Range(row_key, false, row_key, false)); } for (auto const& row_range : row_set.row_ranges()) { auto maybe_range = StringRangeSet::Range::FromRowRange(row_range); @@ -362,7 +357,7 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, } std::lock_guard lock(mu_); std::vector> per_cf_streams; - for (auto const & column_family: column_families_) { + for (auto const& column_family : column_families_) { per_cf_streams.emplace_back(std::make_shared( *column_family.second, column_family.first, row_set)); } @@ -406,6 +401,129 @@ bool Table::IsDeleteProtectedNoLock() const { return schema_.deletion_protection(); } +Status RowTransaction::AddToCell( + ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell) { 
+ return UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", add_to_cell.DebugString())); +} + +Status RowTransaction::MergeToCell( + ::google::bigtable::v2::Mutation_MergeToCell const& merge_to_cell) { + return UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", merge_to_cell.DebugString())); +} + +Status RowTransaction::SetCell( + ::google::bigtable::v2::Mutation_SetCell const& set_cell) { + auto maybe_column_family = table_->FindColumnFamily(set_cell); + if (!maybe_column_family) { + return maybe_column_family.status(); + } + + auto column_family = maybe_column_family->get(); + + bool row_existed = true; + // First if the key introduces a new ColumnFamilyRow, we need to + // arrange for the entire ColumnFamilyrow to go when we revert + // the transaction. + auto row_key_it = column_family.find(request_.row_key()); + if (row_key_it == column_family.end()) { + row_existed = false; + } + + ::google::cloud::bigtable::emulator::ColumnFamilyRow column_family_row; + if (row_existed) { + column_family_row = row_key_it->second; + } + + bool column_existed = true; + auto column_row_it = column_family_row.find(set_cell.column_qualifier()); + if (column_row_it == column_family_row.end()) { + column_existed = false; + } + + bool cell_existed = true; + if (!column_existed) { + cell_existed = false; + } else { + auto timestamp_it = column_row_it->second.find( + std::chrono::duration_cast( + std::chrono::microseconds(set_cell.timestamp_micros()))); + if (timestamp_it == column_row_it->second.end()) { + cell_existed = false; + } + } + + column_family.SetCell( + request_.row_key(), set_cell.column_qualifier(), + std::chrono::duration_cast( + std::chrono::microseconds(set_cell.timestamp_micros())), + set_cell.value()); + + if (!row_existed) { + row_key_it = column_family.find(request_.row_key()); + DeleteRow delete_row = {row_key_it, column_family}; + undo_.emplace(delete_row); + } + + if 
(!column_existed) { + column_row_it = column_family_row.find(set_cell.column_qualifier()); + DeleteColumn delete_column_row = {column_row_it, column_family_row}; + undo_.emplace(delete_column_row); + } + + auto timestamp_it = column_row_it->second.find( + std::chrono::duration_cast( + std::chrono::microseconds(set_cell.timestamp_micros()))); + if (!cell_existed) { + DeleteValue delete_value = {column_row_it, timestamp_it->first}; + undo_.emplace(delete_value); + } else { + RestoreValue restore_value = {column_row_it, timestamp_it->first, + std::move(timestamp_it->second)}; + undo_.emplace(restore_value); + } + + return Status(); +} + +void RowTransaction::Undo() { + while (!undo_.empty()) { + auto op = undo_.top(); + undo_.pop(); + + if (auto* restore_value = absl::get_if(&op)) { + auto column_row = restore_value->column_row_it_->second; + column_row.find(restore_value->timestamp_)->second = + std::move(restore_value->value_); + continue; + } + + if (auto* delete_value = absl::get_if(&op)) { + auto column_row = delete_value->column_row_it_->second; + auto timestamp_it = column_row.find(delete_value->timestamp_); + column_row.erase(timestamp_it); + continue; + } + + if (auto* delete_row = absl::get_if(&op)) { + delete_row->column_family.erase(delete_row->row_it); + continue; + } + + if (auto* delete_column = absl::get_if(&op)) { + delete_column->column_family_row.erase(delete_column->column_row_it); + continue; + } + + // If we get here, there is an type of undo log that has not been + // implemented! 
+ std::abort(); + } +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 5d1983abf0b27..192199de829de 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -15,17 +15,21 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TABLE_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TABLE_H +#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/row_streamer.h" #include "google/cloud/status.h" #include "google/cloud/status_or.h" +#include "absl/types/variant.h" #include #include #include +#include #include #include -#include "google/cloud/bigtable/emulator/filter.h" -#include "google/cloud/bigtable/emulator/column_family.h" -#include "google/cloud/bigtable/emulator/row_streamer.h" #include +#include +#include namespace google { namespace cloud { @@ -47,7 +51,7 @@ class Table { bool IsDeleteProtected() const; - Status MutateRow(google::bigtable::v2::MutateRowRequest const & request); + Status MutateRow(google::bigtable::v2::MutateRowRequest const& request); Status ReadRows(google::bigtable::v2::ReadRowsRequest const& request, RowStreamer& row_streamer) const; @@ -55,6 +59,7 @@ class Table { private: Table() = default; friend class RowSetIterator; + friend class RowTransaction; template StatusOr> FindColumnFamily( @@ -67,6 +72,78 @@ class Table { std::map> column_families_; }; +struct RestoreValue { + // The iterator to the `columns_` member of a relevant `ColumnFamilyRow` where + // we should reinsert the value. + std::map::iterator column_row_it_; + std::chrono::milliseconds timestamp_; + std::string value_; +}; + +struct DeleteValue { + // The iterator to the `columns_` member of a relevant `ColumnFamilyRow` where + // we should delete value. 
+ std::map::iterator column_row_it_; + std::chrono::milliseconds timestamp_; +}; + +struct DeleteRow { + // The iterator to the `rows_` member of a relavant ColumnFamily + // which we should delete the row if the ColumnfamilyRow has been + // introduced by the mutation (i.e. it did not exist previously). + std::map::iterator row_it; + ::google::cloud::bigtable::emulator::ColumnFamily& column_family; +}; + +struct DeleteColumn { + // The iterator to the `columns_` member of the relevant + // ColumnFamilyRow which we should delete if the ColumnRow has been + // introduced in the mutation (i.e. did not exist previously). + std::map::iterator column_row_it; + ::google::cloud::bigtable::emulator::ColumnFamilyRow& column_family_row; +}; + +class RowTransaction { + public: + explicit RowTransaction( + Table const* table, + ::google::bigtable::v2::MutateRowRequest const& request) + : request_(request) { + table_ = table; + }; + + ~RowTransaction() { + if (!committed_) { + Undo(); + } + }; + + void commit() { committed_ = true; } + + Status SetCell(::google::bigtable::v2::Mutation_SetCell const& set_cell); + Status AddToCell( + ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell); + Status MergeToCell( + ::google::bigtable::v2::Mutation_MergeToCell const& merge_to_cell); + Status DeleteFromColumn( + ::google::bigtable::v2::Mutation_DeleteFromColumn const& + delete_from_column); + Status DeleteFromFamily( + ::google::bigtable::v2::Mutation_DeleteFromFamily const& + delete_from_family); + Status DeleteFromRow( + ::google::bigtable::v2::Mutation_DeleteFromRow const& delete_from_row); + + private: + void Undo(); + + bool committed_; + Table const* table_; + std::stack> + undo_; + ::google::bigtable::v2::MutateRowRequest const& request_; +}; + } // namespace emulator } // namespace bigtable } // namespace cloud From 274a19de37b4e2cdc50c7bf58b6d992c6eb2fb55 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 21 Feb 2025 19:18:18 +0300 Subject: [PATCH 023/195] 
emulator: Start test suite for row mutation transaction rollback. --- google/cloud/bigtable/emulator/CMakeLists.txt | 1 + .../emulator/bigtable_emulator_unit_tests.bzl | 1 + .../cloud/bigtable/emulator/rollback_test.cc | 30 +++++++++++++++++++ 3 files changed, 32 insertions(+) create mode 100644 google/cloud/bigtable/emulator/rollback_test.cc diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index c6cfdbb5cb161..8ba508160e78a 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -60,6 +60,7 @@ if (BUILD_TESTING) column_family_test.cc filter_test.cc filtered_map_test.cc + rollback_test.cc row_iterators_test.cc server_test.cc range_set_test.cc) diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl index 99e3ba082a1db..20e661be2b1aa 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -20,6 +20,7 @@ bigtable_emulator_unit_tests = [ "column_family_test.cc", "filter_test.cc", "filtered_map_test.cc", + "rollback_test.cc", "row_iterators_test.cc", "server_test.cc", "range_set_test.cc", diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc new file mode 100644 index 0000000000000..0688f70dcefd1 --- /dev/null +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -0,0 +1,30 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +// Ensure that SetCell still works to set a cell that was not set +// before. +TEST(TransactonRollback, SetCellBasicFunction) { + +} +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google From 93d757e868dd177c4a488236b28e4553a64a8468 Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Fri, 21 Feb 2025 18:15:11 +0100 Subject: [PATCH 024/195] feat: some semi-working tests --- .../cloud/bigtable/emulator/column_family.cc | 35 +- .../cloud/bigtable/emulator/column_family.h | 38 +- .../bigtable/emulator/column_family_test.cc | 382 ++++++++++++++++-- google/cloud/bigtable/emulator/filtered_map.h | 111 ++++- .../bigtable/emulator/filtered_map_test.cc | 136 ++++--- google/cloud/bigtable/emulator/range_set.cc | 50 ++- google/cloud/bigtable/emulator/range_set.h | 6 +- .../cloud/bigtable/emulator/range_set_test.cc | 67 +-- .../cloud/bigtable/emulator/row_streamer.cc | 21 +- google/cloud/bigtable/emulator/row_streamer.h | 8 +- google/cloud/bigtable/emulator/table.cc | 4 +- 11 files changed, 677 insertions(+), 181 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index a10327a8aa8ea..15bdd894c47ee 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -56,13 +56,14 @@ std::size_t ColumnFamilyRow::DeleteColumn( std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange 
const& time_range) { auto column_it = columns_.find(column_qualifier); - if (column_it != columns_.end()) { - return column_it->second.DeleteTimeRange(time_range); + if (column_it == columns_.end()) { + return 0; } + auto res = column_it->second.DeleteTimeRange(time_range); if (!column_it->second.HasCells()) { columns_.erase(column_it); } - return 0; + return res; } void ColumnFamily::SetCell(std::string const& row_key, @@ -96,12 +97,12 @@ class FilteredColumnFamilyStream::FilterApply { FilterApply(FilteredColumnFamilyStream& parent) : parent_(parent) {} bool operator()(ColumnRange const& column_range) { - parent_.column_ranges_.Insert(column_range.range); + parent_.column_ranges_.Sum(column_range.range); return true; } bool operator()(TimestampRange const& timestamp_range) { - parent_.timestamp_ranges_.Insert(timestamp_range.range); + parent_.timestamp_ranges_.Sum(timestamp_range.range); return true; } @@ -126,7 +127,11 @@ FilteredColumnFamilyStream::FilteredColumnFamilyStream( std::shared_ptr row_set) : column_family_name_(std::move(column_family_name)), row_ranges_(std::move(row_set)), - rows_(column_family, *row_ranges_), + column_ranges_(StringRangeSet::All()), + timestamp_ranges_(TimestampRangeSet::All()), + rows_(RangeFilteredMapView(column_family, + *row_ranges_), + std::cref(row_regexes_)), row_it_(rows_.begin()), initialized_(false) {} @@ -176,13 +181,6 @@ bool FilteredColumnFamilyStream::Next(NextMode mode) { void FilteredColumnFamilyStream::InitializeIfNeeded() const { if (!initialized_) { - if (column_ranges_.disjoint_ranges().empty()) { - column_ranges_.Insert(*StringRangeSet::All().disjoint_ranges().begin()); - } - if (timestamp_ranges_.disjoint_ranges().empty()) { - timestamp_ranges_.Insert(* - TimestampRangeSet::All().disjoint_ranges().begin()); - } PointToFirstCellAfterRowChange(); initialized_ = true; } @@ -191,7 +189,7 @@ void FilteredColumnFamilyStream::InitializeIfNeeded() const { // Returns whether we've managed to find another cell in 
currently pointed row bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() const { for (; column_it_.value() != columns_.value().end(); ++(column_it_.value())) { - cells_ = FilteredMapView( + cells_ = RangeFilteredMapView( column_it_.value()->second, timestamp_ranges_); cell_it_ = cells_.value().begin(); if (cell_it_.value() != cells_.value().end()) { @@ -204,9 +202,12 @@ bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() const { // Returns whether we've managed to find another cell bool FilteredColumnFamilyStream::PointToFirstCellAfterRowChange() const { for (; row_it_ != rows_.end(); ++row_it_) { - columns_ = FilteredMapView( - row_it_->second, column_ranges_); - column_it_.value() = columns_.value().begin(); + columns_ = RegexFiteredMapView< + RangeFilteredMapView>( + RangeFilteredMapView(row_it_->second, + column_ranges_), + column_regexes_); + column_it_ = columns_.value().begin(); if (PointToFirstCellAfterColumnChange()) { return true; } diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index e47dd9f43bcde..786c2d9457ce2 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -29,7 +29,6 @@ namespace cloud { namespace bigtable { namespace emulator { - class ColumnRow { public: void SetCell(std::chrono::milliseconds timestamp, std::string const& value); @@ -45,13 +44,13 @@ class ColumnRow { return cells_.lower_bound(timestamp); } const_iterator upper_bound(std::chrono::milliseconds timestamp) const { - return cells_.lower_bound(timestamp); + return cells_.upper_bound(timestamp); } private: std::map cells_; }; - + class ColumnFamilyRow { public: void SetCell(std::string const& column_qualifier, @@ -67,7 +66,7 @@ class ColumnFamilyRow { return columns_.lower_bound(column_qualifier); } const_iterator upper_bound(std::string const& column_qualifier) const { - return columns_.lower_bound(column_qualifier); + return 
columns_.upper_bound(column_qualifier); } private: @@ -85,22 +84,19 @@ class ColumnFamily { std::string const& row_key, std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); - const_iterator begin() const { - return rows_.begin(); - } - const_iterator end() const { - return rows_.end(); - } + const_iterator begin() const { return rows_.begin(); } + const_iterator end() const { return rows_.end(); } const_iterator lower_bound(std::string const& row_key) const { return rows_.lower_bound(row_key); } const_iterator upper_bound(std::string const& row_key) const { - return rows_.lower_bound(row_key); + return rows_.upper_bound(row_key); } private: std::map rows_; }; + class FilteredColumnFamilyStream : public AbstractCellStreamImpl { public: FilteredColumnFamilyStream(ColumnFamily const& column_family, @@ -108,7 +104,7 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { std::shared_ptr row_set); bool ApplyFilter(InternalFilter const& internal_filter) override; bool HasValue() const override; - CellView const &Value() const override; + CellView const& Value() const override; bool Next(NextMode mode) override; std::string const& column_family_name() const { return column_family_name_; } @@ -129,20 +125,24 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { std::vector> column_regexes_; mutable TimestampRangeSet timestamp_ranges_; - FilteredMapView rows_; - mutable absl::optional> + RegexFiteredMapView> rows_; + mutable absl::optional>> columns_; - mutable absl::optional> cells_; + mutable absl::optional> + cells_; // If row_it_ == rows_.end() we've reached the end. 
// We keep the invariant that if (row_it_ != rows_.end()) then // cell_it_ != cells.end() && column_it_ != columns_.end() - mutable FilteredMapView::const_iterator row_it_; - mutable absl::optional< - FilteredMapView::const_iterator> + mutable RegexFiteredMapView< + RangeFilteredMapView>::const_iterator + row_it_; + mutable absl::optional>::const_iterator> column_it_; mutable absl::optional< - FilteredMapView::const_iterator> + RangeFilteredMapView::const_iterator> cell_it_; mutable absl::optional cur_value_; mutable bool initialized_; diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index 21fa50b57b924..9d0328cd78799 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -15,6 +15,7 @@ #include "google/cloud/bigtable/emulator/row_iterators.h" #include "google/cloud/bigtable/row_range.h" #include "google/cloud/testing_util/is_proto_equal.h" +#include "google/cloud/testing_util/chrono_literals.h" #include #include @@ -24,36 +25,357 @@ namespace bigtable { namespace emulator { namespace { -using namespace std::chrono_literals; - -TEST(ColumnFamilyIterator, Simple) { -// ColumnFamily fam; -// fam.SetCell("row1", "col1", 123ms, "foo"); -// fam.SetCell("row1", "col1", 124ms, "fo"); -// fam.SetCell("row1", "col2", 123ms, "bar"); -// fam.SetCell("row2", "col1", 123ms, "foo"); -// fam.SetCell("row2", "col3", 120ms, "baz"); -// fam.SetCell("row2", "col3", 120ms, "baz"); -// std::vector rows; -// std::transform( -// fam.FindRows(std::shared_ptr( -// new StringRangeSet(StringRangeSet::All()))), -// fam.end(), -// std::back_inserter(rows), -// [](std::pair const& val) { -// return val.first; -// }); -// std::vector expected{"row1", "row2"}; -// EXPECT_EQ(expected, rows); -} - -class Foo { - public: - Foo(std::string const& foo) : foo_(foo) {} - - private: - std::reference_wrapper foo_; -}; +std::string DumpColumnRow(ColumnRow const& 
col_row, + std::string const& prefix = "") { + std::stringstream ss; + for (auto const& cell : col_row) { + ss << prefix << "@" << cell.first.count() << "ms: " << cell.second + << std::endl; + } + return ss.str(); +} + +std::string DumpColumnFamilyRow(ColumnFamilyRow const& fam_row, + std::string const& prefix = "") { + std::stringstream ss; + for (auto const& col_row : fam_row) { + ss << DumpColumnRow(col_row.second, prefix + col_row.first + " "); + } + return ss.str(); +} + +std::string DumpColumnFamily(ColumnFamily const& fam, + std::string const& cf_name = "") { + std::stringstream ss; + for (auto const& fam_row : fam) { + ss << DumpColumnFamilyRow(fam_row.second, + fam_row.first + " " + cf_name + ":"); + } + return ss.str(); +} + +TEST(ColumnRow, Trivial) { + using testing_util::chrono_literals::operator""_ms; + + ColumnRow col_row; + EXPECT_FALSE(col_row.HasCells()); + col_row.SetCell(10_ms, "foo"); + EXPECT_TRUE(col_row.HasCells()); + col_row.SetCell(10_ms, "bar"); + EXPECT_EQ(std::next(col_row.begin()), col_row.end()); + EXPECT_EQ("bar", col_row.begin()->second); + + col_row.SetCell(0_ms, "baz"); + col_row.SetCell(20_ms, "qux"); + EXPECT_EQ("bar", col_row.lower_bound(10_ms)->second); + EXPECT_EQ("qux", col_row.upper_bound(10_ms)->second); +} + +TEST(ColumnRow, DeleteTimeRangeFinite) { + using testing_util::chrono_literals::operator""_ms; + + ColumnRow col_row; + col_row.SetCell(10_ms, "foo"); + col_row.SetCell(20_ms, "bar"); + col_row.SetCell(30_ms, "baz"); + col_row.SetCell(40_ms, "qux"); + google::bigtable::v2::TimestampRange range; + range.set_start_timestamp_micros(5000); + range.set_end_timestamp_micros(40000); + col_row.DeleteTimeRange(range); + + EXPECT_EQ("@40ms: qux\n", DumpColumnRow(col_row)); +} + +TEST(ColumnRow, DeleteTimeRangeInfinite) { + using testing_util::chrono_literals::operator""_ms; + + ColumnRow col_row; + col_row.SetCell(10_ms, "foo"); + col_row.SetCell(20_ms, "bar"); + col_row.SetCell(30_ms, "baz"); + col_row.SetCell(40_ms, "qux"); 
+ google::bigtable::v2::TimestampRange range; + range.set_start_timestamp_micros(20000); + col_row.DeleteTimeRange(range); + + EXPECT_EQ("@10ms: foo\n", DumpColumnRow(col_row)); +} + +TEST(ColumnFamilyRow, Trivial) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamilyRow fam_row; + EXPECT_FALSE(fam_row.HasColumns()); + fam_row.SetCell("col1", 10_ms, "foo"); + EXPECT_TRUE(fam_row.HasColumns()); + fam_row.SetCell("col1", 10_ms, "bar"); + EXPECT_EQ(std::next(fam_row.begin()), fam_row.end()); + EXPECT_EQ("bar", fam_row.begin()->second.begin()->second); + + fam_row.SetCell("col0", 10_ms, "baz"); + fam_row.SetCell("col2", 10_ms, "qux"); + + EXPECT_EQ(R"""( +col0 @10ms: baz +col1 @10ms: bar +col2 @10ms: qux +)""", + "\n" + DumpColumnFamilyRow(fam_row)); + + EXPECT_EQ("bar", fam_row.lower_bound("col1")->second.begin()->second); + EXPECT_EQ("qux", fam_row.upper_bound("col1")->second.begin()->second); + + EXPECT_EQ(1, fam_row.DeleteColumn("col1", + ::google::bigtable::v2::TimestampRange{})); + + // Verify that there is no empty column. 
+ EXPECT_EQ(2, std::distance(fam_row.begin(), fam_row.end())); + + google::bigtable::v2::TimestampRange not_matching_range; + not_matching_range.set_start_timestamp_micros(10); + not_matching_range.set_end_timestamp_micros(20); + EXPECT_EQ(0, fam_row.DeleteColumn("col2", not_matching_range)); + + EXPECT_EQ(R"""( +col0 @10ms: baz +col2 @10ms: qux +)""", + "\n" + DumpColumnFamilyRow(fam_row)); +} + +TEST(ColumnFamily, Trivial) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam; + fam.SetCell("row1", "col0", 10_ms, "foo"); + fam.SetCell("row1", "col0", 10_ms, "bar"); + EXPECT_EQ("row1 :col0 @10ms: bar\n", DumpColumnFamily(fam)); + + fam.SetCell("row0", "col0", 10_ms, "baz"); + fam.SetCell("row2", "col0", 10_ms, "qux"); + + EXPECT_EQ(R"""( +row0 :col0 @10ms: baz +row1 :col0 @10ms: bar +row2 :col0 @10ms: qux +)""", + "\n" + DumpColumnFamily(fam)); + + EXPECT_EQ("col0 @10ms: bar\n", + DumpColumnFamilyRow(fam.lower_bound("row1")->second)); + EXPECT_EQ("col0 @10ms: qux\n", + DumpColumnFamilyRow(fam.upper_bound("row1")->second)); + + EXPECT_EQ(1, fam.DeleteColumn("row1", "col0", + ::google::bigtable::v2::TimestampRange{})); + + // Verify that there is no empty row + EXPECT_EQ(2, std::distance(fam.begin(), fam.end())); + + EXPECT_EQ(R"""( +row0 :col0 @10ms: baz +row2 :col0 @10ms: qux +)""", + "\n" + DumpColumnFamily(fam)); + + EXPECT_TRUE(fam.DeleteRow("row2")); + EXPECT_FALSE(fam.DeleteRow("row_nonexistent")); + + EXPECT_EQ("row0 :col0 @10ms: baz\n", DumpColumnFamily(fam)); +} + +std::string DumpFilteredColumnFamilyStream( + FilteredColumnFamilyStream& stream, NextMode next_mode = NextMode::kCell) { + std::stringstream ss; + for (; stream.HasValue(); stream.Next(next_mode)) { + auto const& cell = stream.Value(); + ss << cell.row_key() << " " << cell.column_family() << ":" + << cell.column_qualifier() << " @" << cell.timestamp().count() + << "ms: " << cell.value() << std::endl; + } + return ss.str(); +} + +TEST(FilteredColumnFamilyStream, Empty) { + 
ColumnFamily fam; + auto included_rows = std::make_shared(StringRangeSet::All()); + FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); + EXPECT_EQ("", DumpFilteredColumnFamilyStream(filtered_stream)); +} + +TEST(FilteredColumnFamilyStream, Unfiltered) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam; + fam.SetCell("row0", "col0", 10_ms, "foo"); + fam.SetCell("row0", "col1", 20_ms, "bar"); + fam.SetCell("row0", "col1", 30_ms, "baz"); + fam.SetCell("row1", "col0", 10_ms, "foo"); + fam.SetCell("row1", "col1", 20_ms, "foo"); + fam.SetCell("row1", "col1", 30_ms, "foo"); + fam.SetCell("row2", "col0", 10_ms, "qux"); + fam.SetCell("row2", "col2", 40_ms, "qux"); + fam.SetCell("row2", "col2", 50_ms, "qux"); + auto included_rows = std::make_shared(StringRangeSet::All()); + FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); + EXPECT_EQ(R"""( +row0 cf1:col0 @10ms: foo +row0 cf1:col1 @20ms: bar +row0 cf1:col1 @30ms: baz +row1 cf1:col0 @10ms: foo +row1 cf1:col1 @20ms: foo +row1 cf1:col1 @30ms: foo +row2 cf1:col0 @10ms: qux +row2 cf1:col2 @40ms: qux +row2 cf1:col2 @50ms: qux +)""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +} + +TEST(FilteredColumnFamilyStream, FilterByTimestampRange) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam; + fam.SetCell("row0", "col0", 10_ms, "foo"); + fam.SetCell("row0", "col0", 20_ms, "bar"); // Filter out + fam.SetCell("row0", "col0", 30_ms, "baz"); + fam.SetCell("row0", "col1", 100_ms, "foo"); // Filter out + fam.SetCell("row0", "col1", 150_ms, "foo"); // Filter out + fam.SetCell("row0", "col1", 190_ms, "foo"); // Filter out + fam.SetCell("row0", "col2", 200_ms, "foo"); + fam.SetCell("row0", "col2", 220_ms, "foo"); + fam.SetCell("row0", "col2", 240_ms, "foo"); + fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out + 
fam.SetCell("row1", "col0", 10_ms, "foo"); + fam.SetCell("row1", "col0", 20_ms, "bar"); // Filter out + fam.SetCell("row1", "col0", 30_ms, "baz"); + fam.SetCell("row1", "col1", 100_ms, "foo"); // Filter out + fam.SetCell("row1", "col1", 150_ms, "foo"); // Filter out + fam.SetCell("row1", "col1", 190_ms, "foo"); // Filter out + fam.SetCell("row1", "col2", 200_ms, "foo"); + fam.SetCell("row1", "col2", 220_ms, "foo"); + fam.SetCell("row1", "col2", 240_ms, "foo"); + fam.SetCell("row1", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row1", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row1", "col3", 300_ms, "foo"); // Filter out + auto included_rows = std::make_shared(StringRangeSet::All()); + FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); + filtered_stream.ApplyFilter( + TimestampRange{TimestampRangeSet::Range(0_ms, 20_ms)}); + filtered_stream.ApplyFilter( + TimestampRange{TimestampRangeSet::Range(30_ms, 100_ms)}); + filtered_stream.ApplyFilter( + TimestampRange{TimestampRangeSet::Range(200_ms, 300_ms)}); + EXPECT_EQ(R"""( +row0 cf1:col0 @10ms: foo +row0 cf1:col0 @30ms: baz +row0 cf1:col2 @200ms: foo +row0 cf1:col2 @220ms: foo +row0 cf1:col2 @240ms: foo +row1 cf1:col0 @10ms: foo +row1 cf1:col0 @30ms: baz +row1 cf1:col2 @200ms: foo +row1 cf1:col2 @220ms: foo +row1 cf1:col2 @240ms: foo +)""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +} + +TEST(FilteredColumnFamilyStream, FilterByColumnRange) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam; + fam.SetCell("row0", "col0", 10_ms, "foo"); + fam.SetCell("row0", "col1", 100_ms, "foo"); // Filter out + fam.SetCell("row0", "col2", 200_ms, "foo"); + fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row1", "col2", 300_ms, "foo"); + fam.SetCell("row2", "col0", 300_ms, "foo"); + auto included_rows = std::make_shared(StringRangeSet::All()); + FilteredColumnFamilyStream 
filtered_stream(fam, "cf1", included_rows); + filtered_stream.ApplyFilter( + ColumnRange{StringRangeSet::Range("col0", false, "col0", false)}); + filtered_stream.ApplyFilter( + ColumnRange{StringRangeSet::Range("col2", false, "col2", false)}); + EXPECT_EQ(R"""( +row0 cf1:col0 @10ms: foo +row0 cf1:col2 @200ms: foo +row1 cf1:col2 @300ms: foo +row2 cf1:col0 @300ms: foo +)""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +} + +TEST(FilteredColumnFamilyStream, FilterByColumnRegex) { + using testing_util::chrono_literals::operator""_ms; + auto pattern1 = std::make_shared("col"); + ASSERT_TRUE(pattern1->ok()); + auto pattern2 = std::make_shared("[02]"); + ASSERT_TRUE(pattern2->ok()); + + ColumnFamily fam; + fam.SetCell("row0", "col0", 10_ms, "foo"); + fam.SetCell("row0", "col1", 100_ms, "foo"); // Filter out + fam.SetCell("row0", "col2", 200_ms, "foo"); + fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row1", "col2", 300_ms, "foo"); + fam.SetCell("row2", "col0", 300_ms, "foo"); + auto included_rows = std::make_shared(StringRangeSet::All()); + FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); + filtered_stream.ApplyFilter(ColumnRegex{pattern1}); + filtered_stream.ApplyFilter(ColumnRegex{pattern2}); + EXPECT_EQ(R"""( +row0 cf1:col0 @10ms: foo +row0 cf1:col2 @200ms: foo +row1 cf1:col2 @300ms: foo +row2 cf1:col0 @300ms: foo +)""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +} + +TEST(FilteredColumnFamilyStream, FilterRowKeyRegex) { + using testing_util::chrono_literals::operator""_ms; + auto pattern1 = std::make_shared("row"); + ASSERT_TRUE(pattern1->ok()); + auto pattern2 = std::make_shared("[02]"); + ASSERT_TRUE(pattern2->ok()); + + ColumnFamily fam; + fam.SetCell("row0", "col0", 10_ms, "foo"); + fam.SetCell("row1", "col1", 100_ms, "foo"); // Filter out + fam.SetCell("row2", "col2", 200_ms, "foo"); + fam.SetCell("row3", "col3", 300_ms, "foo"); 
// Filter out + auto included_rows = std::make_shared(StringRangeSet::All()); + FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); + filtered_stream.ApplyFilter(RowKeyRegex{pattern1}); + filtered_stream.ApplyFilter(RowKeyRegex{pattern2}); + EXPECT_EQ(R"""( +row0 cf1:col0 @10ms: foo +row2 cf1:col2 @200ms: foo +)""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +} + +TEST(FilteredColumnFamilyStream, FilterRowSet) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam; + fam.SetCell("row0", "col0", 10_ms, "foo"); + fam.SetCell("row1", "col1", 100_ms, "foo"); // Filter out + fam.SetCell("row2", "col2", 200_ms, "foo"); + fam.SetCell("row3", "col3", 300_ms, "foo"); // Filter out + auto included_rows = + std::make_shared(StringRangeSet::Empty()); + included_rows->Sum(StringRangeSet::Range("row0", false, "row2", true)); + included_rows->Sum(StringRangeSet::Range( + "row3", false, StringRangeSet::Range::Infinity{}, false)); + FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); + EXPECT_EQ(R"""( +row0 cf1:col0 @10ms: foo +row1 cf1:col1 @100ms: foo +row3 cf1:col3 @300ms: foo +)""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +} } // anonymous namespace } // namespace emulator diff --git a/google/cloud/bigtable/emulator/filtered_map.h b/google/cloud/bigtable/emulator/filtered_map.h index aed96939f5473..b3ddd53af536b 100644 --- a/google/cloud/bigtable/emulator/filtered_map.h +++ b/google/cloud/bigtable/emulator/filtered_map.h @@ -18,30 +18,31 @@ #include "google/cloud/bigtable/emulator/range_set.h" #include #include +#include namespace google { namespace cloud { namespace bigtable { namespace emulator { -template -class FilteredMapView { +template +class RangeFilteredMapView { public: class const_iterator { public: using iterator_category = std::input_iterator_tag; - using value_type = typename std::iterator_traits< - typename Map::const_iterator>::value_type; + using value_type = + typename 
std::iterator_traits::value_type; using difference_type = typename std::iterator_traits< typename Map::const_iterator>::difference_type; using reference = value_type const&; using pointer = value_type const*; const_iterator( - FilteredMapView const& parent, + RangeFilteredMapView const& parent, typename Map::const_iterator unfiltered_pos, - typename std::set:: + typename std::set:: const_iterator filter_pos) : parent_(std::cref(parent)), unfiltered_pos_(std::move(unfiltered_pos)), @@ -103,25 +104,24 @@ class FilteredMapView { // pointed by filter_pos_. Make sure this only happens when the iteration // reaches its end. while (unfiltered_pos_ != parent_.get().unfiltered_.get().end() && - filter_pos_ != parent_.get().filter_.get().disjoint_ranges().end() && + filter_pos_ != + parent_.get().filter_.get().disjoint_ranges().end() && filter_pos_->IsAboveEnd(unfiltered_pos_->first)) { ++filter_pos_; AdvanceToNextRange(); } } - std::reference_wrapper parent_; + std::reference_wrapper parent_; typename Map::const_iterator unfiltered_pos_; - typename std::set::const_iterator - filter_pos_; + typename std::set< + typename PermittedRanges::Range, + typename PermittedRanges::Range::StartLess>::const_iterator filter_pos_; }; - FilteredMapView(Map const& unfiltered, - ExcludedRanges const& filter) + RangeFilteredMapView(Map const& unfiltered, PermittedRanges const& filter) : unfiltered_(std::cref(unfiltered)), filter_(std::cref(filter)) {} - const_iterator begin() const { return const_iterator(*this, unfiltered_.get().begin(), filter_.get().disjoint_ranges().begin()); @@ -130,9 +130,88 @@ class FilteredMapView { return const_iterator(*this, unfiltered_.get().end(), filter_.get().disjoint_ranges().end()); } + private: std::reference_wrapper unfiltered_; - std::reference_wrapper filter_; + std::reference_wrapper filter_; +}; + +template +class RegexFiteredMapView { + public: + class const_iterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = 
+ typename std::iterator_traits::value_type; + using difference_type = typename std::iterator_traits< + typename Map::const_iterator>::difference_type; + using reference = value_type const&; + using pointer = value_type const*; + + const_iterator(RegexFiteredMapView const& parent, + typename Map::const_iterator unfiltered_pos) + : parent_(std::cref(parent)), + unfiltered_pos_(std::move(unfiltered_pos)) { + EnsureIteratorValid(); + } + + const_iterator& operator++() { + ++unfiltered_pos_; + EnsureIteratorValid(); + return *this; + } + + const_iterator operator++(int) { + const_iterator retval = *this; + ++(*this); + return retval; + } + + bool operator==(const_iterator const& other) const { + return unfiltered_pos_ == other.unfiltered_pos_; + } + + bool operator!=(const_iterator const& other) const { + return !(*this == other); + } + + reference operator*() const { return *unfiltered_pos_; } + pointer operator->() const { return &*unfiltered_pos_; } + + private: + void EnsureIteratorValid() { + for (; unfiltered_pos_ != parent_.get().unfiltered_.end() && + std::any_of(parent_.get().filters_.get().begin(), + parent_.get().filters_.get().end(), + [&](std::shared_ptr const& filter) { + return !re2::RE2::PartialMatch( + unfiltered_pos_->first, *filter); + }); + ++unfiltered_pos_) { + } + } + + std::reference_wrapper parent_; + typename Map::const_iterator unfiltered_pos_; + }; + + RegexFiteredMapView( + Map unfiltered, + std::vector> const& filters) + : unfiltered_(std::move(unfiltered)), filters_(std::cref(filters)) {} + + const_iterator begin() const { + return const_iterator(*this, unfiltered_.begin()); + } + const_iterator end() const { + return const_iterator(*this, unfiltered_.end()); + } + + private: + Map unfiltered_; + std::reference_wrapper> const> + filters_; }; } // namespace emulator diff --git a/google/cloud/bigtable/emulator/filtered_map_test.cc b/google/cloud/bigtable/emulator/filtered_map_test.cc index 67632e318a175..c376ef7f7da90 100644 --- 
a/google/cloud/bigtable/emulator/filtered_map_test.cc +++ b/google/cloud/bigtable/emulator/filtered_map_test.cc @@ -38,7 +38,7 @@ std::vector Keys(Map const& map) { return res; } -std::vector Vec(std::initializer_list const &v) { +std::vector Vec(std::initializer_list const& v) { std::vector res; std::transform(v.begin(), v.end(), std::back_inserter(res), [](char const* s) { return std::string(s); }); @@ -46,98 +46,144 @@ std::vector Vec(std::initializer_list const &v) { return res; } -TEST(FilteredMap, NoFilter) { +TEST(RangeFilteredMapView, NoFilter) { std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; auto filter = StringRangeSet::All(); - FilteredMapView filtered(unfiltered, - filter); + RangeFilteredMapView filtered( + unfiltered, filter); EXPECT_EQ(Vec({"zero", "one", "two"}), Keys(filtered)); } -TEST(FilteredMap, EmptyFilter) { +TEST(RangeFilteredMapView, EmptyFilter) { std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; auto filter = StringRangeSet::Empty(); - FilteredMapView filtered(unfiltered, - filter); + RangeFilteredMapView filtered( + unfiltered, filter); EXPECT_EQ(Vec({}), Keys(filtered)); } -TEST(FilteredMap, OneOpen) { +TEST(RangeFilteredMapView, OneOpen) { std::map unfiltered{{"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; auto filter = StringRangeSet::Empty(); - filter.Insert(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); - FilteredMapView filtered(unfiltered, - filter); + filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); + RangeFilteredMapView filtered( + unfiltered, filter); EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); } -TEST(FilteredMap, OneClosed) { +TEST(RangeFilteredMapView, OneClosed) { std::map unfiltered{{"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; auto filter = StringRangeSet::Empty(); - filter.Insert(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); - FilteredMapView filtered(unfiltered, - filter); + 
filter.Sum(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); + RangeFilteredMapView filtered( + unfiltered, filter); EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb", "AAB"}), Keys(filtered)); } -TEST(FilteredMap, NoEntriesAfterClosedFilter) { - std::map unfiltered{{"AA", 0}, {"AAA", 0}, {"AAAa", 0}, - {"AAAb", 0}}; +TEST(RangeFilteredMapView, NoEntriesAfterClosedFilter) { + std::map unfiltered{ + {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}}; auto filter = StringRangeSet::Empty(); - filter.Insert(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); - FilteredMapView filtered(unfiltered, - filter); + filter.Sum(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); + RangeFilteredMapView filtered( + unfiltered, filter); EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb"}), Keys(filtered)); } -TEST(FilteredMap, NoEntriesAfterOpenFilter) { - std::map unfiltered{{"AA", 0}, {"AAA", 0}, {"AAAa", 0}, - {"AAAb", 0}}; +TEST(RangeFilteredMapView, NoEntriesAfterOpenFilter) { + std::map unfiltered{ + {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}}; auto filter = StringRangeSet::Empty(); - filter.Insert(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); - FilteredMapView filtered(unfiltered, - filter); + filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); + RangeFilteredMapView filtered( + unfiltered, filter); EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); } -TEST(FilteredMap, NoEntriesBeforeClosedFilter) { - std::map unfiltered{{"AAA", 0}, {"AAAa", 0}, - {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; +TEST(RangeFilteredMapView, NoEntriesBeforeClosedFilter) { + std::map unfiltered{ + {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; auto filter = StringRangeSet::Empty(); - filter.Insert(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); - FilteredMapView filtered(unfiltered, - filter); + filter.Sum(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); + RangeFilteredMapView filtered( + unfiltered, filter); EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb", "AAB"}), 
Keys(filtered)); } -TEST(FilteredMap, NoEntriesBeforeOpenFilter) { - std::map unfiltered{{"AAAa", 0}, - {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; +TEST(RangeFilteredMapView, NoEntriesBeforeOpenFilter) { + std::map unfiltered{ + {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; auto filter = StringRangeSet::Empty(); - filter.Insert(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); - FilteredMapView filtered(unfiltered, - filter); + filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); + RangeFilteredMapView filtered( + unfiltered, filter); EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); } -TEST(FilteredMap, MultipleFilters) { +TEST(RangeFilteredMapView, MultipleFilters) { std::map unfiltered{ {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}, {"BB", 0}, {"BBB", 0}, {"BBBa", 0}, {"BBBb", 0}, {"CCCa", 0}, {"CCCb", 0}, {"CCD", 0}, {"CCE", 0}}; auto filter = StringRangeSet::Empty(); - filter.Insert(StringRangeSet::Range("AAA", kOpen, "AAB", kClosed)); - filter.Insert(StringRangeSet::Range("BBB", kClosed, "BBC", kOpen)); - filter.Insert(StringRangeSet::Range("CCC", kClosed, "CCD", kOpen)); - FilteredMapView filtered(unfiltered, - filter); + filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kClosed)); + filter.Sum(StringRangeSet::Range("BBB", kClosed, "BBC", kOpen)); + filter.Sum(StringRangeSet::Range("CCC", kClosed, "CCD", kOpen)); + RangeFilteredMapView filtered( + unfiltered, filter); EXPECT_EQ(Vec({"AAAa", "AAAb", "AAB", "BBB", "BBBa", "BBBb", "CCCa", "CCCb"}), Keys(filtered)); } +TEST(RegexFiteredMapView, NoFilter) { + std::vector> patterns; + std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; + auto filter = StringRangeSet::All(); + + RegexFiteredMapView filtered(unfiltered, patterns); + EXPECT_EQ(Vec({"zero", "one", "two"}), Keys(filtered)); +} + +TEST(RegexFiteredMapView, EmptyFilter) { + auto pattern = std::make_shared("this_will_not_be_matched"); + ASSERT_TRUE(pattern->ok()); + std::vector> 
patterns({std::move(pattern)}); + + std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; + RegexFiteredMapView filtered(unfiltered, patterns); + EXPECT_EQ(Vec({}), Keys(filtered)); +} + +TEST(RegexFiteredMapView, OneFilter) { + auto pattern = std::make_shared("^[a-z_]*$"); + ASSERT_TRUE(pattern->ok()); + std::vector> patterns({std::move(pattern)}); + + std::map unfiltered{ + {"NO_MATCH", 0}, {"match", 1}, {"another_match", 2}}; + RegexFiteredMapView filtered(unfiltered, patterns); + EXPECT_EQ(Vec({"match", "another_match"}), Keys(filtered)); +} + +TEST(RegexFiteredMapView, MultipleFilters) { + auto has_a = std::make_shared("a"); + ASSERT_TRUE(has_a->ok()); + auto has_b = std::make_shared("b"); + ASSERT_TRUE(has_b->ok()); + auto has_c = std::make_shared("c"); + ASSERT_TRUE(has_c->ok()); + std::vector> patterns( + {std::move(has_a), std::move(has_b), std::move(has_c)}); + + std::map unfiltered{ + {"abc", 0}, {"ab", 1}, {"a", 2}, {"QQ b QQ c QQ a QQ", 4}, {"ac", 5}}; + RegexFiteredMapView filtered(unfiltered, patterns); + EXPECT_EQ(Vec({"abc", "QQ b QQ c QQ a QQ"}), Keys(filtered)); +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/range_set.cc b/google/cloud/bigtable/emulator/range_set.cc index df5e5fcc6bfc3..da9911fea8bb6 100644 --- a/google/cloud/bigtable/emulator/range_set.cc +++ b/google/cloud/bigtable/emulator/range_set.cc @@ -96,7 +96,7 @@ bool DisjointAndSortedRangesAdjacent(TimestampRangeSet::Range const& lhs, } template -void RangeSetInsertImpl(RangeSetType& disjoint_ranges, +void RangeSetSumImpl(RangeSetType& disjoint_ranges, RangeType inserted_range) { // Remove all ranges which either have an overlap with `inserted_range` or are // adjacent to it. 
Then add `inserted_range` with `start` and `end` @@ -130,6 +130,33 @@ void RangeSetInsertImpl(RangeSetType& disjoint_ranges, disjoint_ranges.insert(std::move(inserted_range)); } +template +void RangeSetIntersectImpl(RangeSetType& disjoint_ranges, + RangeType const& intersected_range) { + // Intersect with every range. This could be optimized but it's unlikely that + // the extra code complexity would be offset by any performance gains in real + // life. + for (auto range_it = disjoint_ranges.begin(); + range_it != disjoint_ranges.end();) { + if (!detail::HasOverlap(*range_it, intersected_range)) { + disjoint_ranges.erase(range_it++); + } + if (typename RangeType::StartLess()(*range_it, intersected_range)) { + RangeType to_update = std::move(*range_it); + disjoint_ranges.erase(range_it); + to_update.set_start(intersected_range); + range_it = disjoint_ranges.emplace(std::move(to_update)).first; + } + if (typename RangeType::EndLess()(intersected_range, *range_it)) { + RangeType to_update = std::move(*range_it); + disjoint_ranges.erase(range_it); + to_update.set_end(intersected_range); + range_it = disjoint_ranges.emplace(std::move(to_update)).first; + } + ++range_it; + } +} + } // namespace detail StringRangeSet::Range::Range(Value start, bool start_open, Value end, @@ -342,7 +369,7 @@ bool StringRangeSet::Range::EndLess::operator()(Range const& lhs, StringRangeSet StringRangeSet::All() { StringRangeSet res; - res.Insert(Range("", false, StringRangeSet::Range::Infinity{}, false)); + res.Sum(Range("", false, StringRangeSet::Range::Infinity{}, false)); return res; } @@ -350,8 +377,12 @@ StringRangeSet StringRangeSet::Empty() { return StringRangeSet{}; } -void StringRangeSet::Insert(StringRangeSet::Range inserted_range) { - detail::RangeSetInsertImpl(disjoint_ranges_, std::move(inserted_range)); +void StringRangeSet::Sum(StringRangeSet::Range inserted_range) { + detail::RangeSetSumImpl(disjoint_ranges_, std::move(inserted_range)); +} + +void 
StringRangeSet::Intersect(StringRangeSet::Range const& inserted_range) { + detail::RangeSetIntersectImpl(disjoint_ranges_, inserted_range); } bool operator==(StringRangeSet::Range::Value const& lhs, @@ -442,7 +473,7 @@ bool TimestampRangeSet::Range::EndLess::operator()(Range const& lhs, TimestampRangeSet TimestampRangeSet::All() { TimestampRangeSet res; - res.Insert(Range(std::chrono::milliseconds(0), std::chrono::milliseconds(0))); + res.Sum(Range(std::chrono::milliseconds(0), std::chrono::milliseconds(0))); return res; } @@ -450,8 +481,13 @@ TimestampRangeSet TimestampRangeSet::Empty() { return TimestampRangeSet{}; } -void TimestampRangeSet::Insert(TimestampRangeSet::Range inserted_range) { - detail::RangeSetInsertImpl(disjoint_ranges_, std::move(inserted_range)); +void TimestampRangeSet::Sum(TimestampRangeSet::Range inserted_range) { + detail::RangeSetSumImpl(disjoint_ranges_, std::move(inserted_range)); +} + +void TimestampRangeSet::Intersect( + TimestampRangeSet::Range const& intersected_range) { + detail::RangeSetIntersectImpl(disjoint_ranges_, intersected_range); } bool operator==(TimestampRangeSet::Range const& lhs, diff --git a/google/cloud/bigtable/emulator/range_set.h b/google/cloud/bigtable/emulator/range_set.h index 019e95a418abd..6a90626232659 100644 --- a/google/cloud/bigtable/emulator/range_set.h +++ b/google/cloud/bigtable/emulator/range_set.h @@ -93,7 +93,8 @@ class StringRangeSet { static StringRangeSet All(); static StringRangeSet Empty(); - void Insert(Range inserted_range); + void Sum(Range inserted_range); + void Intersect(Range const &intersected_range); std::set const& disjoint_ranges() const { return disjoint_ranges_; @@ -161,7 +162,8 @@ class TimestampRangeSet { static TimestampRangeSet All(); static TimestampRangeSet Empty(); - void Insert(Range inserted_range); + void Sum(Range inserted_range); + void Intersect(Range const &intersected_range); std::set const& disjoint_ranges() const { return disjoint_ranges_; diff --git 
a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc index 1632190b127d2..f4e82ca5efc41 100644 --- a/google/cloud/bigtable/emulator/range_set_test.cc +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -688,7 +688,7 @@ TEST(TimestampRangeSet, DisjointAdjacent) { TEST(StringRangeSet, SingleRange) { StringRangeSet srs; - srs.Insert(StringRangeSet::Range("a", kClosed, "b", kClosed)); + srs.Sum(StringRangeSet::Range("a", kClosed, "b", kClosed)); ASSERT_EQ(1, srs.disjoint_ranges().size()); ASSERT_EQ(StringRangeSet::Range("a", kClosed, "b", kClosed), *srs.disjoint_ranges().begin()); @@ -709,9 +709,9 @@ TSRanges(std::vector& writer) bool RowStreamer::Stream(CellView const& cell) { std::cout << "Attempting to stream" << std::endl; btproto::ReadRowsResponse::CellChunk chunk; - if (!current_row_key_ || (¤t_row_key_->get() != &cell.row_key() && - current_row_key_->get() != cell.row_key())) { + if (!current_row_key_ || current_row_key_ != cell.row_key()) { if (!pending_chunks_.empty()) { pending_chunks_.back().set_commit_row(true); } - current_row_key_ = std::cref(cell.row_key()); - current_column_family_ = std::cref(cell.column_family()); - current_column_qualifier_ = std::cref(cell.column_qualifier()); + current_row_key_ = cell.row_key(); + current_column_family_ = cell.column_family(); + current_column_qualifier_ = cell.column_qualifier(); chunk.set_row_key(cell.row_key()); chunk.mutable_family_name()->set_value(cell.column_family()); chunk.mutable_qualifier()->set_value(cell.column_qualifier()); } - if (¤t_column_family_->get() != &cell.column_family() && - current_row_key_->get() != cell.column_family()) { - current_column_family_ = std::cref(cell.column_family()); - current_column_qualifier_ = std::cref(cell.column_qualifier()); + if (current_row_key_ != cell.column_family()) { + current_column_family_ = cell.column_family(); + current_column_qualifier_ = cell.column_qualifier(); 
chunk.mutable_family_name()->set_value(cell.column_family()); chunk.mutable_qualifier()->set_value(cell.column_qualifier()); } - if (¤t_column_qualifier_->get() != &cell.column_qualifier() && - current_row_key_->get() != cell.column_qualifier()) { - current_column_qualifier_ = std::cref(cell.column_qualifier()); + if (current_row_key_ != cell.column_qualifier()) { + current_column_qualifier_ = cell.column_qualifier(); chunk.mutable_qualifier()->set_value(cell.column_qualifier()); } chunk.set_timestamp_micros( diff --git a/google/cloud/bigtable/emulator/row_streamer.h b/google/cloud/bigtable/emulator/row_streamer.h index 3a44152aed687..742da42c80132 100644 --- a/google/cloud/bigtable/emulator/row_streamer.h +++ b/google/cloud/bigtable/emulator/row_streamer.h @@ -35,11 +35,9 @@ class RowStreamer { private: grpc::ServerWriter& writer_; - absl::optional> current_row_key_; - absl::optional> - current_column_family_; - absl::optional> - current_column_qualifier_; + absl::optional current_row_key_; + absl::optional current_column_family_; + absl::optional current_column_qualifier_; std::vector pending_chunks_; }; diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index bd477b274ebbd..648b986a1ef63 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -333,7 +333,7 @@ StatusOr CreateStringRangeSet( "`row_key` empty", GCP_ERROR_INFO().WithMetadata("row_set", row_set.DebugString())); } - res.Insert(StringRangeSet::Range(row_key, false, row_key, false)); + res.Sum(StringRangeSet::Range(row_key, false, row_key, false)); } for (auto const& row_range : row_set.row_ranges()) { auto maybe_range = StringRangeSet::Range::FromRowRange(row_range); @@ -343,7 +343,7 @@ StatusOr CreateStringRangeSet( if (maybe_range->IsEmpty()) { continue; } - res.Insert(*std::move(maybe_range)); + res.Sum(*std::move(maybe_range)); } return res; } From 19d57ff6ceb3b03c82b0e399736a06de6af611c0 Mon Sep 17 00:00:00 
2001 From: Brian Gitonga Marete Date: Fri, 21 Feb 2025 21:35:51 +0300 Subject: [PATCH 025/195] emulator: Add a static method to Table that returns a default-constructed table. --- google/cloud/bigtable/emulator/rollback_test.cc | 6 +++++- google/cloud/bigtable/emulator/table.h | 7 +++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 0688f70dcefd1..7c95b3381ebf6 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -12,7 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/table.h" #include +#include namespace google { namespace cloud { @@ -22,8 +25,9 @@ namespace emulator { // Ensure that SetCell still works to set a cell that was not set // before. TEST(TransactonRollback, SetCellBasicFunction) { - + std::shared_ptr
table = Table::TestTable(); } + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 192199de829de..b959b4548a242 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -41,6 +41,13 @@ class Table { static StatusOr> Create( google::bigtable::admin::v2::Table schema); + + static std::shared_ptr
TestTable() { + std::shared_ptr
r(new Table); + + return r; + } + google::bigtable::admin::v2::Table GetSchema() const; Status Update(google::bigtable::admin::v2::Table const& new_schema, From 6eefe402958180847053f0fde42744fd4c11d7d9 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 24 Feb 2025 15:13:53 +0300 Subject: [PATCH 026/195] emulator: testing: Create a table in the correct way. Populate a schema protobuf with the minimal fields and call Table::Create(); --- google/cloud/bigtable/emulator/rollback_test.cc | 11 ++++++++++- google/cloud/bigtable/emulator/table.h | 6 ------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 7c95b3381ebf6..24b17dfeffbcc 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -14,6 +14,9 @@ #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/bigtable/table.h" +#include +#include +#include #include #include @@ -25,7 +28,13 @@ namespace emulator { // Ensure that SetCell still works to set a cell that was not set // before. TEST(TransactonRollback, SetCellBasicFunction) { - std::shared_ptr
table = Table::TestTable(); + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + schema.set_name("projects/test/instances/test/tables/test"); + (*schema.mutable_column_families())["test"] = column_family; + + auto table = Table::Create(schema); } } // namespace emulator diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index b959b4548a242..903ec5a6039aa 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -42,12 +42,6 @@ class Table { google::bigtable::admin::v2::Table schema); - static std::shared_ptr
TestTable() { - std::shared_ptr
r(new Table); - - return r; - } - google::bigtable::admin::v2::Table GetSchema() const; Status Update(google::bigtable::admin::v2::Table const& new_schema, From 7f95ac2f3db6e87ab1484d4169045d14e680ab65 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 24 Feb 2025 20:50:27 +0300 Subject: [PATCH 027/195] emulator: testing: rollback_test: Fix crash when setting up SetCell mutation. There is a crash later on, in the code that sets up the undo log, that will be fixed in an upcoming commit. --- .../cloud/bigtable/emulator/rollback_test.cc | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 24b17dfeffbcc..f59818fdd03f9 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -14,9 +14,13 @@ #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/bigtable/table.h" +#include "google/cloud/testing_util/status_matchers.h" #include #include #include +#include +#include +#include #include #include @@ -31,10 +35,29 @@ TEST(TransactonRollback, SetCellBasicFunction) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; - schema.set_name("projects/test/instances/test/tables/test"); + const auto *const table_name = "projects/test/instances/test/tables/test"; + const auto *const row_key = "0"; + + schema.set_name(table_name); (*schema.mutable_column_families())["test"] = column_family; auto table = Table::Create(schema); + ASSERT_STATUS_OK(table); + + ::google::bigtable::v2::Mutation_SetCell set_cell_mutation; + set_cell_mutation.set_family_name("test"); + set_cell_mutation.set_column_qualifier("test"); + set_cell_mutation.set_timestamp_micros(1234); + set_cell_mutation.set_value("test"); + + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + 
mutation_request.set_row_key(row_key); + auto *mutation_request_mutation = mutation_request.add_mutations(); + mutation_request_mutation->set_allocated_set_cell(&set_cell_mutation); + + auto status = table.value()->MutateRow(mutation_request); + ASSERT_STATUS_OK(status); } } // namespace emulator From 82bcf54dc32d71ad76ecc02812397a4488679ab1 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 24 Feb 2025 22:28:00 +0300 Subject: [PATCH 028/195] emulator: SetCell mutation: Various logic fixes for undo log. --- google/cloud/bigtable/emulator/table.cc | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 8a73d4a2abf4e..b7183b83982ea 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -440,12 +440,12 @@ Status RowTransaction::SetCell( bool column_existed = true; auto column_row_it = column_family_row.find(set_cell.column_qualifier()); - if (column_row_it == column_family_row.end()) { + if (!row_existed || column_row_it == column_family_row.end()) { column_existed = false; } bool cell_existed = true; - if (!column_existed) { + if (!row_existed || !column_existed) { cell_existed = false; } else { auto timestamp_it = column_row_it->second.find( @@ -462,21 +462,26 @@ Status RowTransaction::SetCell( std::chrono::microseconds(set_cell.timestamp_micros())), set_cell.value()); + // If we have added a row, a column or a cell, we need to recompute + // these iterators. 
+ row_key_it = column_family.find(request_.row_key()); + column_family_row = row_key_it->second; + column_row_it = column_family_row.find(set_cell.column_qualifier()); + auto timestamp_it = column_row_it->second.find( + std::chrono::duration_cast( + std::chrono::microseconds(set_cell.timestamp_micros()))); + + if (!row_existed) { - row_key_it = column_family.find(request_.row_key()); DeleteRow delete_row = {row_key_it, column_family}; undo_.emplace(delete_row); } if (!column_existed) { - column_row_it = column_family_row.find(set_cell.column_qualifier()); DeleteColumn delete_column_row = {column_row_it, column_family_row}; undo_.emplace(delete_column_row); } - auto timestamp_it = column_row_it->second.find( - std::chrono::duration_cast( - std::chrono::microseconds(set_cell.timestamp_micros()))); if (!cell_existed) { DeleteValue delete_value = {column_row_it, timestamp_it->first}; undo_.emplace(delete_value); From d685ab4b8dacfb0a1c52e5e3037c06e463cac64e Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 24 Feb 2025 23:39:16 +0300 Subject: [PATCH 029/195] emulator: trasnaction rollback test: Fix crash in destructor. Use the correct sub-message allocation method, otherwise the protobuf code becomes confused about who is the owner of the SetCell mutation message and tries to free it twice, I think. 
See also: https://protobuf.dev/reference/cpp/arenas/ --- google/cloud/bigtable/emulator/rollback_test.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index f59818fdd03f9..af740ddd63f8f 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -44,17 +44,17 @@ TEST(TransactonRollback, SetCellBasicFunction) { auto table = Table::Create(schema); ASSERT_STATUS_OK(table); - ::google::bigtable::v2::Mutation_SetCell set_cell_mutation; - set_cell_mutation.set_family_name("test"); - set_cell_mutation.set_column_qualifier("test"); - set_cell_mutation.set_timestamp_micros(1234); - set_cell_mutation.set_value("test"); - ::google::bigtable::v2::MutateRowRequest mutation_request; mutation_request.set_table_name(table_name); mutation_request.set_row_key(row_key); + + auto *mutation_request_mutation = mutation_request.add_mutations(); - mutation_request_mutation->set_allocated_set_cell(&set_cell_mutation); + auto *set_cell_mutation = mutation_request_mutation->mutable_set_cell(); + set_cell_mutation->set_family_name("test"); + set_cell_mutation->set_column_qualifier("test"); + set_cell_mutation->set_timestamp_micros(1234); + set_cell_mutation->set_value("test"); auto status = table.value()->MutateRow(mutation_request); ASSERT_STATUS_OK(status); From d34e47ce526d7654943f5b93029ab3f03ce8a432 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 24 Feb 2025 23:39:16 +0300 Subject: [PATCH 030/195] emulator: transaction rollback test: Fix crash in destructor. Use the correct sub-message allocation method, otherwise the protobuf code becomes confused about who is the owner of the SetCell mutation message and tries to free it twice, I think. 
See also: https://protobuf.dev/reference/cpp/arenas/ --- google/cloud/bigtable/emulator/rollback_test.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index f59818fdd03f9..af740ddd63f8f 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -44,17 +44,17 @@ TEST(TransactonRollback, SetCellBasicFunction) { auto table = Table::Create(schema); ASSERT_STATUS_OK(table); - ::google::bigtable::v2::Mutation_SetCell set_cell_mutation; - set_cell_mutation.set_family_name("test"); - set_cell_mutation.set_column_qualifier("test"); - set_cell_mutation.set_timestamp_micros(1234); - set_cell_mutation.set_value("test"); - ::google::bigtable::v2::MutateRowRequest mutation_request; mutation_request.set_table_name(table_name); mutation_request.set_row_key(row_key); + + auto *mutation_request_mutation = mutation_request.add_mutations(); - mutation_request_mutation->set_allocated_set_cell(&set_cell_mutation); + auto *set_cell_mutation = mutation_request_mutation->mutable_set_cell(); + set_cell_mutation->set_family_name("test"); + set_cell_mutation->set_column_qualifier("test"); + set_cell_mutation->set_timestamp_micros(1234); + set_cell_mutation->set_value("test"); auto status = table.value()->MutateRow(mutation_request); ASSERT_STATUS_OK(status); From 77adf301cf475713fc87ba6e6253297f76183d82 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 25 Feb 2025 13:11:23 +0300 Subject: [PATCH 031/195] emulator: table: Add a method to return an iterator to the column families map. 
--- google/cloud/bigtable/emulator/table.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 903ec5a6039aa..02dfed2c4a875 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -56,6 +56,9 @@ class Table { Status ReadRows(google::bigtable::v2::ReadRowsRequest const& request, RowStreamer& row_streamer) const; + std::map>::iterator find(std::string const &column_family) { + return column_families_.find(column_family); + } private: Table() = default; From b9aaa078a12af097851378b1b9104f774148e64d Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 25 Feb 2025 13:21:24 +0300 Subject: [PATCH 032/195] emulator: Introduce methods to retrieve begin and end of column family map iteration. --- google/cloud/bigtable/emulator/rollback_test.cc | 4 ++++ google/cloud/bigtable/emulator/table.h | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index af740ddd63f8f..0455869755c47 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -58,6 +58,10 @@ TEST(TransactonRollback, SetCellBasicFunction) { auto status = table.value()->MutateRow(mutation_request); ASSERT_STATUS_OK(status); + + + // auto column_family_it = table->get()->find("test"); + } } // namespace emulator diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 02dfed2c4a875..b11c55c9583a0 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -56,6 +56,12 @@ class Table { Status ReadRows(google::bigtable::v2::ReadRowsRequest const& request, RowStreamer& row_streamer) const; + std::map>::iterator begin() { + return column_families_.begin(); + } + std::map>::iterator end() { + return column_families_.end(); + } std::map>::iterator 
find(std::string const &column_family) { return column_families_.find(column_family); } From eb3e40fe3f5d0b7859684b7b53d04ddccc97b7a8 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 25 Feb 2025 14:19:24 +0300 Subject: [PATCH 033/195] emulator: Complete implementing test for basic transaction functionality. The test ensures that SetCell mutations still work via the RowTransaction class. Rollback will be tested in an upcoming change. --- .../cloud/bigtable/emulator/rollback_test.cc | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 0455869755c47..812540fd37f8c 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -30,7 +30,7 @@ namespace bigtable { namespace emulator { // Ensure that SetCell still works to set a cell that was not set -// before. +// before, when using the RowTransaction class. 
TEST(TransactonRollback, SetCellBasicFunction) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; @@ -60,8 +60,24 @@ TEST(TransactonRollback, SetCellBasicFunction) { ASSERT_STATUS_OK(status); - // auto column_family_it = table->get()->find("test"); + auto column_family_it = table->get()->find("test"); + ASSERT_NE(column_family_it, table->get()->end()); + auto cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + ASSERT_NE(column_family_row_it, cf->end()); + + auto column_family_row = column_family_row_it->second; + auto column_row_it = column_family_row.find("test"); + ASSERT_NE(column_row_it, column_family_row.end()); + + auto column_row = column_row_it->second; + auto timestamp_it = column_row.find(std::chrono::duration_cast( + std::chrono::microseconds(1234))); + ASSERT_NE(timestamp_it, column_row.end()); + + auto value = timestamp_it->second; + ASSERT_EQ(value, "string"); } } // namespace emulator From 62f6ecc8b653dd4572cb1663861689843161aa84 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 26 Feb 2025 17:19:53 +0300 Subject: [PATCH 034/195] emulator: rollback test: Fix for more consistent handling of the table shared pointer. 
--- google/cloud/bigtable/emulator/rollback_test.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 812540fd37f8c..68ed24a7b97e6 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -41,8 +41,10 @@ TEST(TransactonRollback, SetCellBasicFunction) { schema.set_name(table_name); (*schema.mutable_column_families())["test"] = column_family; - auto table = Table::Create(schema); - ASSERT_STATUS_OK(table); + auto maybe_table = Table::Create(schema); + ASSERT_STATUS_OK(maybe_table); + + auto table = maybe_table.value(); ::google::bigtable::v2::MutateRowRequest mutation_request; mutation_request.set_table_name(table_name); @@ -56,12 +58,11 @@ TEST(TransactonRollback, SetCellBasicFunction) { set_cell_mutation->set_timestamp_micros(1234); set_cell_mutation->set_value("test"); - auto status = table.value()->MutateRow(mutation_request); + auto status = table->MutateRow(mutation_request); ASSERT_STATUS_OK(status); - - auto column_family_it = table->get()->find("test"); - ASSERT_NE(column_family_it, table->get()->end()); + auto column_family_it = table->find("test"); + ASSERT_NE(column_family_it, table->end()); auto cf = column_family_it->second; auto column_family_row_it = cf->find(row_key); From 050a55c883d1c5535562ca962ab8b647b7553a71 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 26 Feb 2025 22:01:47 +0300 Subject: [PATCH 035/195] emulator: Store a std::shared_ptr
in RowTransaction. This is a more correct way of doing it than storing a raw pointer. --- google/cloud/bigtable/emulator/table.cc | 2 +- google/cloud/bigtable/emulator/table.h | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index b7183b83982ea..458c0d14ec59b 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -221,7 +221,7 @@ Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { std::lock_guard lock(mu_); assert(request.table_name() == schema_.name()); - RowTransaction row_transaction(this, request); + RowTransaction row_transaction(this->get(), request); for (auto mutation : request.mutations()) { if (mutation.has_set_cell()) { diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index b11c55c9583a0..0d553b4bbd080 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -36,7 +36,7 @@ namespace cloud { namespace bigtable { namespace emulator { -class Table { +class Table : public std::enable_shared_from_this
<Table> { public: static StatusOr<std::shared_ptr<Table>> Create( google::bigtable::admin::v2::Table schema); @@ -66,6 +66,8 @@ class Table { return column_families_.find(column_family); } + std::shared_ptr
<Table> get() { return shared_from_this(); } + private: Table() = default; friend class RowSetIterator; @@ -116,10 +118,10 @@ struct DeleteColumn { class RowTransaction { public: explicit RowTransaction( - Table const* table, + std::shared_ptr
<Table> table, ::google::bigtable::v2::MutateRowRequest const& request) : request_(request) { - table_ = table; + table_ = std::move(table); }; ~RowTransaction() { @@ -148,7 +150,7 @@ class RowTransaction { void Undo(); bool committed_; - Table const* table_; + std::shared_ptr
table_; std::stack> undo_; ::google::bigtable::v2::MutateRowRequest const& request_; From b1cb90fab3182895ab4906f9148d543f2d4fd6ec Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Thu, 27 Feb 2025 15:47:55 +0100 Subject: [PATCH 036/195] More tests --- google/cloud/bigtable/emulator/CMakeLists.txt | 4 +- .../bigtable_emulator_test_common.bzl | 25 ++++++ .../emulator/bigtable_emulator_unit_tests.bzl | 3 +- .../cloud/bigtable/emulator/column_family.cc | 4 +- .../bigtable/emulator/column_family_test.cc | 76 +++++++++---------- google/cloud/bigtable/emulator/range_set.cc | 1 + .../cloud/bigtable/emulator/range_set_test.cc | 41 +++++++++- google/cloud/bigtable/emulator/table_test.cc | 45 +++++++++++ 8 files changed, 149 insertions(+), 50 deletions(-) create mode 100644 google/cloud/bigtable/emulator/bigtable_emulator_test_common.bzl create mode 100644 google/cloud/bigtable/emulator/table_test.cc diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index c6cfdbb5cb161..972005f962410 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -38,7 +38,6 @@ add_library( to_grpc_status.cc to_grpc_status.h) - target_link_libraries( bigtable_emulator_common google-cloud-cpp::bigtable @@ -60,9 +59,10 @@ if (BUILD_TESTING) column_family_test.cc filter_test.cc filtered_map_test.cc + range_set_test.cc row_iterators_test.cc server_test.cc - range_set_test.cc) + table_test.cc) export_list_to_bazel("bigtable_emulator_unit_tests.bzl" "bigtable_emulator_unit_tests" YEAR "2024") diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_test_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_test_common.bzl new file mode 100644 index 0000000000000..553f19d2de95f --- /dev/null +++ b/google/cloud/bigtable/emulator/bigtable_emulator_test_common.bzl @@ -0,0 +1,25 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# 
you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# DO NOT EDIT -- GENERATED BY CMake -- Change the CMakeLists.txt file if needed + +"""Automatically generated source lists for bigtable_emulator_test_common - DO NOT EDIT.""" + +bigtable_emulator_test_common_hdrs = [ + "test_util.h", +] + +bigtable_emulator_test_common_srcs = [ + "test_util.cc", +] diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl index 99e3ba082a1db..75b68000449f9 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -20,7 +20,8 @@ bigtable_emulator_unit_tests = [ "column_family_test.cc", "filter_test.cc", "filtered_map_test.cc", + "range_set_test.cc", "row_iterators_test.cc", "server_test.cc", - "range_set_test.cc", + "table_test.cc", ] diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 15bdd894c47ee..9ee8514441004 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -97,12 +97,12 @@ class FilteredColumnFamilyStream::FilterApply { FilterApply(FilteredColumnFamilyStream& parent) : parent_(parent) {} bool operator()(ColumnRange const& column_range) { - parent_.column_ranges_.Sum(column_range.range); + parent_.column_ranges_.Intersect(column_range.range); return true; } bool operator()(TimestampRange const& timestamp_range) { - 
parent_.timestamp_ranges_.Sum(timestamp_range.range); + parent_.timestamp_ranges_.Intersect(timestamp_range.range); return true; } diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index 9d0328cd78799..f9a71ed5340af 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -186,7 +186,7 @@ row2 :col0 @10ms: qux } std::string DumpFilteredColumnFamilyStream( - FilteredColumnFamilyStream& stream, NextMode next_mode = NextMode::kCell) { + AbstractCellStreamImpl& stream, NextMode next_mode = NextMode::kCell) { std::stringstream ss; for (; stream.HasValue(); stream.Next(next_mode)) { auto const& cell = stream.Value(); @@ -236,49 +236,43 @@ TEST(FilteredColumnFamilyStream, FilterByTimestampRange) { using testing_util::chrono_literals::operator""_ms; ColumnFamily fam; - fam.SetCell("row0", "col0", 10_ms, "foo"); - fam.SetCell("row0", "col0", 20_ms, "bar"); // Filter out - fam.SetCell("row0", "col0", 30_ms, "baz"); - fam.SetCell("row0", "col1", 100_ms, "foo"); // Filter out - fam.SetCell("row0", "col1", 150_ms, "foo"); // Filter out - fam.SetCell("row0", "col1", 190_ms, "foo"); // Filter out - fam.SetCell("row0", "col2", 200_ms, "foo"); - fam.SetCell("row0", "col2", 220_ms, "foo"); - fam.SetCell("row0", "col2", 240_ms, "foo"); + fam.SetCell("row0", "col0", 100_ms, "foo"); + fam.SetCell("row0", "col0", 300_ms, "bar"); // Filter out + + fam.SetCell("row0", "col1", 200_ms, "foo"); // Filter out + fam.SetCell("row0", "col1", 250_ms, "foo"); // Filter out + fam.SetCell("row0", "col1", 290_ms, "foo"); // Filter out + fam.SetCell("row0", "col2", 100_ms, "foo"); + fam.SetCell("row0", "col2", 120_ms, "foo"); + fam.SetCell("row0", "col2", 140_ms, "foo"); fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out - 
fam.SetCell("row1", "col0", 10_ms, "foo"); fam.SetCell("row1", "col0", 20_ms, "bar"); // Filter out - fam.SetCell("row1", "col0", 30_ms, "baz"); - fam.SetCell("row1", "col1", 100_ms, "foo"); // Filter out - fam.SetCell("row1", "col1", 150_ms, "foo"); // Filter out - fam.SetCell("row1", "col1", 190_ms, "foo"); // Filter out - fam.SetCell("row1", "col2", 200_ms, "foo"); - fam.SetCell("row1", "col2", 220_ms, "foo"); - fam.SetCell("row1", "col2", 240_ms, "foo"); + fam.SetCell("row1", "col0", 10_ms, "baz"); + fam.SetCell("row1", "col1", 200_ms, "foo"); // Filter out + fam.SetCell("row1", "col1", 250_ms, "foo"); // Filter out + fam.SetCell("row1", "col1", 290_ms, "foo"); // Filter out + fam.SetCell("row1", "col2", 100_ms, "foo"); + fam.SetCell("row1", "col2", 120_ms, "foo"); + fam.SetCell("row1", "col2", 140_ms, "foo"); fam.SetCell("row1", "col3", 300_ms, "foo"); // Filter out fam.SetCell("row1", "col3", 300_ms, "foo"); // Filter out fam.SetCell("row1", "col3", 300_ms, "foo"); // Filter out auto included_rows = std::make_shared(StringRangeSet::All()); FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); filtered_stream.ApplyFilter( - TimestampRange{TimestampRangeSet::Range(0_ms, 20_ms)}); + TimestampRange{TimestampRangeSet::Range(0_ms, 300_ms)}); filtered_stream.ApplyFilter( - TimestampRange{TimestampRangeSet::Range(30_ms, 100_ms)}); - filtered_stream.ApplyFilter( - TimestampRange{TimestampRangeSet::Range(200_ms, 300_ms)}); + TimestampRange{TimestampRangeSet::Range(100_ms, 200_ms)}); EXPECT_EQ(R"""( -row0 cf1:col0 @10ms: foo -row0 cf1:col0 @30ms: baz -row0 cf1:col2 @200ms: foo -row0 cf1:col2 @220ms: foo -row0 cf1:col2 @240ms: foo -row1 cf1:col0 @10ms: foo -row1 cf1:col0 @30ms: baz -row1 cf1:col2 @200ms: foo -row1 cf1:col2 @220ms: foo -row1 cf1:col2 @240ms: foo +row0 cf1:col0 @100ms: foo +row0 cf1:col2 @100ms: foo +row0 cf1:col2 @120ms: foo +row0 cf1:col2 @140ms: foo +row1 cf1:col2 @100ms: foo +row1 cf1:col2 @120ms: foo +row1 cf1:col2 @140ms: foo )""", 
"\n" + DumpFilteredColumnFamilyStream(filtered_stream)); } @@ -286,24 +280,22 @@ TEST(FilteredColumnFamilyStream, FilterByColumnRange) { using testing_util::chrono_literals::operator""_ms; ColumnFamily fam; - fam.SetCell("row0", "col0", 10_ms, "foo"); - fam.SetCell("row0", "col1", 100_ms, "foo"); // Filter out + fam.SetCell("row0", "col0", 10_ms, "foo"); // Filter out + fam.SetCell("row0", "col1", 100_ms, "foo"); fam.SetCell("row0", "col2", 200_ms, "foo"); fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out - fam.SetCell("row1", "col2", 300_ms, "foo"); - fam.SetCell("row2", "col0", 300_ms, "foo"); + fam.SetCell("row2", "col1", 300_ms, "foo"); auto included_rows = std::make_shared(StringRangeSet::All()); FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); filtered_stream.ApplyFilter( - ColumnRange{StringRangeSet::Range("col0", false, "col0", false)}); + ColumnRange{StringRangeSet::Range("col1", false, "col4", false)}); filtered_stream.ApplyFilter( - ColumnRange{StringRangeSet::Range("col2", false, "col2", false)}); + ColumnRange{StringRangeSet::Range("col1", false, "col2", false)}); EXPECT_EQ(R"""( -row0 cf1:col0 @10ms: foo +row0 cf1:col1 @100ms: foo row0 cf1:col2 @200ms: foo -row1 cf1:col2 @300ms: foo -row2 cf1:col0 @300ms: foo +row2 cf1:col1 @300ms: foo )""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); } @@ -377,6 +369,8 @@ row3 cf1:col3 @300ms: foo )""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); } +// Add Next Column, Next Row tests + } // anonymous namespace } // namespace emulator } // namespace bigtable diff --git a/google/cloud/bigtable/emulator/range_set.cc b/google/cloud/bigtable/emulator/range_set.cc index da9911fea8bb6..ffe1a6a564650 100644 --- a/google/cloud/bigtable/emulator/range_set.cc +++ b/google/cloud/bigtable/emulator/range_set.cc @@ -140,6 +140,7 @@ void RangeSetIntersectImpl(RangeSetType& disjoint_ranges, range_it != 
disjoint_ranges.end();) { if (!detail::HasOverlap(*range_it, intersected_range)) { disjoint_ranges.erase(range_it++); + continue; } if (typename RangeType::StartLess()(*range_it, intersected_range)) { RangeType to_update = std::move(*range_it); diff --git a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc index f4e82ca5efc41..43fbeab4c8b22 100644 --- a/google/cloud/bigtable/emulator/range_set_test.cc +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -788,12 +788,45 @@ TEST(TimestampRangeSet, IntersectAll) { ASSERT_EQ(TSRanges({{3_ms, 5_ms}}), trs.disjoint_ranges()); } -TEST(TimestampRangeSet, IntersectPartial) { +TEST(TimestampRangeSet, IntersectPartialShorter) { using testing_util::chrono_literals::operator""_ms; - auto trs = TimestampRangeSet::All(); - trs.Intersect(TimestampRangeSet::Range(3_ms, 5_ms)); - ASSERT_EQ(TSRanges({{3_ms, 5_ms}}), trs.disjoint_ranges()); + auto trs = TimestampRangeSet::Empty(); + trs.Sum(TimestampRangeSet::Range(1_ms, 4_ms)); + trs.Sum(TimestampRangeSet::Range(5_ms, 6_ms)); + trs.Sum(TimestampRangeSet::Range(7_ms, 10_ms)); + trs.Intersect(TimestampRangeSet::Range(3_ms, 8_ms)); + ASSERT_EQ(TSRanges({{3_ms, 4_ms}, {5_ms, 6_ms}, {7_ms, 8_ms}}), + trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, IntersectPartialLonger) { + using testing_util::chrono_literals::operator""_ms; + + auto trs = TimestampRangeSet::Empty(); + trs.Sum(TimestampRangeSet::Range(3_ms, 4_ms)); + trs.Sum(TimestampRangeSet::Range(5_ms, 6_ms)); + trs.Sum(TimestampRangeSet::Range(7_ms, 8_ms)); + trs.Intersect(TimestampRangeSet::Range(1_ms, 10_ms)); + ASSERT_EQ(TSRanges({{3_ms, 4_ms}, {5_ms, 6_ms}, {7_ms, 8_ms}}), + trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, IntersectDistinct) { + using testing_util::chrono_literals::operator""_ms; + + auto trs = TimestampRangeSet::Empty(); + trs.Sum(TimestampRangeSet::Range(3_ms, 4_ms)); + trs.Intersect(TimestampRangeSet::Range(7_ms, 10_ms)); + 
ASSERT_EQ(TSRanges({}), trs.disjoint_ranges()); +} + +TEST(StringRangeSet, IntersectDistinct) { + auto srs = StringRangeSet::All(); + srs.Intersect({StringRangeSet::Range("col0", false, "col0", false)}); + srs.Intersect({StringRangeSet::Range("col2", false, "col2", false)}); + std::set empty; + ASSERT_EQ(empty, srs.disjoint_ranges()); } } // anonymous namespace diff --git a/google/cloud/bigtable/emulator/table_test.cc b/google/cloud/bigtable/emulator/table_test.cc new file mode 100644 index 0000000000000..e930f2eb9b52d --- /dev/null +++ b/google/cloud/bigtable/emulator/table_test.cc @@ -0,0 +1,45 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/row_range.h" +#include "google/cloud/testing_util/is_proto_equal.h" +#include "google/cloud/testing_util/status_matchers.h" +#include "google/cloud/testing_util/chrono_literals.h" +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +namespace { + +std::string DumpStream( + AbstractCellStreamImpl& stream, NextMode next_mode = NextMode::kCell) { + std::stringstream ss; + for (; stream.HasValue(); stream.Next(next_mode)) { + auto const& cell = stream.Value(); + ss << cell.row_key() << " " << cell.column_family() << ":" + << cell.column_qualifier() << " @" << cell.timestamp().count() + << "ms: " << cell.value() << std::endl; + } + return ss.str(); +} + + +} // anonymous namespace +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google From dd528d334b092008fff5058bb20af7107831fc7d Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 27 Feb 2025 19:16:44 +0300 Subject: [PATCH 037/195] emulator: rollback tests: fix test for basic functionality. Thanks to Marek for the hint on the major part of the fix, which involved copying a reference instead of a value! 
--- google/cloud/bigtable/emulator/rollback_test.cc | 2 +- google/cloud/bigtable/emulator/table.cc | 2 +- google/cloud/bigtable/emulator/table.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 68ed24a7b97e6..f1109c1471828 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -78,7 +78,7 @@ TEST(TransactonRollback, SetCellBasicFunction) { ASSERT_NE(timestamp_it, column_row.end()); auto value = timestamp_it->second; - ASSERT_EQ(value, "string"); + ASSERT_EQ(value, "test"); } } // namespace emulator diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 458c0d14ec59b..cb49348760860 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -422,7 +422,7 @@ Status RowTransaction::SetCell( return maybe_column_family.status(); } - auto column_family = maybe_column_family->get(); + auto &column_family = maybe_column_family->get(); bool row_existed = true; // First if the key introduces a new ColumnFamilyRow, we need to diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 0d553b4bbd080..03a67b366ba38 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -122,6 +122,7 @@ class RowTransaction { ::google::bigtable::v2::MutateRowRequest const& request) : request_(request) { table_ = std::move(table); + committed_ = false; }; ~RowTransaction() { From 9fcb0d32056efbec9533b41d27572568f1cf9066 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 27 Feb 2025 19:16:44 +0300 Subject: [PATCH 038/195] emulator: rollback tests: fix test for basic functionality. Thanks to Marek for the hint on the major part of the fix, which involved copying a reference instead of a value! 
--- google/cloud/bigtable/emulator/rollback_test.cc | 2 +- google/cloud/bigtable/emulator/table.cc | 2 +- google/cloud/bigtable/emulator/table.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 68ed24a7b97e6..f1109c1471828 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -78,7 +78,7 @@ TEST(TransactonRollback, SetCellBasicFunction) { ASSERT_NE(timestamp_it, column_row.end()); auto value = timestamp_it->second; - ASSERT_EQ(value, "string"); + ASSERT_EQ(value, "test"); } } // namespace emulator diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 458c0d14ec59b..cb49348760860 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -422,7 +422,7 @@ Status RowTransaction::SetCell( return maybe_column_family.status(); } - auto column_family = maybe_column_family->get(); + auto &column_family = maybe_column_family->get(); bool row_existed = true; // First if the key introduces a new ColumnFamilyRow, we need to diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 0d553b4bbd080..03a67b366ba38 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -122,6 +122,7 @@ class RowTransaction { ::google::bigtable::v2::MutateRowRequest const& request) : request_(request) { table_ = std::move(table); + committed_ = false; }; ~RowTransaction() { From 8383131b7502d61bd2cfd7e7d4450a6204c7d626 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 28 Feb 2025 13:34:15 +0300 Subject: [PATCH 039/195] emulator: ColumnFamily class: Disable copying. 
--- google/cloud/bigtable/emulator/column_family.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 65b44f1b4aaf6..b19dce6bd5943 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -95,6 +95,12 @@ class ColumnFamilyRow { class ColumnFamily { public: + ColumnFamily() = default; + // Disable copying. + ColumnFamily(ColumnFamily const &) = delete; + ColumnFamily& operator=(ColumnFamily const &) = delete; + + using const_iterator = std::map::const_iterator; void SetCell(std::string const& row_key, std::string const& column_qualifier, From f56b010135c4d1ae8977f256272b3293525c5a7b Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 28 Feb 2025 16:31:20 +0300 Subject: [PATCH 040/195] emulator: Disable copying ColumnFamilyRow and simplify tx SetCell logic. --- .../cloud/bigtable/emulator/column_family.h | 5 +++ .../cloud/bigtable/emulator/rollback_test.cc | 2 +- google/cloud/bigtable/emulator/table.cc | 43 ++++++++----------- 3 files changed, 24 insertions(+), 26 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index b19dce6bd5943..af797a0d59a27 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -64,6 +64,11 @@ class ColumnRow { class ColumnFamilyRow { public: + ColumnFamilyRow() = default; + // Disable copying. 
+ ColumnFamilyRow(ColumnFamilyRow const &) = delete; + ColumnFamilyRow& operator=(ColumnFamilyRow const &) = delete; + void SetCell(std::string const& column_qualifier, std::chrono::milliseconds timestamp, std::string const& value); std::size_t DeleteColumn( diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index f1109c1471828..6eb42ca4a87e0 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -68,7 +68,7 @@ TEST(TransactonRollback, SetCellBasicFunction) { auto column_family_row_it = cf->find(row_key); ASSERT_NE(column_family_row_it, cf->end()); - auto column_family_row = column_family_row_it->second; + auto &column_family_row = column_family_row_it->second; auto column_row_it = column_family_row.find("test"); ASSERT_NE(column_row_it, column_family_row.end()); diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index cb49348760860..b32c57ab4462d 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -422,37 +422,31 @@ Status RowTransaction::SetCell( return maybe_column_family.status(); } - auto &column_family = maybe_column_family->get(); + auto& column_family = maybe_column_family->get(); bool row_existed = true; - // First if the key introduces a new ColumnFamilyRow, we need to - // arrange for the entire ColumnFamilyrow to go when we revert - // the transaction. 
+ bool column_existed = true; + bool cell_existed = true; + auto row_key_it = column_family.find(request_.row_key()); + if (row_key_it == column_family.end()) { row_existed = false; - } - - ::google::cloud::bigtable::emulator::ColumnFamilyRow column_family_row; - if (row_existed) { - column_family_row = row_key_it->second; - } - - bool column_existed = true; - auto column_row_it = column_family_row.find(set_cell.column_qualifier()); - if (!row_existed || column_row_it == column_family_row.end()) { column_existed = false; - } - - bool cell_existed = true; - if (!row_existed || !column_existed) { cell_existed = false; } else { - auto timestamp_it = column_row_it->second.find( - std::chrono::duration_cast( - std::chrono::microseconds(set_cell.timestamp_micros()))); - if (timestamp_it == column_row_it->second.end()) { + auto& column_family_row = row_key_it->second; + auto column_row_it = column_family_row.find(set_cell.column_qualifier()); + if (column_row_it == column_family_row.end()) { + column_existed = false; cell_existed = false; + } else { + auto timestamp_it = column_row_it->second.find( + std::chrono::duration_cast( + std::chrono::microseconds(set_cell.timestamp_micros()))); + if (timestamp_it == column_row_it->second.end()) { + cell_existed = false; + } } } @@ -465,13 +459,12 @@ Status RowTransaction::SetCell( // If we have added a row, a column or a cell, we need to recompute // these iterators. 
row_key_it = column_family.find(request_.row_key()); - column_family_row = row_key_it->second; - column_row_it = column_family_row.find(set_cell.column_qualifier()); + auto& column_family_row = row_key_it->second; + auto column_row_it = column_family_row.find(set_cell.column_qualifier()); auto timestamp_it = column_row_it->second.find( std::chrono::duration_cast( std::chrono::microseconds(set_cell.timestamp_micros()))); - if (!row_existed) { DeleteRow delete_row = {row_key_it, column_family}; undo_.emplace(delete_row); From ec0f58bf22a3cb0d7751b7babb4bcffa2111828b Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 28 Feb 2025 17:03:22 +0300 Subject: [PATCH 041/195] emulator: Disable the copying of ColumnRows. --- google/cloud/bigtable/emulator/column_family.h | 5 +++++ google/cloud/bigtable/emulator/rollback_test.cc | 4 ++-- google/cloud/bigtable/emulator/table.cc | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index af797a0d59a27..18e03712703d4 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -32,6 +32,11 @@ namespace emulator { class ColumnRow { public: + ColumnRow() = default; + // Disable copying. 
+ ColumnRow(ColumnRow const &) = delete; + ColumnRow& operator=(ColumnRow const &) = delete; + void SetCell(std::chrono::milliseconds timestamp, std::string const& value); std::size_t DeleteTimeRange( ::google::bigtable::v2::TimestampRange const& time_range); diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 6eb42ca4a87e0..022851069c0cf 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -64,7 +64,7 @@ TEST(TransactonRollback, SetCellBasicFunction) { auto column_family_it = table->find("test"); ASSERT_NE(column_family_it, table->end()); - auto cf = column_family_it->second; + const auto& cf = column_family_it->second; auto column_family_row_it = cf->find(row_key); ASSERT_NE(column_family_row_it, cf->end()); @@ -72,7 +72,7 @@ TEST(TransactonRollback, SetCellBasicFunction) { auto column_row_it = column_family_row.find("test"); ASSERT_NE(column_row_it, column_family_row.end()); - auto column_row = column_row_it->second; + auto &column_row = column_row_it->second; auto timestamp_it = column_row.find(std::chrono::duration_cast( std::chrono::microseconds(1234))); ASSERT_NE(timestamp_it, column_row.end()); diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index b32c57ab4462d..2acef386bc51d 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -493,14 +493,14 @@ void RowTransaction::Undo() { undo_.pop(); if (auto* restore_value = absl::get_if(&op)) { - auto column_row = restore_value->column_row_it_->second; + auto& column_row = restore_value->column_row_it_->second; column_row.find(restore_value->timestamp_)->second = std::move(restore_value->value_); continue; } if (auto* delete_value = absl::get_if(&op)) { - auto column_row = delete_value->column_row_it_->second; + auto& column_row = delete_value->column_row_it_->second; auto timestamp_it = 
column_row.find(delete_value->timestamp_); column_row.erase(timestamp_it); continue; From 601b6640cacd42b379861ed3bffd97f6313a7e6b Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 28 Feb 2025 20:12:18 +0300 Subject: [PATCH 042/195] emulator: rollback_test.cc: factor out helper to check a cell exists with a specific value. --- .../cloud/bigtable/emulator/rollback_test.cc | 100 +++++++++++++----- 1 file changed, 73 insertions(+), 27 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 022851069c0cf..66d543af37216 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -14,6 +14,8 @@ #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/bigtable/table.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" #include "google/cloud/testing_util/status_matchers.h" #include #include @@ -21,7 +23,10 @@ #include #include #include +#include #include +#include +#include #include namespace google { @@ -29,17 +34,70 @@ namespace cloud { namespace bigtable { namespace emulator { +Status has_cell( + std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier, + int64_t timestamp_micros, std::string const& value) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return Status( + StatusCode::kNotFound, + absl::StrFormat("column family %s not found in table", column_family), + ErrorInfo()); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return Status(StatusCode::kNotFound, + absl::StrFormat("no row key %s found in column famiily %s", + row_key, column_family), + ErrorInfo()); + } + + auto& column_family_row = column_family_row_it->second; + auto column_row_it = 
column_family_row.find(column_qualifier); + if (column_row_it == column_family_row.end()) { + return Status( + StatusCode::kNotFound, + absl::StrFormat("no column found with qualifer %s", column_qualifier), + ErrorInfo()); + } + + auto& column_row = column_row_it->second; + auto timestamp_it = + column_row.find(std::chrono::duration_cast( + std::chrono::microseconds(timestamp_micros))); + if (timestamp_it == column_row.end()) { + return Status(StatusCode::kNotFound, "timestamp not found", ErrorInfo()); + } + + if (timestamp_it->second != value) { + return Status(StatusCode::kNotFound, + absl::StrFormat("wrong value: expected %s, found %s", value, + timestamp_it->second), + ErrorInfo()); + } + + return Status(StatusCode::kOk, "", ErrorInfo()); +} + // Ensure that SetCell still works to set a cell that was not set // before, when using the RowTransaction class. TEST(TransactonRollback, SetCellBasicFunction) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; - const auto *const table_name = "projects/test/instances/test/tables/test"; - const auto *const row_key = "0"; + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifer = "test"; + auto const timestamp_micros = 1234; + auto const* data = "test"; schema.set_name(table_name); - (*schema.mutable_column_families())["test"] = column_family; + (*schema.mutable_column_families())[column_family_name] = column_family; auto maybe_table = Table::Create(schema); ASSERT_STATUS_OK(maybe_table); @@ -50,35 +108,23 @@ TEST(TransactonRollback, SetCellBasicFunction) { mutation_request.set_table_name(table_name); mutation_request.set_row_key(row_key); - - auto *mutation_request_mutation = mutation_request.add_mutations(); - auto *set_cell_mutation = mutation_request_mutation->mutable_set_cell(); - set_cell_mutation->set_family_name("test"); - 
set_cell_mutation->set_column_qualifier("test"); - set_cell_mutation->set_timestamp_micros(1234); + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); + set_cell_mutation->set_family_name(column_family_name); + set_cell_mutation->set_column_qualifier(column_qualifer); + set_cell_mutation->set_timestamp_micros(timestamp_micros); set_cell_mutation->set_value("test"); auto status = table->MutateRow(mutation_request); ASSERT_STATUS_OK(status); - auto column_family_it = table->find("test"); - ASSERT_NE(column_family_it, table->end()); - - const auto& cf = column_family_it->second; - auto column_family_row_it = cf->find(row_key); - ASSERT_NE(column_family_row_it, cf->end()); - - auto &column_family_row = column_family_row_it->second; - auto column_row_it = column_family_row.find("test"); - ASSERT_NE(column_row_it, column_family_row.end()); - - auto &column_row = column_row_it->second; - auto timestamp_it = column_row.find(std::chrono::duration_cast( - std::chrono::microseconds(1234))); - ASSERT_NE(timestamp_it, column_row.end()); - - auto value = timestamp_it->second; - ASSERT_EQ(value, "test"); + ASSERT_STATUS_OK(has_cell( + table, + column_family_name, + row_key, + column_qualifer, + timestamp_micros, + data)); } } // namespace emulator From 863f1c018eccaa81062a6e693a7dd51e623b0e4c Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 28 Feb 2025 21:06:30 +0300 Subject: [PATCH 043/195] emulator: add helper to set a cell to a value. Also, run clang-format on the file. 
--- .../cloud/bigtable/emulator/rollback_test.cc | 46 ++++++++++--------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 66d543af37216..313c73dd0adcf 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -34,11 +34,30 @@ namespace cloud { namespace bigtable { namespace emulator { +Status set_cell( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::string const& column_family_name, std::string const& column_qualifier, + int64_t timestamp_micros, std::string const& data) { + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); + set_cell_mutation->set_family_name(column_family_name); + set_cell_mutation->set_column_qualifier(column_qualifier); + set_cell_mutation->set_timestamp_micros(timestamp_micros); + set_cell_mutation->set_value(data); + + return table->MutateRow(mutation_request); +} + Status has_cell( std::shared_ptr& table, std::string const& column_family, std::string const& row_key, - std::string const& column_qualifier, - int64_t timestamp_micros, std::string const& value) { + std::string const& column_qualifier, int64_t timestamp_micros, + std::string const& value) { auto column_family_it = table->find(column_family); if (column_family_it == table->end()) { return Status( @@ -104,27 +123,12 @@ TEST(TransactonRollback, SetCellBasicFunction) { auto table = maybe_table.value(); - ::google::bigtable::v2::MutateRowRequest mutation_request; - mutation_request.set_table_name(table_name); - mutation_request.set_row_key(row_key); - - auto* mutation_request_mutation = mutation_request.add_mutations(); - 
auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); - set_cell_mutation->set_family_name(column_family_name); - set_cell_mutation->set_column_qualifier(column_qualifer); - set_cell_mutation->set_timestamp_micros(timestamp_micros); - set_cell_mutation->set_value("test"); - - auto status = table->MutateRow(mutation_request); + auto status = set_cell(table, table_name, row_key, column_family_name, + column_qualifer, timestamp_micros, data); ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell( - table, - column_family_name, - row_key, - column_qualifer, - timestamp_micros, - data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, + timestamp_micros, data)); } } // namespace emulator From 7d15c2081a7ab5e285a3505b9cdb6ff132f1bd2d Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 28 Feb 2025 21:38:15 +0300 Subject: [PATCH 044/195] emulator: rollback tests: implement helper function to create tables. --- .../cloud/bigtable/emulator/rollback_test.cc | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 313c73dd0adcf..e5a7aa08b0aa6 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/bigtable/table.h" #include "google/cloud/status.h" @@ -28,12 +29,25 @@ #include #include #include +#include namespace google { namespace cloud { namespace bigtable { namespace emulator { +StatusOr> create_table( + std::string const& table_name, std::vector& column_families) { + ::google::bigtable::admin::v2::Table schema; + schema.set_name(table_name); + for (auto& column_family_name : column_families) { + (*schema.mutable_column_families())[column_family_name] = + ::google::bigtable::admin::v2::ColumnFamily(); + } + + return Table::Create(schema); +} + Status set_cell( std::shared_ptr& table, std::string const& table_name, std::string const& row_key, @@ -115,10 +129,9 @@ TEST(TransactonRollback, SetCellBasicFunction) { auto const timestamp_micros = 1234; auto const* data = "test"; - schema.set_name(table_name); - (*schema.mutable_column_families())[column_family_name] = column_family; + std::vector column_families = {column_family_name}; + auto maybe_table = create_table(table_name, column_families); - auto maybe_table = Table::Create(schema); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); From a38278f369ba8c55c213ce425d36dcf8b1d8f093 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 28 Feb 2025 22:03:01 +0300 Subject: [PATCH 045/195] emulator: rollback tests: Fix comment describing test.
--- google/cloud/bigtable/emulator/rollback_test.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index e5a7aa08b0aa6..00f4e99357b95 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -116,8 +116,7 @@ Status has_cell( return Status(StatusCode::kOk, "", ErrorInfo()); } -// Ensure that SetCell still works to set a cell that was not set -// before, when using the RowTransaction class. +// Does the SetCell mutation work to set a cell to a specific value? TEST(TransactonRollback, SetCellBasicFunction) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; From 978c3fbbdee7b1729a00608ad147b98205327cd0 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Sun, 2 Mar 2025 17:24:45 +0300 Subject: [PATCH 046/195] emulator: rollback tests: Implement additional testing helper functions. 
- To check if a row exists - To check if a column exists --- .../cloud/bigtable/emulator/rollback_test.cc | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 00f4e99357b95..6847d45b5c776 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -116,6 +116,61 @@ Status has_cell( return Status(StatusCode::kOk, "", ErrorInfo()); } +Status has_column( + std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return Status( + StatusCode::kNotFound, + absl::StrFormat("column family %s not found in table", column_family), + ErrorInfo()); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return Status(StatusCode::kNotFound, + absl::StrFormat("no row key %s found in column famiily %s", + row_key, column_family), + ErrorInfo()); + } + + auto& column_family_row = column_family_row_it->second; + auto column_row_it = column_family_row.find(column_qualifier); + if (column_row_it == column_family_row.end()) { + return Status( + StatusCode::kNotFound, + absl::StrFormat("no column found with qualifer %s", column_qualifier), + ErrorInfo()); + } + + return Status(StatusCode::kOk, "", ErrorInfo()); +} + +Status has_row(std::shared_ptr& table, + std::string const& column_family, std::string const& row_key) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return Status( + StatusCode::kNotFound, + absl::StrFormat("column family %s not found in table", column_family), + ErrorInfo()); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if 
(column_family_row_it == cf->end()) { + return Status(StatusCode::kNotFound, + absl::StrFormat("no row key %s found in column famiily %s", + row_key, column_family), + ErrorInfo()); + } + + return Status(StatusCode::kOk, "", ErrorInfo()); +} + // Does the SetCell mutation work to set a cell to a specific value? TEST(TransactonRollback, SetCellBasicFunction) { ::google::bigtable::admin::v2::Table schema; From 21051c1bf04ff449470d8867607caa2e1b34ecda Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Sun, 2 Mar 2025 19:24:23 +0300 Subject: [PATCH 047/195] emulator: add helper to test > 1 SetCell mutations at once. --- .../cloud/bigtable/emulator/rollback_test.cc | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 6847d45b5c776..80adfc0bf627a 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -36,6 +36,13 @@ namespace cloud { namespace bigtable { namespace emulator { +struct SetCellParams { + std::string column_family_name; + std::string column_qualifier; + int64_t timestamp_micros; + std::string data; +}; + StatusOr> create_table( std::string const& table_name, std::vector& column_families) { ::google::bigtable::admin::v2::Table schema; @@ -48,6 +55,27 @@ StatusOr> create_table( return Table::Create(schema); } +Status set_cells( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector& set_cell_params) { + + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + for (auto m : set_cell_params) { + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); + set_cell_mutation->set_family_name(m.column_family_name); + 
set_cell_mutation->set_column_qualifier(m.column_qualifier); + set_cell_mutation->set_timestamp_micros(m.timestamp_micros); + set_cell_mutation->set_value(m.data); + } + + return table->MutateRow(mutation_request); +} + Status set_cell( std::shared_ptr& table, std::string const& table_name, std::string const& row_key, From 09c5b85b00d0fc013fc513f2d473a3ee0086dd46 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Sun, 2 Mar 2025 20:29:25 +0300 Subject: [PATCH 048/195] emulator: use set_cells() to set possibly more than one SetCell mutation. --- .../cloud/bigtable/emulator/rollback_test.cc | 29 ++++--------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 80adfc0bf627a..11c819c79f40d 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -14,7 +14,6 @@ #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/table.h" -#include "google/cloud/bigtable/table.h" #include "google/cloud/status.h" #include "google/cloud/status_or.h" #include "google/cloud/testing_util/status_matchers.h" @@ -76,25 +75,6 @@ Status set_cells( return table->MutateRow(mutation_request); } -Status set_cell( - std::shared_ptr& table, - std::string const& table_name, std::string const& row_key, - std::string const& column_family_name, std::string const& column_qualifier, - int64_t timestamp_micros, std::string const& data) { - ::google::bigtable::v2::MutateRowRequest mutation_request; - mutation_request.set_table_name(table_name); - mutation_request.set_row_key(row_key); - - auto* mutation_request_mutation = mutation_request.add_mutations(); - auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); - set_cell_mutation->set_family_name(column_family_name); - set_cell_mutation->set_column_qualifier(column_qualifier); - 
set_cell_mutation->set_timestamp_micros(timestamp_micros); - set_cell_mutation->set_value(data); - - return table->MutateRow(mutation_request); -} - Status has_cell( std::shared_ptr& table, std::string const& column_family, std::string const& row_key, @@ -215,11 +195,14 @@ TEST(TransactonRollback, SetCellBasicFunction) { auto maybe_table = create_table(table_name, column_families); ASSERT_STATUS_OK(maybe_table); - auto table = maybe_table.value(); - auto status = set_cell(table, table_name, row_key, column_family_name, - column_qualifer, timestamp_micros, data); + std::vector v; + SetCellParams p = {column_family_name, column_qualifer, timestamp_micros, data}; + v.push_back(p); + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, From 36f56860f28020d1d586cb29b85b350078f46406 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 3 Mar 2025 14:18:00 +0300 Subject: [PATCH 049/195] emulator: transaction rollback: Implement working test for RestoreValue. This also involves a fix to the code due to a bug revealed by the test! --- .../cloud/bigtable/emulator/rollback_test.cc | 64 +++++++++++++++++++ google/cloud/bigtable/emulator/table.cc | 6 +- 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 11c819c79f40d..c40b5c99be793 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -209,6 +209,70 @@ TEST(TransactonRollback, SetCellBasicFunction) { timestamp_micros, data)); } +// Test that an old value is correctly restored in a pre-populated +// cell, when one of a set of SetCell mutations fails after the cell +// had been updated with a new value.
+TEST(TransactonRollback, TestRestoreValue) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + // The table will be set up with a schema with + // valid_column_family_name and mutations with this column family + // name are expected to succeed. We will simulate a transaction + // failure by setting some other not-pre-provisioned column family + // name. + auto const* const valid_column_family_name = "test"; + auto const* const column_qualifer = "test"; + int64_t good_mutation_timestamp_micros = 1000; + auto const* const good_mutation_data = "expected to succeed"; + + std::vector column_families = {valid_column_family_name}; + auto maybe_table = create_table(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {valid_column_family_name, column_qualifer, + good_mutation_timestamp_micros, good_mutation_data}; + v.push_back(p); + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, + column_qualifer, good_mutation_timestamp_micros, + good_mutation_data)); + + // Now atomically try 2 mutations. One modifies the above set cell, + // and the other one is expected to fail. The test is that + // RestoreValue will restore the previous value in cell with + // timestamp 1000. + std::vector w; + // Everything is the same but we try and modify the value in the cell cell set above. + p.data = "new data"; + w.push_back(p); + + // Because "invalid_column_family" does not exist in the table + // schema, a mutation with these SetCell parameters is expected to + // fail. 
+ p = {"invalid_column_family", "test2", 1000, "expected to fail"}; + w.push_back(p); + + status = set_cells(table, table_name, row_key, w); + ASSERT_NE(status.ok(), true); // The whole mutation chain should + // fail because the 2nd mutation + // contains an invalid column family. + + // And the first mutation should have been rolled back by + // RestoreValue and so should contain the old value, and not "new + // data". + ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, + column_qualifer, good_mutation_timestamp_micros, + good_mutation_data)); + +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 2acef386bc51d..0e87c0f6bf0d3 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -429,7 +429,7 @@ Status RowTransaction::SetCell( bool cell_existed = true; auto row_key_it = column_family.find(request_.row_key()); - + std::string value_to_restore; if (row_key_it == column_family.end()) { row_existed = false; column_existed = false; @@ -446,6 +446,8 @@ Status RowTransaction::SetCell( std::chrono::microseconds(set_cell.timestamp_micros()))); if (timestamp_it == column_row_it->second.end()) { cell_existed = false; + } else{ + value_to_restore = timestamp_it->second; } } } @@ -480,7 +482,7 @@ Status RowTransaction::SetCell( undo_.emplace(delete_value); } else { RestoreValue restore_value = {column_row_it, timestamp_it->first, - std::move(timestamp_it->second)}; + std::move(value_to_restore)}; undo_.emplace(restore_value); } From e59e72916f2fc8f8b83fbbeadca35455861a45c7 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 3 Mar 2025 14:22:24 +0300 Subject: [PATCH 050/195] emulator: rollback_tests.cc: run clang-format. 
--- google/cloud/bigtable/emulator/rollback_test.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index c40b5c99be793..3f09709a6a7d3 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -58,7 +58,6 @@ Status set_cells( std::shared_ptr& table, std::string const& table_name, std::string const& row_key, std::vector& set_cell_params) { - ::google::bigtable::v2::MutateRowRequest mutation_request; mutation_request.set_table_name(table_name); mutation_request.set_row_key(row_key); @@ -198,7 +197,8 @@ TEST(TransactonRollback, SetCellBasicFunction) { auto table = maybe_table.value(); std::vector v; - SetCellParams p = {column_family_name, column_qualifer, timestamp_micros, data}; + SetCellParams p = {column_family_name, column_qualifer, timestamp_micros, + data}; v.push_back(p); auto status = set_cells(table, table_name, row_key, v); @@ -249,7 +249,8 @@ TEST(TransactonRollback, TestRestoreValue) { // RestoreValue will restore the previous value in cell with // timestamp 1000. std::vector w; - // Everything is the same but we try and modify the value in the cell cell set above. + // Everything is the same but we try and modify the value in the cell cell set + // above. p.data = "new data"; w.push_back(p); @@ -260,9 +261,9 @@ TEST(TransactonRollback, TestRestoreValue) { w.push_back(p); status = set_cells(table, table_name, row_key, w); - ASSERT_NE(status.ok(), true); // The whole mutation chain should - // fail because the 2nd mutation - // contains an invalid column family. + ASSERT_NE(status.ok(), true); // The whole mutation chain should + // fail because the 2nd mutation + // contains an invalid column family. 
// And the first mutation should have been rolled back by // RestoreValue and so should contain the old value, and not "new @@ -270,7 +271,6 @@ TEST(TransactonRollback, TestRestoreValue) { ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, column_qualifer, good_mutation_timestamp_micros, good_mutation_data)); - } } // namespace emulator From 693ddff97e1c74d43fea486c3e990a562f7f9251 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 3 Mar 2025 14:32:44 +0300 Subject: [PATCH 051/195] emulator: transaction rollback: Avoid copying the possibly very large cell values. --- google/cloud/bigtable/emulator/table.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 0e87c0f6bf0d3..a9d006cf0e0eb 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -446,8 +446,8 @@ Status RowTransaction::SetCell( std::chrono::microseconds(set_cell.timestamp_micros()))); if (timestamp_it == column_row_it->second.end()) { cell_existed = false; - } else{ - value_to_restore = timestamp_it->second; + } else { + value_to_restore = std::move(timestamp_it->second); } } } From 9ee731c3474f077f8316fc156ded3a2f289cc6df Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 3 Mar 2025 15:20:06 +0300 Subject: [PATCH 052/195] emulator: transaction rollback: SetCell: Implement working test for DeleteValue. 
--- .../cloud/bigtable/emulator/rollback_test.cc | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 3f09709a6a7d3..1e2530d374080 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -273,6 +273,57 @@ TEST(TransactonRollback, TestRestoreValue) { good_mutation_data)); } +// Test that a new cell introduced in a chain of SetCell mutations is +// deleted on rollback if a subsequent mutation fails. +TEST(TransactonRollback, DeleteValue) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + // The table will be set up with a schema with + // valid_column_family_name and mutations with this column family + // name are expected to succeed. We will simulate a transaction + // failure by setting some other not-pre-provisioned column family + // name. + auto const* const valid_column_family_name = "test"; + std::vector column_families = {valid_column_family_name}; + auto maybe_table = create_table(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + // To test that we do not delete a row or column that we should not, + // let us first commit a transaction on the same row where we will + // do the DeleteValue test. 
+ std::vector v = { + {valid_column_family_name, "test", 1000, "data"}}; + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, + v[0].data)); + + // We then setup a transaction chain with 2 SetCells, the first one + // should succeed to add a new cell and the second one should fail + // (because it assumes an invalid schema in column family name). We + // expect the first cell to not exist after the rollback (and of + // course also no data from the 2nd failing SetCell mutation should + // exist either). + v = {{valid_column_family_name, "test", 2000, "new data"}, + {"invalid_column_family_name", "test", 3000, "more new data"}}; + + status = set_cells(table, table_name, row_key, v); + ASSERT_NE(status.ok(), true); // We expect the chain of mutations to + // fail alltogether. + status = has_cell(table, v[0].column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, v[0].data); + ASSERT_NE(status.ok(), true); // Undo should delete the cell + status = has_cell(table, v[1].column_family_name, row_key, + v[1].column_qualifier, v[1].timestamp_micros, v[1].data); + ASSERT_NE(status.ok(), true); // Also the SetCell with invalud shema + // should not have set anything. +} + } // namespace emulator } // namespace bigtable } // namespace cloud From ca8a6215cd6ed5ddd230885501dcf812906ee1ba Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 3 Mar 2025 15:52:39 +0300 Subject: [PATCH 053/195] emulator: transaction rollback: Implement working DeleteRow test. 
--- .../cloud/bigtable/emulator/rollback_test.cc | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 1e2530d374080..b507c70e1d267 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -324,6 +324,95 @@ TEST(TransactonRollback, DeleteValue) { // should not have set anything. } +// Test that if a successful SetCell mutation in a chain of SetCell +// mutations in one transaction introduces a new column but a +// subsequent SetCell mutation fails (we simulate this by passing an +// column family name that is not in the table schema) then the column +// and any of the cells introduced is deleted in the rollback, but +// that any pre-transaction-attemot data in the row is unaffected. +TEST(TransactonRollback, DeleteColumn) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + // The table will be set up with a schema with + // valid_column_family_name and mutations with this column family + // name are expected to succeed. We will simulate a transaction + // failure by setting some other not-pre-provisioned column family + // name. 
+ auto const* const valid_column_family_name = "test"; + std::vector column_families = {valid_column_family_name}; + auto maybe_table = create_table(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v = { + {valid_column_family_name, "test", 1000, "data"}}; + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, + v[0].data)); + + // Introduce a new column in a chain of SetCell mutations, a + // subsequent one of which must fail due to an invalid schema + // assumption (bad column family name). + v = {{valid_column_family_name, "new_column", 2000, "new data"}, + {"invalid_column_family_name", "test", 3000, "more new data"}}; + + status = set_cells(table, table_name, row_key, v); + ASSERT_NE(status.ok(), + true); // We expect the chain of mutations to + // fail alltogether because the last one must fail. + + // The original column ("test") should still exist. + status = has_column(table, valid_column_family_name, row_key, "test"); + ASSERT_STATUS_OK(status); + + // Bit the new column introduced should have been rolled back. + status = has_column(table, v[0].column_family_name, row_key, + v[0].column_qualifier); + ASSERT_NE(status.ok(), true); +} + +// Test that a chain of SetCell mutations that initially introduces a +// new row, but one of which eventually fails, will end with the whole +// row rolled back. +TEST(TransactonRollback, DeleteRow) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + // The table will be set up with a schema with + // valid_column_family_name and mutations with this column family + // name are expected to succeed. 
We will simulate a transaction + // failure by setting some other not-pre-provisioned column family + // name. + auto const* const valid_column_family_name = "test"; + std::vector column_families = {valid_column_family_name}; + auto maybe_table = create_table(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + // First SetCell should succeed and introduce a new row with key + // "0". The second one will fail due to bad schema settings. We + // expect not to find the row after the row mutation call returns. + std::vector v = { + {valid_column_family_name, "test", 1000, "data"}, + {"invalid_column_family_name", "test", 2000, + "more new data which should never be written"}}; + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_NE(status.ok(), + true); // We expect the chain of mutations to + // fail alltogether because the last one must fail. + + status = has_row(table, valid_column_family_name, row_key); + ASSERT_NE(status.ok(), true); +} + } // namespace emulator } // namespace bigtable } // namespace cloud From 60ab24528cbf947427f1b3b9ceecbc953c80937c Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 5 Mar 2025 20:19:06 +0300 Subject: [PATCH 054/195] emulator: transaction rollback: Implement a DeleteFromRow that updates the undo log. Also, add code coverage output artefacts to ignored files. 
--- .gitignore | 4 ++ google/cloud/bigtable/emulator/table.cc | 52 +++++++++++++++++++++++++ google/cloud/bigtable/emulator/table.h | 14 ++++++- 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1d012e3845f3d..c9097511f9df2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +# Code coverage output artefacts +code_coverage.info +code_coverage_report/ + # Common build output directory names .build/ _build/ diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index a9d006cf0e0eb..3facd75834a14 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -20,6 +20,7 @@ #include "google/cloud/bigtable/internal/google_bytes_traits.h" #include "google/cloud/internal/make_status.h" #include "google/protobuf/util/field_mask_util.h" +#include #include #include #include @@ -415,6 +416,57 @@ Status RowTransaction::MergeToCell( GCP_ERROR_INFO().WithMetadata("mutation", merge_to_cell.DebugString())); } +Status RowTransaction::DeleteFromFamily( + ::google::bigtable::v2::Mutation_DeleteFromFamily const& + delete_from_family) { + auto maybe_column_family = table_->FindColumnFamily(delete_from_family); + if (!maybe_column_family) { + return maybe_column_family.status(); + } + + auto table_it = table_->find(delete_from_family.family_name()); + if (table_it == table_->end()) { + return Status(StatusCode::kNotFound, + absl::StrFormat("column family %s not found in table", + delete_from_family.family_name()), + ErrorInfo()); + } + + if (auto column_family_it = table_it->second->find(request_.row_key()); + column_family_it != table_it->second->end()) { + RestoreRow restore_row; + + restore_row.table_it = table_it; + restore_row.row_key = request_.row_key(); + std::vector cells; + for (auto const& column : column_family_it->second) { + for (auto const& column_row : column.second) { + RestoreRow::Cell cell; + + cell.column_qualifer = std::move(column.first); + 
cell.timestamp = column_row.first; // Wait, is this correct? + cell.value = std::move(column_row.second); + cells.push_back(cell); + } + } + restore_row.cells = cells; + table_it->second->DeleteRow(request_.row_key()); // Is certain + // to succeed + // unless we + // run out of + // memory. + undo_.emplace(restore_row); + } else { + // The row does not exist + return Status(StatusCode::kNotFound, + absl::StrFormat("row key %s not found in column family %s", + request_.row_key(), table_it->first), + ErrorInfo()); + } + + return Status(); +} + Status RowTransaction::SetCell( ::google::bigtable::v2::Mutation_SetCell const& set_cell) { auto maybe_column_family = table_->FindColumnFamily(set_cell); diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 03a67b366ba38..ef44da3623f15 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -84,6 +85,17 @@ class Table : public std::enable_shared_from_this
{ std::map> column_families_; }; +struct RestoreRow { + std::map>::iterator table_it; + std::string row_key; + struct Cell { + std::string column_qualifer; + std::chrono::milliseconds timestamp; + std::string value; + }; + std::vector cells; +}; + struct RestoreValue { // The iterator to the `columns_` member of a relevant `ColumnFamilyRow` where // we should reinsert the value. @@ -152,7 +164,7 @@ class RowTransaction { bool committed_; std::shared_ptr
table_; - std::stack> + std::stack> undo_; ::google::bigtable::v2::MutateRowRequest const& request_; }; From 0d4fd5a97965af44b4a7b9425953a287e29ec679 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 5 Mar 2025 20:40:01 +0300 Subject: [PATCH 055/195] emulator: transaction rollback: Implement undo for row deletions (DeleteFromFamily). And be careful not to potentially very large values (use std::move on the column qualifier and cell values. --- google/cloud/bigtable/emulator/table.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 3facd75834a14..3273bb3a11195 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -570,6 +570,17 @@ void RowTransaction::Undo() { continue; } + if (auto* restore_row = absl::get_if(&op)) { + for (auto const& cell : restore_row->cells) { + // Take care to use std::move() to avoid copying potentially + // very larg values (the column qualifier and cell values can + // be very large. + restore_row->table_it->second->SetCell( + restore_row->row_key, std::move(cell.column_qualifer), + cell.timestamp, std::move(cell.value)); + } + } + // If we get here, there is an type of undo log that has not been // implemented! std::abort(); From e3791d49ff0b44d80d0df8d707c5ae1a1a6aa8ba Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 5 Mar 2025 20:53:13 +0300 Subject: [PATCH 056/195] emulator: RestoreRow undo log: Rename struct members more conventionally. 
--- google/cloud/bigtable/emulator/table.cc | 20 ++++++++++---------- google/cloud/bigtable/emulator/table.h | 12 ++++++------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 3273bb3a11195..d184772d5affe 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -436,20 +436,20 @@ Status RowTransaction::DeleteFromFamily( column_family_it != table_it->second->end()) { RestoreRow restore_row; - restore_row.table_it = table_it; - restore_row.row_key = request_.row_key(); + restore_row.table_it_ = table_it; + restore_row.row_key_ = request_.row_key(); std::vector cells; for (auto const& column : column_family_it->second) { for (auto const& column_row : column.second) { RestoreRow::Cell cell; - cell.column_qualifer = std::move(column.first); - cell.timestamp = column_row.first; // Wait, is this correct? - cell.value = std::move(column_row.second); + cell.column_qualifer_ = std::move(column.first); + cell.timestamp_ = column_row.first; // Wait, is this correct? + cell.value_ = std::move(column_row.second); cells.push_back(cell); } } - restore_row.cells = cells; + restore_row.cells_ = cells; table_it->second->DeleteRow(request_.row_key()); // Is certain // to succeed // unless we @@ -571,13 +571,13 @@ void RowTransaction::Undo() { } if (auto* restore_row = absl::get_if(&op)) { - for (auto const& cell : restore_row->cells) { + for (auto const& cell : restore_row->cells_) { // Take care to use std::move() to avoid copying potentially // very larg values (the column qualifier and cell values can // be very large. 
- restore_row->table_it->second->SetCell( - restore_row->row_key, std::move(cell.column_qualifer), - cell.timestamp, std::move(cell.value)); + restore_row->table_it_->second->SetCell( + restore_row->row_key_, std::move(cell.column_qualifer_), + cell.timestamp_, std::move(cell.value_)); } } diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index ef44da3623f15..243dbe5358432 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -86,14 +86,14 @@ class Table : public std::enable_shared_from_this
{ }; struct RestoreRow { - std::map>::iterator table_it; - std::string row_key; + std::map>::iterator table_it_; + std::string row_key_; struct Cell { - std::string column_qualifer; - std::chrono::milliseconds timestamp; - std::string value; + std::string column_qualifer_; + std::chrono::milliseconds timestamp_; + std::string value_; }; - std::vector cells; + std::vector cells_; }; struct RestoreValue { From a6119b2c587b5597e2e8e6c1b37a194973797dce Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 5 Mar 2025 21:06:44 +0300 Subject: [PATCH 057/195] emulator: DeleteFromFamily mutation: Implement with rollback support using RowTransaction. --- google/cloud/bigtable/emulator/table.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index d184772d5affe..fe1c8009148e9 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -247,13 +247,10 @@ Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { // FIXME no such row or column } } else if (mutation.has_delete_from_family()) { - auto maybe_column_family = - FindColumnFamily(mutation.delete_from_family()); - if (!maybe_column_family) { - return maybe_column_family.status(); - } - if (maybe_column_family->get().DeleteRow(request.row_key())) { - // FIXME no such row existed in that column family + auto const& delete_from_family = mutation.delete_from_family(); + auto status = row_transaction.DeleteFromFamily(delete_from_family); + if (!status.ok()) { + return status; } } else if (mutation.has_delete_from_row()) { bool row_existed = false; From c8ce0d819424b5e3235ce34f9b74c8cc0272b15d Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 6 Mar 2025 15:18:50 +0300 Subject: [PATCH 058/195] emulator: transaction rollback: Test basic functionality of a DeleteFromFamily which uses RowTransaction. 
--- .../cloud/bigtable/emulator/rollback_test.cc | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index b507c70e1d267..c3752907fc7f4 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -17,6 +17,7 @@ #include "google/cloud/status.h" #include "google/cloud/status_or.h" #include "google/cloud/testing_util/status_matchers.h" +#include "gmock/gmock.h" #include #include #include @@ -34,6 +35,7 @@ namespace google { namespace cloud { namespace bigtable { namespace emulator { +using std::string; struct SetCellParams { std::string column_family_name; @@ -54,6 +56,24 @@ StatusOr> create_table( return Table::Create(schema); } +Status delete_from_families( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector const& column_families) { + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + for (auto column_family : column_families) { + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* delete_from_family_mutation = + mutation_request_mutation->mutable_delete_from_family(); + delete_from_family_mutation->set_family_name(column_family); + } + + return table->MutateRow(mutation_request); +} + Status set_cells( std::shared_ptr& table, std::string const& table_name, std::string const& row_key, @@ -413,6 +433,55 @@ TEST(TransactonRollback, DeleteRow) { ASSERT_NE(status.ok(), true); } +// Does the DeleteFromfamily mutation work to delete a row from a +// specific family and does it rows with the same row key in othe +// column families alone? 
+TEST(TransactonRollback, DeleteFromFamilyBasicFunction) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifer = "test"; + auto const timestamp_micros = 1234; + auto const* data = "test"; + + auto const* const second_column_family_name = "test2"; + + std::vector column_families = {column_family_name, second_column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifer, timestamp_micros, + data}; + v.push_back(p); + + p = {second_column_family_name, column_qualifer, timestamp_micros, data}; + v.push_back(p); + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, + timestamp_micros, data)); + ASSERT_STATUS_OK( + has_column(table, column_family_name, row_key, column_qualifer)); + ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); + + // Having established that the data is there, test the basic + // functionality of the DeleteFromFamily mutation by trying to + // delete it. + ASSERT_STATUS_OK( + delete_from_families(table, table_name, row_key, {column_family_name})); + ASSERT_NE(true, has_row(table, column_family_name, row_key).ok()); + + // Ensure that we did not delete a row in anothe column family. 
+ ASSERT_EQ(true, has_row(table, second_column_family_name, row_key).ok()); +} + } // namespace emulator } // namespace bigtable } // namespace cloud From c9953d6ce6e7fd38291cf60950dd72a2829922f1 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 6 Mar 2025 18:08:41 +0300 Subject: [PATCH 059/195] emulator: test rollback of a row deletion due to DeleteFromFamily. And fix a code bug discovered by this test. --- .../cloud/bigtable/emulator/rollback_test.cc | 59 +++++++++++++++++++ google/cloud/bigtable/emulator/table.cc | 1 + 2 files changed, 60 insertions(+) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index c3752907fc7f4..740cf2b392a96 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -482,6 +482,65 @@ TEST(TransactonRollback, DeleteFromFamilyBasicFunction) { ASSERT_EQ(true, has_row(table, second_column_family_name, row_key).ok()); } +// Test that DeleteFromfamily can be rolled back in case a subsequent +// mutation fails. +TEST(TransactonRollback, DeleteFromFamilyRollback) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifer = "test"; + auto const timestamp_micros = 1234; + auto const* data = "test"; + + // Failure of one of the mutations is simalted by having a mutation + // with this column family, which has not been provisioned. Previous + // successful mutations should be rolled back when RowTransaction + // sees a mutation with this invlaid column family name. 
+ auto const* const column_family_not_in_schema = + "i_do_not_exist_in_the_schema"; + + std::vector column_families = {column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifer, timestamp_micros, + data}; + v.push_back(p); + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, + timestamp_micros, data)); + ASSERT_STATUS_OK( + has_column(table, column_family_name, row_key, column_qualifer)); + ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); + + // Setup two DeleteFromfamily mutation: The first one uses the + // correct table schema (a column family that exists and is expected + // to succeed to delete the row saved above. The second one uses a + // column family not provisioned and should fail, which should + // trigger a rollback of the previous row deletion. In the end, the + // above row should still exist and all its data should be intact. + status = + delete_from_families(table, table_name, row_key, + {column_family_name, column_family_not_in_schema}); + ASSERT_NE(true, status.ok()); // The overall chain of mutations should fail. + + // Check that the row deleted by the first mutation is restored, + // with all its data. 
+ ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, + timestamp_micros, data)); + ASSERT_STATUS_OK( + has_column(table, column_family_name, row_key, column_qualifer)); + ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index fe1c8009148e9..35ceb95d7e6be 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -576,6 +576,7 @@ void RowTransaction::Undo() { restore_row->row_key_, std::move(cell.column_qualifer_), cell.timestamp_, std::move(cell.value_)); } + continue; } // If we get here, there is an type of undo log that has not been From 520a1994661a153d57a0667d7e817fa4c4994239 Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Sun, 9 Mar 2025 22:00:23 +0100 Subject: [PATCH 060/195] Filters work again. --- .../cloud/bigtable/emulator/column_family.cc | 18 +- .../cloud/bigtable/emulator/column_family.h | 4 +- google/cloud/bigtable/emulator/filter.cc | 733 ++++++++++-------- google/cloud/bigtable/emulator/filter.h | 11 +- google/cloud/bigtable/emulator/table.cc | 92 ++- google/cloud/bigtable/emulator/table.h | 14 + google/cloud/bigtable/emulator/table_test.cc | 80 ++ 7 files changed, 574 insertions(+), 378 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 9ee8514441004..2ad4a93fdd1e0 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -132,7 +132,6 @@ FilteredColumnFamilyStream::FilteredColumnFamilyStream( rows_(RangeFilteredMapView(column_family, *row_ranges_), std::cref(row_regexes_)), - row_it_(rows_.begin()), initialized_(false) {} bool FilteredColumnFamilyStream::ApplyFilter( @@ -143,14 +142,14 @@ bool FilteredColumnFamilyStream::ApplyFilter( bool 
FilteredColumnFamilyStream::HasValue() const { InitializeIfNeeded(); - return row_it_ != rows_.end(); + return *row_it_ != rows_.end(); } CellView const& FilteredColumnFamilyStream::Value() const { InitializeIfNeeded(); if (!cur_value_) { - cur_value_ = - CellView(row_it_->first, column_family_name_, column_it_.value()->first, - cell_it_.value()->first, cell_it_.value()->second); + cur_value_ = CellView((*row_it_)->first, column_family_name_, + column_it_.value()->first, cell_it_.value()->first, + cell_it_.value()->second); } return cur_value_.value(); } @@ -158,7 +157,7 @@ CellView const& FilteredColumnFamilyStream::Value() const { bool FilteredColumnFamilyStream::Next(NextMode mode) { InitializeIfNeeded(); cur_value_.reset(); - assert(row_it_ != rows_.end()); + assert(*row_it_ != rows_.end()); assert(column_it_.value() != columns_.value().end()); assert(cell_it_.value() != cells_.value().end()); @@ -174,13 +173,14 @@ bool FilteredColumnFamilyStream::Next(NextMode mode) { return true; } } - ++row_it_; + ++(*row_it_); PointToFirstCellAfterRowChange(); return true; } void FilteredColumnFamilyStream::InitializeIfNeeded() const { if (!initialized_) { + row_it_ = rows_.begin(); PointToFirstCellAfterRowChange(); initialized_ = true; } @@ -201,10 +201,10 @@ bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() const { // Returns whether we've managed to find another cell bool FilteredColumnFamilyStream::PointToFirstCellAfterRowChange() const { - for (; row_it_ != rows_.end(); ++row_it_) { + for (; (*row_it_) != rows_.end(); ++(*row_it_)) { columns_ = RegexFiteredMapView< RangeFilteredMapView>( - RangeFilteredMapView(row_it_->second, + RangeFilteredMapView((*row_it_)->second, column_ranges_), column_regexes_); column_it_ = columns_.value().begin(); diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 786c2d9457ce2..6f1e6021ee287 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ 
b/google/cloud/bigtable/emulator/column_family.h @@ -135,8 +135,8 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { // If row_it_ == rows_.end() we've reached the end. // We keep the invariant that if (row_it_ != rows_.end()) then // cell_it_ != cells.end() && column_it_ != columns_.end() - mutable RegexFiteredMapView< - RangeFilteredMapView>::const_iterator + mutable absl::optional>::const_iterator> row_it_; mutable absl::optional>::const_iterator> diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index 60a20533c102d..b6863170dc278 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ b/google/cloud/bigtable/emulator/filter.cc @@ -28,10 +28,6 @@ namespace bigtable { namespace emulator { namespace { -bool StringRefEq(std::string const &s1, std::string const &s2) { - return &s1 == &s2 || s1 == s2; -} - bool PassAllFilters(InternalFilter const&) { return true; } } // namespace @@ -84,15 +80,14 @@ class PerRowStateFilter { : filter_(std::move(filter)), reset_(std::move(reset)) {} absl::optional operator()(CellView const &cell_view) { - if (!prev_row_ || - !StringRefEq(prev_row_.value().get(), cell_view.row_key())) { + if (!prev_row_ || prev_row_.value() != cell_view.row_key()) { state_ = reset_(); prev_row_ = cell_view.row_key(); } return filter_(state_, cell_view); } private: - absl::optional> prev_row_; + absl::optional prev_row_; State state_; FilterFunctor filter_; StateResetFunctor reset_; @@ -134,15 +129,15 @@ class PerColumnStateFilter { column_qualifier_(cell_view.column_qualifier()) {} bool Matches(CellView const &cell_view) { - return StringRefEq(row_key_.get(), cell_view.row_key()) && - StringRefEq(column_family_.get(), cell_view.column_family()) && - StringRefEq(column_qualifier_, cell_view.column_qualifier()); + return row_key_ == cell_view.row_key() && + column_family_ == cell_view.column_family() && + column_qualifier_ == cell_view.column_qualifier(); } private: - 
std::reference_wrapper row_key_; - std::reference_wrapper column_family_; - std::reference_wrapper column_qualifier_; + std::string row_key_; + std::string column_family_; + std::string column_qualifier_; }; absl::optional prev_; State state_; @@ -183,7 +178,7 @@ class TrivialTransformer : public AbstractCellStreamImpl { template CellStream MakeTrivialTransformer(CellStream source, Transformer transformer) { - return CellStream(std::make_shared>( + return CellStream(std::make_unique>( std::move(source), std::move(transformer))); } @@ -254,7 +249,7 @@ template CellStream MakeTrivialFilter( CellStream source, Filter filter, std::function filter_filter = PassAllFilters) { - return CellStream(std::make_shared>( + return CellStream(std::make_unique>( std::move(source), std::move(filter), std::move(filter_filter))); } @@ -358,10 +353,12 @@ void MergeCellStreams::InitializeIfNeeded() const { void MergeCellStreams::ReassesStreams() const { for (auto stream_it = unfinished_streams_.begin(); - stream_it != unfinished_streams_.end(); ++stream_it) { + stream_it != unfinished_streams_.end(); ) { if (!(*stream_it)->HasValue()) { stream_it->swap(unfinished_streams_.back()); unfinished_streams_.pop_back(); + } else { + ++stream_it; } } std::make_heap(unfinished_streams_.begin(), unfinished_streams_.end(), @@ -397,90 +394,110 @@ class ConditionStream : public AbstractCellStreamImpl { : source_(std::move(source)), predicate_stream_(std::move(predicate)), true_stream_(std::move(true_stream)), - false_stream_(std::move(false_stream)) {} + false_stream_(std::move(false_stream)), + initialized_(false) {} bool ApplyFilter(InternalFilter const& ) override { return false; } bool HasValue() const override { - return true; // FIXME + InitializeIfNeeded(); + return source_.HasValue(); } CellView const &Value() const override { - return *source_; // FIXME + InitializeIfNeeded(); + if (condition_true_) { + return *true_stream_; + } else { + return *false_stream_; + } } bool Next(NextMode mode) 
override { - if (mode != NextMode::kCell) { - // FIXME - we can be smarter than that. - return false; + InitializeIfNeeded(); + assert(source_); + if (condition_true_) { + true_stream_.Next(mode); + if (!true_stream_ || + !internal::CompareRowKey(current_row_, true_stream_->row_key())) { + source_.Next(NextMode::kRow); + OnNewRow(); + } + } else { + false_stream_.Next(mode); + if (!false_stream_ || + !internal::CompareRowKey(current_row_, false_stream_->row_key())) { + source_.Next(NextMode::kRow); + OnNewRow(); + } } - while (true) { - auto cell_view = *source_; + return true; + } - if (!prev_row_ || - !StringRefEq(prev_row_.value().get(), cell_view.row_key())) { - prev_row_ = cell_view.row_key(); - condition_true_.reset(); + private: + void OnNewRow() const { + while (true) { + if (!source_) { + return; } - if (!condition_true_.has_value()) { - // Let's test if the predicate stream returned something for this row. - for (; predicate_stream_ && - internal::CompareRowKey(predicate_stream_->row_key(), - cell_view.row_key()) < 0; - predicate_stream_.Next()); - if (predicate_stream_ && - internal::CompareRowKey(predicate_stream_->row_key(), - cell_view.row_key()) == 0) { - // Predicate stream did return somthing for this row. - condition_true_ = true; - // Fast-forward the true stream to start at current row. - for (; - true_stream_ && internal::CompareRowKey(true_stream_->row_key(), + auto cell_view = *source_; + current_row_ = cell_view.row_key(); + + // Let's test if the predicate stream returned something for this row. + for (; predicate_stream_ && + internal::CompareRowKey(predicate_stream_->row_key(), + cell_view.row_key()) < 0; + predicate_stream_.Next(NextMode::kRow)); + if (predicate_stream_ && + internal::CompareRowKey(predicate_stream_->row_key(), + cell_view.row_key()) == 0) { + // Predicate stream did return somthing for this row. + condition_true_ = true; + // Fast-forward the true stream to start at current row. 
+ for (; true_stream_ && internal::CompareRowKey(true_stream_->row_key(), cell_view.row_key()) < 0; - true_stream_.Next(NextMode::kRow)); - } else { - // Predicate stream did not return anything for this row. - condition_true_ = false; - // Fast-forward the false stream to start at current row. - for (; false_stream_ && - internal::CompareRowKey(false_stream_->row_key(), - cell_view.row_key()) < 0; - false_stream_.Next(NextMode::kRow)); - } - } - if (*condition_true_) { - if (true_stream_ && internal::CompareRowKey(true_stream_->row_key(), - cell_view.row_key()) == 0) { - return true; - } + true_stream_.Next(NextMode::kRow)); } else { - if (false_stream_ && - internal::CompareRowKey(false_stream_->row_key(), - cell_view.row_key()) == 0) { - return true; - } + // Predicate stream did not return anything for this row. + condition_true_ = false; + // Fast-forward the false stream to start at current row. + for (; + false_stream_ && internal::CompareRowKey(false_stream_->row_key(), + cell_view.row_key()) < 0; + false_stream_.Next(NextMode::kRow)); } - // True/false stream exhausted, reset state and fast-forward source. - condition_true_.reset(); - for (; - source_ && internal::CompareRowKey(source_->row_key(), - prev_row_->get()) == 0; - source_.Next(NextMode::kRow)); - if (!source_) { - return {}; + if (condition_true_ && true_stream_ && + internal::CompareRowKey(true_stream_->row_key(), + cell_view.row_key()) == 0) { + return; } + if (!condition_true_ && false_stream_ && + internal::CompareRowKey(false_stream_->row_key(), + cell_view.row_key()) == 0) { + return; + } + // True/false stream exhausted, fast-forward source. 
+ source_.Next(NextMode::kRow); } } - private: - CellStream source_; - CellStream predicate_stream_; - CellStream true_stream_; - CellStream false_stream_; - absl::optional> prev_row_; - absl::optional condition_true_; + void InitializeIfNeeded() const { + if (initialized_) { + return; + } + OnNewRow(); + initialized_ = true; + } + + mutable CellStream source_; + mutable CellStream predicate_stream_; + mutable CellStream true_stream_; + mutable CellStream false_stream_; + mutable bool initialized_; + mutable bool condition_true_; + mutable std::string current_row_; }; class EmptyCellStreamImpl : public AbstractCellStreamImpl { @@ -496,26 +513,28 @@ class EmptyCellStreamImpl : public AbstractCellStreamImpl { bool Next(NextMode) override { return true; } }; -StatusOr CreateFilterImpl( - ::google::bigtable::v2::RowFilter const& filter, CellStream source, - FilterContext const& ctx, std::vector &direct_sinks) { +StatusOr CreateFilterImpl( + ::google::bigtable::v2::RowFilter const& filter, + CellStreamConstructor source_ctor, FilterContext const& ctx, + std::vector& direct_sinks) { if (filter.has_pass_all_filter()) { if (!filter.pass_all_filter()) { return InvalidArgumentError( "`pass_all_filter` explicitly set to `false`.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - return source; - } - if (filter.has_block_all_filter()) { + return source_ctor; + } else if (filter.has_block_all_filter()) { if (!filter.block_all_filter()) { return InvalidArgumentError( "`block_all_filter` explicitly set to `false`.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - return CellStream(std::make_shared()); - } - if (filter.has_row_key_regex_filter()) { + CellStreamConstructor res = [] { + return CellStream(std::make_unique()); + }; + return res; + } else if (filter.has_row_key_regex_filter()) { auto pattern = std::make_shared(filter.row_key_regex_filter()); if (!pattern->ok()) { return InvalidArgumentError( @@ -524,20 +543,24 @@ StatusOr 
CreateFilterImpl( .WithMetadata("filter", filter.DebugString()) .WithMetadata("description", pattern->error())); } - if (source.ApplyFilter(RowKeyRegex{pattern})) { - return source; - } - return MakeTrivialFilter( - std::move(source), - [pattern = std::move(pattern)]( - CellView const& cell_view) mutable -> absl::optional { - if (re2::RE2::PartialMatch(cell_view.row_key(), *pattern)) { - return {}; - } - return NextMode::kCell; - }); - } - if (filter.has_value_regex_filter()) { + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + pattern = std::move(pattern)] { + auto source = source_ctor(); + if (source.ApplyFilter(RowKeyRegex{pattern})) { + return source; + } + return MakeTrivialFilter( + std::move(source), + [pattern = pattern]( + CellView const& cell_view) mutable -> absl::optional { + if (re2::RE2::PartialMatch(cell_view.row_key(), *pattern)) { + return {}; + } + return NextMode::kCell; + }); + }; + return res; + } else if (filter.has_value_regex_filter()) { auto pattern = std::make_shared(filter.value_regex_filter()); if (!pattern->ok()) { return InvalidArgumentError( @@ -546,17 +569,21 @@ StatusOr CreateFilterImpl( .WithMetadata("filter", filter.DebugString()) .WithMetadata("description", pattern->error())); } - return MakeTrivialFilter( - std::move(source), - [pattern = std::move(pattern)]( - CellView const& cell_view) mutable -> absl::optional { - if (re2::RE2::PartialMatch(cell_view.value(), *pattern)) { - return {}; - } - return NextMode::kCell; - }); - } - if (filter.has_row_sample_filter()) { + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + pattern = std::move(pattern)] { + auto source = source_ctor(); + return MakeTrivialFilter( + std::move(source), + [pattern = pattern]( + CellView const& cell_view) mutable -> absl::optional { + if (re2::RE2::PartialMatch(cell_view.value(), *pattern)) { + return {}; + } + return NextMode::kCell; + }); + }; + return res; + } else if (filter.has_row_sample_filter()) { double 
pass_prob = filter.row_sample_filter(); if (pass_prob + std::numeric_limits::epsilon() < 0 || pass_prob - std::numeric_limits::epsilon() > 1) { @@ -564,20 +591,24 @@ StatusOr CreateFilterImpl( "`row_sample_filter` is not a valid probability.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - return MakePerRowStateFilter( - std::move(source), - [](bool& should_pass, CellView const&) -> absl::optional { - if (should_pass) { - return {}; - } - return NextMode::kRow; - }, - [gen = std::mt19937(), pass_prob]() mutable { - std::uniform_real_distribution dis(0.0, 1.0); - return dis(gen) < pass_prob; - }); - } - if (filter.has_family_name_regex_filter()) { + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + pass_prob] { + auto source = source_ctor(); + return MakePerRowStateFilter( + std::move(source), + [](bool& should_pass, CellView const&) -> absl::optional { + if (should_pass) { + return {}; + } + return NextMode::kRow; + }, + [gen = std::mt19937(), pass_prob]() mutable { + std::uniform_real_distribution dis(0.0, 1.0); + return dis(gen) < pass_prob; + }); + }; + return res; + } else if (filter.has_family_name_regex_filter()) { auto pattern = std::make_shared(filter.family_name_regex_filter()); if (!pattern->ok()) { @@ -587,21 +618,25 @@ StatusOr CreateFilterImpl( .WithMetadata("filter", filter.DebugString()) .WithMetadata("description", pattern->error())); } - if (source.ApplyFilter(FamilyNameRegex{pattern})) { - return source; - } - return MakeTrivialFilter( - std::move(source), - [pattern = std::move(pattern)]( - CellView const& cell_view) mutable -> absl::optional { - if (re2::RE2::PartialMatch(cell_view.column_family(), *pattern)) { - return {}; - } - // FIXME we could introduce even column family skipping - return NextMode::kColumn; - }); - } - if (filter.has_column_qualifier_regex_filter()) { + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + pattern = std::move(pattern)] { + auto source = source_ctor(); + 
if (source.ApplyFilter(FamilyNameRegex{pattern})) { + return source; + } + return MakeTrivialFilter( + std::move(source), + [pattern = pattern]( + CellView const& cell_view) mutable -> absl::optional { + if (re2::RE2::PartialMatch(cell_view.column_family(), *pattern)) { + return {}; + } + // FIXME we could introduce even column family skipping + return NextMode::kColumn; + }); + }; + return res; + } else if (filter.has_column_qualifier_regex_filter()) { auto pattern = std::make_shared(filter.column_qualifier_regex_filter()); if (!pattern->ok()) { @@ -611,169 +646,208 @@ StatusOr CreateFilterImpl( .WithMetadata("filter", filter.DebugString()) .WithMetadata("description", pattern->error())); } - if (source.ApplyFilter(ColumnRegex{pattern})) { - return source; - } - return MakeTrivialFilter( - std::move(source), - [pattern = std::move(pattern)]( - CellView const& cell_view) mutable -> absl::optional { - if (re2::RE2::PartialMatch(cell_view.column_qualifier(), *pattern)) { - return {}; - } - return NextMode::kColumn; - }); - } - if (filter.has_column_range_filter()) { + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + pattern = std::move(pattern)] { + auto source = source_ctor(); + if (source.ApplyFilter(ColumnRegex{pattern})) { + return source; + } + return MakeTrivialFilter( + std::move(source), + [pattern]( + CellView const& cell_view) mutable -> absl::optional { + if (re2::RE2::PartialMatch(cell_view.column_qualifier(), + *pattern)) { + return {}; + } + return NextMode::kColumn; + }); + }; + return res; + } else if (filter.has_column_range_filter()) { auto maybe_range = StringRangeSet::Range::FromColumnRange(filter.column_range_filter()); if (!maybe_range) { return maybe_range.status(); } - if (source.ApplyFilter(ColumnRange{*maybe_range})) { - return source; - } - return MakeTrivialFilter( - std::move(source), - [range = *std::move(maybe_range), - column_family = filter.column_range_filter().family_name()]( - CellView const& cell_view) -> 
absl::optional { - if ( cell_view.column_family() == column_family && - range.IsWithin(cell_view.column_qualifier())) { - return {}; + std::string family_name = filter.column_range_filter().family_name(); + CellStreamConstructor res = + [source_ctor = std::move(source_ctor), + family_name = std::move(family_name), + range = *std::move(maybe_range)] { + auto source = source_ctor(); + if (source.ApplyFilter(ColumnRange{range})) { + return source; } - // FIXME - we might know that we should skip the whole column family - return NextMode::kColumn; - }); - } - if (filter.has_value_range_filter()) { + return MakeTrivialFilter( + std::move(source), + [range, family_name]( + CellView const& cell_view) -> absl::optional { + if (cell_view.column_family() == family_name && + range.IsWithin(cell_view.column_qualifier())) { + return {}; + } + // FIXME - we might know that we should skip the whole column + // family + return NextMode::kColumn; + }); + }; + return res; + } else if (filter.has_value_range_filter()) { auto maybe_range = StringRangeSet::Range::FromValueRange(filter.value_range_filter()); if (!maybe_range) { return maybe_range.status(); } - return MakeTrivialFilter( - std::move(source), - [range = *std::move(maybe_range)]( - CellView const& cell_view) -> absl::optional { - if (range.IsWithin(cell_view.value())) { - return {}; - } - return NextMode::kCell; - }); - } - if (filter.has_cells_per_row_offset_filter()) { + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + range = *std::move(maybe_range)] { + auto source = source_ctor(); + return MakeTrivialFilter( + std::move(source), + [range]( + CellView const& cell_view) -> absl::optional { + if (range.IsWithin(cell_view.value())) { + return {}; + } + return NextMode::kCell; + }); + }; + return res; + } else if (filter.has_cells_per_row_offset_filter()) { std::int64_t cells_per_row_offset = filter.cells_per_row_offset_filter(); if (cells_per_row_offset < 0) { return InvalidArgumentError( 
"`cells_per_row_offset_filter` is negative.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - return MakePerRowStateFilter( - std::move(source), - [](std::int64_t& per_row_state, - CellView const&) -> absl::optional { - if (per_row_state-- <= 0) { - return {}; - } - return NextMode::kRow; - }, - [cells_per_row_offset]() { return cells_per_row_offset; }, - [](InternalFilter const& internal_filter) { - return absl::holds_alternative(internal_filter); - }); - } - if (filter.has_cells_per_row_limit_filter()) { + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + cells_per_row_offset] { + auto source = source_ctor(); + return MakePerRowStateFilter( + std::move(source), + [](std::int64_t& per_row_state, + CellView const&) -> absl::optional { + if (per_row_state-- <= 0) { + return {}; + } + return NextMode::kRow; + }, + [cells_per_row_offset]() { return cells_per_row_offset; }, + [](InternalFilter const& internal_filter) { + return absl::holds_alternative(internal_filter); + }); + }; + return res; + } else if (filter.has_cells_per_row_limit_filter()) { std::int64_t cells_per_row_limit = filter.cells_per_row_limit_filter(); if (cells_per_row_limit < 0) { return InvalidArgumentError( "`cells_per_row_limit_filter` is negative.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - return MakePerRowStateFilter( - std::move(source), - [cells_per_row_limit](std::int64_t& per_row_state, - CellView const&) -> absl::optional { - if (per_row_state++ < cells_per_row_limit) { - return {}; - } - return NextMode::kRow; - }, - []() -> std::int64_t { return 0; }, - [](InternalFilter const& internal_filter) { - return absl::holds_alternative(internal_filter); - }); - } - if (filter.has_cells_per_column_limit_filter()) { + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + cells_per_row_limit] { + auto source = source_ctor(); + return MakePerRowStateFilter( + std::move(source), + [cells_per_row_limit](std::int64_t& 
per_row_state, + CellView const&) -> absl::optional { + if (per_row_state++ < cells_per_row_limit) { + return {}; + } + return NextMode::kRow; + }, + []() -> std::int64_t { return 0; }, + [](InternalFilter const& internal_filter) { + return absl::holds_alternative(internal_filter); + }); + }; + return res; + } else if (filter.has_cells_per_column_limit_filter()) { std::int64_t cells_per_column_limit = filter.cells_per_column_limit_filter(); if (cells_per_column_limit < 0) { return InvalidArgumentError( "`cells_per_column_limit_filter` is negative.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - return MakePerColumnStateFilter( - std::move(source), - [cells_per_column_limit](std::int64_t& per_column_state, - CellView const&) -> absl::optional { - if (per_column_state++ < cells_per_column_limit) { - return {}; - } - return NextMode::kColumn; - }, - []() -> std::int64_t { return 0; }, - [](InternalFilter const& internal_filter) { - return !absl::holds_alternative(internal_filter); - }); - } - if (filter.has_timestamp_range_filter()) { + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + cells_per_column_limit] { + auto source = source_ctor(); + return MakePerColumnStateFilter( + std::move(source), + [cells_per_column_limit]( + std::int64_t& per_column_state, + CellView const&) -> absl::optional { + if (per_column_state++ < cells_per_column_limit) { + return {}; + } + return NextMode::kColumn; + }, + []() -> std::int64_t { return 0; }, + [](InternalFilter const& internal_filter) { + return !absl::holds_alternative(internal_filter); + }); + }; + return res; + } else if (filter.has_timestamp_range_filter()) { auto maybe_range = TimestampRangeSet::Range::FromTimestampRange( filter.timestamp_range_filter()); if (!maybe_range) { return maybe_range.status(); } - if (source.ApplyFilter(TimestampRange{*maybe_range})) { - return source; - } - return MakeTrivialFilter( - std::move(source), - [range = *std::move(maybe_range)]( - CellView 
const& cell_view) -> absl::optional { - if (range.IsBelowStart(cell_view.timestamp())) { - return NextMode::kCell; - } - if (range.IsAboveEnd(cell_view.timestamp())) { - return NextMode::kColumn; - } - return {}; - }); - } - if (filter.has_apply_label_transformer()) { + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + range = *std::move(maybe_range)] { + auto source = source_ctor(); + if (source.ApplyFilter(TimestampRange{range})) { + return source; + } + return MakeTrivialFilter( + std::move(source), + [range]( + CellView const& cell_view) -> absl::optional { + if (range.IsBelowStart(cell_view.timestamp())) { + return NextMode::kCell; + } + if (range.IsAboveEnd(cell_view.timestamp())) { + return NextMode::kColumn; + } + return {}; + }); + }; + return res; + } else if (filter.has_apply_label_transformer()) { if (!ctx.IsApplyLabelAllowed()) { return InvalidArgumentError( "Two `apply_label_transformer`s cannot coexist in one chain.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - return MakeTrivialTransformer( - std::move(source), - [label = std::make_shared( - filter.apply_label_transformer())](CellView cell_view) { - cell_view.SetLabel(*label); - return cell_view; - }); - } - if (filter.has_strip_value_transformer()) { + std::string label = filter.apply_label_transformer(); + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + label = std::move(label)] { + auto source = source_ctor(); + return MakeTrivialTransformer(std::move(source), + [label](CellView cell_view) { + cell_view.SetLabel(label); + return cell_view; + }); + }; + return res; + } else if (filter.has_strip_value_transformer()) { if (!filter.strip_value_transformer()) { return InvalidArgumentError( "`strip_value_transformer` explicitly set to `false`.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - return MakeTrivialTransformer(std::move(source), [](CellView cell_view) { - cell_view.SetValue(""); - return cell_view; - }); - } 
- if (filter.has_chain()) { - CellStream res = std::move(source); + CellStreamConstructor res = [source_ctor = std::move(source_ctor)] { + auto source = source_ctor(); + return MakeTrivialTransformer(std::move(source), [](CellView cell_view) { + cell_view.SetValue(""); + return cell_view; + }); + }; + return res; + } else if (filter.has_chain()) { + CellStreamConstructor res = std::move(source_ctor); // FIXME handle the contexts properly for (auto const &subfilter : filter.chain().filters()) { if (subfilter.has_sink()) { @@ -783,7 +857,11 @@ StatusOr CreateFilterImpl( GCP_ERROR_INFO().WithMetadata("filter", subfilter.DebugString())); } direct_sinks.emplace_back(std::move(res)); - return CellStream(std::make_shared()); + res = [] { + return CellStream(std::make_unique()); + }; + return res; + } auto maybe_res = CreateFilterImpl(subfilter, std::move(res), ctx, direct_sinks); @@ -793,9 +871,8 @@ StatusOr CreateFilterImpl( res = *std::move(maybe_res); } return res; - } - if (filter.has_interleave()) { - std::vector parallel_streams; + } else if (filter.has_interleave()) { + std::vector parallel_stream_ctors; for (auto const & subfilter : filter.interleave().filters()) { if (subfilter.has_sink()) { if (!subfilter.sink()) { @@ -803,55 +880,76 @@ StatusOr CreateFilterImpl( "`sink` explicitly set to `false`.", GCP_ERROR_INFO().WithMetadata("filter", subfilter.DebugString())); } - direct_sinks.emplace_back(source); + direct_sinks.emplace_back(source_ctor); continue; } auto maybe_filter = - CreateFilterImpl(subfilter, source, ctx, direct_sinks); + CreateFilterImpl(subfilter, source_ctor, ctx, direct_sinks); if (!maybe_filter) { return maybe_filter.status(); } - parallel_streams.emplace_back(*maybe_filter); - } - if (parallel_streams.empty()) { - return CellStream(std::make_shared()); - } - return CellStream(std::make_shared(parallel_streams)); - } - if (filter.has_condition()) { + parallel_stream_ctors.emplace_back(*maybe_filter); + } + if (parallel_stream_ctors.empty()) { + 
CellStreamConstructor res = [] { + return CellStream(std::make_unique()); + }; + return res; + } + CellStreamConstructor res = [parallel_stream_ctors = + std::move(parallel_stream_ctors)] { + std::vector parallel_streams; + std::transform(parallel_stream_ctors.begin(), parallel_stream_ctors.end(), + std::back_inserter(parallel_streams), + [](CellStreamConstructor const& stream_ctor) { + return stream_ctor(); + }); + return CellStream( + std::make_unique(std::move(parallel_streams))); + }; + return res; + } else if (filter.has_condition()) { if (!filter.condition().has_predicate_filter()){ return InvalidArgumentError( "`condition` must have a `predicate_filter` set.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - // FIXME stream must be deep-copied - auto maybe_predicate_stream = CreateFilterImpl( - filter.condition().predicate_filter(), source, ctx, direct_sinks); - if (!maybe_predicate_stream) { - return maybe_predicate_stream.status(); + + auto maybe_predicate_stream_ctor = CreateFilterImpl( + filter.condition().predicate_filter(), source_ctor, ctx, direct_sinks); + if (!maybe_predicate_stream_ctor) { + return maybe_predicate_stream_ctor.status(); } - auto maybe_true_stream = + auto maybe_true_stream_ctor = filter.condition().has_true_filter() - ? CreateFilterImpl(filter.condition().true_filter(), source, ctx, - direct_sinks) - : StatusOr( - CellStream(std::make_shared())); - if (!maybe_true_stream) { - return maybe_true_stream.status(); - } - auto maybe_false_stream = + ? CreateFilterImpl(filter.condition().true_filter(), source_ctor, + ctx, direct_sinks) + : StatusOr([] { + return CellStream(std::make_unique()); + }); + if (!maybe_true_stream_ctor) { + return maybe_true_stream_ctor.status(); + } + auto maybe_false_stream_ctor = filter.condition().has_false_filter() - ? 
CreateFilterImpl(filter.condition().false_filter(), source, ctx, - direct_sinks) - : StatusOr( - CellStream(std::make_shared())); - if (!maybe_false_stream) { - return maybe_true_stream.status(); - } - - return CellStream(std::make_shared( - std::move(source), *std::move(maybe_predicate_stream), - *std::move(maybe_true_stream), *std::move(maybe_false_stream))); + ? CreateFilterImpl(filter.condition().false_filter(), source_ctor, + ctx, direct_sinks) + : StatusOr([] { + return CellStream(std::make_unique()); + }); + if (!maybe_false_stream_ctor) { + return maybe_false_stream_ctor.status(); + } + + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + predicate_stream_ctor = *std::move(maybe_predicate_stream_ctor), + true_stream_ctor = *std::move(maybe_true_stream_ctor), + false_stream_ctor = *std::move(maybe_false_stream_ctor)] { + return CellStream(std::make_unique( + source_ctor(), predicate_stream_ctor(), true_stream_ctor(), + false_stream_ctor())); + }; + return res; } return UnimplementedError( "Unsupported filter.", @@ -860,34 +958,39 @@ StatusOr CreateFilterImpl( CellStream JoinCellStreams(std::vector cell_streams) { return CellStream( - std::make_shared(std::move(cell_streams))); + std::make_unique(std::move(cell_streams))); } StatusOr CreateFilter( - ::google::bigtable::v2::RowFilter const& filter, CellStream source, - FilterContext const& ctx) { - std::cout << "Creating a filter structure for: " << std::endl - << filter.DebugString() << std::endl; - std::vector direct_sinks; + ::google::bigtable::v2::RowFilter const& filter, + CellStreamConstructor source_ctor, FilterContext const& ctx) { + std::vector direct_sink_ctors; if (filter.has_sink()) { if (!filter.sink()) { return InvalidArgumentError( "`sink` explicitly set to `false`.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - return source; + return source_ctor(); } - auto maybe_filter = - CreateFilterImpl(filter, std::move(source), ctx, direct_sinks); - if 
(!maybe_filter) { - return maybe_filter.status(); + auto maybe_filter_ctor = + CreateFilterImpl(filter, std::move(source_ctor), ctx, direct_sink_ctors); + if (!maybe_filter_ctor) { + return maybe_filter_ctor.status(); } - if (!direct_sinks.empty()) { - direct_sinks.emplace_back(*std::move(maybe_filter)); - return CellStream( - std::make_shared(std::move(direct_sinks))); + if (direct_sink_ctors.empty()) { + return (*maybe_filter_ctor)(); } - return maybe_filter; + std::vector direct_sinks; + + std::transform( + direct_sink_ctors.begin(), direct_sink_ctors.end(), + std::back_inserter(direct_sinks), + [](CellStreamConstructor const& stream_ctor) { return stream_ctor(); }); + + direct_sinks.emplace_back((*maybe_filter_ctor)()); + return CellStream( + std::make_unique(std::move(direct_sinks))); } } // namespace emulator diff --git a/google/cloud/bigtable/emulator/filter.h b/google/cloud/bigtable/emulator/filter.h index 91c55f578bac9..395d299348845 100644 --- a/google/cloud/bigtable/emulator/filter.h +++ b/google/cloud/bigtable/emulator/filter.h @@ -65,7 +65,7 @@ class AbstractCellStreamImpl { class CellStream { public: - CellStream(std::shared_ptr impl) + CellStream(std::unique_ptr impl) : impl_(std::move(impl)) {} bool ApplyFilter(InternalFilter const& internal_filter) { @@ -76,13 +76,13 @@ class CellStream { void Next(NextMode mode = NextMode::kCell); void operator++() { Next(); } CellView operator++(int); - CellView operator*() const { return Value(); } + CellView const& operator*() const { return Value(); } CellView const* operator->() const { return &Value(); } explicit operator bool() const { return HasValue(); } AbstractCellStreamImpl const &impl() const { return *impl_; } private: - std::shared_ptr impl_; + std::unique_ptr impl_; }; class FilterContext { @@ -124,9 +124,10 @@ class MergeCellStreams : public AbstractCellStreamImpl { CellStream JoinCellStreams(std::vector cell_streams); +using CellStreamConstructor = std::function; StatusOr CreateFilter( - 
::google::bigtable::v2::RowFilter const& filter, CellStream source, - FilterContext const& ctx); + ::google::bigtable::v2::RowFilter const& filter, + CellStreamConstructor source_ctor, FilterContext const& ctx); } // namespace emulator } // namespace bigtable diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 648b986a1ef63..e8feee05e75b8 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -283,46 +283,39 @@ Status Table::MutateRow( return Status(); } -class FilteredTableStream : public MergeCellStreams { - public: - FilteredTableStream( - std::vector> cf_streams) - : MergeCellStreams(CreateCellStreams(std::move(cf_streams))) {} - - bool ApplyFilter(InternalFilter const& internal_filter) override { - if (!absl::holds_alternative(internal_filter)) { - return MergeCellStreams::ApplyFilter(internal_filter); - } - for (auto stream_it = unfinished_streams_.begin(); - stream_it != unfinished_streams_.end(); ++stream_it) { - auto* cf_stream = - dynamic_cast(&(*stream_it)->impl()); - assert(cf_stream); - if (re2::RE2::PartialMatch( - cf_stream->column_family_name(), - *absl::get(internal_filter).regex)) { - auto last_it = std::prev(unfinished_streams_.end()); - if (stream_it == last_it) { - unfinished_streams_.pop_back(); - break; - } - stream_it->swap(unfinished_streams_.back()); +bool FilteredTableStream::ApplyFilter(InternalFilter const& internal_filter) { + if (!absl::holds_alternative(internal_filter)) { + return MergeCellStreams::ApplyFilter(internal_filter); + } + for (auto stream_it = unfinished_streams_.begin(); + stream_it != unfinished_streams_.end(); ++stream_it) { + auto* cf_stream = + dynamic_cast(&(*stream_it)->impl()); + assert(cf_stream); + if (!re2::RE2::PartialMatch( + cf_stream->column_family_name(), + *absl::get(internal_filter).regex)) { + auto last_it = std::prev(unfinished_streams_.end()); + if (stream_it == last_it) { unfinished_streams_.pop_back(); + 
break; } + stream_it->swap(unfinished_streams_.back()); + unfinished_streams_.pop_back(); } - return true; } - private: - static std::vector CreateCellStreams( - std::vector> cf_streams) { - std::vector res; - res.reserve(cf_streams.size()); - for (auto& stream : cf_streams) { - res.emplace_back(std::move(stream)); - } - return res; + return true; +} + +std::vector FilteredTableStream::CreateCellStreams( + std::vector> cf_streams) { + std::vector res; + res.reserve(cf_streams.size()); + for (auto& stream : cf_streams) { + res.emplace_back(CellStream(std::move(stream))); } -}; + return res; +} StatusOr CreateStringRangeSet( google::bigtable::v2::RowSet const& row_set) { @@ -361,21 +354,26 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, row_set = std::make_shared(StringRangeSet::All()); } std::lock_guard lock(mu_); - std::vector> per_cf_streams; - for (auto const & column_family: column_families_) { - per_cf_streams.emplace_back(std::make_shared( - *column_family.second, column_family.first, row_set)); - } - auto stream = CellStream( - std::make_shared(std::move(per_cf_streams))); + auto table_stream_ctor = [row_set = std::move(row_set), this] { + std::vector> per_cf_streams; + for (auto const& column_family : column_families_) { + per_cf_streams.emplace_back(std::make_unique( + *column_family.second, column_family.first, row_set)); + } + return CellStream( + std::make_unique(std::move(per_cf_streams))); + }; FilterContext ctx; + StatusOr maybe_stream; if (request.has_filter()) { - auto maybe_stream = CreateFilter(request.filter(), std::move(stream), ctx); - if (!maybe_stream) { - return maybe_stream.status(); - } - stream = *maybe_stream; + maybe_stream = CreateFilter(request.filter(), table_stream_ctor, ctx); + } else { + maybe_stream = table_stream_ctor(); + } + if (!maybe_stream) { + return maybe_stream.status(); } + CellStream &stream = *maybe_stream; for (; stream; ++stream) { std::cout << "Row: " << stream->row_key() << " 
column_family: " << stream->column_family() diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 5d1983abf0b27..0a83299d51551 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -67,6 +67,20 @@ class Table { std::map> column_families_; }; +// This class is public only to enable testing. +class FilteredTableStream : public MergeCellStreams { + public: + FilteredTableStream( + std::vector> cf_streams) + : MergeCellStreams(CreateCellStreams(std::move(cf_streams))) {} + + bool ApplyFilter(InternalFilter const& internal_filter) override; + + private: + static std::vector CreateCellStreams( + std::vector> cf_streams); +}; + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/table_test.cc b/google/cloud/bigtable/emulator/table_test.cc index e930f2eb9b52d..0a14031c6045a 100644 --- a/google/cloud/bigtable/emulator/table_test.cc +++ b/google/cloud/bigtable/emulator/table_test.cc @@ -37,6 +37,86 @@ std::string DumpStream( return ss.str(); } +TEST(FilteredTableStream, Empty) { + FilteredTableStream stream({}); + EXPECT_EQ("", DumpStream(stream)); +} + +TEST(FilteredTableStream, EmptyColumnFamilies) { + ColumnFamily fam1; + ColumnFamily fam2; + auto ffam1 = std::make_unique( + fam1, "fam1", std::make_unique(StringRangeSet::All())); + auto ffam2 = std::make_unique( + fam2, "fam2", std::make_unique(StringRangeSet::All())); + std::vector> fams; + fams.emplace_back(std::move(ffam1)); + fams.emplace_back(std::move(ffam2)); + FilteredTableStream stream(std::move(fams)); + EXPECT_EQ("", DumpStream(stream)); +} + +TEST(FilteredTableStream, ColumnFamiliesAreFiltered) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam1; + ColumnFamily fam2; + fam1.SetCell("row0", "col0", 10_ms, "foo"); + fam2.SetCell("row0", "col0", 10_ms, "foo"); + auto ffam1 = std::make_unique( + fam1, "fam1", 
std::make_unique(StringRangeSet::All())); + auto ffam2 = std::make_unique( + fam2, "fam2", std::make_unique(StringRangeSet::All())); + std::vector> fams; + fams.emplace_back(std::move(ffam1)); + fams.emplace_back(std::move(ffam2)); + FilteredTableStream stream(std::move(fams)); + auto family_pattern = std::make_shared("fam1"); + ASSERT_TRUE(family_pattern->ok()); + stream.ApplyFilter(FamilyNameRegex{family_pattern}); + EXPECT_EQ("row0 fam1:col0 @10ms: foo\n", DumpStream(stream)); +} + +TEST(FilteredTableStream, OtherFiltersArePropagated) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam1; + ColumnFamily fam2; + fam1.SetCell("row1", "col1", 10_ms, "foo"); + fam1.SetCell("row0", "col1", 10_ms, "foo"); // row key regex + fam2.SetCell("row1", "col1", 10_ms, "foo"); // column family regex + fam1.SetCell("row1", "col2", 10_ms, "foo"); // column qualifier regex + fam1.SetCell("row1", "a1", 10_ms, "foo"); // column range + fam1.SetCell("row1", "col1", 1000_ms, "foo"); // timestamp range + auto ffam1 = std::make_unique( + fam1, "fam1", std::make_unique(StringRangeSet::All())); + auto ffam2 = std::make_unique( + fam2, "fam2", std::make_unique(StringRangeSet::All())); + std::vector> fams; + fams.emplace_back(std::move(ffam1)); + fams.emplace_back(std::move(ffam2)); + FilteredTableStream stream(std::move(fams)); + + auto row_key_pattern = std::make_shared("row1"); + ASSERT_TRUE(row_key_pattern->ok()); + EXPECT_TRUE(stream.ApplyFilter(RowKeyRegex{row_key_pattern})); + + auto family_pattern = std::make_shared("fam1"); + ASSERT_TRUE(family_pattern->ok()); + EXPECT_TRUE(stream.ApplyFilter(FamilyNameRegex{family_pattern})); + + auto qualifier_pattern = std::make_shared("1$"); + ASSERT_TRUE(qualifier_pattern->ok()); + EXPECT_TRUE(stream.ApplyFilter(ColumnRegex{qualifier_pattern})); + + EXPECT_TRUE(stream.ApplyFilter( + ColumnRange{StringRangeSet::Range("co", false, "com", false)})); + + EXPECT_TRUE(stream.ApplyFilter( + 
TimestampRange{TimestampRangeSet::Range(0_ms, 300_ms)})); + + EXPECT_EQ("row1 fam1:col1 @10ms: foo\n", DumpStream(stream)); +} } // anonymous namespace } // namespace emulator From 2a17dc7f325d88409e6881f0f0a743d7a35c7a98 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 10 Mar 2025 15:44:56 +0300 Subject: [PATCH 061/195] emulator: Fixes for review: Improve name of return variable. In particular, make it clear that we are only examining the status, and will not use the possibly valid status value (returned object). --- google/cloud/bigtable/emulator/table.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 838154347342c..7df13b1c763ae 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -413,9 +413,11 @@ Status RowTransaction::MergeToCell( Status RowTransaction::DeleteFromFamily( ::google::bigtable::v2::Mutation_DeleteFromFamily const& delete_from_family) { - auto maybe_column_family = table_->FindColumnFamily(delete_from_family); - if (!maybe_column_family) { - return maybe_column_family.status(); + // If the request references an incorrect schema (non-existent + // column family) then return a failure status error immediately. + auto status = table_->FindColumnFamily(delete_from_family); + if (!status.ok()) { + return status.status(); } auto table_it = table_->find(delete_from_family.family_name()); From b99368af427810cb190715553469b93a14f2f394 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 10 Mar 2025 15:47:10 +0300 Subject: [PATCH 062/195] emulator: Rename RestoreRow -> RestoreColumnFamilyRow.
--- google/cloud/bigtable/emulator/table.cc | 8 ++++---- google/cloud/bigtable/emulator/table.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 7df13b1c763ae..76622a9741311 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -430,14 +430,14 @@ Status RowTransaction::DeleteFromFamily( if (auto column_family_it = table_it->second->find(request_.row_key()); column_family_it != table_it->second->end()) { - RestoreRow restore_row; + RestoreColumnFamilyRow restore_row; restore_row.table_it_ = table_it; restore_row.row_key_ = request_.row_key(); - std::vector cells; + std::vector cells; for (auto const& column : column_family_it->second) { for (auto const& column_row : column.second) { - RestoreRow::Cell cell; + RestoreColumnFamilyRow::Cell cell; cell.column_qualifer_ = std::move(column.first); cell.timestamp_ = column_row.first; // Wait, is this correct? @@ -566,7 +566,7 @@ void RowTransaction::Undo() { continue; } - if (auto* restore_row = absl::get_if(&op)) { + if (auto* restore_row = absl::get_if(&op)) { for (auto const& cell : restore_row->cells_) { // Take care to use std::move() to avoid copying potentially // very larg values (the column qualifier and cell values can diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index f21e9053ab35a..f0281eea8090f 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -85,7 +85,7 @@ class Table : public std::enable_shared_from_this
{ std::map> column_families_; }; -struct RestoreRow { +struct RestoreColumnFamilyRow { std::map>::iterator table_it_; std::string row_key_; struct Cell { @@ -164,7 +164,7 @@ class RowTransaction { bool committed_; std::shared_ptr
table_; - std::stack> + std::stack> undo_; ::google::bigtable::v2::MutateRowRequest const& request_; }; From dab9605221b06940d96654f60ca2f948fdf33429 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 10 Mar 2025 15:56:58 +0300 Subject: [PATCH 063/195] emulator: style: Remove trailing underscore from struct member names. --- google/cloud/bigtable/emulator/table.cc | 30 ++++++++++++------------- google/cloud/bigtable/emulator/table.h | 22 +++++++++--------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 76622a9741311..3b38b23a8fe3a 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -432,20 +432,20 @@ Status RowTransaction::DeleteFromFamily( column_family_it != table_it->second->end()) { RestoreColumnFamilyRow restore_row; - restore_row.table_it_ = table_it; - restore_row.row_key_ = request_.row_key(); + restore_row.table_it = table_it; + restore_row.row_key = request_.row_key(); std::vector cells; for (auto const& column : column_family_it->second) { for (auto const& column_row : column.second) { RestoreColumnFamilyRow::Cell cell; - cell.column_qualifer_ = std::move(column.first); - cell.timestamp_ = column_row.first; // Wait, is this correct? - cell.value_ = std::move(column_row.second); + cell.column_qualifer = std::move(column.first); + cell.timestamp = column_row.first; // Wait, is this correct? 
+ cell.value = std::move(column_row.second); cells.push_back(cell); } } - restore_row.cells_ = cells; + restore_row.cells = cells; table_it->second->DeleteRow(request_.row_key()); // Is certain // to succeed // unless we @@ -543,15 +543,15 @@ void RowTransaction::Undo() { undo_.pop(); if (auto* restore_value = absl::get_if(&op)) { - auto& column_row = restore_value->column_row_it_->second; - column_row.find(restore_value->timestamp_)->second = - std::move(restore_value->value_); + auto& column_row = restore_value->column_row_it->second; + column_row.find(restore_value->timestamp)->second = + std::move(restore_value->value); continue; } if (auto* delete_value = absl::get_if(&op)) { - auto& column_row = delete_value->column_row_it_->second; - auto timestamp_it = column_row.find(delete_value->timestamp_); + auto& column_row = delete_value->column_row_it->second; + auto timestamp_it = column_row.find(delete_value->timestamp); column_row.erase(timestamp_it); continue; } @@ -567,13 +567,13 @@ void RowTransaction::Undo() { } if (auto* restore_row = absl::get_if(&op)) { - for (auto const& cell : restore_row->cells_) { + for (auto const& cell : restore_row->cells) { // Take care to use std::move() to avoid copying potentially // very larg values (the column qualifier and cell values can // be very large. - restore_row->table_it_->second->SetCell( - restore_row->row_key_, std::move(cell.column_qualifer_), - cell.timestamp_, std::move(cell.value_)); + restore_row->table_it->second->SetCell( + restore_row->row_key, std::move(cell.column_qualifer), + cell.timestamp, std::move(cell.value)); } continue; } diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index f0281eea8090f..2b0d66cf53999 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -86,29 +86,29 @@ class Table : public std::enable_shared_from_this
{ }; struct RestoreColumnFamilyRow { - std::map>::iterator table_it_; - std::string row_key_; + std::map>::iterator table_it; + std::string row_key; struct Cell { - std::string column_qualifer_; - std::chrono::milliseconds timestamp_; - std::string value_; + std::string column_qualifer; + std::chrono::milliseconds timestamp; + std::string value; }; - std::vector cells_; + std::vector cells; }; struct RestoreValue { // The iterator to the `columns_` member of a relevant `ColumnFamilyRow` where // we should reinsert the value. - std::map::iterator column_row_it_; - std::chrono::milliseconds timestamp_; - std::string value_; + std::map::iterator column_row_it; + std::chrono::milliseconds timestamp; + std::string value; }; struct DeleteValue { // The iterator to the `columns_` member of a relevant `ColumnFamilyRow` where // we should delete value. - std::map::iterator column_row_it_; - std::chrono::milliseconds timestamp_; + std::map::iterator column_row_it; + std::chrono::milliseconds timestamp; }; struct DeleteRow { From a518e8d925da9c36c2145b6a0287c5f2c71d2abc Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 10 Mar 2025 17:03:33 +0300 Subject: [PATCH 064/195] emulator: Rename several iterator variables more appropriately. 
--- google/cloud/bigtable/emulator/table.cc | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 3b38b23a8fe3a..bef7d23b714a0 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -420,33 +420,33 @@ Status RowTransaction::DeleteFromFamily( return status.status(); } - auto table_it = table_->find(delete_from_family.family_name()); - if (table_it == table_->end()) { + auto column_family_it = table_->find(delete_from_family.family_name()); + if (column_family_it == table_->end()) { return Status(StatusCode::kNotFound, absl::StrFormat("column family %s not found in table", delete_from_family.family_name()), ErrorInfo()); } - if (auto column_family_it = table_it->second->find(request_.row_key()); - column_family_it != table_it->second->end()) { + if (auto column_family_row_it = column_family_it->second->find(request_.row_key()); + column_family_row_it != column_family_it->second->end()) { RestoreColumnFamilyRow restore_row; - restore_row.table_it = table_it; + restore_row.table_it = column_family_it; restore_row.row_key = request_.row_key(); std::vector cells; - for (auto const& column : column_family_it->second) { - for (auto const& column_row : column.second) { + for (auto const& column : column_family_row_it->second) { + for (auto const& column_row_it : column.second) { RestoreColumnFamilyRow::Cell cell; cell.column_qualifer = std::move(column.first); - cell.timestamp = column_row.first; // Wait, is this correct? 
- cell.value = std::move(column_row.second); + cell.timestamp = column_row_it.first; + cell.value = std::move(column_row_it.second); cells.push_back(cell); } } restore_row.cells = cells; - table_it->second->DeleteRow(request_.row_key()); // Is certain + column_family_it->second->DeleteRow(request_.row_key()); // Is certain // to succeed // unless we // run out of @@ -456,7 +456,7 @@ Status RowTransaction::DeleteFromFamily( // The row does not exist return Status(StatusCode::kNotFound, absl::StrFormat("row key %s not found in column family %s", - request_.row_key(), table_it->first), + request_.row_key(), column_family_it->first), ErrorInfo()); } From 6255c5e0fffe284950dee58c84e372bd6619aa09 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 10 Mar 2025 17:16:59 +0300 Subject: [PATCH 065/195] emulator: Use std::move() to avoid a copy. --- google/cloud/bigtable/emulator/table.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index bef7d23b714a0..4a79a94bc9785 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -445,7 +445,7 @@ Status RowTransaction::DeleteFromFamily( cells.push_back(cell); } } - restore_row.cells = cells; + restore_row.cells = std::move(cells); column_family_it->second->DeleteRow(request_.row_key()); // Is certain // to succeed // unless we From a76f2d02635d660cebbfb3f6bb4620e2e27900a1 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 10 Mar 2025 17:29:15 +0300 Subject: [PATCH 066/195] emulator: Rename another iterator properly. 
--- google/cloud/bigtable/emulator/table.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 4a79a94bc9785..153def189d65f 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -435,11 +435,11 @@ Status RowTransaction::DeleteFromFamily( restore_row.table_it = column_family_it; restore_row.row_key = request_.row_key(); std::vector cells; - for (auto const& column : column_family_row_it->second) { - for (auto const& column_row_it : column.second) { + for (auto const& column_family_row_it : column_family_row_it->second) { + for (auto const& column_row_it : column_family_row_it.second) { RestoreColumnFamilyRow::Cell cell; - cell.column_qualifer = std::move(column.first); + cell.column_qualifer = std::move(column_family_row_it.first); cell.timestamp = column_row_it.first; cell.value = std::move(column_row_it.second); cells.push_back(cell); From 688c66bfa303ab8286b78085287a8c28a0747c6a Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 10 Mar 2025 17:35:05 +0300 Subject: [PATCH 067/195] emulator: Use std::move() to avoid another copy. --- google/cloud/bigtable/emulator/table.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 153def189d65f..7370a2e532323 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -451,7 +451,7 @@ Status RowTransaction::DeleteFromFamily( // unless we // run out of // memory. 
- undo_.emplace(restore_row); + undo_.emplace(std::move(restore_row)); } else { // The row does not exist return Status(StatusCode::kNotFound, From 3805e40bb2f3704485079c281525378049eb11e4 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 10 Mar 2025 18:13:35 +0300 Subject: [PATCH 068/195] emulator: check and return from exceptional conditions first. This is to remain in line with the rest of the code. --- google/cloud/bigtable/emulator/table.cc | 50 ++++++++++++------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 7370a2e532323..cf21887aac90b 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -428,37 +428,37 @@ Status RowTransaction::DeleteFromFamily( ErrorInfo()); } - if (auto column_family_row_it = column_family_it->second->find(request_.row_key()); - column_family_row_it != column_family_it->second->end()) { - RestoreColumnFamilyRow restore_row; - - restore_row.table_it = column_family_it; - restore_row.row_key = request_.row_key(); - std::vector cells; - for (auto const& column_family_row_it : column_family_row_it->second) { - for (auto const& column_row_it : column_family_row_it.second) { - RestoreColumnFamilyRow::Cell cell; - - cell.column_qualifer = std::move(column_family_row_it.first); - cell.timestamp = column_row_it.first; - cell.value = std::move(column_row_it.second); - cells.push_back(cell); - } - } - restore_row.cells = std::move(cells); - column_family_it->second->DeleteRow(request_.row_key()); // Is certain - // to succeed - // unless we - // run out of - // memory. 
- undo_.emplace(std::move(restore_row)); - } else { + std::map::iterator column_family_row_it; + if (column_family_row_it = column_family_it->second->find(request_.row_key()); + column_family_row_it == column_family_it->second->end()) { // The row does not exist return Status(StatusCode::kNotFound, absl::StrFormat("row key %s not found in column family %s", request_.row_key(), column_family_it->first), ErrorInfo()); } + RestoreColumnFamilyRow restore_row; + + restore_row.table_it = column_family_it; + restore_row.row_key = request_.row_key(); + std::vector cells; + for (auto const& column_family_row_it : column_family_row_it->second) { + for (auto const& column_row_it : column_family_row_it.second) { + RestoreColumnFamilyRow::Cell cell; + + cell.column_qualifer = std::move(column_family_row_it.first); + cell.timestamp = column_row_it.first; + cell.value = std::move(column_row_it.second); + cells.push_back(cell); + } + } + restore_row.cells = std::move(cells); + column_family_it->second->DeleteRow(request_.row_key()); // Is certain + // to succeed + // unless we + // run out of + // memory. + undo_.emplace(std::move(restore_row)); return Status(); } From 5cea718e39ffbb36dfdaef79011f489b283bdf84 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 10 Mar 2025 18:44:37 +0300 Subject: [PATCH 069/195] emulator: Fix the naming of another iterator variable. 
--- google/cloud/bigtable/emulator/table.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index cf21887aac90b..f57a27cb7fdd6 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -476,14 +476,14 @@ Status RowTransaction::SetCell( bool column_existed = true; bool cell_existed = true; - auto row_key_it = column_family.find(request_.row_key()); + auto column_family_row_it = column_family.find(request_.row_key()); std::string value_to_restore; - if (row_key_it == column_family.end()) { + if (column_family_row_it == column_family.end()) { row_existed = false; column_existed = false; cell_existed = false; } else { - auto& column_family_row = row_key_it->second; + auto& column_family_row = column_family_row_it->second; auto column_row_it = column_family_row.find(set_cell.column_qualifier()); if (column_row_it == column_family_row.end()) { column_existed = false; @@ -508,15 +508,15 @@ Status RowTransaction::SetCell( // If we have added a row, a column or a cell, we need to recompute // these iterators. 
- row_key_it = column_family.find(request_.row_key()); - auto& column_family_row = row_key_it->second; + column_family_row_it = column_family.find(request_.row_key()); + auto& column_family_row = column_family_row_it->second; auto column_row_it = column_family_row.find(set_cell.column_qualifier()); auto timestamp_it = column_row_it->second.find( std::chrono::duration_cast( std::chrono::microseconds(set_cell.timestamp_micros()))); if (!row_existed) { - DeleteRow delete_row = {row_key_it, column_family}; + DeleteRow delete_row = {column_family_row_it, column_family}; undo_.emplace(delete_row); } From c7f50b0303b60b44ef3e8e61550d6222fe2a8f6d Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 10 Mar 2025 19:14:19 +0300 Subject: [PATCH 070/195] emulator: remove use of C++17 syntax feature (if with initializer). We need to have the code compilable by a C++14 compiler. --- google/cloud/bigtable/emulator/table.cc | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index f57a27cb7fdd6..07b5a8c17d738 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -542,31 +542,36 @@ void RowTransaction::Undo() { auto op = undo_.top(); undo_.pop(); - if (auto* restore_value = absl::get_if(&op)) { + auto* restore_value = absl::get_if(&op); + if (restore_value) { auto& column_row = restore_value->column_row_it->second; column_row.find(restore_value->timestamp)->second = std::move(restore_value->value); continue; } - if (auto* delete_value = absl::get_if(&op)) { + auto* delete_value = absl::get_if(&op); + if (delete_value) { auto& column_row = delete_value->column_row_it->second; auto timestamp_it = column_row.find(delete_value->timestamp); column_row.erase(timestamp_it); continue; } - if (auto* delete_row = absl::get_if(&op)) { + auto* delete_row = absl::get_if(&op); + if (delete_row) { 
delete_row->column_family.erase(delete_row->row_it); continue; } - if (auto* delete_column = absl::get_if(&op)) { + auto* delete_column = absl::get_if(&op); + if (delete_column) { delete_column->column_family_row.erase(delete_column->column_row_it); continue; } - if (auto* restore_row = absl::get_if(&op)) { + auto* restore_row = absl::get_if(&op); + if (restore_row) { for (auto const& cell : restore_row->cells) { // Take care to use std::move() to avoid copying potentially // very larg values (the column qualifier and cell values can From a51584e17ef4525af2b2c899adcee01ce0113350 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 10 Mar 2025 19:22:44 +0300 Subject: [PATCH 071/195] emulator: rename another iterator variable more appropriately. --- google/cloud/bigtable/emulator/table.cc | 4 ++-- google/cloud/bigtable/emulator/table.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 07b5a8c17d738..e03df49f7fe9e 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -439,7 +439,7 @@ Status RowTransaction::DeleteFromFamily( } RestoreColumnFamilyRow restore_row; - restore_row.table_it = column_family_it; + restore_row.column_family_it = column_family_it; restore_row.row_key = request_.row_key(); std::vector cells; for (auto const& column_family_row_it : column_family_row_it->second) { @@ -576,7 +576,7 @@ void RowTransaction::Undo() { // Take care to use std::move() to avoid copying potentially // very larg values (the column qualifier and cell values can // be very large. 
- restore_row->table_it->second->SetCell( + restore_row->column_family_it->second->SetCell( restore_row->row_key, std::move(cell.column_qualifer), cell.timestamp, std::move(cell.value)); } diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 2b0d66cf53999..4f61442b480c8 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -86,7 +86,7 @@ class Table : public std::enable_shared_from_this
{ }; struct RestoreColumnFamilyRow { - std::map>::iterator table_it; + std::map>::iterator column_family_it; std::string row_key; struct Cell { std::string column_qualifer; From ff5d40660159543faeaf648326224dc4b9aefe0e Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 12 Mar 2025 16:11:28 +0300 Subject: [PATCH 072/195] emulator: Add a comment justifying storing the column family iterator in the undo log. In general, it is unsafe to store other kinds of map iterators in the undo log, since a subsequent mutation on the same row could delete the underlying map, and invalidate the iterator. However, deleting a column family is a schema operation that cannot happen while we hold the table lock, therefore for column families only, it is safe to store the iterators in the undo log. --- google/cloud/bigtable/emulator/table.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 4f61442b480c8..c20073e6e9da5 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -86,6 +86,12 @@ class Table : public std::enable_shared_from_this
{ }; struct RestoreColumnFamilyRow { + // We hold the table lock and any operation to delete a column + // family will need to acquire the same lock. Therefore we are sure + // that a column family cannot disappear concurrent to a chain of + // mutations on the same row. So for column families alone (but not + // for other maps) it is safe to store their iterator in the undo + // log. It is unsafe for all other maps. std::map>::iterator column_family_it; std::string row_key; struct Cell { From 1ac8657dc13299b0ce8b72aa0345d2744c5d18fe Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 12 Mar 2025 19:21:05 +0300 Subject: [PATCH 073/195] emulator: transaction rollback: RestoreValue: Eliminate the storage of iterators in undo log. --- google/cloud/bigtable/emulator/table.cc | 14 ++++++++++---- google/cloud/bigtable/emulator/table.h | 3 +++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index e03df49f7fe9e..7d3bbbb4f831f 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include namespace google { @@ -529,7 +530,11 @@ Status RowTransaction::SetCell( DeleteValue delete_value = {column_row_it, timestamp_it->first}; undo_.emplace(delete_value); } else { - RestoreValue restore_value = {column_row_it, timestamp_it->first, + RestoreValue restore_value = {column_row_it, + column_family, + column_family_row_it->first, + std::move(set_cell.column_qualifier()), + timestamp_it->first, std::move(value_to_restore)}; undo_.emplace(restore_value); } @@ -544,9 +549,10 @@ void RowTransaction::Undo() { auto* restore_value = absl::get_if(&op); if (restore_value) { - auto& column_row = restore_value->column_row_it->second; - column_row.find(restore_value->timestamp)->second = - std::move(restore_value->value); + restore_value->column_family.SetCell( + 
std::move(restore_value->row_key), + std::move(restore_value->column_qualifier), restore_value->timestamp, + std::move(restore_value->value)); continue; } diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index c20073e6e9da5..dd9689875cc59 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -106,6 +106,9 @@ struct RestoreValue { // The iterator to the `columns_` member of a relevant `ColumnFamilyRow` where // we should reinsert the value. std::map::iterator column_row_it; + ColumnFamily& column_family; + std::string row_key; + std::string column_qualifier; std::chrono::milliseconds timestamp; std::string value; }; From 8221065efa79f504b40aae317d7869838dcef9e5 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 12 Mar 2025 19:27:19 +0300 Subject: [PATCH 074/195] emulator: actually remove iterator field from RestoreValue. --- google/cloud/bigtable/emulator/table.cc | 4 +--- google/cloud/bigtable/emulator/table.h | 3 --- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 7d3bbbb4f831f..77461a79c5d7e 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -530,9 +530,7 @@ Status RowTransaction::SetCell( DeleteValue delete_value = {column_row_it, timestamp_it->first}; undo_.emplace(delete_value); } else { - RestoreValue restore_value = {column_row_it, - column_family, - column_family_row_it->first, + RestoreValue restore_value = {column_family, column_family_row_it->first, std::move(set_cell.column_qualifier()), timestamp_it->first, std::move(value_to_restore)}; diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index dd9689875cc59..e3eaeea0d39b5 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -103,9 +103,6 @@ struct 
RestoreColumnFamilyRow { }; struct RestoreValue { - // The iterator to the `columns_` member of a relevant `ColumnFamilyRow` where - // we should reinsert the value. - std::map::iterator column_row_it; ColumnFamily& column_family; std::string row_key; std::string column_qualifier; From 54acb8278e667a46083709ac5db13c17f1086f6a Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 12 Mar 2025 21:40:19 +0300 Subject: [PATCH 075/195] emulator: Correct and radically simplify SetCell undo code logic. - Don't store iterators in the DeleteValue undo type (they may be invalidated by a subsequent mutation). - Maintain the invariants that there is never an empty column or empty row by using the ColumnFamily::DeleteColumn() function to delete ranges of cells during undo. The latter takes care of deleting empty columns and empty rows, already. - And therefore we can get rid of the DeleteRow and DeleteColumn undo types and associated code. All tests continue to pass. --- google/cloud/bigtable/emulator/table.cc | 34 +++++++++++-------------- google/cloud/bigtable/emulator/table.h | 6 ++--- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 77461a79c5d7e..5841309998b23 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -20,6 +20,7 @@ #include "google/cloud/bigtable/internal/google_bytes_traits.h" #include "google/cloud/internal/make_status.h" #include "google/protobuf/util/field_mask_util.h" +#include #include #include #include @@ -473,21 +474,16 @@ Status RowTransaction::SetCell( auto& column_family = maybe_column_family->get(); - bool row_existed = true; - bool column_existed = true; bool cell_existed = true; auto column_family_row_it = column_family.find(request_.row_key()); std::string value_to_restore; if (column_family_row_it == column_family.end()) { - row_existed = false; - column_existed = false; cell_existed = 
false; } else { auto& column_family_row = column_family_row_it->second; auto column_row_it = column_family_row.find(set_cell.column_qualifier()); if (column_row_it == column_family_row.end()) { - column_existed = false; cell_existed = false; } else { auto timestamp_it = column_row_it->second.find( @@ -516,18 +512,10 @@ Status RowTransaction::SetCell( std::chrono::duration_cast( std::chrono::microseconds(set_cell.timestamp_micros()))); - if (!row_existed) { - DeleteRow delete_row = {column_family_row_it, column_family}; - undo_.emplace(delete_row); - } - - if (!column_existed) { - DeleteColumn delete_column_row = {column_row_it, column_family_row}; - undo_.emplace(delete_column_row); - } - if (!cell_existed) { - DeleteValue delete_value = {column_row_it, timestamp_it->first}; + DeleteValue delete_value = {column_family, column_family_row_it->first, + std::move(set_cell.column_qualifier()), + timestamp_it->first}; undo_.emplace(delete_value); } else { RestoreValue restore_value = {column_family, column_family_row_it->first, @@ -556,9 +544,17 @@ void RowTransaction::Undo() { auto* delete_value = absl::get_if(&op); if (delete_value) { - auto& column_row = delete_value->column_row_it->second; - auto timestamp_it = column_row.find(delete_value->timestamp); - column_row.erase(timestamp_it); + ::google::bigtable::v2::TimestampRange range; + auto start_micros = delete_value->timestamp.count() * 1000; + // The following is an exclusive upper bound, 1ms higher Since + // timestamps have millisecond resolution, 2 timestamps have to + // be at least 1ms apart which means that setting this as the + // end of the range guarantees that we delete at most 1 (because + // the upper bound is exclusive). 
+ auto end_micros = start_micros + 1000; + range.set_start_timestamp_micros(start_micros); + range.set_end_timestamp_micros(end_micros); + delete_value->column_family.DeleteColumn(delete_value->row_key, std::move(delete_value->column_qualifier), range); continue; } diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index e3eaeea0d39b5..49dca446fb8c9 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -111,9 +111,9 @@ struct RestoreValue { }; struct DeleteValue { - // The iterator to the `columns_` member of a relevant `ColumnFamilyRow` where - // we should delete value. - std::map::iterator column_row_it; + ColumnFamily& column_family; + std::string row_key; + std::string column_qualifier; std::chrono::milliseconds timestamp; }; From c62d625f3b5fd8a7622473930be308b3cbd50532 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 12 Mar 2025 22:12:25 +0300 Subject: [PATCH 076/195] emulator: remove undo types that are no longer needed. 
--- google/cloud/bigtable/emulator/table.cc | 12 ------------ google/cloud/bigtable/emulator/table.h | 2 +- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 5841309998b23..43bc773438183 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -558,18 +558,6 @@ void RowTransaction::Undo() { continue; } - auto* delete_row = absl::get_if(&op); - if (delete_row) { - delete_row->column_family.erase(delete_row->row_it); - continue; - } - - auto* delete_column = absl::get_if(&op); - if (delete_column) { - delete_column->column_family_row.erase(delete_column->column_row_it); - continue; - } - auto* restore_row = absl::get_if(&op); if (restore_row) { for (auto const& cell : restore_row->cells) { diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 49dca446fb8c9..56a930117c33f 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -170,7 +170,7 @@ class RowTransaction { bool committed_; std::shared_ptr
table_; - std::stack> + std::stack> undo_; ::google::bigtable::v2::MutateRowRequest const& request_; }; From 6c8e1ccaa2843a48b4a9d8aea8acc642badc9aae Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 13 Mar 2025 13:01:43 +0300 Subject: [PATCH 077/195] emulator: Delete definitions of rollback types no longer used. --- google/cloud/bigtable/emulator/table.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 56a930117c33f..32d30294d0644 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -117,22 +117,6 @@ struct DeleteValue { std::chrono::milliseconds timestamp; }; -struct DeleteRow { - // The iterator to the `rows_` member of a relavant ColumnFamily - // which we should delete the row if the ColumnfamilyRow has been - // introduced by the mutation (i.e. it did not exist previously). - std::map::iterator row_it; - ::google::cloud::bigtable::emulator::ColumnFamily& column_family; -}; - -struct DeleteColumn { - // The iterator to the `columns_` member of the relevant - // ColumnFamilyRow which we should delete if the ColumnRow has been - // introduced in the mutation (i.e. did not exist previously). - std::map::iterator column_row_it; - ::google::cloud::bigtable::emulator::ColumnFamilyRow& column_family_row; -}; - class RowTransaction { public: explicit RowTransaction( From f871e272817139d7694876e01aa43ba4eb121aa2 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 13 Mar 2025 14:26:02 +0300 Subject: [PATCH 078/195] emulator: return unimplemented error status for unsupported aggregation mutations. Replaces FIXMEs which would silently fail when these mutations are attempted. 
--- google/cloud/bigtable/emulator/table.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 43bc773438183..77d0bba296416 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -234,9 +234,13 @@ Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { return status; } } else if (mutation.has_add_to_cell()) { - // FIXME + return UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", mutation.DebugString())); } else if (mutation.has_merge_to_cell()) { - // FIXME + return UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", mutation.DebugString())); } else if (mutation.has_delete_from_column()) { auto const& delete_from_column = mutation.delete_from_column(); auto maybe_column_family = FindColumnFamily(delete_from_column); From 60e3a1ac79e7dbb73f789dc888b2a80b0e976f18 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 13 Mar 2025 14:34:50 +0300 Subject: [PATCH 079/195] emulator: row mutations: use a const reference rather than copying the mutation. 
--- google/cloud/bigtable/emulator/table.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 77d0bba296416..dd0d36d21b18f 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -226,7 +226,7 @@ Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { RowTransaction row_transaction(this->get(), request); - for (auto mutation : request.mutations()) { + for (const auto& mutation : request.mutations()) { if (mutation.has_set_cell()) { auto const& set_cell = mutation.set_cell(); auto status = row_transaction.SetCell(set_cell); From 35aaa89843619eccb8393676e3dc7e6599681adf Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 13 Mar 2025 20:38:17 +0300 Subject: [PATCH 080/195] emulator: Modify the Delete* methods to return vector of deleted cells. And also use std::move() to avoid copying potentially large cell values. This change is needed to simplify the logic of preparing undo logs for the Delete* mutations. In particular, we don't have to re-iterate the table. 
--- .../cloud/bigtable/emulator/column_family.cc | 21 ++++++++++--------- .../cloud/bigtable/emulator/column_family.h | 12 ++++++++--- .../bigtable/emulator/column_family_test.cc | 12 +++++------ google/cloud/bigtable/emulator/table.cc | 2 +- 4 files changed, 26 insertions(+), 21 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 2ad4a93fdd1e0..91e05629f4baf 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -29,9 +29,9 @@ void ColumnRow::SetCell(std::chrono::milliseconds timestamp, cells_[timestamp] = std::move(value); } -std::size_t ColumnRow::DeleteTimeRange( +std::vector ColumnRow::DeleteTimeRange( ::google::bigtable::v2::TimestampRange const& time_range) { - std::size_t num_erased = 0; + std::vector deleted_cells; for (auto cell_it = cells_.lower_bound( std::chrono::duration_cast( std::chrono::microseconds(time_range.start_timestamp_micros()))); @@ -40,10 +40,11 @@ std::size_t ColumnRow::DeleteTimeRange( cell_it->first < std::chrono::duration_cast( std::chrono::microseconds( time_range.end_timestamp_micros())));) { + Cell cell = {std::move(cell_it->first), std::move(cell_it->second)}; + deleted_cells.push_back(cell); cells_.erase(cell_it++); - ++num_erased; } - return num_erased; + return deleted_cells; } void ColumnFamilyRow::SetCell(std::string const& column_qualifier, @@ -52,12 +53,12 @@ void ColumnFamilyRow::SetCell(std::string const& column_qualifier, columns_[column_qualifier].SetCell(timestamp, value); } -std::size_t ColumnFamilyRow::DeleteColumn( +std::vector ColumnFamilyRow::DeleteColumn( std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range) { auto column_it = columns_.find(column_qualifier); if (column_it == columns_.end()) { - return 0; + return std::vector(); } auto res = column_it->second.DeleteTimeRange(time_range); if (!column_it->second.HasCells()) { @@ -77,19 
+78,19 @@ bool ColumnFamily::DeleteRow(std::string const& row_key) { return rows_.erase(row_key) > 0; } -std::size_t ColumnFamily::DeleteColumn( +std::vector ColumnFamily::DeleteColumn( std::string const& row_key, std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range) { auto row_it = rows_.find(row_key); if (row_it != rows_.end()) { - auto num_erased_cells = + auto erased_cells = row_it->second.DeleteColumn(column_qualifier, time_range); if (!row_it->second.HasColumns()) { rows_.erase(row_it); } - return num_erased_cells; + return erased_cells; } - return 0; + return std::vector(); } class FilteredColumnFamilyStream::FilterApply { diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index a235bb8d81eec..c1440668425f3 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -15,6 +15,7 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H +#include "google/cloud/bigtable/cell.h" #include "google/cloud/bigtable/emulator/cell_view.h" #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" @@ -30,6 +31,11 @@ namespace cloud { namespace bigtable { namespace emulator { +struct Cell { + std::chrono::milliseconds timestamp; + std::string value; +}; + class ColumnRow { public: ColumnRow() = default; @@ -38,7 +44,7 @@ class ColumnRow { ColumnRow& operator=(ColumnRow const &) = delete; void SetCell(std::chrono::milliseconds timestamp, std::string const& value); - std::size_t DeleteTimeRange( + std::vector DeleteTimeRange( ::google::bigtable::v2::TimestampRange const& time_range); bool HasCells() const { return !cells_.empty(); } @@ -76,7 +82,7 @@ class ColumnFamilyRow { void SetCell(std::string const& column_qualifier, std::chrono::milliseconds timestamp, std::string const& value); 
- std::size_t DeleteColumn( + std::vector DeleteColumn( std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); bool HasColumns() { return !columns_.empty(); } @@ -116,7 +122,7 @@ class ColumnFamily { void SetCell(std::string const& row_key, std::string const& column_qualifier, std::chrono::milliseconds timestamp, std::string const& value); bool DeleteRow(std::string const& row_key); - std::size_t DeleteColumn( + std::vector DeleteColumn( std::string const& row_key, std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index f9a71ed5340af..66e2c260029c4 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -127,7 +127,7 @@ col2 @10ms: qux EXPECT_EQ("qux", fam_row.upper_bound("col1")->second.begin()->second); EXPECT_EQ(1, fam_row.DeleteColumn("col1", - ::google::bigtable::v2::TimestampRange{})); + ::google::bigtable::v2::TimestampRange{}).size()); // Verify that there is no empty column. 
EXPECT_EQ(2, std::distance(fam_row.begin(), fam_row.end())); @@ -135,7 +135,7 @@ col2 @10ms: qux google::bigtable::v2::TimestampRange not_matching_range; not_matching_range.set_start_timestamp_micros(10); not_matching_range.set_end_timestamp_micros(20); - EXPECT_EQ(0, fam_row.DeleteColumn("col2", not_matching_range)); + EXPECT_EQ(0, fam_row.DeleteColumn("col2", not_matching_range).size()); EXPECT_EQ(R"""( col0 @10ms: baz @@ -168,7 +168,7 @@ row2 :col0 @10ms: qux DumpColumnFamilyRow(fam.upper_bound("row1")->second)); EXPECT_EQ(1, fam.DeleteColumn("row1", "col0", - ::google::bigtable::v2::TimestampRange{})); + ::google::bigtable::v2::TimestampRange{}).size()); // Verify that there is no empty row EXPECT_EQ(2, std::distance(fam.begin(), fam.end())); @@ -285,7 +285,7 @@ TEST(FilteredColumnFamilyStream, FilterByColumnRange) { fam.SetCell("row0", "col2", 200_ms, "foo"); fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out - fam.SetCell("row2", "col1", 300_ms, "foo"); + fam.SetCell("row2", "col1", 300_ms, "foo"); auto included_rows = std::make_shared(StringRangeSet::All()); FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); filtered_stream.ApplyFilter( @@ -313,7 +313,7 @@ TEST(FilteredColumnFamilyStream, FilterByColumnRegex) { fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out fam.SetCell("row1", "col2", 300_ms, "foo"); - fam.SetCell("row2", "col0", 300_ms, "foo"); + fam.SetCell("row2", "col0", 300_ms, "foo"); auto included_rows = std::make_shared(StringRangeSet::All()); FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); filtered_stream.ApplyFilter(ColumnRegex{pattern1}); @@ -376,5 +376,3 @@ row3 cf1:col3 @300ms: foo } // namespace bigtable } // namespace cloud } // namespace google - - diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 
dd0d36d21b18f..f0d3a138bc7ea 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -249,7 +249,7 @@ Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { } if (maybe_column_family->get().DeleteColumn( request.row_key(), delete_from_column.column_qualifier(), - delete_from_column.time_range()) == 0) { + delete_from_column.time_range()).empty()) { // FIXME no such row or column } } else if (mutation.has_delete_from_family()) { From c02a348e40b97a55f8b7b9a14aa0090ea80c2a87 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 14 Mar 2025 15:25:30 +0300 Subject: [PATCH 081/195] emualator: Implement DeleteFromColumn mutation using RowTransaction. --- google/cloud/bigtable/emulator/table.cc | 36 +++++++++++++++++++------ 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index f0d3a138bc7ea..51d66be18e3f5 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -243,14 +243,9 @@ Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { GCP_ERROR_INFO().WithMetadata("mutation", mutation.DebugString())); } else if (mutation.has_delete_from_column()) { auto const& delete_from_column = mutation.delete_from_column(); - auto maybe_column_family = FindColumnFamily(delete_from_column); - if (!maybe_column_family) { - return maybe_column_family.status(); - } - if (maybe_column_family->get().DeleteColumn( - request.row_key(), delete_from_column.column_qualifier(), - delete_from_column.time_range()).empty()) { - // FIXME no such row or column + auto status = row_transaction.DeleteFromColumn(delete_from_column); + if (!status.ok()) { + return status; } } else if (mutation.has_delete_from_family()) { auto const& delete_from_family = mutation.delete_from_family(); @@ -416,6 +411,31 @@ Status RowTransaction::MergeToCell( 
GCP_ERROR_INFO().WithMetadata("mutation", merge_to_cell.DebugString())); } +Status RowTransaction::DeleteFromColumn( + ::google::bigtable::v2::Mutation_DeleteFromColumn const& + delete_from_column) { + auto status = table_->FindColumnFamily(delete_from_column); + if (!status.ok()) { + return status.status(); + } + + auto& column_family = status->get(); + + auto deleted_cells = column_family.DeleteColumn( + request_.row_key(), delete_from_column.column_qualifier(), + delete_from_column.time_range()); + + for (auto cell : deleted_cells) { + RestoreValue restore_value = {column_family, request_.row_key(), + delete_from_column.column_qualifier(), + std::move(cell.timestamp), + std::move(cell.value)}; + undo_.emplace(restore_value); + } + + return Status(); +} + Status RowTransaction::DeleteFromFamily( ::google::bigtable::v2::Mutation_DeleteFromFamily const& delete_from_family) { From 8c1cbb0a2d2d499377cccc219d4335aecc80dcdd Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 14 Mar 2025 19:32:39 +0300 Subject: [PATCH 082/195] emulator: Test transaction DeleteFromColumn for basic functionality. 
--- .../cloud/bigtable/emulator/rollback_test.cc | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 740cf2b392a96..6e6a95dd039e1 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -74,6 +74,26 @@ Status delete_from_families( return table->MutateRow(mutation_request); } +Status delete_from_column( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::string const& column_family, + std::string const& column_qualifer, + ::google::bigtable::v2::TimestampRange* timestamp_range) { + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* delete_from_column_mutation = + mutation_request_mutation->mutable_delete_from_column(); + delete_from_column_mutation->set_family_name(column_family); + delete_from_column_mutation->set_column_qualifier(column_qualifer); + delete_from_column_mutation->set_allocated_time_range(timestamp_range); + + return table->MutateRow(mutation_request); +} + Status set_cells( std::shared_ptr& table, std::string const& table_name, std::string const& row_key, @@ -541,6 +561,46 @@ TEST(TransactonRollback, DeleteFromFamilyRollback) { ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); } +// Does DeleteFromColumn basically work? 
+TEST(TransactonRollback, DeleteFromColumnBasicFunction) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifer = "test"; + auto const* data = "test"; + + std::vector column_families = {column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v = { + {column_family_name, column_qualifer, 1000, data}, + {column_family_name, column_qualifer, 2000, data}, + {column_family_name, column_qualifer, 3000, data}, + }; + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, + 1000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, + 2000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, + 3000, data)); + + auto* range = new(::google::bigtable::v2::TimestampRange); + range->set_start_timestamp_micros(v[0].timestamp_micros); + range->set_end_timestamp_micros(v[2].timestamp_micros + 1000); + + ASSERT_STATUS_OK(delete_from_column( + table, table_name, row_key, column_family_name, column_qualifer, range)); +} + } // namespace emulator } // namespace bigtable } // namespace cloud From 6e183b9a8a9b4e194bab8ad0e08698e5aa14c74f Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 14 Mar 2025 19:37:44 +0300 Subject: [PATCH 083/195] emulator: DeleteFromColumn basic functionality test: test that the column is actually deleted. 
--- google/cloud/bigtable/emulator/rollback_test.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 6e6a95dd039e1..78265533b6310 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -599,6 +599,8 @@ TEST(TransactonRollback, DeleteFromColumnBasicFunction) { ASSERT_STATUS_OK(delete_from_column( table, table_name, row_key, column_family_name, column_qualifer, range)); + status = has_column(table, column_family_name, row_key, column_qualifer); + ASSERT_EQ(false, status.ok()); } } // namespace emulator From 1d6b85f8215ed5f81fdf44357a6abd4134225854 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 14 Mar 2025 20:14:44 +0300 Subject: [PATCH 084/195] emulator: make it possible to test a vector of DeleteFromColumn row mutations. This is needed to test DeleteFromColumn rollback. --- .../cloud/bigtable/emulator/rollback_test.cc | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 78265533b6310..8faf2e24d8dad 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -74,26 +74,33 @@ Status delete_from_families( return table->MutateRow(mutation_request); } -Status delete_from_column( +struct DeleteFromColumnParams { + std::string column_family; + std::string column_qualifier; + ::google::bigtable::v2::TimestampRange *timestamp_range; +}; + +Status delete_from_columns( std::shared_ptr& table, std::string const& table_name, std::string const& row_key, - std::string const& column_family, - std::string const& column_qualifer, - ::google::bigtable::v2::TimestampRange* timestamp_range) { + std::vector v) { ::google::bigtable::v2::MutateRowRequest mutation_request; 
mutation_request.set_table_name(table_name); mutation_request.set_row_key(row_key); + for (auto& param : v) { auto* mutation_request_mutation = mutation_request.add_mutations(); auto* delete_from_column_mutation = mutation_request_mutation->mutable_delete_from_column(); - delete_from_column_mutation->set_family_name(column_family); - delete_from_column_mutation->set_column_qualifier(column_qualifer); - delete_from_column_mutation->set_allocated_time_range(timestamp_range); + delete_from_column_mutation->set_family_name(param.column_family); + delete_from_column_mutation->set_column_qualifier(param.column_qualifier); + delete_from_column_mutation->set_allocated_time_range(param.timestamp_range); + } return table->MutateRow(mutation_request); } + Status set_cells( std::shared_ptr& table, std::string const& table_name, std::string const& row_key, @@ -561,6 +568,14 @@ TEST(TransactonRollback, DeleteFromFamilyRollback) { ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); } +::google::bigtable::v2::TimestampRange* new_timestamp_range(int64_t start, int64_t end) { + auto* range = new(::google::bigtable::v2::TimestampRange); + range->set_start_timestamp_micros(start); + range->set_end_timestamp_micros(end); + + return range; +} + // Does DeleteFromColumn basically work? 
TEST(TransactonRollback, DeleteFromColumnBasicFunction) { ::google::bigtable::admin::v2::Table schema; @@ -593,12 +608,13 @@ TEST(TransactonRollback, DeleteFromColumnBasicFunction) { ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, 3000, data)); - auto* range = new(::google::bigtable::v2::TimestampRange); - range->set_start_timestamp_micros(v[0].timestamp_micros); - range->set_end_timestamp_micros(v[2].timestamp_micros + 1000); + std::vector dv = { + {column_family_name, column_qualifer, + new_timestamp_range(v[0].timestamp_micros, + v[2].timestamp_micros + 1000)}}; + + ASSERT_STATUS_OK(delete_from_columns(table, table_name, row_key, dv)); - ASSERT_STATUS_OK(delete_from_column( - table, table_name, row_key, column_family_name, column_qualifer, range)); status = has_column(table, column_family_name, row_key, column_qualifer); ASSERT_EQ(false, status.ok()); } From 9dc3cc489620301c712f198e45f3e7df301171e6 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 14 Mar 2025 20:52:09 +0300 Subject: [PATCH 085/195] emulator: Test transaction rollback for DeleteFromColumn. --- .../cloud/bigtable/emulator/rollback_test.cc | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 8faf2e24d8dad..6505ac4600b34 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -619,6 +619,58 @@ TEST(TransactonRollback, DeleteFromColumnBasicFunction) { ASSERT_EQ(false, status.ok()); } +// Does DeleteFromColumn rollback work? 
+TEST(TransactonRollback, DeleteFromColumnRollback) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifer = "test"; + // Simulate mutation failure and cause rollback by attempting a + // mutation with a non-existent column family name. + auto const* const bad_column_family_name = "this_column_family_does_not_exist"; + auto const* data = "test"; + + std::vector column_families = {column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v = { + {column_family_name, column_qualifer, 1000, data}, + {column_family_name, column_qualifer, 2000, data}, + {column_family_name, column_qualifer, 3000, data}, + }; + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, + 1000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, + 2000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, + 3000, data)); + + // The first mutation will succeed. The second assumes a schema that + // does not exist - it should fail and cause rollback of the column + // deletion in the first mutation. + std::vector dv = { + {column_family_name, column_qualifer, + new_timestamp_range(v[0].timestamp_micros, + v[2].timestamp_micros + 1000)}, + {bad_column_family_name, column_qualifer, new_timestamp_range(1000, 2000)}, + }; + // The mutation chains should fail and rollback should occur. + ASSERT_EQ(false, delete_from_columns(table, table_name, row_key, dv).ok()); + + // The column should have been restored. 
+ ASSERT_STATUS_OK(has_column(table, column_family_name, row_key, column_qualifer)); +} + + } // namespace emulator } // namespace bigtable } // namespace cloud From 17c733b5cd2c44019eb552175b008abe7492cbcb Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 14 Mar 2025 21:16:17 +0300 Subject: [PATCH 086/195] emulator: TEST(TransactonRollback, DeleteFromColumnRollback): also check that the cells are properly restored. --- google/cloud/bigtable/emulator/rollback_test.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 6505ac4600b34..57221e5d8660a 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -668,6 +668,13 @@ TEST(TransactonRollback, DeleteFromColumnRollback) { // The column should have been restored. ASSERT_STATUS_OK(has_column(table, column_family_name, row_key, column_qualifer)); + // Check that the data is where and what we expect. + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, + 1000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, + 2000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, + 3000, data)); } From 155fc81125ebd1b4ad6a73261ec8048f260f8aed Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 14 Mar 2025 22:40:57 +0300 Subject: [PATCH 087/195] emulator: Prepare to support transaction rollbacks for DeleteRow mutation. In particular, reutrn a map of vectors containing the column qualifers to columns (cells) deleted, for the undo log. 
--- .../cloud/bigtable/emulator/column_family.cc | 19 +++++++++++++++++-- .../cloud/bigtable/emulator/column_family.h | 2 +- .../bigtable/emulator/column_family_test.cc | 4 ++-- google/cloud/bigtable/emulator/table.cc | 2 +- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 91e05629f4baf..0285df122d785 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -14,6 +14,7 @@ #include "google/cloud/bigtable/emulator/column_family.h" #include +#include namespace google { namespace cloud { @@ -74,8 +75,22 @@ void ColumnFamily::SetCell(std::string const& row_key, rows_[row_key].SetCell(column_qualifier, timestamp, value); } -bool ColumnFamily::DeleteRow(std::string const& row_key) { - return rows_.erase(row_key) > 0; +std::map> ColumnFamily::DeleteRow(std::string const& row_key) { + std::map> res; + + auto& column_family_row = rows_[row_key]; + + for (const auto& column_it : column_family_row) { + // Not setting start and end timestamps selects all cells for deletion. 
+ ::google::bigtable::v2::TimestampRange time_range; + + auto deleted_column = DeleteColumn(row_key, column_it.first, time_range); + if (deleted_column.size() > 0) { + res[std::move(column_it.first)] = std::move(deleted_column); + } + } + + return res; } std::vector ColumnFamily::DeleteColumn( diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index c1440668425f3..796acd34aae6c 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -121,7 +121,7 @@ class ColumnFamily { void SetCell(std::string const& row_key, std::string const& column_qualifier, std::chrono::milliseconds timestamp, std::string const& value); - bool DeleteRow(std::string const& row_key); + std::map> DeleteRow(std::string const& row_key); std::vector DeleteColumn( std::string const& row_key, std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index 66e2c260029c4..6e3adc191df2e 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -179,8 +179,8 @@ row2 :col0 @10ms: qux )""", "\n" + DumpColumnFamily(fam)); - EXPECT_TRUE(fam.DeleteRow("row2")); - EXPECT_FALSE(fam.DeleteRow("row_nonexistent")); + EXPECT_TRUE(fam.DeleteRow("row2").size() > 0); + EXPECT_FALSE(fam.DeleteRow("row_nonexistent").size() > 0); EXPECT_EQ("row0 :col0 @10ms: baz\n", DumpColumnFamily(fam)); } diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 51d66be18e3f5..c44f118e338c9 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -256,7 +256,7 @@ Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { } else if (mutation.has_delete_from_row()) { bool row_existed = 
false; for (auto& column_family : column_families_) { - row_existed |= column_family.second->DeleteRow(request.row_key()); + row_existed |= column_family.second->DeleteRow(request.row_key()).size(); } if (!row_existed) { // FIXME no such row existed From 077c6382dccf2021db169dcdb5e6f81022e7c5cd Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Sun, 16 Mar 2025 23:29:12 +0100 Subject: [PATCH 088/195] chore: comments, tidy, fixes --- google/cloud/bigtable/CMakeLists.txt | 2 +- google/cloud/bigtable/emulator/CMakeLists.txt | 16 +- .../emulator/bigtable_emulator_common.bzl | 2 - .../emulator/bigtable_emulator_unit_tests.bzl | 1 - google/cloud/bigtable/emulator/cell_view.h | 4 +- google/cloud/bigtable/emulator/cluster.cc | 25 +- google/cloud/bigtable/emulator/cluster.h | 24 +- .../cloud/bigtable/emulator/column_family.cc | 13 +- .../cloud/bigtable/emulator/column_family.h | 137 +++++++- .../bigtable/emulator/column_family_test.cc | 30 +- google/cloud/bigtable/emulator/emulator.cc | 7 +- google/cloud/bigtable/emulator/filter.cc | 292 +++++++++--------- google/cloud/bigtable/emulator/filter.h | 146 +++++++-- google/cloud/bigtable/emulator/filter_test.cc | 5 +- google/cloud/bigtable/emulator/filtered_map.h | 60 +++- .../bigtable/emulator/filtered_map_test.cc | 10 +- google/cloud/bigtable/emulator/range_set.cc | 77 +++-- google/cloud/bigtable/emulator/range_set.h | 47 ++- .../cloud/bigtable/emulator/range_set_test.cc | 31 +- .../cloud/bigtable/emulator/row_iterators.cc | 28 -- .../cloud/bigtable/emulator/row_iterators.h | 38 --- .../bigtable/emulator/row_iterators_test.cc | 35 --- .../cloud/bigtable/emulator/row_streamer.cc | 7 +- google/cloud/bigtable/emulator/row_streamer.h | 30 +- google/cloud/bigtable/emulator/server.cc | 39 +-- google/cloud/bigtable/emulator/server.h | 3 + google/cloud/bigtable/emulator/server_test.cc | 3 +- google/cloud/bigtable/emulator/table.cc | 114 ++++--- google/cloud/bigtable/emulator/table.h | 12 +- google/cloud/bigtable/emulator/table_test.cc 
| 14 +- .../cloud/bigtable/emulator/to_grpc_status.h | 1 - .../bigtable/google_cloud_cpp_bigtable.bzl | 2 +- .../bigtable/internal/row_range_helpers.cc | 23 +- .../bigtable/internal/row_range_helpers.h | 8 +- 34 files changed, 735 insertions(+), 551 deletions(-) delete mode 100644 google/cloud/bigtable/emulator/row_iterators.cc delete mode 100644 google/cloud/bigtable/emulator/row_iterators.h delete mode 100644 google/cloud/bigtable/emulator/row_iterators_test.cc diff --git a/google/cloud/bigtable/CMakeLists.txt b/google/cloud/bigtable/CMakeLists.txt index 430ab2284b051..c99cd4d1a0c09 100644 --- a/google/cloud/bigtable/CMakeLists.txt +++ b/google/cloud/bigtable/CMakeLists.txt @@ -205,9 +205,9 @@ add_library( internal/retry_context.cc internal/retry_context.h internal/retry_traits.h - internal/row_reader_impl.h internal/row_range_helpers.cc internal/row_range_helpers.h + internal/row_reader_impl.h internal/rpc_policy_parameters.h internal/rpc_policy_parameters.inc internal/traced_row_reader.cc diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index 972005f962410..788f2896817f8 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -27,8 +27,6 @@ add_library( filtered_map.h range_set.cc range_set.h - row_iterators.cc - row_iterators.h row_streamer.cc row_streamer.h server.cc @@ -56,13 +54,8 @@ if (BUILD_TESTING) # List the unit tests, then setup the targets and dependencies. 
set(bigtable_emulator_unit_tests # cmake-format: sort - column_family_test.cc - filter_test.cc - filtered_map_test.cc - range_set_test.cc - row_iterators_test.cc - server_test.cc - table_test.cc) + column_family_test.cc filter_test.cc filtered_map_test.cc + range_set_test.cc server_test.cc table_test.cc) export_list_to_bazel("bigtable_emulator_unit_tests.bzl" "bigtable_emulator_unit_tests" YEAR "2024") @@ -89,9 +82,8 @@ if (BUILD_TESTING) endforeach () endif () -set(bigtable_emulator_programs - # cmake-format: sort - emulator.cc) +set(bigtable_emulator_programs # cmake-format: sort + emulator.cc) export_list_to_bazel("bigtable_emulator_programs.bzl" "bigtable_emulator_programs" YEAR "2024") diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl index ae95dda8c484f..1b733901fde4a 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl @@ -23,7 +23,6 @@ bigtable_emulator_common_hdrs = [ "filter.h", "filtered_map.h", "range_set.h", - "row_iterators.h", "row_streamer.h", "server.h", "table.h", @@ -36,7 +35,6 @@ bigtable_emulator_common_srcs = [ "column_family_test.cc", "filter.cc", "range_set.cc", - "row_iterators.cc", "row_streamer.cc", "server.cc", "table.cc", diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl index 75b68000449f9..bc82e69a79ded 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -21,7 +21,6 @@ bigtable_emulator_unit_tests = [ "filter_test.cc", "filtered_map_test.cc", "range_set_test.cc", - "row_iterators_test.cc", "server_test.cc", "table_test.cc", ] diff --git a/google/cloud/bigtable/emulator/cell_view.h b/google/cloud/bigtable/emulator/cell_view.h index dc0f8002a2ac7..b0cb9db253b49 100644 --- 
a/google/cloud/bigtable/emulator/cell_view.h +++ b/google/cloud/bigtable/emulator/cell_view.h @@ -15,10 +15,10 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CELL_VIEW_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CELL_VIEW_H -#include +#include #include #include -#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/cluster.cc b/google/cloud/bigtable/emulator/cluster.cc index c327457a2fb1a..e346b757d8708 100644 --- a/google/cloud/bigtable/emulator/cluster.cc +++ b/google/cloud/bigtable/emulator/cluster.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/cluster.h" +#include "google/cloud/internal/make_status.h" #include "google/cloud/status.h" #include "google/cloud/status_or.h" -#include "google/cloud/internal/make_status.h" #include "absl/strings/match.h" namespace google { @@ -27,8 +27,7 @@ namespace { namespace btadmin = google::bigtable::admin::v2; StatusOr ApplyView(std::string const& table_name, - Table const &table, - btadmin::Table_View view, + Table const& table, btadmin::Table_View view, btadmin::Table_View default_view) { if (view == btadmin::Table::VIEW_UNSPECIFIED) { view = default_view; @@ -80,7 +79,7 @@ StatusOr Cluster::CreateTable(std::string const& table_name, return maybe_table.status(); } { - std::lock_guard lock(mu_); + std::lock_guard lock(mu_); if (!table_by_name_.emplace(table_name, *maybe_table).second) { return google::cloud::internal::AlreadyExistsError( "Table already exists.", @@ -94,7 +93,7 @@ StatusOr> Cluster::ListTables( std::string const& instance_name, btadmin::Table_View view) const { std::map> table_by_name_copy; { - std::lock_guard lock(mu_); + std::lock_guard lock(mu_); table_by_name_copy = table_by_name_; } std::vector res; @@ -119,7 +118,7 @@ StatusOr Cluster::GetTable(std::string const& table_name, btadmin::Table_View view) const { std::shared_ptr
found_table; { - std::lock_guard lock(mu_); + std::lock_guard lock(mu_); auto it = table_by_name_.find(table_name); if (it == table_by_name_.end()) { return NotFoundError("No such table.", GCP_ERROR_INFO().WithMetadata( @@ -132,12 +131,11 @@ StatusOr Cluster::GetTable(std::string const& table_name, Status Cluster::DeleteTable(std::string const& table_name) { { - std::lock_guard lock(mu_); + std::lock_guard lock(mu_); auto it = table_by_name_.find(table_name); if (it == table_by_name_.end()) { - return NotFoundError( - "No such table.", - GCP_ERROR_INFO().WithMetadata("table_name", table_name)); + return NotFoundError("No such table.", GCP_ERROR_INFO().WithMetadata( + "table_name", table_name)); } if (it->second->IsDeleteProtected()) { return FailedPreconditionError( @@ -150,7 +148,7 @@ Status Cluster::DeleteTable(std::string const& table_name) { } bool Cluster::HasTable(std::string const& table_name) const { - std::lock_guard lock(mu_); + std::lock_guard lock(mu_); return table_by_name_.find(table_name) != table_by_name_.end(); } @@ -160,9 +158,8 @@ StatusOr> Cluster::FindTable( std::lock_guard lock(mu_); auto it = table_by_name_.find(table_name); if (it == table_by_name_.end()) { - return NotFoundError( - "No such table.", - GCP_ERROR_INFO().WithMetadata("table_name", table_name)); + return NotFoundError("No such table.", GCP_ERROR_INFO().WithMetadata( + "table_name", table_name)); } return it->second; } diff --git a/google/cloud/bigtable/emulator/cluster.h b/google/cloud/bigtable/emulator/cluster.h index 73bbbc8cab44c..ff7b9f6bcd8eb 100644 --- a/google/cloud/bigtable/emulator/cluster.h +++ b/google/cloud/bigtable/emulator/cluster.h @@ -22,17 +22,17 @@ namespace google { namespace cloud { namespace bigtable { namespace emulator { + /** * An emulated cluster, which manages the lifecycle of all tables. * * This emulated cluster holds tables from all projects and instances - they are - * merely a component of table names. + * merely components of table names. 
*/ class Cluster { public: StatusOr CreateTable( - std::string const& table_name, - google::bigtable::admin::v2::Table schema); + std::string const& table_name, google::bigtable::admin::v2::Table schema); StatusOr> ListTables( std::string const& instance_name, @@ -42,19 +42,23 @@ class Cluster { std::string const& table_name, google::bigtable::admin::v2::Table_View view) const; - Status DeleteTable(std::string const &table_name); + Status DeleteTable(std::string const& table_name); - bool HasTable(std::string const &table_name) const; + bool HasTable(std::string const& table_name) const; StatusOr> FindTable(std::string const& table_name); private: - mutable std::mutex mu_; - // All the tables indexed by their names (i.e. - // projects/{}/instances/{}/tables/{}). We're holding the tables by - // `shared_ptr`s in order to be able to allow for more concurrency - every - // access to a table should start with creating a copy of the shared pointer. + + /** + * All the tables indexed by their names. + * + * The names are in the form `/ projects/{}/instances/{}/tables/{}`. We're + * holding the tables by `shared_ptr`s in order to be able to allow for more + * concurrency - every access to a table should start with creating a copy of + * the shared pointer. 
+ */ std::map> table_by_name_; }; diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 2ad4a93fdd1e0..58da352079a2e 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -26,7 +26,7 @@ void ColumnRow::SetCell(std::chrono::milliseconds timestamp, timestamp = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()); } - cells_[timestamp] = std::move(value); + cells_[timestamp] = value; } std::size_t ColumnRow::DeleteTimeRange( @@ -94,7 +94,7 @@ std::size_t ColumnFamily::DeleteColumn( class FilteredColumnFamilyStream::FilterApply { public: - FilterApply(FilteredColumnFamilyStream& parent) : parent_(parent) {} + explicit FilterApply(FilteredColumnFamilyStream& parent) : parent_(parent) {} bool operator()(ColumnRange const& column_range) { parent_.column_ranges_.Intersect(column_range.range); @@ -124,15 +124,14 @@ class FilteredColumnFamilyStream::FilterApply { FilteredColumnFamilyStream::FilteredColumnFamilyStream( ColumnFamily const& column_family, std::string column_family_name, - std::shared_ptr row_set) + std::shared_ptr row_set) : column_family_name_(std::move(column_family_name)), row_ranges_(std::move(row_set)), column_ranges_(StringRangeSet::All()), timestamp_ranges_(TimestampRangeSet::All()), rows_(RangeFilteredMapView(column_family, *row_ranges_), - std::cref(row_regexes_)), - initialized_(false) {} + std::cref(row_regexes_)) {} bool FilteredColumnFamilyStream::ApplyFilter( InternalFilter const& internal_filter) { @@ -204,8 +203,8 @@ bool FilteredColumnFamilyStream::PointToFirstCellAfterRowChange() const { for (; (*row_it_) != rows_.end(); ++(*row_it_)) { columns_ = RegexFiteredMapView< RangeFilteredMapView>( - RangeFilteredMapView((*row_it_)->second, - column_ranges_), + RangeFilteredMapView( + (*row_it_)->second, column_ranges_), column_regexes_); column_it_ = columns_.value().begin(); if 
(PointToFirstCellAfterColumnChange()) { diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 6f1e6021ee287..5ee1220f68e92 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -15,13 +15,13 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H -#include -#include -#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/bigtable/emulator/cell_view.h" #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" -#include "google/cloud/bigtable/emulator/cell_view.h" +#include "google/cloud/bigtable/emulator/range_set.h" #include "absl/types/optional.h" +#include +#include #include namespace google { @@ -29,9 +29,28 @@ namespace cloud { namespace bigtable { namespace emulator { +/** + * Objects of this class hold contents of a specific column in a specific row. + * + * This is essentially a blessed map from timestamps to values. + */ class ColumnRow { public: + /** + * Insert or update an existing cell at a given timestamp. + * + * @param timestamp the time stamp at which the value will be inserted or + * updated. If it equals zero then number of milliseconds since epoch will + * be used instead. + * @param value the value to insert/update. + */ void SetCell(std::chrono::milliseconds timestamp, std::string const& value); + /** + * Delete cells falling into a given timestamp range. + * + * @param time_range the timestamp range dictating which values to delete. + * @return number of deleted cells. + */ std::size_t DeleteTimeRange( ::google::bigtable::v2::TimestampRange const& time_range); @@ -51,10 +70,35 @@ class ColumnRow { std::map cells_; }; +/** + * Objects of this class hold contents of a specific row in a column family.
+ * + * The users of this class may access the columns for a given row via + * references to `ColumnRow`. + * + * It is guaranteed that every returned `ColumnRow` contains at least one cell. + */ class ColumnFamilyRow { public: + /** + * Insert or update an existing cell at a given column and timestamp. + * + * @param column_qualifier the column qualifier at which to update the value. + * @param timestamp the time stamp at which the value will be inserted or + * updated. If it equals zero then number of milliseconds since epoch will + * be used instead. + * @param value the value to insert/update. + */ void SetCell(std::string const& column_qualifier, std::chrono::milliseconds timestamp, std::string const& value); + /** + * Delete cells falling into a given timestamp range in one column. + * + * @param column_qualifier the column qualifier from which to delete the + * values. + * @param time_range the timestamp range dictating which values to delete. + * @return number of deleted cells. + */ std::size_t DeleteColumn( std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); @@ -73,13 +117,47 @@ class ColumnFamilyRow { std::map columns_; }; +/** + * Objects of this class hold contents of a column family indexed by rows. + * + * The users of this class may access individual rows via references to + * `ColumnFamilyRow`. + * + * It is guaranteed that every returned `ColumnFamilyRow` contains at least one + * `ColumnRow`. + */ class ColumnFamily { public: using const_iterator = std::map::const_iterator; + /** + * Insert or update an existing cell at a given row, column and timestamp. + * + * @param row_key the row key at which to update the value. + * @param column_qualifier the column qualifier at which to update the value. + * @param timestamp the time stamp at which the value will be inserted or + * updated. If it equals zero then number of milliseconds since epoch will + * be used instead.
+ * @param value the value to insert/update. + */ void SetCell(std::string const& row_key, std::string const& column_qualifier, std::chrono::milliseconds timestamp, std::string const& value); + /** + * Delete the whole row from this column family. + * + * @param row_key the row key to remove. + * @return whether such a row existed. + */ bool DeleteRow(std::string const& row_key); + /** + * Delete cells from a row falling into a given timestamp range in one column. + * + * @param row_key the row key to remove the cells from. + * @param column_qualifier the column qualifier from which to delete the + * values. + * @param time_range the timestamp range dictating which values to delete. + * @return number of deleted cells. + */ std::size_t DeleteColumn( std::string const& row_key, std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); @@ -97,11 +175,37 @@ class ColumnFamily { std::map rows_; }; +/** + * A stream of cells which allows for filtering unwanted ones. + * + * In absence of any filters, objects of this class stream the contents of a + * whole `ColumnFamily` just like true `Bigtable` would. + * + * The users can apply the following filters: + * * row sets - to only stream cells for relevant rows + * * row regexes - ditto + * * column ranges - to only stream cells with given column qualifiers + * * column regexes - ditto + * * timestamp ranges - to only stream cells with timestamps in given ranges + * + * Objects of this class are not thread safe. Their users need to ensure that + * underlying `ColumnFamily` object tree doesn't change. + */ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { public: + /** + * Construct a new object. + * + * @param column_family the family to iterate over. It should not change over this + * object's lifetime. + * @param column_family_name the name of this column family. It will be used to + * populate the returned `CellView`s.
+ * @param row_set the row set indicating which row keys to include in the returned + * values. + */ FilteredColumnFamilyStream(ColumnFamily const& column_family, std::string column_family_name, - std::shared_ptr row_set); + std::shared_ptr row_set); bool ApplyFilter(InternalFilter const& internal_filter) override; bool HasValue() const override; CellView const& Value() const override; @@ -112,9 +216,21 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { class FilterApply; void InitializeIfNeeded() const; - // Returns whether we've managed to find another cell in currently pointed row + /** + * Adjust the internal iterators after `column_it_` advanced. + * + * We need to make sure that either we reach the end of the column family or: + * * `column_it_` doesn't point to `end()` + * * `cell_it_` points to a cell in the column family pointed to by + * `column_it_` + */ bool PointToFirstCellAfterColumnChange() const; - // Returns whether we've managed to find another cell + /** + * Adjust the internal iterators after `row_it_` advanced. + * + * Similarly to `PointToFirstCellAfterColumnChange()` it ensures that all + * internal iterators are valid (or we've reached `end()`). + */ bool PointToFirstCellAfterRowChange() const; std::string column_family_name_; @@ -133,8 +249,9 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { cells_; // If row_it_ == rows_.end() we've reached the end. - // We keep the invariant that if (row_it_ != rows_.end()) then - // cell_it_ != cells.end() && column_it_ != columns_.end() + // We maintain the following invariant: + // if (row_it_ != rows_.end()) then + // cell_it_ != cells.end() && column_it_ != columns_.end().
mutable absl::optional>::const_iterator> row_it_; @@ -145,7 +262,7 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { RangeFilteredMapView::const_iterator> cell_it_; mutable absl::optional cur_value_; - mutable bool initialized_; + mutable bool initialized_{false}; }; } // namespace emulator diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index f9a71ed5340af..c396a0a3e570e 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "google/cloud/bigtable/emulator/row_iterators.h" +#include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/row_range.h" -#include "google/cloud/testing_util/is_proto_equal.h" #include "google/cloud/testing_util/chrono_literals.h" +#include "google/cloud/testing_util/is_proto_equal.h" #include #include @@ -229,7 +229,8 @@ row1 cf1:col1 @30ms: foo row2 cf1:col0 @10ms: qux row2 cf1:col2 @40ms: qux row2 cf1:col2 @50ms: qux -)""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +)""", + "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); } TEST(FilteredColumnFamilyStream, FilterByTimestampRange) { @@ -248,7 +249,7 @@ TEST(FilteredColumnFamilyStream, FilterByTimestampRange) { fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out - fam.SetCell("row1", "col0", 20_ms, "bar"); // Filter out + fam.SetCell("row1", "col0", 20_ms, "bar"); // Filter out fam.SetCell("row1", "col0", 10_ms, "baz"); fam.SetCell("row1", "col1", 200_ms, "foo"); // Filter out fam.SetCell("row1", "col1", 250_ms, "foo"); // Filter out @@ -273,7 +274,8 @@ row0 cf1:col2 @140ms: foo row1 cf1:col2 @100ms: foo row1 cf1:col2 @120ms: 
foo row1 cf1:col2 @140ms: foo -)""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +)""", + "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); } TEST(FilteredColumnFamilyStream, FilterByColumnRange) { @@ -285,7 +287,7 @@ TEST(FilteredColumnFamilyStream, FilterByColumnRange) { fam.SetCell("row0", "col2", 200_ms, "foo"); fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out - fam.SetCell("row2", "col1", 300_ms, "foo"); + fam.SetCell("row2", "col1", 300_ms, "foo"); auto included_rows = std::make_shared(StringRangeSet::All()); FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); filtered_stream.ApplyFilter( @@ -296,7 +298,8 @@ TEST(FilteredColumnFamilyStream, FilterByColumnRange) { row0 cf1:col1 @100ms: foo row0 cf1:col2 @200ms: foo row2 cf1:col1 @300ms: foo -)""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +)""", + "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); } TEST(FilteredColumnFamilyStream, FilterByColumnRegex) { @@ -313,7 +316,7 @@ TEST(FilteredColumnFamilyStream, FilterByColumnRegex) { fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out fam.SetCell("row1", "col2", 300_ms, "foo"); - fam.SetCell("row2", "col0", 300_ms, "foo"); + fam.SetCell("row2", "col0", 300_ms, "foo"); auto included_rows = std::make_shared(StringRangeSet::All()); FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); filtered_stream.ApplyFilter(ColumnRegex{pattern1}); @@ -323,7 +326,8 @@ row0 cf1:col0 @10ms: foo row0 cf1:col2 @200ms: foo row1 cf1:col2 @300ms: foo row2 cf1:col0 @300ms: foo -)""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +)""", + "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); } TEST(FilteredColumnFamilyStream, FilterRowKeyRegex) { @@ -345,7 +349,8 @@ TEST(FilteredColumnFamilyStream, FilterRowKeyRegex) { EXPECT_EQ(R"""( row0 cf1:col0 @10ms: foo row2 
cf1:col2 @200ms: foo -)""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +)""", + "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); } TEST(FilteredColumnFamilyStream, FilterRowSet) { @@ -366,7 +371,8 @@ TEST(FilteredColumnFamilyStream, FilterRowSet) { row0 cf1:col0 @10ms: foo row1 cf1:col1 @100ms: foo row3 cf1:col3 @300ms: foo -)""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +)""", + "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); } // Add Next Column, Next Row tests @@ -376,5 +382,3 @@ row3 cf1:col3 @300ms: foo } // namespace bigtable } // namespace cloud } // namespace google - - diff --git a/google/cloud/bigtable/emulator/emulator.cc b/google/cloud/bigtable/emulator/emulator.cc index ff6cb1d0a4060..e1dc144a93ed5 100644 --- a/google/cloud/bigtable/emulator/emulator.cc +++ b/google/cloud/bigtable/emulator/emulator.cc @@ -18,16 +18,15 @@ namespace google { namespace cloud { namespace bigtable { -namespace emulator { -} // namespace emulator +namespace emulator {} // namespace emulator } // namespace bigtable } // namespace cloud } // namespace google -int main() { +int main() { using namespace google::cloud::bigtable::emulator; auto server = CreateDefaultEmulatorServer("[::]", 8888); std::cout << "Server running on port " << server->bound_port() << "\n"; server->Wait(); - return 0; + return 0; } diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index b6863170dc278..a4de0e2fac43d 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ b/google/cloud/bigtable/emulator/filter.cc @@ -14,13 +14,13 @@ #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/range_set.h" -#include "google/cloud/status_or.h" +#include "google/cloud/bigtable/internal/google_bytes_traits.h" #include "google/cloud/internal/invoke_result.h" #include "google/cloud/internal/make_status.h" -#include "google/cloud/bigtable/internal/google_bytes_traits.h" +#include 
"google/cloud/status_or.h" #include -#include #include +#include namespace google { namespace cloud { @@ -32,64 +32,73 @@ bool PassAllFilters(InternalFilter const&) { return true; } } // namespace -FilterContext& FilterContext::DisallowApplyLabel() { - allow_apply_label_ = false; - return *this; -} - void CellStream::Next(NextMode mode) { if (impl_->Next(mode)) { return; } if (mode == NextMode::kColumn) { - std::string cur_row_key = impl_->Value().row_key(); - std::string cur_column_family = impl_->Value().column_family(); - std::string cur_column_qualifier = impl_->Value().column_qualifier(); - for (impl_->Next(); - impl_->HasValue() && cur_row_key == impl_->Value().row_key() && - cur_column_family == impl_->Value().column_family() && - cur_column_qualifier == impl_->Value().column_qualifier(); - impl_->Next()); + EmulateNextColumn(); return; } assert(mode == NextMode::kRow); + EmulateNextRow(); +} + +void CellStream::NextColumn() { + if (!impl_->Next(NextMode::kColumn)) { + EmulateNextColumn(); + } +} + +void CellStream::EmulateNextColumn() { + std::string cur_row_key = impl_->Value().row_key(); + std::string cur_column_family = impl_->Value().column_family(); + std::string cur_column_qualifier = impl_->Value().column_qualifier(); + for (impl_->Next(NextMode::kCell); + impl_->HasValue() && cur_row_key == impl_->Value().row_key() && + cur_column_family == impl_->Value().column_family() && + cur_column_qualifier == impl_->Value().column_qualifier(); + impl_->Next(NextMode::kCell)); +} + +void CellStream::EmulateNextRow() { std::string cur_row_key = impl_->Value().row_key(); - for (Next(NextMode::kColumn); + for (NextColumn(); impl_->HasValue() && cur_row_key == impl_->Value().row_key(); - impl_->Next(NextMode::kColumn)); + NextColumn()); } template class PerRowStateFilter { - static_assert( - google::cloud::internal::is_invocable::value, - "StateResetFunctor must be invocable with no arguments"); + static_assert(google::cloud::internal::is_invocable::value, + 
"StateResetFunctor must be invocable with no arguments"); using State = std::decay_t>; - static_assert(std::is_default_constructible_v, + static_assert(std::is_default_constructible::value, "State must be default constructible"); - static_assert(std::is_assignable_v, + static_assert(std::is_assignable::value, "State must assignable"); - static_assert(std::is_same_v, - absl::optional>, + static_assert(std::is_same, + absl::optional>::value, "Invalid result of `FilterFunctor` invocation."); public: PerRowStateFilter(FilterFunctor filter, StateResetFunctor reset) : filter_(std::move(filter)), reset_(std::move(reset)) {} - absl::optional operator()(CellView const &cell_view) { + absl::optional operator()(CellView const& cell_view) { if (!prev_row_ || prev_row_.value() != cell_view.row_key()) { state_ = reset_(); prev_row_ = cell_view.row_key(); } return filter_(state_, cell_view); } + private: absl::optional prev_row_; State state_; - FilterFunctor filter_; + FilterFunctor filter_; StateResetFunctor reset_; }; @@ -100,35 +109,36 @@ class PerColumnStateFilter { "StateResetFunctor must be invocable with no arguments"); using State = std::decay_t>; - static_assert(std::is_default_constructible_v, + static_assert(std::is_default_constructible::value, "State must be default constructible"); - static_assert(std::is_assignable_v, + static_assert(std::is_assignable::value, "State must assignable"); - static_assert(std::is_same_v, - absl::optional>, + static_assert(std::is_same, + absl::optional>::value, "Invali result of `FilterFunctor` invocation."); public: PerColumnStateFilter(FilterFunctor filter, StateResetFunctor reset) : filter_(std::move(filter)), reset_(std::move(reset)) {} - absl::optional operator()(CellView const &cell_view) { - if (!prev_|| !prev_->Matches(cell_view)) { + absl::optional operator()(CellView const& cell_view) { + if (!prev_ || !prev_->Matches(cell_view)) { state_ = reset_(); prev_ = Prev(cell_view); } return filter_(state_, cell_view); } + private: 
class Prev { public: - Prev(CellView const& cell_view) + explicit Prev(CellView const& cell_view) : row_key_(cell_view.row_key()), column_family_(cell_view.column_family()), column_qualifier_(cell_view.column_qualifier()) {} - bool Matches(CellView const &cell_view) { + bool Matches(CellView const& cell_view) { return row_key_ == cell_view.row_key() && column_family_ == cell_view.column_family() && column_qualifier_ == cell_view.column_qualifier(); @@ -141,7 +151,7 @@ class PerColumnStateFilter { }; absl::optional prev_; State state_; - FilterFunctor filter_; + FilterFunctor filter_; StateResetFunctor reset_; }; @@ -151,9 +161,7 @@ class TrivialTransformer : public AbstractCellStreamImpl { TrivialTransformer(CellStream source, Transformer transformer) : source_(std::move(source)), transformer_(std::move(transformer)) {} - bool ApplyFilter(InternalFilter const& ) override { - return false; - } + bool ApplyFilter(InternalFilter const&) override { return false; } bool HasValue() const override { return source_.HasValue(); } @@ -164,8 +172,8 @@ class TrivialTransformer : public AbstractCellStreamImpl { return transformed_.value(); } - bool Next(NextMode mode) override { - source_.Next(mode); + bool Next(NextMode mode) override { + source_.Next(mode); transformed_.reset(); return true; } @@ -185,16 +193,15 @@ CellStream MakeTrivialTransformer(CellStream source, Transformer transformer) { template class TrivialFilter : public AbstractCellStreamImpl { static_assert( - std::is_same_v< + std::is_same< google::cloud::internal::invoke_result_t, - absl::optional>, + absl::optional>::value, "Invalid filter return type"); public: TrivialFilter(CellStream source, Filter filter, std::function filter_filter) - : initialized_(false), - source_(std::move(source)), + : source_(std::move(source)), filter_(std::move(filter)), filter_filter_(std::move(filter_filter)) {} @@ -239,7 +246,7 @@ class TrivialFilter : public AbstractCellStreamImpl { } } - mutable bool initialized_; + mutable 
bool initialized_{false}; mutable CellStream source_; mutable Filter filter_; std::function filter_filter_; @@ -295,8 +302,7 @@ bool MergeCellStreams::CellStreamGreater::operator()( return (*lhs)->timestamp() > (*rhs)->timestamp(); } -MergeCellStreams::MergeCellStreams(std::vector streams) - : initialized_(false) { +MergeCellStreams::MergeCellStreams(std::vector streams) { for (auto& stream : streams) { unfinished_streams_.emplace_back( std::make_unique(std::move(stream))); @@ -334,7 +340,7 @@ bool MergeCellStreams::Next(NextMode mode) { std::pop_heap(unfinished_streams_.begin(), unfinished_streams_.end(), CellStreamGreater()); auto& stream_to_advance = unfinished_streams_.back(); - stream_to_advance->Next(); + stream_to_advance->Next(NextMode::kCell); if (stream_to_advance->HasValue()) { std::push_heap(unfinished_streams_.begin(), unfinished_streams_.end(), CellStreamGreater()); @@ -353,7 +359,7 @@ void MergeCellStreams::InitializeIfNeeded() const { void MergeCellStreams::ReassesStreams() const { for (auto stream_it = unfinished_streams_.begin(); - stream_it != unfinished_streams_.end(); ) { + stream_it != unfinished_streams_.end();) { if (!(*stream_it)->HasValue()) { stream_it->swap(unfinished_streams_.back()); unfinished_streams_.pop_back(); @@ -394,25 +400,21 @@ class ConditionStream : public AbstractCellStreamImpl { : source_(std::move(source)), predicate_stream_(std::move(predicate)), true_stream_(std::move(true_stream)), - false_stream_(std::move(false_stream)), - initialized_(false) {} + false_stream_(std::move(false_stream)) {} - bool ApplyFilter(InternalFilter const& ) override { - return false; - } + bool ApplyFilter(InternalFilter const&) override { return false; } bool HasValue() const override { InitializeIfNeeded(); return source_.HasValue(); } - CellView const &Value() const override { + CellView const& Value() const override { InitializeIfNeeded(); if (condition_true_) { return *true_stream_; - } else { - return *false_stream_; } + return 
*false_stream_; } bool Next(NextMode mode) override { @@ -453,7 +455,7 @@ class ConditionStream : public AbstractCellStreamImpl { if (predicate_stream_ && internal::CompareRowKey(predicate_stream_->row_key(), cell_view.row_key()) == 0) { - // Predicate stream did return somthing for this row. + // Predicate stream did return something for this row. condition_true_ = true; // Fast-forward the true stream to start at current row. for (; true_stream_ && internal::CompareRowKey(true_stream_->row_key(), @@ -495,7 +497,7 @@ class ConditionStream : public AbstractCellStreamImpl { mutable CellStream predicate_stream_; mutable CellStream true_stream_; mutable CellStream false_stream_; - mutable bool initialized_; + mutable bool initialized_{false}; mutable bool condition_true_; mutable std::string current_row_; }; @@ -513,9 +515,10 @@ class EmptyCellStreamImpl : public AbstractCellStreamImpl { bool Next(NextMode) override { return true; } }; +// NOLINTBEGIN(misc-no-recursion,readability-function-cognitive-complexity) StatusOr CreateFilterImpl( ::google::bigtable::v2::RowFilter const& filter, - CellStreamConstructor source_ctor, FilterContext const& ctx, + CellStreamConstructor source_ctor, std::vector& direct_sinks) { if (filter.has_pass_all_filter()) { if (!filter.pass_all_filter()) { @@ -524,7 +527,8 @@ StatusOr CreateFilterImpl( GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } return source_ctor; - } else if (filter.has_block_all_filter()) { + } + if (filter.has_block_all_filter()) { if (!filter.block_all_filter()) { return InvalidArgumentError( "`block_all_filter` explicitly set to `false`.", @@ -534,7 +538,8 @@ StatusOr CreateFilterImpl( return CellStream(std::make_unique()); }; return res; - } else if (filter.has_row_key_regex_filter()) { + } + if (filter.has_row_key_regex_filter()) { auto pattern = std::make_shared(filter.row_key_regex_filter()); if (!pattern->ok()) { return InvalidArgumentError( @@ -544,7 +549,7 @@ StatusOr CreateFilterImpl( 
.WithMetadata("description", pattern->error())); } CellStreamConstructor res = [source_ctor = std::move(source_ctor), - pattern = std::move(pattern)] { + pattern = std::move(pattern)] { auto source = source_ctor(); if (source.ApplyFilter(RowKeyRegex{pattern})) { return source; @@ -560,7 +565,8 @@ StatusOr CreateFilterImpl( }); }; return res; - } else if (filter.has_value_regex_filter()) { + } + if (filter.has_value_regex_filter()) { auto pattern = std::make_shared(filter.value_regex_filter()); if (!pattern->ok()) { return InvalidArgumentError( @@ -570,7 +576,7 @@ StatusOr CreateFilterImpl( .WithMetadata("description", pattern->error())); } CellStreamConstructor res = [source_ctor = std::move(source_ctor), - pattern = std::move(pattern)] { + pattern = std::move(pattern)] { auto source = source_ctor(); return MakeTrivialFilter( std::move(source), @@ -583,10 +589,11 @@ StatusOr CreateFilterImpl( }); }; return res; - } else if (filter.has_row_sample_filter()) { + } + if (filter.has_row_sample_filter()) { double pass_prob = filter.row_sample_filter(); - if (pass_prob + std::numeric_limits::epsilon() < 0 - || pass_prob - std::numeric_limits::epsilon() > 1) { + if (pass_prob + std::numeric_limits::epsilon() < 0 || + pass_prob - std::numeric_limits::epsilon() > 1) { return InvalidArgumentError( "`row_sample_filter` is not a valid probability.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); @@ -608,7 +615,8 @@ StatusOr CreateFilterImpl( }); }; return res; - } else if (filter.has_family_name_regex_filter()) { + } + if (filter.has_family_name_regex_filter()) { auto pattern = std::make_shared(filter.family_name_regex_filter()); if (!pattern->ok()) { @@ -619,7 +627,7 @@ StatusOr CreateFilterImpl( .WithMetadata("description", pattern->error())); } CellStreamConstructor res = [source_ctor = std::move(source_ctor), - pattern = std::move(pattern)] { + pattern = std::move(pattern)] { auto source = source_ctor(); if (source.ApplyFilter(FamilyNameRegex{pattern})) { 
return source; @@ -636,7 +644,8 @@ StatusOr CreateFilterImpl( }); }; return res; - } else if (filter.has_column_qualifier_regex_filter()) { + } + if (filter.has_column_qualifier_regex_filter()) { auto pattern = std::make_shared(filter.column_qualifier_regex_filter()); if (!pattern->ok()) { @@ -664,36 +673,37 @@ StatusOr CreateFilterImpl( }); }; return res; - } else if (filter.has_column_range_filter()) { + } + if (filter.has_column_range_filter()) { auto maybe_range = StringRangeSet::Range::FromColumnRange(filter.column_range_filter()); if (!maybe_range) { return maybe_range.status(); } std::string family_name = filter.column_range_filter().family_name(); - CellStreamConstructor res = - [source_ctor = std::move(source_ctor), - family_name = std::move(family_name), - range = *std::move(maybe_range)] { - auto source = source_ctor(); - if (source.ApplyFilter(ColumnRange{range})) { - return source; - } - return MakeTrivialFilter( - std::move(source), - [range, family_name]( - CellView const& cell_view) -> absl::optional { - if (cell_view.column_family() == family_name && - range.IsWithin(cell_view.column_qualifier())) { - return {}; - } - // FIXME - we might know that we should skip the whole column - // family - return NextMode::kColumn; - }); - }; + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + family_name = std::move(family_name), + range = *std::move(maybe_range)] { + auto source = source_ctor(); + if (source.ApplyFilter(ColumnRange{range})) { + return source; + } + return MakeTrivialFilter( + std::move(source), + [range, + family_name](CellView const& cell_view) -> absl::optional { + if (cell_view.column_family() == family_name && + range.IsWithin(cell_view.column_qualifier())) { + return {}; + } + // FIXME - we might know that we should skip the whole column + // family + return NextMode::kColumn; + }); + }; return res; - } else if (filter.has_value_range_filter()) { + } + if (filter.has_value_range_filter()) { auto maybe_range = 
StringRangeSet::Range::FromValueRange(filter.value_range_filter()); if (!maybe_range) { @@ -704,8 +714,7 @@ StatusOr CreateFilterImpl( auto source = source_ctor(); return MakeTrivialFilter( std::move(source), - [range]( - CellView const& cell_view) -> absl::optional { + [range](CellView const& cell_view) -> absl::optional { if (range.IsWithin(cell_view.value())) { return {}; } @@ -713,7 +722,8 @@ StatusOr CreateFilterImpl( }); }; return res; - } else if (filter.has_cells_per_row_offset_filter()) { + } + if (filter.has_cells_per_row_offset_filter()) { std::int64_t cells_per_row_offset = filter.cells_per_row_offset_filter(); if (cells_per_row_offset < 0) { return InvalidArgumentError( @@ -738,7 +748,8 @@ StatusOr CreateFilterImpl( }); }; return res; - } else if (filter.has_cells_per_row_limit_filter()) { + } + if (filter.has_cells_per_row_limit_filter()) { std::int64_t cells_per_row_limit = filter.cells_per_row_limit_filter(); if (cells_per_row_limit < 0) { return InvalidArgumentError( @@ -763,8 +774,10 @@ StatusOr CreateFilterImpl( }); }; return res; - } else if (filter.has_cells_per_column_limit_filter()) { - std::int64_t cells_per_column_limit = filter.cells_per_column_limit_filter(); + } + if (filter.has_cells_per_column_limit_filter()) { + std::int64_t cells_per_column_limit = + filter.cells_per_column_limit_filter(); if (cells_per_column_limit < 0) { return InvalidArgumentError( "`cells_per_column_limit_filter` is negative.", @@ -789,7 +802,8 @@ StatusOr CreateFilterImpl( }); }; return res; - } else if (filter.has_timestamp_range_filter()) { + } + if (filter.has_timestamp_range_filter()) { auto maybe_range = TimestampRangeSet::Range::FromTimestampRange( filter.timestamp_range_filter()); if (!maybe_range) { @@ -803,8 +817,7 @@ StatusOr CreateFilterImpl( } return MakeTrivialFilter( std::move(source), - [range]( - CellView const& cell_view) -> absl::optional { + [range](CellView const& cell_view) -> absl::optional { if (range.IsBelowStart(cell_view.timestamp())) { 
return NextMode::kCell; } @@ -815,12 +828,8 @@ StatusOr CreateFilterImpl( }); }; return res; - } else if (filter.has_apply_label_transformer()) { - if (!ctx.IsApplyLabelAllowed()) { - return InvalidArgumentError( - "Two `apply_label_transformer`s cannot coexist in one chain.", - GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); - } + } + if (filter.has_apply_label_transformer()) { std::string label = filter.apply_label_transformer(); CellStreamConstructor res = [source_ctor = std::move(source_ctor), label = std::move(label)] { @@ -832,7 +841,8 @@ StatusOr CreateFilterImpl( }); }; return res; - } else if (filter.has_strip_value_transformer()) { + } + if (filter.has_strip_value_transformer()) { if (!filter.strip_value_transformer()) { return InvalidArgumentError( "`strip_value_transformer` explicitly set to `false`.", @@ -846,10 +856,10 @@ StatusOr CreateFilterImpl( }); }; return res; - } else if (filter.has_chain()) { + } + if (filter.has_chain()) { CellStreamConstructor res = std::move(source_ctor); - // FIXME handle the contexts properly - for (auto const &subfilter : filter.chain().filters()) { + for (auto const& subfilter : filter.chain().filters()) { if (subfilter.has_sink()) { if (!subfilter.sink()) { return InvalidArgumentError( @@ -861,19 +871,19 @@ StatusOr CreateFilterImpl( return CellStream(std::make_unique()); }; return res; - } auto maybe_res = - CreateFilterImpl(subfilter, std::move(res), ctx, direct_sinks); + CreateFilterImpl(subfilter, std::move(res), direct_sinks); if (!maybe_res) { return maybe_res.status(); } res = *std::move(maybe_res); } return res; - } else if (filter.has_interleave()) { + } + if (filter.has_interleave()) { std::vector parallel_stream_ctors; - for (auto const & subfilter : filter.interleave().filters()) { + for (auto const& subfilter : filter.interleave().filters()) { if (subfilter.has_sink()) { if (!subfilter.sink()) { return InvalidArgumentError( @@ -884,7 +894,7 @@ StatusOr CreateFilterImpl( continue; } auto 
maybe_filter = - CreateFilterImpl(subfilter, source_ctor, ctx, direct_sinks); + CreateFilterImpl(subfilter, source_ctor, direct_sinks); if (!maybe_filter) { return maybe_filter.status(); } @@ -908,22 +918,23 @@ StatusOr CreateFilterImpl( std::make_unique(std::move(parallel_streams))); }; return res; - } else if (filter.has_condition()) { - if (!filter.condition().has_predicate_filter()){ + } + if (filter.has_condition()) { + if (!filter.condition().has_predicate_filter()) { return InvalidArgumentError( "`condition` must have a `predicate_filter` set.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - + auto maybe_predicate_stream_ctor = CreateFilterImpl( - filter.condition().predicate_filter(), source_ctor, ctx, direct_sinks); + filter.condition().predicate_filter(), source_ctor, direct_sinks); if (!maybe_predicate_stream_ctor) { return maybe_predicate_stream_ctor.status(); } auto maybe_true_stream_ctor = filter.condition().has_true_filter() ? CreateFilterImpl(filter.condition().true_filter(), source_ctor, - ctx, direct_sinks) + direct_sinks) : StatusOr([] { return CellStream(std::make_unique()); }); @@ -933,7 +944,7 @@ StatusOr CreateFilterImpl( auto maybe_false_stream_ctor = filter.condition().has_false_filter() ? 
CreateFilterImpl(filter.condition().false_filter(), source_ctor, - ctx, direct_sinks) + direct_sinks) : StatusOr([] { return CellStream(std::make_unique()); }); @@ -941,29 +952,26 @@ StatusOr CreateFilterImpl( return maybe_false_stream_ctor.status(); } - CellStreamConstructor res = [source_ctor = std::move(source_ctor), - predicate_stream_ctor = *std::move(maybe_predicate_stream_ctor), - true_stream_ctor = *std::move(maybe_true_stream_ctor), - false_stream_ctor = *std::move(maybe_false_stream_ctor)] { - return CellStream(std::make_unique( - source_ctor(), predicate_stream_ctor(), true_stream_ctor(), - false_stream_ctor())); - }; + CellStreamConstructor res = + [source_ctor = std::move(source_ctor), + predicate_stream_ctor = *std::move(maybe_predicate_stream_ctor), + true_stream_ctor = *std::move(maybe_true_stream_ctor), + false_stream_ctor = *std::move(maybe_false_stream_ctor)] { + return CellStream(std::make_unique( + source_ctor(), predicate_stream_ctor(), true_stream_ctor(), + false_stream_ctor())); + }; return res; } return UnimplementedError( "Unsupported filter.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } - -CellStream JoinCellStreams(std::vector cell_streams) { - return CellStream( - std::make_unique(std::move(cell_streams))); -} +// NOLINTEND(misc-no-recursion,readability-function-cognitive-complexity) StatusOr CreateFilter( ::google::bigtable::v2::RowFilter const& filter, - CellStreamConstructor source_ctor, FilterContext const& ctx) { + CellStreamConstructor source_ctor) { std::vector direct_sink_ctors; if (filter.has_sink()) { if (!filter.sink()) { @@ -974,7 +982,7 @@ StatusOr CreateFilter( return source_ctor(); } auto maybe_filter_ctor = - CreateFilterImpl(filter, std::move(source_ctor), ctx, direct_sink_ctors); + CreateFilterImpl(filter, std::move(source_ctor), direct_sink_ctors); if (!maybe_filter_ctor) { return maybe_filter_ctor.status(); } diff --git a/google/cloud/bigtable/emulator/filter.h 
b/google/cloud/bigtable/emulator/filter.h index 395d299348845..73a3a08570fc1 100644 --- a/google/cloud/bigtable/emulator/filter.h +++ b/google/cloud/bigtable/emulator/filter.h @@ -28,19 +28,23 @@ namespace cloud { namespace bigtable { namespace emulator { - +/// Only return cells from rows whose keys match `regex`. struct RowKeyRegex { std::shared_ptr regex; }; +/// Only return cells from column families whose names match `regex`. struct FamilyNameRegex { std::shared_ptr regex; }; +/// Only return cells from columns whose qualifiers match `regex`. struct ColumnRegex { std::shared_ptr regex; }; +/// Only return cells from columns which fall into `range`. struct ColumnRange { StringRangeSet::Range range; }; +/// Only return cells whose timestamps fall into `range`. struct TimestampRange { TimestampRangeSet::Range range; }; @@ -48,54 +52,141 @@ struct TimestampRange { using InternalFilter = absl::variant; enum class NextMode { + // Advance a stream to the next available cell. kCell = 0, + // Advance a stream to the first cell which is in a different column. kColumn, + // Advance a stream to the first cell which is in a different row. kRow, }; +/** + * An interface for `CellView` stream implementations. + * + * Objects of classes implementing this abstract class represent a stream of + * `CellView`s. They should all guarantee that the returned `CellView`s are sorted by + * (row_key, column_family, column_qualifier, timestamp). + * + * Depending on the implementation, objects of this class may support filtering + * of the returned `CellView`s. The users may request filtering via `ApplyFilter()`. It + * should be used only before the first access to the actual stream (i.e. functions + * `HasValue()`, `Value()` and `Next()`). + * + * Objects of derived classes should be assumed not to be thread safe. + */ class AbstractCellStreamImpl { public: virtual ~AbstractCellStreamImpl() = default; + /** + * Attempt to apply a filter on the stream.
+ * + * It should not be called after `HasValue()`, `Value()` or `Next()` have been + * called. + * + * Depending on the implementation the application may succeed or not. If it + * doesn't, the stream is unchanged. + * + * @param internal_filter a filter to apply on the stream. + * @return whether the filter application succeeded. If it didn't, the stream + * is unchanged. + */ virtual bool ApplyFilter(InternalFilter const& internal_filter) = 0; + /// Whether the stream is pointing to a cell (`true`) or has finished (`false`). virtual bool HasValue() const = 0; - virtual CellView const &Value() const = 0; - virtual bool Next(NextMode mode = NextMode::kCell) = 0; + /** + * The first "unconsumed" value. + * + * \pre{One should not call this member function if `HasValue() == false`.} + * + * @return the cell the stream currently points to + */ + virtual CellView const& Value() const = 0; + /** + * Advance the stream to the next `CellView`. + * + * \pre{One should not call this member function if `HasValue() == false`.} + * + * Specific implementations have to support `mode == NextMode::kCell` but may + * not support others. If the requested `mode` is not supported, `false` is + * returned. + * + * @param mode how far to advance - it may be any next cell, or the first cell + * which is in a different column or the first cell which is in a + * different row. + * @return whether `mode` is supported; the returned value is unrelated to + * what `HasValue()` will return. + */ virtual bool Next(NextMode mode) = 0; }; +/** + * A convenience wrapper around `AbstractCellStreamImpl`. + * + * The purpose of this class is to provide what `AbstractCellStreamImpl` + * implementations do but with a more convenient interface. + */ class CellStream { public: - CellStream(std::unique_ptr impl) + explicit CellStream(std::unique_ptr impl) : impl_(std::move(impl)) {} + /** + * Attempt to apply a filter on the stream. + * + * It should not be called after `HasValue()`, `Value()` or `Next()` have been + * called.
+ * + * Depending on the implementation the application may succeed or not. If it + * doesn't, the stream is unchanged. + * + * @param internal_filter a filter to apply on the stream. + * @return whether the filter application succeeded. If it didn't, the stream + * is unchanged. + */ bool ApplyFilter(InternalFilter const& internal_filter) { return impl_->ApplyFilter(internal_filter); } + /// Whether the stream is pointing to a cell (`true`) or has finished (`false`). bool HasValue() const { return impl_->HasValue(); } - CellView const & Value() const { return impl_->Value(); } + /** + * The first "unconsumed" value. + * + * \pre{One should not call this member function if `HasValue() == false`.} + * + * @return the cell the stream currently points to + */ + CellView const& Value() const { return impl_->Value(); } + /** + * Advance the stream to the next `CellView`. + * + * \pre{One should not call this member function if `HasValue() == false`.} + * + * @param mode how far to advance - it may be any next cell, or the first cell + * which is in a different column or the first cell which is in a + * different row.
+ */ void Next(NextMode mode = NextMode::kCell); + /// equivalent to `Next(NextMode::kCell)` void operator++() { Next(); } + /// equivalent to `Next(NextMode::kCell)` CellView operator++(int); CellView const& operator*() const { return Value(); } CellView const* operator->() const { return &Value(); } + /// equivalent to `HasValue()` explicit operator bool() const { return HasValue(); } - AbstractCellStreamImpl const &impl() const { return *impl_; } + AbstractCellStreamImpl const& impl() const { return *impl_; } private: + void NextColumn(); + void EmulateNextColumn(); + void EmulateNextRow(); std::unique_ptr impl_; }; -class FilterContext { - public: - FilterContext() : allow_apply_label_(true) {} - - FilterContext& DisallowApplyLabel(); - - bool IsApplyLabelAllowed() const { return allow_apply_label_; } - private: - bool allow_apply_label_; -}; - +/** + * A stream which merges multiple streams while maintaining ordering. + */ class MergeCellStreams : public AbstractCellStreamImpl { public: class CellStreamGreater { @@ -104,7 +195,7 @@ class MergeCellStreams : public AbstractCellStreamImpl { std::unique_ptr const& rhs) const; }; - MergeCellStreams(std::vector streams); + explicit MergeCellStreams(std::vector streams); bool ApplyFilter(InternalFilter const& internal_filter) override; bool HasValue() const override; CellView const& Value() const override; @@ -115,19 +206,29 @@ class MergeCellStreams : public AbstractCellStreamImpl { void ReassesStreams() const; bool SkipRowOrColumn(NextMode mode); - mutable bool initialized_; + mutable bool initialized_{false}; + protected: // A priority queue of streams which still have data. // `std::priority_queue` can't be used because it cannot be iterated over. mutable std::vector> unfinished_streams_; }; -CellStream JoinCellStreams(std::vector cell_streams); - +/** + * Create a filter hierarchy according to a protobuf description. + * + * The filter hierarchy is essentially a DAG with specific filters in nodes.
+ * + * @param filter the protobuf description of the filter hierarchy + * @param source_ctor a zero argument function to create the unfiltered stream + * to be filtered. Depending on `filter` it may be called multiple times and + * it should return separate streams each time. + * @return the filtered stream or an error. + */ using CellStreamConstructor = std::function; StatusOr CreateFilter( ::google::bigtable::v2::RowFilter const& filter, - CellStreamConstructor source_ctor, FilterContext const& ctx); + CellStreamConstructor source_ctor); } // namespace emulator } // namespace bigtable @@ -135,4 +236,3 @@ StatusOr CreateFilter( } // namespace google #endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTER_H - diff --git a/google/cloud/bigtable/emulator/filter_test.cc b/google/cloud/bigtable/emulator/filter_test.cc index 70c6f9439e4e0..b49161b4acffa 100644 --- a/google/cloud/bigtable/emulator/filter_test.cc +++ b/google/cloud/bigtable/emulator/filter_test.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "google/cloud/testing_util/is_proto_equal.h" -#include #include "google/cloud/bigtable/data_connection.h" #include "google/cloud/bigtable/table.h" +#include "google/cloud/testing_util/is_proto_equal.h" #include "google/cloud/testing_util/status_matchers.h" +#include namespace google { namespace cloud { @@ -58,4 +58,3 @@ TEST(DummyFilter, Simple) { } // namespace bigtable } // namespace cloud } // namespace google - diff --git a/google/cloud/bigtable/emulator/filtered_map.h b/google/cloud/bigtable/emulator/filtered_map.h index b3ddd53af536b..a6f369ba38fc3 100644 --- a/google/cloud/bigtable/emulator/filtered_map.h +++ b/google/cloud/bigtable/emulator/filtered_map.h @@ -16,18 +16,35 @@ #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTERED_MAP_H #include "google/cloud/bigtable/emulator/range_set.h" +#include #include #include -#include namespace google { namespace cloud { namespace bigtable { namespace emulator { +/** + * A map view filtering elements by whether their keys fall into a range set. + * + * Objects of this type provide a lightweight wrapper around `std::map`, which + * provides an iterator, which will skip over unwanted elements. + * + * This class is not very generic. It should be thought of as a crude way of + * deduplicating code. + * + * The unfiltered elements' keys should fall into a given range set - either + * `StringRangeSet` or `TimestampRangeSet`. + * + * @tparam Map the type of the map to be wrapped, an instantiation of `std::map` + * @tparam PermittedRanges the type of the filter, either `StringRangeSet` or + * `TimestampRangeSet` + */ template class RangeFilteredMapView { public: + // NOLINTNEXTLINE(readability-identifier-naming) class const_iterator { public: using iterator_category = std::input_iterator_tag; @@ -75,6 +92,7 @@ class RangeFilteredMapView { pointer operator->() const { return &*unfiltered_pos_; } private: + // Adjust `unfiltered_pos_` after we transition to a different range.
void AdvanceToNextRange() { if (filter_pos_ == parent_.get().filter_.get().disjoint_ranges().end()) { // We've reached the end. @@ -99,6 +117,8 @@ class RangeFilteredMapView { } } + // After `unfiltered_pos_` was increased, make sure it's within a valid + // range. void EnsureIteratorValid() { // `unfiltered_pos_` may point to a row which is past the end of the range // pointed by filter_pos_. Make sure this only happens when the iteration @@ -119,6 +139,17 @@ class RangeFilteredMapView { typename PermittedRanges::Range::StartLess>::const_iterator filter_pos_; }; + /** + * Create a new object. + * + * Objects of this class store references to arguments passed in the + * constructor. The user is responsible for making sure that the referenced + * objects continue to exist throughout the lifetime of this object. They + * should also not change. + * + * @param unfiltered the map whose elements need to be filtered. + * @param filter the range set which dictates which ranges should remain unfiltered. + */ RangeFilteredMapView(Map const& unfiltered, PermittedRanges const& filter) : unfiltered_(std::cref(unfiltered)), filter_(std::cref(filter)) {} @@ -136,9 +167,23 @@ class RangeFilteredMapView { std::reference_wrapper filter_; }; +/** + * A map view filtering elements by whether their keys match a regex. + * + * Objects of this type provide a lightweight wrapper around `std::map`, which + * provides an iterator, which will skip over unwanted elements. + * + * This class is not very generic. It should be thought of as a crude way of + * deduplicating code. + * + * Elements whose keys match all regexes are not filtered out.
+ * + * @tparam Map the type of the map to be wrapped, an instantiation of `std::map` + */ template class RegexFiteredMapView { public: + // NOLINTNEXTLINE(readability-identifier-naming) class const_iterator { public: using iterator_category = std::input_iterator_tag; @@ -180,6 +225,7 @@ class RegexFiteredMapView { pointer operator->() const { return &*unfiltered_pos_; } private: + // Make sure that `unfiltered_pos_` points to an unfiltered elem or end(). void EnsureIteratorValid() { for (; unfiltered_pos_ != parent_.get().unfiltered_.end() && std::any_of(parent_.get().filters_.get().begin(), @@ -196,6 +242,18 @@ class RegexFiteredMapView { typename Map::const_iterator unfiltered_pos_; }; + /** + * Create a new object. + * + * Objects of this class store references to arguments passed in the + * constructor. The user is responsible for making sure that the referenced + * objects continue to exist throughout the lifetime of this object. They + * should also not change. + * + * @param unfiltered the map whose elements need to be filtered. + * @param filters the regexes which elements' keys have to match to not be filtered + * out.
+ */ RegexFiteredMapView( Map unfiltered, std::vector> const& filters) diff --git a/google/cloud/bigtable/emulator/filtered_map_test.cc b/google/cloud/bigtable/emulator/filtered_map_test.cc index c376ef7f7da90..69dd3856d5b98 100644 --- a/google/cloud/bigtable/emulator/filtered_map_test.cc +++ b/google/cloud/bigtable/emulator/filtered_map_test.cc @@ -14,9 +14,9 @@ #include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/bigtable/row_range.h" +#include "google/cloud/testing_util/chrono_literals.h" #include "google/cloud/testing_util/is_proto_equal.h" #include "google/cloud/testing_util/status_matchers.h" -#include "google/cloud/testing_util/chrono_literals.h" #include #include @@ -124,9 +124,9 @@ TEST(RangeFilteredMapView, NoEntriesBeforeOpenFilter) { TEST(RangeFilteredMapView, MultipleFilters) { std::map unfiltered{ - {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, - {"AAC", 0}, {"BB", 0}, {"BBB", 0}, {"BBBa", 0}, {"BBBb", 0}, - {"CCCa", 0}, {"CCCb", 0}, {"CCD", 0}, {"CCE", 0}}; + {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, + {"AAC", 0}, {"BB", 0}, {"BBB", 0}, {"BBBb", 0}, {"CCCa", 0}, + {"CCCb", 0}, {"CCD", 0}, {"CCE", 0}}; auto filter = StringRangeSet::Empty(); filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kClosed)); filter.Sum(StringRangeSet::Range("BBB", kClosed, "BBC", kOpen)); @@ -134,7 +134,7 @@ TEST(RangeFilteredMapView, MultipleFilters) { RangeFilteredMapView filtered( unfiltered, filter); - EXPECT_EQ(Vec({"AAAa", "AAAb", "AAB", "BBB", "BBBa", "BBBb", "CCCa", "CCCb"}), + EXPECT_EQ(Vec({"AAAa", "AAAb", "AAB", "BBB", "BBBb", "CCCa", "CCCb"}), Keys(filtered)); } diff --git a/google/cloud/bigtable/emulator/range_set.cc b/google/cloud/bigtable/emulator/range_set.cc index ffe1a6a564650..33d8316487497 100644 --- a/google/cloud/bigtable/emulator/range_set.cc +++ b/google/cloud/bigtable/emulator/range_set.cc @@ -48,16 +48,22 @@ bool ConsecutiveRowKeys(StringRangeSet::Range::Value const& lhs, bool 
HasOverlap(StringRangeSet::Range const& lhs, StringRangeSet::Range const& rhs) { auto const start_cmp = CompareRangeValues(lhs.start(), rhs.start()); - StringRangeSet::Range const& intersect_start = - (start_cmp == 0) ? (lhs.start_open() ? lhs : rhs) - : ((start_cmp > 0) ? lhs : rhs); + StringRangeSet::Range const* intersect_start; + if (start_cmp == 0) { + intersect_start = lhs.start_open() ? &lhs : &rhs; + } else { + intersect_start = (start_cmp > 0) ? &lhs : &rhs; + } auto const end_cmp = CompareRangeValues(lhs.end(), rhs.end()); - StringRangeSet::Range const& intersect_end = (end_cmp == 0) - ? (lhs.end_open() ? lhs : rhs) - : ((end_cmp < 0) ? lhs : rhs); + StringRangeSet::Range const* intersect_end; + if (end_cmp == 0) { + intersect_end = lhs.end_open() ? &lhs : &rhs; + } else { + intersect_end = (end_cmp < 0) ? &lhs : &rhs; + } return !StringRangeSet::Range::IsEmpty( - intersect_start.start(), intersect_start.start_open(), - intersect_end.end(), intersect_end.end_open()); + intersect_start->start(), intersect_start->start_open(), + intersect_end->end(), intersect_end->end_open()); } bool HasOverlap(TimestampRangeSet::Range const& lhs, @@ -74,7 +80,7 @@ bool DisjointAndSortedRangesAdjacent(StringRangeSet::Range const& lhs, assert(!HasOverlap(lhs, rhs)); assert(StringRangeSet::Range::StartLess()(lhs, rhs)); if (lhs.end_closed() && rhs.start_open() && lhs.end() == rhs.start()) { - return true; + return true; } if (lhs.end_open() && rhs.start_closed() && lhs.end() == rhs.start()) { return true; @@ -96,8 +102,7 @@ bool DisjointAndSortedRangesAdjacent(TimestampRangeSet::Range const& lhs, } template -void RangeSetSumImpl(RangeSetType& disjoint_ranges, - RangeType inserted_range) { +void RangeSetSumImpl(RangeSetType& disjoint_ranges, RangeType inserted_range) { // Remove all ranges which either have an overlap with `inserted_range` or are // adjacent to it. Then add `inserted_range` with `start` and `end` // adjusted to cover what the removed ranges used to cover. 
@@ -161,18 +166,18 @@ void RangeSetIntersectImpl(RangeSetType& disjoint_ranges, } // namespace detail StringRangeSet::Range::Range(Value start, bool start_open, Value end, - bool end_open) + bool end_open) : start_(std::move(start)), start_open_(start_open), end_(std::move(end)), end_open_(end_open) { - assert(!Range::ValueLess()(end, start)); - assert(!absl::holds_alternative(start) || + assert(!Range::ValueLess()(end_, start_)); + assert(!absl::holds_alternative(start_) || !start_open_); - assert(!absl::holds_alternative(end) || + assert(!absl::holds_alternative(end_) || !end_open_); - assert(!absl::holds_alternative(start) || - absl::holds_alternative(end)); + assert(!absl::holds_alternative(start_) || + absl::holds_alternative(end_)); } StatusOr StringRangeSet::Range::FromRowRange( @@ -301,7 +306,7 @@ void StringRangeSet::Range::set_end(Range const& source) { end_open_ = source.end_open(); } -bool StringRangeSet::Range::IsBelowStart(Value const &value) const { +bool StringRangeSet::Range::IsBelowStart(Value const& value) const { auto const cmp = detail::CompareRangeValues(value, start_); if (cmp != 0) { return cmp < 0; @@ -329,7 +334,7 @@ bool StringRangeSet::Range::IsEmpty(StringRangeSet::Range::Value const& start, return false; } -bool StringRangeSet::Range::IsAboveEnd(Value const &value) const { +bool StringRangeSet::Range::IsAboveEnd(Value const& value) const { auto const cmp = detail::CompareRangeValues(value, end_); if (cmp != 0) { return cmp > 0; @@ -337,7 +342,7 @@ bool StringRangeSet::Range::IsAboveEnd(Value const &value) const { return end_open_; } -bool StringRangeSet::Range::IsWithin(Value const &value) const { +bool StringRangeSet::Range::IsWithin(Value const& value) const { return !IsAboveEnd(value) && !IsBelowStart(value); } @@ -345,13 +350,13 @@ bool StringRangeSet::Range::IsEmpty() const { return Range::IsEmpty(start_, start_open_, end_, end_open_); } -bool StringRangeSet::Range::ValueLess::operator()(Range::Value const& lhs, - Range::Value 
const& rhs) const { +bool StringRangeSet::Range::ValueLess::operator()( + Range::Value const& lhs, Range::Value const& rhs) const { return detail::CompareRangeValues(lhs, rhs) < 0; } bool StringRangeSet::Range::StartLess::operator()(Range const& lhs, - Range const& rhs) const { + Range const& rhs) const { auto res = detail::CompareRangeValues(lhs.start(), rhs.start()); if (res == 0) { return lhs.start_closed() && rhs.start_open(); @@ -360,7 +365,7 @@ bool StringRangeSet::Range::StartLess::operator()(Range const& lhs, } bool StringRangeSet::Range::EndLess::operator()(Range const& lhs, - Range const& rhs) const { + Range const& rhs) const { auto res = detail::CompareRangeValues(lhs.end(), rhs.end()); if (res == 0) { return lhs.end_open() && rhs.end_closed(); @@ -374,16 +379,14 @@ StringRangeSet StringRangeSet::All() { return res; } -StringRangeSet StringRangeSet::Empty() { - return StringRangeSet{}; -} +StringRangeSet StringRangeSet::Empty() { return StringRangeSet{}; } void StringRangeSet::Sum(StringRangeSet::Range inserted_range) { detail::RangeSetSumImpl(disjoint_ranges_, std::move(inserted_range)); } -void StringRangeSet::Intersect(StringRangeSet::Range const& inserted_range) { - detail::RangeSetIntersectImpl(disjoint_ranges_, inserted_range); +void StringRangeSet::Intersect(StringRangeSet::Range const& intersected_range) { + detail::RangeSetIntersectImpl(disjoint_ranges_, intersected_range); } bool operator==(StringRangeSet::Range::Value const& lhs, @@ -413,8 +416,7 @@ bool operator==(StringRangeSet::Range const& lhs, lhs.end() == rhs.end() && lhs.end_open() == rhs.end_open(); } -std::ostream& operator<<(std::ostream& os, - StringRangeSet::Range const& range) { +std::ostream& operator<<(std::ostream& os, StringRangeSet::Range const& range) { os << (range.start_closed() ? "[" : "(") << range.start() << "," << range.end() << (range.end_closed() ? 
"]" : ")"); return os; @@ -422,11 +424,11 @@ std::ostream& operator<<(std::ostream& os, TimestampRangeSet::Range::Range(Value start, Value end) : start_(std::move(start)), end_(std::move(end)) { - assert(end == std::chrono::milliseconds::zero() || start <= end); + assert(end_ == std::chrono::milliseconds::zero() || start_ <= end_); } StatusOr TimestampRangeSet::Range::FromTimestampRange( - google::bigtable::v2::TimestampRange const& timestamp_range) { + google::bigtable::v2::TimestampRange const& timestamp_range) { auto start = std::chrono::duration_cast( std::chrono::microseconds(timestamp_range.start_timestamp_micros())); auto end = std::chrono::duration_cast( @@ -457,12 +459,12 @@ bool TimestampRangeSet::Range::IsEmpty(TimestampRangeSet::Range::Value start, } bool TimestampRangeSet::Range::StartLess::operator()(Range const& lhs, - Range const& rhs) const { + Range const& rhs) const { return lhs.start() < rhs.start(); } bool TimestampRangeSet::Range::EndLess::operator()(Range const& lhs, - Range const& rhs) const { + Range const& rhs) const { if (lhs.end() == std::chrono::milliseconds::zero()) { return false; } @@ -478,9 +480,7 @@ TimestampRangeSet TimestampRangeSet::All() { return res; } -TimestampRangeSet TimestampRangeSet::Empty() { - return TimestampRangeSet{}; -} +TimestampRangeSet TimestampRangeSet::Empty() { return TimestampRangeSet{}; } void TimestampRangeSet::Sum(TimestampRangeSet::Range inserted_range) { detail::RangeSetSumImpl(disjoint_ranges_, std::move(inserted_range)); @@ -508,7 +508,6 @@ std::ostream& operator<<(std::ostream& os, return os; } - } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/range_set.h b/google/cloud/bigtable/emulator/range_set.h index 6a90626232659..dd5e87cdd69ef 100644 --- a/google/cloud/bigtable/emulator/range_set.h +++ b/google/cloud/bigtable/emulator/range_set.h @@ -15,8 +15,8 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_RANGE_SET_H #define 
GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_RANGE_SET_H -#include "absl/types/variant.h" #include "google/cloud/status_or.h" +#include "absl/types/variant.h" #include #include #include @@ -35,6 +35,16 @@ namespace cloud { namespace bigtable { namespace emulator { +/** + * Objects of this class hold a sorted, disjoint set of string ranges. + * + * Users of this class can intersect and sum the ranges held by this structure. + * + * The ranges can be open or closed on each end and the end may hold a special + * value - infinity. + * + * Ranges starts are never larger than ends. + */ class StringRangeSet { public: class Range { @@ -50,7 +60,7 @@ class StringRangeSet { static StatusOr FromColumnRange( google::bigtable::v2::ColumnRange const& column_range); - Value const& start() const & { return start_; } + Value const& start() const& { return start_; } std::string const& start_finite() const& { return absl::get(start_); } @@ -58,14 +68,14 @@ class StringRangeSet { bool start_closed() const { return !start_open_; } void set_start(Range const& source); - Value const& end() const & { return end_; } + Value const& end() const& { return end_; } bool end_open() const { return end_open_; } bool end_closed() const { return !end_open_; } void set_end(Range const& source); - bool IsBelowStart(Value const &value) const; - bool IsAboveEnd(Value const &value) const; - bool IsWithin(Value const &value) const; + bool IsBelowStart(Value const& value) const; + bool IsAboveEnd(Value const& value) const; + bool IsWithin(Value const& value) const; bool IsEmpty() const; static bool IsEmpty(StringRangeSet::Range::Value const& start, @@ -94,13 +104,12 @@ class StringRangeSet { static StringRangeSet All(); static StringRangeSet Empty(); void Sum(Range inserted_range); - void Intersect(Range const &intersected_range); + void Intersect(Range const& intersected_range); std::set const& disjoint_ranges() const { return disjoint_ranges_; }; - private: std::set disjoint_ranges_; }; @@ -114,10 
+123,18 @@ std::ostream& operator<<(std::ostream& os, bool operator==(StringRangeSet::Range const& lhs, StringRangeSet::Range const& rhs); -std::ostream& operator<<(std::ostream& os, - StringRangeSet::Range const& range); - - +std::ostream& operator<<(std::ostream& os, StringRangeSet::Range const& range); + +/** + * Objects of this class hold a sorted, disjoint set of timestamp ranges. + * + * Users of this class can intersect and sum the ranges held by this structure. + * + * The ranges are closed on the left and open on the right. A value of zero on + * the end is treated as infinity. + * + * Range starts are never larger than their ends. + */ class TimestampRangeSet { public: class Range { @@ -130,12 +147,16 @@ class TimestampRangeSet { Value start() const { return start_; } Value start_finite() const { return start_; } + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) bool start_open() const { return false; } + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) bool start_closed() const { return true; } void set_start(Range const& source) { start_ = source.start_; } Value end() const { return end_; } + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) bool end_open() const { return true; } + // NOLINTNEXTLINE(readability-convert-member-functions-to-static) bool end_closed() const { return false; } void set_end(Range const& source) { end_ = source.end_; } @@ -163,7 +184,7 @@ class TimestampRangeSet { static TimestampRangeSet All(); static TimestampRangeSet Empty(); void Sum(Range inserted_range); - void Intersect(Range const &intersected_range); + void Intersect(Range const& intersected_range); std::set const& disjoint_ranges() const { return disjoint_ranges_; diff --git a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc index 43fbeab4c8b22..31b1c2b9e0cee 100644 --- a/google/cloud/bigtable/emulator/range_set_test.cc +++
b/google/cloud/bigtable/emulator/range_set_test.cc @@ -14,9 +14,9 @@ #include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/bigtable/row_range.h" +#include "google/cloud/testing_util/chrono_literals.h" #include "google/cloud/testing_util/is_proto_equal.h" #include "google/cloud/testing_util/status_matchers.h" -#include "google/cloud/testing_util/chrono_literals.h" #include #include @@ -526,7 +526,7 @@ TEST(StringRangeSet, DisjointAdjacent) { } // FIXME test invalid data -TEST(TimestampRangeSet, FromInfiniteTimstampRange) { +TEST(TimestampRangeSet, FromInfiniteTimestampRange) { using testing_util::chrono_literals::operator""_ms; auto infinite = TimestampRangeSet::Range::FromTimestampRange( google::bigtable::v2::TimestampRange{}); @@ -540,7 +540,7 @@ TEST(TimestampRangeSet, FromInfiniteTimstampRange) { EXPECT_FALSE(infinite->end_closed()); } -TEST(TimestampRangeSet, FromFiniteTimstampRange) { +TEST(TimestampRangeSet, FromFiniteTimestampRange) { using testing_util::chrono_literals::operator""_ms; google::bigtable::v2::TimestampRange proto; proto.set_start_timestamp_micros(1234); @@ -716,25 +716,23 @@ TEST(TimestampRangeSet, ThreeDisjointIntervals) { trs.disjoint_ranges()); } -TEST(TimestampRangeSet, MergingAdjacentPreceeding) { +TEST(TimestampRangeSet, MergingAdjacentPreceding) { using testing_util::chrono_literals::operator""_ms; TimestampRangeSet trs; trs.Sum(TimestampRangeSet::Range(7_ms, 8_ms)); trs.Sum(TimestampRangeSet::Range(8_ms, 9_ms)); - ASSERT_EQ(TSRanges({{7_ms, 9_ms}}), - trs.disjoint_ranges()); + ASSERT_EQ(TSRanges({{7_ms, 9_ms}}), trs.disjoint_ranges()); } -TEST(TimestampRangeSet, MergingOverlappingPreceeding) { +TEST(TimestampRangeSet, MergingOverlappingPreceding) { using testing_util::chrono_literals::operator""_ms; TimestampRangeSet trs; trs.Sum(TimestampRangeSet::Range(7_ms, 9_ms)); trs.Sum(TimestampRangeSet::Range(8_ms, 10_ms)); - ASSERT_EQ(TSRanges({{7_ms, 10_ms}}), - trs.disjoint_ranges()); + ASSERT_EQ(TSRanges({{7_ms, 
10_ms}}), trs.disjoint_ranges()); } -TEST(TimestampRangeSet, RemovingOvelapping) { +TEST(TimestampRangeSet, RemovingOverlapping) { using testing_util::chrono_literals::operator""_ms; TimestampRangeSet trs; trs.Sum(TimestampRangeSet::Range(1_ms, 2_ms)); @@ -742,11 +740,10 @@ TEST(TimestampRangeSet, RemovingOvelapping) { trs.Sum(TimestampRangeSet::Range(5_ms, 6_ms)); trs.Sum(TimestampRangeSet::Range(7_ms, 8_ms)); trs.Sum(TimestampRangeSet::Range(1_ms, 8_ms)); - ASSERT_EQ(TSRanges({{1_ms, 8_ms}}), - trs.disjoint_ranges()); + ASSERT_EQ(TSRanges({{1_ms, 8_ms}}), trs.disjoint_ranges()); } -TEST(TimestampRangeSet, RemovingOvelappingExtendEnd) { +TEST(TimestampRangeSet, RemovingOverlappingExtendEnd) { using testing_util::chrono_literals::operator""_ms; TimestampRangeSet trs; trs.Sum(TimestampRangeSet::Range(1_ms, 2_ms)); @@ -754,11 +751,10 @@ TEST(TimestampRangeSet, RemovingOvelappingExtendEnd) { trs.Sum(TimestampRangeSet::Range(5_ms, 6_ms)); trs.Sum(TimestampRangeSet::Range(7_ms, 8_ms)); trs.Sum(TimestampRangeSet::Range(1_ms, 9_ms)); - ASSERT_EQ(TSRanges({{1_ms, 9_ms}}), - trs.disjoint_ranges()); + ASSERT_EQ(TSRanges({{1_ms, 9_ms}}), trs.disjoint_ranges()); } -TEST(TimestampRangeSet, RemovingOvelappingEarlyEnd) { +TEST(TimestampRangeSet, RemovingOverlappingEarlyEnd) { using testing_util::chrono_literals::operator""_ms; TimestampRangeSet trs; trs.Sum(TimestampRangeSet::Range(1_ms, 2_ms)); @@ -766,8 +762,7 @@ TEST(TimestampRangeSet, RemovingOvelappingEarlyEnd) { trs.Sum(TimestampRangeSet::Range(5_ms, 6_ms)); trs.Sum(TimestampRangeSet::Range(7_ms, 9_ms)); trs.Sum(TimestampRangeSet::Range(1_ms, 8_ms)); - ASSERT_EQ(TSRanges({{1_ms, 9_ms}}), - trs.disjoint_ranges()); + ASSERT_EQ(TSRanges({{1_ms, 9_ms}}), trs.disjoint_ranges()); } TEST(TimestampRangeSet, PluggingGap) { diff --git a/google/cloud/bigtable/emulator/row_iterators.cc b/google/cloud/bigtable/emulator/row_iterators.cc deleted file mode 100644 index ba6a224012255..0000000000000 --- 
a/google/cloud/bigtable/emulator/row_iterators.cc +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2024 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "google/cloud/bigtable/emulator/row_iterators.h" -#include "google/cloud/bigtable/internal/google_bytes_traits.h" -#include "google/cloud/bigtable/internal/row_range_helpers.h" - -namespace google { -namespace cloud { -namespace bigtable { -namespace emulator { - -} // namespace emulator -} // namespace bigtable -} // namespace cloud -} // namespace google - diff --git a/google/cloud/bigtable/emulator/row_iterators.h b/google/cloud/bigtable/emulator/row_iterators.h deleted file mode 100644 index e7c2d60930bcf..0000000000000 --- a/google/cloud/bigtable/emulator/row_iterators.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2024 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_ITERATORS_H -#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_ITERATORS_H - -#include -#include "google/cloud/bigtable/emulator/cell_view.h" -#include "google/cloud/bigtable/emulator/table.h" -#include "google/cloud/internal/invoke_result.h" -#include -#include -#include - -namespace google { -namespace cloud { -namespace bigtable { -namespace emulator { - - - -} // namespace emulator -} // namespace bigtable -} // namespace cloud -} // namespace google - -#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_ITERATORS_H diff --git a/google/cloud/bigtable/emulator/row_iterators_test.cc b/google/cloud/bigtable/emulator/row_iterators_test.cc deleted file mode 100644 index 80eeb2a09aec4..0000000000000 --- a/google/cloud/bigtable/emulator/row_iterators_test.cc +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2024 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "google/cloud/bigtable/emulator/row_iterators.h" -#include "google/cloud/bigtable/row_range.h" -#include "google/cloud/testing_util/is_proto_equal.h" -#include -#include - -namespace google { -namespace cloud { -namespace bigtable { -namespace emulator { -namespace { - -TEST(MergedSortedIterator, Simple) { -} - -} // anonymous namespace -} // namespace emulator -} // namespace bigtable -} // namespace cloud -} // namespace google - diff --git a/google/cloud/bigtable/emulator/row_streamer.cc b/google/cloud/bigtable/emulator/row_streamer.cc index 081c2907f514b..f511be77060d6 100644 --- a/google/cloud/bigtable/emulator/row_streamer.cc +++ b/google/cloud/bigtable/emulator/row_streamer.cc @@ -26,7 +26,6 @@ RowStreamer::RowStreamer(grpc::ServerWriter& writer) : writer_(writer) {} bool RowStreamer::Stream(CellView const& cell) { - std::cout << "Attempting to stream" << std::endl; btproto::ReadRowsResponse::CellChunk chunk; if (!current_row_key_ || current_row_key_ != cell.row_key()) { if (!pending_chunks_.empty()) { @@ -60,12 +59,10 @@ bool RowStreamer::Stream(CellView const& cell) { if (pending_chunks_.size() > 200) { return Flush(false); } - std::cout << "Not flushing" << std::endl; return true; } bool RowStreamer::Flush(bool stream_finished) { - std::cout << "Flushing" << std::endl; absl::optional dont_flush_this; if (stream_finished) { if (!pending_chunks_.empty()) { @@ -81,14 +78,13 @@ bool RowStreamer::Flush(bool stream_finished) { } } btproto::ReadRowsResponse resp; - for (auto &chunk : pending_chunks_) { + for (auto& chunk : pending_chunks_) { *resp.add_chunks() = std::move(chunk); } pending_chunks_.resize(0); if (dont_flush_this) { pending_chunks_.emplace_back(*std::move(dont_flush_this)); } - std::cout << "Writing: " << resp.DebugString() << std::endl; return writer_.Write(resp); } @@ -96,4 +92,3 @@ bool RowStreamer::Flush(bool stream_finished) { } // namespace bigtable } // namespace cloud } // namespace google - diff --git 
a/google/cloud/bigtable/emulator/row_streamer.h b/google/cloud/bigtable/emulator/row_streamer.h index 742da42c80132..ae22d18c5aea0 100644 --- a/google/cloud/bigtable/emulator/row_streamer.h +++ b/google/cloud/bigtable/emulator/row_streamer.h @@ -15,22 +15,45 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_STREAMER_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_STREAMER_H -#include #include "google/cloud/bigtable/emulator/cell_view.h" -#include #include "absl/types/optional.h" +#include +#include namespace google { namespace cloud { namespace bigtable { namespace emulator { +/** + * Objects of this class implement the ReadRows response protocol. + * + * Incoming cells are used to populate an internal buffer, which batches them + * into messages, which are then written to a gRPC stream. + */ class RowStreamer { public: - RowStreamer( + /** + * Create a new object. + * + * @param writer the gRPC stream to be written to. User should ensure it + * outlives this object. + */ + explicit RowStreamer( grpc::ServerWriter& writer); + /// Stream a cell. bool Stream(CellView const& cell_view); + /** + * Manually flush the stream, potentially closing it. + * + * One should call `Flush(true)` before destroying this object. + * + * @param stream_finished if `true` no more cells will be streamed. If + * `false`, the buffer of outstanding cells will be immediately sent to + * the recipient. 
+ * @return whether flushing succeeded + */ bool Flush(bool stream_finished); private: @@ -48,4 +71,3 @@ class RowStreamer { } // namespace google #endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_STREAMER_H - diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index 015203c1b7f84..53a29e5dd9960 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "google/cloud/bigtable/emulator/cluster.h" #include "google/cloud/bigtable/emulator/server.h" +#include "google/cloud/bigtable/emulator/cluster.h" #include "google/cloud/bigtable/emulator/to_grpc_status.h" #include "google/cloud/internal/make_status.h" -#include #include #include +#include #include #include @@ -32,7 +32,7 @@ namespace btadmin = ::google::bigtable::admin::v2; class EmulatorService final : public btproto::Bigtable::Service { public: - EmulatorService(std::shared_ptr cluster) + explicit EmulatorService(std::shared_ptr cluster) : cluster_(std::move(cluster)) {} grpc::Status ReadRows( @@ -92,14 +92,13 @@ class EmulatorService final : public btproto::Bigtable::Service { class EmulatorTableService final : public btadmin::BigtableTableAdmin::Service { public: - EmulatorTableService(std::shared_ptr cluster) + explicit EmulatorTableService(std::shared_ptr cluster) : cluster_(std::move(cluster)) {} grpc::Status CreateTable(grpc::ServerContext* /* context */, btadmin::CreateTableRequest const* request, btadmin::Table* response) override { auto table_name = request->parent() + "/tables/" + request->table_id(); - auto maybe_table = - cluster_->CreateTable(table_name, request->table()); + auto maybe_table = cluster_->CreateTable(table_name, request->table()); if (!maybe_table) { return ToGrpcStatus(maybe_table.status()); } @@ -107,17 +106,16 @@ class EmulatorTableService final : 
public btadmin::BigtableTableAdmin::Service { return grpc::Status::OK; } - grpc::Status ListTables( - grpc::ServerContext* /* context */, - btadmin::ListTablesRequest const* request, - btadmin::ListTablesResponse* response) override { - + grpc::Status ListTables(grpc::ServerContext* /* context */, + btadmin::ListTablesRequest const* request, + btadmin::ListTablesResponse* response) override { if (!request->page_token().empty()) { return ToGrpcStatus(UnimplementedError( "Pagination is not supported.", GCP_ERROR_INFO().WithMetadata("page_token", request->page_token()))); } - auto maybe_tables = cluster_->ListTables(request->parent(), request->view()); + auto maybe_tables = + cluster_->ListTables(request->parent(), request->view()); if (!maybe_tables) { return ToGrpcStatus(maybe_tables.status()); } @@ -132,7 +130,7 @@ class EmulatorTableService final : public btadmin::BigtableTableAdmin::Service { response->set_next_page_token("unsupported"); maybe_tables->resize(request->page_size()); } - for (auto &table : *maybe_tables) { + for (auto& table : *maybe_tables) { *response->add_tables() = std::move(table); } return grpc::Status::OK; @@ -149,10 +147,9 @@ class EmulatorTableService final : public btadmin::BigtableTableAdmin::Service { return grpc::Status::OK; } - grpc::Status UpdateTable( - grpc::ServerContext* /* context */, - btadmin::UpdateTableRequest const* request, - google::longrunning::Operation* response) override { + grpc::Status UpdateTable(grpc::ServerContext* /* context */, + btadmin::UpdateTableRequest const* request, + google::longrunning::Operation* response) override { auto maybe_table = cluster_->FindTable(request->table().name()); if (!maybe_table) { return ToGrpcStatus(maybe_table.status()); @@ -256,11 +253,9 @@ class DefaultEmulatorServer : public EmulatorServer { builder_.RegisterService(&table_service_); server_ = builder_.BuildAndStart(); } - virtual ~DefaultEmulatorServer() = default; - - virtual int bound_port() { return bound_port_; } - virtual 
void Shutdown() { server_->Shutdown(); } - virtual void Wait() { server_->Wait(); } + int bound_port() override { return bound_port_; } + void Shutdown() override { server_->Shutdown(); } + void Wait() override { server_->Wait(); } private: int bound_port_; diff --git a/google/cloud/bigtable/emulator/server.h b/google/cloud/bigtable/emulator/server.h index 8e7659cc25fad..890c7c341ab6e 100644 --- a/google/cloud/bigtable/emulator/server.h +++ b/google/cloud/bigtable/emulator/server.h @@ -28,8 +28,11 @@ class EmulatorServer { public: virtual ~EmulatorServer() = default; + /// Return the port to which the server bound. virtual int bound_port() = 0; + /// Initiate shutting the server down. virtual void Shutdown() = 0; + /// Wait until the server shuts down. virtual void Wait() = 0; }; diff --git a/google/cloud/bigtable/emulator/server_test.cc b/google/cloud/bigtable/emulator/server_test.cc index 3312f88f4536d..76b970d819d33 100644 --- a/google/cloud/bigtable/emulator/server_test.cc +++ b/google/cloud/bigtable/emulator/server_test.cc @@ -17,8 +17,7 @@ namespace google { namespace cloud { namespace bigtable { -namespace emulator { -} // namespace emulator +namespace emulator {} // namespace emulator } // namespace bigtable } // namespace cloud } // namespace google diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index e8feee05e75b8..bc7bb74337853 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -12,15 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include -#include #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" -#include "google/cloud/bigtable/emulator/row_iterators.h" #include "google/cloud/bigtable/internal/google_bytes_traits.h" -#include "google/protobuf/util/field_mask_util.h" #include "google/cloud/internal/make_status.h" +#include "google/protobuf/util/field_mask_util.h" +#include +#include namespace google { namespace cloud { @@ -28,7 +27,6 @@ namespace bigtable { namespace emulator { namespace btadmin = ::google::bigtable::admin::v2; -namespace btproto = ::google::bigtable::v2; StatusOr> Table::Create( google::bigtable::admin::v2::Table schema) { @@ -45,7 +43,7 @@ Status Table::Construct(google::bigtable::admin::v2::Table schema) { // that luxury here, so we need to make sure that the changes performed in // this member function are reflected in other threads. The simplest way to do // this is the mutex. - std::lock_guard lock(mu_); + std::lock_guard lock(mu_); schema_ = std::move(schema); if (schema_.granularity() == btadmin::Table::TIMESTAMP_GRANULARITY_UNSPECIFIED) { @@ -54,38 +52,35 @@ Status Table::Construct(google::bigtable::admin::v2::Table schema) { if (schema_.cluster_states_size() > 0) { return InvalidArgumentError( "`cluster_states` not empty.", - GCP_ERROR_INFO().WithMetadata("schema", schema.DebugString())); + GCP_ERROR_INFO().WithMetadata("schema", schema_.DebugString())); } if (schema_.has_restore_info()) { return InvalidArgumentError( "`restore_info` not empty.", - GCP_ERROR_INFO().WithMetadata("schema", schema.DebugString())); + GCP_ERROR_INFO().WithMetadata("schema", schema_.DebugString())); } if (schema_.has_change_stream_config()) { return UnimplementedError( "`change_stream_config` not empty.", - GCP_ERROR_INFO().WithMetadata( - "schema", schema.DebugString())); + GCP_ERROR_INFO().WithMetadata("schema", schema_.DebugString())); } if 
(schema_.has_automated_backup_policy()) { return UnimplementedError( "`automated_backup_policy` not empty.", - GCP_ERROR_INFO().WithMetadata( - "schema", schema.DebugString())); + GCP_ERROR_INFO().WithMetadata("schema", schema_.DebugString())); } - for (auto const &column_family_def : schema_.column_families()) { - column_families_.emplace( - column_family_def.first, - std::make_shared()); + for (auto const& column_family_def : schema_.column_families()) { + column_families_.emplace(column_family_def.first, + std::make_shared()); } return Status(); } +// NOLINTBEGIN(readability-function-cognitive-complexity) StatusOr Table::ModifyColumnFamilies( btadmin::ModifyColumnFamiliesRequest const& request) { - std::cout << "Modify column families: " << request.DebugString() - << std::endl; - std::unique_lock lock(mu_); + std::cout << "Modify column families: " << request.DebugString() << std::endl; + std::unique_lock lock(mu_); auto new_schema = schema_; auto new_column_families = column_families_; for (auto const& modification : request.modifications()) { @@ -97,25 +92,22 @@ StatusOr Table::ModifyColumnFamilies( modification.DebugString())); } if (new_column_families.erase(modification.id()) == 0) { - return NotFoundError( - "No such column family.", - GCP_ERROR_INFO().WithMetadata("modification", - modification.DebugString())); + return NotFoundError("No such column family.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); } if (new_schema.mutable_column_families()->erase(modification.id()) == 0) { - return InternalError( - "Column family with no schema.", - GCP_ERROR_INFO().WithMetadata("modification", - modification.DebugString())); + return InternalError("Column family with no schema.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); } } else if (modification.has_update()) { auto& cfs = *new_schema.mutable_column_families(); auto cf_it = cfs.find(modification.id()); if (cf_it == cfs.end()) { - return 
NotFoundError( - "No such column family.", - GCP_ERROR_INFO().WithMetadata("modification", - modification.DebugString())); + return NotFoundError("No such column family.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); } using google::protobuf::util::FieldMaskUtil; @@ -154,10 +146,9 @@ StatusOr Table::ModifyColumnFamilies( if (!new_schema.mutable_column_families() ->emplace(modification.id(), modification.create()) .second) { - return InternalError( - "Column family with schema but no data.", - GCP_ERROR_INFO().WithMetadata("modification", - modification.DebugString())); + return InternalError("Column family with schema but no data.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); } } else { return UnimplementedError( @@ -166,15 +157,16 @@ StatusOr Table::ModifyColumnFamilies( modification.DebugString())); } } - // Defer destorying potentially large objects to after releasing the lock. + // Defer destroying potentially large objects to after releasing the lock. 
column_families_.swap(new_column_families); schema_ = new_schema; lock.unlock(); return new_schema; } +// NOLINTEND(readability-function-cognitive-complexity) google::bigtable::admin::v2::Table Table::GetSchema() const { - std::lock_guard lock(mu_); + std::lock_guard lock(mu_); return schema_; } @@ -193,8 +185,7 @@ Status Table::Update(google::bigtable::admin::v2::Table const& new_schema, to_update)) { return InvalidArgumentError( "Update mask is invalid.", - GCP_ERROR_INFO().WithMetadata( - "mask", to_update.DebugString())); + GCP_ERROR_INFO().WithMetadata("mask", to_update.DebugString())); } google::protobuf::FieldMask disallowed_mask; FieldMaskUtil::Subtract( @@ -202,10 +193,9 @@ Status Table::Update(google::bigtable::admin::v2::Table const& new_schema, if (disallowed_mask.paths_size() > 0) { return UnimplementedError( "Update mask contains disallowed fields.", - GCP_ERROR_INFO().WithMetadata( - "mask", disallowed_mask.DebugString())); + GCP_ERROR_INFO().WithMetadata("mask", disallowed_mask.DebugString())); } - std::lock_guard lock(mu_); + std::lock_guard lock(mu_); FieldMaskUtil::MergeMessageTo(new_schema, to_update, FieldMaskUtil::MergeOptions(), &schema_); return Status(); @@ -223,15 +213,15 @@ StatusOr> Table::FindColumnFamily( return std::ref(*column_family_it->second); } -Status Table::MutateRow( - google::bigtable::v2::MutateRowRequest const &request) { +// NOLINTBEGIN(readability-function-cognitive-complexity) +Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { // FIXME - add atomicity // FIXME - determine what happens when row/column family/column does not exist - std::lock_guard lock(mu_); + std::lock_guard lock(mu_); assert(request.table_name() == schema_.name()); - for (auto mutation : request.mutations()) { + for (auto const& mutation : request.mutations()) { if (mutation.has_set_cell()) { - auto const & set_cell = mutation.set_cell(); + auto const& set_cell = mutation.set_cell(); auto maybe_column_family = 
FindColumnFamily(set_cell); if (!maybe_column_family) { return maybe_column_family.status(); @@ -241,14 +231,13 @@ Status Table::MutateRow( std::chrono::duration_cast( std::chrono::microseconds(set_cell.timestamp_micros())), set_cell.value()); - } else if (mutation.has_add_to_cell()) { - // FIXME - } else if (mutation.has_merge_to_cell()) { - // FIXME + //} else if (mutation.has_add_to_cell()) { + // // FIXME + //} else if (mutation.has_merge_to_cell()) { + // // FIXME } else if (mutation.has_delete_from_column()) { - auto const & delete_from_column = mutation.delete_from_column(); - auto maybe_column_family = - FindColumnFamily(delete_from_column); + auto const& delete_from_column = mutation.delete_from_column(); + auto maybe_column_family = FindColumnFamily(delete_from_column); if (!maybe_column_family) { return maybe_column_family.status(); } @@ -282,6 +271,7 @@ Status Table::MutateRow( } return Status(); } +// NOLINTEND(readability-function-cognitive-complexity) bool FilteredTableStream::ApplyFilter(InternalFilter const& internal_filter) { if (!absl::holds_alternative(internal_filter)) { @@ -289,7 +279,7 @@ bool FilteredTableStream::ApplyFilter(InternalFilter const& internal_filter) { } for (auto stream_it = unfinished_streams_.begin(); stream_it != unfinished_streams_.end(); ++stream_it) { - auto* cf_stream = + auto const* cf_stream = dynamic_cast(&(*stream_it)->impl()); assert(cf_stream); if (!re2::RE2::PartialMatch( @@ -312,7 +302,7 @@ std::vector FilteredTableStream::CreateCellStreams( std::vector res; res.reserve(cf_streams.size()); for (auto& stream : cf_streams) { - res.emplace_back(CellStream(std::move(stream))); + res.emplace_back(std::move(stream)); } return res; } @@ -326,7 +316,7 @@ StatusOr CreateStringRangeSet( "`row_key` empty", GCP_ERROR_INFO().WithMetadata("row_set", row_set.DebugString())); } - res.Sum(StringRangeSet::Range(row_key, false, row_key, false)); + res.Sum(StringRangeSet::Range(row_key, false, row_key, false)); } for (auto const& 
row_range : row_set.row_ranges()) { auto maybe_range = StringRangeSet::Range::FromRowRange(row_range); @@ -353,9 +343,10 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, } else { row_set = std::make_shared(StringRangeSet::All()); } - std::lock_guard lock(mu_); + std::lock_guard lock(mu_); auto table_stream_ctor = [row_set = std::move(row_set), this] { std::vector> per_cf_streams; + per_cf_streams.reserve(column_families_.size()); for (auto const& column_family : column_families_) { per_cf_streams.emplace_back(std::make_unique( *column_family.second, column_family.first, row_set)); @@ -363,17 +354,16 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, return CellStream( std::make_unique(std::move(per_cf_streams))); }; - FilterContext ctx; StatusOr maybe_stream; if (request.has_filter()) { - maybe_stream = CreateFilter(request.filter(), table_stream_ctor, ctx); + maybe_stream = CreateFilter(request.filter(), table_stream_ctor); } else { maybe_stream = table_stream_ctor(); } if (!maybe_stream) { return maybe_stream.status(); } - CellStream &stream = *maybe_stream; + CellStream& stream = *maybe_stream; for (; stream; ++stream) { std::cout << "Row: " << stream->row_key() << " column_family: " << stream->column_family() @@ -396,7 +386,7 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, } bool Table::IsDeleteProtected() const { - std::lock_guard lock(mu_); + std::lock_guard lock(mu_); return IsDeleteProtectedNoLock(); } diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 0a83299d51551..e1a1393771c99 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -15,6 +15,9 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TABLE_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TABLE_H +#include "google/cloud/bigtable/emulator/column_family.h" +#include 
"google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/row_streamer.h" #include "google/cloud/status.h" #include "google/cloud/status_or.h" #include @@ -22,9 +25,6 @@ #include #include #include -#include "google/cloud/bigtable/emulator/filter.h" -#include "google/cloud/bigtable/emulator/column_family.h" -#include "google/cloud/bigtable/emulator/row_streamer.h" #include namespace google { @@ -47,7 +47,7 @@ class Table { bool IsDeleteProtected() const; - Status MutateRow(google::bigtable::v2::MutateRowRequest const & request); + Status MutateRow(google::bigtable::v2::MutateRowRequest const& request); Status ReadRows(google::bigtable::v2::ReadRowsRequest const& request, RowStreamer& row_streamer) const; @@ -68,9 +68,9 @@ class Table { }; // This class is public only to enable testing. -class FilteredTableStream : public MergeCellStreams { +class FilteredTableStream : public MergeCellStreams { public: - FilteredTableStream( + explicit FilteredTableStream( std::vector> cf_streams) : MergeCellStreams(CreateCellStreams(std::move(cf_streams))) {} diff --git a/google/cloud/bigtable/emulator/table_test.cc b/google/cloud/bigtable/emulator/table_test.cc index 0a14031c6045a..9c9a31759bccd 100644 --- a/google/cloud/bigtable/emulator/table_test.cc +++ b/google/cloud/bigtable/emulator/table_test.cc @@ -14,9 +14,9 @@ #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/bigtable/row_range.h" +#include "google/cloud/testing_util/chrono_literals.h" #include "google/cloud/testing_util/is_proto_equal.h" #include "google/cloud/testing_util/status_matchers.h" -#include "google/cloud/testing_util/chrono_literals.h" #include namespace google { @@ -25,8 +25,8 @@ namespace bigtable { namespace emulator { namespace { -std::string DumpStream( - AbstractCellStreamImpl& stream, NextMode next_mode = NextMode::kCell) { +std::string DumpStream(AbstractCellStreamImpl& stream, + NextMode next_mode = NextMode::kCell) { std::stringstream ss; for (; 
stream.HasValue(); stream.Next(next_mode)) { auto const& cell = stream.Value(); @@ -83,10 +83,10 @@ TEST(FilteredTableStream, OtherFiltersArePropagated) { ColumnFamily fam1; ColumnFamily fam2; fam1.SetCell("row1", "col1", 10_ms, "foo"); - fam1.SetCell("row0", "col1", 10_ms, "foo"); // row key regex - fam2.SetCell("row1", "col1", 10_ms, "foo"); // column family regex - fam1.SetCell("row1", "col2", 10_ms, "foo"); // column qualifier regex - fam1.SetCell("row1", "a1", 10_ms, "foo"); // column range + fam1.SetCell("row0", "col1", 10_ms, "foo"); // row key regex + fam2.SetCell("row1", "col1", 10_ms, "foo"); // column family regex + fam1.SetCell("row1", "col2", 10_ms, "foo"); // column qualifier regex + fam1.SetCell("row1", "a1", 10_ms, "foo"); // column range fam1.SetCell("row1", "col1", 1000_ms, "foo"); // timestamp range auto ffam1 = std::make_unique( fam1, "fam1", std::make_unique(StringRangeSet::All())); diff --git a/google/cloud/bigtable/emulator/to_grpc_status.h b/google/cloud/bigtable/emulator/to_grpc_status.h index 14355438149ed..9fe134f9e0cff 100644 --- a/google/cloud/bigtable/emulator/to_grpc_status.h +++ b/google/cloud/bigtable/emulator/to_grpc_status.h @@ -31,4 +31,3 @@ ::grpc::Status ToGrpcStatus(Status const& to_convert); } // namespace google #endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TO_GRPC_STATUS_H - diff --git a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl index 49f4cd94525db..baa9047dd08b9 100644 --- a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl +++ b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl @@ -102,8 +102,8 @@ google_cloud_cpp_bigtable_hdrs = [ "internal/readrowsparser.h", "internal/retry_context.h", "internal/retry_traits.h", - "internal/row_reader_impl.h", "internal/row_range_helpers.h", + "internal/row_reader_impl.h", "internal/rpc_policy_parameters.h", "internal/rpc_policy_parameters.inc", "internal/traced_row_reader.h", diff --git 
a/google/cloud/bigtable/internal/row_range_helpers.cc b/google/cloud/bigtable/internal/row_range_helpers.cc index 069afb0e7223c..bb7a5a0fd2811 100644 --- a/google/cloud/bigtable/internal/row_range_helpers.cc +++ b/google/cloud/bigtable/internal/row_range_helpers.cc @@ -32,7 +32,7 @@ btproto::RowRange RowRangeHelpers::Empty() { return result; } -bool RowRangeHelpers::IsEmpty(btproto::RowRange const &row_range) { +bool RowRangeHelpers::IsEmpty(btproto::RowRange const& row_range) { RowKeyType unused; // We do not want to copy the strings unnecessarily, so initialize a reference // pointing to *_key_closed() or *_key_open(), as needed. @@ -200,21 +200,17 @@ void RowRangeHelpers::SanitizeEmptyEndKeys( bool RowRangeHelpers::StartLess::operator()( btproto::RowRange const& left, btproto::RowRange const& right) const { - if (!left.has_start_key_open() && - !left.has_start_key_closed()) { + if (!left.has_start_key_open() && !left.has_start_key_closed()) { // left is empty - return right.has_start_key_open() || - right.has_start_key_closed(); + return right.has_start_key_open() || right.has_start_key_closed(); } // left is non-empty - if (!right.has_start_key_open() && - !right.has_start_key_closed()) { + if (!right.has_start_key_open() && !right.has_start_key_closed()) { return false; } // both are non-empty - auto const& left_start = left.has_start_key_closed() - ? left.start_key_closed() - : left.start_key_open(); + auto const& left_start = left.has_start_key_closed() ? left.start_key_closed() + : left.start_key_open(); auto const& right_start = right.has_start_key_closed() ? 
right.start_key_closed() : right.start_key_open(); @@ -224,14 +220,13 @@ bool RowRangeHelpers::StartLess::operator()( return cmp < 0; } // same row key in both - return left.has_start_key_closed() && - right.has_start_key_open(); + return left.has_start_key_closed() && right.has_start_key_open(); } bool RowRangeHelpers::EndLess::operator()( btproto::RowRange const& left, btproto::RowRange const& right) const { if (!right.has_end_key_open() && !right.has_end_key_closed()) { - // right is inifinite + // right is infinite return left.has_end_key_open() || left.has_end_key_closed(); } // right is finite @@ -257,5 +252,3 @@ GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END } // namespace bigtable } // namespace cloud } // namespace google - - diff --git a/google/cloud/bigtable/internal/row_range_helpers.h b/google/cloud/bigtable/internal/row_range_helpers.h index 4f49b246aac02..39beaa6dddee5 100644 --- a/google/cloud/bigtable/internal/row_range_helpers.h +++ b/google/cloud/bigtable/internal/row_range_helpers.h @@ -30,19 +30,19 @@ class RowRangeHelpers { static google::bigtable::v2::RowRange Empty(); static bool IsEmpty(google::bigtable::v2::RowRange const& row_range); static bool BelowStart(google::bigtable::v2::RowRange const& row_range, - RowKeyType const& key); + RowKeyType const& key); static bool AboveEnd(google::bigtable::v2::RowRange const& row_range, - RowKeyType const& key); + RowKeyType const& key); static std::pair Intersect( google::bigtable::v2::RowRange const& lhs, google::bigtable::v2::RowRange const& rhs); /// Return true if @p key is in the range. template static bool Contains(google::bigtable::v2::RowRange const& row_range, - T const& key) { + T const& key) { return !BelowStart(row_range, key) && !AboveEnd(row_range, key); } - static void SanitizeEmptyEndKeys(google::bigtable::v2::RowRange &row_range); + static void SanitizeEmptyEndKeys(google::bigtable::v2::RowRange& row_range); /// A Functor describing the order on range starts. 
struct StartLess { From 807de75953bc421dce00491f80bcd462dea1c3bf Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 18 Mar 2025 15:23:18 +0300 Subject: [PATCH 089/195] emulator: DeleteRow: don't forget to erase the row key. --- google/cloud/bigtable/emulator/column_family.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 0285df122d785..e82965069aae1 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -90,6 +90,8 @@ std::map> ColumnFamily::DeleteRow(std::string con } } + rows_.erase(row_key); + return res; } From eda404273e336bc7f6a3cebee99934e08b2a7e6c Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 18 Mar 2025 16:32:43 +0300 Subject: [PATCH 090/195] emulator: Fix segmentation fault caused by invalidated iterator in DeleteRow. --- .../cloud/bigtable/emulator/column_family.cc | 25 ++++++++++++++----- google/cloud/bigtable/emulator/table.cc | 25 +++++-------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index e82965069aae1..de11408421b21 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -75,18 +75,32 @@ void ColumnFamily::SetCell(std::string const& row_key, rows_[row_key].SetCell(column_qualifier, timestamp, value); } -std::map> ColumnFamily::DeleteRow(std::string const& row_key) { +std::map> ColumnFamily::DeleteRow( + std::string const& row_key) { std::map> res; auto& column_family_row = rows_[row_key]; - for (const auto& column_it : column_family_row) { + for (auto column_it = column_family_row.begin(); + column_it != column_family_row.end(); + column_it = column_family_row.begin()) { // DeleteColumn can + // invalidate the + // iterator by + // deleting a column + // family 
row's keys + // (the columnn + // qualifiers, + // therefore we need + // to re-calculate the + // beginning of the + // map every loop). + // Not setting start and end timestamps selects all cells for deletion. ::google::bigtable::v2::TimestampRange time_range; - auto deleted_column = DeleteColumn(row_key, column_it.first, time_range); + auto deleted_column = DeleteColumn(row_key, column_it->first, time_range); if (deleted_column.size() > 0) { - res[std::move(column_it.first)] = std::move(deleted_column); + res[std::move(column_it->first)] = std::move(deleted_column); } } @@ -133,8 +147,7 @@ class FilteredColumnFamilyStream::FilterApply { bool operator()(ColumnRegex const& column_regex) { parent_.column_regexes_.emplace_back(column_regex.regex); - return true; - } + return true; } private: FilteredColumnFamilyStream& parent_; diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index c44f118e338c9..3934d22265a47 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -454,6 +454,7 @@ Status RowTransaction::DeleteFromFamily( ErrorInfo()); } + std::map::iterator column_family_row_it; if (column_family_row_it = column_family_it->second->find(request_.row_key()); column_family_row_it == column_family_it->second->end()) { @@ -463,28 +464,14 @@ Status RowTransaction::DeleteFromFamily( request_.row_key(), column_family_it->first), ErrorInfo()); } - RestoreColumnFamilyRow restore_row; - - restore_row.column_family_it = column_family_it; - restore_row.row_key = request_.row_key(); - std::vector cells; - for (auto const& column_family_row_it : column_family_row_it->second) { - for (auto const& column_row_it : column_family_row_it.second) { - RestoreColumnFamilyRow::Cell cell; - cell.column_qualifer = std::move(column_family_row_it.first); - cell.timestamp = column_row_it.first; - cell.value = std::move(column_row_it.second); - cells.push_back(cell); + auto deleted = 
column_family_it->second->DeleteRow(request_.row_key()); + for (const auto& column : deleted) { + for (const auto& cell : column.second) { + RestoreValue restore_value = {*column_family_it->second, request_.row_key(), std::move(column.first), cell.timestamp, std::move(cell.value)}; + undo_.emplace(restore_value); } } - restore_row.cells = std::move(cells); - column_family_it->second->DeleteRow(request_.row_key()); // Is certain - // to succeed - // unless we - // run out of - // memory. - undo_.emplace(std::move(restore_row)); return Status(); } From bfa3d240e177e15a2ca70e525777f6c8cbf6a092 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 18 Mar 2025 16:32:43 +0300 Subject: [PATCH 091/195] emulator: Fix segmentation fault caused by invalidated iterator in DeleteRow. Also implement DeleteFromFamily in terms of the new DeleteRow which returns RestoreValue vectors (and therefore eliminate the use of one more undo log type). --- .../cloud/bigtable/emulator/column_family.cc | 25 ++++++++++++++----- google/cloud/bigtable/emulator/table.cc | 25 +++++-------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index e82965069aae1..de11408421b21 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -75,18 +75,32 @@ void ColumnFamily::SetCell(std::string const& row_key, rows_[row_key].SetCell(column_qualifier, timestamp, value); } -std::map> ColumnFamily::DeleteRow(std::string const& row_key) { +std::map> ColumnFamily::DeleteRow( + std::string const& row_key) { std::map> res; auto& column_family_row = rows_[row_key]; - for (const auto& column_it : column_family_row) { + for (auto column_it = column_family_row.begin(); + column_it != column_family_row.end(); + column_it = column_family_row.begin()) { // DeleteColumn can + // invalidate the + // iterator by + // deleting a column + // family
family row's keys + // (the columnn + // qualifiers, + // therefore we need + // to re-calculate the + // beginning of the + // map every loop). + // Not setting start and end timestamps selects all cells for deletion. ::google::bigtable::v2::TimestampRange time_range; - auto deleted_column = DeleteColumn(row_key, column_it.first, time_range); + auto deleted_column = DeleteColumn(row_key, column_it->first, time_range); if (deleted_column.size() > 0) { - res[std::move(column_it.first)] = std::move(deleted_column); + res[std::move(column_it->first)] = std::move(deleted_column); } } @@ -133,8 +147,7 @@ class FilteredColumnFamilyStream::FilterApply { bool operator()(ColumnRegex const& column_regex) { parent_.column_regexes_.emplace_back(column_regex.regex); - return true; - } + return true; } private: FilteredColumnFamilyStream& parent_; diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index c44f118e338c9..3934d22265a47 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -454,6 +454,7 @@ Status RowTransaction::DeleteFromFamily( ErrorInfo()); } + std::map::iterator column_family_row_it; if (column_family_row_it = column_family_it->second->find(request_.row_key()); column_family_row_it == column_family_it->second->end()) { @@ -463,28 +464,14 @@ Status RowTransaction::DeleteFromFamily( request_.row_key(), column_family_it->first), ErrorInfo()); } - RestoreColumnFamilyRow restore_row; - - restore_row.column_family_it = column_family_it; - restore_row.row_key = request_.row_key(); - std::vector cells; - for (auto const& column_family_row_it : column_family_row_it->second) { - for (auto const& column_row_it : column_family_row_it.second) { - RestoreColumnFamilyRow::Cell cell; - cell.column_qualifer = std::move(column_family_row_it.first); - cell.timestamp = column_row_it.first; - cell.value = std::move(column_row_it.second); - cells.push_back(cell); + auto deleted = 
column_family_it->second->DeleteRow(request_.row_key()); + for (const auto& column : deleted) { + for (const auto& cell : column.second) { + RestoreValue restore_value = {*column_family_it->second, request_.row_key(), std::move(column.first), cell.timestamp, std::move(cell.value)}; + undo_.emplace(restore_value); } } - restore_row.cells = std::move(cells); - column_family_it->second->DeleteRow(request_.row_key()); // Is certain - // to succeed - // unless we - // run out of - // memory. - undo_.emplace(std::move(restore_row)); return Status(); } From 915b3719958349bd469048718735170c7f36197d Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 18 Mar 2025 17:33:04 +0300 Subject: [PATCH 092/195] emulator: clang-format. --- google/cloud/bigtable/emulator/table.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 3934d22265a47..7836cf8b9d63d 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -454,7 +454,6 @@ Status RowTransaction::DeleteFromFamily( ErrorInfo()); } - std::map::iterator column_family_row_it; if (column_family_row_it = column_family_it->second->find(request_.row_key()); column_family_row_it == column_family_it->second->end()) { @@ -466,9 +465,11 @@ Status RowTransaction::DeleteFromFamily( } auto deleted = column_family_it->second->DeleteRow(request_.row_key()); - for (const auto& column : deleted) { - for (const auto& cell : column.second) { - RestoreValue restore_value = {*column_family_it->second, request_.row_key(), std::move(column.first), cell.timestamp, std::move(cell.value)}; + for (auto const& column : deleted) { + for (auto const& cell : column.second) { + RestoreValue restore_value = {*column_family_it->second, + request_.row_key(), std::move(column.first), + cell.timestamp, std::move(cell.value)}; undo_.emplace(restore_value); } } From 
94552ac92ad07db6175ca36fc34a7e298bba7b73 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 18 Mar 2025 17:35:08 +0300 Subject: [PATCH 093/195] emulator: transaction rollback: get rid of another undo log type. This leaves us with just 2 undo log types: RestoreValue and DeleteValue, a situation that is much simpler than before. --- google/cloud/bigtable/emulator/table.cc | 13 ------------- google/cloud/bigtable/emulator/table.h | 2 +- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 7836cf8b9d63d..42a4569d1561a 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -570,19 +570,6 @@ void RowTransaction::Undo() { continue; } - auto* restore_row = absl::get_if(&op); - if (restore_row) { - for (auto const& cell : restore_row->cells) { - // Take care to use std::move() to avoid copying potentially - // very larg values (the column qualifier and cell values can - // be very large. - restore_row->column_family_it->second->SetCell( - restore_row->row_key, std::move(cell.column_qualifer), - cell.timestamp, std::move(cell.value)); - } - continue; - } - // If we get here, there is an type of undo log that has not been // implemented! std::abort(); diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 32d30294d0644..d2d8e399387f0 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -154,7 +154,7 @@ class RowTransaction { bool committed_; std::shared_ptr
table_; - std::stack> + std::stack> undo_; ::google::bigtable::v2::MutateRowRequest const& request_; }; From 481293ea0363abf9c892e7891926b3a40997a7c6 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 18 Mar 2025 17:52:03 +0300 Subject: [PATCH 094/195] emulator: Implement a DeleteFromRow that supports rollback. --- google/cloud/bigtable/emulator/table.cc | 26 +++++++++++++++++++++++++ google/cloud/bigtable/emulator/table.h | 3 +-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 42a4569d1561a..5573c38a6f5e2 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -436,6 +436,32 @@ Status RowTransaction::DeleteFromColumn( return Status(); } +Status RowTransaction::DeleteFromRow() { + bool row_existed; + for (auto& column_family : table_->column_families_) { + auto deleted_columns = column_family.second->DeleteRow(request_.row_key()); + + for (auto& column : deleted_columns) { + for (auto& cell : column.second) { + RestoreValue restrore_value = { + *column_family.second, request_.row_key(), std::move(column.first), + cell.timestamp, std::move(cell.value)}; + undo_.emplace(restrore_value); + row_existed = true; + } + } + } + + if (row_existed) { + return Status(); + } + + return Status( + StatusCode::kNotFound, + absl::StrFormat("row %s not found in table", request_.row_key()), + ErrorInfo()); +} + Status RowTransaction::DeleteFromFamily( ::google::bigtable::v2::Mutation_DeleteFromFamily const& delete_from_family) { diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index d2d8e399387f0..7c25f6afa209c 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -146,8 +146,7 @@ class RowTransaction { Status DeleteFromFamily( ::google::bigtable::v2::Mutation_DeleteFromFamily const& delete_from_family); - Status DeleteFromRow( - 
::google::bigtable::v2::Mutation_DeleteFromRow const& delete_from_row); + Status DeleteFromRow(); private: void Undo(); From 2674c1a15035018522e0c71e14a60044a2d95ff3 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 18 Mar 2025 18:20:14 +0300 Subject: [PATCH 095/195] emulator: Test DeleteFromRow with rollback support and use it in MutateRow code. --- .../cloud/bigtable/emulator/rollback_test.cc | 51 +++++++++++++++++++ google/cloud/bigtable/emulator/table.cc | 9 ++-- 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 57221e5d8660a..a64274e377a9f 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -677,6 +677,57 @@ TEST(TransactonRollback, DeleteFromColumnRollback) { 3000, data)); } +// Can we delete a row from all column families? +TEST(TransactonRollback, DeleteFromRowBasicFunction) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "column_family_1"; + auto const* const column_qualifer = "column_qualifier"; + auto const timestamp_micros = 1000; + auto const* data = "value"; + auto const* const second_column_family_name = "column_family_2"; + + std::vector column_families = {column_family_name, + second_column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifer, timestamp_micros, + data}; + v.push_back(p); + + p = {second_column_family_name, column_qualifer, timestamp_micros, data}; + v.push_back(p); + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + 
ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, + timestamp_micros, data)); + ASSERT_STATUS_OK( + has_column(table, second_column_family_name, row_key, column_qualifer)); + ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); + + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + auto* mutation_request_mutation = mutation_request.add_mutations(); + mutation_request_mutation->mutable_delete_from_row(); + + ASSERT_STATUS_OK(table->MutateRow(mutation_request)); + ASSERT_EQ(false, has_cell(table, column_family_name, row_key, column_qualifer, + timestamp_micros, data) + .ok()); + ASSERT_EQ(false, has_column(table, second_column_family_name, row_key, + column_qualifer) + .ok()); +} } // namespace emulator } // namespace bigtable diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 5573c38a6f5e2..58cc1daacb608 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -254,12 +254,9 @@ Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { return status; } } else if (mutation.has_delete_from_row()) { - bool row_existed = false; - for (auto& column_family : column_families_) { - row_existed |= column_family.second->DeleteRow(request.row_key()).size(); - } - if (!row_existed) { - // FIXME no such row existed + auto status = row_transaction.DeleteFromRow(); + if (!status.ok()) { + return status; } } else { return UnimplementedError( From cebb6cbdaf12ec3c8b7ec6982a5d69c5b12ecca3 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 18 Mar 2025 18:43:53 +0300 Subject: [PATCH 096/195] emulator: Update and correct a comment. 
--- .../cloud/bigtable/emulator/column_family.cc | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index de11408421b21..1217bf61da796 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -83,17 +83,34 @@ std::map> ColumnFamily::DeleteRow( for (auto column_it = column_family_row.begin(); column_it != column_family_row.end(); - column_it = column_family_row.begin()) { // DeleteColumn can + column_it = column_family_row.begin()) { // Why we call + // column_family_row.begin() + // every iteration: + // DeleteColumn can // invalidate the // iterator by // deleting a column // family row's keys - // (the columnn - // qualifiers, - // therefore we need - // to re-calculate the + // (the column + // qualifiers and + // their column + // rows), therefore + // we need to + // re-calculate the // beginning of the - // map every loop). + // map every loop. At + // the same time + // because we are + // removing all cells + // of every column, + // we know + // DeleteColumn will + // eventually remove + // all the columns + // and the row + // itself, so this + // loop will + // terminate. // Not setting start and end timestamps selects all cells for deletion. ::google::bigtable::v2::TimestampRange time_range; From a443044d162e7a4e0a6130753cd82b9b6fe6a998 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 18 Mar 2025 19:47:10 +0300 Subject: [PATCH 097/195] emulator: Remove now-unused undo type.
--- google/cloud/bigtable/emulator/table.h | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index abdbc5831f1c8..981af78d16cc5 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -85,23 +85,6 @@ class Table : public std::enable_shared_from_this
{ std::map> column_families_; }; -struct RestoreColumnFamilyRow { - // We hold the table lock and any operation to delete a column - // family will need to acquire the same lock. Therefore we are sure - // that a column family cannot disappear concurrent to a chain of - // mutations on the same row. So for column families alone (but not - // for other maps) it is safe to store their iterator in the undo - // log. It is unsafe for all other maps. - std::map>::iterator column_family_it; - std::string row_key; - struct Cell { - std::string column_qualifer; - std::chrono::milliseconds timestamp; - std::string value; - }; - std::vector cells; -}; - struct RestoreValue { ColumnFamily& column_family; std::string row_key; From ca1d93121e3b0cdb220dcd98d7d12b9afcf2ff70 Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Wed, 19 Mar 2025 12:48:11 +0100 Subject: [PATCH 098/195] documentation and further clang-tidy --- google/cloud/bigtable/emulator/cluster.cc | 12 ++ google/cloud/bigtable/emulator/cluster.h | 51 ++++- .../cloud/bigtable/emulator/column_family.cc | 2 - .../cloud/bigtable/emulator/column_family.h | 5 + google/cloud/bigtable/emulator/emulator.cc | 12 +- google/cloud/bigtable/emulator/filter.cc | 193 +++++++++++++----- google/cloud/bigtable/emulator/filter.h | 36 ++++ google/cloud/bigtable/emulator/range_set.cc | 57 ++++++ .../cloud/bigtable/emulator/range_set_test.cc | 4 - google/cloud/bigtable/emulator/server.cc | 7 + google/cloud/bigtable/emulator/table.h | 12 +- .../cloud/bigtable/emulator/to_grpc_status.cc | 1 + .../cloud/bigtable/emulator/to_grpc_status.h | 1 + 13 files changed, 329 insertions(+), 64 deletions(-) diff --git a/google/cloud/bigtable/emulator/cluster.cc b/google/cloud/bigtable/emulator/cluster.cc index e346b757d8708..fe3397174b53c 100644 --- a/google/cloud/bigtable/emulator/cluster.cc +++ b/google/cloud/bigtable/emulator/cluster.cc @@ -26,6 +26,18 @@ namespace { namespace btadmin = google::bigtable::admin::v2; +/** + * Obtain a limited view of a 
`Table`'s schema, by applying a `TableView`. + * + * @param table_name table name in the form of + * `/projects/{}/instances/{}/tables/{}` to be returned in the + * @param table the table in question + * @param view the view to apply + * @param default_view the view to fall back to in case `view` is set to + * `btadmin::Table::VIEW_UNSPECIFIED`. `default_view` should not be set to + * `btadmin::Table::VIEW_UNSPECIFIED`. + * @return the trimmed schema or error + */ StatusOr ApplyView(std::string const& table_name, Table const& table, btadmin::Table_View view, btadmin::Table_View default_view) { diff --git a/google/cloud/bigtable/emulator/cluster.h b/google/cloud/bigtable/emulator/cluster.h index ff7b9f6bcd8eb..11baa11387c24 100644 --- a/google/cloud/bigtable/emulator/cluster.h +++ b/google/cloud/bigtable/emulator/cluster.h @@ -31,21 +31,70 @@ namespace emulator { */ class Cluster { public: + /** + * Create a new table according to schema. + * + * @param table_name table's name in the form of + * `/projects/{}/instances/{}/tables/{}`. + * @param schema the schema of the newly create table. + * @return the schema of the newly created table. + */ StatusOr CreateTable( std::string const& table_name, google::bigtable::admin::v2::Table schema); + /** + * List tables in the clustera. + * + * @param instance_name instances` name in the form of + * `/projects/{}/instances/{}`. + * @param view a view to limit the amount of information returned about + * tables. + * @return a vector of tables' schemas present in the instance trimmed + * according to `view`. + */ StatusOr> ListTables( std::string const& instance_name, google::bigtable::admin::v2::Table_View view) const; + /** + * Get details about a given table. + * + * @param table_name table's name in the form of + * `/projects/{}/instances/{}/tables/{}`. + * @param view a view to limit the amount of information returned about + * table. + * @return table's schema trimmed according to `view`. 
+ */ StatusOr GetTable( std::string const& table_name, google::bigtable::admin::v2::Table_View view) const; + /** + * Delete a table by its name. + * + * @param table_name table's name in the form of + * `/projects/{}/instances/{}/tables/{}`. + * @return whether deletion succeeded. Apart from failing to to remove a + * non-existent table it might also fail if the table has deletion + * protection set. + */ Status DeleteTable(std::string const& table_name); + /** + * Check if a table exists. + * @param table_name table's name in the form of + * `/projects/{}/instances/{}/tables/{}`. + * @return true if table exists. + */ bool HasTable(std::string const& table_name) const; + /** + * Find a table by name. + * + * @param table_name table's name in the form of + * `/projects/{}/instances/{}/tables/{}`. + * @return a pointer to the table or error if it doesn't exist. + */ StatusOr> FindTable(std::string const& table_name); private: @@ -54,7 +103,7 @@ class Cluster { /** * All the tables indexed by their names. * - * The names are in the form `/ projects/{}/instances/{}/tables/{}`. We're + * The names are in the form `/projects/{}/instances/{}/tables/{}`. We're * holding the tables by `shared_ptr`s in order to be able to allow for more * concurrency - every access to a table should start with creating a copy of * the shared pointer. 
diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 58da352079a2e..59fe9541c5a81 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -185,7 +185,6 @@ void FilteredColumnFamilyStream::InitializeIfNeeded() const { } } -// Returns whether we've managed to find another cell in currently pointed row bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() const { for (; column_it_.value() != columns_.value().end(); ++(column_it_.value())) { cells_ = RangeFilteredMapView( @@ -198,7 +197,6 @@ bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() const { return false; } -// Returns whether we've managed to find another cell bool FilteredColumnFamilyStream::PointToFirstCellAfterRowChange() const { for (; (*row_it_) != rows_.end(); ++(*row_it_)) { columns_ = RegexFiteredMapView< diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 5ee1220f68e92..9776ded7f835c 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -223,6 +223,9 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { * * `column_it_` doesn't point to `end()` * * `cell_it` points to a cell in the column family pointed to by * `column_it_` + * + * @return whether we've managed to find another cell in currently pointed + * row. */ bool PointToFirstCellAfterColumnChange() const; /** @@ -230,6 +233,8 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { * * Similarly to `PointToFirstCellAfterColumnChange()` it ensures that all * internal iterators are valid (or we've reached `end()`). 
+ * + * @return whether we've managed to find another cell */ bool PointToFirstCellAfterRowChange() const; diff --git a/google/cloud/bigtable/emulator/emulator.cc b/google/cloud/bigtable/emulator/emulator.cc index e1dc144a93ed5..3501b6a6f753f 100644 --- a/google/cloud/bigtable/emulator/emulator.cc +++ b/google/cloud/bigtable/emulator/emulator.cc @@ -15,17 +15,9 @@ #include "google/cloud/bigtable/emulator/server.h" #include -namespace google { -namespace cloud { -namespace bigtable { -namespace emulator {} // namespace emulator -} // namespace bigtable -} // namespace cloud -} // namespace google - int main() { - using namespace google::cloud::bigtable::emulator; - auto server = CreateDefaultEmulatorServer("[::]", 8888); + auto server = google::cloud::bigtable::emulator::CreateDefaultEmulatorServer( + "[::]", 8888); std::cout << "Server running on port " << server->bound_port() << "\n"; server->Wait(); return 0; diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index a4de0e2fac43d..5d64bcc37c959 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ b/google/cloud/bigtable/emulator/filter.cc @@ -68,6 +68,23 @@ void CellStream::EmulateNextRow() { NextColumn()); } +/** + * A meta functor useful for building filters which act on whole rows. + * + * Some filters (e.g. `row_sample_filter`) have a per-row state (in this + * example, the state is either to filter a row out or not). This state is + * reset every time a new row is encountered. Hence, this meta functor allows + * its users to specify two underlying functors: + * * `FilterFunctor` which given the per-row state and a cell, decides whether + * to filter it out or not (if not, also how far to advance the underlying + * cell stream). + * * `StateResetFunctor` which creates a new state for every row. + * + * @tparam FilterFunctor a functor which accepts the per-row state and a cell as + * input and returns whether this cell should be included in the result. 
+ * @tparam StateResetFunctor a zero-argument functor which creates a new per-row + * state. + */ template class PerRowStateFilter { static_assert(google::cloud::internal::is_invocable::value, @@ -84,9 +101,24 @@ class PerRowStateFilter { "Invalid result of `FilterFunctor` invocation."); public: + /** + * Create a new object. + * + * @param filter a functor which accepts the per-row state and a cell as + * input and returns whether this cell should be included in the result. + * @param reset a zero-argument functor which creates a new per-row + * state. + */ PerRowStateFilter(FilterFunctor filter, StateResetFunctor reset) : filter_(std::move(filter)), reset_(std::move(reset)) {} + /** + * Decide on what to do with a cell. + * + * @param cell_view the cell in question + * @return if empty - include the cell in the result; if not empty - instruct + * the caller by how much to advance the underlying stream. + */ absl::optional operator()(CellView const& cell_view) { if (!prev_row_ || prev_row_.value() != cell_view.row_key()) { state_ = reset_(); @@ -102,32 +134,23 @@ class PerRowStateFilter { StateResetFunctor reset_; }; -template -class PerColumnStateFilter { - static_assert( - google::cloud::internal::is_invocable::value, - "StateResetFunctor must be invocable with no arguments"); - using State = - std::decay_t>; - static_assert(std::is_default_constructible::value, - "State must be default constructible"); - static_assert(std::is_assignable::value, - "State must assignable"); - static_assert(std::is_same, - absl::optional>::value, - "Invali result of `FilterFunctor` invocation."); - +/// A functor for filtering cell streams to return only first X cells per col. 
+class CellsPerColumnFilter { public: - PerColumnStateFilter(FilterFunctor filter, StateResetFunctor reset) - : filter_(std::move(filter)), reset_(std::move(reset)) {} + explicit CellsPerColumnFilter(std::size_t cells_per_column_limit) + : cells_per_column_limit_(cells_per_column_limit), + cells_per_column_left_(cells_per_column_limit) {} absl::optional operator()(CellView const& cell_view) { if (!prev_ || !prev_->Matches(cell_view)) { - state_ = reset_(); + cells_per_column_left_ = cells_per_column_limit_; prev_ = Prev(cell_view); } - return filter_(state_, cell_view); + if (cells_per_column_left_ > 0) { + --cells_per_column_left_; + return {}; + } + return NextMode::kColumn; } private: @@ -150,18 +173,32 @@ class PerColumnStateFilter { std::string column_qualifier_; }; absl::optional prev_; - State state_; - FilterFunctor filter_; - StateResetFunctor reset_; + std::size_t cells_per_column_limit_; + std::size_t cells_per_column_left_; }; +/** + * A meta cell stream, which is created from a cell transforming functor. + * + * @tparam Transformer an unary functor which should accept a `CellView` and + * return a transformed version of it. + */ template class TrivialTransformer : public AbstractCellStreamImpl { public: + /** + * Create a new object. + * + * @param source underlying cell stream to be transformed. + * @param filter functor, which accepts a `CellView` and returns a transformed + * `CellView` to be returned from this stream. 
+ */ TrivialTransformer(CellStream source, Transformer transformer) : source_(std::move(source)), transformer_(std::move(transformer)) {} - bool ApplyFilter(InternalFilter const&) override { return false; } + bool ApplyFilter(InternalFilter const& internal_filter) override { + return source_.ApplyFilter(internal_filter); + } bool HasValue() const override { return source_.HasValue(); } @@ -184,12 +221,24 @@ class TrivialTransformer : public AbstractCellStreamImpl { mutable absl::optional transformed_; }; +/** + * Create a cell stream from an underlying stream and a transforming functor. + * + * @tparam Transformer an unary functor which should accept a `CellView` and + * return a transformed version of it. + */ template CellStream MakeTrivialTransformer(CellStream source, Transformer transformer) { return CellStream(std::make_unique>( std::move(source), std::move(transformer))); } +/** + * A meta cell stream filtering an underlying stream according to a functor. + * + * @tparam Filter a functor, which given a cell, decides whether to filter it + * out or not (if not, also how far to advance the underlying cell stream). + */ template class TrivialFilter : public AbstractCellStreamImpl { static_assert( @@ -199,6 +248,19 @@ class TrivialFilter : public AbstractCellStreamImpl { "Invalid filter return type"); public: + /** + * Create a new object. + * + * @param source underlying cell stream to be filtered. + * @param filter functor, which accepts a `CellView` and decides + * whether to filter it out or not (if not, also how far to advance the + * underlying cell stream). + * @param filter_filter a functor which given an `InternalFilter` decides + * whether filtering this cell streams results and then applying the + * `InternalFilter` would yield the same results as applying + * `InternalFilter` to the underlying stream and the perform this stream's + * filtering. 
+ */ TrivialFilter(CellStream source, Filter filter, std::function filter_filter) : source_(std::move(source)), @@ -229,6 +291,7 @@ class TrivialFilter : public AbstractCellStreamImpl { } private: + /// Consume the underlying stream until an unfiltered cell is encountered. void EnsureCurrentNotFiltered() const { while (source_.HasValue()) { auto maybe_next_mode = filter_(*source_); @@ -252,6 +315,19 @@ class TrivialFilter : public AbstractCellStreamImpl { std::function filter_filter_; }; +/** + * Create a cell stream from an underlying stream and a cell filtering functor. + * + * @param source underlying cell stream to be filtered. + * @param filter functor, which accepts a `CellView` and decides + * whether to filter it out or not (if not, also how far to advance the + * underlying cell stream). + * @param filter_filter a functor which given an `InternalFilter` decides + * whether filtering this cell stream's results and then applying the + * `InternalFilter` would yield the same results as applying + * `InternalFilter` to the underlying stream and then performing this + * stream's filtering. + */ template CellStream MakeTrivialFilter( CellStream source, Filter filter, @@ -260,6 +336,20 @@ CellStream MakeTrivialFilter( std::move(source), std::move(filter), std::move(filter_filter))); } +/** + * Create a cell stream filtering an underlying stream, which has a per-row state. + * + * @param source underlying cell stream to be filtered. + * @param filter a functor which accepts the per-row state and a cell as + * input and returns whether this cell should be included in the result. + * @param state_reset a zero-argument functor which creates a new per-row + * state. + * @param filter_filter a functor which given an `InternalFilter` decides + * whether filtering this cell stream's results and then applying the + * `InternalFilter` would yield the same results as applying + * `InternalFilter` to the underlying stream and then performing this + * stream's filtering.
+ */ template CellStream MakePerRowStateFilter( CellStream source, FilterFunctor filter, StateResetFunctor state_reset, @@ -270,17 +360,6 @@ CellStream MakePerRowStateFilter( std::move(filter_filter)); } -template -CellStream MakePerColumnStateFilter( - CellStream source, FilterFunctor filter, StateResetFunctor state_reset, - std::function filter_filter = PassAllFilters) { - return MakeTrivialFilter( - std::move(source), - PerColumnStateFilter( - std::move(filter), std::move(state_reset)), - std::move(filter_filter)); -} - bool MergeCellStreams::CellStreamGreater::operator()( std::unique_ptr const& lhs, std::unique_ptr const& rhs) const { @@ -393,8 +472,18 @@ bool MergeCellStreams::SkipRowOrColumn(NextMode mode) { return true; } +/// A cell stream for handling a Condition filter. class ConditionStream : public AbstractCellStreamImpl { public: + /** + * Create a new object. + * + * @param source the underlying cell stream + * @param predicate_stream the stream deciding whether for a given row the + * true branch or false branch should be selected + * @param true_stream the stream generating cells for the true branch + * @param false_stream the stream generating cells for the false branch + */ ConditionStream(CellStream source, CellStream predicate, CellStream true_stream, CellStream false_stream) : source_(std::move(source)), @@ -502,6 +591,7 @@ class ConditionStream : public AbstractCellStreamImpl { mutable std::string current_row_; }; +/// A cell stream not generating any cells. class EmptyCellStreamImpl : public AbstractCellStreamImpl { bool ApplyFilter(InternalFilter const&) override { return true; } bool HasValue() const override { return false; } @@ -516,6 +606,18 @@ class EmptyCellStreamImpl : public AbstractCellStreamImpl { }; // NOLINTBEGIN(misc-no-recursion,readability-function-cognitive-complexity) +/** + * Create a filter DAG constructor based on the proto definition. 
+ * + * @param filter the protobuf definition of the filter DAG to be created + * @param source_ctor a zero-argument functor which can be used to create the + * underlying cell stream, which this filter will work on. + * @param direct_sinks an accumulator which will be filled by zero-argument + * functors which will create branches of the DAG whose output should bypass + * any other filters (the `sink` filter). + * @return a zero-argument functor which will return a DAG described by + * `filter`. + */ StatusOr CreateFilterImpl( ::google::bigtable::v2::RowFilter const& filter, CellStreamConstructor source_ctor, @@ -786,17 +888,8 @@ StatusOr CreateFilterImpl( CellStreamConstructor res = [source_ctor = std::move(source_ctor), cells_per_column_limit] { auto source = source_ctor(); - return MakePerColumnStateFilter( - std::move(source), - [cells_per_column_limit]( - std::int64_t& per_column_state, - CellView const&) -> absl::optional { - if (per_column_state++ < cells_per_column_limit) { - return {}; - } - return NextMode::kColumn; - }, - []() -> std::int64_t { return 0; }, + return MakeTrivialFilter( + std::move(source), CellsPerColumnFilter(cells_per_column_limit), [](InternalFilter const& internal_filter) { return !absl::holds_alternative(internal_filter); }); @@ -969,6 +1062,14 @@ StatusOr CreateFilterImpl( } // NOLINTEND(misc-no-recursion,readability-function-cognitive-complexity) +/** + * Create a filter DAG based on the proto definition. + * + * @param filter the protobuf definition of the filter DAG to be created + * @param source_ctor a zero-argument functor which can be used to create the + * underlying cell stream, which this filter will work on. + * @return DAG described by `filter`. 
+ */ StatusOr CreateFilter( ::google::bigtable::v2::RowFilter const& filter, CellStreamConstructor source_ctor) { diff --git a/google/cloud/bigtable/emulator/filter.h b/google/cloud/bigtable/emulator/filter.h index 73a3a08570fc1..670513949253b 100644 --- a/google/cloud/bigtable/emulator/filter.h +++ b/google/cloud/bigtable/emulator/filter.h @@ -28,6 +28,42 @@ namespace cloud { namespace bigtable { namespace emulator { +// The code declared in this file is used to construct filters according to +// `::google::bigtable::v2::RowFilter` protobuf definition. +// It describes a DAG through which cells should be routed (and potentially +// copied in case of the `Interleave` filter). +// +// The simplest way of implementing such a DAG is to create an object for every +// node of the graph, which would filter/transform the result. This, however, +// could be very inefficient. For example, if we're only interested in the last +// version of a cell of a specific column, the lowermost layers of the graph +// would have to scan the whole table. +// +// This example shows that we should apply the filters as close to the beginning +// of the graph as possible. The in-memory implementation could jump over +// uninteresting columns and avoid passing all the values around. Most of the +// filters can be applied in any order, which makes our filtering task easy. +// +// Unfortunately, some filters (e.g. `cells_per_row_limit_filter`) prevents us +// from moving filters applied later in the chain to its beginning. Hence, we +// need to keep the naive (object-per-graph-node) approach. +// +// We do attempt to apply the filtering as close to the root as possible via +// `AbstractCellStreamImpl::Apply()`. This operation has different +// implementations for different filters. 
+// +// The algorithm looks as follows: +// * we try to build the DAG according to the proto, from the ground up +// * every time we're about to add a new node, we first try applying it to +// the graph we built so far by calling `Apply()` on the last node we added; +// * these `Apply()` calls are propagated through the graph all the way to the +// root +// * if the `Apply()` call fails (e.g. because there is a +// `cells_per_row_limit_filter` in the DAG), we will continue with adding a +// new node to the graph +// * if the `Apply` call succeeds then we know that the lower layers will filter +// out the unwanted data so we can skip adding the node to the graph. + /// Only return cells from rows whose keys match `regex`. struct RowKeyRegex { std::shared_ptr regex; diff --git a/google/cloud/bigtable/emulator/range_set.cc b/google/cloud/bigtable/emulator/range_set.cc index 33d8316487497..9f1012a3b38b5 100644 --- a/google/cloud/bigtable/emulator/range_set.cc +++ b/google/cloud/bigtable/emulator/range_set.cc @@ -22,6 +22,7 @@ namespace bigtable { namespace emulator { namespace detail { +/// Return -1 for lhs < rhs, 0 for lhs == rhs, 1 for lhs > rhs. int CompareRangeValues(StringRangeSet::Range::Value const& lhs, StringRangeSet::Range::Value const& rhs) { if (absl::holds_alternative(lhs)) { @@ -35,6 +36,13 @@ int CompareRangeValues(StringRangeSet::Range::Value const& lhs, absl::get(rhs)); } +/** + * Check whether no row key exists between `lhs` and `rhs`. + * + * @param `lhs` the first (in order) of the two ranges. + * @param `rhs` the second (in order) of the two ranges. + * @return true if `rhs` directly follows `lhs` in the row key order. + */ bool ConsecutiveRowKeys(StringRangeSet::Range::Value const& lhs, StringRangeSet::Range::Value const& rhs) { if (absl::holds_alternative(lhs) || @@ -45,6 +53,7 @@ bool ConsecutiveRowKeys(StringRangeSet::Range::Value const& lhs, absl::get(rhs)); } +/// Checks whether there exists a string which belongs to both `lhs` and `rhs`.
bool HasOverlap(StringRangeSet::Range const& lhs, StringRangeSet::Range const& rhs) { auto const start_cmp = CompareRangeValues(lhs.start(), rhs.start()); @@ -66,6 +75,7 @@ bool HasOverlap(StringRangeSet::Range const& lhs, intersect_end->end(), intersect_end->end_open()); } +/// Checks if there exists a timestamp which belongs to both `lhs` and `rhs`. bool HasOverlap(TimestampRangeSet::Range const& lhs, TimestampRangeSet::Range const& rhs) { TimestampRangeSet::Range::Value overlap_start = @@ -75,6 +85,16 @@ bool HasOverlap(TimestampRangeSet::Range const& lhs, return !TimestampRangeSet::Range::IsEmpty(overlap_start, overlap_end); } +/** + * Checks if two disjoint ranges are adjacent. + * + * In other words, whether there doesn't exist a string, which could be squeezed + * between the ranges. + * + * @param `lhs` the first (in order) of the two ranges. + * @param `rhs` the second (in order) of the two ranges. + * @return if `rhs` directly follows `lhs` in the row key order. + */ bool DisjointAndSortedRangesAdjacent(StringRangeSet::Range const& lhs, StringRangeSet::Range const& rhs) { assert(!HasOverlap(lhs, rhs)); @@ -94,6 +114,16 @@ bool DisjointAndSortedRangesAdjacent(StringRangeSet::Range const& lhs, return false; } +/** + * Checks if two disjoint ranges are adjacent. + * + * In other words, whether there doesn't exist a timestamp, which could be + * squeezed between the ranges. + * + * @param `lhs` the first (in order) of the two ranges. + * @param `rhs` the second (in order) of the two ranges. + * @return if `rhs` directly follows `lhs` in the row key order. + */ bool DisjointAndSortedRangesAdjacent(TimestampRangeSet::Range const& lhs, TimestampRangeSet::Range const& rhs) { assert(!HasOverlap(lhs, rhs)); @@ -101,6 +131,20 @@ bool DisjointAndSortedRangesAdjacent(TimestampRangeSet::Range const& lhs, return lhs.end() == rhs.start(); } +/** + * A generic implementation of adding a range to set of disjoint ranges. 
+ * + * The word "Sum" in the function name shall be understood as a set sum. + * + * @pre The ranges should be disjoint. + * + * @tparam RangeSetType the type of a set containing disjoint ranges. This will + * be a carefully ordered `std::set`. + * @tparam RangeType the type of a single range. + * @param disjoint_ranges the set of disjoint ranges to which the + * `inserted_range` should be added. + * @param inserted_range the range being added. + */ template void RangeSetSumImpl(RangeSetType& disjoint_ranges, RangeType inserted_range) { // Remove all ranges which either have an overlap with `inserted_range` or are @@ -135,6 +179,19 @@ void RangeSetSumImpl(RangeSetType& disjoint_ranges, RangeType inserted_range) { disjoint_ranges.insert(std::move(inserted_range)); } +/** + * An implementation of intersecting a set of disjoint ranges with a range. + * + * @pre The range set should be disjoint. + * + * @tparam RangeSetType the type of a set containing disjoint ranges. This will + * be a carefully ordered `std::set`. + * @tparam RangeType the type of a single range. + * @param disjoint_ranges the set of disjoint ranges which will be modified + * to only cover points also present in `intersected_range`. + * @param intersected_range the range with which the range set will be + * intersected.
+ */ template void RangeSetIntersectImpl(RangeSetType& disjoint_ranges, RangeType const& intersected_range) { diff --git a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc index 31b1c2b9e0cee..dc42bd85e0dd4 100644 --- a/google/cloud/bigtable/emulator/range_set_test.cc +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -15,7 +15,6 @@ #include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/bigtable/row_range.h" #include "google/cloud/testing_util/chrono_literals.h" -#include "google/cloud/testing_util/is_proto_equal.h" #include "google/cloud/testing_util/status_matchers.h" #include #include @@ -30,9 +29,6 @@ bool const kOpen = true; bool const kClosed = false; bool const kWhatever = true; // to indicate it's unimportant in the test -namespace btproto = ::google::bigtable::v2; -using ::google::cloud::testing_util::IsProtoEqual; - TEST(StringRangeValueOrder, Simple) { EXPECT_EQ(-1, detail::CompareRangeValues("A", "B")); EXPECT_EQ(0, detail::CompareRangeValues("A", "A")); diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index 53a29e5dd9960..993c187c703ea 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -54,6 +54,7 @@ class EmulatorService final : public btproto::Bigtable::Service { override { return grpc::Status::OK; } + grpc::Status MutateRow(grpc::ServerContext* /* context */, btproto::MutateRowRequest const* request, btproto::MutateRowResponse* /* response */) override { @@ -63,30 +64,36 @@ class EmulatorService final : public btproto::Bigtable::Service { } return ToGrpcStatus((*maybe_table)->MutateRow(*request)); } + grpc::Status MutateRows( grpc::ServerContext* /* context */, btproto::MutateRowsRequest const* /* request */, grpc::ServerWriter* /* writer */) override { return grpc::Status::OK; } + grpc::Status CheckAndMutateRow( grpc::ServerContext* /* context */, 
btproto::CheckAndMutateRowRequest const* /* request */, btproto::CheckAndMutateRowResponse* /* response */) override { return grpc::Status::OK; } + grpc::Status PingAndWarm( grpc::ServerContext* /* context */, btproto::PingAndWarmRequest const* /* request */, btproto::PingAndWarmResponse* /* response */) override { return grpc::Status::OK; } + grpc::Status ReadModifyWriteRow( grpc::ServerContext* /* context */, btproto::ReadModifyWriteRowRequest const* /* request */, btproto::ReadModifyWriteRowResponse* /* response */) override { return grpc::Status::OK; } + + private: std::shared_ptr cluster_; }; diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index e1a1393771c99..81163ff43b0db 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -32,6 +32,7 @@ namespace cloud { namespace bigtable { namespace emulator { +/// Objects of this class represent Bigtable tables. class Table { public: static StatusOr> Create( @@ -67,7 +68,16 @@ class Table { std::map> column_families_; }; -// This class is public only to enable testing. +/** + * An `AbstractCellStreamImpl` which streams filtered contents of the table. + * + * Underneath is essentially a collection of `FilteredColumnFamilyStream`s. + * All filters applied to this stream are propagated to the underlying + * `FilteredColumnFamilyStream`s, except for `FamilyNameRegex`, which + * is handled by this subclass. + * + * This class is public only to enable testing.
+ */ class FilteredTableStream : public MergeCellStreams { public: explicit FilteredTableStream( diff --git a/google/cloud/bigtable/emulator/to_grpc_status.cc b/google/cloud/bigtable/emulator/to_grpc_status.cc index 09a843df2185c..46d4d6b818698 100644 --- a/google/cloud/bigtable/emulator/to_grpc_status.cc +++ b/google/cloud/bigtable/emulator/to_grpc_status.cc @@ -22,6 +22,7 @@ namespace cloud { namespace bigtable { namespace emulator { +/// Translate google::cloud::StatusCode into grpc::StatusCode. grpc::StatusCode MapStatusCode(google::cloud::StatusCode code) { switch (code) { case google::cloud::StatusCode::kOk: diff --git a/google/cloud/bigtable/emulator/to_grpc_status.h b/google/cloud/bigtable/emulator/to_grpc_status.h index 9fe134f9e0cff..fce22b6bdaef5 100644 --- a/google/cloud/bigtable/emulator/to_grpc_status.h +++ b/google/cloud/bigtable/emulator/to_grpc_status.h @@ -23,6 +23,7 @@ namespace cloud { namespace bigtable { namespace emulator { +/// Convert a google::cloud::Status to grpc::Status. ::grpc::Status ToGrpcStatus(Status const& to_convert); } // namespace emulator From 28e9accc13522b9e1501d73fc623d1e56bfea671 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 31 Mar 2025 17:58:48 +0300 Subject: [PATCH 099/195] emulator: Fixes to avoid copying values that are possibly very large (cell contents). As well as other fixes for review. 
--- .../cloud/bigtable/emulator/column_family.cc | 42 ++++----------- google/cloud/bigtable/emulator/table.cc | 54 +++++++++---------- 2 files changed, 38 insertions(+), 58 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 82db4fc136dff..1bce6102a14a4 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -42,7 +42,7 @@ std::vector ColumnRow::DeleteTimeRange( std::chrono::microseconds( time_range.end_timestamp_micros())));) { Cell cell = {std::move(cell_it->first), std::move(cell_it->second)}; - deleted_cells.push_back(cell); + deleted_cells.emplace_back(cell); cells_.erase(cell_it++); } return deleted_cells; @@ -59,7 +59,7 @@ std::vector ColumnFamilyRow::DeleteColumn( ::google::bigtable::v2::TimestampRange const& time_range) { auto column_it = columns_.find(column_qualifier); if (column_it == columns_.end()) { - return std::vector(); + return {}; } auto res = column_it->second.DeleteTimeRange(time_range); if (!column_it->second.HasCells()) { @@ -83,35 +83,15 @@ std::map> ColumnFamily::DeleteRow( for (auto column_it = column_family_row.begin(); column_it != column_family_row.end(); - column_it = column_family_row.begin()) { // Why we call - // column_family_row.begin() - // every iteration: - // DeleteColumn can - // invalidate the - // iterator by - // deleting a column - // family row's keys - // (the column - // qualifiers and - // their column - // rows), therefore - // we need to - // re-calculate the - // beginning of the - // map every loop. At - // the same time - // because we are - // removing all cells - // of every column, - // we know - // DeleteColumn will - // eventually remove - // all the columns - // and the row - // itself, so this - // loop will - // terminate. 
- + // Why we call column_family_row.begin() every iteration: + // DeleteColumn can invalidate the iterator by deleting a column + // family row's keys (the column qualifiers and their column + // rows), therefore we need to re-calculate the beginning of the + // map every loop. At the same time because we are removing all + // cells of every column, we know DeleteColumn will eventually + // remove all the columns and the row itself, so this loop will + // terminate. + column_it = column_family_row.begin()) { // Not setting start and end timestamps selects all cells for deletion. ::google::bigtable::v2::TimestampRange time_range; diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index e28a846fd3890..2e3617cfe1014 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -412,23 +412,23 @@ Status RowTransaction::MergeToCell( Status RowTransaction::DeleteFromColumn( ::google::bigtable::v2::Mutation_DeleteFromColumn const& delete_from_column) { - auto status = table_->FindColumnFamily(delete_from_column); - if (!status.ok()) { - return status.status(); + auto maybe_column_family = table_->FindColumnFamily(delete_from_column); + if (!maybe_column_family.ok()) { + return maybe_column_family.status(); } - auto& column_family = status->get(); + auto& column_family = maybe_column_family->get(); auto deleted_cells = column_family.DeleteColumn( request_.row_key(), delete_from_column.column_qualifier(), delete_from_column.time_range()); - for (auto cell : deleted_cells) { - RestoreValue restore_value = {column_family, request_.row_key(), - delete_from_column.column_qualifier(), - std::move(cell.timestamp), - std::move(cell.value)}; - undo_.emplace(restore_value); + for (auto& cell : deleted_cells) { + RestoreValue restore_value{column_family, request_.row_key(), + delete_from_column.column_qualifier(), + std::move(cell.timestamp), + std::move(cell.value)}; + 
undo_.emplace(std::move(restore_value)); } return Status(); @@ -444,7 +444,7 @@ Status RowTransaction::DeleteFromRow() { RestoreValue restrore_value = { *column_family.second, request_.row_key(), std::move(column.first), cell.timestamp, std::move(cell.value)}; - undo_.emplace(restrore_value); + undo_.emplace(std::move(restrore_value)); row_existed = true; } } @@ -465,9 +465,9 @@ Status RowTransaction::DeleteFromFamily( delete_from_family) { // If the request references an incorrect schema (non-existent // column family) then return a failure status error immediately. - auto status = table_->FindColumnFamily(delete_from_family); - if (!status.ok()) { - return status.status(); + auto maybe_column_family = table_->FindColumnFamily(delete_from_family); + if (!maybe_column_family.ok()) { + return maybe_column_family.status(); } auto column_family_it = table_->find(delete_from_family.family_name()); @@ -491,10 +491,10 @@ Status RowTransaction::DeleteFromFamily( auto deleted = column_family_it->second->DeleteRow(request_.row_key()); for (auto const& column : deleted) { for (auto const& cell : column.second) { - RestoreValue restore_value = {*column_family_it->second, - request_.row_key(), std::move(column.first), - cell.timestamp, std::move(cell.value)}; - undo_.emplace(restore_value); + RestoreValue restore_value{*column_family_it->second, request_.row_key(), + std::move(column.first), cell.timestamp, + std::move(cell.value)}; + undo_.emplace(std::move(restore_value)); } } @@ -549,16 +549,16 @@ Status RowTransaction::SetCell( std::chrono::microseconds(set_cell.timestamp_micros()))); if (!cell_existed) { - DeleteValue delete_value = {column_family, column_family_row_it->first, - std::move(set_cell.column_qualifier()), - timestamp_it->first}; - undo_.emplace(delete_value); + DeleteValue delete_value{column_family, column_family_row_it->first, + std::move(set_cell.column_qualifier()), + timestamp_it->first}; + undo_.emplace(std::move(delete_value)); } else { - RestoreValue 
restore_value = {column_family, column_family_row_it->first, - std::move(set_cell.column_qualifier()), - timestamp_it->first, - std::move(value_to_restore)}; - undo_.emplace(restore_value); + RestoreValue restore_value{column_family, column_family_row_it->first, + std::move(set_cell.column_qualifier()), + timestamp_it->first, + std::move(value_to_restore)}; + undo_.emplace(std::move(restore_value)); } return Status(); From 86a4fa21abf9461806d222ee174a0d43ef800fb5 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 31 Mar 2025 18:10:27 +0300 Subject: [PATCH 100/195] emulator: mutation transactions: use std::chrono:duration_cast. Makes it easier and safer to change the resolution at a later date. --- google/cloud/bigtable/emulator/table.cc | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 2e3617cfe1014..5c5146af5a357 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -581,16 +581,20 @@ void RowTransaction::Undo() { auto* delete_value = absl::get_if(&op); if (delete_value) { ::google::bigtable::v2::TimestampRange range; - auto start_micros = delete_value->timestamp.count() * 1000; + auto start_micros = std::chrono::duration_cast( + std::chrono::milliseconds(delete_value->timestamp.count())); // The following is an exclusive upper bound, 1ms higher Since // timestamps have millisecond resolution, 2 timestamps have to // be at least 1ms apart which means that setting this as the // end of the range guarantees that we delete at most 1 (because // the upper bound is exclusive). 
- auto end_micros = start_micros + 1000; - range.set_start_timestamp_micros(start_micros); - range.set_end_timestamp_micros(end_micros); - delete_value->column_family.DeleteColumn(delete_value->row_key, std::move(delete_value->column_qualifier), range); + auto end_micros = std::chrono::duration_cast( + std::chrono::milliseconds(delete_value->timestamp.count() + 1000)); + range.set_start_timestamp_micros(start_micros.count()); + range.set_end_timestamp_micros(end_micros.count()); + delete_value->column_family.DeleteColumn( + delete_value->row_key, std::move(delete_value->column_qualifier), + range); continue; } From 908f3c950ec591f8648a9336ff52ea51048ea24b Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 31 Mar 2025 18:50:03 +0300 Subject: [PATCH 101/195] emulator: use invariant that rows are never empty to eliminate some code. Preceding code erases all columns. The last column erase in DeleteColumn will cause the row to be erased. So we don't have to search for it again in the map and attempt to erase it. --- google/cloud/bigtable/emulator/column_family.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 1bce6102a14a4..b0243bb2939fa 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -101,8 +101,6 @@ std::map> ColumnFamily::DeleteRow( } } - rows_.erase(row_key); - return res; } From 10d3378b4f3a4861bd170c5ed41454a6c8d0bc80 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 31 Mar 2025 19:24:26 +0300 Subject: [PATCH 102/195] emulator: optimization: make it possible to delete a column via a row iterator. In this and other situations, this saves us a duplicate call to find the row by saving and passing an iterator. 
--- .../cloud/bigtable/emulator/column_family.cc | 40 +++++++++++++------ .../cloud/bigtable/emulator/column_family.h | 17 +++++--- 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index b0243bb2939fa..17da46536790f 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -80,22 +80,23 @@ std::map> ColumnFamily::DeleteRow( std::map> res; auto& column_family_row = rows_[row_key]; + auto row_it = rows_.find(row_key); - for (auto column_it = column_family_row.begin(); - column_it != column_family_row.end(); - // Why we call column_family_row.begin() every iteration: - // DeleteColumn can invalidate the iterator by deleting a column - // family row's keys (the column qualifiers and their column - // rows), therefore we need to re-calculate the beginning of the - // map every loop. At the same time because we are removing all - // cells of every column, we know DeleteColumn will eventually - // remove all the columns and the row itself, so this loop will - // terminate. - column_it = column_family_row.begin()) { + for (auto column_it = row_it->second.begin(); + column_it != row_it->second.end(); + // Why we call row_it->second.begin() every iteration: + // DeleteColumn can invalidate the iterator by deleting a + // column family row's keys (the column qualifiers and their + // column rows), therefore we need to re-calculate the + // beginning of the map every loop. At the same time because we + // are removing all cells of every column, we know DeleteColumn + // will eventually remove all the columns and the row itself, + // so this loop will terminate. + column_it = row_it->second.begin()) { // Not setting start and end timestamps selects all cells for deletion. 
::google::bigtable::v2::TimestampRange time_range; - auto deleted_column = DeleteColumn(row_key, column_it->first, time_range); + auto deleted_column = DeleteColumn(row_it, column_it->first, time_range); if (deleted_column.size() > 0) { res[std::move(column_it->first)] = std::move(deleted_column); } @@ -119,6 +120,21 @@ std::vector ColumnFamily::DeleteColumn( return std::vector(); } +std::vector ColumnFamily::DeleteColumn( + std::map::iterator row_it, + std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range) { + if (row_it != rows_.end()) { + auto erased_cells = + row_it->second.DeleteColumn(column_qualifier, time_range); + if (!row_it->second.HasColumns()) { + rows_.erase(row_it); + } + return erased_cells; + } + return {}; +} + class FilteredColumnFamilyStream::FilterApply { public: explicit FilterApply(FilteredColumnFamilyStream& parent) : parent_(parent) {} diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 89a28fbeba47c..2044f3f14e03e 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -188,16 +188,23 @@ class ColumnFamily { /** * Delete cells from a row falling into a given timestamp range in one column. * - * @param row_key the row key to remove the cells from. - * @param column_qualifier the column qualifier from which to delete the - * values. - * @param time_range the timestamp range dictating which values to delete. - * @return vector of deleted cells. + * @param row_key the row key to remove the cells from (or the + * iterator to the row - row_it - in the 2nd overloaded form of the + * function). + * @param column_qualifier the column qualifier from which to delete + * the values. + * @param time_range the timestamp range dictating which values to delete. + * @return vector of deleted cells.
*/ std::vector DeleteColumn( std::string const& row_key, std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); + std::vector DeleteColumn( + std::map::iterator row_it, + std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range); + const_iterator begin() const { return rows_.begin(); } const_iterator end() const { return rows_.end(); } const_iterator lower_bound(std::string const& row_key) const { From 00545b85ab46aa4ed48b3929271ac104ebf57ee4 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 31 Mar 2025 19:38:12 +0300 Subject: [PATCH 103/195] emulator: eliminate redundant storage of row_key in undo log. The entire undo is in the context of a row, therefore all the methods have access to a row key. This also reduces the size of the undo log. --- google/cloud/bigtable/emulator/table.cc | 17 ++++++++++------- google/cloud/bigtable/emulator/table.h | 2 -- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 5c5146af5a357..8ca4b6b6b0696 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -424,7 +424,7 @@ Status RowTransaction::DeleteFromColumn( delete_from_column.time_range()); for (auto& cell : deleted_cells) { - RestoreValue restore_value{column_family, request_.row_key(), + RestoreValue restore_value{column_family, delete_from_column.column_qualifier(), std::move(cell.timestamp), std::move(cell.value)}; @@ -442,7 +442,7 @@ Status RowTransaction::DeleteFromRow() { for (auto& column : deleted_columns) { for (auto& cell : column.second) { RestoreValue restrore_value = { - *column_family.second, request_.row_key(), std::move(column.first), + *column_family.second, std::move(column.first), cell.timestamp, std::move(cell.value)}; undo_.emplace(std::move(restrore_value)); row_existed = true; @@ -491,7 +491,7 @@ Status 
RowTransaction::DeleteFromFamily( auto deleted = column_family_it->second->DeleteRow(request_.row_key()); for (auto const& column : deleted) { for (auto const& cell : column.second) { - RestoreValue restore_value{*column_family_it->second, request_.row_key(), + RestoreValue restore_value{*column_family_it->second, std::move(column.first), cell.timestamp, std::move(cell.value)}; undo_.emplace(std::move(restore_value)); @@ -549,12 +549,12 @@ Status RowTransaction::SetCell( std::chrono::microseconds(set_cell.timestamp_micros()))); if (!cell_existed) { - DeleteValue delete_value{column_family, column_family_row_it->first, + DeleteValue delete_value{column_family, std::move(set_cell.column_qualifier()), timestamp_it->first}; undo_.emplace(std::move(delete_value)); } else { - RestoreValue restore_value{column_family, column_family_row_it->first, + RestoreValue restore_value{column_family, std::move(set_cell.column_qualifier()), timestamp_it->first, std::move(value_to_restore)}; @@ -565,6 +565,8 @@ Status RowTransaction::SetCell( } void RowTransaction::Undo() { + auto row_key = request_.row_key(); + while (!undo_.empty()) { auto op = undo_.top(); undo_.pop(); @@ -572,12 +574,13 @@ void RowTransaction::Undo() { auto* restore_value = absl::get_if(&op); if (restore_value) { restore_value->column_family.SetCell( - std::move(restore_value->row_key), + row_key, std::move(restore_value->column_qualifier), restore_value->timestamp, std::move(restore_value->value)); continue; } + auto* delete_value = absl::get_if(&op); if (delete_value) { ::google::bigtable::v2::TimestampRange range; @@ -593,7 +596,7 @@ void RowTransaction::Undo() { range.set_start_timestamp_micros(start_micros.count()); range.set_end_timestamp_micros(end_micros.count()); delete_value->column_family.DeleteColumn( - delete_value->row_key, std::move(delete_value->column_qualifier), + row_key, std::move(delete_value->column_qualifier), range); continue; } diff --git a/google/cloud/bigtable/emulator/table.h 
b/google/cloud/bigtable/emulator/table.h index 8207c0d879efc..a89de1a040200 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -88,7 +88,6 @@ class Table : public std::enable_shared_from_this
{ struct RestoreValue { ColumnFamily& column_family; - std::string row_key; std::string column_qualifier; std::chrono::milliseconds timestamp; std::string value; @@ -96,7 +95,6 @@ struct RestoreValue { struct DeleteValue { ColumnFamily& column_family; - std::string row_key; std::string column_qualifier; std::chrono::milliseconds timestamp; }; From 1efa6f5fc7da380d9723655d29c415060f0174fb Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 31 Mar 2025 20:18:08 +0300 Subject: [PATCH 104/195] emulator: Use google-standard status builder everywhere in main code. --- google/cloud/bigtable/emulator/table.cc | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 8ca4b6b6b0696..c618b2bf38976 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -454,10 +454,9 @@ Status RowTransaction::DeleteFromRow() { return Status(); } - return Status( - StatusCode::kNotFound, - absl::StrFormat("row %s not found in table", request_.row_key()), - ErrorInfo()); + return NotFoundError( + "row not found in table", + GCP_ERROR_INFO().WithMetadata("row", request_.row_key())); } Status RowTransaction::DeleteFromFamily( @@ -472,20 +471,21 @@ Status RowTransaction::DeleteFromFamily( auto column_family_it = table_->find(delete_from_family.family_name()); if (column_family_it == table_->end()) { - return Status(StatusCode::kNotFound, - absl::StrFormat("column family %s not found in table", - delete_from_family.family_name()), - ErrorInfo()); + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", + delete_from_family.family_name())); } std::map::iterator column_family_row_it; if (column_family_row_it = column_family_it->second->find(request_.row_key()); column_family_row_it == column_family_it->second->end()) { // The row does not exist - return 
Status(StatusCode::kNotFound, - absl::StrFormat("row key %s not found in column family %s", - request_.row_key(), column_family_it->first), - ErrorInfo()); + return NotFoundError( + "row key is not found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", request_.row_key()) + .WithMetadata("column family", column_family_it->first)); } auto deleted = column_family_it->second->DeleteRow(request_.row_key()); From 0fda7c88938a509d1b2f25630c5dc6b1903c39f3 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 31 Mar 2025 20:29:32 +0300 Subject: [PATCH 105/195] emulator: Add a FIXME to ask to use google-standard Status builder everywhere in test file. --- google/cloud/bigtable/emulator/rollback_test.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index a64274e377a9f..a74501015f90b 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -129,6 +129,8 @@ Status has_cell( auto column_family_it = table->find(column_family); if (column_family_it == table->end()) { return Status( + // FIXME: Change this to use google-standard Status builder + // everywhere in this test module (and not StatusCode::...). StatusCode::kNotFound, absl::StrFormat("column family %s not found in table", column_family), ErrorInfo()); From 9364a31886355c1fa44030904a4b10190e016501 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 2 Apr 2025 14:58:32 +0300 Subject: [PATCH 106/195] emulator: Remove unused local var. 
--- google/cloud/bigtable/emulator/column_family.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 17da46536790f..5be629faaa861 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -79,7 +79,6 @@ std::map> ColumnFamily::DeleteRow( std::string const& row_key) { std::map> res; - auto& column_family_row = rows_[row_key]; auto row_it = rows_.find(row_key); for (auto column_it = row_it->second.begin(); From 2a4422aa7fcefee0a80a7591aef1d4ee2211c443 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 2 Apr 2025 20:39:22 +0300 Subject: [PATCH 107/195] emulator: eliminate a C++-17ism. --- google/cloud/bigtable/emulator/table.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index c618b2bf38976..418458a1a3192 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -478,8 +478,8 @@ Status RowTransaction::DeleteFromFamily( } std::map::iterator column_family_row_it; - if (column_family_row_it = column_family_it->second->find(request_.row_key()); - column_family_row_it == column_family_it->second->end()) { + if (column_family_it->second->find(request_.row_key()) == + column_family_it->second->end()) { // The row does not exist return NotFoundError( "row key is not found in column family", From fecad9b8e1d0815cf05bdb59bf21b0051d0c5125 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 3 Apr 2025 14:47:21 +0300 Subject: [PATCH 108/195] emulator: DeleteRow: fix hang/crash due to use of invalidated row iterator. 
--- google/cloud/bigtable/emulator/column_family.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 5be629faaa861..6d2d54aefa62d 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -82,7 +82,7 @@ std::map> ColumnFamily::DeleteRow( auto row_it = rows_.find(row_key); for (auto column_it = row_it->second.begin(); - column_it != row_it->second.end(); + row_it != rows_.end() && column_it != row_it->second.end(); // Why we call row_it->second.begin() every iteration: // DeleteColumn can invalidate the iterator by deleting a // column family row's keys (the column qualifiers and their @@ -91,7 +91,14 @@ std::map> ColumnFamily::DeleteRow( // are removing all cells of every column, we know DeleteColumn // will eventually remove all the columns and the row itself, // so this loop will terminate. - column_it = row_it->second.begin()) { + // + // Unfortunately we also have to re-initialize the row_it after + // every loop execution because DeleteColumn maintains the + // invariant that a row cannot be empty, so every loop + // execution can also delete the row_key and invalidate the + // iterator. + row_it = rows_.find(row_key), + column_it = row_it->second.begin()) { // Not setting start and end timestamps selects all cells for deletion. ::google::bigtable::v2::TimestampRange time_range; From e5aabf7bf88ad0252b442d0cb63b1aa789efabc3 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 3 Apr 2025 16:30:44 +0300 Subject: [PATCH 109/195] emulator: remove non-Google-standard Status builders. 
--- .../cloud/bigtable/emulator/rollback_test.cc | 83 +++++++++---------- 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index a74501015f90b..8129b86306594 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -14,6 +14,7 @@ #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/internal/make_status.h" #include "google/cloud/status.h" #include "google/cloud/status_or.h" #include "google/cloud/testing_util/status_matchers.h" @@ -128,30 +129,27 @@ Status has_cell( std::string const& value) { auto column_family_it = table->find(column_family); if (column_family_it == table->end()) { - return Status( - // FIXME: Change this to use google-standard Status builder - // everywhere in this test module (and not StatusCode::...). - StatusCode::kNotFound, - absl::StrFormat("column family %s not found in table", column_family), - ErrorInfo()); + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); } auto const& cf = column_family_it->second; auto column_family_row_it = cf->find(row_key); if (column_family_row_it == cf->end()) { - return Status(StatusCode::kNotFound, - absl::StrFormat("no row key %s found in column famiily %s", - row_key, column_family), - ErrorInfo()); + return NotFoundError( + "no row key found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); } auto& column_family_row = column_family_row_it->second; auto column_row_it = column_family_row.find(column_qualifier); if (column_row_it == column_family_row.end()) { - return Status( - StatusCode::kNotFound, - absl::StrFormat("no column found with qualifer %s", column_qualifier), - ErrorInfo()); + return NotFoundError( + 
"no column found with qualifer", + GCP_ERROR_INFO().WithMetadata("column qualifer", column_qualifier)); } auto& column_row = column_row_it->second; @@ -159,17 +157,20 @@ Status has_cell( column_row.find(std::chrono::duration_cast( std::chrono::microseconds(timestamp_micros))); if (timestamp_it == column_row.end()) { - return Status(StatusCode::kNotFound, "timestamp not found", ErrorInfo()); + return NotFoundError( + "timestamp not found", + GCP_ERROR_INFO().WithMetadata("timestamp", + absl::StrFormat("%d", timestamp_micros))); } if (timestamp_it->second != value) { - return Status(StatusCode::kNotFound, - absl::StrFormat("wrong value: expected %s, found %s", value, - timestamp_it->second), - ErrorInfo()); + return NotFoundError("wrong value", + GCP_ERROR_INFO() + .WithMetadata("expected", value) + .WithMetadata("found", timestamp_it->second)); } - return Status(StatusCode::kOk, "", ErrorInfo()); + return Status(); } Status has_column( @@ -178,53 +179,51 @@ Status has_column( std::string const& column_qualifier) { auto column_family_it = table->find(column_family); if (column_family_it == table->end()) { - return Status( - StatusCode::kNotFound, - absl::StrFormat("column family %s not found in table", column_family), - ErrorInfo()); + return NotFoundError( + "columnn family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); } auto const& cf = column_family_it->second; auto column_family_row_it = cf->find(row_key); if (column_family_row_it == cf->end()) { - return Status(StatusCode::kNotFound, - absl::StrFormat("no row key %s found in column famiily %s", - row_key, column_family), - ErrorInfo()); + return internal::NotFoundError( + "row key not found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); } auto& column_family_row = column_family_row_it->second; auto column_row_it = column_family_row.find(column_qualifier); if (column_row_it == 
column_family_row.end()) { - return Status( - StatusCode::kNotFound, - absl::StrFormat("no column found with qualifer %s", column_qualifier), - ErrorInfo()); + return NotFoundError( + "no column found with supplied qualifer", + GCP_ERROR_INFO().WithMetadata("column qualifer", column_qualifier)); } - return Status(StatusCode::kOk, "", ErrorInfo()); + return Status(); } Status has_row(std::shared_ptr& table, std::string const& column_family, std::string const& row_key) { auto column_family_it = table->find(column_family); if (column_family_it == table->end()) { - return Status( - StatusCode::kNotFound, - absl::StrFormat("column family %s not found in table", column_family), - ErrorInfo()); + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); } auto const& cf = column_family_it->second; auto column_family_row_it = cf->find(row_key); if (column_family_row_it == cf->end()) { - return Status(StatusCode::kNotFound, - absl::StrFormat("no row key %s found in column famiily %s", - row_key, column_family), - ErrorInfo()); + return NotFoundError("row key not found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); } - return Status(StatusCode::kOk, "", ErrorInfo()); + return Status(); } // Does the SetCell mutation work to set a cell to a specific value? From cfda22bc3ffb5d9de5275bf0ad1e4177d27de693 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 3 Apr 2025 18:55:19 +0300 Subject: [PATCH 110/195] emulator: remove some outdated FIXMEs. - We now have atomic rollback support for all implemented mutations. 
--- google/cloud/bigtable/emulator/table.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 418458a1a3192..0363ea1ed5f69 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -219,8 +219,6 @@ StatusOr> Table::FindColumnFamily( } Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { - // FIXME - add atomicity - // FIXME - determine what happens when row/column family/column does not exist std::lock_guard lock(mu_); assert(request.table_name() == schema_.name()); From af5989966de0895c6e2dc92e296acff6016b0708 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 8 Apr 2025 15:17:15 +0300 Subject: [PATCH 111/195] emulator: make some std::chrono usage more type-safe. --- google/cloud/bigtable/emulator/table.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 0363ea1ed5f69..14a159bb47119 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -583,14 +583,14 @@ void RowTransaction::Undo() { if (delete_value) { ::google::bigtable::v2::TimestampRange range; auto start_micros = std::chrono::duration_cast( - std::chrono::milliseconds(delete_value->timestamp.count())); + delete_value->timestamp); // The following is an exclusive upper bound, 1ms higher Since // timestamps have millisecond resolution, 2 timestamps have to // be at least 1ms apart which means that setting this as the // end of the range guarantees that we delete at most 1 (because // the upper bound is exclusive). 
auto end_micros = std::chrono::duration_cast( - std::chrono::milliseconds(delete_value->timestamp.count() + 1000)); + delete_value->timestamp + std::chrono::milliseconds(1)); range.set_start_timestamp_micros(start_micros.count()); range.set_end_timestamp_micros(end_micros.count()); delete_value->column_family.DeleteColumn( From 893a8d505ed669add2e81890bf3d61b045edce27 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 8 Apr 2025 15:55:38 +0300 Subject: [PATCH 112/195] emulator: std::move() to avoid another copy of a cell. --- google/cloud/bigtable/emulator/column_family.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 6d2d54aefa62d..5718c0e169b8c 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -42,7 +42,7 @@ std::vector ColumnRow::DeleteTimeRange( std::chrono::microseconds( time_range.end_timestamp_micros())));) { Cell cell = {std::move(cell_it->first), std::move(cell_it->second)}; - deleted_cells.emplace_back(cell); + deleted_cells.emplace_back(std::move(cell)); cells_.erase(cell_it++); } return deleted_cells; From c9a76b39c7d55d0e8c958f45b08831e33e3d242e Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 8 Apr 2025 16:10:24 +0300 Subject: [PATCH 113/195] emulator: eliminate some code duplication. 
--- google/cloud/bigtable/emulator/column_family.cc | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 5718c0e169b8c..f79876288744a 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -115,15 +115,8 @@ std::vector ColumnFamily::DeleteColumn( std::string const& row_key, std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range) { auto row_it = rows_.find(row_key); - if (row_it != rows_.end()) { - auto erased_cells = - row_it->second.DeleteColumn(column_qualifier, time_range); - if (!row_it->second.HasColumns()) { - rows_.erase(row_it); - } - return erased_cells; - } - return std::vector(); + + return DeleteColumn(row_it, column_qualifier, time_range); } std::vector ColumnFamily::DeleteColumn( From 103bfb63131e3489cc01011e7209becfd047556e Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 10 Apr 2025 12:51:56 +0300 Subject: [PATCH 114/195] emulator: SetCell: return overwritten value (if any). This prepares the way to simplify the code that prepares the undo log, and in particular it obviates the need to check to see if the key (timestamp) existed before the SetCell mutation. --- .../cloud/bigtable/emulator/column_family.cc | 27 ++++++++++++------- .../cloud/bigtable/emulator/column_family.h | 25 +++++++++++++---- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index f79876288744a..44528d054d937 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "google/cloud/bigtable/emulator/column_family.h" +#include #include #include @@ -21,13 +22,22 @@ namespace cloud { namespace bigtable { namespace emulator { -void ColumnRow::SetCell(std::chrono::milliseconds timestamp, - std::string const& value) { +absl::optional ColumnRow::SetCell( + std::chrono::milliseconds timestamp, std::string const& value) { if (timestamp <= std::chrono::milliseconds::zero()) { timestamp = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()); } + + absl::optional ret = absl::nullopt; + auto cell_it = cells_.find(timestamp); + if (!(cell_it == cells_.end())) { + ret = std::move(cell_it->second); + } + cells_[timestamp] = value; + + return ret; } std::vector ColumnRow::DeleteTimeRange( @@ -48,10 +58,10 @@ std::vector ColumnRow::DeleteTimeRange( return deleted_cells; } -void ColumnFamilyRow::SetCell(std::string const& column_qualifier, +absl::optional ColumnFamilyRow::SetCell(std::string const& column_qualifier, std::chrono::milliseconds timestamp, std::string const& value) { - columns_[column_qualifier].SetCell(timestamp, value); + return columns_[column_qualifier].SetCell(timestamp, value); } std::vector ColumnFamilyRow::DeleteColumn( @@ -68,11 +78,10 @@ std::vector ColumnFamilyRow::DeleteColumn( return res; } -void ColumnFamily::SetCell(std::string const& row_key, - std::string const& column_qualifier, - std::chrono::milliseconds timestamp, - std::string const& value) { - rows_[row_key].SetCell(column_qualifier, timestamp, value); +absl::optional ColumnFamily::SetCell( + std::string const& row_key, std::string const& column_qualifier, + std::chrono::milliseconds timestamp, std::string const& value) { + return rows_[row_key].SetCell(column_qualifier, timestamp, value); } std::map> ColumnFamily::DeleteRow( diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 2044f3f14e03e..34ebfe11b4988 100644 --- a/google/cloud/bigtable/emulator/column_family.h 
+++ b/google/cloud/bigtable/emulator/column_family.h @@ -54,8 +54,12 @@ class ColumnRow { * updated. If it equals zero then number of milliseconds since epoch will * be used instead. * @param value the value to insert/update. + * + * @return no value if the timestamp had no value before, otherwise + * the previous value of the timestamp. */ - void SetCell(std::chrono::milliseconds timestamp, std::string const& value); + absl::optional SetCell(std::chrono::milliseconds timestamp, + std::string const& value); /** * Delete cells falling into a given timestamp range. * @@ -109,9 +113,14 @@ class ColumnFamilyRow { * updated. If it equals zero then number of milliseconds since epoch will * be used instead. * @param value the value to insert/update. + * + * @return no value if the timestamp had no value before, otherwise + * the previous value of the timestamp. + * */ - void SetCell(std::string const& column_qualifier, - std::chrono::milliseconds timestamp, std::string const& value); + absl::optional SetCell(std::string const& column_qualifier, + std::chrono::milliseconds timestamp, + std::string const& value); /** * Delete cells falling into a given timestamp range in one column. * @@ -174,9 +183,15 @@ class ColumnFamily { * updated. If it equals zero then number of milliseconds since epoch will * be used instead. * @param value the value to insert/update. + * + * @return no value if the timestamp had no value before, otherwise + * the previous value of the timestamp. + * */ - void SetCell(std::string const& row_key, std::string const& column_qualifier, - std::chrono::milliseconds timestamp, std::string const& value); + absl::optional SetCell(std::string const& row_key, + std::string const& column_qualifier, + std::chrono::milliseconds timestamp, + std::string const& value); /** * Delete the whole row from this column family. 
* From e845437f17b62aee6ac23017904f3927aef4fcde Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 10 Apr 2025 13:15:25 +0300 Subject: [PATCH 115/195] emulator: radically simplify undo log preparation code. This is thanks to simply returning the old value (if any) rather than performing almost endless iterations in this function! :) --- google/cloud/bigtable/emulator/table.cc | 49 ++++--------------------- 1 file changed, 8 insertions(+), 41 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 14a159bb47119..8abe4d1625a5b 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -508,54 +508,21 @@ Status RowTransaction::SetCell( auto& column_family = maybe_column_family->get(); - bool cell_existed = true; + auto timestamp = std::chrono::duration_cast( + std::chrono::microseconds(set_cell.timestamp_micros())); - auto column_family_row_it = column_family.find(request_.row_key()); - std::string value_to_restore; - if (column_family_row_it == column_family.end()) { - cell_existed = false; - } else { - auto& column_family_row = column_family_row_it->second; - auto column_row_it = column_family_row.find(set_cell.column_qualifier()); - if (column_row_it == column_family_row.end()) { - cell_existed = false; - } else { - auto timestamp_it = column_row_it->second.find( - std::chrono::duration_cast( - std::chrono::microseconds(set_cell.timestamp_micros()))); - if (timestamp_it == column_row_it->second.end()) { - cell_existed = false; - } else { - value_to_restore = std::move(timestamp_it->second); - } - } - } - - column_family.SetCell( - request_.row_key(), set_cell.column_qualifier(), - std::chrono::duration_cast( - std::chrono::microseconds(set_cell.timestamp_micros())), - set_cell.value()); - - // If we have added a row, a column or a cell, we need to recompute - // these iterators. 
- column_family_row_it = column_family.find(request_.row_key()); - auto& column_family_row = column_family_row_it->second; - auto column_row_it = column_family_row.find(set_cell.column_qualifier()); - auto timestamp_it = column_row_it->second.find( - std::chrono::duration_cast( - std::chrono::microseconds(set_cell.timestamp_micros()))); + auto maybe_old_value = + column_family.SetCell(request_.row_key(), set_cell.column_qualifier(), + timestamp, set_cell.value()); - if (!cell_existed) { + if (!maybe_old_value) { DeleteValue delete_value{column_family, - std::move(set_cell.column_qualifier()), - timestamp_it->first}; + std::move(set_cell.column_qualifier()), timestamp}; undo_.emplace(std::move(delete_value)); } else { RestoreValue restore_value{column_family, std::move(set_cell.column_qualifier()), - timestamp_it->first, - std::move(value_to_restore)}; + timestamp, std::move(maybe_old_value.value())}; undo_.emplace(std::move(restore_value)); } From a544337dbbc285196597084a527e02b15279ed13 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 11 Apr 2025 19:48:08 +0300 Subject: [PATCH 116/195] emulator: ColumnFamily::DeleteRow(): radically simplify the function. And also eliminate the risk of accessing an invalidated iterator. 
--- .../cloud/bigtable/emulator/column_family.cc | 33 ++++++------------- .../cloud/bigtable/emulator/column_family.h | 2 ++ 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 44528d054d937..355655cbc1866 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -89,34 +89,21 @@ std::map> ColumnFamily::DeleteRow( std::map> res; auto row_it = rows_.find(row_key); + if (row_it == rows_.end()) { + return {}; + } - for (auto column_it = row_it->second.begin(); - row_it != rows_.end() && column_it != row_it->second.end(); - // Why we call row_it->second.begin() every iteration: - // DeleteColumn can invalidate the iterator by deleting a - // column family row's keys (the column qualifiers and their - // column rows), therefore we need to re-calculate the - // beginning of the map every loop. At the same time because we - // are removing all cells of every column, we know DeleteColumn - // will eventually remove all the columns and the row itself, - // so this loop will terminate. - // - // Unfortunately we also have to re-initialize the row_it after - // every loop execution because DeleteColumn maintains the - // invariant that a row cannot be empty, so every loop - // execution can also delete the row_key and invalidate the - // iterator. - row_it = rows_.find(row_key), - column_it = row_it->second.begin()) { - // Not setting start and end timestamps selects all cells for deletion. 
+ for (auto& column : row_it->second.columns_) { + // Not setting start and end timestamps will select all cells for deletion ::google::bigtable::v2::TimestampRange time_range; - - auto deleted_column = DeleteColumn(row_it, column_it->first, time_range); - if (deleted_column.size() > 0) { - res[std::move(column_it->first)] = std::move(deleted_column); + auto deleted_cells = column.second.DeleteTimeRange(time_range); + if (deleted_cells.size() > 0) { + res[std::move(column.first)] = std::move(deleted_cells); } } + rows_.erase(row_key); + return res; } diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 34ebfe11b4988..df81dd568bd00 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -153,6 +153,8 @@ class ColumnFamilyRow { } private: + friend class ColumnFamily; + std::map columns_; }; From ed6352b8334d412c9fd8e6739e294b56359d202f Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Sun, 13 Apr 2025 23:08:28 +0200 Subject: [PATCH 117/195] Test and fix MergeCellStreams. --- google/cloud/bigtable/emulator/filter.cc | 133 ++-- google/cloud/bigtable/emulator/filter.h | 20 +- google/cloud/bigtable/emulator/filter_test.cc | 714 +++++++++++++++++- 3 files changed, 773 insertions(+), 94 deletions(-) diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index 5d64bcc37c959..8c1cfd071663c 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ b/google/cloud/bigtable/emulator/filter.cc @@ -256,9 +256,9 @@ class TrivialFilter : public AbstractCellStreamImpl { * whether to filter it out or not (if not, also how far to advance the * underlying cell stream). 
* @param filter_filter a functor which given an `InternalFilter` decides - * whether filtering this cell streams results and then applying the + * whether filtering this cell stream's results and then applying the * `InternalFilter` would yield the same results as applying - * `InternalFilter` to the underlying stream and the perform this stream's + * `InternalFilter` to the underlying stream and then perform this stream's * filtering. */ TrivialFilter(CellStream source, Filter filter, @@ -322,11 +322,11 @@ class TrivialFilter : public AbstractCellStreamImpl { * @param filter functor, which accepts a `CellView` and decides * whether to filter it out or not (if not, also how far to advance the * underlying cell stream). - * @param filter_filter a functor which given an `InternalFilter` decides - * whether filtering this cell stream's results and then applying the - * `InternalFilter` would yield the same results as applying - * `InternalFilter` to the underlying stream and the perform this stream's - * filtering. + * @param filter_filter a functor which given an `InternalFilter` decides + * whether filtering this cell stream's results and then applying the + * `InternalFilter` would yield the same results as applying + * `InternalFilter` to the underlying stream and then perform this stream's + * filtering. */ template CellStream MakeTrivialFilter( @@ -409,67 +409,83 @@ CellView const& MergeCellStreams::Value() const { bool MergeCellStreams::Next(NextMode mode) { InitializeIfNeeded(); - if (unfinished_streams_.empty()) { - return true; - } - if (mode != NextMode::kCell) { - SkipRowOrColumn(mode); - return true; - } + assert(!unfinished_streams_.empty()); + + // If we're skipping to the next column/row, we need to advance all streams + // that currently point to that column/row. + // + // To do this, we temporarily remove those streams from the heap + // (since advancing them would require re-adjusting the heap). 
+ // These streams remain at the end of the `unfinished_streams_` vector, + // but are not considered part of the heap. The `to_readd_begin` iterator + // marks the start of the range in `unfinished_streams_` that is outside + // the heap. + std::pop_heap(unfinished_streams_.begin(), unfinished_streams_.end(), CellStreamGreater()); - auto& stream_to_advance = unfinished_streams_.back(); - stream_to_advance->Next(NextMode::kCell); - if (stream_to_advance->HasValue()) { - std::push_heap(unfinished_streams_.begin(), unfinished_streams_.end(), - CellStreamGreater()); - } else { + std::vector>::iterator first_to_advance = + std::prev(unfinished_streams_.end()); + std::vector>::iterator to_readd_begin = + first_to_advance; + + auto all_streams_to_advance_removed_from_heap = [&] () { + if (unfinished_streams_.begin() == to_readd_begin) { + // All streams removed. + return true; + } + if (mode == NextMode::kCell) { + // We only need to remove one stream, which we already did. + return true; + } + if (mode == NextMode::kRow) { + return unfinished_streams_.front()->Value().row_key() != + (*first_to_advance)->Value().row_key(); + } + assert(mode == NextMode::kColumn); + return unfinished_streams_.front()->Value().column_qualifier() != + (*first_to_advance)->Value().column_qualifier() || + unfinished_streams_.front()->Value().column_family() != + (*first_to_advance)->Value().column_family() || + unfinished_streams_.front()->Value().row_key() != + (*first_to_advance)->Value().row_key(); + }; + while (!all_streams_to_advance_removed_from_heap()) { + std::pop_heap(unfinished_streams_.begin(), to_readd_begin, + CellStreamGreater()); + --to_readd_begin; + } + while (to_readd_begin != unfinished_streams_.end()) { + (*to_readd_begin)->Next(mode); + if ((*to_readd_begin)->HasValue()) { + ++to_readd_begin; + std::push_heap(unfinished_streams_.begin(), to_readd_begin, + CellStreamGreater()); + continue; + } + // The stream is finished, delete it. 
+ to_readd_begin->swap(unfinished_streams_.back()); unfinished_streams_.pop_back(); + // Don't advance `to_readd_begin` since it points to a different stream + // after `swap()`. } return true; } void MergeCellStreams::InitializeIfNeeded() const { if (!initialized_) { - ReassesStreams(); - initialized_ = true; - } -} - -void MergeCellStreams::ReassesStreams() const { - for (auto stream_it = unfinished_streams_.begin(); - stream_it != unfinished_streams_.end();) { - if (!(*stream_it)->HasValue()) { - stream_it->swap(unfinished_streams_.back()); - unfinished_streams_.pop_back(); - } else { - ++stream_it; - } - } - std::make_heap(unfinished_streams_.begin(), unfinished_streams_.end(), - CellStreamGreater()); -} - -bool MergeCellStreams::SkipRowOrColumn(NextMode mode) { - assert(mode != NextMode::kCell); - // The first element in `unfinished_streams_` is the stream beginning with the - // smallest Cell - the one we would normally return. Before we alter this - // stream alter all others which point to the same column/row. 
- for (auto stream_it = std::next(unfinished_streams_.begin()); - stream_it != unfinished_streams_.end(); ++stream_it) { - if ((mode == NextMode::kRow || - ((*stream_it)->Value().column_qualifier() == - unfinished_streams_.front()->Value().column_qualifier() && - (*stream_it)->Value().column_family() == - unfinished_streams_.front()->Value().column_family())) && - (*stream_it)->Value().row_key() == - unfinished_streams_.front()->Value().row_key()) { - (*stream_it)->Next(mode); + for (auto stream_it = unfinished_streams_.begin(); + stream_it != unfinished_streams_.end();) { + if (!(*stream_it)->HasValue()) { + stream_it->swap(unfinished_streams_.back()); + unfinished_streams_.pop_back(); + } else { + ++stream_it; + } } + std::make_heap(unfinished_streams_.begin(), unfinished_streams_.end(), + CellStreamGreater()); + initialized_ = true; } - unfinished_streams_.front()->Next(mode); - ReassesStreams(); - return true; } /// A cell stream for handling a Condition filter. @@ -597,7 +613,8 @@ class EmptyCellStreamImpl : public AbstractCellStreamImpl { bool HasValue() const override { return false; } CellView const& Value() const override { assert(false); - // The code below makes no sense but it should be dead. + // The code below makes no sense but it should be dead. It's to silence + // compiler warnings. static CellView dummy{"row", "cf", "col", std::chrono::milliseconds(0), "val"}; return dummy; diff --git a/google/cloud/bigtable/emulator/filter.h b/google/cloud/bigtable/emulator/filter.h index 670513949253b..66a09f214a368 100644 --- a/google/cloud/bigtable/emulator/filter.h +++ b/google/cloud/bigtable/emulator/filter.h @@ -46,23 +46,25 @@ namespace emulator { // // Unfortunately, some filters (e.g. `cells_per_row_limit_filter`) prevents us // from moving filters applied later in the chain to its beginning. Hence, we -// need to keep the naive (object-per-graph-node) approach. +// need to keep the naive (object-per-graph-node) approach at least as a backup +// option. 
// -// We do attempt to apply the filtering as close to the root as possible via -// `AbstractCellStreamImpl::Apply()`. This operation has different -// implementations for different filters. +// We do attempt to apply the filtering as close to the root as possible, +// though. It is performed via the `AbstractCellStreamImpl::Apply()` function. +// This operation has different implementations for different filters. // // The algorithm looks as follows: // * we try to build the DAG according to the proto, from the ground up -// * every time we're about to add a new node, we first try applying the -// the graph we built so far by calling `Apply()` on the last node we added; +// * every time we're about to add a new node, we first try applying the filter +// to the graph we built so far by calling `Apply()` on the last node we +// added; // * these `Apply()` calls are propagated through the graph all the way to the // root // * if the `Apply()` call fails (e.g. because there is a // `cells_per_row_limit_filter` in the DAG), we will continue with adding a // new node to the graph -// * if the `Apply` call fails then we know that the lower layers will filter -// out the unwanted data so we can skip adding the node to the graph. +// * if the `Apply` call succeeds then we know that the lower layers will filter +// out the unwanted data, so we can skip adding the node to the graph. /// Only return cells from rows whose keys match `regex`. 
struct RowKeyRegex { @@ -239,8 +241,6 @@ class MergeCellStreams : public AbstractCellStreamImpl { private: void InitializeIfNeeded() const; - void ReassesStreams() const; - bool SkipRowOrColumn(NextMode mode); mutable bool initialized_{false}; diff --git a/google/cloud/bigtable/emulator/filter_test.cc b/google/cloud/bigtable/emulator/filter_test.cc index b49161b4acffa..85f4dd86a8954 100644 --- a/google/cloud/bigtable/emulator/filter_test.cc +++ b/google/cloud/bigtable/emulator/filter_test.cc @@ -14,8 +14,10 @@ #include "google/cloud/bigtable/data_connection.h" #include "google/cloud/bigtable/table.h" +#include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/testing_util/is_proto_equal.h" #include "google/cloud/testing_util/status_matchers.h" +#include "google/cloud/testing_util/chrono_literals.h" #include namespace google { @@ -24,35 +26,695 @@ namespace bigtable { namespace emulator { namespace { -TEST(DummyFilter, Simple) { - google::cloud::bigtable::Table table(MakeDataConnection(), - TableResource("fake", "baz", "ft")); - Filter filter = Filter::Chain( - Filter::Interleave(Filter::Chain(Filter::ColumnRegex("[0-1]"), - Filter::ApplyLabelTransformer("L01")), - Filter::Chain(Filter::ColumnRegex("[1-2]"), - Filter::ApplyLabelTransformer("L12")), - Filter::Sink()), - Filter::ColumnRegex("[0-2]")); - for (StatusOr& row : - table.ReadRows(RowSet(RowRange::InfiniteRange()), filter)) { - ASSERT_STATUS_OK(row); - std::cout << row->row_key() << ":\n"; - for (auto const& cell : row->cells()) { - std::cout << "\t" << cell.family_name() << ":" << cell.column_qualifier() - << " @ " << cell.timestamp().count() << "us\n" - << "\t\"" << cell.value() << '"' << "\n"; - if (!cell.labels().empty()) { - std::cout << "\tlabelled:"; - for (auto const& label : cell.labels()) { - std::cout << " " << label; - } - std::cout << "\n\n"; - } - } +using ::testing::Return; +using testing_util::chrono_literals::operator""_ms; + +class MockStream : public AbstractCellStreamImpl { 
+ public: + MOCK_METHOD(bool, ApplyFilter, (InternalFilter const& internal_filter), + (override)); + MOCK_METHOD(bool, HasValue, (), (const override)); + MOCK_METHOD(CellView const&, Value, (), (const override)); + MOCK_METHOD(bool, Next, (NextMode mode), (override)); +}; + +TEST(CellStream, NextAllSupported) { + { + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kCell)).WillOnce(Return(true)); + CellStream(std::move(mock_impl)).Next(); + } + { + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)).WillOnce(Return(true)); + CellStream(std::move(mock_impl)).Next(NextMode::kColumn); + } + { + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kRow)).WillOnce(Return(true)); + CellStream(std::move(mock_impl)).Next(NextMode::kRow); } } +class TestCell { + public: + TestCell(std::string row_key, std::string column_family, + std::string column_qualifier, std::chrono::milliseconds timestamp, + std::string value) + : row_key_(std::move(row_key)), + column_family_(std::move(column_family)), + column_qualifier_(std::move(column_qualifier)), + timestamp_(std::move(timestamp)), + value_(std::move(value)), + view_(row_key_, column_family_, column_qualifier_, timestamp_, value_) { + } + + TestCell(TestCell const& other) + : row_key_(other.row_key_), + column_family_(other.column_family_), + column_qualifier_(other.column_qualifier_), + timestamp_(other.timestamp_), + value_(other.value_), + view_(row_key_, column_family_, column_qualifier_, timestamp_, value_) { + } + TestCell(TestCell&& other) + : row_key_(std::move(other.row_key_)), + column_family_(std::move(other.column_family_)), + column_qualifier_(std::move(other.column_qualifier_)), + timestamp_(std::move(other.timestamp_)), + value_(std::move(other.value_)), + view_(row_key_, column_family_, column_qualifier_, timestamp_, value_) { + } + + CellView const& AsCellView() const { return view_; } + + bool operator==(CellView const& 
cell_view) const { + return row_key_ == cell_view.row_key() && + column_family_ == cell_view.column_family() && + column_qualifier_ == cell_view.column_qualifier() && + timestamp_ == cell_view.timestamp() && value_ == cell_view.value(); + } + + private: + std::string row_key_; + std::string column_family_; + std::string column_qualifier_; + std::chrono::milliseconds timestamp_; + std::string value_; + CellView view_; +}; + +std::ostream& operator<<(std::ostream& stream, TestCell const& test_cell) { + auto& cell_view = test_cell.AsCellView(); + stream << "Cell(" << cell_view.row_key() << " " << cell_view.column_family() + << ":" << cell_view.column_qualifier() << " @" + << cell_view.timestamp().count() << "ms: " << cell_view.value() << ")"; + return stream; +} + +TEST(CellStream, NextColumnNotSupportedNoMoreData) { + std::vector cells{ + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}}; + std::vector::iterator cur_cell = cells.begin(); + + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)).WillOnce(Return(false)); + EXPECT_CALL(*mock_impl, Value).WillRepeatedly([&]() -> CellView const& { + return cur_cell->AsCellView(); + }); + EXPECT_CALL(*mock_impl, HasValue).WillRepeatedly([&] { + return cur_cell != cells.end(); + }); + EXPECT_CALL(*mock_impl, Next(NextMode::kCell)).WillOnce([&] { + ++cur_cell; + return true; + }); + CellStream cell_stream(std::move(mock_impl)); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[0], cell_stream.Value()); + cell_stream.Next(NextMode::kColumn); + ASSERT_FALSE(cell_stream.HasValue()); +} + +TEST(CellStream, NextColumnNotSupported) { + std::vector cells{ + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 1_ms, "val2"}, + TestCell{"row1", "cf1", "col2", 0_ms, "val3"}, // column changed + TestCell{"row1", "cf1", "col2", 1_ms, "val4"}, + TestCell{"row1", "cf2", "col2", 0_ms, "val5"}, // column family changed + TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, + 
TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed + TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; + std::vector::iterator cur_cell = cells.begin(); + + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)) + .WillRepeatedly(Return(false)); + EXPECT_CALL(*mock_impl, Value).WillRepeatedly([&]() -> CellView const& { + return cur_cell->AsCellView(); + }); + EXPECT_CALL(*mock_impl, HasValue).WillRepeatedly([&] { + return cur_cell != cells.end(); + }); + EXPECT_CALL(*mock_impl, Next(NextMode::kCell)).WillRepeatedly([&] { + ++cur_cell; + return true; + }); + + CellStream cell_stream(std::move(mock_impl)); + + cell_stream.Next(NextMode::kColumn); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[2], cell_stream.Value()); + + cell_stream.Next(NextMode::kColumn); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[4], cell_stream.Value()); + + cell_stream.Next(NextMode::kColumn); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[6], cell_stream.Value()); + + cell_stream.Next(NextMode::kColumn); + ASSERT_FALSE(cell_stream.HasValue()); +} + +TEST(CellStream, NextRowNotSupported) { + std::vector cells{ + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 1_ms, "val2"}, + TestCell{"row1", "cf1", "col2", 0_ms, "val3"}, // column changed + TestCell{"row1", "cf1", "col2", 1_ms, "val4"}, + TestCell{"row1", "cf2", "col2", 0_ms, "val5"}, // column family changed + TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, + TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed + TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; + std::vector::iterator cur_cell = cells.begin(); + + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)) + .WillRepeatedly(Return(false)); + EXPECT_CALL(*mock_impl, Value).WillRepeatedly([&]() -> CellView const& { + return cur_cell->AsCellView(); + }); + EXPECT_CALL(*mock_impl, HasValue).WillRepeatedly([&] { + return cur_cell != 
cells.end(); + }); + EXPECT_CALL(*mock_impl, Next(NextMode::kCell)).WillRepeatedly([&] { + ++cur_cell; + return true; + }); + + CellStream cell_stream(std::move(mock_impl)); + + cell_stream.Next(NextMode::kColumn); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[2], cell_stream.Value()); + + cell_stream.Next(NextMode::kColumn); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[4], cell_stream.Value()); + + cell_stream.Next(NextMode::kColumn); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[6], cell_stream.Value()); + + cell_stream.Next(NextMode::kColumn); + ASSERT_FALSE(cell_stream.HasValue()); +} + +TEST(CellStream, NextRowUnsupported) { + std::vector cells{ + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 1_ms, "val2"}, + TestCell{"row1", "cf1", "col2", 0_ms, "val3"}, // column changed + TestCell{"row1", "cf1", "col2", 1_ms, "val4"}, + TestCell{"row1", "cf2", "col2", 0_ms, "val5"}, // column family changed + TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, + TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed + TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; + std::vector::iterator cur_cell = cells.begin(); + + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kRow)) + .WillRepeatedly(Return(false)); + EXPECT_CALL(*mock_impl, Value).WillRepeatedly([&]() -> CellView const& { + return cur_cell->AsCellView(); + }); + EXPECT_CALL(*mock_impl, HasValue).WillRepeatedly([&] { + return cur_cell != cells.end(); + }); + EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)).WillRepeatedly([&] { + cur_cell = + std::find_if(cur_cell, cells.end(), [&](TestCell const& cell) { + return cell.AsCellView().row_key() != + cur_cell->AsCellView().row_key() || + cell.AsCellView().column_family() != + cur_cell->AsCellView().column_family() || + cell.AsCellView().column_qualifier() != + cur_cell->AsCellView().column_qualifier(); + }); + return true; + }); + + CellStream 
cell_stream(std::move(mock_impl)); + + cell_stream.Next(NextMode::kRow); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[6], cell_stream.Value()); + + cell_stream.Next(NextMode::kRow); + ASSERT_FALSE(cell_stream.HasValue()); +} + +TEST(CellStream, NextRowAndColumnUnsupported) { + std::vector cells{ + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 1_ms, "val2"}, + TestCell{"row1", "cf1", "col2", 0_ms, "val3"}, // column changed + TestCell{"row1", "cf1", "col2", 1_ms, "val4"}, + TestCell{"row1", "cf2", "col2", 0_ms, "val5"}, // column family changed + TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, + TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed + TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; + std::vector::iterator cur_cell = cells.begin(); + + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kRow)) + .WillRepeatedly(Return(false)); + EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)) + .WillRepeatedly(Return(false)); + EXPECT_CALL(*mock_impl, Next(NextMode::kCell)).WillRepeatedly([&] { + ++cur_cell; + return true; + }); + EXPECT_CALL(*mock_impl, Value).WillRepeatedly([&]() -> CellView const& { + return cur_cell->AsCellView(); + }); + EXPECT_CALL(*mock_impl, HasValue).WillRepeatedly([&] { + return cur_cell != cells.end(); + }); + + CellStream cell_stream(std::move(mock_impl)); + + cell_stream.Next(NextMode::kRow); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[6], cell_stream.Value()); + + cell_stream.Next(NextMode::kRow); + ASSERT_FALSE(cell_stream.HasValue()); +} + +class CellStreamOrderTest : public ::testing::Test, + public ::testing::WithParamInterface< + // Expectation, lhs, rhs. 
+ std::tuple> {}; + +INSTANTIATE_TEST_SUITE_P( + , CellStreamOrderTest, + ::testing::Values( + std::make_tuple(false, TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}), + std::make_tuple(false, TestCell{"row0", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}), + std::make_tuple(true, TestCell{"row2", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}), + std::make_tuple(false, TestCell{"row", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row", "cf1", "col1", 0_ms, "val1"}), + std::make_tuple(false, TestCell{"row", "cf0", "col1", 0_ms, "val1"}, + TestCell{"row", "cf1", "col1", 0_ms, "val1"}), + std::make_tuple(true, TestCell{"row2", "cf2", "col1", 0_ms, "val1"}, + TestCell{"row", "cf1", "col1", 0_ms, "val1"}), + std::make_tuple(false, TestCell{"row", "cf", "col1", 0_ms, "val1"}, + TestCell{"row", "cf", "col1", 0_ms, "val1"}), + std::make_tuple(false, TestCell{"row", "cf", "col0", 0_ms, "val1"}, + TestCell{"row", "cf", "col1", 0_ms, "val1"}), + std::make_tuple(true, TestCell{"row", "cf", "col2", 0_ms, "val1"}, + TestCell{"row", "cf", "col1", 0_ms, "val1"}), + std::make_tuple(false, TestCell{"row", "cf", "col", 0_ms, "val1"}, + TestCell{"row", "cf", "col", 0_ms, "val1"}), + std::make_tuple(false, TestCell{"row", "cf", "col", 0_ms, "val1"}, + TestCell{"row", "cf", "col", 1_ms, "val1"}), + std::make_tuple(true, TestCell{"row", "cf", "col", 1_ms, "val1"}, + TestCell{"row", "cf", "col", 0_ms, "val1"}))); + +TEST_P(CellStreamOrderTest, Order) { + auto mock_impl_left = std::make_unique(); + auto left_cell = std::get<1>(GetParam()); + auto right_cell = std::get<2>(GetParam()); + EXPECT_CALL(*mock_impl_left, Value) + .WillRepeatedly( + [&]() -> CellView const& { return left_cell.AsCellView(); }); + EXPECT_CALL(*mock_impl_left, HasValue).WillRepeatedly([&] { + return true; + }); + + auto mock_impl_right = std::make_unique(); + EXPECT_CALL(*mock_impl_right, Value) + 
.WillRepeatedly( + [&]() -> CellView const& { return right_cell.AsCellView(); }); + EXPECT_CALL(*mock_impl_right, HasValue).WillRepeatedly([&] { + return true; + }); + auto left = std::make_unique(std::move(mock_impl_left)); + auto right = std::make_unique(std::move(mock_impl_right)); + EXPECT_EQ(std::get<0>(GetParam()), + MergeCellStreams::CellStreamGreater()(left, right)); +} + +TEST(MergeCellStreams, NoStreams) { + CellStream stream( + std::make_unique(std::vector{})); + EXPECT_FALSE(stream.HasValue()); +} + +TEST(MergeCellStreams, OnlyEmptyStreams) { + auto empty_impl_1 = std::make_unique(); + EXPECT_CALL(*empty_impl_1, HasValue).WillRepeatedly(Return(false)); + auto empty_impl_2 = std::make_unique(); + EXPECT_CALL(*empty_impl_2, HasValue).WillRepeatedly(Return(false)); + CellStream empty_1(std::move(empty_impl_1)); + CellStream empty_2(std::move(empty_impl_2)); + std::vector streams; + streams.emplace_back(std::move(empty_1)); + streams.emplace_back(std::move(empty_2)); + CellStream stream(std::make_unique(std::move(streams))); + EXPECT_FALSE(stream.HasValue()); +} + +TEST(MergeCellStreams, OneStream) { + std::vector cells{ + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 1_ms, "val2"}, + TestCell{"row1", "cf1", "col2", 0_ms, "val3"}, // column changed + TestCell{"row1", "cf1", "col2", 1_ms, "val4"}, + TestCell{"row1", "cf2", "col2", 0_ms, "val5"}, // column family changed + TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, + TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed + TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; + + std::vector::iterator cur_cell = cells.begin(); + + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)).WillOnce([&]() { + cur_cell = std::next(cells.begin(), 2); + return true; + }); + EXPECT_CALL(*mock_impl, Next(NextMode::kRow)).WillOnce([&]() { + cur_cell = std::next(cells.begin(), 6); + return true; + }); + EXPECT_CALL(*mock_impl, 
Next(NextMode::kCell)).WillRepeatedly([&]() { + ++cur_cell; + return true; + }); + EXPECT_CALL(*mock_impl, Value).WillRepeatedly([&]() -> CellView const& { + return cur_cell->AsCellView(); + }); + EXPECT_CALL(*mock_impl, HasValue).WillRepeatedly([&] { + return cur_cell != cells.end(); + }); + + std::vector streams; + streams.emplace_back(std::move(mock_impl)); + CellStream stream(std::make_unique(std::move(streams))); + + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(cells[0], stream.Value()); + + stream.Next(NextMode::kColumn); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(cells[2], stream.Value()); + + stream.Next(NextMode::kRow); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(cells[6], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(cells[7], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_FALSE(stream.HasValue()); +} + +struct TestStreamData { + TestStreamData(std::vector data) + : cells(std::move(data)), + cur_cell(cells.begin()), + stream(std::make_unique()) {} + + std::vector cells; + std::vector::iterator cur_cell; + std::unique_ptr stream; +}; + +TEST(MergeCellStreams, ThreeStreams) { + TestStreamData stream_data_1( + std::vector{TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf2", "col1", 2_ms, "val2"}}); + + TestStreamData stream_data_2( + std::vector{TestCell{"row1", "cf1", "col1", 1_ms, "val1"}, + TestCell{"row2", "cf1", "col1", 1_ms, "val2"}, + TestCell{"row2", "cf1", "col2", 0_ms, "val3"}}); + + TestStreamData stream_data_3( + std::vector{TestCell{"row1", "cf1", "col1", 3_ms, "val1"}, + TestCell{"row2", "cf0", "col1", 1_ms, "val2"}}); + + auto prepare_stream = [](TestStreamData& stream_data) { + EXPECT_CALL(*stream_data.stream, Next(NextMode::kCell)) + .WillRepeatedly([&]() { + ++stream_data.cur_cell; + return true; + }); + EXPECT_CALL(*stream_data.stream, Value) + .WillRepeatedly([&]() -> CellView const& { + return stream_data.cur_cell->AsCellView(); + }); + 
EXPECT_CALL(*stream_data.stream, HasValue).WillRepeatedly([&] { + return stream_data.cur_cell != stream_data.cells.end(); + }); + return CellStream(std::move(stream_data.stream)); + }; + + std::vector streams; + streams.push_back(prepare_stream(stream_data_1)); + streams.push_back(prepare_stream(stream_data_2)); + streams.push_back(prepare_stream(stream_data_3)); + CellStream stream(std::make_unique(std::move(streams))); + + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_1.cells[0], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_2.cells[0], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_3.cells[0], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_1.cells[1], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_3.cells[1], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_2.cells[1], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_2.cells[2], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_FALSE(stream.HasValue()); +} + +TEST(MergeCellStreams, AdvancingRowAdvancesAllRelevantStreams) { + // When calling Next(NextMode::kRow), all streams currently pointing to the + // same row as the first stream should be advanced. 
+ TestStreamData stream_data_1( + std::vector{TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row2", "cf2", "col1", 2_ms, "val2"}}); + + TestStreamData stream_data_2( + std::vector{TestCell{"row2", "cf1", "col1", 1_ms, "val2"}, + TestCell{"row2", "cf1", "col2", 10_ms, "val3"}}); + + TestStreamData stream_data_3( + std::vector{TestCell{"row1", "cf1", "col1", 3_ms, "val1"}, + TestCell{"row2", "cf0", "col1", 1_ms, "val2"}}); + + TestStreamData stream_data_4( + std::vector{TestCell{"row1", "cf1", "col1", 3_ms, "val1"}}); + + auto prepare_stream = [](TestStreamData& stream_data) { + EXPECT_CALL(*stream_data.stream, Value) + .WillRepeatedly([&]() -> CellView const& { + return stream_data.cur_cell->AsCellView(); + }); + EXPECT_CALL(*stream_data.stream, HasValue).WillRepeatedly([&] { + return stream_data.cur_cell != stream_data.cells.end(); + }); + }; + prepare_stream(stream_data_1); + prepare_stream(stream_data_2); + prepare_stream(stream_data_3); + prepare_stream(stream_data_4); + + EXPECT_CALL(*stream_data_1.stream, Next(NextMode::kRow)).WillOnce([&]() { + stream_data_1.cur_cell = std::next(stream_data_1.cells.begin()); + return true; + }); + EXPECT_CALL(*stream_data_3.stream, Next(NextMode::kRow)).WillOnce([&]() { + stream_data_3.cur_cell = std::next(stream_data_3.cells.begin()); + return true; + }); + EXPECT_CALL(*stream_data_4.stream, Next(NextMode::kRow)).WillOnce([&]() { + stream_data_4.cur_cell = stream_data_4.cells.end(); + return true; + }); + + EXPECT_CALL(*stream_data_1.stream, Next(NextMode::kCell)) + .WillOnce([&]() { + ++stream_data_1.cur_cell; + return true; + }); + + EXPECT_CALL(*stream_data_2.stream, Next(NextMode::kCell)) + .Times(2) + .WillRepeatedly([&]() { + ++stream_data_2.cur_cell; + return true; + }); + + EXPECT_CALL(*stream_data_3.stream, Next(NextMode::kCell)) + .WillOnce([&]() { + ++stream_data_3.cur_cell; + return true; + }); + + std::vector streams; + streams.push_back(CellStream(std::move(stream_data_1.stream))); + 
streams.push_back(CellStream(std::move(stream_data_2.stream))); + streams.push_back(CellStream(std::move(stream_data_3.stream))); + streams.push_back(CellStream(std::move(stream_data_4.stream))); + CellStream stream(std::make_unique(std::move(streams))); + + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_1.cells[0], stream.Value()); + + stream.Next(NextMode::kRow); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_3.cells[1], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_2.cells[0], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_2.cells[1], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_1.cells[1], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_FALSE(stream.HasValue()); +} + +TEST(MergeCellStreams, AdvancingColumnAdvancesAllRelevantStreams) { + // When calling Next(NextMode::kColumn), all streams currently pointing to the + // same column as the first stream should be advanced. 
+ TestStreamData stream_data( + std::vector{TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row2", "cf2", "col1", 2_ms, "val2"}}); + + TestStreamData stream_data_different_column_family( + std::vector{TestCell{"row1", "cf2", "col1", 1_ms, "val2"}}); + + TestStreamData stream_data_different_column_qualifier( + std::vector{TestCell{"row1", "cf1", "col2", 1_ms, "val2"}}); + + TestStreamData stream_data_different_row( + std::vector{TestCell{"row2", "cf1", "col1", 1_ms, "val2"}}); + + TestStreamData stream_data_same_column_different_timestamp( + std::vector{TestCell{"row1", "cf1", "col1", 10_ms, "val2"}}); + + auto prepare_stream = [](TestStreamData& stream_data) { + EXPECT_CALL(*stream_data.stream, Value) + .WillRepeatedly([&]() -> CellView const& { + return stream_data.cur_cell->AsCellView(); + }); + EXPECT_CALL(*stream_data.stream, HasValue).WillRepeatedly([&] { + return stream_data.cur_cell != stream_data.cells.end(); + }); + }; + prepare_stream(stream_data); + prepare_stream(stream_data_different_column_family); + prepare_stream(stream_data_different_column_qualifier); + prepare_stream(stream_data_different_row); + prepare_stream(stream_data_same_column_different_timestamp); + + EXPECT_CALL(*stream_data.stream, Next(NextMode::kColumn)).WillOnce([&]() { + ++stream_data.cur_cell; + return true; + }); + EXPECT_CALL(*stream_data.stream, Next(NextMode::kCell)).WillOnce([&]() { + ++stream_data.cur_cell; + return true; + }); + EXPECT_CALL(*stream_data_same_column_different_timestamp.stream, + Next(NextMode::kColumn)) + .WillOnce([&]() { + ++stream_data_same_column_different_timestamp.cur_cell; + return true; + }); + + EXPECT_CALL(*stream_data_different_column_family.stream, + Next(NextMode::kCell)) + .WillOnce([&]() { + ++stream_data_different_column_family.cur_cell; + return true; + }); + + EXPECT_CALL(*stream_data_different_column_qualifier.stream, + Next(NextMode::kCell)) + .WillOnce([&]() { + ++stream_data_different_column_qualifier.cur_cell; + return true; 
+ }); + + EXPECT_CALL(*stream_data_different_row.stream, + Next(NextMode::kCell)) + .WillOnce([&]() { + ++stream_data_different_row.cur_cell; + return true; + }); + + std::vector streams; + streams.push_back(CellStream(std::move(stream_data.stream))); + streams.push_back( + CellStream(std::move(stream_data_different_column_family.stream))); + streams.push_back( + CellStream(std::move(stream_data_different_column_qualifier.stream))); + streams.push_back(CellStream(std::move(stream_data_different_row.stream))); + streams.push_back(CellStream( + std::move(stream_data_same_column_different_timestamp.stream))); + CellStream stream(std::make_unique(std::move(streams))); + + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data.cells[0], stream.Value()); + + stream.Next(NextMode::kColumn); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_different_column_qualifier.cells[0], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_different_column_family.cells[0], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_different_row.cells[0], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data.cells[1], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_FALSE(stream.HasValue()); +} + } // anonymous namespace } // namespace emulator } // namespace bigtable From 7e0b3f063f53bdfc6191a99a0775119861402a4c Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 17 Apr 2025 17:12:53 +0300 Subject: [PATCH 118/195] emulator: tests: exclude "programs" from tests. (#5) "programs" are not tests in general and can be server binaries which wait forever and cause the entire test suite to hang and/or timeout. In particular add_test should only be called for binaries that are tests, not arbitrary programs. 
TESTED=Verified that "make test" no longer tries to run (and therefore hang on) the emulator binary. --- google/cloud/bigtable/emulator/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index 788f2896817f8..804d386ee734a 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -100,7 +100,4 @@ foreach (fname ${bigtable_emulator_programs}) gRPC::grpc protobuf::libprotobuf) google_cloud_cpp_add_common_options(${target}) - if (BUILD_TESTING) - add_test(NAME ${target} COMMAND ${target}) - endif () endforeach () From 2b83160e5dd9c1416a84ce4ef5e992bad573f0d1 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 23 Apr 2025 16:33:54 +0300 Subject: [PATCH 119/195] emulator: SetCell: set timestamp to server time if passed 0 or negative timestamp. --- .../cloud/bigtable/emulator/rollback_test.cc | 73 +++++++++++++++++++ google/cloud/bigtable/emulator/table.cc | 5 ++ 2 files changed, 78 insertions(+) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 8129b86306594..8e505a4cf5ca9 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/row_streamer.h" #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/internal/make_status.h" #include "google/cloud/status.h" @@ -205,6 +206,41 @@ Status has_column( return Status(); } +StatusOr> get_column( + std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return NotFoundError( + "columnn family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return internal::NotFoundError( + "row key not found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); + } + + auto& column_family_row = column_family_row_it->second; + auto column_row_it = column_family_row.find(column_qualifier); + if (column_row_it == column_family_row.end()) { + return NotFoundError( + "no column found with supplied qualifer", + GCP_ERROR_INFO().WithMetadata("column qualifer", column_qualifier)); + } + + std::map ret( + column_row_it->second.begin(), column_row_it->second.end()); + + return ret; +} + Status has_row(std::shared_ptr& table, std::string const& column_family, std::string const& row_key) { auto column_family_it = table->find(column_family); @@ -730,6 +766,43 @@ TEST(TransactonRollback, DeleteFromRowBasicFunction) { .ok()); } +TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = 
"test"; + auto const* const column_qualifer = "test"; + auto const timestamp_micros = 0; + auto const* data = "test"; + + std::vector column_families = {column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifer, timestamp_micros, + data}; + v.push_back(p); + + auto status = set_cells(table, table_name, row_key, v); + + ASSERT_STATUS_OK(status); + + auto status_or = + get_column(table, column_family_name, row_key, column_qualifer); + ASSERT_STATUS_OK(status_or.status()); + auto column = status_or.value(); + ASSERT_EQ(1, column.size()); + for (auto const& cell : column) { + ASSERT_GT(cell.first.count(), 0); + ASSERT_EQ(data, cell.second); + } +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 8abe4d1625a5b..a8dbf4df51001 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -511,6 +511,11 @@ Status RowTransaction::SetCell( auto timestamp = std::chrono::duration_cast( std::chrono::microseconds(set_cell.timestamp_micros())); + if (timestamp <= std::chrono::milliseconds::zero()) { + timestamp = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); + } + auto maybe_old_value = column_family.SetCell(request_.row_key(), set_cell.column_qualifier(), timestamp, set_cell.value()); From 2538679bda73435199dd88ac9d945cc10445fd18 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 23 Apr 2025 18:13:37 +0300 Subject: [PATCH 120/195] emulator: transaction rollback: Check that a SetCell with timestamp 0 can be correctly rolled back. 
--- google/cloud/bigtable/emulator/rollback_test.cc | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 8e505a4cf5ca9..d6507243e6aa1 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -789,7 +789,6 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { v.push_back(p); auto status = set_cells(table, table_name, row_key, v); - ASSERT_STATUS_OK(status); auto status_or = @@ -801,6 +800,21 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { ASSERT_GT(cell.first.count(), 0); ASSERT_EQ(data, cell.second); } + + // Test that a SetCell mutation with timestamp set to 0 can be + // correctly rolled back. In the following, the first mutation + // (timestamp 0) should succeed and the next one should fail. The + // condition after that should be that the first one (timestamp 0) + // should be rolled back so that a row with row_key_2 key should not + // exist when the MutateRow request returns. + v.clear(); + v = {{column_family_name, column_qualifer, 0, data}, + {"non_existent_column_family_name_causes_tx_rollbaclk", column_qualifer, + 1000, data}}; + auto const* const row_key_2 = "1"; + status = set_cells(table, table_name, row_key_2, v); + ASSERT_NE(true, status.ok()); + ASSERT_FALSE(has_row(table, column_family_name, row_key_2).ok()); } } // namespace emulator From 8dd57eb755c0bfe984a71611e6e58695b38289ef Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 23 Apr 2025 18:23:01 +0300 Subject: [PATCH 121/195] emulator: docs: correct formatting of several comments in header. 
--- google/cloud/bigtable/emulator/column_family.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index df81dd568bd00..91038c6e55174 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -187,7 +187,7 @@ class ColumnFamily { * @param value the value to insert/update. * * @return no value if the timestamp had no value before, otherwise - * the previous value of the timestamp. + * the previous value of the timestamp. * */ absl::optional SetCell(std::string const& row_key, @@ -206,12 +206,15 @@ class ColumnFamily { * Delete cells from a row falling into a given timestamp range in one column. * * @param row_key the row key to remove the cells from (or the - * iterator to the row - row_it - in the 2nd overloaded form of the - * function). + * iterator to the row - row_it - in the 2nd overloaded form of the + * function). * @param column_qualifier the column qualifier from which to delete - * the values. @param time_range the timestamp range dictating - * which values to delete. @return vector of deleted cells. + * the values. + * + * @param time_range the timestamp range dictating which values to + * delete. + * @return vector of deleted cells. */ std::vector DeleteColumn( std::string const& row_key, std::string const& column_qualifier, From 2646a59abb459e1417ebafba4566072850069113 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 23 Apr 2025 18:25:49 +0300 Subject: [PATCH 122/195] emulator: tests: use a better EXPECT* macro in one place. 
--- google/cloud/bigtable/emulator/column_family_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index 637f8367d76dc..9a529affd82b4 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -179,7 +179,7 @@ row2 :col0 @10ms: qux )""", "\n" + DumpColumnFamily(fam)); - EXPECT_TRUE(fam.DeleteRow("row2").size() > 0); + EXPECT_GT(fam.DeleteRow("row2").size(), 0); EXPECT_FALSE(fam.DeleteRow("row_nonexistent").size() > 0); EXPECT_EQ("row0 :col0 @10ms: baz\n", DumpColumnFamily(fam)); From edd5b7c341c1840cfa4128af0242efef90e7976c Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 23 Apr 2025 20:27:13 +0300 Subject: [PATCH 123/195] emulator: Implement ColumnFamily::DeleteTimeStamp(). This allows us to delete cells by specifying a specific timestamp rather than a timestamp range which might otherwise involve possibly non-type-safe calculations to determine the upper bound of the timestamps (in particular doing the latter needs one to assume a specific resolution for bigtable's timestamp storage). 
--- .../cloud/bigtable/emulator/column_family.cc | 45 +++++++++++++++++++ .../cloud/bigtable/emulator/column_family.h | 45 +++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 355655cbc1866..12bd933b1482e 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -58,6 +58,20 @@ std::vector ColumnRow::DeleteTimeRange( return deleted_cells; } +absl::optional ColumnRow::DeleteTimeStamp( + std::chrono::milliseconds timestamp) { + absl::optional ret = absl::nullopt; + + auto cell_it = cells_.find(timestamp); + if (cell_it != cells_.end()) { + Cell cell = {std::move(cell_it->first), std::move(cell_it->second)}; + ret.emplace(std::move(cell)); + cells_.erase(cell_it); + } + + return ret; +} + absl::optional ColumnFamilyRow::SetCell(std::string const& column_qualifier, std::chrono::milliseconds timestamp, std::string const& value) { @@ -78,6 +92,21 @@ std::vector ColumnFamilyRow::DeleteColumn( return res; } +absl::optional ColumnFamilyRow::DeleteTimeStamp( + std::string const& column_qulifier, std::chrono::milliseconds timestamp) { + auto column_it = columns_.find(column_qulifier); + if (column_it == columns_.end()) { + return absl::nullopt; + } + + auto ret = column_it->second.DeleteTimeStamp(timestamp); + if(!column_it->second.HasCells()) { + columns_.erase(column_it); + } + + return ret; +} + absl::optional ColumnFamily::SetCell( std::string const& row_key, std::string const& column_qualifier, std::chrono::milliseconds timestamp, std::string const& value) { @@ -130,6 +159,22 @@ std::vector ColumnFamily::DeleteColumn( return {}; } +absl::optional ColumnFamily::DeleteTimeStamp( + std::string const& row_key, std::string const& column_qulifier, + std::chrono::milliseconds timestamp) { + auto row_it = rows_.find(row_key); + if (row_it == rows_.end()) { + return absl::nullopt; + } + + auto ret = 
row_it->second.DeleteTimeStamp(column_qulifier, timestamp); + if (!row_it->second.HasColumns()) { + rows_.erase(row_it); + } + + return ret; +} + class FilteredColumnFamilyStream::FilterApply { public: explicit FilterApply(FilteredColumnFamilyStream& parent) : parent_(parent) {} diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 91038c6e55174..2324d01ccc3c0 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -69,6 +69,17 @@ class ColumnRow { std::vector DeleteTimeRange( ::google::bigtable::v2::TimestampRange const& time_range); + /** + * Delete a cell with the given timestamp. + * + * @param timestamp the std::chrono::milliseconds timestamp of the + * cell to delete. + * + * @return Cell representing deleted cell, if there + * was a cell with that timestamp, otherwise absl::nullopt. + */ + absl::optional DeleteTimeStamp(std::chrono::milliseconds timestamp); + bool HasCells() const { return !cells_.empty(); } using const_iterator = std::map::const_iterator; @@ -132,6 +143,21 @@ class ColumnFamilyRow { std::vector DeleteColumn( std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); + /** + * Delete a cell with the given timestamp from the column given by + * the given column qualifier. + * + * @param column_qualifer the column from which to delete the cell. + * + * @param timestamp the std::chrono::milliseconds timestamp of the + * cell to delete. + * + * @return Cell representing deleted cell, if there was a cell with + * that timestamp in then given column, otherwise absl::nullopt. 
+ */ + absl::optional DeleteTimeStamp(std::string const& column_qulifier, + std::chrono::milliseconds timestamp); + bool HasColumns() { return !columns_.empty(); } using const_iterator = std::map::const_iterator; const_iterator begin() const { return columns_.begin(); } @@ -225,6 +251,25 @@ class ColumnFamily { std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); + /** + * Delete a cell with the given timestamp from the column given by + * the given column qualifier from the row given by row_key. + * + * @param row_key the row from which to delete the cell + * + * @param column_qualifer the column from which to delete the cell. + * + * @param timestamp the std::chrono::milliseconds timestamp of the + * cell to delete. + * + * @return Cell representing deleted cell, if there was a cell with + * that timestamp in then given column in the given row, + * otherwise absl::nullopt. + */ + absl::optional DeleteTimeStamp(std::string const& row_key, + std::string const& column_qulifier, + std::chrono::milliseconds timestamp); + const_iterator begin() const { return rows_.begin(); } const_iterator end() const { return rows_.end(); } const_iterator lower_bound(std::string const& row_key) const { From e1abe871c76a6a017cbad02897dee167b96c18e4 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 23 Apr 2025 20:37:17 +0300 Subject: [PATCH 124/195] emulator: transaction undo: radically simplify the code for DeleteValue. 
--- google/cloud/bigtable/emulator/table.cc | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index a8dbf4df51001..b69edae96043a 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -550,24 +550,11 @@ void RowTransaction::Undo() { continue; } - auto* delete_value = absl::get_if(&op); if (delete_value) { - ::google::bigtable::v2::TimestampRange range; - auto start_micros = std::chrono::duration_cast( - delete_value->timestamp); - // The following is an exclusive upper bound, 1ms higher Since - // timestamps have millisecond resolution, 2 timestamps have to - // be at least 1ms apart which means that setting this as the - // end of the range guarantees that we delete at most 1 (because - // the upper bound is exclusive). - auto end_micros = std::chrono::duration_cast( - delete_value->timestamp + std::chrono::milliseconds(1)); - range.set_start_timestamp_micros(start_micros.count()); - range.set_end_timestamp_micros(end_micros.count()); - delete_value->column_family.DeleteColumn( + delete_value->column_family.DeleteTimeStamp( row_key, std::move(delete_value->column_qualifier), - range); + delete_value->timestamp); continue; } From a3c10fedd97c7e0dfc48de28561abce84c1d8086 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 23 Apr 2025 21:40:50 +0300 Subject: [PATCH 125/195] emulator: rollback tests: perform some trivial refactoring. 
--- .../cloud/bigtable/emulator/rollback_test.cc | 106 +++++++++--------- 1 file changed, 55 insertions(+), 51 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index d6507243e6aa1..14a39773e06ba 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -262,6 +262,61 @@ Status has_row(std::shared_ptr& table, return Status(); } +// Test that SetCell does the right thing when it receives a zero or +// negative timestamp, and that the cell created can be correctly +// deleted if rollback occurs. +TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifer = "test"; + auto const timestamp_micros = 0; + auto const* data = "test"; + + std::vector column_families = {column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifer, timestamp_micros, + data}; + v.push_back(p); + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + + auto status_or = + get_column(table, column_family_name, row_key, column_qualifer); + ASSERT_STATUS_OK(status_or.status()); + auto column = status_or.value(); + ASSERT_EQ(1, column.size()); + for (auto const& cell : column) { + ASSERT_GT(cell.first.count(), 0); + ASSERT_EQ(data, cell.second); + } + + // Test that a SetCell mutation with timestamp set to 0 can be + // correctly rolled back. In the following, the first mutation + // (timestamp 0) should succeed and the next one should fail. 
The + // condition after that should be that the first one (timestamp 0) + // should be rolled back so that a row with row_key_2 key should not + // exist when the MutateRow request returns. + v.clear(); + v = {{column_family_name, column_qualifer, 0, data}, + {"non_existent_column_family_name_causes_tx_rollbaclk", column_qualifer, + 1000, data}}; + auto const* const row_key_2 = "1"; + status = set_cells(table, table_name, row_key_2, v); + ASSERT_NE(true, status.ok()); + ASSERT_FALSE(has_row(table, column_family_name, row_key_2).ok()); +} + + // Does the SetCell mutation work to set a cell to a specific value? TEST(TransactonRollback, SetCellBasicFunction) { ::google::bigtable::admin::v2::Table schema; @@ -766,57 +821,6 @@ TEST(TransactonRollback, DeleteFromRowBasicFunction) { .ok()); } -TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { - ::google::bigtable::admin::v2::Table schema; - ::google::bigtable::admin::v2::ColumnFamily column_family; - - auto const* const table_name = "projects/test/instances/test/tables/test"; - auto const* const row_key = "0"; - auto const* const column_family_name = "test"; - auto const* const column_qualifer = "test"; - auto const timestamp_micros = 0; - auto const* data = "test"; - - std::vector column_families = {column_family_name}; - auto maybe_table = create_table(table_name, column_families); - - ASSERT_STATUS_OK(maybe_table); - auto table = maybe_table.value(); - - std::vector v; - SetCellParams p = {column_family_name, column_qualifer, timestamp_micros, - data}; - v.push_back(p); - - auto status = set_cells(table, table_name, row_key, v); - ASSERT_STATUS_OK(status); - - auto status_or = - get_column(table, column_family_name, row_key, column_qualifer); - ASSERT_STATUS_OK(status_or.status()); - auto column = status_or.value(); - ASSERT_EQ(1, column.size()); - for (auto const& cell : column) { - ASSERT_GT(cell.first.count(), 0); - ASSERT_EQ(data, cell.second); - } - - // Test that a SetCell mutation with timestamp set 
to 0 can be - // correctly rolled back. In the following, the first mutation - // (timestamp 0) should succeed and the next one should fail. The - // condition after that should be that the first one (timestamp 0) - // should be rolled back so that a row with row_key_2 key should not - // exist when the MutateRow request returns. - v.clear(); - v = {{column_family_name, column_qualifer, 0, data}, - {"non_existent_column_family_name_causes_tx_rollbaclk", column_qualifer, - 1000, data}}; - auto const* const row_key_2 = "1"; - status = set_cells(table, table_name, row_key_2, v); - ASSERT_NE(true, status.ok()); - ASSERT_FALSE(has_row(table, column_family_name, row_key_2).ok()); -} - } // namespace emulator } // namespace bigtable } // namespace cloud From 3e622787e5f09b15633c36f94d26b4b7e0eaad9f Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 25 Apr 2025 16:08:30 +0300 Subject: [PATCH 126/195] emulator tests: Fix bigtable_emulator_range_set_test. The tests aborted due to an assertion in the constructor of Range that enforced the invariant that start <= end for all well-formed Ranges. However in several tests testing the ends of ranges, start was passed in as "unimportant" with end being "A" or "B". This violates the invariant and causes an abort at runtime. Fixes: TBL-62. 
--- .../cloud/bigtable/emulator/range_set_test.cc | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc index dc42bd85e0dd4..df635e6c9c9d3 100644 --- a/google/cloud/bigtable/emulator/range_set_test.cc +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -275,31 +275,31 @@ TEST(StringRangeSet, RangeStartLess) { TEST(StringRangeSet, RangeEndLess) { EXPECT_TRUE(StringRangeSet::Range::EndLess()( - StringRangeSet::Range("unimportant", kWhatever, "A", kOpen), - StringRangeSet::Range("unimportant", kWhatever, "B", kOpen))); + StringRangeSet::Range("A", kWhatever, "A", kOpen), + StringRangeSet::Range("A", kWhatever, "B", kOpen))); EXPECT_FALSE(StringRangeSet::Range::EndLess()( - StringRangeSet::Range("unimportant", kWhatever, "B", kOpen), - StringRangeSet::Range("unimportant", kWhatever, "A", kOpen))); + StringRangeSet::Range("A", kWhatever, "B", kOpen), + StringRangeSet::Range("A", kWhatever, "A", kOpen))); EXPECT_FALSE(StringRangeSet::Range::EndLess()( - StringRangeSet::Range("unimportant", kWhatever, "A", kOpen), - StringRangeSet::Range("unimportant", kWhatever, "A", kOpen))); + StringRangeSet::Range("A", kWhatever, "A", kOpen), + StringRangeSet::Range("A", kWhatever, "A", kOpen))); EXPECT_TRUE(StringRangeSet::Range::EndLess()( - StringRangeSet::Range("unimportant", kWhatever, "A", kClosed), - StringRangeSet::Range("unimportant", kWhatever, "B", kClosed))); + StringRangeSet::Range("A", kWhatever, "A", kClosed), + StringRangeSet::Range("A", kWhatever, "B", kClosed))); EXPECT_FALSE(StringRangeSet::Range::EndLess()( - StringRangeSet::Range("unimportant", kWhatever, "B", kClosed), - StringRangeSet::Range("unimportant", kWhatever, "A", kClosed))); + StringRangeSet::Range("A", kWhatever, "B", kClosed), + StringRangeSet::Range("A", kWhatever, "A", kClosed))); EXPECT_FALSE(StringRangeSet::Range::EndLess()( - 
StringRangeSet::Range("unimportant", kWhatever, "A", kClosed), - StringRangeSet::Range("unimportant", kWhatever, "A", kClosed))); + StringRangeSet::Range("A", kWhatever, "A", kClosed), + StringRangeSet::Range("A", kWhatever, "A", kClosed))); EXPECT_FALSE(StringRangeSet::Range::EndLess()( - StringRangeSet::Range("unimportant", kWhatever, "A", kClosed), - StringRangeSet::Range("unimportant", kWhatever, "A", kOpen))); + StringRangeSet::Range("A", kWhatever, "A", kClosed), + StringRangeSet::Range("A", kWhatever, "A", kOpen))); EXPECT_TRUE(StringRangeSet::Range::EndLess()( - StringRangeSet::Range("unimportant", kWhatever, "A", kOpen), - StringRangeSet::Range("unimportant", kWhatever, "A", kClosed))); + StringRangeSet::Range("A", kWhatever, "A", kOpen), + StringRangeSet::Range("A", kWhatever, "A", kClosed))); } TEST(StringRangeSet, BelowStart) { @@ -322,8 +322,8 @@ TEST(StringRangeSet, BelowStart) { } TEST(StringRangeSet, AboveEnd) { - StringRangeSet::Range const open("unimportant", kWhatever, "B", kOpen); - StringRangeSet::Range const closed("unimportant", kWhatever, "B", kClosed); + StringRangeSet::Range const open("A", kWhatever, "B", kOpen); + StringRangeSet::Range const closed("A", kWhatever, "B", kClosed); StringRangeSet::Range const infinite( "unimportant", kWhatever, StringRangeSet::Range::Infinity{}, kClosed); From ff0bed94dc39d527554a282617c9a915dff7a4bf Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 28 Apr 2025 19:40:51 +0300 Subject: [PATCH 127/195] emulator tests: Fix bigtable_emulator_range_set_test. The tests aborted due to an assertion in the constructor of Range that enforced the invariant that start <= end for all well-formed Ranges. However in several tests testing the ends of ranges, start was passed in as "unimportant" with end being "A" or "B". This violates the invariant and causes an abort at runtime. Fixes: TBL-62. 
--- .../cloud/bigtable/emulator/range_set_test.cc | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc index dc42bd85e0dd4..df635e6c9c9d3 100644 --- a/google/cloud/bigtable/emulator/range_set_test.cc +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -275,31 +275,31 @@ TEST(StringRangeSet, RangeStartLess) { TEST(StringRangeSet, RangeEndLess) { EXPECT_TRUE(StringRangeSet::Range::EndLess()( - StringRangeSet::Range("unimportant", kWhatever, "A", kOpen), - StringRangeSet::Range("unimportant", kWhatever, "B", kOpen))); + StringRangeSet::Range("A", kWhatever, "A", kOpen), + StringRangeSet::Range("A", kWhatever, "B", kOpen))); EXPECT_FALSE(StringRangeSet::Range::EndLess()( - StringRangeSet::Range("unimportant", kWhatever, "B", kOpen), - StringRangeSet::Range("unimportant", kWhatever, "A", kOpen))); + StringRangeSet::Range("A", kWhatever, "B", kOpen), + StringRangeSet::Range("A", kWhatever, "A", kOpen))); EXPECT_FALSE(StringRangeSet::Range::EndLess()( - StringRangeSet::Range("unimportant", kWhatever, "A", kOpen), - StringRangeSet::Range("unimportant", kWhatever, "A", kOpen))); + StringRangeSet::Range("A", kWhatever, "A", kOpen), + StringRangeSet::Range("A", kWhatever, "A", kOpen))); EXPECT_TRUE(StringRangeSet::Range::EndLess()( - StringRangeSet::Range("unimportant", kWhatever, "A", kClosed), - StringRangeSet::Range("unimportant", kWhatever, "B", kClosed))); + StringRangeSet::Range("A", kWhatever, "A", kClosed), + StringRangeSet::Range("A", kWhatever, "B", kClosed))); EXPECT_FALSE(StringRangeSet::Range::EndLess()( - StringRangeSet::Range("unimportant", kWhatever, "B", kClosed), - StringRangeSet::Range("unimportant", kWhatever, "A", kClosed))); + StringRangeSet::Range("A", kWhatever, "B", kClosed), + StringRangeSet::Range("A", kWhatever, "A", kClosed))); EXPECT_FALSE(StringRangeSet::Range::EndLess()( - 
StringRangeSet::Range("unimportant", kWhatever, "A", kClosed), - StringRangeSet::Range("unimportant", kWhatever, "A", kClosed))); + StringRangeSet::Range("A", kWhatever, "A", kClosed), + StringRangeSet::Range("A", kWhatever, "A", kClosed))); EXPECT_FALSE(StringRangeSet::Range::EndLess()( - StringRangeSet::Range("unimportant", kWhatever, "A", kClosed), - StringRangeSet::Range("unimportant", kWhatever, "A", kOpen))); + StringRangeSet::Range("A", kWhatever, "A", kClosed), + StringRangeSet::Range("A", kWhatever, "A", kOpen))); EXPECT_TRUE(StringRangeSet::Range::EndLess()( - StringRangeSet::Range("unimportant", kWhatever, "A", kOpen), - StringRangeSet::Range("unimportant", kWhatever, "A", kClosed))); + StringRangeSet::Range("A", kWhatever, "A", kOpen), + StringRangeSet::Range("A", kWhatever, "A", kClosed))); } TEST(StringRangeSet, BelowStart) { @@ -322,8 +322,8 @@ TEST(StringRangeSet, BelowStart) { } TEST(StringRangeSet, AboveEnd) { - StringRangeSet::Range const open("unimportant", kWhatever, "B", kOpen); - StringRangeSet::Range const closed("unimportant", kWhatever, "B", kClosed); + StringRangeSet::Range const open("A", kWhatever, "B", kOpen); + StringRangeSet::Range const closed("A", kWhatever, "B", kClosed); StringRangeSet::Range const infinite( "unimportant", kWhatever, StringRangeSet::Range::Infinity{}, kClosed); From dcad63c88b629daf4baac8522c07170ad2414243 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 29 Apr 2025 15:18:23 +0300 Subject: [PATCH 128/195] emulator: fixes for clang-tidy and clang-format. Restricted only to changes I have made to this PR. Tested (all tests pass). 
--- google/cloud/bigtable/emulator/CMakeLists.txt | 2 +- .../emulator/bigtable_emulator_unit_tests.bzl | 2 +- .../cloud/bigtable/emulator/column_family.cc | 13 +- .../cloud/bigtable/emulator/column_family.h | 10 +- .../bigtable/emulator/column_family_test.cc | 10 +- .../cloud/bigtable/emulator/rollback_test.cc | 203 +++++++++--------- google/cloud/bigtable/emulator/table.cc | 23 +- google/cloud/bigtable/emulator/table.h | 7 +- 8 files changed, 137 insertions(+), 133 deletions(-) diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index 51a86e73d580d..858dcdeb205ab 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -57,8 +57,8 @@ if (BUILD_TESTING) column_family_test.cc filter_test.cc filtered_map_test.cc - rollback_test.cc range_set_test.cc + rollback_test.cc server_test.cc table_test.cc) export_list_to_bazel("bigtable_emulator_unit_tests.bzl" diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl index 8b27b50765870..819f6dce68630 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -20,8 +20,8 @@ bigtable_emulator_unit_tests = [ "column_family_test.cc", "filter_test.cc", "filtered_map_test.cc", - "rollback_test.cc", "range_set_test.cc", + "rollback_test.cc", "server_test.cc", "table_test.cc", ] diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 12bd933b1482e..3e23cde76c80d 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -72,9 +72,9 @@ absl::optional ColumnRow::DeleteTimeStamp( return ret; } -absl::optional ColumnFamilyRow::SetCell(std::string const& column_qualifier, - std::chrono::milliseconds timestamp, - std::string const& value) 
{ +absl::optional ColumnFamilyRow::SetCell( + std::string const& column_qualifier, std::chrono::milliseconds timestamp, + std::string const& value) { return columns_[column_qualifier].SetCell(timestamp, value); } @@ -100,7 +100,7 @@ absl::optional ColumnFamilyRow::DeleteTimeStamp( } auto ret = column_it->second.DeleteTimeStamp(timestamp); - if(!column_it->second.HasCells()) { + if (!column_it->second.HasCells()) { columns_.erase(column_it); } @@ -126,7 +126,7 @@ std::map> ColumnFamily::DeleteRow( // Not setting start and end timestamps will select all cells for deletion ::google::bigtable::v2::TimestampRange time_range; auto deleted_cells = column.second.DeleteTimeRange(time_range); - if (deleted_cells.size() > 0) { + if (!deleted_cells.empty()) { res[std::move(column.first)] = std::move(deleted_cells); } } @@ -198,7 +198,8 @@ class FilteredColumnFamilyStream::FilterApply { bool operator()(ColumnRegex const& column_regex) { parent_.column_regexes_.emplace_back(column_regex.regex); - return true; } + return true; + } private: FilteredColumnFamilyStream& parent_; diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 2324d01ccc3c0..e7928afa36516 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -147,7 +147,7 @@ class ColumnFamilyRow { * Delete a cell with the given timestamp from the column given by * the given column qualifier. * - * @param column_qualifer the column from which to delete the cell. + * @param column_qualifier the column from which to delete the cell. * * @param timestamp the std::chrono::milliseconds timestamp of the * cell to delete. 
@@ -247,9 +247,9 @@ class ColumnFamily { ::google::bigtable::v2::TimestampRange const& time_range); std::vector DeleteColumn( - std::map::iterator row_it, - std::string const& column_qualifier, - ::google::bigtable::v2::TimestampRange const& time_range); + std::map::iterator row_it, + std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range); /** * Delete a cell with the given timestamp from the column given by @@ -257,7 +257,7 @@ class ColumnFamily { * * @param row_key the row from which to delete the cell * - * @param column_qualifer the column from which to delete the cell. + * @param column_qualifier the column from which to delete the cell. * * @param timestamp the std::chrono::milliseconds timestamp of the * cell to delete. diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index 9a529affd82b4..21b8cacd0756e 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -126,8 +126,9 @@ col2 @10ms: qux EXPECT_EQ("bar", fam_row.lower_bound("col1")->second.begin()->second); EXPECT_EQ("qux", fam_row.upper_bound("col1")->second.begin()->second); - EXPECT_EQ(1, fam_row.DeleteColumn("col1", - ::google::bigtable::v2::TimestampRange{}).size()); + EXPECT_EQ( + 1, fam_row.DeleteColumn("col1", ::google::bigtable::v2::TimestampRange{}) + .size()); // Verify that there is no empty column. 
EXPECT_EQ(2, std::distance(fam_row.begin(), fam_row.end())); @@ -168,7 +169,8 @@ row2 :col0 @10ms: qux DumpColumnFamilyRow(fam.upper_bound("row1")->second)); EXPECT_EQ(1, fam.DeleteColumn("row1", "col0", - ::google::bigtable::v2::TimestampRange{}).size()); + ::google::bigtable::v2::TimestampRange{}) + .size()); // Verify that there is no empty row EXPECT_EQ(2, std::distance(fam.begin(), fam.end())); @@ -180,7 +182,7 @@ row2 :col0 @10ms: qux "\n" + DumpColumnFamily(fam)); EXPECT_GT(fam.DeleteRow("row2").size(), 0); - EXPECT_FALSE(fam.DeleteRow("row_nonexistent").size() > 0); + EXPECT_TRUE(fam.DeleteRow("row_nonexistent").empty()); EXPECT_EQ("row0 :col0 @10ms: baz\n", DumpColumnFamily(fam)); } diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 14a39773e06ba..c58650ed538a9 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -79,7 +79,7 @@ Status delete_from_families( struct DeleteFromColumnParams { std::string column_family; std::string column_qualifier; - ::google::bigtable::v2::TimestampRange *timestamp_range; + ::google::bigtable::v2::TimestampRange* timestamp_range; }; Status delete_from_columns( @@ -91,18 +91,18 @@ Status delete_from_columns( mutation_request.set_row_key(row_key); for (auto& param : v) { - auto* mutation_request_mutation = mutation_request.add_mutations(); - auto* delete_from_column_mutation = - mutation_request_mutation->mutable_delete_from_column(); - delete_from_column_mutation->set_family_name(param.column_family); - delete_from_column_mutation->set_column_qualifier(param.column_qualifier); - delete_from_column_mutation->set_allocated_time_range(param.timestamp_range); + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* delete_from_column_mutation = + mutation_request_mutation->mutable_delete_from_column(); + delete_from_column_mutation->set_family_name(param.column_family); + 
delete_from_column_mutation->set_column_qualifier(param.column_qualifier); + delete_from_column_mutation->set_allocated_time_range( + param.timestamp_range); } return table->MutateRow(mutation_request); } - Status set_cells( std::shared_ptr& table, std::string const& table_name, std::string const& row_key, @@ -138,19 +138,18 @@ Status has_cell( auto const& cf = column_family_it->second; auto column_family_row_it = cf->find(row_key); if (column_family_row_it == cf->end()) { - return NotFoundError( - "no row key found in column family", - GCP_ERROR_INFO() - .WithMetadata("row key", row_key) - .WithMetadata("column family", column_family)); + return NotFoundError("no row key found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); } auto& column_family_row = column_family_row_it->second; auto column_row_it = column_family_row.find(column_qualifier); if (column_row_it == column_family_row.end()) { return NotFoundError( - "no column found with qualifer", - GCP_ERROR_INFO().WithMetadata("column qualifer", column_qualifier)); + "no column found with qualifier", + GCP_ERROR_INFO().WithMetadata("column qualifier", column_qualifier)); } auto& column_row = column_row_it->second; @@ -181,7 +180,7 @@ Status has_column( auto column_family_it = table->find(column_family); if (column_family_it == table->end()) { return NotFoundError( - "columnn family not found in table", + "column family not found in table", GCP_ERROR_INFO().WithMetadata("column family", column_family)); } @@ -199,8 +198,8 @@ Status has_column( auto column_row_it = column_family_row.find(column_qualifier); if (column_row_it == column_family_row.end()) { return NotFoundError( - "no column found with supplied qualifer", - GCP_ERROR_INFO().WithMetadata("column qualifer", column_qualifier)); + "no column found with supplied qualifier", + GCP_ERROR_INFO().WithMetadata("column qualifier", column_qualifier)); } return Status(); @@ -213,7 +212,7 @@ 
StatusOr> get_column( auto column_family_it = table->find(column_family); if (column_family_it == table->end()) { return NotFoundError( - "columnn family not found in table", + "column family not found in table", GCP_ERROR_INFO().WithMetadata("column family", column_family)); } @@ -231,8 +230,8 @@ StatusOr> get_column( auto column_row_it = column_family_row.find(column_qualifier); if (column_row_it == column_family_row.end()) { return NotFoundError( - "no column found with supplied qualifer", - GCP_ERROR_INFO().WithMetadata("column qualifer", column_qualifier)); + "no column found with supplied qualifier", + GCP_ERROR_INFO().WithMetadata("column qualifier", column_qualifier)); } std::map ret( @@ -272,7 +271,7 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { auto const* const table_name = "projects/test/instances/test/tables/test"; auto const* const row_key = "0"; auto const* const column_family_name = "test"; - auto const* const column_qualifer = "test"; + auto const* const column_qualifier = "test"; auto const timestamp_micros = 0; auto const* data = "test"; @@ -283,7 +282,7 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { auto table = maybe_table.value(); std::vector v; - SetCellParams p = {column_family_name, column_qualifer, timestamp_micros, + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, data}; v.push_back(p); @@ -291,7 +290,7 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { ASSERT_STATUS_OK(status); auto status_or = - get_column(table, column_family_name, row_key, column_qualifer); + get_column(table, column_family_name, row_key, column_qualifier); ASSERT_STATUS_OK(status_or.status()); auto column = status_or.value(); ASSERT_EQ(1, column.size()); @@ -307,8 +306,8 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { // should be rolled back so that a row with row_key_2 key should not // exist when the MutateRow request returns. 
v.clear(); - v = {{column_family_name, column_qualifer, 0, data}, - {"non_existent_column_family_name_causes_tx_rollbaclk", column_qualifer, + v = {{column_family_name, column_qualifier, 0, data}, + {"non_existent_column_family_name_causes_tx_rollbaclk", column_qualifier, 1000, data}}; auto const* const row_key_2 = "1"; status = set_cells(table, table_name, row_key_2, v); @@ -316,7 +315,6 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { ASSERT_FALSE(has_row(table, column_family_name, row_key_2).ok()); } - // Does the SetCell mutation work to set a cell to a specific value? TEST(TransactonRollback, SetCellBasicFunction) { ::google::bigtable::admin::v2::Table schema; @@ -325,7 +323,7 @@ TEST(TransactonRollback, SetCellBasicFunction) { auto const* const table_name = "projects/test/instances/test/tables/test"; auto const* const row_key = "0"; auto const* const column_family_name = "test"; - auto const* const column_qualifer = "test"; + auto const* const column_qualifier = "test"; auto const timestamp_micros = 1234; auto const* data = "test"; @@ -336,7 +334,7 @@ TEST(TransactonRollback, SetCellBasicFunction) { auto table = maybe_table.value(); std::vector v; - SetCellParams p = {column_family_name, column_qualifer, timestamp_micros, + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, data}; v.push_back(p); @@ -344,8 +342,8 @@ TEST(TransactonRollback, SetCellBasicFunction) { ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, - timestamp_micros, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, timestamp_micros, data)); } // Test that an old value is correctly restored in a pre-populated @@ -363,7 +361,7 @@ TEST(TransactonRollback, TestRestoreValue) { // failure by setting some other not-pre-provisioned column family // name. 
auto const* const valid_column_family_name = "test"; - auto const* const column_qualifer = "test"; + auto const* const column_qualifier = "test"; int64_t good_mutation_timestamp_micros = 1000; auto const* const good_mutation_data = "expected to succeed"; @@ -373,14 +371,14 @@ TEST(TransactonRollback, TestRestoreValue) { auto table = maybe_table.value(); std::vector v; - SetCellParams p = {valid_column_family_name, column_qualifer, + SetCellParams p = {valid_column_family_name, column_qualifier, good_mutation_timestamp_micros, good_mutation_data}; v.push_back(p); auto status = set_cells(table, table_name, row_key, v); ASSERT_STATUS_OK(status); ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, - column_qualifer, good_mutation_timestamp_micros, + column_qualifier, good_mutation_timestamp_micros, good_mutation_data)); // Now atomically try 2 mutations. One modifies the above set cell, @@ -408,7 +406,7 @@ TEST(TransactonRollback, TestRestoreValue) { // RestoreValue and so should contain the old value, and not "new // data". ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, - column_qualifer, good_mutation_timestamp_micros, + column_qualifier, good_mutation_timestamp_micros, good_mutation_data)); } @@ -453,13 +451,13 @@ TEST(TransactonRollback, DeleteValue) { status = set_cells(table, table_name, row_key, v); ASSERT_NE(status.ok(), true); // We expect the chain of mutations to - // fail alltogether. + // fail altogether. status = has_cell(table, v[0].column_family_name, row_key, v[0].column_qualifier, v[0].timestamp_micros, v[0].data); ASSERT_NE(status.ok(), true); // Undo should delete the cell status = has_cell(table, v[1].column_family_name, row_key, v[1].column_qualifier, v[1].timestamp_micros, v[1].data); - ASSERT_NE(status.ok(), true); // Also the SetCell with invalud shema + ASSERT_NE(status.ok(), true); // Also the SetCell with invalid shema // should not have set anything. 
} @@ -503,7 +501,7 @@ TEST(TransactonRollback, DeleteColumn) { status = set_cells(table, table_name, row_key, v); ASSERT_NE(status.ok(), true); // We expect the chain of mutations to - // fail alltogether because the last one must fail. + // fail altogether because the last one must fail. // The original column ("test") should still exist. status = has_column(table, valid_column_family_name, row_key, "test"); @@ -546,14 +544,14 @@ TEST(TransactonRollback, DeleteRow) { auto status = set_cells(table, table_name, row_key, v); ASSERT_NE(status.ok(), true); // We expect the chain of mutations to - // fail alltogether because the last one must fail. + // fail altogether because the last one must fail. status = has_row(table, valid_column_family_name, row_key); ASSERT_NE(status.ok(), true); } // Does the DeleteFromfamily mutation work to delete a row from a -// specific family and does it rows with the same row key in othe +// specific family and does it rows with the same row key in other // column families alone? 
TEST(TransactonRollback, DeleteFromFamilyBasicFunction) { ::google::bigtable::admin::v2::Table schema; @@ -562,32 +560,33 @@ TEST(TransactonRollback, DeleteFromFamilyBasicFunction) { auto const* const table_name = "projects/test/instances/test/tables/test"; auto const* const row_key = "0"; auto const* const column_family_name = "test"; - auto const* const column_qualifer = "test"; + auto const* const column_qualifier = "test"; auto const timestamp_micros = 1234; auto const* data = "test"; auto const* const second_column_family_name = "test2"; - std::vector column_families = {column_family_name, second_column_family_name}; + std::vector column_families = {column_family_name, + second_column_family_name}; auto maybe_table = create_table(table_name, column_families); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); std::vector v; - SetCellParams p = {column_family_name, column_qualifer, timestamp_micros, + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, data}; v.push_back(p); - p = {second_column_family_name, column_qualifer, timestamp_micros, data}; + p = {second_column_family_name, column_qualifier, timestamp_micros, data}; v.push_back(p); auto status = set_cells(table, table_name, row_key, v); ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, - timestamp_micros, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, timestamp_micros, data)); ASSERT_STATUS_OK( - has_column(table, column_family_name, row_key, column_qualifer)); + has_column(table, column_family_name, row_key, column_qualifier)); ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); // Having established that the data is there, test the basic @@ -597,7 +596,7 @@ TEST(TransactonRollback, DeleteFromFamilyBasicFunction) { delete_from_families(table, table_name, row_key, {column_family_name})); ASSERT_NE(true, has_row(table, column_family_name, row_key).ok()); - 
// Ensure that we did not delete a row in anothe column family. + // Ensure that we did not delete a row in another column family. ASSERT_EQ(true, has_row(table, second_column_family_name, row_key).ok()); } @@ -610,14 +609,14 @@ TEST(TransactonRollback, DeleteFromFamilyRollback) { auto const* const table_name = "projects/test/instances/test/tables/test"; auto const* const row_key = "0"; auto const* const column_family_name = "test"; - auto const* const column_qualifer = "test"; + auto const* const column_qualifier = "test"; auto const timestamp_micros = 1234; auto const* data = "test"; // Failure of one of the mutations is simalted by having a mutation // with this column family, which has not been provisioned. Previous // successful mutations should be rolled back when RowTransaction - // sees a mutation with this invlaid column family name. + // sees a mutation with this invalid column family name. auto const* const column_family_not_in_schema = "i_do_not_exist_in_the_schema"; @@ -628,16 +627,16 @@ TEST(TransactonRollback, DeleteFromFamilyRollback) { auto table = maybe_table.value(); std::vector v; - SetCellParams p = {column_family_name, column_qualifer, timestamp_micros, + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, data}; v.push_back(p); auto status = set_cells(table, table_name, row_key, v); ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, - timestamp_micros, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, timestamp_micros, data)); ASSERT_STATUS_OK( - has_column(table, column_family_name, row_key, column_qualifer)); + has_column(table, column_family_name, row_key, column_qualifier)); ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); // Setup two DeleteFromfamily mutation: The first one uses the @@ -653,15 +652,16 @@ TEST(TransactonRollback, DeleteFromFamilyRollback) { // Check that the row deleted by the first 
mutation is restored, // with all its data. - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, - timestamp_micros, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, timestamp_micros, data)); ASSERT_STATUS_OK( - has_column(table, column_family_name, row_key, column_qualifer)); + has_column(table, column_family_name, row_key, column_qualifier)); ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); } -::google::bigtable::v2::TimestampRange* new_timestamp_range(int64_t start, int64_t end) { - auto* range = new(::google::bigtable::v2::TimestampRange); +::google::bigtable::v2::TimestampRange* new_timestamp_range(int64_t start, + int64_t end) { + auto* range = new (::google::bigtable::v2::TimestampRange); range->set_start_timestamp_micros(start); range->set_end_timestamp_micros(end); @@ -676,7 +676,7 @@ TEST(TransactonRollback, DeleteFromColumnBasicFunction) { auto const* const table_name = "projects/test/instances/test/tables/test"; auto const* const row_key = "0"; auto const* const column_family_name = "test"; - auto const* const column_qualifer = "test"; + auto const* const column_qualifier = "test"; auto const* data = "test"; std::vector column_families = {column_family_name}; @@ -686,28 +686,28 @@ TEST(TransactonRollback, DeleteFromColumnBasicFunction) { auto table = maybe_table.value(); std::vector v = { - {column_family_name, column_qualifer, 1000, data}, - {column_family_name, column_qualifer, 2000, data}, - {column_family_name, column_qualifer, 3000, data}, + {column_family_name, column_qualifier, 1000, data}, + {column_family_name, column_qualifier, 2000, data}, + {column_family_name, column_qualifier, 3000, data}, }; auto status = set_cells(table, table_name, row_key, v); ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, - 1000, data)); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, - 2000, 
data)); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, - 3000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 1000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 2000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 3000, data)); std::vector dv = { - {column_family_name, column_qualifer, + {column_family_name, column_qualifier, new_timestamp_range(v[0].timestamp_micros, v[2].timestamp_micros + 1000)}}; ASSERT_STATUS_OK(delete_from_columns(table, table_name, row_key, dv)); - status = has_column(table, column_family_name, row_key, column_qualifer); + status = has_column(table, column_family_name, row_key, column_qualifier); ASSERT_EQ(false, status.ok()); } @@ -719,10 +719,11 @@ TEST(TransactonRollback, DeleteFromColumnRollback) { auto const* const table_name = "projects/test/instances/test/tables/test"; auto const* const row_key = "0"; auto const* const column_family_name = "test"; - auto const* const column_qualifer = "test"; + auto const* const column_qualifier = "test"; // Simulate mutation failure and cause rollback by attempting a // mutation with a non-existent column family name. 
- auto const* const bad_column_family_name = "this_column_family_does_not_exist"; + auto const* const bad_column_family_name = + "this_column_family_does_not_exist"; auto const* data = "test"; std::vector column_families = {column_family_name}; @@ -732,41 +733,43 @@ TEST(TransactonRollback, DeleteFromColumnRollback) { auto table = maybe_table.value(); std::vector v = { - {column_family_name, column_qualifer, 1000, data}, - {column_family_name, column_qualifer, 2000, data}, - {column_family_name, column_qualifer, 3000, data}, + {column_family_name, column_qualifier, 1000, data}, + {column_family_name, column_qualifier, 2000, data}, + {column_family_name, column_qualifier, 3000, data}, }; auto status = set_cells(table, table_name, row_key, v); ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, - 1000, data)); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, - 2000, data)); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, - 3000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 1000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 2000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 3000, data)); // The first mutation will succeed. The second assumes a schema that // does not exist - it should fail and cause rollback of the column // deletion in the first mutation. std::vector dv = { - {column_family_name, column_qualifer, + {column_family_name, column_qualifier, new_timestamp_range(v[0].timestamp_micros, v[2].timestamp_micros + 1000)}, - {bad_column_family_name, column_qualifer, new_timestamp_range(1000, 2000)}, + {bad_column_family_name, column_qualifier, + new_timestamp_range(1000, 2000)}, }; // The mutation chains should fail and rollback should occur. 
ASSERT_EQ(false, delete_from_columns(table, table_name, row_key, dv).ok()); // The column should have been restored. - ASSERT_STATUS_OK(has_column(table, column_family_name, row_key, column_qualifer)); + ASSERT_STATUS_OK( + has_column(table, column_family_name, row_key, column_qualifier)); // Check that the data is where and what we expect. - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, - 1000, data)); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, - 2000, data)); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, - 3000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 1000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 2000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 3000, data)); } // Can we delete a row from all column families? @@ -777,7 +780,7 @@ TEST(TransactonRollback, DeleteFromRowBasicFunction) { auto const* const table_name = "projects/test/instances/test/tables/test"; auto const* const row_key = "0"; auto const* const column_family_name = "column_family_1"; - auto const* const column_qualifer = "column_qualifier"; + auto const* const column_qualifier = "column_qualifier"; auto const timestamp_micros = 1000; auto const* data = "value"; auto const* const second_column_family_name = "column_family_2"; @@ -790,19 +793,19 @@ TEST(TransactonRollback, DeleteFromRowBasicFunction) { auto table = maybe_table.value(); std::vector v; - SetCellParams p = {column_family_name, column_qualifer, timestamp_micros, + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, data}; v.push_back(p); - p = {second_column_family_name, column_qualifer, timestamp_micros, data}; + p = {second_column_family_name, column_qualifier, timestamp_micros, data}; v.push_back(p); auto status = set_cells(table, table_name, row_key, v); 
ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, column_qualifer, - timestamp_micros, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, timestamp_micros, data)); ASSERT_STATUS_OK( - has_column(table, second_column_family_name, row_key, column_qualifer)); + has_column(table, second_column_family_name, row_key, column_qualifier)); ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); ::google::bigtable::v2::MutateRowRequest mutation_request; @@ -813,11 +816,11 @@ TEST(TransactonRollback, DeleteFromRowBasicFunction) { mutation_request_mutation->mutable_delete_from_row(); ASSERT_STATUS_OK(table->MutateRow(mutation_request)); - ASSERT_EQ(false, has_cell(table, column_family_name, row_key, column_qualifer, - timestamp_micros, data) + ASSERT_EQ(false, has_cell(table, column_family_name, row_key, + column_qualifier, timestamp_micros, data) .ok()); ASSERT_EQ(false, has_column(table, second_column_family_name, row_key, - column_qualifer) + column_qualifier) .ok()); } diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index b69edae96043a..0d1f79a50780e 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -224,7 +224,7 @@ Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { RowTransaction row_transaction(this->get(), request); - for (const auto& mutation : request.mutations()) { + for (auto const& mutation : request.mutations()) { if (mutation.has_set_cell()) { auto const& set_cell = mutation.set_cell(); auto status = row_transaction.SetCell(set_cell); @@ -270,7 +270,6 @@ Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { return Status(); } -// NOLINTEND(readability-function-cognitive-complexity) bool FilteredTableStream::ApplyFilter(InternalFilter const& internal_filter) { if (!absl::holds_alternative(internal_filter)) { @@ -393,6 +392,7 
@@ bool Table::IsDeleteProtectedNoLock() const { return schema_.deletion_protection(); } +// NOLINTBEGIN(readability-convert-member-functions-to-static) Status RowTransaction::AddToCell( ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell) { return UnimplementedError( @@ -406,6 +406,7 @@ Status RowTransaction::MergeToCell( "Unsupported mutation type.", GCP_ERROR_INFO().WithMetadata("mutation", merge_to_cell.DebugString())); } +// NOLINTEND(readability-convert-member-functions-to-static) Status RowTransaction::DeleteFromColumn( ::google::bigtable::v2::Mutation_DeleteFromColumn const& @@ -422,10 +423,9 @@ Status RowTransaction::DeleteFromColumn( delete_from_column.time_range()); for (auto& cell : deleted_cells) { - RestoreValue restore_value{column_family, - delete_from_column.column_qualifier(), - std::move(cell.timestamp), - std::move(cell.value)}; + RestoreValue restore_value{ + column_family, delete_from_column.column_qualifier(), + std::move(cell.timestamp), std::move(cell.value)}; undo_.emplace(std::move(restore_value)); } @@ -439,9 +439,9 @@ Status RowTransaction::DeleteFromRow() { for (auto& column : deleted_columns) { for (auto& cell : column.second) { - RestoreValue restrore_value = { - *column_family.second, std::move(column.first), - cell.timestamp, std::move(cell.value)}; + RestoreValue restrore_value = {*column_family.second, + std::move(column.first), cell.timestamp, + std::move(cell.value)}; undo_.emplace(std::move(restrore_value)); row_existed = true; } @@ -544,9 +544,8 @@ void RowTransaction::Undo() { auto* restore_value = absl::get_if(&op); if (restore_value) { restore_value->column_family.SetCell( - row_key, - std::move(restore_value->column_qualifier), restore_value->timestamp, - std::move(restore_value->value)); + row_key, std::move(restore_value->column_qualifier), + restore_value->timestamp, std::move(restore_value->value)); continue; } diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 
a89de1a040200..80c9f3fcb257b 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -43,7 +43,6 @@ class Table : public std::enable_shared_from_this
{ static StatusOr> Create( google::bigtable::admin::v2::Table schema); - google::bigtable::admin::v2::Table GetSchema() const; Status Update(google::bigtable::admin::v2::Table const& new_schema, @@ -64,7 +63,8 @@ class Table : public std::enable_shared_from_this
{ std::map>::iterator end() { return column_families_.end(); } - std::map>::iterator find(std::string const &column_family) { + std::map>::iterator find( + std::string const& column_family) { return column_families_.find(column_family); } @@ -135,8 +135,7 @@ class RowTransaction { bool committed_; std::shared_ptr
table_; - std::stack> - undo_; + std::stack> undo_; ::google::bigtable::v2::MutateRowRequest const& request_; }; From a7054b0c4c6f8524b26d8e32a388019ba3c4f107 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 29 Apr 2025 15:21:17 +0300 Subject: [PATCH 129/195] emulator: Implement atomic transaction rollback support. - Introduces a RowTransaction class to proxy mutations and record undo logs in case we need to rewind. - Re-implements all currently supported mutation operations in terms of the RowTransaction class, thereby supporting transaction rollback for all the currently supported mutation operations. - Complete working and passing tests for all types of basic and transaction rollback functionality for all currently supported row mutation types. - Rebased over upstream emulator branch. --- .gitignore | 5 + google/cloud/bigtable/emulator/CMakeLists.txt | 9 +- .../emulator/bigtable_emulator_unit_tests.bzl | 1 + .../cloud/bigtable/emulator/column_family.cc | 129 ++- .../cloud/bigtable/emulator/column_family.h | 151 +++- .../bigtable/emulator/column_family_test.cc | 14 +- .../cloud/bigtable/emulator/rollback_test.cc | 830 ++++++++++++++++++ google/cloud/bigtable/emulator/table.cc | 243 ++++- google/cloud/bigtable/emulator/table.h | 73 +- 9 files changed, 1369 insertions(+), 86 deletions(-) create mode 100644 google/cloud/bigtable/emulator/rollback_test.cc diff --git a/.gitignore b/.gitignore index b8e44a997ef9b..c9097511f9df2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,11 @@ +# Code coverage output artefacts +code_coverage.info +code_coverage_report/ + # Common build output directory names .build/ _build/ +build/ build-output/ build-out/ cmake-out/ diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index 804d386ee734a..858dcdeb205ab 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -54,8 +54,13 @@ if (BUILD_TESTING) # List the
unit tests, then setup the targets and dependencies. set(bigtable_emulator_unit_tests # cmake-format: sort - column_family_test.cc filter_test.cc filtered_map_test.cc - range_set_test.cc server_test.cc table_test.cc) + column_family_test.cc + filter_test.cc + filtered_map_test.cc + range_set_test.cc + rollback_test.cc + server_test.cc + table_test.cc) export_list_to_bazel("bigtable_emulator_unit_tests.bzl" "bigtable_emulator_unit_tests" YEAR "2024") diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl index bc82e69a79ded..819f6dce68630 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -21,6 +21,7 @@ bigtable_emulator_unit_tests = [ "filter_test.cc", "filtered_map_test.cc", "range_set_test.cc", + "rollback_test.cc", "server_test.cc", "table_test.cc", ] diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 59fe9541c5a81..3e23cde76c80d 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -13,25 +13,36 @@ // limitations under the License. 
#include "google/cloud/bigtable/emulator/column_family.h" +#include #include +#include namespace google { namespace cloud { namespace bigtable { namespace emulator { -void ColumnRow::SetCell(std::chrono::milliseconds timestamp, - std::string const& value) { +absl::optional ColumnRow::SetCell( + std::chrono::milliseconds timestamp, std::string const& value) { if (timestamp <= std::chrono::milliseconds::zero()) { timestamp = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()); } + + absl::optional ret = absl::nullopt; + auto cell_it = cells_.find(timestamp); + if (!(cell_it == cells_.end())) { + ret = std::move(cell_it->second); + } + cells_[timestamp] = value; + + return ret; } -std::size_t ColumnRow::DeleteTimeRange( +std::vector ColumnRow::DeleteTimeRange( ::google::bigtable::v2::TimestampRange const& time_range) { - std::size_t num_erased = 0; + std::vector deleted_cells; for (auto cell_it = cells_.lower_bound( std::chrono::duration_cast( std::chrono::microseconds(time_range.start_timestamp_micros()))); @@ -40,24 +51,39 @@ std::size_t ColumnRow::DeleteTimeRange( cell_it->first < std::chrono::duration_cast( std::chrono::microseconds( time_range.end_timestamp_micros())));) { + Cell cell = {std::move(cell_it->first), std::move(cell_it->second)}; + deleted_cells.emplace_back(std::move(cell)); cells_.erase(cell_it++); - ++num_erased; } - return num_erased; + return deleted_cells; +} + +absl::optional ColumnRow::DeleteTimeStamp( + std::chrono::milliseconds timestamp) { + absl::optional ret = absl::nullopt; + + auto cell_it = cells_.find(timestamp); + if (cell_it != cells_.end()) { + Cell cell = {std::move(cell_it->first), std::move(cell_it->second)}; + ret.emplace(std::move(cell)); + cells_.erase(cell_it); + } + + return ret; } -void ColumnFamilyRow::SetCell(std::string const& column_qualifier, - std::chrono::milliseconds timestamp, - std::string const& value) { - columns_[column_qualifier].SetCell(timestamp, value); +absl::optional 
ColumnFamilyRow::SetCell( + std::string const& column_qualifier, std::chrono::milliseconds timestamp, + std::string const& value) { + return columns_[column_qualifier].SetCell(timestamp, value); } -std::size_t ColumnFamilyRow::DeleteColumn( +std::vector ColumnFamilyRow::DeleteColumn( std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range) { auto column_it = columns_.find(column_qualifier); if (column_it == columns_.end()) { - return 0; + return {}; } auto res = column_it->second.DeleteTimeRange(time_range); if (!column_it->second.HasCells()) { @@ -66,30 +92,87 @@ std::size_t ColumnFamilyRow::DeleteColumn( return res; } -void ColumnFamily::SetCell(std::string const& row_key, - std::string const& column_qualifier, - std::chrono::milliseconds timestamp, - std::string const& value) { - rows_[row_key].SetCell(column_qualifier, timestamp, value); +absl::optional ColumnFamilyRow::DeleteTimeStamp( + std::string const& column_qulifier, std::chrono::milliseconds timestamp) { + auto column_it = columns_.find(column_qulifier); + if (column_it == columns_.end()) { + return absl::nullopt; + } + + auto ret = column_it->second.DeleteTimeStamp(timestamp); + if (!column_it->second.HasCells()) { + columns_.erase(column_it); + } + + return ret; +} + +absl::optional ColumnFamily::SetCell( + std::string const& row_key, std::string const& column_qualifier, + std::chrono::milliseconds timestamp, std::string const& value) { + return rows_[row_key].SetCell(column_qualifier, timestamp, value); } -bool ColumnFamily::DeleteRow(std::string const& row_key) { - return rows_.erase(row_key) > 0; +std::map> ColumnFamily::DeleteRow( + std::string const& row_key) { + std::map> res; + + auto row_it = rows_.find(row_key); + if (row_it == rows_.end()) { + return {}; + } + + for (auto& column : row_it->second.columns_) { + // Not setting start and end timestamps will select all cells for deletion + ::google::bigtable::v2::TimestampRange time_range; + auto 
deleted_cells = column.second.DeleteTimeRange(time_range); + if (!deleted_cells.empty()) { + res[std::move(column.first)] = std::move(deleted_cells); + } + } + + rows_.erase(row_key); + + return res; } -std::size_t ColumnFamily::DeleteColumn( +std::vector ColumnFamily::DeleteColumn( std::string const& row_key, std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range) { auto row_it = rows_.find(row_key); + + return DeleteColumn(row_it, column_qualifier, time_range); +} + +std::vector ColumnFamily::DeleteColumn( + std::map::iterator row_it, + std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range) { if (row_it != rows_.end()) { - auto num_erased_cells = + auto erased_cells = row_it->second.DeleteColumn(column_qualifier, time_range); if (!row_it->second.HasColumns()) { rows_.erase(row_it); } - return num_erased_cells; + return erased_cells; + } + return {}; +} + +absl::optional ColumnFamily::DeleteTimeStamp( + std::string const& row_key, std::string const& column_qulifier, + std::chrono::milliseconds timestamp) { + auto row_it = rows_.find(row_key); + if (row_it == rows_.end()) { + return absl::nullopt; } - return 0; + + auto ret = row_it->second.DeleteTimeStamp(column_qulifier, timestamp); + if (!row_it->second.HasColumns()) { + rows_.erase(row_it); + } + + return ret; } class FilteredColumnFamilyStream::FilterApply { diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 9776ded7f835c..e7928afa36516 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -22,6 +22,7 @@ #include "absl/types/optional.h" #include #include +#include #include namespace google { @@ -29,6 +30,11 @@ namespace cloud { namespace bigtable { namespace emulator { +struct Cell { + std::chrono::milliseconds timestamp; + std::string value; +}; + /** * Objects of this class hold contents of a specific 
column in a specific row. * @@ -36,6 +42,11 @@ namespace emulator { */ class ColumnRow { public: + ColumnRow() = default; + // Disable copying. + ColumnRow(ColumnRow const&) = delete; + ColumnRow& operator=(ColumnRow const&) = delete; + /** * Insert or update and existing cell at a given timestamp. * @@ -43,17 +54,32 @@ class ColumnRow { * updated. If it equals zero then number of milliseconds since epoch will * be used instead. * @param value the value to insert/update. + * + * @return no value if the timestamp had no value before, otherwise + * the previous value of the timestamp. */ - void SetCell(std::chrono::milliseconds timestamp, std::string const& value); + absl::optional SetCell(std::chrono::milliseconds timestamp, + std::string const& value); /** * Delete cells falling into a given timestamp range. * * @param time_range the timestamp range dictating which values to delete. - * @return number of deleted cells. + * @return vector of deleted cells. */ - std::size_t DeleteTimeRange( + std::vector DeleteTimeRange( ::google::bigtable::v2::TimestampRange const& time_range); + /** + * Delete a cell with the given timestamp. + * + * @param timestamp the std::chrono::milliseconds timestamp of the + * cell to delete. + * + * @return Cell representing deleted cell, if there + * was a cell with that timestamp, otherwise absl::nullopt. + */ + absl::optional DeleteTimeStamp(std::chrono::milliseconds timestamp); + bool HasCells() const { return !cells_.empty(); } using const_iterator = std::map::const_iterator; @@ -66,6 +92,16 @@ class ColumnRow { return cells_.upper_bound(timestamp); } + std::map::iterator find( + std::chrono::milliseconds const& timestamp) { + return cells_.find(timestamp); + } + + void erase( + std::map::iterator timestamp_it) { + cells_.erase(timestamp_it); + } + private: std::map cells_; }; @@ -88,20 +124,40 @@ class ColumnFamilyRow { * updated. If it equals zero then number of milliseconds since epoch will * be used instead. 
* @param value the value to insert/update. + * + * @return no value if the timestamp had no value before, otherwise + * the previous value of the timestamp. + * */ - void SetCell(std::string const& column_qualifier, - std::chrono::milliseconds timestamp, std::string const& value); + absl::optional SetCell(std::string const& column_qualifier, + std::chrono::milliseconds timestamp, + std::string const& value); /** * Delete cells falling into a given timestamp range in one column. * * @param column_qualifier the column qualifier from which to delete the * values. * @param time_range the timestamp range dictating which values to delete. - * @return number of deleted cells. + * @return vector of deleted cells. */ - std::size_t DeleteColumn( + std::vector DeleteColumn( std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); + /** + * Delete a cell with the given timestamp from the column given by + * the given column qualifier. + * + * @param column_qualifier the column from which to delete the cell. + * + * @param timestamp the std::chrono::milliseconds timestamp of the + * cell to delete. + * + * @return Cell representing deleted cell, if there was a cell with + * that timestamp in the given column, otherwise absl::nullopt. 
+ */ + absl::optional DeleteTimeStamp(std::string const& column_qulifier, + std::chrono::milliseconds timestamp); + bool HasColumns() { return !columns_.empty(); } using const_iterator = std::map::const_iterator; const_iterator begin() const { return columns_.begin(); } @@ -113,7 +169,18 @@ class ColumnFamilyRow { return columns_.upper_bound(column_qualifier); } + std::map::iterator find( + std::string const& column_qualifier) { + return columns_.find(column_qualifier); + } + + void erase(std::map::iterator column_it) { + columns_.erase(column_it); + } + private: + friend class ColumnFamily; + std::map columns_; }; @@ -128,6 +195,11 @@ class ColumnFamilyRow { */ class ColumnFamily { public: + ColumnFamily() = default; + // Disable copying. + ColumnFamily(ColumnFamily const&) = delete; + ColumnFamily& operator=(ColumnFamily const&) = delete; + using const_iterator = std::map::const_iterator; /** @@ -139,29 +211,65 @@ class ColumnFamily { * updated. If it equals zero then number of milliseconds since epoch will * be used instead. * @param value the value to insert/update. + * + * @return no value if the timestamp had no value before, otherwise + * the previous value of the timestamp. + * */ - void SetCell(std::string const& row_key, std::string const& column_qualifier, - std::chrono::milliseconds timestamp, std::string const& value); + absl::optional SetCell(std::string const& row_key, + std::string const& column_qualifier, + std::chrono::milliseconds timestamp, + std::string const& value); /** * Delete the whole row from this column family. * * @param row_key the row key to remove. - * @return whether such a row existed. + * @return map from deleted column qualifiers to deleted cells. */ - bool DeleteRow(std::string const& row_key); + std::map> DeleteRow( + std::string const& row_key); /** * Delete cells from a row falling into a given timestamp range in one column. * - * @param row_key the row key to remove the cells from. 
- * @param column_qualifier the column qualifier from which to delete the - * values. - * @param time_range the timestamp range dictating which values to delete. - * @return number of deleted cells. + * @param row_key the row key to remove the cells from (or the + * iterator to the row - row_it - in the 2nd overloaded form of the + * function). + + * @param column_qualifier the column qualifier from which to delete + * the values. + * + * @param time_range the timestamp range dictating which values to + * delete. + * @return vector of deleted cells. */ - std::size_t DeleteColumn( + std::vector DeleteColumn( std::string const& row_key, std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range); + std::vector DeleteColumn( + std::map::iterator row_it, + std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range); + + /** + * Delete a cell with the given timestamp from the column given by + * the given column qualifier from the row given by row_key. + * + * @param row_key the row from which to delete the cell + * + * @param column_qualifier the column from which to delete the cell. + * + * @param timestamp the std::chrono::milliseconds timestamp of the + * cell to delete. + * + * @return Cell representing deleted cell, if there was a cell with + * that timestamp in the given column in the given row, + * otherwise absl::nullopt. 
+ */ + absl::optional DeleteTimeStamp(std::string const& row_key, + std::string const& column_qulifier, + std::chrono::milliseconds timestamp); + const_iterator begin() const { return rows_.begin(); } const_iterator end() const { return rows_.end(); } const_iterator lower_bound(std::string const& row_key) const { @@ -171,6 +279,15 @@ class ColumnFamily { return rows_.upper_bound(row_key); } + std::map::iterator find( + std::string const& row_key) { + return rows_.find(row_key); + } + + void erase(std::map::iterator row_it) { + rows_.erase(row_it); + } + private: std::map rows_; }; diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index c396a0a3e570e..21b8cacd0756e 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -126,8 +126,9 @@ col2 @10ms: qux EXPECT_EQ("bar", fam_row.lower_bound("col1")->second.begin()->second); EXPECT_EQ("qux", fam_row.upper_bound("col1")->second.begin()->second); - EXPECT_EQ(1, fam_row.DeleteColumn("col1", - ::google::bigtable::v2::TimestampRange{})); + EXPECT_EQ( + 1, fam_row.DeleteColumn("col1", ::google::bigtable::v2::TimestampRange{}) + .size()); // Verify that there is no empty column. 
EXPECT_EQ(2, std::distance(fam_row.begin(), fam_row.end())); @@ -135,7 +136,7 @@ col2 @10ms: qux google::bigtable::v2::TimestampRange not_matching_range; not_matching_range.set_start_timestamp_micros(10); not_matching_range.set_end_timestamp_micros(20); - EXPECT_EQ(0, fam_row.DeleteColumn("col2", not_matching_range)); + EXPECT_EQ(0, fam_row.DeleteColumn("col2", not_matching_range).size()); EXPECT_EQ(R"""( col0 @10ms: baz @@ -168,7 +169,8 @@ row2 :col0 @10ms: qux DumpColumnFamilyRow(fam.upper_bound("row1")->second)); EXPECT_EQ(1, fam.DeleteColumn("row1", "col0", - ::google::bigtable::v2::TimestampRange{})); + ::google::bigtable::v2::TimestampRange{}) + .size()); // Verify that there is no empty row EXPECT_EQ(2, std::distance(fam.begin(), fam.end())); @@ -179,8 +181,8 @@ row2 :col0 @10ms: qux )""", "\n" + DumpColumnFamily(fam)); - EXPECT_TRUE(fam.DeleteRow("row2")); - EXPECT_FALSE(fam.DeleteRow("row_nonexistent")); + EXPECT_GT(fam.DeleteRow("row2").size(), 0); + EXPECT_TRUE(fam.DeleteRow("row_nonexistent").empty()); EXPECT_EQ("row0 :col0 @10ms: baz\n", DumpColumnFamily(fam)); } diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc new file mode 100644 index 0000000000000..c58650ed538a9 --- /dev/null +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -0,0 +1,830 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/row_streamer.h" +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/internal/make_status.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include "google/cloud/testing_util/status_matchers.h" +#include "gmock/gmock.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +using std::string; + +struct SetCellParams { + std::string column_family_name; + std::string column_qualifier; + int64_t timestamp_micros; + std::string data; +}; + +StatusOr> create_table( + std::string const& table_name, std::vector& column_families) { + ::google::bigtable::admin::v2::Table schema; + schema.set_name(table_name); + for (auto& column_family_name : column_families) { + (*schema.mutable_column_families())[column_family_name] = + ::google::bigtable::admin::v2::ColumnFamily(); + } + + return Table::Create(schema); +} + +Status delete_from_families( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector const& column_families) { + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + for (auto column_family : column_families) { + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* delete_from_family_mutation = + mutation_request_mutation->mutable_delete_from_family(); + delete_from_family_mutation->set_family_name(column_family); + } + + return table->MutateRow(mutation_request); +} + +struct DeleteFromColumnParams { + std::string column_family; + std::string column_qualifier; + ::google::bigtable::v2::TimestampRange* timestamp_range; +}; + +Status delete_from_columns( + std::shared_ptr& table, + std::string const& 
table_name, std::string const& row_key, + std::vector v) { + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + for (auto& param : v) { + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* delete_from_column_mutation = + mutation_request_mutation->mutable_delete_from_column(); + delete_from_column_mutation->set_family_name(param.column_family); + delete_from_column_mutation->set_column_qualifier(param.column_qualifier); + delete_from_column_mutation->set_allocated_time_range( + param.timestamp_range); + } + + return table->MutateRow(mutation_request); +} + +Status set_cells( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector& set_cell_params) { + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + for (auto m : set_cell_params) { + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); + set_cell_mutation->set_family_name(m.column_family_name); + set_cell_mutation->set_column_qualifier(m.column_qualifier); + set_cell_mutation->set_timestamp_micros(m.timestamp_micros); + set_cell_mutation->set_value(m.data); + } + + return table->MutateRow(mutation_request); +} + +Status has_cell( + std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier, int64_t timestamp_micros, + std::string const& value) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == 
cf->end()) { + return NotFoundError("no row key found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); + } + + auto& column_family_row = column_family_row_it->second; + auto column_row_it = column_family_row.find(column_qualifier); + if (column_row_it == column_family_row.end()) { + return NotFoundError( + "no column found with qualifier", + GCP_ERROR_INFO().WithMetadata("column qualifier", column_qualifier)); + } + + auto& column_row = column_row_it->second; + auto timestamp_it = + column_row.find(std::chrono::duration_cast( + std::chrono::microseconds(timestamp_micros))); + if (timestamp_it == column_row.end()) { + return NotFoundError( + "timestamp not found", + GCP_ERROR_INFO().WithMetadata("timestamp", + absl::StrFormat("%d", timestamp_micros))); + } + + if (timestamp_it->second != value) { + return NotFoundError("wrong value", + GCP_ERROR_INFO() + .WithMetadata("expected", value) + .WithMetadata("found", timestamp_it->second)); + } + + return Status(); +} + +Status has_column( + std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return internal::NotFoundError( + "row key not found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); + } + + auto& column_family_row = column_family_row_it->second; + auto column_row_it = column_family_row.find(column_qualifier); + if (column_row_it == column_family_row.end()) { + return NotFoundError( + "no column found with supplied qualifier", + 
GCP_ERROR_INFO().WithMetadata("column qualifier", column_qualifier)); + } + + return Status(); +} + +StatusOr> get_column( + std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return internal::NotFoundError( + "row key not found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); + } + + auto& column_family_row = column_family_row_it->second; + auto column_row_it = column_family_row.find(column_qualifier); + if (column_row_it == column_family_row.end()) { + return NotFoundError( + "no column found with supplied qualifier", + GCP_ERROR_INFO().WithMetadata("column qualifier", column_qualifier)); + } + + std::map ret( + column_row_it->second.begin(), column_row_it->second.end()); + + return ret; +} + +Status has_row(std::shared_ptr& table, + std::string const& column_family, std::string const& row_key) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return NotFoundError("row key not found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); + } + + return Status(); +} + +// Test that SetCell does the right thing when it receives a zero or +// negative timestamp, and that 
the cell created can be correctly +// deleted if rollback occurs. +TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifier = "test"; + auto const timestamp_micros = 0; + auto const* data = "test"; + + std::vector column_families = {column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, + data}; + v.push_back(p); + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + + auto status_or = + get_column(table, column_family_name, row_key, column_qualifier); + ASSERT_STATUS_OK(status_or.status()); + auto column = status_or.value(); + ASSERT_EQ(1, column.size()); + for (auto const& cell : column) { + ASSERT_GT(cell.first.count(), 0); + ASSERT_EQ(data, cell.second); + } + + // Test that a SetCell mutation with timestamp set to 0 can be + // correctly rolled back. In the following, the first mutation + // (timestamp 0) should succeed and the next one should fail. The + // condition after that should be that the first one (timestamp 0) + // should be rolled back so that a row with row_key_2 key should not + // exist when the MutateRow request returns. 
+ v.clear(); + v = {{column_family_name, column_qualifier, 0, data}, + {"non_existent_column_family_name_causes_tx_rollbaclk", column_qualifier, + 1000, data}}; + auto const* const row_key_2 = "1"; + status = set_cells(table, table_name, row_key_2, v); + ASSERT_NE(true, status.ok()); + ASSERT_FALSE(has_row(table, column_family_name, row_key_2).ok()); +} + +// Does the SetCell mutation work to set a cell to a specific value? +TEST(TransactonRollback, SetCellBasicFunction) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifier = "test"; + auto const timestamp_micros = 1234; + auto const* data = "test"; + + std::vector column_families = {column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, + data}; + v.push_back(p); + + auto status = set_cells(table, table_name, row_key, v); + + ASSERT_STATUS_OK(status); + + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, timestamp_micros, data)); +} + +// Test that an old value is correctly restored in a pre-populated +// cell, when one of a set of SetCell mutations fails after the cell +// had been updated with a new value. +TEST(TransactonRollback, TestRestoreValue) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + // The table will be set up with a schema with + // valid_column_family_name and mutations with this column family + // name are expected to succeed. 
We will simulate a transaction + // failure by setting some other not-pre-provisioned column family + // name. + auto const* const valid_column_family_name = "test"; + auto const* const column_qualifier = "test"; + int64_t good_mutation_timestamp_micros = 1000; + auto const* const good_mutation_data = "expected to succeed"; + + std::vector column_families = {valid_column_family_name}; + auto maybe_table = create_table(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {valid_column_family_name, column_qualifier, + good_mutation_timestamp_micros, good_mutation_data}; + v.push_back(p); + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, + column_qualifier, good_mutation_timestamp_micros, + good_mutation_data)); + + // Now atomically try 2 mutations. One modifies the above set cell, + // and the other one is expected to fail. The test is that + // RestoreValue will restore the previous value in cell with + // timestamp 1000. + std::vector w; + // Everything is the same but we try and modify the value in the cell set + // above. + p.data = "new data"; + w.push_back(p); + + // Because "invalid_column_family" does not exist in the table + // schema, a mutation with these SetCell parameters is expected to + // fail. + p = {"invalid_column_family", "test2", 1000, "expected to fail"}; + w.push_back(p); + + status = set_cells(table, table_name, row_key, w); + ASSERT_NE(status.ok(), true); // The whole mutation chain should + // fail because the 2nd mutation + // contains an invalid column family. + + // And the first mutation should have been rolled back by + // RestoreValue and so should contain the old value, and not "new + // data". 
+ ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, + column_qualifier, good_mutation_timestamp_micros, + good_mutation_data)); +} + +// Test that a new cell introduced in a chain of SetCell mutations is +// deleted on rollback if a subsequent mutation fails. +TEST(TransactonRollback, DeleteValue) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + // The table will be set up with a schema with + // valid_column_family_name and mutations with this column family + // name are expected to succeed. We will simulate a transaction + // failure by setting some other not-pre-provisioned column family + // name. + auto const* const valid_column_family_name = "test"; + std::vector column_families = {valid_column_family_name}; + auto maybe_table = create_table(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + // To test that we do not delete a row or column that we should not, + // let us first commit a transaction on the same row where we will + // do the DeleteValue test. + std::vector v = { + {valid_column_family_name, "test", 1000, "data"}}; + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, + v[0].data)); + + // We then setup a transaction chain with 2 SetCells, the first one + // should succeed to add a new cell and the second one should fail + // (because it assumes an invalid schema in column family name). We + // expect the first cell to not exist after the rollback (and of + // course also no data from the 2nd failing SetCell mutation should + // exist either). 
+ v = {{valid_column_family_name, "test", 2000, "new data"}, + {"invalid_column_family_name", "test", 3000, "more new data"}}; + + status = set_cells(table, table_name, row_key, v); + ASSERT_NE(status.ok(), true); // We expect the chain of mutations to + // fail altogether. + status = has_cell(table, v[0].column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, v[0].data); + ASSERT_NE(status.ok(), true); // Undo should delete the cell + status = has_cell(table, v[1].column_family_name, row_key, + v[1].column_qualifier, v[1].timestamp_micros, v[1].data); + ASSERT_NE(status.ok(), true); // Also the SetCell with invalid schema + // should not have set anything. +} + +// Test that if a successful SetCell mutation in a chain of SetCell +// mutations in one transaction introduces a new column but a +// subsequent SetCell mutation fails (we simulate this by passing a +// column family name that is not in the table schema) then the column +// and any of the cells introduced are deleted in the rollback, but +// that any pre-transaction-attempt data in the row is unaffected. +TEST(TransactonRollback, DeleteColumn) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + // The table will be set up with a schema with + // valid_column_family_name and mutations with this column family + // name are expected to succeed. We will simulate a transaction + // failure by setting some other not-pre-provisioned column family + // name.
+ auto const* const valid_column_family_name = "test"; + std::vector column_families = {valid_column_family_name}; + auto maybe_table = create_table(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v = { + {valid_column_family_name, "test", 1000, "data"}}; + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, + v[0].data)); + + // Introduce a new column in a chain of SetCell mutations, a + // subsequent one of which must fail due to an invalid schema + // assumption (bad column family name). + v = {{valid_column_family_name, "new_column", 2000, "new data"}, + {"invalid_column_family_name", "test", 3000, "more new data"}}; + + status = set_cells(table, table_name, row_key, v); + ASSERT_NE(status.ok(), + true); // We expect the chain of mutations to + // fail altogether because the last one must fail. + + // The original column ("test") should still exist. + status = has_column(table, valid_column_family_name, row_key, "test"); + ASSERT_STATUS_OK(status); + + // But the new column introduced should have been rolled back. + status = has_column(table, v[0].column_family_name, row_key, + v[0].column_qualifier); + ASSERT_NE(status.ok(), true); +} + +// Test that a chain of SetCell mutations that initially introduces a +// new row, but one of which eventually fails, will end with the whole +// row rolled back. +TEST(TransactonRollback, DeleteRow) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + // The table will be set up with a schema with + // valid_column_family_name and mutations with this column family + // name are expected to succeed.
We will simulate a transaction + // failure by setting some other not-pre-provisioned column family + // name. + auto const* const valid_column_family_name = "test"; + std::vector column_families = {valid_column_family_name}; + auto maybe_table = create_table(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + // First SetCell should succeed and introduce a new row with key + // "0". The second one will fail due to bad schema settings. We + // expect not to find the row after the row mutation call returns. + std::vector v = { + {valid_column_family_name, "test", 1000, "data"}, + {"invalid_column_family_name", "test", 2000, + "more new data which should never be written"}}; + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_NE(status.ok(), + true); // We expect the chain of mutations to + // fail altogether because the last one must fail. + + status = has_row(table, valid_column_family_name, row_key); + ASSERT_NE(status.ok(), true); +} + +// Does the DeleteFromFamily mutation work to delete a row from a +// specific family and does it leave rows with the same row key in other +// column families alone?
+TEST(TransactonRollback, DeleteFromFamilyBasicFunction) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifier = "test"; + auto const timestamp_micros = 1234; + auto const* data = "test"; + + auto const* const second_column_family_name = "test2"; + + std::vector column_families = {column_family_name, + second_column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, + data}; + v.push_back(p); + + p = {second_column_family_name, column_qualifier, timestamp_micros, data}; + v.push_back(p); + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, timestamp_micros, data)); + ASSERT_STATUS_OK( + has_column(table, column_family_name, row_key, column_qualifier)); + ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); + + // Having established that the data is there, test the basic + // functionality of the DeleteFromFamily mutation by deleting the + // row from the first column family only. + ASSERT_STATUS_OK( + delete_from_families(table, table_name, row_key, {column_family_name})); + ASSERT_NE(true, has_row(table, column_family_name, row_key).ok()); + + // Ensure that the row in the second column family was not deleted. + ASSERT_EQ(true, has_row(table, second_column_family_name, row_key).ok()); +} + +// Test that DeleteFromFamily can be rolled back in case a subsequent +// mutation fails.
+TEST(TransactonRollback, DeleteFromFamilyRollback) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifier = "test"; + auto const timestamp_micros = 1234; + auto const* data = "test"; + + // Failure of one of the mutations is simulated by having a mutation + // with this column family, which has not been provisioned. Previous + // successful mutations should be rolled back when RowTransaction + // sees a mutation with this invalid column family name. + auto const* const column_family_not_in_schema = + "i_do_not_exist_in_the_schema"; + + std::vector column_families = {column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, + data}; + v.push_back(p); + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, timestamp_micros, data)); + ASSERT_STATUS_OK( + has_column(table, column_family_name, row_key, column_qualifier)); + ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); + + // Set up two DeleteFromFamily mutations: the first one uses the + // correct table schema (a column family that exists) and is expected + // to succeed in deleting the row saved above. The second one uses a + // column family not provisioned and should fail, which should + // trigger a rollback of the previous row deletion. In the end, the + // above row should still exist and all its data should be intact.
+ status = + delete_from_families(table, table_name, row_key, + {column_family_name, column_family_not_in_schema}); + ASSERT_NE(true, status.ok()); // The overall chain of mutations should fail. + + // Check that the row deleted by the first mutation is restored, + // with all its data. + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, timestamp_micros, data)); + ASSERT_STATUS_OK( + has_column(table, column_family_name, row_key, column_qualifier)); + ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); +} + +::google::bigtable::v2::TimestampRange* new_timestamp_range(int64_t start, + int64_t end) { + auto* range = new (::google::bigtable::v2::TimestampRange); + range->set_start_timestamp_micros(start); + range->set_end_timestamp_micros(end); + + return range; +} + +// Does DeleteFromColumn basically work? +TEST(TransactonRollback, DeleteFromColumnBasicFunction) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifier = "test"; + auto const* data = "test"; + + std::vector column_families = {column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v = { + {column_family_name, column_qualifier, 1000, data}, + {column_family_name, column_qualifier, 2000, data}, + {column_family_name, column_qualifier, 3000, data}, + }; + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 1000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 2000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + 
column_qualifier, 3000, data)); + + std::vector dv = { + {column_family_name, column_qualifier, + new_timestamp_range(v[0].timestamp_micros, + v[2].timestamp_micros + 1000)}}; + + ASSERT_STATUS_OK(delete_from_columns(table, table_name, row_key, dv)); + + status = has_column(table, column_family_name, row_key, column_qualifier); + ASSERT_EQ(false, status.ok()); +} + +// Does DeleteFromColumn rollback work? +TEST(TransactonRollback, DeleteFromColumnRollback) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifier = "test"; + // Simulate mutation failure and cause rollback by attempting a + // mutation with a non-existent column family name. + auto const* const bad_column_family_name = + "this_column_family_does_not_exist"; + auto const* data = "test"; + + std::vector column_families = {column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v = { + {column_family_name, column_qualifier, 1000, data}, + {column_family_name, column_qualifier, 2000, data}, + {column_family_name, column_qualifier, 3000, data}, + }; + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 1000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 2000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 3000, data)); + + // The first mutation will succeed. The second assumes a schema that + // does not exist - it should fail and cause rollback of the column + // deletion in the first mutation. 
+ std::vector dv = { + {column_family_name, column_qualifier, + new_timestamp_range(v[0].timestamp_micros, + v[2].timestamp_micros + 1000)}, + {bad_column_family_name, column_qualifier, + new_timestamp_range(1000, 2000)}, + }; + // The mutation chains should fail and rollback should occur. + ASSERT_EQ(false, delete_from_columns(table, table_name, row_key, dv).ok()); + + // The column should have been restored. + ASSERT_STATUS_OK( + has_column(table, column_family_name, row_key, column_qualifier)); + // Check that the data is where and what we expect. + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 1000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 2000, data)); + ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, 3000, data)); +} + +// Can we delete a row from all column families? +TEST(TransactonRollback, DeleteFromRowBasicFunction) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "column_family_1"; + auto const* const column_qualifier = "column_qualifier"; + auto const timestamp_micros = 1000; + auto const* data = "value"; + auto const* const second_column_family_name = "column_family_2"; + + std::vector column_families = {column_family_name, + second_column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, + data}; + v.push_back(p); + + p = {second_column_family_name, column_qualifier, timestamp_micros, data}; + v.push_back(p); + + auto status = set_cells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + 
ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, timestamp_micros, data)); + ASSERT_STATUS_OK( + has_column(table, second_column_family_name, row_key, column_qualifier)); + ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); + + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + auto* mutation_request_mutation = mutation_request.add_mutations(); + mutation_request_mutation->mutable_delete_from_row(); + + ASSERT_STATUS_OK(table->MutateRow(mutation_request)); + ASSERT_EQ(false, has_cell(table, column_family_name, row_key, + column_qualifier, timestamp_micros, data) + .ok()); + ASSERT_EQ(false, has_column(table, second_column_family_name, row_key, + column_qualifier) + .ok()); +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index bc7bb74337853..0d1f79a50780e 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -13,13 +13,18 @@ // limitations under the License. 
#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/bigtable/internal/google_bytes_traits.h" #include "google/cloud/internal/make_status.h" #include "google/protobuf/util/field_mask_util.h" +#include +#include #include #include +#include +#include namespace google { namespace cloud { @@ -213,55 +218,43 @@ StatusOr> Table::FindColumnFamily( return std::ref(*column_family_it->second); } -// NOLINTBEGIN(readability-function-cognitive-complexity) Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { - // FIXME - add atomicity - // FIXME - determine what happens when row/column family/column does not exist std::lock_guard lock(mu_); assert(request.table_name() == schema_.name()); + + RowTransaction row_transaction(this->get(), request); + for (auto const& mutation : request.mutations()) { if (mutation.has_set_cell()) { auto const& set_cell = mutation.set_cell(); - auto maybe_column_family = FindColumnFamily(set_cell); - if (!maybe_column_family) { - return maybe_column_family.status(); + auto status = row_transaction.SetCell(set_cell); + if (!status.ok()) { + return status; } - maybe_column_family->get().SetCell( - request.row_key(), set_cell.column_qualifier(), - std::chrono::duration_cast( - std::chrono::microseconds(set_cell.timestamp_micros())), - set_cell.value()); - //} else if (mutation.has_add_to_cell()) { - // // FIXME - //} else if (mutation.has_merge_to_cell()) { - // // FIXME + } else if (mutation.has_add_to_cell()) { + return UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", mutation.DebugString())); + } else if (mutation.has_merge_to_cell()) { + return UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", mutation.DebugString())); } else if 
(mutation.has_delete_from_column()) { auto const& delete_from_column = mutation.delete_from_column(); - auto maybe_column_family = FindColumnFamily(delete_from_column); - if (!maybe_column_family) { - return maybe_column_family.status(); - } - if (maybe_column_family->get().DeleteColumn( - request.row_key(), delete_from_column.column_qualifier(), - delete_from_column.time_range()) == 0) { - // FIXME no such row or column + auto status = row_transaction.DeleteFromColumn(delete_from_column); + if (!status.ok()) { + return status; } } else if (mutation.has_delete_from_family()) { - auto maybe_column_family = - FindColumnFamily(mutation.delete_from_family()); - if (!maybe_column_family) { - return maybe_column_family.status(); - } - if (maybe_column_family->get().DeleteRow(request.row_key())) { - // FIXME no such row existed in that column family + auto const& delete_from_family = mutation.delete_from_family(); + auto status = row_transaction.DeleteFromFamily(delete_from_family); + if (!status.ok()) { + return status; } } else if (mutation.has_delete_from_row()) { - bool row_existed = false; - for (auto& column_family : column_families_) { - row_existed |= column_family.second->DeleteRow(request.row_key()); - } - if (!row_existed) { - // FIXME no such row existed + auto status = row_transaction.DeleteFromRow(); + if (!status.ok()) { + return status; } } else { return UnimplementedError( @@ -269,9 +262,14 @@ Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { GCP_ERROR_INFO().WithMetadata("mutation", mutation.DebugString())); } } + + // If we get here, all mutations on the row have succeeded. We can + // commit and return which will prevent the destructor from undoing + // the transaction. 
+ row_transaction.commit(); + return Status(); } -// NOLINTEND(readability-function-cognitive-complexity) bool FilteredTableStream::ApplyFilter(InternalFilter const& internal_filter) { if (!absl::holds_alternative(internal_filter)) { @@ -394,6 +392,177 @@ bool Table::IsDeleteProtectedNoLock() const { return schema_.deletion_protection(); } +// NOLINTBEGIN(readability-convert-member-functions-to-static) +Status RowTransaction::AddToCell( + ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell) { + return UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", add_to_cell.DebugString())); +} + +Status RowTransaction::MergeToCell( + ::google::bigtable::v2::Mutation_MergeToCell const& merge_to_cell) { + return UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", merge_to_cell.DebugString())); +} +// NOLINTEND(readability-convert-member-functions-to-static) + +Status RowTransaction::DeleteFromColumn( + ::google::bigtable::v2::Mutation_DeleteFromColumn const& + delete_from_column) { + auto maybe_column_family = table_->FindColumnFamily(delete_from_column); + if (!maybe_column_family.ok()) { + return maybe_column_family.status(); + } + + auto& column_family = maybe_column_family->get(); + + auto deleted_cells = column_family.DeleteColumn( + request_.row_key(), delete_from_column.column_qualifier(), + delete_from_column.time_range()); + + for (auto& cell : deleted_cells) { + RestoreValue restore_value{ + column_family, delete_from_column.column_qualifier(), + std::move(cell.timestamp), std::move(cell.value)}; + undo_.emplace(std::move(restore_value)); + } + + return Status(); +} + +Status RowTransaction::DeleteFromRow() { + bool row_existed = false; // Set only once a deleted cell is recorded. + for (auto& column_family : table_->column_families_) { + auto deleted_columns = column_family.second->DeleteRow(request_.row_key()); + + for (auto& column : deleted_columns) { + for (auto& cell : column.second) { // Qualifier is copied: every cell needs it. + RestoreValue restrore_value = 
{*column_family.second, + column.first, cell.timestamp, + std::move(cell.value)}; + undo_.emplace(std::move(restrore_value)); + row_existed = true; + } + } + } + + if (row_existed) { + return Status(); + } + + return NotFoundError( + "row not found in table", + GCP_ERROR_INFO().WithMetadata("row", request_.row_key())); +} + +Status RowTransaction::DeleteFromFamily( + ::google::bigtable::v2::Mutation_DeleteFromFamily const& + delete_from_family) { + // If the request references an incorrect schema (non-existent + // column family) then return a failure status error immediately. + auto maybe_column_family = table_->FindColumnFamily(delete_from_family); + if (!maybe_column_family.ok()) { + return maybe_column_family.status(); + } + + auto column_family_it = table_->find(delete_from_family.family_name()); + if (column_family_it == table_->end()) { + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", + delete_from_family.family_name())); + } + + std::map::iterator column_family_row_it; + if (column_family_it->second->find(request_.row_key()) == + column_family_it->second->end()) { + // The row does not exist + return NotFoundError( + "row key is not found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", request_.row_key()) + .WithMetadata("column family", column_family_it->first)); + } + + auto deleted = column_family_it->second->DeleteRow(request_.row_key()); + for (auto const& column : deleted) { + for (auto const& cell : column.second) { + RestoreValue restore_value{*column_family_it->second, + std::move(column.first), cell.timestamp, + std::move(cell.value)}; + undo_.emplace(std::move(restore_value)); + } + } + + return Status(); +} + +Status RowTransaction::SetCell( + ::google::bigtable::v2::Mutation_SetCell const& set_cell) { + auto maybe_column_family = table_->FindColumnFamily(set_cell); + if (!maybe_column_family) { + return maybe_column_family.status(); + } + + auto& 
column_family = maybe_column_family->get(); + + auto timestamp = std::chrono::duration_cast( + std::chrono::microseconds(set_cell.timestamp_micros())); + + if (timestamp <= std::chrono::milliseconds::zero()) { + timestamp = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); + } + + auto maybe_old_value = + column_family.SetCell(request_.row_key(), set_cell.column_qualifier(), + timestamp, set_cell.value()); + + if (!maybe_old_value) { + DeleteValue delete_value{column_family, + std::move(set_cell.column_qualifier()), timestamp}; + undo_.emplace(std::move(delete_value)); + } else { + RestoreValue restore_value{column_family, + std::move(set_cell.column_qualifier()), + timestamp, std::move(maybe_old_value.value())}; + undo_.emplace(std::move(restore_value)); + } + + return Status(); +} + +void RowTransaction::Undo() { + auto row_key = request_.row_key(); + + while (!undo_.empty()) { + auto op = undo_.top(); + undo_.pop(); + + auto* restore_value = absl::get_if(&op); + if (restore_value) { + restore_value->column_family.SetCell( + row_key, std::move(restore_value->column_qualifier), + restore_value->timestamp, std::move(restore_value->value)); + continue; + } + + auto* delete_value = absl::get_if(&op); + if (delete_value) { + delete_value->column_family.DeleteTimeStamp( + row_key, std::move(delete_value->column_qualifier), + delete_value->timestamp); + continue; + } + + // If we get here, there is an type of undo log that has not been + // implemented! 
+ std::abort(); + } +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 81163ff43b0db..80c9f3fcb257b 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -20,12 +20,17 @@ #include "google/cloud/bigtable/emulator/row_streamer.h" #include "google/cloud/status.h" #include "google/cloud/status_or.h" +#include "absl/types/variant.h" #include #include #include +#include #include #include +#include #include +#include +#include namespace google { namespace cloud { @@ -33,7 +38,7 @@ namespace bigtable { namespace emulator { /// Objects of this class represent Bigtable tables. -class Table { +class Table : public std::enable_shared_from_this
{ public: static StatusOr> Create( google::bigtable::admin::v2::Table schema); @@ -52,10 +57,23 @@ class Table { Status ReadRows(google::bigtable::v2::ReadRowsRequest const& request, RowStreamer& row_streamer) const; + std::map>::iterator begin() { + return column_families_.begin(); + } + std::map>::iterator end() { + return column_families_.end(); + } + std::map>::iterator find( + std::string const& column_family) { + return column_families_.find(column_family); + } + + std::shared_ptr
get() { return shared_from_this(); } private: Table() = default; friend class RowSetIterator; + friend class RowTransaction; template StatusOr> FindColumnFamily( @@ -68,6 +86,59 @@ class Table { std::map> column_families_; }; +struct RestoreValue { + ColumnFamily& column_family; + std::string column_qualifier; + std::chrono::milliseconds timestamp; + std::string value; +}; + +struct DeleteValue { + ColumnFamily& column_family; + std::string column_qualifier; + std::chrono::milliseconds timestamp; +}; + +class RowTransaction { + public: + explicit RowTransaction( + std::shared_ptr
table, + ::google::bigtable::v2::MutateRowRequest const& request) + : request_(request) { + table_ = std::move(table); + committed_ = false; + }; + + ~RowTransaction() { + if (!committed_) { + Undo(); + } + }; + + void commit() { committed_ = true; } + + Status SetCell(::google::bigtable::v2::Mutation_SetCell const& set_cell); + Status AddToCell( + ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell); + Status MergeToCell( + ::google::bigtable::v2::Mutation_MergeToCell const& merge_to_cell); + Status DeleteFromColumn( + ::google::bigtable::v2::Mutation_DeleteFromColumn const& + delete_from_column); + Status DeleteFromFamily( + ::google::bigtable::v2::Mutation_DeleteFromFamily const& + delete_from_family); + Status DeleteFromRow(); + + private: + void Undo(); + + bool committed_; + std::shared_ptr
table_; + std::stack> undo_; + ::google::bigtable::v2::MutateRowRequest const& request_; +}; + /** * A `AbstractCellStreamImpl` which streams filtered contents of the table. * From 235ca331182d50c19373c4e62c97dafe4efe3a4b Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 6 May 2025 00:41:05 +0300 Subject: [PATCH 130/195] emulator: SampleRowKeys: Implement a row sampler. Not yet wired into the GRPC service, and tests against the CI and a new unit test (not yet implemented) need to be run. --- .../cloud/bigtable/emulator/column_family.h | 17 ++++ google/cloud/bigtable/emulator/table.cc | 85 +++++++++++++++++++ google/cloud/bigtable/emulator/table.h | 31 +++++++ 3 files changed, 133 insertions(+) diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index e7928afa36516..53e163b46ab6a 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -23,6 +23,7 @@ #include #include #include +#include #include namespace google { @@ -178,6 +179,20 @@ class ColumnFamilyRow { columns_.erase(column_it); } + size_t size() const { + size_t res = 0; + + for (auto const& c : columns_) { + res += c.first.size(); + for (auto const& cr : c.second) { + res += sizeof(cr.first); + res += cr.second.size(); + } + } + + return res; + }; + private: friend class ColumnFamily; @@ -288,6 +303,8 @@ class ColumnFamily { rows_.erase(row_it); } + size_t size() const { return rows_.size(); } + private: std::map rows_; }; diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 0d1f79a50780e..9b8a4dbec1026 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -23,7 +23,13 @@ #include #include #include +#include +#include #include +#include +#include +#include +#include #include namespace google { @@ -392,6 +398,85 @@ bool Table::IsDeleteProtectedNoLock() const { return 
schema_.deletion_protection(); } +RowSampler Table::SampleRowKeys( + google::bigtable::v2::SampleRowKeysRequest const&) { + size_t row_index = 0; + size_t max_num_rows = 0; + std::map::const_iterator row_iterator; + std::map::const_iterator row_end; + bool table_is_empty = true; + size_t offset_bytes = 0; + + std::once_flag once_flag; + + auto next_sample = [&, this] { + std::call_once(once_flag, [this, &max_num_rows, &row_iterator, &row_end, + &table_is_empty]() { + // Pick rows from just the largest column family since we are + // just sampling. However offsets will be estimated based on the + // size of the row across all column families. + for (auto const& cf : column_families_) { + if (cf.second->size() > max_num_rows) { + table_is_empty = false; + row_iterator = cf.second->begin(); + row_end = cf.second->end(); + max_num_rows = cf.second->size(); + } + } + }); + + // The signal that there are no more rows. + if (table_is_empty || row_iterator == row_end) { + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key(""); + resp.set_offset_bytes(offset_bytes); + + return resp; + } + + for (auto& row = row_iterator; row_iterator != row_end; + row_index++, row_iterator++) { + // If there are any rows we need to return at least one + // row. Alwasy return the last one. + if (row_index == max_num_rows - 1) { + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key(row->first); + resp.set_offset_bytes(offset_bytes); + + offset_bytes += (row->first.size() + row->second.size()); + + return resp; + } + + // Sample about one every 100 rows randomly. 
+ if (std::rand() % 100 == 0) { + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key(row->first); + resp.set_offset_bytes(offset_bytes); + + offset_bytes += (row->first.size() + row->second.size()); + + return resp; + } + + offset_bytes += (row->first.size() + row->second.size()); + } + + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key(""); + resp.set_offset_bytes(offset_bytes); + return resp; + }; + + // We acquire the table lock here (in the constructor), so that + // every time we call row_sampler.Next() we always hold the lock, + // and will continue to hold it until the destructor of RowSampler + // is called. + RowSampler row_sampler(this->get(), next_sample); + + return row_sampler; +} + // NOLINTBEGIN(readability-convert-member-functions-to-static) Status RowTransaction::AddToCell( ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell) { diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 80c9f3fcb257b..a7b29dc6f1c17 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -31,12 +31,15 @@ #include #include #include +#include namespace google { namespace cloud { namespace bigtable { namespace emulator { +class RowSampler; + /// Objects of this class represent Bigtable tables. class Table : public std::enable_shared_from_this
{ public: @@ -68,12 +71,16 @@ class Table : public std::enable_shared_from_this
{ return column_families_.find(column_family); } + RowSampler SampleRowKeys( + const google::bigtable::v2::SampleRowKeysRequest&); + std::shared_ptr
get() { return shared_from_this(); } private: Table() = default; friend class RowSetIterator; friend class RowTransaction; + friend class RowSampler; template StatusOr> FindColumnFamily( @@ -139,6 +146,30 @@ class RowTransaction { ::google::bigtable::v2::MutateRowRequest const& request_; }; +class RowSampler { + public: + explicit RowSampler( + std::shared_ptr
table, + std::function + next_sample_closure) { + table_ = std::move(table); + next_sample_closure_ = std::move(next_sample_closure); + + table_->mu_.lock(); + }; + + google::bigtable::v2::SampleRowKeysResponse Next() { + return next_sample_closure_(); + } + + ~RowSampler() { table_->mu_.unlock(); }; + + private: + std::shared_ptr
table_; + std::function + next_sample_closure_; +}; + /** * A `AbstractCellStreamImpl` which streams filtered contents of the table. * From a9637adc3c16ad950698424457761c8d300ffd69 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 7 May 2025 12:27:12 +0300 Subject: [PATCH 131/195] emulator: SampleRowKeys: Wire up the sampler to the SampleRowKeys GRPC call. This finishes the implementation of the SampleRowKeys RPC. --- google/cloud/bigtable/emulator/server.cc | 25 +++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index 993c187c703ea..0735e6b1171e8 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -49,9 +49,28 @@ class EmulatorService final : public btproto::Bigtable::Service { grpc::Status SampleRowKeys( grpc::ServerContext* /* context */, - btproto::SampleRowKeysRequest const* /* request */, - grpc::ServerWriter* /* writer */) - override { + btproto::SampleRowKeysRequest const* request, + grpc::ServerWriter* writer) override { + auto maybe_table = cluster_->FindTable(request->table_name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + + auto row_sampler = maybe_table.value()->SampleRowKeys(*request); + + while (true) { + auto sample = row_sampler.Next(); + + if (sample.row_key().empty()) { + auto opts = grpc::WriteOptions(); + opts.set_last_message(); + writer->WriteLast(std::move(sample), opts); + break; + } + + writer->Write(std::move(sample)); + } + return grpc::Status::OK; } From ff6cd6d8fba9af0a41bd72d2bc872dc029da8bf5 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 7 May 2025 15:48:46 +0300 Subject: [PATCH 132/195] emulator: SampleRowKeys: Consider the size of the row in all column families. 
--- google/cloud/bigtable/emulator/table.cc | 50 +++++++++++++++++++++---- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 9b8a4dbec1026..f7d8c5f946bfa 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -400,6 +400,9 @@ bool Table::IsDeleteProtectedNoLock() const { RowSampler Table::SampleRowKeys( google::bigtable::v2::SampleRowKeysRequest const&) { + // We pick the row key samples from just one column family (the + // largest). + std::string sample_src_cf; size_t row_index = 0; size_t max_num_rows = 0; std::map::const_iterator row_iterator; @@ -410,14 +413,21 @@ RowSampler Table::SampleRowKeys( std::once_flag once_flag; auto next_sample = [&, this] { + // The first time the closure is called, initialize the row + // iterators. The sampler works by advancing the iterator by + // varying steps every time the closure it contains is called. We + // can't initialize the iterators before the closure is first + // called since we need to be holding the table lock first (in our + // scheme it is grabbed in the constructor of the sampler). std::call_once(once_flag, [this, &max_num_rows, &row_iterator, &row_end, - &table_is_empty]() { + &table_is_empty, &sample_src_cf]() { // Pick rows from just the largest column family since we are // just sampling. However offsets will be estimated based on the // size of the row across all column families. for (auto const& cf : column_families_) { if (cf.second->size() > max_num_rows) { table_is_empty = false; + sample_src_cf = cf.first; row_iterator = cf.second->begin(); row_end = cf.second->end(); max_num_rows = cf.second->size(); @@ -425,25 +435,46 @@ RowSampler Table::SampleRowKeys( } }); - // The signal that there are no more rows. + // The signal that there are no more rows (an empty row key). 
if (table_is_empty || row_iterator == row_end) { google::bigtable::v2::SampleRowKeysResponse resp; resp.set_row_key(""); - resp.set_offset_bytes(offset_bytes); + resp.set_offset_bytes(0); return resp; } for (auto& row = row_iterator; row_iterator != row_end; row_index++, row_iterator++) { + + auto add_this_row_size_to_offset = [&, this] { + // First the offset due to the size of the row in the column + // family we are sampling. + offset_bytes += (row->first.size() + row->second.size()); + + // Then consider the size of the row data in other column families, + // if they contain the row. + for (auto const& cf : column_families_) { + if (cf.first == sample_src_cf) { + continue; + } + + auto r = cf.second->find(row->first); + if (r != cf.second->end()) { + offset_bytes += (row->first.size() + r->second.size()); + } + } + + }; + // If there are any rows we need to return at least one - // row. Alwasy return the last one. + // row. Always return the last one. if (row_index == max_num_rows - 1) { google::bigtable::v2::SampleRowKeysResponse resp; resp.set_row_key(row->first); resp.set_offset_bytes(offset_bytes); - offset_bytes += (row->first.size() + row->second.size()); + add_this_row_size_to_offset(); return resp; } @@ -454,12 +485,15 @@ RowSampler Table::SampleRowKeys( resp.set_row_key(row->first); resp.set_offset_bytes(offset_bytes); - offset_bytes += (row->first.size() + row->second.size()); + add_this_row_size_to_offset(); return resp; } - offset_bytes += (row->first.size() + row->second.size()); + // This is a row we are not sampling, but we still need to + // account for its size for accurate offsets of subsequent + // sampled rows. 
+ add_this_row_size_to_offset(); } google::bigtable::v2::SampleRowKeysResponse resp; @@ -477,7 +511,7 @@ RowSampler Table::SampleRowKeys( return row_sampler; } -// NOLINTBEGIN(readability-convert-member-functions-to-static) +// NOLINTBEGIN(readability-convert-member-functions-to-static) Status RowTransaction::AddToCell( ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell) { return UnimplementedError( From a8908e915a8066e9235e75e94d9eb709c2b605ff Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Wed, 7 May 2025 17:00:06 +0200 Subject: [PATCH 133/195] More filter tests and some bug fixes. --- .../cloud/bigtable/emulator/column_family.cc | 4 +- .../bigtable/emulator/column_family_test.cc | 6 +- google/cloud/bigtable/emulator/filter.cc | 45 +- google/cloud/bigtable/emulator/filter.h | 1 + google/cloud/bigtable/emulator/filter_test.cc | 622 ++++++++++++++++-- google/cloud/bigtable/emulator/table_test.cc | 23 +- 6 files changed, 639 insertions(+), 62 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 3e23cde76c80d..2981c4c24c630 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -180,7 +180,9 @@ class FilteredColumnFamilyStream::FilterApply { explicit FilterApply(FilteredColumnFamilyStream& parent) : parent_(parent) {} bool operator()(ColumnRange const& column_range) { - parent_.column_ranges_.Intersect(column_range.range); + if (column_range.column_family == parent_.column_family_name_) { + parent_.column_ranges_.Intersect(column_range.range); + } return true; } diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index 21b8cacd0756e..d96d93d69dd2d 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -292,10 +292,10 @@ TEST(FilteredColumnFamilyStream, FilterByColumnRange) {
fam.SetCell("row2", "col1", 300_ms, "foo"); auto included_rows = std::make_shared(StringRangeSet::All()); FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); + filtered_stream.ApplyFilter(ColumnRange{ + "dummy", StringRangeSet::Range("col1", false, "col4", false)}); filtered_stream.ApplyFilter( - ColumnRange{StringRangeSet::Range("col1", false, "col4", false)}); - filtered_stream.ApplyFilter( - ColumnRange{StringRangeSet::Range("col1", false, "col2", false)}); + ColumnRange{"cf1", StringRangeSet::Range("col1", false, "col2", false)}); EXPECT_EQ(R"""( row0 cf1:col1 @100ms: foo row0 cf1:col2 @200ms: foo diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index 8c1cfd071663c..7e62ed87b08ce 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ b/google/cloud/bigtable/emulator/filter.cc @@ -88,13 +88,13 @@ void CellStream::EmulateNextRow() { template class PerRowStateFilter { static_assert(google::cloud::internal::is_invocable::value, - "StateResetFunctor must be invocable with no arguments"); + "StateResetFunctor must be invocable with no arguments."); using State = std::decay_t>; static_assert(std::is_default_constructible::value, - "State must be default constructible"); + "State must be default constructible."); static_assert(std::is_assignable::value, - "State must assignable"); + "State must be assignable."); static_assert(std::is_same, absl::optional>::value, @@ -245,7 +245,7 @@ class TrivialFilter : public AbstractCellStreamImpl { std::is_same< google::cloud::internal::invoke_result_t, absl::optional>::value, - "Invalid filter return type"); + "Invalid filter return type."); public: /** @@ -392,7 +392,7 @@ bool MergeCellStreams::ApplyFilter(InternalFilter const& internal_filter) { assert(!initialized_); bool res = true; for (auto& stream : unfinished_streams_) { - res = res && stream->ApplyFilter(internal_filter); + res = stream->ApplyFilter(internal_filter) && res; } return res; } @@ -423,12 
+423,10 @@ bool MergeCellStreams::Next(NextMode mode) { std::pop_heap(unfinished_streams_.begin(), unfinished_streams_.end(), CellStreamGreater()); - std::vector>::iterator first_to_advance = - std::prev(unfinished_streams_.end()); - std::vector>::iterator to_readd_begin = - first_to_advance; + auto first_to_advance = std::prev(unfinished_streams_.end()); + auto to_readd_begin = first_to_advance; - auto all_streams_to_advance_removed_from_heap = [&] () { + auto all_streams_to_advance_removed_from_heap = [&]() { if (unfinished_streams_.begin() == to_readd_begin) { // All streams removed. return true; @@ -507,7 +505,20 @@ class ConditionStream : public AbstractCellStreamImpl { true_stream_(std::move(true_stream)), false_stream_(std::move(false_stream)) {} - bool ApplyFilter(InternalFilter const&) override { return false; } + bool ApplyFilter(InternalFilter const& internal_filter) override { + bool res = true; + if (absl::holds_alternative(internal_filter)) { + // If we're skipping whole rows we may apply it to all four streams. + // If we fail to apply to `source_` or `predicate_stream` but succeed with + // both `false_stream` and `true_stream` we should still return true + // because the stream will not yield the unwanted cells. 
+ source_.ApplyFilter(internal_filter); + predicate_stream_.ApplyFilter(internal_filter); + } + res = true_stream_.ApplyFilter(internal_filter) && res; + res = false_stream_.ApplyFilter(internal_filter) && res; + return res; + } bool HasValue() const override { InitializeIfNeeded(); @@ -804,7 +815,7 @@ StatusOr CreateFilterImpl( family_name = std::move(family_name), range = *std::move(maybe_range)] { auto source = source_ctor(); - if (source.ApplyFilter(ColumnRange{range})) { + if (source.ApplyFilter(ColumnRange{family_name, range})) { return source; } return MakeTrivialFilter( @@ -1067,9 +1078,15 @@ StatusOr CreateFilterImpl( predicate_stream_ctor = *std::move(maybe_predicate_stream_ctor), true_stream_ctor = *std::move(maybe_true_stream_ctor), false_stream_ctor = *std::move(maybe_false_stream_ctor)] { + // The test FilterApplicationPropagation.Condition relies on the + // order of creating those streams. + auto source = source_ctor(); + auto predicate_stream = predicate_stream_ctor(); + auto true_stream = true_stream_ctor(); + auto false_stream = false_stream_ctor(); return CellStream(std::make_unique( - source_ctor(), predicate_stream_ctor(), true_stream_ctor(), - false_stream_ctor())); + std::move(source), std::move(predicate_stream), + std::move(true_stream), std::move(false_stream))); }; return res; } diff --git a/google/cloud/bigtable/emulator/filter.h b/google/cloud/bigtable/emulator/filter.h index 66a09f214a368..c01062a57ee8e 100644 --- a/google/cloud/bigtable/emulator/filter.h +++ b/google/cloud/bigtable/emulator/filter.h @@ -80,6 +80,7 @@ struct ColumnRegex { }; /// Only return cells from columns which fall into `range`. struct ColumnRange { + std::string column_family; StringRangeSet::Range range; }; /// Only return cells from timestamps which fall into `range`. 
diff --git a/google/cloud/bigtable/emulator/filter_test.cc b/google/cloud/bigtable/emulator/filter_test.cc index 85f4dd86a8954..97718274049a6 100644 --- a/google/cloud/bigtable/emulator/filter_test.cc +++ b/google/cloud/bigtable/emulator/filter_test.cc @@ -12,29 +12,31 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/data_connection.h" #include "google/cloud/bigtable/table.h" -#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/testing_util/chrono_literals.h" #include "google/cloud/testing_util/is_proto_equal.h" #include "google/cloud/testing_util/status_matchers.h" -#include "google/cloud/testing_util/chrono_literals.h" #include +#include namespace google { namespace cloud { namespace bigtable { namespace emulator { -namespace { +using ::google::bigtable::v2::RowFilter; using ::testing::Return; +using testing_util::StatusIs; using testing_util::chrono_literals::operator""_ms; class MockStream : public AbstractCellStreamImpl { public: MOCK_METHOD(bool, ApplyFilter, (InternalFilter const& internal_filter), (override)); - MOCK_METHOD(bool, HasValue, (), (const override)); - MOCK_METHOD(CellView const&, Value, (), (const override)); + MOCK_METHOD(bool, HasValue, (), (const, override)); + MOCK_METHOD(CellView const&, Value, (), (const, override)); MOCK_METHOD(bool, Next, (NextMode mode), (override)); }; @@ -113,8 +115,7 @@ std::ostream& operator<<(std::ostream& stream, TestCell const& test_cell) { } TEST(CellStream, NextColumnNotSupportedNoMoreData) { - std::vector cells{ - TestCell{"row1", "cf1", "col1", 0_ms, "val1"}}; + std::vector cells{TestCell{"row1", "cf1", "col1", 0_ms, "val1"}}; std::vector::iterator cur_cell = cells.begin(); auto mock_impl = std::make_unique(); @@ -237,8 +238,7 @@ TEST(CellStream, NextRowUnsupported) { std::vector::iterator cur_cell = cells.begin(); auto mock_impl = 
std::make_unique(); - EXPECT_CALL(*mock_impl, Next(NextMode::kRow)) - .WillRepeatedly(Return(false)); + EXPECT_CALL(*mock_impl, Next(NextMode::kRow)).WillRepeatedly(Return(false)); EXPECT_CALL(*mock_impl, Value).WillRepeatedly([&]() -> CellView const& { return cur_cell->AsCellView(); }); @@ -246,15 +246,13 @@ TEST(CellStream, NextRowUnsupported) { return cur_cell != cells.end(); }); EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)).WillRepeatedly([&] { - cur_cell = - std::find_if(cur_cell, cells.end(), [&](TestCell const& cell) { - return cell.AsCellView().row_key() != - cur_cell->AsCellView().row_key() || - cell.AsCellView().column_family() != - cur_cell->AsCellView().column_family() || - cell.AsCellView().column_qualifier() != - cur_cell->AsCellView().column_qualifier(); - }); + cur_cell = std::find_if(cur_cell, cells.end(), [&](TestCell const& cell) { + return cell.AsCellView().row_key() != cur_cell->AsCellView().row_key() || + cell.AsCellView().column_family() != + cur_cell->AsCellView().column_family() || + cell.AsCellView().column_qualifier() != + cur_cell->AsCellView().column_qualifier(); + }); return true; }); @@ -281,8 +279,7 @@ TEST(CellStream, NextRowAndColumnUnsupported) { std::vector::iterator cur_cell = cells.begin(); auto mock_impl = std::make_unique(); - EXPECT_CALL(*mock_impl, Next(NextMode::kRow)) - .WillRepeatedly(Return(false)); + EXPECT_CALL(*mock_impl, Next(NextMode::kRow)).WillRepeatedly(Return(false)); EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)) .WillRepeatedly(Return(false)); EXPECT_CALL(*mock_impl, Next(NextMode::kCell)).WillRepeatedly([&] { @@ -343,20 +340,16 @@ TEST_P(CellStreamOrderTest, Order) { auto mock_impl_left = std::make_unique(); auto left_cell = std::get<1>(GetParam()); auto right_cell = std::get<2>(GetParam()); - EXPECT_CALL(*mock_impl_left, Value) - .WillRepeatedly( - [&]() -> CellView const& { return left_cell.AsCellView(); }); - EXPECT_CALL(*mock_impl_left, HasValue).WillRepeatedly([&] { - return true; + 
EXPECT_CALL(*mock_impl_left, Value).WillRepeatedly([&]() -> CellView const& { + return left_cell.AsCellView(); }); + EXPECT_CALL(*mock_impl_left, HasValue).WillRepeatedly([&] { return true; }); auto mock_impl_right = std::make_unique(); - EXPECT_CALL(*mock_impl_right, Value) - .WillRepeatedly( - [&]() -> CellView const& { return right_cell.AsCellView(); }); - EXPECT_CALL(*mock_impl_right, HasValue).WillRepeatedly([&] { - return true; + EXPECT_CALL(*mock_impl_right, Value).WillRepeatedly([&]() -> CellView const& { + return right_cell.AsCellView(); }); + EXPECT_CALL(*mock_impl_right, HasValue).WillRepeatedly([&] { return true; }); auto left = std::make_unique(std::move(mock_impl_left)); auto right = std::make_unique(std::move(mock_impl_right)); EXPECT_EQ(std::get<0>(GetParam()), @@ -562,11 +555,10 @@ TEST(MergeCellStreams, AdvancingRowAdvancesAllRelevantStreams) { return true; }); - EXPECT_CALL(*stream_data_1.stream, Next(NextMode::kCell)) - .WillOnce([&]() { - ++stream_data_1.cur_cell; - return true; - }); + EXPECT_CALL(*stream_data_1.stream, Next(NextMode::kCell)).WillOnce([&]() { + ++stream_data_1.cur_cell; + return true; + }); EXPECT_CALL(*stream_data_2.stream, Next(NextMode::kCell)) .Times(2) @@ -575,11 +567,10 @@ TEST(MergeCellStreams, AdvancingRowAdvancesAllRelevantStreams) { return true; }); - EXPECT_CALL(*stream_data_3.stream, Next(NextMode::kCell)) - .WillOnce([&]() { - ++stream_data_3.cur_cell; - return true; - }); + EXPECT_CALL(*stream_data_3.stream, Next(NextMode::kCell)).WillOnce([&]() { + ++stream_data_3.cur_cell; + return true; + }); std::vector streams; streams.push_back(CellStream(std::move(stream_data_1.stream))); @@ -674,8 +665,7 @@ TEST(MergeCellStreams, AdvancingColumnAdvancesAllRelevantStreams) { return true; }); - EXPECT_CALL(*stream_data_different_row.stream, - Next(NextMode::kCell)) + EXPECT_CALL(*stream_data_different_row.stream, Next(NextMode::kCell)) .WillOnce([&]() { ++stream_data_different_row.cur_cell; return true; @@ -715,7 +705,553 
@@ TEST(MergeCellStreams, AdvancingColumnAdvancesAllRelevantStreams) { ASSERT_FALSE(stream.HasValue()); } -} // anonymous namespace +class InvalidFilterProtoTest : public ::testing::Test { + protected: + ::google::bigtable::v2::RowFilter filter_; + StatusOr TryCreate() { + return CreateFilter( + filter_, [] { return CellStream(std::make_unique()); }); + } +}; + +TEST_F(InvalidFilterProtoTest, PassAll) { + filter_.set_pass_all_filter(false); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`pass_all_filter` explicitly set to `false`"))); +} + +TEST_F(InvalidFilterProtoTest, BlockAll) { + filter_.set_block_all_filter(false); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`block_all_filter` explicitly set to `false`"))); +} + +TEST_F(InvalidFilterProtoTest, RowKeyRegex) { + filter_.set_row_key_regex_filter("["); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`row_key_regex_filter` is not a valid RE2 regex"))); +} + +TEST_F(InvalidFilterProtoTest, ValueRegex) { + filter_.set_value_regex_filter("["); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`value_regex_filter` is not a valid RE2 regex."))); +} + +TEST_F(InvalidFilterProtoTest, RowSampleNegative) { + filter_.set_row_sample_filter(-1); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`row_sample_filter` is not a valid probability."))); +} + +TEST_F(InvalidFilterProtoTest, RowSampleTooLarge) { + filter_.set_row_sample_filter(10); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`row_sample_filter` is not a valid probability."))); +} + 
+TEST_F(InvalidFilterProtoTest, FamilyNameRegex) { + filter_.set_family_name_regex_filter("["); + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`family_name_regex_filter` is not a valid RE2 regex."))); +} + +TEST_F(InvalidFilterProtoTest, ColumnQualifierRegex) { + filter_.set_column_qualifier_regex_filter("["); + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs( + StatusCode::kInvalidArgument, + testing::HasSubstr( + "`column_qualifier_regex_filter` is not a valid RE2 regex."))); +} + +TEST_F(InvalidFilterProtoTest, PerRowOffset) { + filter_.set_cells_per_row_offset_filter(-1); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`cells_per_row_offset_filter` is negative."))); +} + +TEST_F(InvalidFilterProtoTest, PerRowLimit) { + filter_.set_cells_per_row_limit_filter(-1); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`cells_per_row_limit_filter` is negative."))); +} + +TEST_F(InvalidFilterProtoTest, PerColumnLimit) { + filter_.set_cells_per_column_limit_filter(-1); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`cells_per_column_limit_filter` is negative."))); +} + +TEST_F(InvalidFilterProtoTest, StripValue) { + filter_.set_strip_value_transformer(false); + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`strip_value_transformer` explicitly set to `false`."))); +} + +TEST_F(InvalidFilterProtoTest, ConditionNoPredicate) { + filter_.mutable_condition(); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`condition` must have a `predicate_filter` set."))); +} + 
+TEST_F(InvalidFilterProtoTest, SinkFalse) { + filter_.set_sink(false); + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr("`sink` explicitly set to `false`."))); +} + +TEST_F(InvalidFilterProtoTest, ChainSinkFalse) { + filter_.mutable_chain()->add_filters()->set_sink(false); + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr("`sink` explicitly set to `false`."))); +} + +TEST_F(InvalidFilterProtoTest, InterleaveSinkFalse) { + filter_.mutable_interleave()->add_filters()->set_sink(false); + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr("`sink` explicitly set to `false`."))); +} + +TEST(FilterTest, BlockAll) { + RowFilter filter; + filter.set_block_all_filter(true); + + auto mock_impl = std::make_unique(); + CellStream(std::move(mock_impl)); + + auto maybe_stream = CreateFilter( + filter, [] { return CellStream(std::make_unique()); }); + + ASSERT_STATUS_OK(maybe_stream); + EXPECT_FALSE(maybe_stream->HasValue()); +} + +bool operator==(RowKeyRegex const& lhs, RowKeyRegex const& rhs) { + return lhs.regex == rhs.regex; +} +bool operator==(FamilyNameRegex const& lhs, FamilyNameRegex const& rhs) { + return lhs.regex == rhs.regex; +} +bool operator==(ColumnRegex const& lhs, ColumnRegex const& rhs) { + return lhs.regex == rhs.regex; +} +bool operator==(ColumnRange const& lhs, ColumnRange const& rhs) { + return lhs.column_family == rhs.column_family && lhs.range == rhs.range; +} +bool operator==(TimestampRange const& lhs, TimestampRange const& rhs) { + return lhs.range == rhs.range; +} + +class FilterPrinter { + public: + explicit FilterPrinter(std::ostream& stream) : stream_(stream) {} + void operator()(RowKeyRegex const& to_print) const { + stream_ << "RowKeyRegex(" << to_print.regex->pattern() << ")"; + } + void operator()(FamilyNameRegex const& to_print) { + stream_ <<
"FamilyNameRegex(" << to_print.regex->pattern() << ")"; + } + void operator()(ColumnRegex const& to_print) { + stream_ << "ColumnRegex(" << to_print.regex->pattern() << ")"; + } + void operator()(ColumnRange const& to_print) { + stream_ << "ColumnRange(" << to_print.column_family << "," << to_print.range + << ")"; + } + void operator()(TimestampRange const& to_print) { + stream_ << "TimestampRange(" << to_print.range << ")"; + } + + private: + std::ostream& stream_; +}; + +std::ostream& operator<<(std::ostream& os, InternalFilter const& filter) { + absl::visit(FilterPrinter(os), filter); + return os; +} + +class FilterApplicationPropagation : public ::testing::Test { + protected: + struct InternalFilterType { + InternalFilter internal_filter; + bool should_propagate; + }; + + FilterApplicationPropagation() + : sample_regex_(std::make_shared("foo.*")), + sample_string_range_("a", true, "b", false), + sample_ts_range_(std::chrono::milliseconds(10), + std::chrono::milliseconds(20)) { + internal_filters_.emplace( + "row_key_regex", InternalFilterType{RowKeyRegex{sample_regex_}, true}); + internal_filters_.emplace( + "family_name_regex", + InternalFilterType{FamilyNameRegex{sample_regex_}, true}); + internal_filters_.emplace( + "column_regex", InternalFilterType{ColumnRegex{sample_regex_}, true}); + internal_filters_.emplace( + "column_range", + InternalFilterType{ColumnRange{"fam", sample_string_range_}, true}); + internal_filters_.emplace( + "timestamp_range", + InternalFilterType{TimestampRange{sample_ts_range_}, true}); + } + + void PropagationNotExpected(std::string const& filter_type) { + auto filter_type_it = internal_filters_.find(filter_type); + ASSERT_NE(internal_filters_.end(), filter_type_it); + filter_type_it->second.should_propagate = false; + } + + std::shared_ptr sample_regex_; + StringRangeSet::Range sample_string_range_; + TimestampRangeSet::Range sample_ts_range_; + std::map internal_filters_; + + void TestPropagation(RowFilter const& filter, int 
num_applies_to_ignore) { + for (bool underlying_supports_filter : {false, true}) { + for (auto const& internal_filter_type : internal_filters_) { + auto maybe_stream = CreateFilter(filter, [&] { + auto mock_impl = std::make_unique(); + if (num_applies_to_ignore) { + // Creating the filter might trigger some `ApplyFilter` calls which + // we're not interested in in this test. Let's ignore them. + EXPECT_CALL(*mock_impl, ApplyFilter) + .Times(num_applies_to_ignore) + .WillRepeatedly(Return(false)); + } + if (internal_filter_type.second.should_propagate) { + EXPECT_CALL( + *mock_impl, + ApplyFilter(internal_filter_type.second.internal_filter)) + .WillOnce(Return(underlying_supports_filter)); + } + return CellStream(std::move(mock_impl)); + }); + ASSERT_STATUS_OK(maybe_stream); + + if (underlying_supports_filter) { + EXPECT_EQ(internal_filter_type.second.should_propagate, + maybe_stream->ApplyFilter( + internal_filter_type.second.internal_filter)) + << "for filter " << internal_filter_type.first; + } else { + EXPECT_FALSE(maybe_stream->ApplyFilter( + internal_filter_type.second.internal_filter)) + << "for filter " << internal_filter_type.first; + } + } + } + } +}; + +TEST_F(FilterApplicationPropagation, PassAll) { + RowFilter filter; + filter.set_pass_all_filter(true); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, BlockAll) { + RowFilter filter; + filter.set_block_all_filter(true); + + for (auto& internal_filter : internal_filters_) { + auto maybe_stream = + CreateFilter(filter, [&] { return CellStream(nullptr); }); + ASSERT_STATUS_OK(maybe_stream); + EXPECT_EQ(true, + maybe_stream->ApplyFilter(internal_filter.second.internal_filter)) + << " for filter " << internal_filter.first; + } +} + +TEST_F(FilterApplicationPropagation, RowKeyRegex) { + RowFilter filter; + filter.set_row_key_regex_filter("foo.*"); + + TestPropagation(filter, 1); +} + +TEST_F(FilterApplicationPropagation, RowSample) { + RowFilter filter; + 
filter.set_row_sample_filter(0.5); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, FamilyNameRegex) { + RowFilter filter; + filter.set_family_name_regex_filter("foo.*"); + + TestPropagation(filter, 1); +} + +TEST_F(FilterApplicationPropagation, ColumnQualifierRegex) { + RowFilter filter; + filter.set_column_qualifier_regex_filter("foo.*"); + + TestPropagation(filter, 1); +} + +TEST_F(FilterApplicationPropagation, ColumnRange) { + RowFilter filter; + filter.mutable_column_range_filter()->set_family_name("fam1"); + filter.mutable_column_range_filter()->set_start_qualifier_open("q1"); + filter.mutable_column_range_filter()->set_end_qualifier_closed("q4"); + + TestPropagation(filter, 1); +} + +TEST_F(FilterApplicationPropagation, TimestampRange) { + RowFilter filter; + filter.mutable_timestamp_range_filter()->set_start_timestamp_micros(1000); + filter.mutable_timestamp_range_filter()->set_end_timestamp_micros(2000); + + TestPropagation(filter, 1); +} + +TEST_F(FilterApplicationPropagation, ValueRegex) { + RowFilter filter; + filter.set_value_regex_filter("foo.*"); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, ValueRange) { + RowFilter filter; + filter.mutable_value_range_filter()->set_start_value_open("q1"); + filter.mutable_value_range_filter()->set_end_value_closed("q4"); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, PerRowOffset) { + RowFilter filter; + filter.set_cells_per_row_offset_filter(10); + + for (auto& filter_type : {"family_name_regex", "column_regex", "column_range", + "timestamp_range"}) { + PropagationNotExpected(filter_type); + } + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, PerRowLimit) { + RowFilter filter; + filter.set_cells_per_row_limit_filter(10); + + for (auto& filter_type : {"family_name_regex", "column_regex", "column_range", + "timestamp_range"}) { + PropagationNotExpected(filter_type); + } + + TestPropagation(filter, 0); +} + 
+TEST_F(FilterApplicationPropagation, PerColumnLimit) { + RowFilter filter; + filter.set_cells_per_column_limit_filter(10); + + PropagationNotExpected("timestamp_range"); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, StripValue) { + RowFilter filter; + filter.set_strip_value_transformer(true); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, ApplyLabel) { + RowFilter filter; + filter.set_apply_label_transformer("foo"); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, InterleaveAllSupport) { + RowFilter filter; + auto& interleave = *filter.mutable_interleave(); + interleave.add_filters()->set_pass_all_filter(true); + interleave.add_filters()->set_pass_all_filter(true); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, Condition) { + RowFilter filter; + auto& condition = *filter.mutable_condition(); + condition.mutable_predicate_filter()->set_pass_all_filter(true); + condition.mutable_true_filter()->set_pass_all_filter(true); + condition.mutable_false_filter()->set_pass_all_filter(true); + + for (bool underlying_supports_filter : {false, true}) { + for (auto& internal_filter_type : internal_filters_) { + // For lack of a better idea this test relies on the fact that the + // implementation calls the mocked source stream ctor in the following + // order: + // * for the source data + // * for the predicate stream + // * for the true branch stream + // * for the false branch stream + std::int32_t num_streams_created = 0; + auto maybe_stream = CreateFilter(filter, [&] { + auto mock_impl = std::make_unique(); + if (num_streams_created < 2 && + internal_filter_type.first == "row_key_regex") { + // source or predicate stream - they should only pass the row regexes + EXPECT_CALL(*mock_impl, + ApplyFilter(internal_filter_type.second.internal_filter)) + .WillOnce(Return(false)); // this should have no effect on the + // result. 
+ } + if (num_streams_created >= 2) { + // true or false branch stream - they should propagate all filters + if (internal_filter_type.second.should_propagate) { + EXPECT_CALL( + *mock_impl, + ApplyFilter(internal_filter_type.second.internal_filter)) + .WillOnce(Return(underlying_supports_filter)); + } + } + ++num_streams_created; + return CellStream(std::move(mock_impl)); + }); + ASSERT_STATUS_OK(maybe_stream); + EXPECT_EQ(underlying_supports_filter, + maybe_stream->ApplyFilter( + internal_filter_type.second.internal_filter)) + << " for filter " << internal_filter_type.first; + } + } +} +class InternalFiltersAreApplied : public ::testing::Test { + protected: + RowFilter filter_; + + template + void PerformTest(std::function onApply) { + auto maybe_stream = CreateFilter(filter_, [&] { + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, ApplyFilter) + .WillOnce([onApply](InternalFilter const& internal_filter) -> bool { + auto maybe_regex = absl::get_if(&internal_filter); + EXPECT_NE(nullptr, maybe_regex); + onApply(*maybe_regex); + return true; + }); + return CellStream(std::move(mock_impl)); + }); + ASSERT_STATUS_OK(maybe_stream); + // Verify that no separate CellStream object was created when filter is + // applied internally. 
+ EXPECT_NE(nullptr, dynamic_cast(&maybe_stream->impl())); + } +}; + +TEST_F(InternalFiltersAreApplied, RowKeyRegex) { + filter_.set_row_key_regex_filter("foo.*"); + + PerformTest([](RowKeyRegex const& row_key_regex) { + EXPECT_EQ("foo.*", row_key_regex.regex->pattern()); + }); +} + +TEST_F(InternalFiltersAreApplied, FamilyNameRegex) { + filter_.set_family_name_regex_filter("foo.*"); + + PerformTest([](FamilyNameRegex const& family_name_regex) { + EXPECT_EQ("foo.*", family_name_regex.regex->pattern()); + }); +} + +TEST_F(InternalFiltersAreApplied, ColumnRegex) { + filter_.set_column_qualifier_regex_filter("foo.*"); + + PerformTest([](ColumnRegex const& column_qualifier_regex) { + EXPECT_EQ("foo.*", column_qualifier_regex.regex->pattern()); + }); +} + +TEST_F(InternalFiltersAreApplied, ColumnRange) { + filter_.mutable_column_range_filter()->set_family_name("fam1"); + filter_.mutable_column_range_filter()->set_start_qualifier_open("q1"); + filter_.mutable_column_range_filter()->set_end_qualifier_closed("q4"); + + PerformTest([](ColumnRange const& column_range) { + EXPECT_EQ("fam1", column_range.column_family); + EXPECT_EQ("q1", column_range.range.start()); + EXPECT_TRUE(column_range.range.start_open()); + EXPECT_EQ("q4", column_range.range.end()); + EXPECT_TRUE(column_range.range.end_closed()); + }); +} + +TEST_F(InternalFiltersAreApplied, TimestampRange) { + filter_.mutable_timestamp_range_filter()->set_start_timestamp_micros(1000); + filter_.mutable_timestamp_range_filter()->set_end_timestamp_micros(2000); + + PerformTest([](TimestampRange const& timestamp_range) { + EXPECT_EQ(std::chrono::milliseconds(1), timestamp_range.range.start()); + EXPECT_EQ(std::chrono::milliseconds(2), timestamp_range.range.end()); + }); +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/table_test.cc b/google/cloud/bigtable/emulator/table_test.cc index 9c9a31759bccd..1b251a7604200 100644 --- 
a/google/cloud/bigtable/emulator/table_test.cc +++ b/google/cloud/bigtable/emulator/table_test.cc @@ -77,6 +77,27 @@ TEST(FilteredTableStream, ColumnFamiliesAreFiltered) { EXPECT_EQ("row0 fam1:col0 @10ms: foo\n", DumpStream(stream)); } +TEST(FilteredTableStream, OnlyRightFamilyColumnsAreFiltered) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam1; + ColumnFamily fam2; + fam1.SetCell("row0", "col0", 10_ms, "foo"); + fam2.SetCell("row0", "col0", 10_ms, "foo"); + auto ffam1 = std::make_unique( + fam1, "fam1", std::make_unique(StringRangeSet::All())); + auto ffam2 = std::make_unique( + fam2, "fam2", std::make_unique(StringRangeSet::All())); + std::vector> fams; + fams.emplace_back(std::move(ffam1)); + fams.emplace_back(std::move(ffam2)); + FilteredTableStream stream(std::move(fams)); + + stream.ApplyFilter( + ColumnRange{"fam2", StringRangeSet::Range("a", false, "b", false)}); + EXPECT_EQ("row0 fam1:col0 @10ms: foo\n", DumpStream(stream)); +} + TEST(FilteredTableStream, OtherFiltersArePropagated) { using testing_util::chrono_literals::operator""_ms; @@ -110,7 +131,7 @@ TEST(FilteredTableStream, OtherFiltersArePropagated) { EXPECT_TRUE(stream.ApplyFilter(ColumnRegex{qualifier_pattern})); EXPECT_TRUE(stream.ApplyFilter( - ColumnRange{StringRangeSet::Range("co", false, "com", false)})); + ColumnRange{"fam1", StringRangeSet::Range("co", false, "com", false)})); EXPECT_TRUE(stream.ApplyFilter( TimestampRange{TimestampRangeSet::Range(0_ms, 300_ms)})); From de711bd3447ff7ee6e1732d6839d503454337a01 Mon Sep 17 00:00:00 2001 From: Marek Dopiera Date: Wed, 7 May 2025 19:54:21 +0200 Subject: [PATCH 134/195] Make linter happy. 
--- google/cloud/bigtable/emulator/filter_test.cc | 54 +++++++++---------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/google/cloud/bigtable/emulator/filter_test.cc b/google/cloud/bigtable/emulator/filter_test.cc index 97718274049a6..8e2fb2d78aa68 100644 --- a/google/cloud/bigtable/emulator/filter_test.cc +++ b/google/cloud/bigtable/emulator/filter_test.cc @@ -79,7 +79,7 @@ class TestCell { value_(other.value_), view_(row_key_, column_family_, column_qualifier_, timestamp_, value_) { } - TestCell(TestCell&& other) + TestCell(TestCell&& other) noexcept : row_key_(std::move(other.row_key_)), column_family_(std::move(other.column_family_)), column_qualifier_(std::move(other.column_qualifier_)), @@ -107,7 +107,7 @@ class TestCell { }; std::ostream& operator<<(std::ostream& stream, TestCell const& test_cell) { - auto& cell_view = test_cell.AsCellView(); + auto const& cell_view = test_cell.AsCellView(); stream << "Cell(" << cell_view.row_key() << " " << cell_view.column_family() << ":" << cell_view.column_qualifier() << " @" << cell_view.timestamp().count() << "ms: " << cell_view.value() << ")"; @@ -116,7 +116,7 @@ std::ostream& operator<<(std::ostream& stream, TestCell const& test_cell) { TEST(CellStream, NextColumnNotSupportedNoMoreData) { std::vector cells{TestCell{"row1", "cf1", "col1", 0_ms, "val1"}}; - std::vector::iterator cur_cell = cells.begin(); + auto cur_cell = cells.begin(); auto mock_impl = std::make_unique(); EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)).WillOnce(Return(false)); @@ -147,7 +147,7 @@ TEST(CellStream, NextColumnNotSupported) { TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; - std::vector::iterator cur_cell = cells.begin(); + auto cur_cell = cells.begin(); auto mock_impl = std::make_unique(); EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)) @@ -191,7 +191,7 @@ TEST(CellStream, NextRowNotSupported) { 
TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; - std::vector::iterator cur_cell = cells.begin(); + auto cur_cell = cells.begin(); auto mock_impl = std::make_unique(); EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)) @@ -235,7 +235,7 @@ TEST(CellStream, NextRowUnsupported) { TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; - std::vector::iterator cur_cell = cells.begin(); + auto cur_cell = cells.begin(); auto mock_impl = std::make_unique(); EXPECT_CALL(*mock_impl, Next(NextMode::kRow)).WillRepeatedly(Return(false)); @@ -276,7 +276,7 @@ TEST(CellStream, NextRowAndColumnUnsupported) { TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; - std::vector::iterator cur_cell = cells.begin(); + auto cur_cell = cells.begin(); auto mock_impl = std::make_unique(); EXPECT_CALL(*mock_impl, Next(NextMode::kRow)).WillRepeatedly(Return(false)); @@ -387,7 +387,7 @@ TEST(MergeCellStreams, OneStream) { TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; - std::vector::iterator cur_cell = cells.begin(); + auto cur_cell = cells.begin(); auto mock_impl = std::make_unique(); EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)).WillOnce([&]() { @@ -433,7 +433,7 @@ TEST(MergeCellStreams, OneStream) { } struct TestStreamData { - TestStreamData(std::vector data) + explicit TestStreamData(std::vector data) : cells(std::move(data)), cur_cell(cells.begin()), stream(std::make_unique()) {} @@ -573,10 +573,10 @@ TEST(MergeCellStreams, AdvancingRowAdvancesAllRelevantStreams) { }); std::vector streams; - streams.push_back(CellStream(std::move(stream_data_1.stream))); - streams.push_back(CellStream(std::move(stream_data_2.stream))); 
- streams.push_back(CellStream(std::move(stream_data_3.stream))); - streams.push_back(CellStream(std::move(stream_data_4.stream))); + streams.emplace_back(std::move(stream_data_1.stream)); + streams.emplace_back(std::move(stream_data_2.stream)); + streams.emplace_back(std::move(stream_data_3.stream)); + streams.emplace_back(std::move(stream_data_4.stream)); CellStream stream(std::make_unique(std::move(streams))); ASSERT_TRUE(stream.HasValue()); @@ -672,14 +672,13 @@ TEST(MergeCellStreams, AdvancingColumnAdvancesAllRelevantStreams) { }); std::vector streams; - streams.push_back(CellStream(std::move(stream_data.stream))); - streams.push_back( - CellStream(std::move(stream_data_different_column_family.stream))); - streams.push_back( - CellStream(std::move(stream_data_different_column_qualifier.stream))); - streams.push_back(CellStream(std::move(stream_data_different_row.stream))); - streams.push_back(CellStream( - std::move(stream_data_same_column_different_timestamp.stream))); + streams.emplace_back(std::move(stream_data.stream)); + streams.emplace_back(std::move(stream_data_different_column_family.stream)); + streams.emplace_back( + std::move(stream_data_different_column_qualifier.stream)); + streams.emplace_back(std::move(stream_data_different_row.stream)); + streams.emplace_back( + std::move(stream_data_same_column_different_timestamp.stream)); CellStream stream(std::make_unique(std::move(streams))); ASSERT_TRUE(stream.HasValue()); @@ -866,9 +865,6 @@ TEST(FilterTest, BlockAll) { RowFilter filter; filter.set_block_all_filter(true); - auto mock_impl = std::make_unique(); - CellStream(std::move(mock_impl)); - auto maybe_stream = CreateFilter( filter, [] { return CellStream(std::make_unique()); }); @@ -1081,8 +1077,8 @@ TEST_F(FilterApplicationPropagation, PerRowOffset) { RowFilter filter; filter.set_cells_per_row_offset_filter(10); - for (auto& filter_type : {"family_name_regex", "column_regex", "column_range", - "timestamp_range"}) { + for (auto const& filter_type 
: {"family_name_regex", "column_regex", + "column_range", "timestamp_range"}) { PropagationNotExpected(filter_type); } @@ -1093,8 +1089,8 @@ TEST_F(FilterApplicationPropagation, PerRowLimit) { RowFilter filter; filter.set_cells_per_row_limit_filter(10); - for (auto& filter_type : {"family_name_regex", "column_regex", "column_range", - "timestamp_range"}) { + for (auto const& filter_type : {"family_name_regex", "column_regex", + "column_range", "timestamp_range"}) { PropagationNotExpected(filter_type); } @@ -1190,7 +1186,7 @@ class InternalFiltersAreApplied : public ::testing::Test { auto mock_impl = std::make_unique(); EXPECT_CALL(*mock_impl, ApplyFilter) .WillOnce([onApply](InternalFilter const& internal_filter) -> bool { - auto maybe_regex = absl::get_if(&internal_filter); + auto const* maybe_regex = absl::get_if(&internal_filter); EXPECT_NE(nullptr, maybe_regex); onApply(*maybe_regex); return true; From fb3f00ff67599ed67c60531ff67d484c7b1e88a2 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 7 May 2025 21:23:46 +0300 Subject: [PATCH 135/195] emulator: SampleRowKeys: Fix crash due to incorrect usage of lambda capture. Replace incorrect capture of a local variable by reference (which crashes the emulator due to use of dangling references all over the place) with a capture by value of a std::shared_ptr which is certain to be preserved by the capture even when the function surrounding the lambda returns. 
--- google/cloud/bigtable/emulator/table.cc | 77 ++++++++++++++----------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index f7d8c5f946bfa..40c14503ccd5b 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -398,45 +398,50 @@ bool Table::IsDeleteProtectedNoLock() const { return schema_.deletion_protection(); } +// NOLINTBEGIN(readability-function-cognitive-complexity) RowSampler Table::SampleRowKeys( google::bigtable::v2::SampleRowKeysRequest const&) { - // We pick the row key samples from just one column family (the - // largest). - std::string sample_src_cf; - size_t row_index = 0; - size_t max_num_rows = 0; - std::map::const_iterator row_iterator; - std::map::const_iterator row_end; - bool table_is_empty = true; - size_t offset_bytes = 0; - - std::once_flag once_flag; - - auto next_sample = [&, this] { + struct SamplingContext { + // We pick the row key samples from just one column family (the + // largest). + std::string sample_src_cf; + size_t row_index = 0; + size_t max_num_rows = 0; + std::map::const_iterator row_iterator; + std::map::const_iterator row_end; + bool table_is_empty = true; + size_t offset_bytes = 0; + std::once_flag once_flag; + }; + + std::shared_ptr sampling_context = + std::make_shared(); + + auto next_sample = [=]() mutable { // The first time the closure is called, initialize the row // iterators. The sampler works by advancing the iterator by // varying steps every time the closure it contains is called. We // can't initialize the iterators before the closure is first // called since we need to be holding the table lock first (in our // scheme it is grabbed in the constructor of the sampler). 
- std::call_once(once_flag, [this, &max_num_rows, &row_iterator, &row_end, - &table_is_empty, &sample_src_cf]() { + std::call_once(sampling_context->once_flag, [=]() { // Pick rows from just the largest column family since we are // just sampling. However offsets will be estimated based on the // size of the row across all column families. for (auto const& cf : column_families_) { - if (cf.second->size() > max_num_rows) { - table_is_empty = false; - sample_src_cf = cf.first; - row_iterator = cf.second->begin(); - row_end = cf.second->end(); - max_num_rows = cf.second->size(); + if (cf.second->size() > sampling_context->max_num_rows) { + sampling_context->table_is_empty = false; + sampling_context->sample_src_cf = cf.first; + sampling_context->row_iterator = cf.second->begin(); + sampling_context->row_end = cf.second->end(); + sampling_context->max_num_rows = cf.second->size(); } } }); // The signal that there are no more rows (an empty row key). - if (table_is_empty || row_iterator == row_end) { + if (sampling_context->table_is_empty || + sampling_context->row_iterator == sampling_context->row_end) { google::bigtable::v2::SampleRowKeysResponse resp; resp.set_row_key(""); resp.set_offset_bytes(0); @@ -444,35 +449,36 @@ RowSampler Table::SampleRowKeys( return resp; } - for (auto& row = row_iterator; row_iterator != row_end; - row_index++, row_iterator++) { - - auto add_this_row_size_to_offset = [&, this] { + for (auto& row = sampling_context->row_iterator; + sampling_context->row_iterator != sampling_context->row_end; + sampling_context->row_index++, sampling_context->row_iterator++) { + auto add_this_row_size_to_offset = [=] { // First the offset due to the size of the row in the column // family we are sampling. - offset_bytes += (row->first.size() + row->second.size()); + sampling_context->offset_bytes += + (row->first.size() + row->second.size()); // Then consider the size of the row data in other column families, // if they contain the row. 
for (auto const& cf : column_families_) { - if (cf.first == sample_src_cf) { + if (cf.first == sampling_context->sample_src_cf) { continue; } auto r = cf.second->find(row->first); if (r != cf.second->end()) { - offset_bytes += (row->first.size() + r->second.size()); + sampling_context->offset_bytes += + (row->first.size() + r->second.size()); } } - }; // If there are any rows we need to return at least one // row. Always return the last one. - if (row_index == max_num_rows - 1) { + if (sampling_context->row_index == sampling_context->max_num_rows - 1) { google::bigtable::v2::SampleRowKeysResponse resp; resp.set_row_key(row->first); - resp.set_offset_bytes(offset_bytes); + resp.set_offset_bytes(sampling_context->offset_bytes); add_this_row_size_to_offset(); @@ -483,7 +489,7 @@ RowSampler Table::SampleRowKeys( if (std::rand() % 100 == 0) { google::bigtable::v2::SampleRowKeysResponse resp; resp.set_row_key(row->first); - resp.set_offset_bytes(offset_bytes); + resp.set_offset_bytes(sampling_context->offset_bytes); add_this_row_size_to_offset(); @@ -498,7 +504,7 @@ RowSampler Table::SampleRowKeys( google::bigtable::v2::SampleRowKeysResponse resp; resp.set_row_key(""); - resp.set_offset_bytes(offset_bytes); + resp.set_offset_bytes(sampling_context->offset_bytes); return resp; }; @@ -510,8 +516,9 @@ RowSampler Table::SampleRowKeys( return row_sampler; } +// NOLINTEND(readability-function-cognitive-complexity) -// Nolintbegin(readability-convert-member-functions-to-static) +// NOLINTBEGIN(readability-convert-member-functions-to-static) Status RowTransaction::AddToCell( ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell) { return UnimplementedError( From 10cdcbaa2d9847e4ec5f118d9fb120f134948223 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 7 May 2025 21:39:46 +0300 Subject: [PATCH 136/195] emulator: SampleRowKeys: Fix some typos in an important comment. 
--- google/cloud/bigtable/emulator/table.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 40c14503ccd5b..7f58db001a38e 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -418,12 +418,14 @@ RowSampler Table::SampleRowKeys( std::make_shared(); auto next_sample = [=]() mutable { - // The first time the closure is called, initialize the row - // iterators. The sampler works by advancing the iterator by - // varying steps every time the closure it contains is called. We - // can't initialize the iterators before the closure is first - // called since we need to be holding the table lock first (in our - // scheme it is grabbed in the constructor of the sampler). + // The first time the closure is called, initialize the + // context. The sampler works by advancing the iterator by varying + // steps every time the closure it contains is called in the + // server RPC context or elsewhere. We can't initialize the + // iterators before the closure is first called since we need to + // be holding the table lock first (in our scheme it is grabbed in + // the constructor of the RowSampler and the lock is kept until + // the RowSampler is destroyed.) std::call_once(sampling_context->once_flag, [=]() { // Pick rows from just the largest column family since we are // just sampling. However offsets will be estimated based on the From 49616557eaae9906ccb261c7eaa2029e921e46d2 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 8 May 2025 00:23:17 +0300 Subject: [PATCH 137/195] emulator: SampleRowKeys: Fix a couple more bugs. - Fix an infinite loop in returning samples. - Fix a bug in which the offset of the last row sampled was set to 0. bigtable_table_sample_rows_integration_test now passes. Yay! 
--- .../ci/run_integration_tests_emulator_cmake.sh | 7 +++++-- google/cloud/bigtable/emulator/table.cc | 14 +++++++++++++- google/cloud/bigtable/tools/run_emulator_utils.sh | 2 +- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh b/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh index 9621925f22514..4f856aefcc20b 100755 --- a/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh +++ b/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh @@ -44,9 +44,12 @@ CBT_INSTANCE_ADMIN_EMULATOR_START=( source module /google/cloud/bigtable/tools/run_emulator_utils.sh cd "${BINARY_DIR}" -start_emulators 8480 8490 +start_emulators 8888 8490 + +#gdb -ex run --args ctest "${ctest_args[@]}" +#gdb --args ctest "${ctest_args[@]}" +ctest "${ctest_args[@]}" -ctest -R "^bigtable_" "${ctest_args[@]}" exit_status=$? kill_emulators diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 7f58db001a38e..89d936e4a1947 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -446,7 +446,7 @@ RowSampler Table::SampleRowKeys( sampling_context->row_iterator == sampling_context->row_end) { google::bigtable::v2::SampleRowKeysResponse resp; resp.set_row_key(""); - resp.set_offset_bytes(0); + resp.set_offset_bytes(sampling_context->offset_bytes); return resp; } @@ -484,6 +484,14 @@ RowSampler Table::SampleRowKeys( add_this_row_size_to_offset(); + // We are returning early (without letting the for loop + // control update the iterators) so ensure that we consider a + // new row next time, otherwise we will be stuck in an + // infinite loop (will never advance the row iterator past the + // end of the map). 
+ sampling_context->row_index++; + sampling_context->row_iterator++; + return resp; } @@ -495,6 +503,10 @@ RowSampler Table::SampleRowKeys( add_this_row_size_to_offset(); + sampling_context->row_index++; + sampling_context->row_iterator++; + + return resp; } diff --git a/google/cloud/bigtable/tools/run_emulator_utils.sh b/google/cloud/bigtable/tools/run_emulator_utils.sh index 807ae6efb41a9..e688aded4edf7 100755 --- a/google/cloud/bigtable/tools/run_emulator_utils.sh +++ b/google/cloud/bigtable/tools/run_emulator_utils.sh @@ -92,7 +92,7 @@ function start_emulators() { io::log "Launching Cloud Bigtable emulators in the background" trap kill_emulators EXIT - local -r CBT_EMULATOR_CMD="/usr/local/google-cloud-sdk/platform/bigtable-emulator/cbtemulator" + local -r CBT_EMULATOR_CMD="/home/marebri/devel/google-cloud-cpp/build/google/cloud/bigtable/emulator/emulator" "${CBT_EMULATOR_CMD}" -port "${emulator_port}" >emulator.log 2>&1 Date: Thu, 8 May 2025 00:30:27 +0300 Subject: [PATCH 138/195] emulator: SampleRowKeys: Revert 2 ci/ files to upstream contents. These were mistakenly checked in. --- .../bigtable/ci/run_integration_tests_emulator_cmake.sh | 7 ++----- google/cloud/bigtable/tools/run_emulator_utils.sh | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh b/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh index 4f856aefcc20b..9621925f22514 100755 --- a/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh +++ b/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh @@ -44,12 +44,9 @@ CBT_INSTANCE_ADMIN_EMULATOR_START=( source module /google/cloud/bigtable/tools/run_emulator_utils.sh cd "${BINARY_DIR}" -start_emulators 8888 8490 - -#gdb -ex run --args ctest "${ctest_args[@]}" -#gdb --args ctest "${ctest_args[@]}" -ctest "${ctest_args[@]}" +start_emulators 8480 8490 +ctest -R "^bigtable_" "${ctest_args[@]}" exit_status=$? 
kill_emulators diff --git a/google/cloud/bigtable/tools/run_emulator_utils.sh b/google/cloud/bigtable/tools/run_emulator_utils.sh index e688aded4edf7..807ae6efb41a9 100755 --- a/google/cloud/bigtable/tools/run_emulator_utils.sh +++ b/google/cloud/bigtable/tools/run_emulator_utils.sh @@ -92,7 +92,7 @@ function start_emulators() { io::log "Launching Cloud Bigtable emulators in the background" trap kill_emulators EXIT - local -r CBT_EMULATOR_CMD="/home/marebri/devel/google-cloud-cpp/build/google/cloud/bigtable/emulator/emulator" + local -r CBT_EMULATOR_CMD="/usr/local/google-cloud-sdk/platform/bigtable-emulator/cbtemulator" "${CBT_EMULATOR_CMD}" -port "${emulator_port}" >emulator.log 2>&1 Date: Thu, 8 May 2025 19:30:19 +0300 Subject: [PATCH 139/195] emulator: SampleRowKeys: improve the generation of randomness. - Use more modern means to generate the random numbers (std::uniform_int_distribution and friends). - FIX: Seed the random number generator from the random device. --- google/cloud/bigtable/emulator/table.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 89d936e4a1947..6caa7010acbd4 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -23,12 +23,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include @@ -412,6 +414,11 @@ RowSampler Table::SampleRowKeys( bool table_is_empty = true; size_t offset_bytes = 0; std::once_flag once_flag; + + std::random_device rd; + std::mt19937 gen = std::mt19937(rd()); + std::uniform_int_distribution<> distrib = + std::uniform_int_distribution<>(1, INT_MAX); }; std::shared_ptr sampling_context = @@ -496,7 +503,7 @@ RowSampler Table::SampleRowKeys( } // Sample about one every 100 rows randomly. 
- if (std::rand() % 100 == 0) { + if (sampling_context->distrib(sampling_context->gen) % 100 == 0) { google::bigtable::v2::SampleRowKeysResponse resp; resp.set_row_key(row->first); resp.set_offset_bytes(sampling_context->offset_bytes); @@ -506,7 +513,6 @@ RowSampler Table::SampleRowKeys( sampling_context->row_index++; sampling_context->row_iterator++; - return resp; } From 00329124444fdd6c7b86400578164f98312ce2b7 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 12 May 2025 22:38:58 +0300 Subject: [PATCH 140/195] emulator: Implement Conditional Mutations * Implement Conditional Mutations (CheckAndMutate) * Implement tests covering all new code --- google/cloud/bigtable/emulator/CMakeLists.txt | 1 + .../emulator/bigtable_emulator_unit_tests.bzl | 1 + .../emulator/conditional_mutations_test.cc | 227 ++++++++++++++++++ google/cloud/bigtable/emulator/server.cc | 16 +- google/cloud/bigtable/emulator/table.cc | 165 ++++++++++--- google/cloud/bigtable/emulator/table.h | 33 ++- 6 files changed, 403 insertions(+), 40 deletions(-) create mode 100644 google/cloud/bigtable/emulator/conditional_mutations_test.cc diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index 858dcdeb205ab..675d5bd3c7fca 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -55,6 +55,7 @@ if (BUILD_TESTING) set(bigtable_emulator_unit_tests # cmake-format: sort column_family_test.cc + conditional_mutations_test.cc filter_test.cc filtered_map_test.cc range_set_test.cc diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl index 819f6dce68630..35613235b4034 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -18,6 +18,7 @@ bigtable_emulator_unit_tests = [ "column_family_test.cc", + 
"conditional_mutations_test.cc", "filter_test.cc", "filtered_map_test.cc", "range_set_test.cc", diff --git a/google/cloud/bigtable/emulator/conditional_mutations_test.cc b/google/cloud/bigtable/emulator/conditional_mutations_test.cc new file mode 100644 index 0000000000000..57e1b89a41969 --- /dev/null +++ b/google/cloud/bigtable/emulator/conditional_mutations_test.cc @@ -0,0 +1,227 @@ +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/internal/make_status.h" +#include "google/cloud/testing_util/status_matchers.h" +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +struct SetCellParams { + std::string column_family_name; + std::string column_qualifier; + int64_t timestamp_micros; + std::string data; +}; + +StatusOr> create_table( + std::string const& table_name, std::vector& column_families) { + ::google::bigtable::admin::v2::Table schema; + schema.set_name(table_name); + for (auto& column_family_name : column_families) { + (*schema.mutable_column_families())[column_family_name] = + ::google::bigtable::admin::v2::ColumnFamily(); + } + + return Table::Create(schema); +} + +Status has_cell( + std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier, int64_t timestamp_micros, + std::string const& value) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return NotFoundError("no row key found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); + } + + auto& column_family_row = column_family_row_it->second; + auto 
column_row_it = column_family_row.find(column_qualifier); + if (column_row_it == column_family_row.end()) { + return NotFoundError( + "no column found with qualifier", + GCP_ERROR_INFO().WithMetadata("column qualifier", column_qualifier)); + } + + auto& column_row = column_row_it->second; + auto timestamp_it = + column_row.find(std::chrono::duration_cast( + std::chrono::microseconds(timestamp_micros))); + if (timestamp_it == column_row.end()) { + return NotFoundError( + "timestamp not found", + GCP_ERROR_INFO().WithMetadata("timestamp", + absl::StrFormat("%d", timestamp_micros))); + } + + if (timestamp_it->second != value) { + return NotFoundError("wrong value", + GCP_ERROR_INFO() + .WithMetadata("expected", value) + .WithMetadata("found", timestamp_it->second)); + } + + return Status(); +} + +Status set_cells( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector& set_cell_params) { + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + for (auto m : set_cell_params) { + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); + set_cell_mutation->set_family_name(m.column_family_name); + set_cell_mutation->set_column_qualifier(m.column_qualifier); + set_cell_mutation->set_timestamp_micros(m.timestamp_micros); + set_cell_mutation->set_value(m.data); + } + + return table->MutateRow(mutation_request); +} + +TEST(ConditionalMutations, TestTrueMutations) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const column_family_name = "test_column_family"; + auto const* const row_key = "0"; + auto const* const column_qualifier = "column_1"; + auto timestamp_micros = 1000; + auto const* const true_mutation_value = "set by a true mutation"; + auto const* const false_mutation_value = "set by a false 
mutation"; + + std::vector column_families = {column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + ::google::bigtable::v2::Mutation true_mutation; + auto* set_cell_mutation = true_mutation.mutable_set_cell(); + set_cell_mutation->set_family_name(column_family_name); + set_cell_mutation->set_column_qualifier(column_qualifier); + set_cell_mutation->set_timestamp_micros(timestamp_micros); + set_cell_mutation->set_value(true_mutation_value); + + std::vector true_mutations = {true_mutation}; + + ::google::bigtable::v2::Mutation false_mutation; + set_cell_mutation = false_mutation.mutable_set_cell(); + set_cell_mutation->set_family_name(column_family_name); + set_cell_mutation->set_column_qualifier(column_qualifier); + set_cell_mutation->set_timestamp_micros(timestamp_micros); + set_cell_mutation->set_value(false_mutation_value); + + std::vector false_mutations = { + false_mutation}; + + std::vector v = { + {column_family_name, "column_2", 1000, "some_value"}}; + ASSERT_STATUS_OK(set_cells(table, table_name, row_key, v)); + ASSERT_STATUS_OK(has_cell(table, v[0].column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, + v[0].data)); + + google::bigtable::v2::CheckAndMutateRowRequest cond_mut_with_pass_all; + + cond_mut_with_pass_all.set_row_key(row_key); + cond_mut_with_pass_all.set_table_name(table_name); + cond_mut_with_pass_all.mutable_predicate_filter()->set_pass_all_filter(true); + cond_mut_with_pass_all.mutable_true_mutations()->Assign( + true_mutations.begin(), true_mutations.end()); + cond_mut_with_pass_all.mutable_false_mutations()->Assign( + false_mutations.begin(), false_mutations.end()); + + auto status_or = table->CheckAndMutateRow(cond_mut_with_pass_all); + ASSERT_STATUS_OK(status_or); + + // pass_all_filter means that true_mutation should have succeeded, + // so check for the true_mutation cell value e.t.c. 
+ ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, + column_qualifier, timestamp_micros, + true_mutation_value)); + + // And just for good measure, ensure that false_mutation was not written. + ASSERT_EQ(false, + has_cell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, false_mutation_value) + .ok()); +} + +TEST(ConditionalMutations, RejectInvalidRequest) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const column_family_name = "test_column_family"; + auto const* const row_key = "0"; + auto const* const column_qualifier = "column_1"; + auto timestamp_micros = 1000; + auto const* const true_mutation_value = "set by a true mutation"; + auto const* const false_mutation_value = "set by a false mutation"; + + std::vector column_families = {column_family_name}; + auto maybe_table = create_table(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + ::google::bigtable::v2::Mutation true_mutation; + auto* set_cell_mutation = true_mutation.mutable_set_cell(); + set_cell_mutation->set_family_name(column_family_name); + set_cell_mutation->set_column_qualifier(column_qualifier); + set_cell_mutation->set_timestamp_micros(timestamp_micros); + set_cell_mutation->set_value(true_mutation_value); + + std::vector true_mutations = {true_mutation}; + + ::google::bigtable::v2::Mutation false_mutation; + set_cell_mutation = false_mutation.mutable_set_cell(); + set_cell_mutation->set_family_name(column_family_name); + set_cell_mutation->set_column_qualifier(column_qualifier); + set_cell_mutation->set_timestamp_micros(timestamp_micros); + set_cell_mutation->set_value(false_mutation_value); + + // Will be configured so that row_key is not set. 
+ std::vector false_mutations = { + false_mutation}; + + google::bigtable::v2::CheckAndMutateRowRequest cond_mutation_no_row_key; + + cond_mutation_no_row_key.set_table_name(table_name); + cond_mutation_no_row_key.mutable_true_mutations()->Assign( + true_mutations.begin(), true_mutations.end()); + cond_mutation_no_row_key.mutable_false_mutations()->Assign( + false_mutations.begin(), false_mutations.end()); + + auto status_or = table->CheckAndMutateRow(cond_mutation_no_row_key); + ASSERT_EQ(false, status_or.ok()); + + // Will be configured so that both true_mutations and + // false_mutations are empty. + google::bigtable::v2::CheckAndMutateRowRequest cond_mutation_no_mutations; + cond_mutation_no_mutations.set_row_key(row_key); + cond_mutation_no_row_key.set_table_name(table_name); + ASSERT_EQ(false, table->CheckAndMutateRow(cond_mutation_no_mutations).ok()); +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index 993c187c703ea..f759574a6e72f 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -74,8 +74,20 @@ class EmulatorService final : public btproto::Bigtable::Service { grpc::Status CheckAndMutateRow( grpc::ServerContext* /* context */, - btproto::CheckAndMutateRowRequest const* /* request */, - btproto::CheckAndMutateRowResponse* /* response */) override { + btproto::CheckAndMutateRowRequest const* request, + btproto::CheckAndMutateRowResponse* response) override { + auto maybe_table = cluster_->FindTable(request->table_name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + + auto maybe_response = (*maybe_table)->CheckAndMutateRow(*request); + if (!maybe_response.ok()) { + return ToGrpcStatus(maybe_response.status()); + } + + *response = std::move(maybe_response.value()); + return grpc::Status::OK; } diff --git 
a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 0d1f79a50780e..3550b6e911be4 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -16,14 +16,19 @@ #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" +#include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/bigtable/internal/google_bytes_traits.h" #include "google/cloud/internal/make_status.h" #include "google/protobuf/util/field_mask_util.h" +#include #include #include +#include #include #include #include +#include +#include #include namespace google { @@ -220,14 +225,34 @@ StatusOr> Table::FindColumnFamily( Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { std::lock_guard lock(mu_); - assert(request.table_name() == schema_.name()); - RowTransaction row_transaction(this->get(), request); + return DoMutationsWithPossibleRollback(request.row_key(), + request.mutations()); +} + +Status Table::DoMutationsWithPossibleRollback( + std::string const& row_key, + google::protobuf::RepeatedPtrField const& + mutations) { + RowTransaction row_transaction(this->get(), row_key); - for (auto const& mutation : request.mutations()) { + for (auto const& mutation : mutations) { if (mutation.has_set_cell()) { auto const& set_cell = mutation.set_cell(); - auto status = row_transaction.SetCell(set_cell); + + absl::optional timestamp_override = + absl::nullopt; + + auto timestamp = std::chrono::duration_cast( + std::chrono::microseconds(set_cell.timestamp_micros())); + + if (timestamp <= std::chrono::milliseconds::zero()) { + timestamp = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); + timestamp_override.emplace(std::move(timestamp)); + } + + auto status = row_transaction.SetCell(set_cell, timestamp_override); if (!status.ok()) { return status; } @@ 
-271,6 +296,27 @@ Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { return Status(); } +StatusOr Table::CreateCellStream( + std::shared_ptr range_set, + absl::optional maybe_row_filter) const { + auto table_stream_ctor = [range_set = std::move(range_set), this] { + std::vector> per_cf_streams; + per_cf_streams.reserve(column_families_.size()); + for (auto const& column_family : column_families_) { + per_cf_streams.emplace_back(std::make_unique( + *column_family.second, column_family.first, range_set)); + } + return CellStream( + std::make_unique(std::move(per_cf_streams))); + }; + + if (maybe_row_filter.has_value()) { + return CreateFilter(maybe_row_filter.value(), table_stream_ctor); + } + + return table_stream_ctor(); +} + bool FilteredTableStream::ApplyFilter(InternalFilter const& internal_filter) { if (!absl::holds_alternative(internal_filter)) { return MergeCellStreams::ApplyFilter(internal_filter); @@ -329,6 +375,68 @@ StatusOr CreateStringRangeSet( return res; } +StatusOr +Table::CheckAndMutateRow( + google::bigtable::v2::CheckAndMutateRowRequest const& request) { + std::lock_guard lock(mu_); + + auto const& row_key = request.row_key(); + if (row_key.empty()) { + return InvalidArgumentError( + "row key required", + GCP_ERROR_INFO().WithMetadata("CheckAndMutateRowRequest", + request.DebugString())); + } + + if (request.true_mutations_size() == 0 && + request.false_mutations_size() == 0) { + return InvalidArgumentError( + "both true mutations and false mutations are empty", + GCP_ERROR_INFO().WithMetadata("CheckAndMutateRowRequest", + request.DebugString())); + } + + auto range_set = std::make_shared(); + range_set->Sum(StringRangeSet::Range(row_key, false, row_key, false)); + + StatusOr maybe_stream; + if (request.has_predicate_filter()) { + maybe_stream = + CreateCellStream(range_set, std::move(request.predicate_filter())); + } else { + maybe_stream = CreateCellStream(range_set, absl::nullopt); + } + + if (!maybe_stream) { + 
return maybe_stream.status(); + } + + bool a_cell_is_found = false; + + CellStream& stream = *maybe_stream; + if (stream) { // At least one cell/value found when filter is applied + a_cell_is_found = true; + } + + Status status; + if (a_cell_is_found) { + status = DoMutationsWithPossibleRollback(request.row_key(), + request.true_mutations()); + } else { + status = DoMutationsWithPossibleRollback(request.row_key(), + request.false_mutations()); + } + + if (!status.ok()) { + return status; + } + + google::bigtable::v2::CheckAndMutateRowResponse success_response; + success_response.set_predicate_matched(a_cell_is_found); + + return success_response; +} + Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, RowStreamer& row_streamer) const { std::shared_ptr row_set; @@ -342,22 +450,14 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, row_set = std::make_shared(StringRangeSet::All()); } std::lock_guard lock(mu_); - auto table_stream_ctor = [row_set = std::move(row_set), this] { - std::vector> per_cf_streams; - per_cf_streams.reserve(column_families_.size()); - for (auto const& column_family : column_families_) { - per_cf_streams.emplace_back(std::make_unique( - *column_family.second, column_family.first, row_set)); - } - return CellStream( - std::make_unique(std::move(per_cf_streams))); - }; + StatusOr maybe_stream; if (request.has_filter()) { - maybe_stream = CreateFilter(request.filter(), table_stream_ctor); + maybe_stream = CreateCellStream(row_set, std::move(request.filter())); } else { - maybe_stream = table_stream_ctor(); + maybe_stream = CreateCellStream(row_set, absl::nullopt); } + if (!maybe_stream) { return maybe_stream.status(); } @@ -419,7 +519,7 @@ Status RowTransaction::DeleteFromColumn( auto& column_family = maybe_column_family->get(); auto deleted_cells = column_family.DeleteColumn( - request_.row_key(), delete_from_column.column_qualifier(), + row_key_, delete_from_column.column_qualifier(), 
delete_from_column.time_range()); for (auto& cell : deleted_cells) { @@ -435,7 +535,7 @@ Status RowTransaction::DeleteFromColumn( Status RowTransaction::DeleteFromRow() { bool row_existed; for (auto& column_family : table_->column_families_) { - auto deleted_columns = column_family.second->DeleteRow(request_.row_key()); + auto deleted_columns = column_family.second->DeleteRow(row_key_); for (auto& column : deleted_columns) { for (auto& cell : column.second) { @@ -452,9 +552,8 @@ Status RowTransaction::DeleteFromRow() { return Status(); } - return NotFoundError( - "row not found in table", - GCP_ERROR_INFO().WithMetadata("row", request_.row_key())); + return NotFoundError("row not found in table", + GCP_ERROR_INFO().WithMetadata("row", row_key_)); } Status RowTransaction::DeleteFromFamily( @@ -476,17 +575,17 @@ Status RowTransaction::DeleteFromFamily( } std::map::iterator column_family_row_it; - if (column_family_it->second->find(request_.row_key()) == + if (column_family_it->second->find(row_key_) == column_family_it->second->end()) { // The row does not exist return NotFoundError( "row key is not found in column family", GCP_ERROR_INFO() - .WithMetadata("row key", request_.row_key()) + .WithMetadata("row key", row_key_) .WithMetadata("column family", column_family_it->first)); } - auto deleted = column_family_it->second->DeleteRow(request_.row_key()); + auto deleted = column_family_it->second->DeleteRow(row_key_); for (auto const& column : deleted) { for (auto const& cell : column.second) { RestoreValue restore_value{*column_family_it->second, @@ -499,8 +598,12 @@ Status RowTransaction::DeleteFromFamily( return Status(); } +// timestamp_override, if provided, will be used instead of +// set_cell.timestamp. The override is used to set the timestamp to +// the server time in case a timestamp <= 0 is provided. 
Status RowTransaction::SetCell( - ::google::bigtable::v2::Mutation_SetCell const& set_cell) { + ::google::bigtable::v2::Mutation_SetCell const& set_cell, + absl::optional timestamp_override) { auto maybe_column_family = table_->FindColumnFamily(set_cell); if (!maybe_column_family) { return maybe_column_family.status(); @@ -511,14 +614,12 @@ Status RowTransaction::SetCell( auto timestamp = std::chrono::duration_cast( std::chrono::microseconds(set_cell.timestamp_micros())); - if (timestamp <= std::chrono::milliseconds::zero()) { - timestamp = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()); + if (timestamp_override.has_value()) { + timestamp = timestamp_override.value(); } - auto maybe_old_value = - column_family.SetCell(request_.row_key(), set_cell.column_qualifier(), - timestamp, set_cell.value()); + auto maybe_old_value = column_family.SetCell( + row_key_, set_cell.column_qualifier(), timestamp, set_cell.value()); if (!maybe_old_value) { DeleteValue delete_value{column_family, @@ -535,7 +636,7 @@ Status RowTransaction::SetCell( } void RowTransaction::Undo() { - auto row_key = request_.row_key(); + auto row_key = row_key_; while (!undo_.empty()) { auto op = undo_.top(); diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 80c9f3fcb257b..94ee305fea979 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -17,16 +17,20 @@ #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/bigtable/emulator/row_streamer.h" #include "google/cloud/status.h" #include "google/cloud/status_or.h" #include "absl/types/variant.h" +#include "google/protobuf/repeated_ptr_field.h" #include #include #include #include +#include #include #include +#include #include #include #include @@ -53,6 +57,8 @@ class Table : public 
std::enable_shared_from_this<Table>
{ bool IsDeleteProtected() const; + StatusOr CheckAndMutateRow( + google::bigtable::v2::CheckAndMutateRowRequest const& request); Status MutateRow(google::bigtable::v2::MutateRowRequest const& request); Status ReadRows(google::bigtable::v2::ReadRowsRequest const& request, @@ -80,6 +86,13 @@ class Table : public std::enable_shared_from_this
{ MESSAGE const& message) const; bool IsDeleteProtectedNoLock() const; Status Construct(google::bigtable::admin::v2::Table schema); + StatusOr CreateCellStream( + std::shared_ptr range_set, + absl::optional) const; + Status DoMutationsWithPossibleRollback( + std::string const& row_key, + google::protobuf::RepeatedPtrField const& + mutations); mutable std::mutex mu_; google::bigtable::admin::v2::Table schema_; @@ -101,10 +114,9 @@ struct DeleteValue { class RowTransaction { public: - explicit RowTransaction( - std::shared_ptr
table, - ::google::bigtable::v2::MutateRowRequest const& request) - : request_(request) { + explicit RowTransaction(std::shared_ptr
table, + std::string const& row_key) + : row_key_(row_key) { table_ = std::move(table); committed_ = false; }; @@ -117,7 +129,12 @@ class RowTransaction { void commit() { committed_ = true; } - Status SetCell(::google::bigtable::v2::Mutation_SetCell const& set_cell); + // timestamp_override, if provided, will be used instead of + // set_cell.timestamp. The override is used to set the timestamp to + // the server time in case a timestamp <= 0 is provided. + Status SetCell(::google::bigtable::v2::Mutation_SetCell const& set_cell, + absl::optional timestamp_override = + absl::nullopt); Status AddToCell( ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell); Status MergeToCell( @@ -136,7 +153,11 @@ class RowTransaction { bool committed_; std::shared_ptr
table_; std::stack> undo_; - ::google::bigtable::v2::MutateRowRequest const& request_; + // row_key_ is initialized from the request proto and therefore it + // is safe to access it while the mutation request is ongoing. We + // store a reference to it to avoid copying a potentially very large + // (up to 4KB) value. + std::string const& row_key_; }; /** From 341d6729f36bfe2097b7772e9d7c7a8994b6b581 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 13 May 2025 19:19:59 +0300 Subject: [PATCH 141/195] emulator: server: implement MutateRows. This is a partial fix for TBL-60. It partially fixes that issue because the test now successfully executes the bulk mutation that was failing (it called the MutateRows RPC). The test (bigtable_table_sample_rows_integration_test) still fails, but that happens at a subsequent location and that will be fixed in an upcoming PR. This is presented as its own PR since it is a single, logical, useful change. TESTED=That part of the integration test now passes. References: TBL-60. 
--- google/cloud/bigtable/emulator/server.cc | 40 ++++++++++++++++++- google/cloud/bigtable/emulator/table.h | 9 +++++ .../cloud/bigtable/emulator/to_grpc_status.cc | 27 ++++++++++++- .../cloud/bigtable/emulator/to_grpc_status.h | 3 ++ 4 files changed, 75 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index f759574a6e72f..a7f1fb33def7b 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -18,9 +18,16 @@ #include "google/cloud/internal/make_status.h" #include #include +#include #include +#include +#include +#include #include #include +#include +#include +#include namespace google { namespace cloud { @@ -67,8 +74,37 @@ class EmulatorService final : public btproto::Bigtable::Service { grpc::Status MutateRows( grpc::ServerContext* /* context */, - btproto::MutateRowsRequest const* /* request */, - grpc::ServerWriter* /* writer */) override { + btproto::MutateRowsRequest const* request, + grpc::ServerWriter* writer) override { + auto maybe_table = cluster_->FindTable(request->table_name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + + int64_t index = 0; + google::bigtable::v2::MutateRowsResponse response; + + for (auto const& entry : request->entries()) { + response.Clear(); + + auto status = (*maybe_table) + ->DoMutationsWithPossibleRollbackLocked( + entry.row_key(), entry.mutations()); + + auto* response_entry = response.add_entries(); + response_entry->set_index(index++); + auto* s = response_entry->mutable_status(); + *s = ToGoogleRPCStatus(status); + + if (index == request->entries_size()) { + auto opts = grpc::WriteOptions(); + opts.set_last_message(); + writer->WriteLast(response, opts); + } else { + writer->Write(response); + } + } + return grpc::Status::OK; } diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 94ee305fea979..b43201ae172ab 100644 --- 
a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -34,6 +34,7 @@ #include #include #include +#include #include namespace google { @@ -60,6 +61,14 @@ class Table : public std::enable_shared_from_this
{ StatusOr CheckAndMutateRow( google::bigtable::v2::CheckAndMutateRowRequest const& request); Status MutateRow(google::bigtable::v2::MutateRowRequest const& request); + Status DoMutationsWithPossibleRollbackLocked( + std::string const& row_key, + google::protobuf::RepeatedPtrField const& + mutations) { + std::lock_guard lock(mu_); + + return DoMutationsWithPossibleRollback(row_key, mutations); + } Status ReadRows(google::bigtable::v2::ReadRowsRequest const& request, RowStreamer& row_streamer) const; diff --git a/google/cloud/bigtable/emulator/to_grpc_status.cc b/google/cloud/bigtable/emulator/to_grpc_status.cc index 46d4d6b818698..13b049ab5c2b5 100644 --- a/google/cloud/bigtable/emulator/to_grpc_status.cc +++ b/google/cloud/bigtable/emulator/to_grpc_status.cc @@ -64,7 +64,7 @@ grpc::StatusCode MapStatusCode(google::cloud::StatusCode code) { } } -::grpc::Status ToGrpcStatus(Status const& to_convert) { +google::rpc::ErrorInfo ErrorInfoFromStatus(Status const& to_convert) { google::rpc::ErrorInfo error_info; error_info.set_reason(to_convert.error_info().reason()); error_info.set_domain(to_convert.error_info().domain()); @@ -73,11 +73,25 @@ ::grpc::Status ToGrpcStatus(Status const& to_convert) { md_name_value.second; } + return error_info; +} + +google::rpc::Status RPCStatusFromStatusAndErrorInfo( + Status const& to_convert, google::rpc::ErrorInfo const& error_info) { google::rpc::Status rpc_status; rpc_status.set_code(static_cast(to_convert.code())); rpc_status.set_message(to_convert.message()); auto& rpc_status_details = *rpc_status.add_details(); - rpc_status_details.PackFrom(std::move(error_info)); + rpc_status_details.PackFrom(error_info); + + return rpc_status; +} + +::grpc::Status ToGrpcStatus(Status const& to_convert) { + google::rpc::ErrorInfo error_info = ErrorInfoFromStatus(to_convert); + + google::rpc::Status rpc_status = + RPCStatusFromStatusAndErrorInfo(to_convert, std::move(error_info)); std::string serialized_rpc_status; 
rpc_status.SerializeToString(&serialized_rpc_status); @@ -85,6 +99,15 @@ ::grpc::Status ToGrpcStatus(Status const& to_convert) { std::move(serialized_rpc_status)); } +::google::rpc::Status ToGoogleRPCStatus(Status const& to_convert) { + google::rpc::ErrorInfo error_info = ErrorInfoFromStatus(to_convert); + + google::rpc::Status rpc_status = + RPCStatusFromStatusAndErrorInfo(to_convert, std::move(error_info)); + + return rpc_status; +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/to_grpc_status.h b/google/cloud/bigtable/emulator/to_grpc_status.h index fce22b6bdaef5..c561db22b46b2 100644 --- a/google/cloud/bigtable/emulator/to_grpc_status.h +++ b/google/cloud/bigtable/emulator/to_grpc_status.h @@ -16,6 +16,7 @@ #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TO_GRPC_STATUS_H #include "google/cloud/status.h" +#include "google/rpc/status.pb.h" #include namespace google { @@ -26,6 +27,8 @@ namespace emulator { /// Convert a google::cloud::Status to grpc::Status. ::grpc::Status ToGrpcStatus(Status const& to_convert); +// Convert a google::cloud::Status to a google::rpc::Status +::google::rpc::Status ToGoogleRPCStatus(Status const& to_convert); } // namespace emulator } // namespace bigtable } // namespace cloud From 337d8bdac1ea1affbf7bd9b1fc345ba438254918 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 14 May 2025 15:24:04 +0300 Subject: [PATCH 142/195] emulator: Implement the DropRowRanges RPC. TESTED=bigtable_mutations_integration_test now passes when this commit is merged to the integration branch. TESTED=Passing unit tests for both modes of DropRowRanges (in this commit). 
References: TBL-59 Fixes: TBL-59 --- google/cloud/bigtable/emulator/CMakeLists.txt | 1 + .../emulator/bigtable_emulator_unit_tests.bzl | 1 + .../cloud/bigtable/emulator/column_family.h | 16 +- .../bigtable/emulator/drop_row_range_test.cc | 255 ++++++++++++++++++ google/cloud/bigtable/emulator/server.cc | 13 +- google/cloud/bigtable/emulator/table.cc | 44 +++ google/cloud/bigtable/emulator/table.h | 3 + 7 files changed, 329 insertions(+), 4 deletions(-) create mode 100644 google/cloud/bigtable/emulator/drop_row_range_test.cc diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index 675d5bd3c7fca..ecf00ea70bf4a 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -56,6 +56,7 @@ if (BUILD_TESTING) # cmake-format: sort column_family_test.cc conditional_mutations_test.cc + drop_row_range_test.cc filter_test.cc filtered_map_test.cc range_set_test.cc diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl index 35613235b4034..ffffeab82539e 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -19,6 +19,7 @@ bigtable_emulator_unit_tests = [ "column_family_test.cc", "conditional_mutations_test.cc", + "drop_row_range_test.cc", "filter_test.cc", "filtered_map_test.cc", "range_set_test.cc", diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index e7928afa36516..930ff80f1a05d 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -201,6 +201,7 @@ class ColumnFamily { ColumnFamily& operator=(ColumnFamily const&) = delete; using const_iterator = std::map::const_iterator; + using iterator = std::map::iterator; /** * Insert or update and existing cell at a given row, 
column and timestamp. @@ -271,23 +272,34 @@ class ColumnFamily { std::chrono::milliseconds timestamp); const_iterator begin() const { return rows_.begin(); } + iterator begin() { return rows_.begin(); } const_iterator end() const { return rows_.end(); } + iterator end() { return rows_.end(); } const_iterator lower_bound(std::string const& row_key) const { return rows_.lower_bound(row_key); } + iterator lower_bound(std::string const& row_key) { + return rows_.lower_bound(row_key); + } const_iterator upper_bound(std::string const& row_key) const { return rows_.upper_bound(row_key); } + iterator upper_bound(std::string const& row_key) { + return rows_.upper_bound(row_key); + } std::map::iterator find( std::string const& row_key) { return rows_.find(row_key); } - void erase(std::map::iterator row_it) { - rows_.erase(row_it); + iterator erase( + std::map::iterator row_it) { + return rows_.erase(row_it); } + void clear() { rows_.clear(); } + private: std::map rows_; }; diff --git a/google/cloud/bigtable/emulator/drop_row_range_test.cc b/google/cloud/bigtable/emulator/drop_row_range_test.cc new file mode 100644 index 0000000000000..56446b047e5e0 --- /dev/null +++ b/google/cloud/bigtable/emulator/drop_row_range_test.cc @@ -0,0 +1,255 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/row_streamer.h" +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/internal/make_status.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include "google/cloud/testing_util/status_matchers.h" +#include "gmock/gmock.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +struct SetCellParams { + std::string column_family_name; + std::string column_qualifier; + int64_t timestamp_micros; + std::string data; +}; + +StatusOr> CreateTable( + std::string const& table_name, std::vector& column_families) { + ::google::bigtable::admin::v2::Table schema; + schema.set_name(table_name); + for (auto& column_family_name : column_families) { + (*schema.mutable_column_families())[column_family_name] = + ::google::bigtable::admin::v2::ColumnFamily(); + } + + return Table::Create(schema); +} + +Status SetCells( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector& set_cell_params) { + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + for (auto m : set_cell_params) { + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); + set_cell_mutation->set_family_name(m.column_family_name); + set_cell_mutation->set_column_qualifier(m.column_qualifier); + set_cell_mutation->set_timestamp_micros(m.timestamp_micros); + set_cell_mutation->set_value(m.data); + } + + return table->MutateRow(mutation_request); +} + +Status SetCellsInMultipleRows( + std::shared_ptr table, + std::string const& table_name, + std::map> params) { + for (auto& p : 
params) { + auto status = SetCells(table, table_name, p.first, p.second); + if (!status.ok()) { + return status; + } + } + + return Status(); +} + +Status HasCell( + std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier, int64_t timestamp_micros, + std::string const& value) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return NotFoundError("no row key found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); + } + + auto& column_family_row = column_family_row_it->second; + auto column_row_it = column_family_row.find(column_qualifier); + if (column_row_it == column_family_row.end()) { + return NotFoundError( + "no column found with qualifier", + GCP_ERROR_INFO().WithMetadata("column qualifier", column_qualifier)); + } + + auto& column_row = column_row_it->second; + auto timestamp_it = + column_row.find(std::chrono::duration_cast( + std::chrono::microseconds(timestamp_micros))); + if (timestamp_it == column_row.end()) { + return NotFoundError( + "timestamp not found", + GCP_ERROR_INFO().WithMetadata("timestamp", + absl::StrFormat("%d", timestamp_micros))); + } + + if (timestamp_it->second != value) { + return NotFoundError("wrong value", + GCP_ERROR_INFO() + .WithMetadata("expected", value) + .WithMetadata("found", timestamp_it->second)); + } + + return Status(); +} + +StatusOr HasRow( + std::shared_ptr& table, + std::string const& column_family, std::string const& row_key) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return NotFoundError( + "column 
family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return false; + } + + return true; +} + +TEST(DropRowRange, DropAll) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + std::vector column_families = {"column_family_1", + "column_family_2"}; + + auto maybe_table = CreateTable(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + + auto table = maybe_table.value(); + + std::map> params = { + {"0", + {{column_families[0], "column_1", 1000, "data_0"}, + {column_families[1], "column_1", 3000, "data_2"}}}, + {"1", + {{column_families[0], "column_1", 2000, "data_1"}, + {column_families[1], "column_1", 4000, "data_3"}}}}; + + ASSERT_STATUS_OK(SetCellsInMultipleRows(table, table_name, params)); + + ::google::bigtable::admin::v2::DropRowRangeRequest request; + request.set_name(table_name); + request.set_delete_all_data_from_table(true); + + auto status = table->DropRowRange(request); + ASSERT_STATUS_OK(status); + + for (auto& p : params) { + for (auto& set_cell_params : p.second) { + auto status_or = + HasRow(table, set_cell_params.column_family_name, p.first); + ASSERT_STATUS_OK(status_or); + ASSERT_FALSE(status_or.value()); + } + } +} + +TEST(DropRowRange, DropSome) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + std::vector column_families = {"column_family_1", + "column_family_2"}; + + auto maybe_table = CreateTable(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + + auto table = maybe_table.value(); + + std::map> params = { + {"a", + { + {column_families[0], "column_1", 1000, "data_0"}, + }}, + {"aa", + {{column_families[0], "column_1", 2000, "data_1"}, + {column_families[1], "column_1", 5000, "data_5"}}}, + {"aaa", {{column_families[0], "column_1", 3000, "data_2"}}}, + {"aab", 
{{column_families[0], "column_1", 4000, "data_3"}}}, + {"ab", {{column_families[1], "column_1", 6000, "data_6"}}}, + }; + + ASSERT_STATUS_OK(SetCellsInMultipleRows(table, table_name, params)); + + ::google::bigtable::admin::v2::DropRowRangeRequest request; + request.set_name(table_name); + std::string prefix = "aa"; + request.set_row_key_prefix(prefix); + + auto status = table->DropRowRange(request); + ASSERT_STATUS_OK(status); + + for (auto& p : params) { + for (auto& set_cell_params : p.second) { + if (absl::StartsWith(p.first, prefix)) { + auto status_or = + HasRow(table, set_cell_params.column_family_name, p.first); + ASSERT_STATUS_OK(status_or); + ASSERT_FALSE(status_or.value()); + } else { + auto status_or = + HasRow(table, set_cell_params.column_family_name, p.first); + ASSERT_STATUS_OK(status_or); + ASSERT_TRUE(status_or.value()); + } + } + } +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index a7f1fb33def7b..82573d9bbf814 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -252,9 +252,18 @@ class EmulatorTableService final : public btadmin::BigtableTableAdmin::Service { } grpc::Status DropRowRange(grpc::ServerContext* /* context */, - btadmin::DropRowRangeRequest const* /* request */, + btadmin::DropRowRangeRequest const* request, google::protobuf::Empty* /* response */) override { - // FIXME + auto maybe_table = cluster_->FindTable(request->name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + + auto status = (*maybe_table)->DropRowRange(*request); + if (!status.ok()) { + return ToGrpcStatus(status); + } + return grpc::Status::OK; } diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 3550b6e911be4..af45cf8efe93f 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ 
b/google/cloud/bigtable/emulator/table.cc @@ -20,8 +20,10 @@ #include "google/cloud/bigtable/internal/google_bytes_traits.h" #include "google/cloud/internal/make_status.h" #include "google/protobuf/util/field_mask_util.h" +#include #include #include +#include #include #include #include @@ -492,6 +494,48 @@ bool Table::IsDeleteProtectedNoLock() const { return schema_.deletion_protection(); } +Status Table::DropRowRange( + ::google::bigtable::admin::v2::DropRowRangeRequest const& request) { + std::lock_guard lock(mu_); + + if (!request.has_row_key_prefix() && + !request.has_delete_all_data_from_table()) { + return InvalidArgumentError( + "Neither row prefix nor deleted all data from table is set", + GCP_ERROR_INFO().WithMetadata("DropRowRange request", + request.DebugString())); + } + + if (request.has_delete_all_data_from_table()) { + for (auto& column_family : column_families_) { + column_family.second->clear(); + } + + return Status(); + } + + auto const& row_prefix = request.row_key_prefix(); + if (row_prefix.empty()) { + return InvalidArgumentError( + "Row prefix provided is empty.", + GCP_ERROR_INFO().WithMetadata("DropRowRange request", + request.DebugString())); + } + + for (auto& cf : column_families_) { + for (auto row_it = cf.second->lower_bound(row_prefix); + row_it != cf.second->end();) { + if (absl::StartsWith(row_it->first, row_prefix)) { + row_it = cf.second->erase(row_it); + } else { + break; + } + } + } + + return Status(); +} + // NOLINTBEGIN(readability-convert-member-functions-to-static) Status RowTransaction::AddToCell( ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell) { diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index b43201ae172ab..40de01a5a4eed 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -85,6 +85,9 @@ class Table : public std::enable_shared_from_this
{ std::shared_ptr
get() { return shared_from_this(); } + Status DropRowRange( + ::google::bigtable::admin::v2::DropRowRangeRequest const& request); + private: Table() = default; friend class RowSetIterator; From c33e39f8ef2bea6b193accf030b5bbb9dd4d1bb2 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 14 May 2025 21:02:42 +0300 Subject: [PATCH 143/195] emulator: Trivial fixes for Google C++ style. - CamelCase for functions in tests that did not conform to that standard. - Mechanical fix by ./ci/cloudbuild/build.sh -t checkers-ci All tests continue to pass. --- .../cloud/bigtable/emulator/column_family.h | 3 +- .../emulator/conditional_mutations_test.cc | 37 ++- .../bigtable/emulator/drop_row_range_test.cc | 9 +- .../cloud/bigtable/emulator/rollback_test.cc | 232 +++++++++--------- 4 files changed, 136 insertions(+), 145 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 930ff80f1a05d..d38824289c0cd 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -293,8 +293,7 @@ class ColumnFamily { return rows_.find(row_key); } - iterator erase( - std::map::iterator row_it) { + iterator erase(std::map::iterator row_it) { return rows_.erase(row_it); } diff --git a/google/cloud/bigtable/emulator/conditional_mutations_test.cc b/google/cloud/bigtable/emulator/conditional_mutations_test.cc index 57e1b89a41969..540213cac1c93 100644 --- a/google/cloud/bigtable/emulator/conditional_mutations_test.cc +++ b/google/cloud/bigtable/emulator/conditional_mutations_test.cc @@ -18,7 +18,7 @@ struct SetCellParams { std::string data; }; -StatusOr> create_table( +StatusOr> CreateTable( std::string const& table_name, std::vector& column_families) { ::google::bigtable::admin::v2::Table schema; schema.set_name(table_name); @@ -30,11 +30,10 @@ StatusOr> create_table( return Table::Create(schema); } -Status has_cell( - std::shared_ptr& table, - std::string const& 
column_family, std::string const& row_key, - std::string const& column_qualifier, int64_t timestamp_micros, - std::string const& value) { +Status HasCell(std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier, int64_t timestamp_micros, + std::string const& value) { auto column_family_it = table->find(column_family); if (column_family_it == table->end()) { return NotFoundError( @@ -80,7 +79,7 @@ Status has_cell( return Status(); } -Status set_cells( +Status SetCells( std::shared_ptr& table, std::string const& table_name, std::string const& row_key, std::vector& set_cell_params) { @@ -110,7 +109,7 @@ TEST(ConditionalMutations, TestTrueMutations) { auto const* const false_mutation_value = "set by a false mutation"; std::vector column_families = {column_family_name}; - auto maybe_table = create_table(table_name, column_families); + auto maybe_table = CreateTable(table_name, column_families); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); @@ -136,10 +135,10 @@ TEST(ConditionalMutations, TestTrueMutations) { std::vector v = { {column_family_name, "column_2", 1000, "some_value"}}; - ASSERT_STATUS_OK(set_cells(table, table_name, row_key, v)); - ASSERT_STATUS_OK(has_cell(table, v[0].column_family_name, row_key, - v[0].column_qualifier, v[0].timestamp_micros, - v[0].data)); + ASSERT_STATUS_OK(SetCells(table, table_name, row_key, v)); + ASSERT_STATUS_OK(HasCell(table, v[0].column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, + v[0].data)); google::bigtable::v2::CheckAndMutateRowRequest cond_mut_with_pass_all; @@ -156,15 +155,13 @@ TEST(ConditionalMutations, TestTrueMutations) { // pass_all_filter means that true_mutation should have succeeded, // so check for the true_mutation cell value e.t.c. 
- ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, timestamp_micros, - true_mutation_value)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, true_mutation_value)); // And just for good measure, ensure that false_mutation was not written. - ASSERT_EQ(false, - has_cell(table, column_family_name, row_key, column_qualifier, - timestamp_micros, false_mutation_value) - .ok()); + ASSERT_EQ(false, HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, false_mutation_value) + .ok()); } TEST(ConditionalMutations, RejectInvalidRequest) { @@ -177,7 +174,7 @@ TEST(ConditionalMutations, RejectInvalidRequest) { auto const* const false_mutation_value = "set by a false mutation"; std::vector column_families = {column_family_name}; - auto maybe_table = create_table(table_name, column_families); + auto maybe_table = CreateTable(table_name, column_families); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); diff --git a/google/cloud/bigtable/emulator/drop_row_range_test.cc b/google/cloud/bigtable/emulator/drop_row_range_test.cc index 56446b047e5e0..9ee7905daa551 100644 --- a/google/cloud/bigtable/emulator/drop_row_range_test.cc +++ b/google/cloud/bigtable/emulator/drop_row_range_test.cc @@ -93,11 +93,10 @@ Status SetCellsInMultipleRows( return Status(); } -Status HasCell( - std::shared_ptr& table, - std::string const& column_family, std::string const& row_key, - std::string const& column_qualifier, int64_t timestamp_micros, - std::string const& value) { +Status HasCell(std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier, int64_t timestamp_micros, + std::string const& value) { auto column_family_it = table->find(column_family); if (column_family_it == table->end()) { return NotFoundError( diff --git a/google/cloud/bigtable/emulator/rollback_test.cc 
b/google/cloud/bigtable/emulator/rollback_test.cc index c58650ed538a9..ad8f5b8c1294d 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -46,7 +46,7 @@ struct SetCellParams { std::string data; }; -StatusOr> create_table( +StatusOr> CreateTable( std::string const& table_name, std::vector& column_families) { ::google::bigtable::admin::v2::Table schema; schema.set_name(table_name); @@ -58,7 +58,7 @@ StatusOr> create_table( return Table::Create(schema); } -Status delete_from_families( +Status DeleteFromFamilies( std::shared_ptr& table, std::string const& table_name, std::string const& row_key, std::vector const& column_families) { @@ -82,7 +82,7 @@ struct DeleteFromColumnParams { ::google::bigtable::v2::TimestampRange* timestamp_range; }; -Status delete_from_columns( +Status DeleteFromColumns( std::shared_ptr& table, std::string const& table_name, std::string const& row_key, std::vector v) { @@ -103,7 +103,7 @@ Status delete_from_columns( return table->MutateRow(mutation_request); } -Status set_cells( +Status SetCells( std::shared_ptr& table, std::string const& table_name, std::string const& row_key, std::vector& set_cell_params) { @@ -123,11 +123,10 @@ Status set_cells( return table->MutateRow(mutation_request); } -Status has_cell( - std::shared_ptr& table, - std::string const& column_family, std::string const& row_key, - std::string const& column_qualifier, int64_t timestamp_micros, - std::string const& value) { +Status HasCell(std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier, int64_t timestamp_micros, + std::string const& value) { auto column_family_it = table->find(column_family); if (column_family_it == table->end()) { return NotFoundError( @@ -173,7 +172,7 @@ Status has_cell( return Status(); } -Status has_column( +Status HasColumn( std::shared_ptr& table, std::string const& column_family, std::string const& row_key, 
std::string const& column_qualifier) { @@ -205,7 +204,7 @@ Status has_column( return Status(); } -StatusOr> get_column( +StatusOr> GetColumn( std::shared_ptr& table, std::string const& column_family, std::string const& row_key, std::string const& column_qualifier) { @@ -240,8 +239,8 @@ StatusOr> get_column( return ret; } -Status has_row(std::shared_ptr& table, - std::string const& column_family, std::string const& row_key) { +Status HasRow(std::shared_ptr& table, + std::string const& column_family, std::string const& row_key) { auto column_family_it = table->find(column_family); if (column_family_it == table->end()) { return NotFoundError( @@ -276,7 +275,7 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { auto const* data = "test"; std::vector column_families = {column_family_name}; - auto maybe_table = create_table(table_name, column_families); + auto maybe_table = CreateTable(table_name, column_families); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); @@ -286,11 +285,11 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { data}; v.push_back(p); - auto status = set_cells(table, table_name, row_key, v); + auto status = SetCells(table, table_name, row_key, v); ASSERT_STATUS_OK(status); auto status_or = - get_column(table, column_family_name, row_key, column_qualifier); + GetColumn(table, column_family_name, row_key, column_qualifier); ASSERT_STATUS_OK(status_or.status()); auto column = status_or.value(); ASSERT_EQ(1, column.size()); @@ -310,9 +309,9 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { {"non_existent_column_family_name_causes_tx_rollbaclk", column_qualifier, 1000, data}}; auto const* const row_key_2 = "1"; - status = set_cells(table, table_name, row_key_2, v); + status = SetCells(table, table_name, row_key_2, v); ASSERT_NE(true, status.ok()); - ASSERT_FALSE(has_row(table, column_family_name, row_key_2).ok()); + ASSERT_FALSE(HasRow(table, column_family_name, row_key_2).ok()); } // Does the SetCell 
mutation work to set a cell to a specific value? @@ -328,7 +327,7 @@ TEST(TransactonRollback, SetCellBasicFunction) { auto const* data = "test"; std::vector column_families = {column_family_name}; - auto maybe_table = create_table(table_name, column_families); + auto maybe_table = CreateTable(table_name, column_families); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); @@ -338,12 +337,12 @@ TEST(TransactonRollback, SetCellBasicFunction) { data}; v.push_back(p); - auto status = set_cells(table, table_name, row_key, v); + auto status = SetCells(table, table_name, row_key, v); ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, timestamp_micros, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, data)); } // Test that an old value is correctly restored in a pre-populated @@ -366,7 +365,7 @@ TEST(TransactonRollback, TestRestoreValue) { auto const* const good_mutation_data = "expected to succeed"; std::vector column_families = {valid_column_family_name}; - auto maybe_table = create_table(table_name, column_families); + auto maybe_table = CreateTable(table_name, column_families); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); @@ -375,11 +374,11 @@ TEST(TransactonRollback, TestRestoreValue) { good_mutation_timestamp_micros, good_mutation_data}; v.push_back(p); - auto status = set_cells(table, table_name, row_key, v); + auto status = SetCells(table, table_name, row_key, v); ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, - column_qualifier, good_mutation_timestamp_micros, - good_mutation_data)); + ASSERT_STATUS_OK(HasCell(table, valid_column_family_name, row_key, + column_qualifier, good_mutation_timestamp_micros, + good_mutation_data)); // Now atomically try 2 mutations. One modifies the above set cell, // and the other one is expected to fail. 
The test is that @@ -397,7 +396,7 @@ TEST(TransactonRollback, TestRestoreValue) { p = {"invalid_column_family", "test2", 1000, "expected to fail"}; w.push_back(p); - status = set_cells(table, table_name, row_key, w); + status = SetCells(table, table_name, row_key, w); ASSERT_NE(status.ok(), true); // The whole mutation chain should // fail because the 2nd mutation // contains an invalid column family. @@ -405,9 +404,9 @@ TEST(TransactonRollback, TestRestoreValue) { // And the first mutation should have been rolled back by // RestoreValue and so should contain the old value, and not "new // data". - ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, - column_qualifier, good_mutation_timestamp_micros, - good_mutation_data)); + ASSERT_STATUS_OK(HasCell(table, valid_column_family_name, row_key, + column_qualifier, good_mutation_timestamp_micros, + good_mutation_data)); } // Test that a new cell introduced in a chain of SetCell mutations is @@ -425,7 +424,7 @@ TEST(TransactonRollback, DeleteValue) { // name. auto const* const valid_column_family_name = "test"; std::vector column_families = {valid_column_family_name}; - auto maybe_table = create_table(table_name, column_families); + auto maybe_table = CreateTable(table_name, column_families); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); @@ -434,11 +433,11 @@ TEST(TransactonRollback, DeleteValue) { // do the DeleteValue test. 
std::vector v = { {valid_column_family_name, "test", 1000, "data"}}; - auto status = set_cells(table, table_name, row_key, v); + auto status = SetCells(table, table_name, row_key, v); ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, - v[0].column_qualifier, v[0].timestamp_micros, - v[0].data)); + ASSERT_STATUS_OK(HasCell(table, valid_column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, + v[0].data)); // We then setup a transaction chain with 2 SetCells, the first one // should succeed to add a new cell and the second one should fail @@ -449,14 +448,14 @@ TEST(TransactonRollback, DeleteValue) { v = {{valid_column_family_name, "test", 2000, "new data"}, {"invalid_column_family_name", "test", 3000, "more new data"}}; - status = set_cells(table, table_name, row_key, v); + status = SetCells(table, table_name, row_key, v); ASSERT_NE(status.ok(), true); // We expect the chain of mutations to // fail altogether. - status = has_cell(table, v[0].column_family_name, row_key, - v[0].column_qualifier, v[0].timestamp_micros, v[0].data); + status = HasCell(table, v[0].column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, v[0].data); ASSERT_NE(status.ok(), true); // Undo should delete the cell - status = has_cell(table, v[1].column_family_name, row_key, - v[1].column_qualifier, v[1].timestamp_micros, v[1].data); + status = HasCell(table, v[1].column_family_name, row_key, + v[1].column_qualifier, v[1].timestamp_micros, v[1].data); ASSERT_NE(status.ok(), true); // Also the SetCell with invalid shema // should not have set anything. } @@ -480,17 +479,17 @@ TEST(TransactonRollback, DeleteColumn) { // name. 
auto const* const valid_column_family_name = "test"; std::vector column_families = {valid_column_family_name}; - auto maybe_table = create_table(table_name, column_families); + auto maybe_table = CreateTable(table_name, column_families); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); std::vector v = { {valid_column_family_name, "test", 1000, "data"}}; - auto status = set_cells(table, table_name, row_key, v); + auto status = SetCells(table, table_name, row_key, v); ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, valid_column_family_name, row_key, - v[0].column_qualifier, v[0].timestamp_micros, - v[0].data)); + ASSERT_STATUS_OK(HasCell(table, valid_column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, + v[0].data)); // Introduce a new column in a chain of SetCell mutations, a // subsequent one of which must fail due to an invalid schema @@ -498,18 +497,18 @@ TEST(TransactonRollback, DeleteColumn) { v = {{valid_column_family_name, "new_column", 2000, "new data"}, {"invalid_column_family_name", "test", 3000, "more new data"}}; - status = set_cells(table, table_name, row_key, v); + status = SetCells(table, table_name, row_key, v); ASSERT_NE(status.ok(), true); // We expect the chain of mutations to // fail altogether because the last one must fail. // The original column ("test") should still exist. - status = has_column(table, valid_column_family_name, row_key, "test"); + status = HasColumn(table, valid_column_family_name, row_key, "test"); ASSERT_STATUS_OK(status); // Bit the new column introduced should have been rolled back. - status = has_column(table, v[0].column_family_name, row_key, - v[0].column_qualifier); + status = + HasColumn(table, v[0].column_family_name, row_key, v[0].column_qualifier); ASSERT_NE(status.ok(), true); } @@ -529,7 +528,7 @@ TEST(TransactonRollback, DeleteRow) { // name. 
auto const* const valid_column_family_name = "test"; std::vector column_families = {valid_column_family_name}; - auto maybe_table = create_table(table_name, column_families); + auto maybe_table = CreateTable(table_name, column_families); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); @@ -541,12 +540,12 @@ TEST(TransactonRollback, DeleteRow) { {"invalid_column_family_name", "test", 2000, "more new data which should never be written"}}; - auto status = set_cells(table, table_name, row_key, v); + auto status = SetCells(table, table_name, row_key, v); ASSERT_NE(status.ok(), true); // We expect the chain of mutations to // fail altogether because the last one must fail. - status = has_row(table, valid_column_family_name, row_key); + status = HasRow(table, valid_column_family_name, row_key); ASSERT_NE(status.ok(), true); } @@ -568,7 +567,7 @@ TEST(TransactonRollback, DeleteFromFamilyBasicFunction) { std::vector column_families = {column_family_name, second_column_family_name}; - auto maybe_table = create_table(table_name, column_families); + auto maybe_table = CreateTable(table_name, column_families); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); @@ -581,23 +580,23 @@ TEST(TransactonRollback, DeleteFromFamilyBasicFunction) { p = {second_column_family_name, column_qualifier, timestamp_micros, data}; v.push_back(p); - auto status = set_cells(table, table_name, row_key, v); + auto status = SetCells(table, table_name, row_key, v); ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, timestamp_micros, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, data)); ASSERT_STATUS_OK( - has_column(table, column_family_name, row_key, column_qualifier)); - ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); + HasColumn(table, column_family_name, row_key, column_qualifier)); + ASSERT_STATUS_OK(HasRow(table, column_family_name, 
row_key)); // Having established that the data is there, test the basic // functionality of the DeleteFromFamily mutation by trying to // delete it. ASSERT_STATUS_OK( - delete_from_families(table, table_name, row_key, {column_family_name})); - ASSERT_NE(true, has_row(table, column_family_name, row_key).ok()); + DeleteFromFamilies(table, table_name, row_key, {column_family_name})); + ASSERT_NE(true, HasRow(table, column_family_name, row_key).ok()); // Ensure that we did not delete a row in another column family. - ASSERT_EQ(true, has_row(table, second_column_family_name, row_key).ok()); + ASSERT_EQ(true, HasRow(table, second_column_family_name, row_key).ok()); } // Test that DeleteFromfamily can be rolled back in case a subsequent @@ -621,7 +620,7 @@ TEST(TransactonRollback, DeleteFromFamilyRollback) { "i_do_not_exist_in_the_schema"; std::vector column_families = {column_family_name}; - auto maybe_table = create_table(table_name, column_families); + auto maybe_table = CreateTable(table_name, column_families); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); @@ -631,13 +630,13 @@ TEST(TransactonRollback, DeleteFromFamilyRollback) { data}; v.push_back(p); - auto status = set_cells(table, table_name, row_key, v); + auto status = SetCells(table, table_name, row_key, v); ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, timestamp_micros, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, data)); ASSERT_STATUS_OK( - has_column(table, column_family_name, row_key, column_qualifier)); - ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); + HasColumn(table, column_family_name, row_key, column_qualifier)); + ASSERT_STATUS_OK(HasRow(table, column_family_name, row_key)); // Setup two DeleteFromfamily mutation: The first one uses the // correct table schema (a column family that exists and is expected @@ -646,21 +645,21 @@ 
TEST(TransactonRollback, DeleteFromFamilyRollback) { // trigger a rollback of the previous row deletion. In the end, the // above row should still exist and all its data should be intact. status = - delete_from_families(table, table_name, row_key, - {column_family_name, column_family_not_in_schema}); + DeleteFromFamilies(table, table_name, row_key, + {column_family_name, column_family_not_in_schema}); ASSERT_NE(true, status.ok()); // The overall chain of mutations should fail. // Check that the row deleted by the first mutation is restored, // with all its data. - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, timestamp_micros, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, data)); ASSERT_STATUS_OK( - has_column(table, column_family_name, row_key, column_qualifier)); - ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); + HasColumn(table, column_family_name, row_key, column_qualifier)); + ASSERT_STATUS_OK(HasRow(table, column_family_name, row_key)); } -::google::bigtable::v2::TimestampRange* new_timestamp_range(int64_t start, - int64_t end) { +::google::bigtable::v2::TimestampRange* NewTimestampRange(int64_t start, + int64_t end) { auto* range = new (::google::bigtable::v2::TimestampRange); range->set_start_timestamp_micros(start); range->set_end_timestamp_micros(end); @@ -680,7 +679,7 @@ TEST(TransactonRollback, DeleteFromColumnBasicFunction) { auto const* data = "test"; std::vector column_families = {column_family_name}; - auto maybe_table = create_table(table_name, column_families); + auto maybe_table = CreateTable(table_name, column_families); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); @@ -691,23 +690,22 @@ TEST(TransactonRollback, DeleteFromColumnBasicFunction) { {column_family_name, column_qualifier, 3000, data}, }; - auto status = set_cells(table, table_name, row_key, v); + auto status = SetCells(table, table_name, row_key, v); 
ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, 1000, data)); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, 2000, data)); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, 3000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 1000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 2000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 3000, data)); std::vector dv = { {column_family_name, column_qualifier, - new_timestamp_range(v[0].timestamp_micros, - v[2].timestamp_micros + 1000)}}; + NewTimestampRange(v[0].timestamp_micros, v[2].timestamp_micros + 1000)}}; - ASSERT_STATUS_OK(delete_from_columns(table, table_name, row_key, dv)); + ASSERT_STATUS_OK(DeleteFromColumns(table, table_name, row_key, dv)); - status = has_column(table, column_family_name, row_key, column_qualifier); + status = HasColumn(table, column_family_name, row_key, column_qualifier); ASSERT_EQ(false, status.ok()); } @@ -727,7 +725,7 @@ TEST(TransactonRollback, DeleteFromColumnRollback) { auto const* data = "test"; std::vector column_families = {column_family_name}; - auto maybe_table = create_table(table_name, column_families); + auto maybe_table = CreateTable(table_name, column_families); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); @@ -738,38 +736,36 @@ TEST(TransactonRollback, DeleteFromColumnRollback) { {column_family_name, column_qualifier, 3000, data}, }; - auto status = set_cells(table, table_name, row_key, v); + auto status = SetCells(table, table_name, row_key, v); ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, 1000, data)); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, 2000, data)); - 
ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, 3000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 1000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 2000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 3000, data)); // The first mutation will succeed. The second assumes a schema that // does not exist - it should fail and cause rollback of the column // deletion in the first mutation. std::vector dv = { {column_family_name, column_qualifier, - new_timestamp_range(v[0].timestamp_micros, - v[2].timestamp_micros + 1000)}, - {bad_column_family_name, column_qualifier, - new_timestamp_range(1000, 2000)}, + NewTimestampRange(v[0].timestamp_micros, v[2].timestamp_micros + 1000)}, + {bad_column_family_name, column_qualifier, NewTimestampRange(1000, 2000)}, }; // The mutation chains should fail and rollback should occur. - ASSERT_EQ(false, delete_from_columns(table, table_name, row_key, dv).ok()); + ASSERT_EQ(false, DeleteFromColumns(table, table_name, row_key, dv).ok()); // The column should have been restored. ASSERT_STATUS_OK( - has_column(table, column_family_name, row_key, column_qualifier)); + HasColumn(table, column_family_name, row_key, column_qualifier)); // Check that the data is where and what we expect. 
- ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, 1000, data)); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, 2000, data)); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, 3000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 1000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 2000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 3000, data)); } // Can we delete a row from all column families? @@ -787,7 +783,7 @@ TEST(TransactonRollback, DeleteFromRowBasicFunction) { std::vector column_families = {column_family_name, second_column_family_name}; - auto maybe_table = create_table(table_name, column_families); + auto maybe_table = CreateTable(table_name, column_families); ASSERT_STATUS_OK(maybe_table); auto table = maybe_table.value(); @@ -800,13 +796,13 @@ TEST(TransactonRollback, DeleteFromRowBasicFunction) { p = {second_column_family_name, column_qualifier, timestamp_micros, data}; v.push_back(p); - auto status = set_cells(table, table_name, row_key, v); + auto status = SetCells(table, table_name, row_key, v); ASSERT_STATUS_OK(status); - ASSERT_STATUS_OK(has_cell(table, column_family_name, row_key, - column_qualifier, timestamp_micros, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, data)); ASSERT_STATUS_OK( - has_column(table, second_column_family_name, row_key, column_qualifier)); - ASSERT_STATUS_OK(has_row(table, column_family_name, row_key)); + HasColumn(table, second_column_family_name, row_key, column_qualifier)); + ASSERT_STATUS_OK(HasRow(table, column_family_name, row_key)); ::google::bigtable::v2::MutateRowRequest mutation_request; mutation_request.set_table_name(table_name); @@ -816,11 +812,11 @@ TEST(TransactonRollback, DeleteFromRowBasicFunction) { 
mutation_request_mutation->mutable_delete_from_row(); ASSERT_STATUS_OK(table->MutateRow(mutation_request)); - ASSERT_EQ(false, has_cell(table, column_family_name, row_key, - column_qualifier, timestamp_micros, data) + ASSERT_EQ(false, HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, data) .ok()); - ASSERT_EQ(false, has_column(table, second_column_family_name, row_key, - column_qualifier) + ASSERT_EQ(false, HasColumn(table, second_column_family_name, row_key, + column_qualifier) .ok()); } From 5fae4983e2e2e7c5f565ca29f512c72cf72254cb Mon Sep 17 00:00:00 2001 From: Adam Czajkowski <48181325+prawilny@users.noreply.github.com> Date: Thu, 22 May 2025 16:09:47 +0200 Subject: [PATCH 144/195] test: add emulator filter tests and fix the issues encountered --- .../cloud/bigtable/emulator/column_family.cc | 35 +- .../cloud/bigtable/emulator/column_family.h | 30 +- .../bigtable/emulator/column_family_test.cc | 22 +- google/cloud/bigtable/emulator/filter.cc | 28 +- google/cloud/bigtable/emulator/filter.h | 2 +- google/cloud/bigtable/emulator/filter_test.cc | 741 ++++++++++++++++-- google/cloud/bigtable/emulator/filtered_map.h | 187 ++++- .../bigtable/emulator/filtered_map_test.cc | 116 ++- google/cloud/bigtable/emulator/range_set.h | 9 - .../cloud/bigtable/emulator/range_set_test.cc | 10 - 10 files changed, 1006 insertions(+), 174 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 2981c4c24c630..1b2ce1f8a9b9e 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -24,11 +24,6 @@ namespace emulator { absl::optional ColumnRow::SetCell( std::chrono::milliseconds timestamp, std::string const& value) { - if (timestamp <= std::chrono::milliseconds::zero()) { - timestamp = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()); - } - absl::optional ret = absl::nullopt; auto cell_it = 
cells_.find(timestamp); if (!(cell_it == cells_.end())) { @@ -43,14 +38,17 @@ absl::optional ColumnRow::SetCell( std::vector ColumnRow::DeleteTimeRange( ::google::bigtable::v2::TimestampRange const& time_range) { std::vector deleted_cells; - for (auto cell_it = cells_.lower_bound( - std::chrono::duration_cast( - std::chrono::microseconds(time_range.start_timestamp_micros()))); + absl::optional maybe_end_micros = time_range.end_timestamp_micros(); + if (maybe_end_micros.value_or(0) == 0) { + maybe_end_micros.reset(); + } + for (auto cell_it = maybe_end_micros ? + upper_bound(std::chrono::duration_cast( + std::chrono::microseconds(*maybe_end_micros))) : begin(); cell_it != cells_.end() && - (time_range.end_timestamp_micros() == 0 || - cell_it->first < std::chrono::duration_cast( + cell_it->first >= std::chrono::duration_cast( std::chrono::microseconds( - time_range.end_timestamp_micros())));) { + time_range.start_timestamp_micros()));) { Cell cell = {std::move(cell_it->first), std::move(cell_it->second)}; deleted_cells.emplace_back(std::move(cell)); cells_.erase(cell_it++); @@ -214,9 +212,9 @@ FilteredColumnFamilyStream::FilteredColumnFamilyStream( row_ranges_(std::move(row_set)), column_ranges_(StringRangeSet::All()), timestamp_ranges_(TimestampRangeSet::All()), - rows_(RangeFilteredMapView(column_family, - *row_ranges_), - std::cref(row_regexes_)) {} + rows_( + StringRangeFilteredMapView(column_family, *row_ranges_), + std::cref(row_regexes_)) {} bool FilteredColumnFamilyStream::ApplyFilter( InternalFilter const& internal_filter) { @@ -272,7 +270,7 @@ void FilteredColumnFamilyStream::InitializeIfNeeded() const { bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() const { for (; column_it_.value() != columns_.value().end(); ++(column_it_.value())) { - cells_ = RangeFilteredMapView( + cells_ = TimestampRangeFilteredMapView( column_it_.value()->second, timestamp_ranges_); cell_it_ = cells_.value().begin(); if (cell_it_.value() != cells_.value().end()) 
{ @@ -284,10 +282,9 @@ bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() const { bool FilteredColumnFamilyStream::PointToFirstCellAfterRowChange() const { for (; (*row_it_) != rows_.end(); ++(*row_it_)) { - columns_ = RegexFiteredMapView< - RangeFilteredMapView>( - RangeFilteredMapView( - (*row_it_)->second, column_ranges_), + columns_ = RegexFiteredMapView>( + StringRangeFilteredMapView((*row_it_)->second, + column_ranges_), column_regexes_); column_it_ = columns_.value().begin(); if (PointToFirstCellAfterColumnChange()) { diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index d38824289c0cd..32cc9ed535991 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -51,8 +51,7 @@ class ColumnRow { * Insert or update and existing cell at a given timestamp. * * @param timestamp the time stamp at which the value will be inserted or - * updated. If it equals zero then number of milliseconds since epoch will - * be used instead. + * updated. * @param value the value to insert/update. * * @return no value if the timestamp had no value before, otherwise @@ -92,18 +91,17 @@ class ColumnRow { return cells_.upper_bound(timestamp); } - std::map::iterator find( - std::chrono::milliseconds const& timestamp) { + const_iterator find(std::chrono::milliseconds const& timestamp) { return cells_.find(timestamp); } - void erase( - std::map::iterator timestamp_it) { + void erase(const_iterator timestamp_it) { cells_.erase(timestamp_it); } private: - std::map cells_; + // Note the order - the iterator return the freshest cells first. + std::map> cells_; }; /** @@ -121,8 +119,7 @@ class ColumnFamilyRow { * * @param column_qualifier the column qualifier at which to update the value. * @param timestamp the time stamp at which the value will be inserted or - * updated. If it equals zero then number of milliseconds since epoch will - * be used instead. 
+ * updated. * @param value the value to insert/update. * * @return no value if the timestamp had no value before, otherwise @@ -209,8 +206,7 @@ class ColumnFamily { * @param row_key the row key at which to update the value. * @param column_qualifier the column qualifier at which to update the value. * @param timestamp the time stamp at which the value will be inserted or - * updated. If it equals zero then number of milliseconds since epoch will - * be used instead. + * updated. * @param value the value to insert/update. * * @return no value if the timestamp had no value before, otherwise @@ -374,11 +370,11 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { std::vector> column_regexes_; mutable TimestampRangeSet timestamp_ranges_; - RegexFiteredMapView> rows_; + RegexFiteredMapView> rows_; mutable absl::optional>> + StringRangeFilteredMapView>> columns_; - mutable absl::optional> + mutable absl::optional> cells_; // If row_it_ == rows_.end() we've reached the end. @@ -386,13 +382,13 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { // if (row_it_ != rows_.end()) then // cell_it_ != cells.end() && column_it_ != columns_.end(). 
mutable absl::optional>::const_iterator> + StringRangeFilteredMapView>::const_iterator> row_it_; mutable absl::optional>::const_iterator> + StringRangeFilteredMapView>::const_iterator> column_it_; mutable absl::optional< - RangeFilteredMapView::const_iterator> + TimestampRangeFilteredMapView::const_iterator> cell_it_; mutable absl::optional cur_value_; mutable bool initialized_{false}; diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index d96d93d69dd2d..afb40febc4507 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -67,8 +67,14 @@ TEST(ColumnRow, Trivial) { col_row.SetCell(0_ms, "baz"); col_row.SetCell(20_ms, "qux"); + EXPECT_EQ("qux", col_row.lower_bound(30_ms)->second); + EXPECT_EQ("qux", col_row.lower_bound(20_ms)->second); EXPECT_EQ("bar", col_row.lower_bound(10_ms)->second); - EXPECT_EQ("qux", col_row.upper_bound(10_ms)->second); + EXPECT_EQ("baz", col_row.lower_bound(0_ms)->second); + EXPECT_EQ("qux", col_row.upper_bound(30_ms)->second); + EXPECT_EQ("bar", col_row.upper_bound(20_ms)->second); + EXPECT_EQ("baz", col_row.upper_bound(10_ms)->second); + EXPECT_EQ(col_row.end(), col_row.upper_bound(0_ms)); } TEST(ColumnRow, DeleteTimeRangeFinite) { @@ -223,14 +229,14 @@ TEST(FilteredColumnFamilyStream, Unfiltered) { FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); EXPECT_EQ(R"""( row0 cf1:col0 @10ms: foo -row0 cf1:col1 @20ms: bar row0 cf1:col1 @30ms: baz +row0 cf1:col1 @20ms: bar row1 cf1:col0 @10ms: foo -row1 cf1:col1 @20ms: foo row1 cf1:col1 @30ms: foo +row1 cf1:col1 @20ms: foo row2 cf1:col0 @10ms: qux -row2 cf1:col2 @40ms: qux row2 cf1:col2 @50ms: qux +row2 cf1:col2 @40ms: qux )""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); } @@ -270,12 +276,12 @@ TEST(FilteredColumnFamilyStream, FilterByTimestampRange) { TimestampRange{TimestampRangeSet::Range(100_ms, 200_ms)}); 
EXPECT_EQ(R"""( row0 cf1:col0 @100ms: foo -row0 cf1:col2 @100ms: foo -row0 cf1:col2 @120ms: foo row0 cf1:col2 @140ms: foo -row1 cf1:col2 @100ms: foo -row1 cf1:col2 @120ms: foo +row0 cf1:col2 @120ms: foo +row0 cf1:col2 @100ms: foo row1 cf1:col2 @140ms: foo +row1 cf1:col2 @120ms: foo +row1 cf1:col2 @100ms: foo )""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); } diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index 7e62ed87b08ce..f4e9302cb65e3 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ b/google/cloud/bigtable/emulator/filter.cc @@ -19,7 +19,6 @@ #include "google/cloud/internal/make_status.h" #include "google/cloud/status_or.h" #include -#include #include namespace google { @@ -30,6 +29,9 @@ namespace { bool PassAllFilters(InternalFilter const&) { return true; } +// We need to ensure that the value outlives the reference stored in CellView. +std::string const kStrippedValue; + } // namespace void CellStream::Next(NextMode mode) { @@ -325,8 +327,8 @@ class TrivialFilter : public AbstractCellStreamImpl { * @param filter_filter a functor which given an `InternalFilter` decides * whether filtering this cell stream's results and then applying the * `InternalFilter` would yield the same results as applying - * `InternalFilter` to the underlying stream and the perform this stream's - * filtering. + * `InternalFilter` to the underlying stream and then performing this + * stream's filtering. 
*/ template CellStream MakeTrivialFilter( @@ -539,14 +541,14 @@ class ConditionStream : public AbstractCellStreamImpl { if (condition_true_) { true_stream_.Next(mode); if (!true_stream_ || - !internal::CompareRowKey(current_row_, true_stream_->row_key())) { + internal::CompareRowKey(current_row_, true_stream_->row_key()) != 0) { source_.Next(NextMode::kRow); OnNewRow(); } } else { false_stream_.Next(mode); - if (!false_stream_ || - !internal::CompareRowKey(current_row_, false_stream_->row_key())) { + if (!false_stream_ || internal::CompareRowKey( + current_row_, false_stream_->row_key()) != 0) { source_.Next(NextMode::kRow); OnNewRow(); } @@ -870,7 +872,7 @@ StatusOr CreateFilterImpl( if (per_row_state-- <= 0) { return {}; } - return NextMode::kRow; + return NextMode::kCell; }, [cells_per_row_offset]() { return cells_per_row_offset; }, [](InternalFilter const& internal_filter) { @@ -972,7 +974,7 @@ StatusOr CreateFilterImpl( CellStreamConstructor res = [source_ctor = std::move(source_ctor)] { auto source = source_ctor(); return MakeTrivialTransformer(std::move(source), [](CellView cell_view) { - cell_view.SetValue(""); + cell_view.SetValue(kStrippedValue); return cell_view; }); }; @@ -1046,6 +1048,16 @@ StatusOr CreateFilterImpl( "`condition` must have a `predicate_filter` set.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } + if (!filter.condition().has_true_filter() && + !filter.condition().has_false_filter()) { + return InvalidArgumentError( + "`condition` must have `true_filter` or `false_filter` set.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + // FIXME: validate that `sink` is not present in condition's predicate. 
+ // Expected error: + // INVALID_ARGUMENT: Error in field 'condition filter predicate' : sink + // cannot be nested in a condition filter auto maybe_predicate_stream_ctor = CreateFilterImpl( filter.condition().predicate_filter(), source_ctor, direct_sinks); diff --git a/google/cloud/bigtable/emulator/filter.h b/google/cloud/bigtable/emulator/filter.h index c01062a57ee8e..1addb61939db7 100644 --- a/google/cloud/bigtable/emulator/filter.h +++ b/google/cloud/bigtable/emulator/filter.h @@ -44,7 +44,7 @@ namespace emulator { // uninteresting columns and avoid passing all the values around. Most of the // filters can be applied in any order, which makes our filtering task easy. // -// Unfortunately, some filters (e.g. `cells_per_row_limit_filter`) prevents us +// Unfortunately, some filters (e.g. `cells_per_row_limit_filter`) prevent us // from moving filters applied later in the chain to its beginning. Hence, we // need to keep the naive (object-per-graph-node) approach at least as a backup // option. diff --git a/google/cloud/bigtable/emulator/filter_test.cc b/google/cloud/bigtable/emulator/filter_test.cc index 8e2fb2d78aa68..c99f50e340ee2 100644 --- a/google/cloud/bigtable/emulator/filter_test.cc +++ b/google/cloud/bigtable/emulator/filter_test.cc @@ -13,13 +13,11 @@ // limitations under the License. 
#include "google/cloud/bigtable/emulator/filter.h" -#include "google/cloud/bigtable/data_connection.h" -#include "google/cloud/bigtable/table.h" #include "google/cloud/testing_util/chrono_literals.h" -#include "google/cloud/testing_util/is_proto_equal.h" #include "google/cloud/testing_util/status_matchers.h" -#include +#include "gmock/gmock.h" #include +#include namespace google { namespace cloud { @@ -62,13 +60,15 @@ class TestCell { public: TestCell(std::string row_key, std::string column_family, std::string column_qualifier, std::chrono::milliseconds timestamp, - std::string value) + std::string value, std::optional label = {}) : row_key_(std::move(row_key)), column_family_(std::move(column_family)), column_qualifier_(std::move(column_qualifier)), timestamp_(std::move(timestamp)), value_(std::move(value)), - view_(row_key_, column_family_, column_qualifier_, timestamp_, value_) { + view_(row_key_, column_family_, column_qualifier_, timestamp_, value_), + label_(std::move(label)) { + maybe_label_view(); } TestCell(TestCell const& other) @@ -77,24 +77,44 @@ class TestCell { column_qualifier_(other.column_qualifier_), timestamp_(other.timestamp_), value_(other.value_), - view_(row_key_, column_family_, column_qualifier_, timestamp_, value_) { + view_(row_key_, column_family_, column_qualifier_, timestamp_, value_), + label_(other.label_) { + maybe_label_view(); } + TestCell(TestCell&& other) noexcept : row_key_(std::move(other.row_key_)), column_family_(std::move(other.column_family_)), column_qualifier_(std::move(other.column_qualifier_)), timestamp_(std::move(other.timestamp_)), value_(std::move(other.value_)), - view_(row_key_, column_family_, column_qualifier_, timestamp_, value_) { + view_(row_key_, column_family_, column_qualifier_, timestamp_, value_), + label_(std::move(other.label_)) { + maybe_label_view(); + } + + TestCell Labeled(std::string const& label) { + TestCell labeled_copy = *this; + labeled_copy.label_ = label; + labeled_copy.maybe_label_view(); 
+ return labeled_copy; } CellView const& AsCellView() const { return view_; } bool operator==(CellView const& cell_view) const { + bool labels_equal = (!label_.has_value() && !cell_view.HasLabel()) || + (label_.has_value() && cell_view.HasLabel() && + label_.value() == cell_view.label()); return row_key_ == cell_view.row_key() && column_family_ == cell_view.column_family() && column_qualifier_ == cell_view.column_qualifier() && - timestamp_ == cell_view.timestamp() && value_ == cell_view.value(); + timestamp_ == cell_view.timestamp() && value_ == cell_view.value() && + labels_equal; + } + + bool operator==(TestCell const& other) const { + return operator==(other.AsCellView()); } private: @@ -104,6 +124,13 @@ class TestCell { std::chrono::milliseconds timestamp_; std::string value_; CellView view_; + std::optional label_; + + void maybe_label_view() { + if (label_) { + view_.SetLabel(label_.value()); + } + } }; std::ostream& operator<<(std::ostream& stream, TestCell const& test_cell) { @@ -181,50 +208,6 @@ TEST(CellStream, NextColumnNotSupported) { ASSERT_FALSE(cell_stream.HasValue()); } -TEST(CellStream, NextRowNotSupported) { - std::vector cells{ - TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, - TestCell{"row1", "cf1", "col1", 1_ms, "val2"}, - TestCell{"row1", "cf1", "col2", 0_ms, "val3"}, // column changed - TestCell{"row1", "cf1", "col2", 1_ms, "val4"}, - TestCell{"row1", "cf2", "col2", 0_ms, "val5"}, // column family changed - TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, - TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed - TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; - auto cur_cell = cells.begin(); - - auto mock_impl = std::make_unique(); - EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)) - .WillRepeatedly(Return(false)); - EXPECT_CALL(*mock_impl, Value).WillRepeatedly([&]() -> CellView const& { - return cur_cell->AsCellView(); - }); - EXPECT_CALL(*mock_impl, HasValue).WillRepeatedly([&] { - return cur_cell != cells.end(); - }); - 
EXPECT_CALL(*mock_impl, Next(NextMode::kCell)).WillRepeatedly([&] { - ++cur_cell; - return true; - }); - - CellStream cell_stream(std::move(mock_impl)); - - cell_stream.Next(NextMode::kColumn); - ASSERT_TRUE(cell_stream.HasValue()); - EXPECT_EQ(cells[2], cell_stream.Value()); - - cell_stream.Next(NextMode::kColumn); - ASSERT_TRUE(cell_stream.HasValue()); - EXPECT_EQ(cells[4], cell_stream.Value()); - - cell_stream.Next(NextMode::kColumn); - ASSERT_TRUE(cell_stream.HasValue()); - EXPECT_EQ(cells[6], cell_stream.Value()); - - cell_stream.Next(NextMode::kColumn); - ASSERT_FALSE(cell_stream.HasValue()); -} - TEST(CellStream, NextRowUnsupported) { std::vector cells{ TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, @@ -834,6 +817,34 @@ TEST_F(InvalidFilterProtoTest, ConditionNoPredicate) { "`condition` must have a `predicate_filter` set."))); } +TEST_F(InvalidFilterProtoTest, ConditionNeitherTrueNorFalse) { + filter_.mutable_condition()->mutable_predicate_filter()->set_pass_all_filter( + true); + + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs( + StatusCode::kInvalidArgument, + testing::HasSubstr( + "`condition` must have `true_filter` or `false_filter` set."))); +} + +TEST_F(InvalidFilterProtoTest, ConditionPredicateSink) { + filter_.mutable_condition()->mutable_predicate_filter()->set_sink(true); + filter_.mutable_condition()->mutable_true_filter()->pass_all_filter(); + filter_.mutable_condition()->mutable_false_filter()->pass_all_filter(); + + auto maybe_stream = TryCreate(); + + // FIXME unskip this test after fixing condition validation. 
+ GTEST_SKIP() << "Searching filter graph for sink nodes unimplemented."; + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "sink cannot be nested in a condition filter"))); +} + TEST_F(InvalidFilterProtoTest, SinkFalse) { filter_.set_sink(false); auto maybe_stream = TryCreate(); @@ -1013,6 +1024,13 @@ TEST_F(FilterApplicationPropagation, BlockAll) { } } +TEST_F(FilterApplicationPropagation, Sink) { + RowFilter filter; + filter.set_sink(true); + + TestPropagation(filter, 0); +} + TEST_F(FilterApplicationPropagation, RowKeyRegex) { RowFilter filter; filter.set_row_key_regex_filter("foo.*"); @@ -1176,6 +1194,7 @@ TEST_F(FilterApplicationPropagation, Condition) { } } } + class InternalFiltersAreApplied : public ::testing::Test { protected: RowFilter filter_; @@ -1248,6 +1267,622 @@ TEST_F(InternalFiltersAreApplied, TimestampRange) { }); } +class VectorCellStream : public AbstractCellStreamImpl { + public: + explicit VectorCellStream(std::vector cells) + : cells_{std::move(cells)}, current_cell_{cells_.begin()} {} + bool ApplyFilter(InternalFilter const&) override { return false; } + bool HasValue() const override { return current_cell_ != cells_.end(); } + CellView const& Value() const override { return current_cell_->AsCellView(); } + bool Next(NextMode mode) override { + if (mode != NextMode::kCell) { + return false; + } + ++current_cell_; + return true; + } + + private: + std::vector cells_; + std::vector::const_iterator current_cell_; +}; + +class FilterWorkTest : public ::testing::Test { + public: + protected: + static StatusOr> GetFilterOutput( + std::vector const&& input_cells, RowFilter const& filter) { + auto maybe_stream = CreateFilter(filter, [input_cells] { + return CellStream(std::make_unique(input_cells)); + }); + if (!maybe_stream.status().ok()) { + return maybe_stream.status(); + } + + std::vector filter_output; + while (maybe_stream->HasValue()) { + auto& v = maybe_stream.value(); + 
filter_output.emplace_back( + v->row_key(), v->column_family(), v->column_qualifier(), + v->timestamp(), v->value(), + v->HasLabel() ? std::optional{v->label()} + : std::optional{}); + maybe_stream->Next(); + } + return filter_output; + } +}; + +TEST_F(FilterWorkTest, Pass) { + RowFilter filter; + filter.set_pass_all_filter(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, PassLabels) { + RowFilter filter; + filter.set_pass_all_filter(true); + + std::vector cells{ + TestCell{"r", "cf", "q", 0_ms, "v", "label1"}, + TestCell{"r", "cf", "q", 0_ms, "v", "label2"}, + TestCell{"r", "cf", "q", 0_ms, "v", "label3"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, Sink) { + RowFilter filter; + filter.set_sink(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + // Next row + TestCell{"r2", "cf", "q", 0_ms, "v"}, + // Next cell + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, Block) { + RowFilter filter; + filter.set_block_all_filter(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r1", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_TRUE(maybe_output->empty()); +} + +TEST_F(FilterWorkTest, RowRegex) { + RowFilter filter; + filter.set_row_key_regex_filter("r2"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 
0_ms, "v"}, + TestCell{"r3", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, ValueRegex) { + RowFilter filter; + filter.set_value_regex_filter("v2"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v1"}, + TestCell{"r2", "cf", "q", 0_ms, "v2"}, + TestCell{"r2", "cf", "q", 0_ms, "v3"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(1, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); +} + +TEST_F(FilterWorkTest, SampleRows) { + RowFilter filter; + filter.set_row_sample_filter(0.5); + + size_t samples = 100; + std::vector cells; + cells.reserve(samples); + for (size_t i = 0; i < samples; i++) { + cells.emplace_back("r" + std::to_string(i), "cf", "q", 0_ms, "v"); + } + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_NE(0, maybe_output->size()); + EXPECT_NE(samples, maybe_output->size()); +} + +TEST_F(FilterWorkTest, FamilyNameRegex) { + RowFilter filter; + filter.set_family_name_regex_filter("cf2"); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r2", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf3", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, QualifierRegex) { + RowFilter filter; + filter.set_column_qualifier_regex_filter("q2"); + + std::vector cells{ + TestCell{"r1", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + 
TestCell{"r2", "cf", "q3", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, ColumnRange) { + RowFilter filter; + filter.mutable_column_range_filter()->set_family_name("cf"); + filter.mutable_column_range_filter()->set_start_qualifier_open("q1"); + filter.mutable_column_range_filter()->set_end_qualifier_closed("q2"); + + std::vector cells{ + TestCell{"r1", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r2", "cf", "q3", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, ValueRange) { + RowFilter filter; + filter.mutable_value_range_filter()->set_start_value_open("v1"); + filter.mutable_value_range_filter()->set_end_value_closed("v2"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v1"}, + TestCell{"r2", "cf", "q", 0_ms, "v2"}, + TestCell{"r2", "cf", "q", 0_ms, "v2"}, + TestCell{"r3", "cf", "q", 0_ms, "v3"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, CellsPerRowOffset) { + RowFilter filter; + filter.set_cells_per_row_offset_filter(1); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r1", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r3", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, 
+ TestCell{"r4", "cf", "q", 0_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(5, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[3], maybe_output->at(1)); + EXPECT_EQ(cells[5], maybe_output->at(2)); + EXPECT_EQ(cells[7], maybe_output->at(3)); + EXPECT_EQ(cells[8], maybe_output->at(4)); +} + +TEST_F(FilterWorkTest, CellsPerRowLimit) { + RowFilter filter; + filter.set_cells_per_row_limit_filter(1); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r1", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r3", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(4, maybe_output->size()); + EXPECT_EQ(cells[0], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); + EXPECT_EQ(cells[4], maybe_output->at(2)); + EXPECT_EQ(cells[6], maybe_output->at(3)); +} + +TEST_F(FilterWorkTest, LatestCellsPerColumnLimit) { + RowFilter filter; + filter.set_cells_per_column_limit_filter(1); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r1", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r3", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(6, maybe_output->size()); + EXPECT_EQ(cells[0], maybe_output->at(0)); + EXPECT_EQ(cells[1], maybe_output->at(1)); + EXPECT_EQ(cells[2], 
maybe_output->at(2)); + EXPECT_EQ(cells[3], maybe_output->at(3)); + EXPECT_EQ(cells[4], maybe_output->at(4)); + EXPECT_EQ(cells[6], maybe_output->at(5)); +} + +TEST_F(FilterWorkTest, TimestampRange) { + RowFilter filter; + filter.mutable_timestamp_range_filter()->set_start_timestamp_micros(2000); + filter.mutable_timestamp_range_filter()->set_end_timestamp_micros(3000); + + std::vector cells{ + TestCell{"r1", "cf", "q", 3_ms, "v"}, + TestCell{"r2", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(1, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); +} + +TEST_F(FilterWorkTest, Label) { + RowFilter filter; + std::string label = "lbl"; + filter.set_apply_label_transformer(label); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r1", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + TestCell expected{"r1", "cf", "q", 0_ms, "v", label}; + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(expected, maybe_output->at(0)); + EXPECT_EQ(expected, maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, StripValue) { + RowFilter filter; + filter.set_strip_value_transformer(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r1", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + TestCell expected{"r1", "cf", "q", 0_ms, ""}; + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(expected, maybe_output->at(0)); + EXPECT_EQ(expected, maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, Chain) { + RowFilter filter; + filter.mutable_chain()->add_filters()->set_cells_per_row_offset_filter(1); + filter.mutable_chain()->add_filters()->set_cells_per_row_limit_filter(1); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + 
TestCell{"r1", "cf2", "q", 0_ms, "v"}, + TestCell{"r1", "cf3", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r2", "cf", "q3", 0_ms, "v"}, + TestCell{"r3", "cf", "q", 3_ms, "v"}, + TestCell{"r3", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(3, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[4], maybe_output->at(1)); + EXPECT_EQ(cells[7], maybe_output->at(2)); +} + +TEST_F(FilterWorkTest, ChainEmpty) { + RowFilter filter; + filter.mutable_chain()->Clear(); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, ChainSink) { + RowFilter filter; + filter.mutable_chain()->add_filters()->set_sink(true); + filter.mutable_chain()->add_filters()->set_sink(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, Interleave) { + RowFilter filter; + filter.mutable_interleave()->add_filters()->set_family_name_regex_filter( + "cf1"); + filter.mutable_interleave()->add_filters()->set_family_name_regex_filter( + "cf2"); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r2", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf2", "q", 0_ms, "v"}, + TestCell{"r3", "cf1", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, 
*maybe_output); +} + +TEST_F(FilterWorkTest, InterleaveEmpty) { + RowFilter filter; + filter.mutable_interleave()->Clear(); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(0, maybe_output->size()); +} + +TEST_F(FilterWorkTest, InterleaveSink) { + RowFilter filter; + filter.mutable_interleave()->add_filters()->set_sink(true); + filter.mutable_interleave()->add_filters()->set_block_all_filter(true); + filter.mutable_interleave()->add_filters()->set_sink(true); + filter.mutable_interleave()->add_filters()->set_pass_all_filter(true); + filter.mutable_interleave()->add_filters()->set_sink(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(cells.size() * 4, maybe_output->size()); + for (size_t i = 0; i < maybe_output->size() / 3; i++) { + EXPECT_EQ(cells[0], maybe_output->at(i)); + EXPECT_EQ(cells[1], maybe_output->at(i + maybe_output->size() / 3)); + EXPECT_EQ(cells[2], maybe_output->at(i + 2 * maybe_output->size() / 3)); + } +} + +// The test case from the example given next to `sink` protobuf definition. 
+TEST_F(FilterWorkTest, RegexInterleaveChainLabelSinkRegex) { + RowFilter filter; + + RowFilter* c0 = filter.mutable_chain()->add_filters(); + RowFilter* c1 = filter.mutable_chain()->add_filters(); + RowFilter* c2 = filter.mutable_chain()->add_filters(); + + RowFilter* c1i0 = c1->mutable_interleave()->add_filters(); + RowFilter* c1i1 = c1->mutable_interleave()->add_filters(); + + RowFilter* c1i1c0 = c1i1->mutable_chain()->add_filters(); + RowFilter* c1i1c1 = c1i1->mutable_chain()->add_filters(); + + c0->set_family_name_regex_filter("A"); + + c1i0->set_pass_all_filter(true); + c1i1c0->set_apply_label_transformer("foo"); + c1i1c1->set_sink(true); + + c2->set_column_qualifier_regex_filter("B"); + + std::vector cells{ + TestCell("r", "A", "A", 1_ms, "w"), + TestCell("r", "A", "B", 2_ms, "x"), + TestCell("r", "B", "B", 4_ms, "z"), + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + TestCell labeled0 = cells[0].Labeled("foo"); + TestCell labeled1 = cells[1].Labeled("foo"); + + ASSERT_EQ(3, maybe_output->size()); + EXPECT_EQ(labeled0, maybe_output->at(0)); + EXPECT_TRUE(maybe_output->at(1) == labeled1 || + maybe_output->at(1) == cells[1]); + EXPECT_TRUE(maybe_output->at(2) == labeled1 || + maybe_output->at(2) == cells[1]); + EXPECT_NE(maybe_output->at(1).AsCellView().HasLabel(), + maybe_output->at(2).AsCellView().HasLabel()); +} + +TEST_F(FilterWorkTest, ConditionEmptyNonempty) { + RowFilter filter; + filter.mutable_condition() + ->mutable_predicate_filter() + ->set_value_regex_filter("t"); + filter.mutable_condition() + ->mutable_true_filter() + ->set_apply_label_transformer("TRUE"); + filter.mutable_condition() + ->mutable_false_filter() + ->set_apply_label_transformer("FALSE"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 3_ms, "t"}, + TestCell{"r1", "cf", "q", 2_ms, "t"}, + TestCell{"r1", "cf", "q", 1_ms, "t"}, + TestCell{"r2", "cf", "q", 3_ms, "f"}, + TestCell{"r2", "cf", "q", 2_ms, "t"}, + TestCell{"r2", 
"cf", "q", 1_ms, "f"}, + TestCell{"r3", "cf", "q", 3_ms, "f"}, + TestCell{"r3", "cf", "q", 2_ms, "f"}, + TestCell{"r3", "cf", "q", 1_ms, "f"}, + TestCell{"r4", "cf", "q", 3_ms, "t"}, + TestCell{"r4", "cf", "q", 2_ms, "f"}, + TestCell{"r4", "cf", "q", 1_ms, "t"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(cells.size(), maybe_output->size()); + EXPECT_EQ(cells[1].Labeled("TRUE"), maybe_output->at(1)); + EXPECT_EQ(cells[2].Labeled("TRUE"), maybe_output->at(2)); + EXPECT_EQ(cells[3].Labeled("TRUE"), maybe_output->at(3)); + EXPECT_EQ(cells[4].Labeled("TRUE"), maybe_output->at(4)); + EXPECT_EQ(cells[5].Labeled("TRUE"), maybe_output->at(5)); + EXPECT_EQ(cells[6].Labeled("FALSE"), maybe_output->at(6)); + EXPECT_EQ(cells[7].Labeled("FALSE"), maybe_output->at(7)); + EXPECT_EQ(cells[8].Labeled("FALSE"), maybe_output->at(8)); + EXPECT_EQ(cells[9].Labeled("TRUE"), maybe_output->at(9)); + EXPECT_EQ(cells[10].Labeled("TRUE"), maybe_output->at(10)); + EXPECT_EQ(cells[11].Labeled("TRUE"), maybe_output->at(11)); +} + +TEST_F(FilterWorkTest, ConditionBranchFilterNextDifferentThanCell) { + RowFilter filter; + filter.mutable_condition() + ->mutable_predicate_filter() + ->set_value_regex_filter("t"); + filter.mutable_condition() + ->mutable_true_filter() + ->mutable_chain() + ->add_filters() + ->set_apply_label_transformer("TRUE"); + filter.mutable_condition() + ->mutable_true_filter() + ->mutable_chain() + ->add_filters() + ->set_cells_per_column_limit_filter(1); + filter.mutable_condition() + ->mutable_false_filter() + ->mutable_chain() + ->add_filters() + ->set_apply_label_transformer("FALSE"); + filter.mutable_condition() + ->mutable_false_filter() + ->mutable_chain() + ->add_filters() + ->set_column_qualifier_regex_filter("q2"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 3_ms, "t"}, + TestCell{"r1", "cf", "q", 2_ms, "t"}, + TestCell{"r1", "cf", "q", 1_ms, "t"}, + TestCell{"r2", "cf", "q", 3_ms, "f"}, 
+ TestCell{"r2", "cf", "q", 2_ms, "t"}, + TestCell{"r2", "cf", "q", 1_ms, "f"}, + TestCell{"r3", "cf1", "q2", 1_ms, "f"}, + TestCell{"r3", "cf2", "q1", 2_ms, "f"}, + TestCell{"r3", "cf3", "q2", 3_ms, "f"}, + TestCell{"r4", "cf", "q", 3_ms, "f"}, + TestCell{"r4", "cf", "q", 2_ms, "f"}, + TestCell{"r4", "cf", "q", 1_ms, "t"}, + }; + auto maybe_output = GetFilterOutput(std::move(cells), filter); + ASSERT_STATUS_OK(maybe_output); + + std::vector expected{ + TestCell{"r1", "cf", "q", 3_ms, "t", "TRUE"}, + TestCell{"r2", "cf", "q", 3_ms, "f", "TRUE"}, + TestCell{"r3", "cf1", "q2", 1_ms, "f", "FALSE"}, + TestCell{"r3", "cf3", "q2", 3_ms, "f", "FALSE"}, + TestCell{"r4", "cf", "q", 3_ms, "f", "TRUE"}, + }; + EXPECT_EQ(expected, *maybe_output); +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/filtered_map.h b/google/cloud/bigtable/emulator/filtered_map.h index a6f369ba38fc3..8b0e8b88d987f 100644 --- a/google/cloud/bigtable/emulator/filtered_map.h +++ b/google/cloud/bigtable/emulator/filtered_map.h @@ -26,23 +26,21 @@ namespace bigtable { namespace emulator { /** - * A map view filtering elements by whether their keys fall into a range set. + * A map view filtering elements by whether their keys fall into a string range + * set. * - * Objects of this type provide a lightweight wrapper around `std::map`, which - * provides a iterator, which will skip over unwanted elements. + * Objects of this type provide a lightweight wrapper around `std::map`-like + * object, which provides a iterator, which will skip over unwanted elements. * * This class is not very generic. It should be thought of as a crude way of * deduplicating code. * - * The unfiltered elements' keys should fall into a given range set - either - * `StringRangeSet` or by `TimestampRangeSet`. + * The unfiltered elements' keys should fall into a given string range set. 
* - * @tparam Map the type of the map to be wrapped, an instantiation of `std::map` - * @tparam PermittedRanges the type of the filter, either `StringRangeSet` or - * `TimestampRangeSet` + * @tparam Map the type of the map-like object to be wrapped. */ -template -class RangeFilteredMapView { +template +class StringRangeFilteredMapView { public: // NOLINTNEXTLINE(readability-identifier-naming) class const_iterator { @@ -56,10 +54,10 @@ class RangeFilteredMapView { using pointer = value_type const*; const_iterator( - RangeFilteredMapView const& parent, + StringRangeFilteredMapView const& parent, typename Map::const_iterator unfiltered_pos, - typename std::set:: + typename std::set:: const_iterator filter_pos) : parent_(std::cref(parent)), unfiltered_pos_(std::move(unfiltered_pos)), @@ -132,11 +130,11 @@ class RangeFilteredMapView { } } - std::reference_wrapper parent_; + std::reference_wrapper parent_; typename Map::const_iterator unfiltered_pos_; - typename std::set< - typename PermittedRanges::Range, - typename PermittedRanges::Range::StartLess>::const_iterator filter_pos_; + typename std::set::const_iterator + filter_pos_; }; /** @@ -150,7 +148,8 @@ class RangeFilteredMapView { * @unfiltered the map whose elements need to be filtered. * @filter the range set which dictates which ranges should remain unfiltered. */ - RangeFilteredMapView(Map const& unfiltered, PermittedRanges const& filter) + StringRangeFilteredMapView(Map const& unfiltered, + StringRangeSet const& filter) : unfiltered_(std::cref(unfiltered)), filter_(std::cref(filter)) {} const_iterator begin() const { @@ -164,21 +163,165 @@ class RangeFilteredMapView { private: std::reference_wrapper unfiltered_; - std::reference_wrapper filter_; + std::reference_wrapper filter_; +}; + +/** + * A map view filtering elements by whether their keys fall into a timestamp + * range set. 
+ * + * Objects of this type provide a lightweight wrapper around `std::map`-like + * object, which provides a iterator, which will skip over unwanted elements. + * + * This class is not very generic. It should be thought of as a crude way of + * deduplicating code. + * + * The unfiltered elements' keys should fall into a given timestamp range set. + * + * Note that the implementation assumes that the input `std::map`-like object's + * iterator is sorted high-to-low. + * + * @tparam Map the type of the map-like object to be wrapped. + */ +template +class TimestampRangeFilteredMapView { + public: + // NOLINTNEXTLINE(readability-identifier-naming) + class const_iterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = + typename std::iterator_traits::value_type; + using difference_type = typename std::iterator_traits< + typename Map::const_iterator>::difference_type; + using reference = value_type const&; + using pointer = value_type const*; + + // Note that the set whose iterator is received here is sorted + // "earliest-start-first", whereas we need to have the iterator sorted + // "latest-end-first". Fortunately, the set is disjoint, so we can simply + // use reverse iterator. 
+ const_iterator( + TimestampRangeFilteredMapView const& parent, + typename Map::const_iterator unfiltered_pos, + typename std::set:: + const_reverse_iterator filter_pos) + : parent_(std::cref(parent)), + unfiltered_pos_(std::move(unfiltered_pos)), + filter_pos_(std::move(filter_pos)) { + AdvanceToNextRange(); + EnsureIteratorValid(); + } + + const_iterator& operator++() { + ++unfiltered_pos_; + EnsureIteratorValid(); + return *this; + } + + const_iterator operator++(int) { + const_iterator retval = *this; + ++(*this); + return retval; + } + + bool operator==(const_iterator const& other) const { + return unfiltered_pos_ == other.unfiltered_pos_; + } + + bool operator!=(const_iterator const& other) const { + return !(*this == other); + } + + reference operator*() const { return *unfiltered_pos_; } + pointer operator->() const { return &*unfiltered_pos_; } + + private: + // Adjust `unfiltered_pos_` after we transition to a different range. + void AdvanceToNextRange() { + if (filter_pos_ == + parent_.get().filter_.get().disjoint_ranges().crend()) { + // We've reached the end. + unfiltered_pos_ = parent_.get().unfiltered_.get().end(); + return; + } + if (unfiltered_pos_ == parent_.get().unfiltered_.get().end()) { + // unfiltered_pos_ is already pointing far enough. + return; + } + if (!filter_pos_->IsAboveEnd(unfiltered_pos_->first)) { + // unfiltered_pos_ is already pointing far enough. + return; + } + // Timestamp ranges always have end open, so we always use upper_bound(). + unfiltered_pos_ = + parent_.get().unfiltered_.get().upper_bound(filter_pos_->end()); + } + + // After `unfiltered_pos_` was increased, make sure it's within a valid + // range. + void EnsureIteratorValid() { + // `unfiltered_pos_` may point to a row which is past the end of the range + // pointed by filter_pos_. Make sure this only happens when the iteration + // reaches its end. 
+ while (unfiltered_pos_ != parent_.get().unfiltered_.get().end() && + filter_pos_ != + parent_.get().filter_.get().disjoint_ranges().crend() && + filter_pos_->IsBelowStart(unfiltered_pos_->first)) { + ++filter_pos_; + AdvanceToNextRange(); + } + } + + std::reference_wrapper parent_; + typename Map::const_iterator unfiltered_pos_; + typename std::set:: + const_reverse_iterator filter_pos_; + }; + + /** + * Create a new object. + * + * Objects of this class store references to arguments passed in the + * constructor. The user is responsible for making sure that the referenced + * objects continue to exist throughout the lifetime of this object. They + * should also not change. + * + * @unfiltered the map whose elements need to be filtered. + * @filter the range set which dictates which ranges should remain unfiltered. + */ + TimestampRangeFilteredMapView(Map const& unfiltered, + TimestampRangeSet const& filter) + : unfiltered_(std::cref(unfiltered)), filter_(std::cref(filter)) {} + + const_iterator begin() const { + return const_iterator(*this, unfiltered_.get().begin(), + filter_.get().disjoint_ranges().crbegin()); + } + const_iterator end() const { + return const_iterator(*this, unfiltered_.get().end(), + filter_.get().disjoint_ranges().crend()); + } + + private: + std::reference_wrapper unfiltered_; + std::reference_wrapper filter_; }; /** * A map view filtering elements by whether their keys match a regex. * - * Objects of this type provide a lightweight wrapper around `std::map`, which - * provides a iterator, which will skip over unwanted elements. + * Objects of this type provide a lightweight wrapper around `std::map`-like + * object, which provides a iterator, which will skip over unwanted elements. * * This class is not very generic. It should be thought of as a crude way of * deduplicating code. * * Elements whose keys match all regexes are not filtered out. 
* - * @tparam Map the type of the map to be wrapped, an instantiation of `std::map` + * @tparam Map the type of the map-like object to be wrapped. */ template class RegexFiteredMapView { diff --git a/google/cloud/bigtable/emulator/filtered_map_test.cc b/google/cloud/bigtable/emulator/filtered_map_test.cc index 69dd3856d5b98..e8fe9d989e4d7 100644 --- a/google/cloud/bigtable/emulator/filtered_map_test.cc +++ b/google/cloud/bigtable/emulator/filtered_map_test.cc @@ -25,6 +25,8 @@ namespace cloud { namespace bigtable { namespace emulator { +using testing_util::chrono_literals::operator""_ms; + bool const kOpen = true; bool const kClosed = false; @@ -46,83 +48,85 @@ std::vector Vec(std::initializer_list const& v) { return res; } -TEST(RangeFilteredMapView, NoFilter) { +template +std::vector TSKeys(Map const& map) { + std::vector res; + std::transform(map.begin(), map.end(), std::back_inserter(res), + [](typename Map::const_iterator::value_type const& elem) { + return elem.first; + }); + return res; +} + +TEST(StringRangeFilteredMapView, NoFilter) { std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; auto filter = StringRangeSet::All(); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"zero", "one", "two"}), Keys(filtered)); } -TEST(RangeFilteredMapView, EmptyFilter) { +TEST(StringRangeFilteredMapView, EmptyFilter) { std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; auto filter = StringRangeSet::Empty(); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({}), Keys(filtered)); } -TEST(RangeFilteredMapView, OneOpen) { +TEST(StringRangeFilteredMapView, OneOpen) { std::map unfiltered{{"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; auto filter = StringRangeSet::Empty(); filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); - RangeFilteredMapView filtered( - unfiltered, 
filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); } -TEST(RangeFilteredMapView, OneClosed) { +TEST(StringRangeFilteredMapView, OneClosed) { std::map unfiltered{{"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; auto filter = StringRangeSet::Empty(); filter.Sum(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb", "AAB"}), Keys(filtered)); } -TEST(RangeFilteredMapView, NoEntriesAfterClosedFilter) { +TEST(StringRangeFilteredMapView, NoEntriesAfterClosedFilter) { std::map unfiltered{ {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}}; auto filter = StringRangeSet::Empty(); filter.Sum(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb"}), Keys(filtered)); } -TEST(RangeFilteredMapView, NoEntriesAfterOpenFilter) { +TEST(StringRangeFilteredMapView, NoEntriesAfterOpenFilter) { std::map unfiltered{ {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}}; auto filter = StringRangeSet::Empty(); filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); } -TEST(RangeFilteredMapView, NoEntriesBeforeClosedFilter) { +TEST(StringRangeFilteredMapView, NoEntriesBeforeClosedFilter) { std::map unfiltered{ {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; auto filter = StringRangeSet::Empty(); filter.Sum(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb", "AAB"}), 
Keys(filtered)); } -TEST(RangeFilteredMapView, NoEntriesBeforeOpenFilter) { +TEST(StringRangeFilteredMapView, NoEntriesBeforeOpenFilter) { std::map unfiltered{ {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; auto filter = StringRangeSet::Empty(); filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); } -TEST(RangeFilteredMapView, MultipleFilters) { +TEST(StringRangeFilteredMapView, MultipleFilters) { std::map unfiltered{ {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}, {"BB", 0}, {"BBB", 0}, {"BBBb", 0}, {"CCCa", 0}, @@ -131,13 +135,71 @@ TEST(RangeFilteredMapView, MultipleFilters) { filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kClosed)); filter.Sum(StringRangeSet::Range("BBB", kClosed, "BBC", kOpen)); filter.Sum(StringRangeSet::Range("CCC", kClosed, "CCD", kOpen)); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"AAAa", "AAAb", "AAB", "BBB", "BBBb", "CCCa", "CCCb"}), Keys(filtered)); } +TEST(TimestampRangeFilteredMapView, NoFilter) { + std::map> unfiltered{ + {0_ms, 0}, {1_ms, 1}, {2_ms, 2}}; + auto filter = TimestampRangeSet::All(); + TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({2_ms, 1_ms, 0_ms}), TSKeys(filtered)); +} + +TEST(TimestampRangeFilteredMapView, EmptyFilter) { + std::map> unfiltered{ + {0_ms, 0}, {1_ms, 1}, {2_ms, 2}}; + auto filter = TimestampRangeSet::Empty(); + TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({}), TSKeys(filtered)); +} + +TEST(TimestampRangeFilteredMapView, FiniteRange) { + std::map> unfiltered{ + {0_ms, 0}, {1_ms, 0}, {2_ms, 0}, {3_ms, 0}, {4_ms, 0}}; + auto filter = TimestampRangeSet::Empty(); + filter.Sum(TimestampRangeSet::Range(1_ms, 3_ms)); + 
TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({2_ms, 1_ms}), TSKeys(filtered)); +} + +TEST(TimestampRangeFilteredMapView, InfiniteRange) { + std::map> unfiltered{ + {0_ms, 0}, {1_ms, 0}, {2_ms, 0}, {3_ms, 0}, {4_ms, 0}}; + auto filter = TimestampRangeSet::Empty(); + filter.Sum(TimestampRangeSet::Range(1_ms, 0_ms)); + TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({4_ms, 3_ms, 2_ms, 1_ms}), TSKeys(filtered)); +} + +TEST(TimestampRangeFilteredMapView, MultipleFilters) { + std::chrono::milliseconds max_millis(std::numeric_limits::max()); + std::map> unfiltered{ + {0_ms, 0}, {1_ms, 0}, {2_ms, 0}, {3_ms, 0}, + {4_ms, 0}, {5_ms, 0}, {6_ms, 0}, {7_ms, 0}, + {8_ms, 0}, {9_ms, 0}, {10_ms, 0}, {11_ms, 0}, + {12_ms, 0}, {13_ms, 0}, {14_ms, 0}, {max_millis, 0}, + }; + auto filter = TimestampRangeSet::Empty(); + filter.Sum(TimestampRangeSet::Range(1_ms, 3_ms)); + filter.Sum(TimestampRangeSet::Range(3_ms, 5_ms)); + filter.Sum(TimestampRangeSet::Range(6_ms, 8_ms)); + filter.Sum(TimestampRangeSet::Range(10_ms, 12_ms)); + filter.Sum(TimestampRangeSet::Range(13_ms, 0_ms)); + TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({max_millis, 14_ms, 13_ms, 11_ms, 10_ms, 7_ms, 6_ms, + 4_ms, 3_ms, 2_ms, 1_ms}), + TSKeys(filtered)); +} + TEST(RegexFiteredMapView, NoFilter) { std::vector> patterns; std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; diff --git a/google/cloud/bigtable/emulator/range_set.h b/google/cloud/bigtable/emulator/range_set.h index dd5e87cdd69ef..0495543766a79 100644 --- a/google/cloud/bigtable/emulator/range_set.h +++ b/google/cloud/bigtable/emulator/range_set.h @@ -146,18 +146,9 @@ class TimestampRangeSet { google::bigtable::v2::TimestampRange const& timestamp_range); Value start() const { return start_; } - Value start_finite() const { return start_; } - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - bool start_open() const { 
return false; } - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - bool start_closed() const { return true; } void set_start(Range const& source) { start_ = source.start_; } Value end() const { return end_; } - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - bool end_open() const { return true; } - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - bool end_closed() const { return false; } void set_end(Range const& source) { end_ = source.end_; } bool IsBelowStart(Value value) const { return value < start_; } diff --git a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc index df635e6c9c9d3..b214d753d7c7f 100644 --- a/google/cloud/bigtable/emulator/range_set_test.cc +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -528,12 +528,7 @@ TEST(TimestampRangeSet, FromInfiniteTimestampRange) { google::bigtable::v2::TimestampRange{}); ASSERT_STATUS_OK(infinite); EXPECT_EQ(0_ms, infinite->start()); - EXPECT_EQ(0_ms, infinite->start_finite()); EXPECT_EQ(0_ms, infinite->end()); - EXPECT_TRUE(infinite->start_closed()); - EXPECT_TRUE(infinite->end_open()); - EXPECT_FALSE(infinite->start_open()); - EXPECT_FALSE(infinite->end_closed()); } TEST(TimestampRangeSet, FromFiniteTimestampRange) { @@ -544,12 +539,7 @@ TEST(TimestampRangeSet, FromFiniteTimestampRange) { auto finite = TimestampRangeSet::Range::FromTimestampRange(proto); ASSERT_STATUS_OK(finite); EXPECT_EQ(1_ms, finite->start()); - EXPECT_EQ(1_ms, finite->start_finite()); EXPECT_EQ(123456_ms, finite->end()); - EXPECT_TRUE(finite->start_closed()); - EXPECT_TRUE(finite->end_open()); - EXPECT_FALSE(finite->start_open()); - EXPECT_FALSE(finite->end_closed()); } TEST(TimestampRangeSet, RangeStartLess) { From 519b1f1fdd1f96a5bdc82529b63dfaf7dee5aea8 Mon Sep 17 00:00:00 2001 From: Adam Czajkowski <48181325+prawilny@users.noreply.github.com> Date: Fri, 23 May 2025 09:37:29 +0200 Subject: [PATCH 145/195] Remove redundant 
includes --- google/cloud/bigtable/emulator/column_family_test.cc | 2 -- google/cloud/bigtable/emulator/drop_row_range_test.cc | 2 -- google/cloud/bigtable/emulator/filtered_map_test.cc | 3 --- google/cloud/bigtable/emulator/rollback_test.cc | 2 -- google/cloud/bigtable/emulator/table.cc | 2 -- google/cloud/bigtable/emulator/table_test.cc | 3 --- 6 files changed, 14 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index afb40febc4507..3f94fffce0952 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -13,9 +13,7 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/column_family.h" -#include "google/cloud/bigtable/row_range.h" #include "google/cloud/testing_util/chrono_literals.h" -#include "google/cloud/testing_util/is_proto_equal.h" #include #include diff --git a/google/cloud/bigtable/emulator/drop_row_range_test.cc b/google/cloud/bigtable/emulator/drop_row_range_test.cc index 9ee7905daa551..7bb62616f86e9 100644 --- a/google/cloud/bigtable/emulator/drop_row_range_test.cc +++ b/google/cloud/bigtable/emulator/drop_row_range_test.cc @@ -13,13 +13,11 @@ // limitations under the License. 
#include "google/cloud/bigtable/emulator/column_family.h" -#include "google/cloud/bigtable/emulator/row_streamer.h" #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/internal/make_status.h" #include "google/cloud/status.h" #include "google/cloud/status_or.h" #include "google/cloud/testing_util/status_matchers.h" -#include "gmock/gmock.h" #include #include #include diff --git a/google/cloud/bigtable/emulator/filtered_map_test.cc b/google/cloud/bigtable/emulator/filtered_map_test.cc index e8fe9d989e4d7..86bf4fd4e8d91 100644 --- a/google/cloud/bigtable/emulator/filtered_map_test.cc +++ b/google/cloud/bigtable/emulator/filtered_map_test.cc @@ -13,10 +13,7 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/filtered_map.h" -#include "google/cloud/bigtable/row_range.h" #include "google/cloud/testing_util/chrono_literals.h" -#include "google/cloud/testing_util/is_proto_equal.h" -#include "google/cloud/testing_util/status_matchers.h" #include #include diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index ad8f5b8c1294d..b108ff050cdd5 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -13,13 +13,11 @@ // limitations under the License. 
#include "google/cloud/bigtable/emulator/column_family.h" -#include "google/cloud/bigtable/emulator/row_streamer.h" #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/internal/make_status.h" #include "google/cloud/status.h" #include "google/cloud/status_or.h" #include "google/cloud/testing_util/status_matchers.h" -#include "gmock/gmock.h" #include #include #include diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index af45cf8efe93f..98a6e308e4eec 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -15,9 +15,7 @@ #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/filter.h" -#include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/bigtable/emulator/range_set.h" -#include "google/cloud/bigtable/internal/google_bytes_traits.h" #include "google/cloud/internal/make_status.h" #include "google/protobuf/util/field_mask_util.h" #include diff --git a/google/cloud/bigtable/emulator/table_test.cc b/google/cloud/bigtable/emulator/table_test.cc index 1b251a7604200..04f115a229002 100644 --- a/google/cloud/bigtable/emulator/table_test.cc +++ b/google/cloud/bigtable/emulator/table_test.cc @@ -13,10 +13,7 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/table.h" -#include "google/cloud/bigtable/row_range.h" #include "google/cloud/testing_util/chrono_literals.h" -#include "google/cloud/testing_util/is_proto_equal.h" -#include "google/cloud/testing_util/status_matchers.h" #include namespace google { From cdb2d6f52472549ae022616e94ec5e2ff4d3f208 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 26 May 2025 16:22:10 +0300 Subject: [PATCH 146/195] emulator: SampleRowKeys: Start rewrite that uses a filter and CellStream instead. 
--- google/cloud/bigtable/emulator/server.cc | 24 +--- google/cloud/bigtable/emulator/table.cc | 141 ++--------------------- google/cloud/bigtable/emulator/table.h | 28 +---- 3 files changed, 12 insertions(+), 181 deletions(-) diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index d2cd47517390a..c865d5e693284 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -56,28 +56,8 @@ class EmulatorService final : public btproto::Bigtable::Service { grpc::Status SampleRowKeys( grpc::ServerContext* /* context */, - btproto::SampleRowKeysRequest const* request, - grpc::ServerWriter* writer) override { - auto maybe_table = cluster_->FindTable(request->table_name()); - if (!maybe_table) { - return ToGrpcStatus(maybe_table.status()); - } - - auto row_sampler = maybe_table.value()->SampleRowKeys(*request); - - while (true) { - auto sample = row_sampler.Next(); - - if (sample.row_key().empty()) { - auto opts = grpc::WriteOptions(); - opts.set_last_message(); - writer->WriteLast(std::move(sample), opts); - break; - } - - writer->Write(std::move(sample)); - } - + btproto::SampleRowKeysRequest const* /* request */, + grpc::ServerWriter* /* writer */) override { return grpc::Status::OK; } diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 527dc83aa7267..786db853b6c1b 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -502,141 +502,18 @@ bool Table::IsDeleteProtectedNoLock() const { return schema_.deletion_protection(); } -// NOLINTBEGIN(readability-function-cognitive-complexity) -RowSampler Table::SampleRowKeys( - google::bigtable::v2::SampleRowKeysRequest const&) { - struct SamplingContext { - // We pick the row key samples from just one column family (the - // largest). 
- std::string sample_src_cf; - size_t row_index = 0; - size_t max_num_rows = 0; - std::map::const_iterator row_iterator; - std::map::const_iterator row_end; - bool table_is_empty = true; - size_t offset_bytes = 0; - std::once_flag once_flag; - - std::random_device rd; - std::mt19937 gen = std::mt19937(rd()); - std::uniform_int_distribution<> distrib = - std::uniform_int_distribution<>(1, INT_MAX); - }; - - std::shared_ptr sampling_context = - std::make_shared(); - - auto next_sample = [=]() mutable { - // The first time the closure is called, initialize the - // context. The sampler works by advancing the iterator by varying - // steps every time the closure it contains is called in the - // server RPC context or elsewhere. We can't initialize the - // iterators before the closure is first called since we need to - // be holding the table lock first (in our scheme it is grabbed in - // the constructor of the RowSampler and the lock is kept until - // the RowSampler is destroyed.) - std::call_once(sampling_context->once_flag, [=]() { - // Pick rows from just the largest column family since we are - // just sampling. However offsets will be estimated based on the - // size of the row across all column families. - for (auto const& cf : column_families_) { - if (cf.second->size() > sampling_context->max_num_rows) { - sampling_context->table_is_empty = false; - sampling_context->sample_src_cf = cf.first; - sampling_context->row_iterator = cf.second->begin(); - sampling_context->row_end = cf.second->end(); - sampling_context->max_num_rows = cf.second->size(); - } - } - }); - - // The signal that there are no more rows (an empty row key). 
- if (sampling_context->table_is_empty || - sampling_context->row_iterator == sampling_context->row_end) { - google::bigtable::v2::SampleRowKeysResponse resp; - resp.set_row_key(""); - resp.set_offset_bytes(sampling_context->offset_bytes); - - return resp; - } - - for (auto& row = sampling_context->row_iterator; - sampling_context->row_iterator != sampling_context->row_end; - sampling_context->row_index++, sampling_context->row_iterator++) { - auto add_this_row_size_to_offset = [=] { - // First the offset due to the size of the row in the column - // family we are sampling. - sampling_context->offset_bytes += - (row->first.size() + row->second.size()); - - // Then consider the size of the row data in other column families, - // if they contain the row. - for (auto const& cf : column_families_) { - if (cf.first == sampling_context->sample_src_cf) { - continue; - } - - auto r = cf.second->find(row->first); - if (r != cf.second->end()) { - sampling_context->offset_bytes += - (row->first.size() + r->second.size()); - } - } - }; - - // If there are any rows we need to return at least one - // row. Always return the last one. - if (sampling_context->row_index == sampling_context->max_num_rows - 1) { - google::bigtable::v2::SampleRowKeysResponse resp; - resp.set_row_key(row->first); - resp.set_offset_bytes(sampling_context->offset_bytes); - - add_this_row_size_to_offset(); - - // We are returning early (without letting the for loop - // control update the iterators) so ensure that we consider a - // new row next time, otherwise we will be stuck in an - // infinite loop (will never advance the row iterator past the - // end of the map). - sampling_context->row_index++; - sampling_context->row_iterator++; - - return resp; - } +StatusOr Table::GetSampledRowsCellStream(double pass_probabilty) { + auto row_set = std::make_shared(StringRangeSet::All()); - // Sample about one every 100 rows randomly. 
- if (sampling_context->distrib(sampling_context->gen) % 100 == 0) { - google::bigtable::v2::SampleRowKeysResponse resp; - resp.set_row_key(row->first); - resp.set_offset_bytes(sampling_context->offset_bytes); + google::bigtable::v2::RowFilter sample_filter; + sample_filter.set_row_sample_filter(pass_probabilty); - add_this_row_size_to_offset(); - - sampling_context->row_index++; - sampling_context->row_iterator++; - - return resp; - } - - // This is a row we are not sampling, but we still need to - // account for its size for accurate offsets of subsequent - // sampled rows. - add_this_row_size_to_offset(); - } - - google::bigtable::v2::SampleRowKeysResponse resp; - resp.set_row_key(""); - resp.set_offset_bytes(sampling_context->offset_bytes); - return resp; - }; - - // We acquire the table lock here (in the constructor), so that - // every time we call row_sampler.Next() we always hold the lock, - // and will continue to hold it until the destructor of RowSampler - // is called. - RowSampler row_sampler(this->get(), next_sample); + auto maybe_stream = CreateCellStream(row_set, sample_filter); + if (!maybe_stream) { + return maybe_stream.status(); + } - return row_sampler; + return maybe_stream; } // NOLINTEND(readability-function-cognitive-complexity) diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 1a231b90dee87..8d646e91cfe71 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -86,8 +86,7 @@ class Table : public std::enable_shared_from_this
{ return column_families_.find(column_family); } - RowSampler SampleRowKeys( - const google::bigtable::v2::SampleRowKeysRequest&); + StatusOr GetSampledRowsCellStream(double pass_probabilty); std::shared_ptr
get() { return shared_from_this(); } @@ -98,7 +97,6 @@ class Table : public std::enable_shared_from_this
{ Table() = default; friend class RowSetIterator; friend class RowTransaction; - friend class RowSampler; template StatusOr> FindColumnFamily( @@ -179,30 +177,6 @@ class RowTransaction { std::string const& row_key_; }; -class RowSampler { - public: - explicit RowSampler( - std::shared_ptr
table, - std::function - next_sample_closure) { - table_ = std::move(table); - next_sample_closure_ = std::move(next_sample_closure); - - table_->mu_.lock(); - }; - - google::bigtable::v2::SampleRowKeysResponse Next() { - return next_sample_closure_(); - } - - ~RowSampler() { table_->mu_.unlock(); }; - - private: - std::shared_ptr
table_; - std::function - next_sample_closure_; -}; - /** * A `AbstractCellStreamImpl` which streams filtered contents of the table. * From c6bc6e7b2b12ffb0d1e65016b20cd04e7e56448c Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 26 May 2025 16:26:41 +0300 Subject: [PATCH 147/195] emulator: trivial mechanical formatting fixes. --- google/cloud/bigtable/emulator/server.cc | 3 ++- google/cloud/bigtable/emulator/table.cc | 6 ++---- google/cloud/bigtable/emulator/table.h | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index c865d5e693284..82573d9bbf814 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -57,7 +57,8 @@ class EmulatorService final : public btproto::Bigtable::Service { grpc::Status SampleRowKeys( grpc::ServerContext* /* context */, btproto::SampleRowKeysRequest const* /* request */, - grpc::ServerWriter* /* writer */) override { + grpc::ServerWriter* /* writer */) + override { return grpc::Status::OK; } diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 786db853b6c1b..cd6247aa5d882 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -37,8 +37,6 @@ #include #include #include -#include -#include #include namespace google { @@ -502,11 +500,11 @@ bool Table::IsDeleteProtectedNoLock() const { return schema_.deletion_protection(); } -StatusOr Table::GetSampledRowsCellStream(double pass_probabilty) { +StatusOr Table::GetSampledRowsCellStream(double pass_probability) { auto row_set = std::make_shared(StringRangeSet::All()); google::bigtable::v2::RowFilter sample_filter; - sample_filter.set_row_sample_filter(pass_probabilty); + sample_filter.set_row_sample_filter(pass_probability); auto maybe_stream = CreateCellStream(row_set, sample_filter); if (!maybe_stream) { diff --git 
a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 8d646e91cfe71..8e00218f8d2d6 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -86,7 +86,7 @@ class Table : public std::enable_shared_from_this
{ return column_families_.find(column_family); } - StatusOr GetSampledRowsCellStream(double pass_probabilty); + StatusOr GetSampledRowsCellStream(double pass_probability); std::shared_ptr
get() { return shared_from_this(); } From 35045290c3db4485c1a5416d3873b6944b0761e9 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 26 May 2025 18:12:48 +0300 Subject: [PATCH 148/195] emulator: Re-implement SampleRowKeys to use the sampled CellStream. --- google/cloud/bigtable/emulator/server.cc | 76 +++++++++++++++++++++++- google/cloud/bigtable/emulator/table.cc | 1 - 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index 82573d9bbf814..5533ad5946dd6 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -27,6 +27,7 @@ #include #include #include +#include #include namespace google { @@ -56,9 +57,80 @@ class EmulatorService final : public btproto::Bigtable::Service { grpc::Status SampleRowKeys( grpc::ServerContext* /* context */, - btproto::SampleRowKeysRequest const* /* request */, - grpc::ServerWriter* /* writer */) + btproto::SampleRowKeysRequest const* request, + grpc::ServerWriter* writer) override { + auto maybe_table = cluster_->FindTable(request->table_name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + + auto& table = maybe_table.value(); + + // Return ~ 1/100 rows. 
+ auto maybe_stream = table->GetSampledRowsCellStream(static_cast(1)/100); + if (!maybe_stream) { + return ToGrpcStatus(maybe_stream.status()); + } + + auto& stream = *maybe_stream; + size_t offset_bytes = 0; + + for (; stream; ++stream) { + size_t row_size = 0; + + row_size += stream->row_key().size(); + row_size += stream->column_family().size(); + row_size += stream->column_qualifier().size(); + row_size += sizeof(stream->timestamp()); + row_size += stream->value().size(); + + // This is an estimate + offset_bytes += (100 * row_size); + + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key(stream->row_key()); + resp.set_offset_bytes(offset_bytes); + + writer->Write(std::move(resp)); + } + + // We need to return at least one row if the table is not empty; + if (!offset_bytes) { + for (auto & column_family_it : *table) { + auto row_it = column_family_it.second->begin(); + if (row_it == column_family_it.second->end()) { + // Empty column family + continue; + } + + // We have a row + offset_bytes += row_it->first.size(); + offset_bytes += row_it->second.size(); + + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key(row_it->first); + resp.set_offset_bytes(offset_bytes); + + writer->Write(std::move(resp)); + + break; + } + } + + // Client code expects the last response to be an empty row key + // and moreover it also expects the offset for the last response + // to be more than every other offset. + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key(""); + // Client test code expects offset_bytes to be strictly + // increasing. 
+ resp.set_offset_bytes(offset_bytes + 1); + auto opts = grpc::WriteOptions(); + opts.set_last_message(); + writer->WriteLast(std::move(resp), opts); + + return grpc::Status::OK; } diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index cd6247aa5d882..ab8a4ff51e395 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -514,7 +514,6 @@ StatusOr Table::GetSampledRowsCellStream(double pass_probability) { return maybe_stream; } -// NOLINTEND(readability-function-cognitive-complexity) Status Table::DropRowRange( ::google::bigtable::admin::v2::DropRowRangeRequest const& request) { std::lock_guard lock(mu_); From ef4d3c2b7f091903893d1a4716da7473264ef5b9 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 26 May 2025 18:14:49 +0300 Subject: [PATCH 149/195] emulator: server.cc: mechanical formatting fixes. --- google/cloud/bigtable/emulator/server.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index 5533ad5946dd6..e89d52f7d75a6 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -58,8 +58,7 @@ class EmulatorService final : public btproto::Bigtable::Service { grpc::Status SampleRowKeys( grpc::ServerContext* /* context */, btproto::SampleRowKeysRequest const* request, - grpc::ServerWriter* writer) - override { + grpc::ServerWriter* writer) override { auto maybe_table = cluster_->FindTable(request->table_name()); if (!maybe_table) { return ToGrpcStatus(maybe_table.status()); @@ -68,7 +67,8 @@ class EmulatorService final : public btproto::Bigtable::Service { auto& table = maybe_table.value(); // Return ~ 1/100 rows. 
+ auto maybe_stream = + table->GetSampledRowsCellStream(static_cast(1) / 100); if (!maybe_stream) { return ToGrpcStatus(maybe_stream.status()); } @@ -97,7 +97,7 @@ class EmulatorService final : public btproto::Bigtable::Service { // We need to return at least one row if the table is not empty; if (!offset_bytes) { - for (auto & column_family_it : *table) { + for (auto& column_family_it : *table) { auto row_it = column_family_it.second->begin(); if (row_it == column_family_it.second->end()) { // Empty column family @@ -130,7 +130,6 @@ class EmulatorService final : public btproto::Bigtable::Service { opts.set_last_message(); writer->WriteLast(std::move(resp), opts); - return grpc::Status::OK; } From 4997b64fca5fba3e288dfc3a6876f006292720db Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 29 May 2025 14:54:29 +0300 Subject: [PATCH 150/195] emulator: SetCell: Fix the handling of 0 and negative timestamps. If < -1, return an error and fail the entire mutation chain. If -1, substitute the current system timestamp. If 0, store as is. Passing unit tests checking the above are also added.
--- .../cloud/bigtable/emulator/column_family.cc | 5 -- .../cloud/bigtable/emulator/rollback_test.cc | 61 ++++++++++++++++--- google/cloud/bigtable/emulator/table.cc | 15 +++-- 3 files changed, 63 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 2981c4c24c630..bc10dca786548 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -24,11 +24,6 @@ namespace emulator { absl::optional ColumnRow::SetCell( std::chrono::milliseconds timestamp, std::string const& value) { - if (timestamp <= std::chrono::milliseconds::zero()) { - timestamp = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()); - } - absl::optional ret = absl::nullopt; auto cell_it = cells_.find(timestamp); if (!(cell_it == cells_.end())) { diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index ad8f5b8c1294d..515dc6d096e91 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -263,6 +263,16 @@ Status HasRow(std::shared_ptr& table, // Test that SetCell does the right thing when it receives a zero or // negative timestamp, and that the cell created can be correctly // deleted if rollback occurs. +// +// In particular: +// +// Supplied with a timestamp of -1, it should store the current system time as +// timestamp. +// +// Supplied with a timestamp of 0, it should store it as is. +// +// Supplied with a timestamp < -1, it should return an error and fail the entire +// mutation chain. 
TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; @@ -294,16 +304,11 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { auto column = status_or.value(); ASSERT_EQ(1, column.size()); for (auto const& cell : column) { - ASSERT_GT(cell.first.count(), 0); + ASSERT_EQ(cell.first.count(), 0); ASSERT_EQ(data, cell.second); } - // Test that a SetCell mutation with timestamp set to 0 can be - // correctly rolled back. In the following, the first mutation - // (timestamp 0) should succeed and the next one should fail. The - // condition after that should be that the first one (timestamp 0) - // should be rolled back so that a row with row_key_2 key should not - // exist when the MutateRow request returns. + // Test that a mutation with timestamp 0 can be rolled back. v.clear(); v = {{column_family_name, column_qualifier, 0, data}, {"non_existent_column_family_name_causes_tx_rollbaclk", column_qualifier, @@ -312,6 +317,48 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { status = SetCells(table, table_name, row_key_2, v); ASSERT_NE(true, status.ok()); ASSERT_FALSE(HasRow(table, column_family_name, row_key_2).ok()); + + // Test that a mutation with timestamp 0 succeeds and stores 0 as + // the timestamp. + v.clear(); + v = { + {column_family_name, column_qualifier, 0, data}, + }; + auto const* const row_key_3 = "2"; + status = SetCells(table, table_name, row_key_3, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(HasCell(table, v[0].column_family_name, row_key_3, + v[0].column_qualifier, 0, v[0].data)); + + // Test that a mutation with timestamp < -1 fails + v.clear(); + v = { + {column_family_name, column_qualifier, -2, data}, + }; + auto const* const row_key_4 = "3"; + status = SetCells(table, table_name, row_key_4, v); + ASSERT_FALSE(status.ok()); + + // Test that a mutation with timestamp -1 suceeds and stores the + // system time. 
+ v.clear(); + v = { + {column_family_name, column_qualifier, -1, data}, + }; + auto const* const row_key_5 = "4"; + auto system_time_ms_before = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); + status = SetCells(table, table_name, row_key_5, v); + ASSERT_STATUS_OK(status); + auto column_or = GetColumn( + table, v[0].column_family_name, row_key_5, v[0].column_qualifier); + ASSERT_STATUS_OK(column_or.status()); + auto col = column_or.value(); + ASSERT_EQ(col.size(), 1); + auto cell_it = col.begin(); + ASSERT_NE(cell_it, col.end()); + ASSERT_EQ(cell_it->second, v[0].data); + ASSERT_GE(cell_it->first, system_time_ms_before); } // Does the SetCell mutation work to set a cell to a specific value? diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index af45cf8efe93f..58a3b4b626556 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -245,13 +245,16 @@ Status Table::DoMutationsWithPossibleRollback( absl::optional timestamp_override = absl::nullopt; - auto timestamp = std::chrono::duration_cast( - std::chrono::microseconds(set_cell.timestamp_micros())); + if (set_cell.timestamp_micros() < -1) { + return InvalidArgumentError( + "Timestamp micros cannot be < -1.", + GCP_ERROR_INFO().WithMetadata("mutation", mutation.DebugString())); + } - if (timestamp <= std::chrono::milliseconds::zero()) { - timestamp = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()); - timestamp_override.emplace(std::move(timestamp)); + if (set_cell.timestamp_micros() == -1) { + timestamp_override.emplace( + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch())); } auto status = row_transaction.SetCell(set_cell, timestamp_override); From 9b456f100e0a4a33e56b937158a57944dee52504 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 29 May 2025 23:39:01 +0300 Subject: [PATCH 151/195] emulator: 
SampleRowKeys: Re-write to compute exact offset for row keys. --- google/cloud/bigtable/emulator/server.cc | 66 +------------- google/cloud/bigtable/emulator/table.cc | 108 ++++++++++++++++++++++- google/cloud/bigtable/emulator/table.h | 9 +- 3 files changed, 112 insertions(+), 71 deletions(-) diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index e89d52f7d75a6..1cd1b6742d410 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -66,71 +66,7 @@ class EmulatorService final : public btproto::Bigtable::Service { auto& table = maybe_table.value(); - // Return ~ 1/100 rows. - auto maybe_stream = - table->GetSampledRowsCellStream(static_cast(1) / 100); - if (!maybe_stream) { - return ToGrpcStatus(maybe_stream.status()); - } - - auto& stream = *maybe_stream; - size_t offset_bytes = 0; - - for (; stream; ++stream) { - size_t row_size = 0; - - row_size += stream->row_key().size(); - row_size += stream->column_family().size(); - row_size += stream->column_qualifier().size(); - row_size += sizeof(stream->timestamp()); - row_size += stream->value().size(); - - // This is an estimate - offset_bytes += (100 * row_size); - - google::bigtable::v2::SampleRowKeysResponse resp; - resp.set_row_key(stream->row_key()); - resp.set_offset_bytes(offset_bytes); - - writer->Write(std::move(resp)); - } - - // We need to return at least one row if the table is not empty; - if (!offset_bytes) { - for (auto& column_family_it : *table) { - auto row_it = column_family_it.second->begin(); - if (row_it == column_family_it.second->end()) { - // Empty column family - continue; - } - - // We have a row - offset_bytes += row_it->first.size(); - offset_bytes += row_it->second.size(); - - google::bigtable::v2::SampleRowKeysResponse resp; - resp.set_row_key(row_it->first); - resp.set_offset_bytes(offset_bytes); - - writer->Write(std::move(resp)); - - break; - } - } - - // Client code expects the last response 
to be an empty row key - // and moreover it also expects the offset for the last response - // to be more than every other offset. - google::bigtable::v2::SampleRowKeysResponse resp; - resp.set_row_key(""); - // Client test code expects offset_bytes to be strictly - // increasing. - resp.set_offset_bytes(offset_bytes + 1); - auto opts = grpc::WriteOptions(); - opts.set_last_message(); - writer->WriteLast(std::move(resp), opts); - - return grpc::Status::OK; + return ToGrpcStatus(table->SampleRowKeys(0.0001, writer)); } grpc::Status MutateRow(grpc::ServerContext* /* context */, diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 8071ad43f5bfe..9c6bb29cd2174 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -503,18 +504,117 @@ bool Table::IsDeleteProtectedNoLock() const { return schema_.deletion_protection(); } -StatusOr Table::GetSampledRowsCellStream(double pass_probability) { - auto row_set = std::make_shared(StringRangeSet::All()); +Status Table::SampleRowKeys( + double pass_probability, + grpc::ServerWriter* writer) { + std::lock_guard lock(mu_); + + // First, stream all rows and cells and compute the offsets. 
+ auto all_rows_set = std::make_shared(StringRangeSet::All()); + auto maybe_all_rows_steam = CreateCellStream(all_rows_set, absl::nullopt); + if (!maybe_all_rows_steam) { + return maybe_all_rows_steam.status(); + } + + auto& stream = *maybe_all_rows_steam; + + std::map row_offset_map; + size_t row_offset = 0; + + std::string current_row_key; + bool first_row = true; + + std::map column_family_size_map; + std::map column_qualifer_size_map; + size_t timestamp_total_row_size = 0; + size_t value_total_row_size = 0; + + for (; stream; ++stream) { + auto row_key = stream->row_key(); + + if ((row_key != current_row_key) || first_row) { + row_offset += current_row_key.size(); + + for (auto const& cf : column_family_size_map) { + row_offset += cf.second; + } + + for (auto const& cq : column_qualifer_size_map) { + row_offset += cq.second; + } + + row_offset += timestamp_total_row_size; + row_offset += value_total_row_size; + + // The rows before this (row_key) have this size in total. + row_offset_map[row_key] = row_offset; + + current_row_key = row_key; + + first_row = false; + + column_family_size_map.clear(); + column_qualifer_size_map.clear(); + timestamp_total_row_size = 0; + value_total_row_size = 0; + } + + column_family_size_map.emplace(stream->column_family(), + stream->column_family().size()); + column_qualifer_size_map.emplace(stream->column_qualifier(), + stream->column_qualifier().size()); + timestamp_total_row_size += sizeof(stream->timestamp()); + value_total_row_size += stream->value().size(); + } google::bigtable::v2::RowFilter sample_filter; sample_filter.set_row_sample_filter(pass_probability); - auto maybe_stream = CreateCellStream(row_set, sample_filter); + auto maybe_stream = CreateCellStream(all_rows_set, sample_filter); if (!maybe_stream) { return maybe_stream.status(); } - return maybe_stream; + auto& sampled_stream = *maybe_stream; + + bool wrote_a_sample = false; + + for (; sampled_stream; ++sampled_stream) { + 
google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key(sampled_stream->row_key()); + resp.set_offset_bytes(row_offset_map[sampled_stream->row_key()]); + + writer->Write(std::move(resp)); + + wrote_a_sample = true; + } + + // Cloud bigtable client tests expect that, if they populated the + // table with at least one row, then at least one row sampele is + // returned. + // + // In such a case, return the last row key. + if (!wrote_a_sample && row_offset_map.size() > 0) { + auto it = std::prev(row_offset_map.end()); + + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key(it->first); + resp.set_offset_bytes(it->second); + } + + // Client code expects the last response to be an empty row key + // and moreover it also expects the offset for the last response + // to be more than every other offset. + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key(""); + // Client test code expects offset_bytes to be strictly + // increasing. + resp.set_offset_bytes(row_offset + 1); + auto opts = grpc::WriteOptions(); + opts.set_last_message(); + writer->WriteLast(std::move(resp), opts); + + return Status(); } Status Table::DropRowRange( diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 8e00218f8d2d6..e94ad33065ff4 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -32,10 +32,12 @@ #include #include #include +#include #include #include #include #include +#include #include namespace google { @@ -43,7 +45,10 @@ namespace cloud { namespace bigtable { namespace emulator { -class RowSampler; +struct CellStreamWithRowOffsets { + CellStream stream; + std::map offset_map; +}; /// Objects of this class represent Bigtable tables. class Table : public std::enable_shared_from_this
{ @@ -86,7 +91,7 @@ class Table : public std::enable_shared_from_this
{ return column_families_.find(column_family); } - StatusOr GetSampledRowsCellStream(double pass_probability); + Status SampleRowKeys(double pass_probability, grpc::ServerWriter* writer); std::shared_ptr
get() { return shared_from_this(); } From e870f915d125cbeb9c1d93bc8494cdfd561803e0 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 29 May 2025 23:53:23 +0300 Subject: [PATCH 152/195] emulator: Remove unused code from earlier PR iteration. --- google/cloud/bigtable/emulator/column_family.h | 15 --------------- google/cloud/bigtable/emulator/table.h | 5 ----- 2 files changed, 20 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index a1cdba009f3fc..e6d8e5b234e58 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -179,20 +179,6 @@ class ColumnFamilyRow { columns_.erase(column_it); } - size_t size() const { - size_t res = 0; - - for (auto const& c : columns_) { - res += c.first.size(); - for (auto const& cr : c.second) { - res += sizeof(cr.first); - res += cr.second.size(); - } - } - - return res; - }; - private: friend class ColumnFamily; @@ -312,7 +298,6 @@ class ColumnFamily { return rows_.erase(row_it); } - size_t size() const { return rows_.size(); } void clear() { rows_.clear(); } private: diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index e94ad33065ff4..f9a4b07516546 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -45,11 +45,6 @@ namespace cloud { namespace bigtable { namespace emulator { -struct CellStreamWithRowOffsets { - CellStream stream; - std::map offset_map; -}; - /// Objects of this class represent Bigtable tables. class Table : public std::enable_shared_from_this
{ public: From bd7f315a9cdc38010c000c600d04a831023cfa0c Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 30 May 2025 00:05:55 +0300 Subject: [PATCH 153/195] emulator: mechanical fixes of typos. --- google/cloud/bigtable/emulator/rollback_test.cc | 2 +- google/cloud/bigtable/emulator/table.cc | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 515dc6d096e91..919bccc0ec286 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -339,7 +339,7 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { status = SetCells(table, table_name, row_key_4, v); ASSERT_FALSE(status.ok()); - // Test that a mutation with timestamp -1 suceeds and stores the + // Test that a mutation with timestamp -1 succeeds and stores the // system time. v.clear(); v = { diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 9c6bb29cd2174..dd5caa1b8861b 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -525,7 +525,7 @@ Status Table::SampleRowKeys( bool first_row = true; std::map column_family_size_map; - std::map column_qualifer_size_map; + std::map column_qualifier_size_map; size_t timestamp_total_row_size = 0; size_t value_total_row_size = 0; @@ -539,7 +539,7 @@ Status Table::SampleRowKeys( row_offset += cf.second; } - for (auto const& cq : column_qualifer_size_map) { + for (auto const& cq : column_qualifier_size_map) { row_offset += cq.second; } @@ -554,14 +554,14 @@ Status Table::SampleRowKeys( first_row = false; column_family_size_map.clear(); - column_qualifer_size_map.clear(); + column_qualifier_size_map.clear(); timestamp_total_row_size = 0; value_total_row_size = 0; } column_family_size_map.emplace(stream->column_family(), stream->column_family().size()); - 
column_qualifer_size_map.emplace(stream->column_qualifier(), + column_qualifier_size_map.emplace(stream->column_qualifier(), stream->column_qualifier().size()); timestamp_total_row_size += sizeof(stream->timestamp()); value_total_row_size += stream->value().size(); From eab03f2314a781219c795618e7de82893e743dfa Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 30 May 2025 00:11:01 +0300 Subject: [PATCH 154/195] emulator: clang-tidy fixes. --- google/cloud/bigtable/emulator/table.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index dd5caa1b8861b..52e5294718cd2 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -239,6 +239,7 @@ Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { request.mutations()); } +// NOLINTBEGIN(readability-function-cognitive-complexity) Status Table::DoMutationsWithPossibleRollback( std::string const& row_key, google::protobuf::RepeatedPtrField const& @@ -307,6 +308,7 @@ Status Table::DoMutationsWithPossibleRollback( return Status(); } +// NOLINTEND(readability-function-cognitive-complexity) StatusOr Table::CreateCellStream( std::shared_ptr range_set, @@ -594,7 +596,7 @@ Status Table::SampleRowKeys( // returned. // // In such a case, return the last row key. 
- if (!wrote_a_sample && row_offset_map.size() > 0) { + if (!wrote_a_sample && !row_offset_map.empty()) { auto it = std::prev(row_offset_map.end()); google::bigtable::v2::SampleRowKeysResponse resp; From 5c35c23bac43a4e90dfd7b89b385f70a4b5fd24b Mon Sep 17 00:00:00 2001 From: Adam Czajkowski Date: Thu, 5 Jun 2025 15:21:37 +0200 Subject: [PATCH 155/195] Formatter and clang-tidy improvements --- .../cloud/bigtable/emulator/column_family.cc | 9 ++- .../cloud/bigtable/emulator/column_family.h | 11 ++-- google/cloud/bigtable/emulator/filter_test.cc | 66 +++++++++---------- google/cloud/bigtable/emulator/filtered_map.h | 4 +- .../bigtable/emulator/filtered_map_test.cc | 14 ++-- 5 files changed, 54 insertions(+), 50 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 1b2ce1f8a9b9e..80ded97f3c18d 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -42,9 +42,12 @@ std::vector ColumnRow::DeleteTimeRange( if (maybe_end_micros.value_or(0) == 0) { maybe_end_micros.reset(); } - for (auto cell_it = maybe_end_micros ? - upper_bound(std::chrono::duration_cast( - std::chrono::microseconds(*maybe_end_micros))) : begin(); + for (auto cell_it = + maybe_end_micros + ? 
upper_bound( + std::chrono::duration_cast( + std::chrono::microseconds(*maybe_end_micros))) + : begin(); cell_it != cells_.end() && cell_it->first >= std::chrono::duration_cast( std::chrono::microseconds( diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 32cc9ed535991..d408d32fef0ce 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -95,9 +95,7 @@ class ColumnRow { return cells_.find(timestamp); } - void erase(const_iterator timestamp_it) { - cells_.erase(timestamp_it); - } + void erase(const_iterator timestamp_it) { cells_.erase(timestamp_it); } private: // Note the order - the iterator return the freshest cells first. @@ -371,11 +369,10 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { mutable TimestampRangeSet timestamp_ranges_; RegexFiteredMapView> rows_; - mutable absl::optional>> + mutable absl::optional< + RegexFiteredMapView>> columns_; - mutable absl::optional> - cells_; + mutable absl::optional> cells_; // If row_it_ == rows_.end() we've reached the end. 
// We maintain the following invariant: diff --git a/google/cloud/bigtable/emulator/filter_test.cc b/google/cloud/bigtable/emulator/filter_test.cc index c99f50e340ee2..010adbd51bfff 100644 --- a/google/cloud/bigtable/emulator/filter_test.cc +++ b/google/cloud/bigtable/emulator/filter_test.cc @@ -60,7 +60,7 @@ class TestCell { public: TestCell(std::string row_key, std::string column_family, std::string column_qualifier, std::chrono::milliseconds timestamp, - std::string value, std::optional label = {}) + std::string value, absl::optional label = {}) : row_key_(std::move(row_key)), column_family_(std::move(column_family)), column_qualifier_(std::move(column_qualifier)), @@ -124,7 +124,7 @@ class TestCell { std::chrono::milliseconds timestamp_; std::string value_; CellView view_; - std::optional label_; + absl::optional label_; void maybe_label_view() { if (label_) { @@ -1269,8 +1269,8 @@ TEST_F(InternalFiltersAreApplied, TimestampRange) { class VectorCellStream : public AbstractCellStreamImpl { public: - explicit VectorCellStream(std::vector cells) - : cells_{std::move(cells)}, current_cell_{cells_.begin()} {} + explicit VectorCellStream(std::vector const& cells) + : cells_{cells}, current_cell_{cells_.begin()} {} bool ApplyFilter(InternalFilter const&) override { return false; } bool HasValue() const override { return current_cell_ != cells_.end(); } CellView const& Value() const override { return current_cell_->AsCellView(); } @@ -1291,7 +1291,7 @@ class FilterWorkTest : public ::testing::Test { public: protected: static StatusOr> GetFilterOutput( - std::vector const&& input_cells, RowFilter const& filter) { + std::vector const& input_cells, RowFilter const& filter) { auto maybe_stream = CreateFilter(filter, [input_cells] { return CellStream(std::make_unique(input_cells)); }); @@ -1305,8 +1305,8 @@ class FilterWorkTest : public ::testing::Test { filter_output.emplace_back( v->row_key(), v->column_family(), v->column_qualifier(), v->timestamp(), v->value(), - 
v->HasLabel() ? std::optional{v->label()} - : std::optional{}); + v->HasLabel() ? absl::optional{v->label()} + : absl::optional{}); maybe_stream->Next(); } return filter_output; @@ -1322,7 +1322,7 @@ TEST_F(FilterWorkTest, Pass) { TestCell{"r2", "cf", "q", 0_ms, "v"}, TestCell{"r2", "cf", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); EXPECT_EQ(cells, *maybe_output); @@ -1337,7 +1337,7 @@ TEST_F(FilterWorkTest, PassLabels) { TestCell{"r", "cf", "q", 0_ms, "v", "label2"}, TestCell{"r", "cf", "q", 0_ms, "v", "label3"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); EXPECT_EQ(cells, *maybe_output); @@ -1354,7 +1354,7 @@ TEST_F(FilterWorkTest, Sink) { // Next cell TestCell{"r2", "cf", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); EXPECT_EQ(cells, *maybe_output); @@ -1368,7 +1368,7 @@ TEST_F(FilterWorkTest, Block) { TestCell{"r1", "cf", "q", 0_ms, "v"}, TestCell{"r1", "cf", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); EXPECT_TRUE(maybe_output->empty()); @@ -1384,7 +1384,7 @@ TEST_F(FilterWorkTest, RowRegex) { TestCell{"r2", "cf", "q", 0_ms, "v"}, TestCell{"r3", "cf", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); ASSERT_EQ(2, maybe_output->size()); @@ -1401,7 +1401,7 @@ TEST_F(FilterWorkTest, ValueRegex) { TestCell{"r2", "cf", "q", 0_ms, "v2"}, TestCell{"r2", "cf", "q", 0_ms, "v3"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = 
GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); ASSERT_EQ(1, maybe_output->size()); @@ -1418,7 +1418,7 @@ TEST_F(FilterWorkTest, SampleRows) { for (size_t i = 0; i < samples; i++) { cells.emplace_back("r" + std::to_string(i), "cf", "q", 0_ms, "v"); } - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); EXPECT_NE(0, maybe_output->size()); @@ -1435,7 +1435,7 @@ TEST_F(FilterWorkTest, FamilyNameRegex) { TestCell{"r2", "cf2", "q", 0_ms, "v"}, TestCell{"r2", "cf3", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); ASSERT_EQ(2, maybe_output->size()); @@ -1453,7 +1453,7 @@ TEST_F(FilterWorkTest, QualifierRegex) { TestCell{"r2", "cf", "q2", 0_ms, "v"}, TestCell{"r2", "cf", "q3", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); ASSERT_EQ(2, maybe_output->size()); @@ -1473,7 +1473,7 @@ TEST_F(FilterWorkTest, ColumnRange) { TestCell{"r2", "cf", "q2", 0_ms, "v"}, TestCell{"r2", "cf", "q3", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); ASSERT_EQ(2, maybe_output->size()); @@ -1492,7 +1492,7 @@ TEST_F(FilterWorkTest, ValueRange) { TestCell{"r2", "cf", "q", 0_ms, "v2"}, TestCell{"r3", "cf", "q", 0_ms, "v3"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); ASSERT_EQ(2, maybe_output->size()); @@ -1515,7 +1515,7 @@ TEST_F(FilterWorkTest, CellsPerRowOffset) { TestCell{"r4", "cf", "q", 0_ms, "v"}, TestCell{"r4", "cf", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto 
maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); ASSERT_EQ(5, maybe_output->size()); @@ -1540,7 +1540,7 @@ TEST_F(FilterWorkTest, CellsPerRowLimit) { TestCell{"r4", "cf", "q", 0_ms, "v"}, TestCell{"r4", "cf", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); ASSERT_EQ(4, maybe_output->size()); @@ -1565,7 +1565,7 @@ TEST_F(FilterWorkTest, LatestCellsPerColumnLimit) { TestCell{"r4", "cf", "q", 0_ms, "v"}, TestCell{"r4", "cf", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); ASSERT_EQ(6, maybe_output->size()); @@ -1587,7 +1587,7 @@ TEST_F(FilterWorkTest, TimestampRange) { TestCell{"r2", "cf", "q", 2_ms, "v"}, TestCell{"r3", "cf", "q", 1_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); ASSERT_EQ(1, maybe_output->size()); @@ -1603,7 +1603,7 @@ TEST_F(FilterWorkTest, Label) { TestCell{"r1", "cf", "q", 0_ms, "v"}, TestCell{"r1", "cf", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); TestCell expected{"r1", "cf", "q", 0_ms, "v", label}; @@ -1621,7 +1621,7 @@ TEST_F(FilterWorkTest, StripValue) { TestCell{"r1", "cf", "q", 0_ms, "v"}, TestCell{"r1", "cf", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); TestCell expected{"r1", "cf", "q", 0_ms, ""}; @@ -1647,7 +1647,7 @@ TEST_F(FilterWorkTest, Chain) { TestCell{"r3", "cf", "q", 2_ms, "v"}, TestCell{"r3", "cf", "q", 1_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto 
maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); ASSERT_EQ(3, maybe_output->size()); @@ -1665,7 +1665,7 @@ TEST_F(FilterWorkTest, ChainEmpty) { TestCell{"r2", "cf", "q", 0_ms, "v"}, TestCell{"r2", "cf", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); EXPECT_EQ(cells, *maybe_output); @@ -1681,7 +1681,7 @@ TEST_F(FilterWorkTest, ChainSink) { TestCell{"r2", "cf", "q", 0_ms, "v"}, TestCell{"r2", "cf", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); EXPECT_EQ(cells, *maybe_output); @@ -1700,7 +1700,7 @@ TEST_F(FilterWorkTest, Interleave) { TestCell{"r2", "cf2", "q", 0_ms, "v"}, TestCell{"r3", "cf1", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); EXPECT_EQ(cells, *maybe_output); @@ -1714,7 +1714,7 @@ TEST_F(FilterWorkTest, InterleaveEmpty) { TestCell{"r1", "cf", "q", 0_ms, "v"}, TestCell{"r2", "cf", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); EXPECT_EQ(0, maybe_output->size()); @@ -1733,7 +1733,7 @@ TEST_F(FilterWorkTest, InterleaveSink) { TestCell{"r2", "cf", "q", 0_ms, "v"}, TestCell{"r2", "cf", "q", 0_ms, "v"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); ASSERT_EQ(cells.size() * 4, maybe_output->size()); @@ -1771,7 +1771,7 @@ TEST_F(FilterWorkTest, RegexInterleaveChainLabelSinkRegex) { TestCell("r", "A", "B", 2_ms, "x"), TestCell("r", "B", "B", 4_ms, "z"), }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output 
= GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); TestCell labeled0 = cells[0].Labeled("foo"); @@ -1813,7 +1813,7 @@ TEST_F(FilterWorkTest, ConditionEmptyNonempty) { TestCell{"r4", "cf", "q", 2_ms, "f"}, TestCell{"r4", "cf", "q", 1_ms, "t"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); ASSERT_EQ(cells.size(), maybe_output->size()); @@ -1870,7 +1870,7 @@ TEST_F(FilterWorkTest, ConditionBranchFilterNextDifferentThanCell) { TestCell{"r4", "cf", "q", 2_ms, "f"}, TestCell{"r4", "cf", "q", 1_ms, "t"}, }; - auto maybe_output = GetFilterOutput(std::move(cells), filter); + auto maybe_output = GetFilterOutput(cells, filter); ASSERT_STATUS_OK(maybe_output); std::vector expected{ diff --git a/google/cloud/bigtable/emulator/filtered_map.h b/google/cloud/bigtable/emulator/filtered_map.h index 8b0e8b88d987f..e94359eaf16fe 100644 --- a/google/cloud/bigtable/emulator/filtered_map.h +++ b/google/cloud/bigtable/emulator/filtered_map.h @@ -206,10 +206,10 @@ class TimestampRangeFilteredMapView { typename Map::const_iterator unfiltered_pos, typename std::set:: - const_reverse_iterator filter_pos) + const_reverse_iterator const& filter_pos) : parent_(std::cref(parent)), unfiltered_pos_(std::move(unfiltered_pos)), - filter_pos_(std::move(filter_pos)) { + filter_pos_(filter_pos) { AdvanceToNextRange(); EnsureIteratorValid(); } diff --git a/google/cloud/bigtable/emulator/filtered_map_test.cc b/google/cloud/bigtable/emulator/filtered_map_test.cc index 86bf4fd4e8d91..e1b3edfcbf483 100644 --- a/google/cloud/bigtable/emulator/filtered_map_test.cc +++ b/google/cloud/bigtable/emulator/filtered_map_test.cc @@ -144,7 +144,8 @@ TEST(TimestampRangeFilteredMapView, NoFilter) { auto filter = TimestampRangeSet::All(); TimestampRangeFilteredMapView filtered(unfiltered, filter); - EXPECT_EQ(std::vector({2_ms, 1_ms, 0_ms}), TSKeys(filtered)); + EXPECT_EQ(std::vector({2_ms, 1_ms, 
0_ms}), + TSKeys(filtered)); } TEST(TimestampRangeFilteredMapView, EmptyFilter) { @@ -163,7 +164,8 @@ TEST(TimestampRangeFilteredMapView, FiniteRange) { filter.Sum(TimestampRangeSet::Range(1_ms, 3_ms)); TimestampRangeFilteredMapView filtered(unfiltered, filter); - EXPECT_EQ(std::vector({2_ms, 1_ms}), TSKeys(filtered)); + EXPECT_EQ(std::vector({2_ms, 1_ms}), + TSKeys(filtered)); } TEST(TimestampRangeFilteredMapView, InfiniteRange) { @@ -173,7 +175,8 @@ TEST(TimestampRangeFilteredMapView, InfiniteRange) { filter.Sum(TimestampRangeSet::Range(1_ms, 0_ms)); TimestampRangeFilteredMapView filtered(unfiltered, filter); - EXPECT_EQ(std::vector({4_ms, 3_ms, 2_ms, 1_ms}), TSKeys(filtered)); + EXPECT_EQ(std::vector({4_ms, 3_ms, 2_ms, 1_ms}), + TSKeys(filtered)); } TEST(TimestampRangeFilteredMapView, MultipleFilters) { @@ -192,8 +195,9 @@ TEST(TimestampRangeFilteredMapView, MultipleFilters) { filter.Sum(TimestampRangeSet::Range(13_ms, 0_ms)); TimestampRangeFilteredMapView filtered(unfiltered, filter); - EXPECT_EQ(std::vector({max_millis, 14_ms, 13_ms, 11_ms, 10_ms, 7_ms, 6_ms, - 4_ms, 3_ms, 2_ms, 1_ms}), + EXPECT_EQ(std::vector({max_millis, 14_ms, 13_ms, + 11_ms, 10_ms, 7_ms, 6_ms, + 4_ms, 3_ms, 2_ms, 1_ms}), TSKeys(filtered)); } From 620fc1f891b36408dceeddc91abd4cf6ef457fc0 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 10 Jun 2025 22:09:37 +0300 Subject: [PATCH 156/195] emulator: Fixes for typos. These are mechanical fixes by typos(1). All tests continue to pass and everything still builds. 
--- google/cloud/bigtable/emulator/column_family.cc | 8 ++++---- google/cloud/bigtable/emulator/column_family.h | 4 ++-- google/cloud/bigtable/emulator/rollback_test.cc | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index bc10dca786548..185d3a9033f0b 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -88,8 +88,8 @@ std::vector ColumnFamilyRow::DeleteColumn( } absl::optional ColumnFamilyRow::DeleteTimeStamp( - std::string const& column_qulifier, std::chrono::milliseconds timestamp) { - auto column_it = columns_.find(column_qulifier); + std::string const& column_qualifier, std::chrono::milliseconds timestamp) { + auto column_it = columns_.find(column_qualifier); if (column_it == columns_.end()) { return absl::nullopt; } @@ -155,14 +155,14 @@ std::vector ColumnFamily::DeleteColumn( } absl::optional ColumnFamily::DeleteTimeStamp( - std::string const& row_key, std::string const& column_qulifier, + std::string const& row_key, std::string const& column_qualifier, std::chrono::milliseconds timestamp) { auto row_it = rows_.find(row_key); if (row_it == rows_.end()) { return absl::nullopt; } - auto ret = row_it->second.DeleteTimeStamp(column_qulifier, timestamp); + auto ret = row_it->second.DeleteTimeStamp(column_qualifier, timestamp); if (!row_it->second.HasColumns()) { rows_.erase(row_it); } diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index d38824289c0cd..b4f3c24789f22 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -155,7 +155,7 @@ class ColumnFamilyRow { * @return Cell representing deleted cell, if there was a cell with * that timestamp in then given column, otherwise absl::nullopt. 
*/ - absl::optional DeleteTimeStamp(std::string const& column_qulifier, + absl::optional DeleteTimeStamp(std::string const& column_qualifier, std::chrono::milliseconds timestamp); bool HasColumns() { return !columns_.empty(); } @@ -268,7 +268,7 @@ class ColumnFamily { * otherwise absl::nullopt. */ absl::optional DeleteTimeStamp(std::string const& row_key, - std::string const& column_qulifier, + std::string const& column_qualifier, std::chrono::milliseconds timestamp); const_iterator begin() const { return rows_.begin(); } diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 515dc6d096e91..919bccc0ec286 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -339,7 +339,7 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { status = SetCells(table, table_name, row_key_4, v); ASSERT_FALSE(status.ok()); - // Test that a mutation with timestamp -1 suceeds and stores the + // Test that a mutation with timestamp -1 succeeds and stores the // system time. v.clear(); v = { From 1a25b1335b35a5f34b5e27fc11786b5eed9c67f4 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 12 Jun 2025 19:32:12 +0300 Subject: [PATCH 157/195] Implement the ReadModifyWriteRow RPC. * emulator: ReadModifyWrite: Finalize internal API design for the RPC implementation. * emulator: Implement ReadModifyWrite in the Column Family code. Changes in the RowTransaction code and unit tests remain, but this should be all that is needed for ReadModifyWrite where the column families are concerned. * emulator: ReadModifyWrite: Introduce Table and RowTransaction API. * emulator: Implement Table::ReadModifyWriteRow. * emulator: Fix typo. * emulator: ReadModifyWrite: Checkpoint (WIP). * emulator: Complete implementation of RowTransaction::ReadModifyWriteRow. * emulator: mechanical typo fixes. * emulator: server.cc: Finish implementing the ReadModifyWrite RPC. 
* emulator: mechanical spelling fixes (by typos). * emulator: ReadModifyWrite: Eliminate some code duplication. * Fixes for review. - move a potentially large value (the cell value). --- .../cloud/bigtable/emulator/column_family.cc | 94 ++++++++++++++ .../cloud/bigtable/emulator/column_family.h | 56 ++++++++ google/cloud/bigtable/emulator/server.cc | 16 ++- google/cloud/bigtable/emulator/table.cc | 121 ++++++++++++++++++ google/cloud/bigtable/emulator/table.h | 9 ++ 5 files changed, 294 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 185d3a9033f0b..92f0ac4ca05b3 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/internal/big_endian.h" #include #include +#include #include namespace google { @@ -22,6 +24,98 @@ namespace cloud { namespace bigtable { namespace emulator { +// FIXME: Workaround our current incorrect ordering of +// timestamps. Remove when that is fixed and they are in decreasing +// order, at which point we can just pick the first element. +std::map::iterator latest( + std::map& cells_not_empty) { + assert(!cells_not_empty.empty()); + + auto first_it = cells_not_empty.begin(); + auto last_it = std::prev(cells_not_empty.end()); + auto latest_it = first_it->first >= last_it->first ? 
first_it : last_it; + + return latest_it; +} + +StatusOr ColumnRow::ReadModifyWrite( + std::int64_t inc_value) { + auto system_ms = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); + + if (cells_.empty()) { + std::string value = google::cloud::internal::EncodeBigEndian(inc_value); + cells_[system_ms] = value; + + return ReadModifyWriteCellResult{system_ms, std::move(value), + absl::nullopt}; + } + + // FIXME: Workaround our current incorrect ordering of + // timestamps. Remove when that is fixed and they are in decreasing + // order, at which point we can just pick the first element. + auto latest_it = latest(cells_); + + auto maybe_old_value = + google::cloud::internal::DecodeBigEndian( + latest_it->second); + if (!maybe_old_value) { + return maybe_old_value.status(); + } + + auto value = google::cloud::internal::EncodeBigEndian( + inc_value + maybe_old_value.value()); + + if (latest_it->first < system_ms) { + // We need to add a cell with the current system timestamp + cells_[system_ms] = value; + + return ReadModifyWriteCellResult{system_ms, std::move(value), + absl::nullopt}; + } + + // Latest timestamp is >= system time. Overwrite latest timestamp + auto old_value = std::move(latest_it->second); + latest_it->second = value; + + return ReadModifyWriteCellResult{latest_it->first, std::move(value), + std::move(old_value)}; +} + +ReadModifyWriteCellResult ColumnRow::ReadModifyWrite( + std::string const& append_value) { + auto system_ms = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); + if (cells_.empty()) { + cells_[system_ms] = append_value; + + return ReadModifyWriteCellResult{system_ms, std::move(append_value), + absl::nullopt}; + } + + // FIXME: Workaround our current incorrect ordering of + // timestamps. Remove when that is fixed and they are in decreasing + // order, at which point we can just pick the first element. 
+ auto latest_it = latest(cells_); + + if (latest_it->first < system_ms) { + // We need to add a cell with the current system timestamp + auto value = latest_it->second + append_value; + cells_[system_ms] = value; + + return ReadModifyWriteCellResult{system_ms, std::move(value), + absl::nullopt}; + } + + // Latest timestamp is >= system time. Overwrite latest timestamp + auto value = latest_it->second + append_value; + auto old_value = std::move(latest_it->second); + latest_it->second = value; + + return ReadModifyWriteCellResult{latest_it->first, value, + std::move(old_value)}; +} + absl::optional ColumnRow::SetCell( std::chrono::milliseconds timestamp, std::string const& value) { absl::optional ret = absl::nullopt; diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index b4f3c24789f22..4db78899bd5db 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -19,11 +19,17 @@ #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/bigtable/read_modify_write_rule.h" +#include "google/cloud/internal/big_endian.h" +#include "google/cloud/internal/make_status.h" #include "absl/types/optional.h" #include #include #include #include +#include +#include +#include namespace google { namespace cloud { @@ -35,6 +41,24 @@ struct Cell { std::string value; }; +// ReadModifyWriteCellResult supports undo and return value +// construction for the ReadModifyWrite RPC. +// +// The timestamp and value written are always returned in timestamp +// and value and will be used to construct the Row returned by the +// RPC. +// +// If maybe_old_value has a value, then a timestamp was overwritten +// and the ReadModifyWriteCellResult will be used to create a +// RestoreValue for undo log. 
Otherwise, a new cell was added and the +// ReadmodifyWriteCellResult will be used to create a DeleteValue for +// the undo log. +struct ReadModifyWriteCellResult { + std::chrono::milliseconds timestamp; + std::string value; + absl::optional maybe_old_value; +}; + /** * Objects of this class hold contents of a specific column in a specific row. * @@ -47,6 +71,12 @@ class ColumnRow { ColumnRow(ColumnRow const&) = delete; ColumnRow& operator=(ColumnRow const&) = delete; + + StatusOr ReadModifyWrite(std::int64_t inc_value); + + ReadModifyWriteCellResult ReadModifyWrite(std::string const& append_value); + + /** * Insert or update and existing cell at a given timestamp. * @@ -83,8 +113,12 @@ class ColumnRow { bool HasCells() const { return !cells_.empty(); } using const_iterator = std::map::const_iterator; + using iterator = + std::map::iterator; const_iterator begin() const { return cells_.begin(); } const_iterator end() const { return cells_.end(); } + iterator begin() { return cells_.begin(); } + iterator end() { return cells_.end(); } const_iterator lower_bound(std::chrono::milliseconds timestamp) const { return cells_.lower_bound(timestamp); } @@ -116,6 +150,16 @@ class ColumnRow { */ class ColumnFamilyRow { public: + StatusOr ReadModifyWrite( + std::string const& column_qualifier, std::int64_t inc_value) { + return columns_[column_qualifier].ReadModifyWrite(inc_value); + }; + + ReadModifyWriteCellResult ReadModifyWrite(std::string const& column_qualifier, + std::string const& append_value) { + return columns_[column_qualifier].ReadModifyWrite(append_value); + } + /** * Insert or update and existing cell at a given column and timestamp. 
* @@ -203,6 +247,18 @@ class ColumnFamily { using const_iterator = std::map::const_iterator; using iterator = std::map::iterator; + StatusOr ReadModifyWrite( + std::string const& row_key, std::string const& column_qualifier, + std::int64_t inc_value) { + return rows_[row_key].ReadModifyWrite(column_qualifier, inc_value); + }; + + ReadModifyWriteCellResult ReadModifyWrite(std::string const& row_key, + std::string const& column_qualifier, + std::string const& append_value) { + return rows_[row_key].ReadModifyWrite(column_qualifier, append_value); + }; + /** * Insert or update and existing cell at a given row, column and timestamp. * diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index 82573d9bbf814..045daa55a42bf 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -136,8 +136,20 @@ class EmulatorService final : public btproto::Bigtable::Service { grpc::Status ReadModifyWriteRow( grpc::ServerContext* /* context */, - btproto::ReadModifyWriteRowRequest const* /* request */, - btproto::ReadModifyWriteRowResponse* /* response */) override { + btproto::ReadModifyWriteRowRequest const* request, + btproto::ReadModifyWriteRowResponse* response) override { + auto maybe_table = cluster_->FindTable(request->table_name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + + auto maybe_response = (*maybe_table)->ReadModifyWriteRow(*request); + if (!maybe_response) { + return ToGrpcStatus(maybe_response.status()); + } + + *response = std::move(maybe_response.value()); + return grpc::Status::OK; } diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 58a3b4b626556..474c482bb0794 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -539,6 +539,24 @@ Status Table::DropRowRange( return Status(); } +StatusOr<::google::bigtable::v2::ReadModifyWriteRowResponse> 
+Table::ReadModifyWriteRow( + google::bigtable::v2::ReadModifyWriteRowRequest const& request) { + std::lock_guard lock(mu_); + + RowTransaction row_transaction(this->get(), request.row_key()); + + auto maybe_response = row_transaction.ReadModifyWriteRow(request); + if (!maybe_response) { + return maybe_response.status(); + } + + row_transaction.commit(); + + return std::move(maybe_response.value()); +} + + // NOLINTBEGIN(readability-convert-member-functions-to-static) Status RowTransaction::AddToCell( ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell) { @@ -682,6 +700,109 @@ Status RowTransaction::SetCell( return Status(); } +// ProcessReadModifyWriteRuleResult records the result of a +// ReadModifyWriteRule computation for possible undo in the undo log +// and also updates the tmp_families temporary table (containing only +// one row) with the modified cell for later return. +void ProcessReadModifyWriteResult( + ColumnFamily& column_family, std::string const& row_key, + std::stack>& undo, + google::bigtable::v2::ReadModifyWriteRule const& rule, + ReadModifyWriteCellResult& result, + std::map& tmp_families) { + if (result.maybe_old_value.has_value()) { + // We overwrote a cell, we need to record a RestoreValue in the undo log + RestoreValue restore_value{column_family, rule.column_qualifier(), + result.timestamp, + std::move(result.maybe_old_value.value())}; + undo.emplace(std::move(restore_value)); + } else { + // We created a new cell -- we would need to delete it in any rollback + DeleteValue delete_value{column_family, rule.column_qualifier(), + result.timestamp}; + undo.emplace(std::move(delete_value)); + } + + // Record the cell in our local mini table here to use in + // assembling a row of changed cells for return. 
+ tmp_families[rule.family_name()].SetCell(row_key, rule.column_qualifier(), + result.timestamp, std::move(result.value)); +} + +// NOLINTBEGIN(readability-function-cognitive-complexity) +StatusOr<::google::bigtable::v2::ReadModifyWriteRowResponse> +RowTransaction::ReadModifyWriteRow( + google::bigtable::v2::ReadModifyWriteRowRequest const& request) { + if (row_key_.empty()) { + return InvalidArgumentError( + "row key not set", + GCP_ERROR_INFO().WithMetadata("request", request.DebugString())); + } + + // tmp_families is a small one row mini table used to accumulate + // changed cells efficiently for later return in the row returned by + // the RPC. + std::map tmp_families; + + for (auto const& rule : request.rules()) { + auto maybe_column_family = table_->FindColumnFamily(rule); + if (!maybe_column_family) { + return maybe_column_family.status(); + } + + auto& column_family = maybe_column_family->get(); + if (rule.has_append_value()) { + auto result = column_family.ReadModifyWrite( + row_key_, rule.column_qualifier(), rule.append_value()); + + ProcessReadModifyWriteResult(column_family, row_key_, undo_, rule, result, + tmp_families); + + } else if (rule.has_increment_amount()) { + auto maybe_result = column_family.ReadModifyWrite( + row_key_, rule.column_qualifier(), rule.increment_amount()); + if (!maybe_result) { + return maybe_result.status(); + } + + auto& result = maybe_result.value(); + + ProcessReadModifyWriteResult(column_family, row_key_, undo_, rule, result, + tmp_families); + + } else { + return InvalidArgumentError( + "either append value or increment amount must be set", + GCP_ERROR_INFO().WithMetadata("rule", rule.DebugString())); + } + } + + // Now assemble the returned value. 
+ google::bigtable::v2::ReadModifyWriteRowResponse resp; + auto* row = resp.mutable_row(); + + for (auto& fam : tmp_families) { + auto* family = row->add_families(); + family->set_name(fam.first); + for (auto& row : fam.second) { + for (auto const& cfr : row.second) { + auto* col = family->add_columns(); + col->set_qualifier(cfr.first); + for (auto const& cr : cfr.second) { + auto* cell = col->add_cells(); + cell->set_timestamp_micros( + std::chrono::duration_cast(cr.first) + .count()); + cell->set_value(std::move(cr.second)); + } + } + } + } + + return resp; +} +// NOLINTEND(readability-function-cognitive-complexity) + void RowTransaction::Undo() { auto row_key = row_key_; diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 40de01a5a4eed..aaf0d270efcf7 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -72,6 +72,11 @@ class Table : public std::enable_shared_from_this
{ Status ReadRows(google::bigtable::v2::ReadRowsRequest const& request, RowStreamer& row_streamer) const; + + StatusOr<::google::bigtable::v2::ReadModifyWriteRowResponse> + ReadModifyWriteRow( + google::bigtable::v2::ReadModifyWriteRowRequest const& request); + std::map>::iterator begin() { return column_families_.begin(); } @@ -159,6 +164,10 @@ class RowTransaction { delete_from_family); Status DeleteFromRow(); + StatusOr<::google::bigtable::v2::ReadModifyWriteRowResponse> + ReadModifyWriteRow( + google::bigtable::v2::ReadModifyWriteRowRequest const& request); + private: void Undo(); From 6db78d16ad5f9a279c7dc916033301d02affc14a Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 12 Jun 2025 19:39:54 +0300 Subject: [PATCH 158/195] emulator: ReadModifyWrite: Eliminate some code duplication. --- google/cloud/bigtable/emulator/column_family.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 92f0ac4ca05b3..078b9869d743c 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -98,9 +98,10 @@ ReadModifyWriteCellResult ColumnRow::ReadModifyWrite( // order, at which point we can just pick the first element. auto latest_it = latest(cells_); + auto value = latest_it->second + append_value; + if (latest_it->first < system_ms) { // We need to add a cell with the current system timestamp - auto value = latest_it->second + append_value; cells_[system_ms] = value; return ReadModifyWriteCellResult{system_ms, std::move(value), @@ -108,7 +109,6 @@ ReadModifyWriteCellResult ColumnRow::ReadModifyWrite( } // Latest timestamp is >= system time. 
Overwrite latest timestamp - auto value = latest_it->second + append_value; auto old_value = std::move(latest_it->second); latest_it->second = value; From 70d0473902e72f6c8ffb6fb3e191ec94b8d55e1d Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 12 Jun 2025 20:07:06 +0300 Subject: [PATCH 159/195] Mechanical fixes by CI automation. (#17) * emulator: mechanical formatting fixes by ci tool. * emulator: make clang-tidy happy. --- google/cloud/bigtable/emulator/column_family.cc | 3 +-- google/cloud/bigtable/emulator/column_family.h | 5 +---- google/cloud/bigtable/emulator/rollback_test.cc | 9 +++++---- google/cloud/bigtable/emulator/table.cc | 6 ++++-- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 078b9869d743c..2d2313acf143f 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -57,8 +57,7 @@ StatusOr ColumnRow::ReadModifyWrite( auto latest_it = latest(cells_); auto maybe_old_value = - google::cloud::internal::DecodeBigEndian( - latest_it->second); + google::cloud::internal::DecodeBigEndian(latest_it->second); if (!maybe_old_value) { return maybe_old_value.status(); } diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 4db78899bd5db..ac8b498760107 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -71,12 +71,10 @@ class ColumnRow { ColumnRow(ColumnRow const&) = delete; ColumnRow& operator=(ColumnRow const&) = delete; - StatusOr ReadModifyWrite(std::int64_t inc_value); ReadModifyWriteCellResult ReadModifyWrite(std::string const& append_value); - /** * Insert or update and existing cell at a given timestamp. 
* @@ -113,8 +111,7 @@ class ColumnRow { bool HasCells() const { return !cells_.empty(); } using const_iterator = std::map::const_iterator; - using iterator = - std::map::iterator; + using iterator = std::map::iterator; const_iterator begin() const { return cells_.begin(); } const_iterator end() const { return cells_.end(); } iterator begin() { return cells_.begin(); } diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 919bccc0ec286..4f73ff72c0dc2 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -346,12 +346,13 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { {column_family_name, column_qualifier, -1, data}, }; auto const* const row_key_5 = "4"; - auto system_time_ms_before = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()); + auto system_time_ms_before = + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); status = SetCells(table, table_name, row_key_5, v); ASSERT_STATUS_OK(status); - auto column_or = GetColumn( - table, v[0].column_family_name, row_key_5, v[0].column_qualifier); + auto column_or = GetColumn(table, v[0].column_family_name, row_key_5, + v[0].column_qualifier); ASSERT_STATUS_OK(column_or.status()); auto col = column_or.value(); ASSERT_EQ(col.size(), 1); diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 474c482bb0794..f3d4a46cd5414 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -232,6 +232,7 @@ Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { request.mutations()); } +// NOLINTBEGIN(readability-function-cognitive-complexity) Status Table::DoMutationsWithPossibleRollback( std::string const& row_key, google::protobuf::RepeatedPtrField const& @@ -300,6 +301,7 @@ Status Table::DoMutationsWithPossibleRollback( 
return Status(); } +// NOLINTEND(readability-function-cognitive-complexity) StatusOr Table::CreateCellStream( std::shared_ptr range_set, @@ -556,7 +558,6 @@ Table::ReadModifyWriteRow( return std::move(maybe_response.value()); } - // NOLINTBEGIN(readability-convert-member-functions-to-static) Status RowTransaction::AddToCell( ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell) { @@ -726,7 +727,8 @@ void ProcessReadModifyWriteResult( // Record the cell in our local mini table here to use in // assembling a row of changed cells for return. tmp_families[rule.family_name()].SetCell(row_key, rule.column_qualifier(), - result.timestamp, std::move(result.value)); + result.timestamp, + std::move(result.value)); } // NOLINTBEGIN(readability-function-cognitive-complexity) From 9e5efee175ae1112422ccbcbb3bfd012d33cb9db Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Sat, 14 Jun 2025 18:11:25 +0300 Subject: [PATCH 160/195] emulator: trivial: fix typo in rollback_test.cc (#18) * emulator: trivial: fix typo in rollback_test.cc * emulator: mechanical formatting fixes. --- .../cloud/bigtable/emulator/rollback_test.cc | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 4f73ff72c0dc2..8ac83ace48a99 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -273,7 +273,7 @@ Status HasRow(std::shared_ptr& table, // // Supplied with a timestamp < -1, it should return an error and fail the entire // mutation chain. 
-TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { +TEST(TransactionRollback, ZeroOrNegativeTimestampHandling) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; @@ -363,7 +363,7 @@ TEST(TransactonRollback, ZeroOrNegativeTimestampHandling) { } // Does the SetCell mutation work to set a cell to a specific value? -TEST(TransactonRollback, SetCellBasicFunction) { +TEST(TransactionRollback, SetCellBasicFunction) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; @@ -396,7 +396,7 @@ TEST(TransactonRollback, SetCellBasicFunction) { // Test that an old value is correctly restored in a pre-populated // cell, when one of a set of SetCell mutations fails after the cell // had been updated with a new value. -TEST(TransactonRollback, TestRestoreValue) { +TEST(TransactionRollback, TestRestoreValue) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; @@ -459,7 +459,7 @@ TEST(TransactonRollback, TestRestoreValue) { // Test that a new cell introduced in a chain of SetCell mutations is // deleted on rollback if a subsequent mutation fails. -TEST(TransactonRollback, DeleteValue) { +TEST(TransactionRollback, DeleteValue) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; @@ -514,7 +514,7 @@ TEST(TransactonRollback, DeleteValue) { // column family name that is not in the table schema) then the column // and any of the cells introduced is deleted in the rollback, but // that any pre-transaction-attemot data in the row is unaffected. 
-TEST(TransactonRollback, DeleteColumn) { +TEST(TransactionRollback, DeleteColumn) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; @@ -563,7 +563,7 @@ TEST(TransactonRollback, DeleteColumn) { // Test that a chain of SetCell mutations that initially introduces a // new row, but one of which eventually fails, will end with the whole // row rolled back. -TEST(TransactonRollback, DeleteRow) { +TEST(TransactionRollback, DeleteRow) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; @@ -600,7 +600,7 @@ TEST(TransactonRollback, DeleteRow) { // Does the DeleteFromfamily mutation work to delete a row from a // specific family and does it rows with the same row key in other // column families alone? -TEST(TransactonRollback, DeleteFromFamilyBasicFunction) { +TEST(TransactionRollback, DeleteFromFamilyBasicFunction) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; @@ -649,7 +649,7 @@ TEST(TransactonRollback, DeleteFromFamilyBasicFunction) { // Test that DeleteFromfamily can be rolled back in case a subsequent // mutation fails. -TEST(TransactonRollback, DeleteFromFamilyRollback) { +TEST(TransactionRollback, DeleteFromFamilyRollback) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; @@ -716,7 +716,7 @@ ::google::bigtable::v2::TimestampRange* NewTimestampRange(int64_t start, } // Does DeleteFromColumn basically work? -TEST(TransactonRollback, DeleteFromColumnBasicFunction) { +TEST(TransactionRollback, DeleteFromColumnBasicFunction) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; @@ -758,7 +758,7 @@ TEST(TransactonRollback, DeleteFromColumnBasicFunction) { } // Does DeleteFromColumn rollback work? 
-TEST(TransactonRollback, DeleteFromColumnRollback) { +TEST(TransactionRollback, DeleteFromColumnRollback) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; @@ -817,7 +817,7 @@ TEST(TransactonRollback, DeleteFromColumnRollback) { } // Can we delete a row from all column families? -TEST(TransactonRollback, DeleteFromRowBasicFunction) { +TEST(TransactionRollback, DeleteFromRowBasicFunction) { ::google::bigtable::admin::v2::Table schema; ::google::bigtable::admin::v2::ColumnFamily column_family; From 323b5005cf6e145e84ce6a5789b50b55b15b2ee0 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 18 Jun 2025 20:50:21 +0300 Subject: [PATCH 161/195] emulator: ReadModifyWrite: Implement passing unit tests and fix a bug they revealed in the code. --- .../cloud/bigtable/emulator/rollback_test.cc | 476 ++++++++++++++++++ google/cloud/bigtable/emulator/table.cc | 55 +- google/cloud/bigtable/emulator/table.h | 6 + 3 files changed, 513 insertions(+), 24 deletions(-) diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/rollback_test.cc index 8ac83ace48a99..e1e1cb240082f 100644 --- a/google/cloud/bigtable/emulator/rollback_test.cc +++ b/google/cloud/bigtable/emulator/rollback_test.cc @@ -15,6 +15,7 @@ #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/row_streamer.h" #include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/internal/big_endian.h" #include "google/cloud/internal/make_status.h" #include "google/cloud/status.h" #include "google/cloud/status_or.h" @@ -26,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +39,7 @@ namespace google { namespace cloud { namespace bigtable { namespace emulator { +using ::google::protobuf::TextFormat; using std::string; struct SetCellParams { @@ -868,6 +871,479 @@ TEST(TransactionRollback, DeleteFromRowBasicFunction) { .ok()); } +StatusOr 
GetColumn( + google::bigtable::v2::ReadModifyWriteRowResponse const& resp, + std::string const& row_key, int family_index, std::string const& qual) { + if (!resp.has_row()) { + return NotFoundError( + "response has no row", + GCP_ERROR_INFO().WithMetadata("response message", resp.DebugString())); + } + + if (resp.row().key() != row_key) { + return InvalidArgumentError( + "row key does not match", + GCP_ERROR_INFO().WithMetadata(row_key, resp.row().key())); + } + + if (family_index < 0) { + return InvalidArgumentError( + "supplied family index < 0", + GCP_ERROR_INFO().WithMetadata("family_index", + absl::StrFormat("%d", family_index))); + } + + if (family_index > resp.row().families_size() - 1) { + return internal::InvalidArgumentError( + "supplied family index is out of range", + GCP_ERROR_INFO().WithMetadata("family index", + absl::StrFormat("%d", family_index))); + } + + // Check that column families and column qualifiers in the response + // are neither empty nor repeated. + std::set families; + for (int i = 0; i < resp.row().families_size(); i++) { + auto ret = families.emplace(resp.row().families(i).name()); + // The family name should not be empty and should not be + // repeated. Neither should the column qualifiers be empty or + // repeated. 
+ if (ret.first->empty() || !ret.second) { + return internal::InvalidArgumentError( + "empty or repeated family name", + GCP_ERROR_INFO().WithMetadata("ReadModifyWriteRowResponse", + resp.DebugString())); + } + + std::set column_qualifiers; + for (auto const& col : resp.row().families(i).columns()) { + auto ret = column_qualifiers.emplace(col.qualifier()); + if (ret.first->empty() || !ret.second) { + return internal::InvalidArgumentError( + "empty or repeated column qualifier", + GCP_ERROR_INFO().WithMetadata("ReadModifyWriteRowResponse", + resp.DebugString())); + } + } + } + + for (auto const& col : resp.row().families(family_index).columns()) { + if (col.qualifier() == qual) { + return col; + } + } + + return NotFoundError("column not found", + GCP_ERROR_INFO().WithMetadata("qualifier", qual)); +} + +// Test that ReadModifyWrite does the correct thing when the row +// and/or the column is unset (it should introduce new cells with the +// timestamp of current system time and assume the missing values are +// 0 or an empty string). 
+TEST(ReadModifyWrite, Unsetcase) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + + std::vector column_families = {"column_family"}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto& table = maybe_table.value(); + + auto constexpr kRMWText = R"pb( + table_name: "projects/test/instances/test/tables/test" + row_key: "0" + rules: + [ { + family_name: "column_family" + column_qualifier: "column_1" + increment_amount: 1 + } + , { + family_name: "column_family" + column_qualifier: "column_2" + append_value: "a string" + }] + )pb"; + + google::bigtable::v2::ReadModifyWriteRowRequest request; + ASSERT_TRUE(TextFormat::ParseFromString(kRMWText, &request)); + + auto system_time_ms_before = + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); + + auto maybe_response = table->ReadModifyWriteRow(request); + ASSERT_STATUS_OK(maybe_response); + + auto& response = maybe_response.value(); + ASSERT_EQ(response.row().key(), "0"); + ASSERT_EQ(response.row().families_size(), 1); + ASSERT_EQ(response.row().families(0).name(), "column_family"); + ASSERT_EQ(response.row().families(0).columns_size(), 2); + + auto maybe_column = GetColumn(response, "0", 0, "column_1"); + ASSERT_STATUS_OK(maybe_column); + auto& col = maybe_column.value(); + ASSERT_EQ(col.cells_size(), 1); + ASSERT_GE(std::chrono::duration_cast( + std::chrono::microseconds(col.cells(0).timestamp_micros())), + system_time_ms_before); + ASSERT_EQ(col.cells(0).value(), + internal::EncodeBigEndian(static_cast(1))); + + auto maybe_column_2 = GetColumn(response, "0", 0, "column_2"); + ASSERT_STATUS_OK(maybe_column_2); + col = maybe_column_2.value(); + ASSERT_EQ(col.cells_size(), 1); + ASSERT_GE(std::chrono::duration_cast( + std::chrono::microseconds(col.cells(0).timestamp_micros())), + system_time_ms_before); + ASSERT_EQ(col.cells(0).value(), "a string"); + + auto maybe_cells = GetColumn(table, "column_family", 
"0", "column_1"); + ASSERT_STATUS_OK(maybe_cells); + auto& cells = maybe_cells.value(); + ASSERT_EQ(cells.size(), 1); + auto cell_it = cells.begin(); + ASSERT_GE(cell_it->first, system_time_ms_before); + ASSERT_EQ(cell_it->second, + internal::EncodeBigEndian(static_cast(1))); + + auto maybe_cells_2 = GetColumn(table, "column_family", "0", "column_2"); + ASSERT_STATUS_OK(maybe_cells_2); + cells = maybe_cells_2.value(); + ASSERT_EQ(cells.size(), 1); + cell_it = cells.begin(); + ASSERT_GE(cell_it->first, system_time_ms_before); + ASSERT_EQ(cell_it->second, "a string"); +} + +// Test that the RPC does the right thing when the latest cell in the +// column has a newer timestamp than system time. In particular, it +// should update the latest cell with a new value (and not create a +// new cell). This also tests that the RPC chooses the latest cell to +// update (and will catch bugs in cell ordering). +TEST(ReadModifyWrite, SetAndNewerTimestampCase) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + + std::vector column_families = {"column_family"}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto& table = maybe_table.value(); + + auto usecs_in_day = (static_cast(24) * 60 * 60 * 1000 * 1000); + + auto far_future_us = (std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count() * + 1000) + + usecs_in_day; + ASSERT_GT(far_future_us, + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + + auto far_future_us_latest = far_future_us + 1000; + + std::vector p = { + {"column_family", "column_1", far_future_us, "older"}, + {"column_family", "column_1", far_future_us_latest, "latest"}, + {"column_family", "column_2", far_future_us, + internal::EncodeBigEndian(static_cast(100))}, + {"column_family", "column_2", far_future_us_latest, + internal::EncodeBigEndian(static_cast(200))}, + }; + + auto status = SetCells(table, 
table_name, "0", p); + ASSERT_STATUS_OK(status); + + auto constexpr kRMWText = R"pb( + table_name: "projects/test/instances/test/tables/test" + row_key: "0" + rules: + [ { + family_name: "column_family" + column_qualifier: "column_1" + append_value: "_with_suffix" + } + , { + family_name: "column_family" + column_qualifier: "column_2" + increment_amount: 1 + }] + )pb"; + + google::bigtable::v2::ReadModifyWriteRowRequest request; + ASSERT_TRUE(TextFormat::ParseFromString(kRMWText, &request)); + + auto maybe_response = table->ReadModifyWriteRow(request); + ASSERT_STATUS_OK(maybe_response); + + auto& response = maybe_response.value(); + ASSERT_EQ(response.row().key(), "0"); + ASSERT_EQ(response.row().families_size(), 1); + ASSERT_EQ(response.row().families(0).name(), "column_family"); + ASSERT_EQ(response.row().families(0).columns_size(), 2); + + auto maybe_column = GetColumn(response, "0", 0, "column_1"); + ASSERT_STATUS_OK(maybe_column); + auto& col = maybe_column.value(); + ASSERT_EQ(col.cells_size(), 1); + ASSERT_EQ(col.cells(0).timestamp_micros(), far_future_us_latest); + ASSERT_EQ(col.cells(0).value(), "latest_with_suffix"); + + auto maybe_column_2 = GetColumn(response, "0", 0, "column_2"); + ASSERT_STATUS_OK(maybe_column_2); + col = maybe_column_2.value(); + ASSERT_EQ(col.cells_size(), 1); + ASSERT_EQ(col.cells(0).timestamp_micros(), far_future_us_latest); + ASSERT_EQ(col.cells(0).value(), + internal::EncodeBigEndian(static_cast(201))); + + ASSERT_STATUS_OK( + HasCell(table, "column_family", "0", "column_1", far_future_us, "older")); + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_1", + far_future_us_latest, "latest_with_suffix")); + + ASSERT_STATUS_OK( + HasCell(table, "column_family", "0", "column_2", far_future_us, + internal::EncodeBigEndian(static_cast(100)))); + ASSERT_STATUS_OK( + HasCell(table, "column_family", "0", "column_2", far_future_us_latest, + internal::EncodeBigEndian(static_cast(201)))); +} + +// Test that the RPC does the 
right thing when the latest cell in the +// column has an older timestamp than system time. In particular, a +// new cell with the current system time should be added to the cell +// to contain the value after adding or appending. +TEST(ReadModifyWrite, SetAndOlderTimestampCase) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + + std::vector column_families = {"column_family"}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto& table = maybe_table.value(); + + auto usecs_in_day = (static_cast(24) * 60 * 60 * 1000 * 1000); + + auto far_past_us = (std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count() * + 1000) - + usecs_in_day; + ASSERT_LT(far_past_us, + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + auto far_past_us_oldest = far_past_us - 1000; + + std::vector p = { + {"column_family", "column_1", far_past_us, "old"}, + {"column_family", "column_1", far_past_us_oldest, "oldest"}, + {"column_family", "column_2", far_past_us, + internal::EncodeBigEndian(static_cast(100))}, + {"column_family", "column_2", far_past_us_oldest, + internal::EncodeBigEndian(static_cast(200))}, + }; + + auto status = SetCells(table, table_name, "0", p); + ASSERT_STATUS_OK(status); + + auto constexpr kRMWText = R"pb( + table_name: "projects/test/instances/test/tables/test" + row_key: "0" + rules: + [ { + family_name: "column_family" + column_qualifier: "column_1" + append_value: "_with_suffix" + } + , { + family_name: "column_family" + column_qualifier: "column_2" + increment_amount: 1 + }] + )pb"; + + google::bigtable::v2::ReadModifyWriteRowRequest request; + ASSERT_TRUE(TextFormat::ParseFromString(kRMWText, &request)); + + auto system_time_us_before = + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count() * + 1000; + + auto maybe_response = table->ReadModifyWriteRow(request); 
+ ASSERT_STATUS_OK(maybe_response); + + auto& response = maybe_response.value(); + ASSERT_EQ(response.row().key(), "0"); + ASSERT_EQ(response.row().families_size(), 1); + ASSERT_EQ(response.row().families(0).name(), "column_family"); + ASSERT_EQ(response.row().families(0).columns_size(), 2); + + auto maybe_column = GetColumn(response, "0", 0, "column_1"); + ASSERT_STATUS_OK(maybe_column); + auto& col = maybe_column.value(); + ASSERT_EQ(col.cells_size(), 1); + ASSERT_GE(col.cells(0).timestamp_micros(), system_time_us_before); + ASSERT_EQ(col.cells(0).value(), "old_with_suffix"); + + auto maybe_column_2 = GetColumn(response, "0", 0, "column_2"); + ASSERT_STATUS_OK(maybe_column_2); + auto& integer_col = maybe_column_2.value(); + ASSERT_EQ(integer_col.cells_size(), 1); + ASSERT_GE(integer_col.cells(0).timestamp_micros(), system_time_us_before); + ASSERT_EQ(integer_col.cells(0).value(), + internal::EncodeBigEndian(static_cast(101))); + + ASSERT_STATUS_OK( + HasCell(table, "column_family", "0", "column_1", far_past_us, "old")); + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_1", + far_past_us_oldest, "oldest")); + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_1", + col.cells(0).timestamp_micros(), "old_with_suffix")); + + ASSERT_STATUS_OK( + HasCell(table, "column_family", "0", "column_2", far_past_us, + internal::EncodeBigEndian(static_cast(100)))); + ASSERT_STATUS_OK( + HasCell(table, "column_family", "0", "column_2", far_past_us_oldest, + internal::EncodeBigEndian(static_cast(200)))); + ASSERT_STATUS_OK( + HasCell(table, "column_family", "0", "column_2", + integer_col.cells(0).timestamp_micros(), + internal::EncodeBigEndian(static_cast(101)))); +} + +// Test that the RPC does the right thing when the latest cell in the +// column has a newer timestamp than system time, and we need to roll +// back. In particular the changes to the latest cell should be rolled +// back. 
+TEST(ReadModifyWrite, RollbackNewerTimestamp) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + + std::vector column_families = {"column_family"}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto& table = maybe_table.value(); + + auto usecs_in_day = (static_cast(24) * 60 * 60 * 1000 * 1000); + + auto far_future_us = (std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count() * + 1000) + + usecs_in_day; + + ASSERT_GT(far_future_us, + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + + std::vector p = { + {"column_family", "column_1", far_future_us, "prefix"}, + }; + + auto status = SetCells(table, table_name, "0", p); + ASSERT_STATUS_OK(status); + + // The rules are evaluated in order. In this case, the 2nd rule + // refers to a column family that does not exist and should trigger + // a rollback. + auto constexpr kRMWText = R"pb( + table_name: "projects/test/instances/test/tables/test" + row_key: "0" + rules: + [ { + family_name: "column_family" + column_qualifier: "column_1" + append_value: "_with_suffix" + } + , { + family_name: "does_not_exist" + column_qualifier: "column_2" + increment_amount: 1 + }] + )pb"; + + google::bigtable::v2::ReadModifyWriteRowRequest request; + ASSERT_TRUE(TextFormat::ParseFromString(kRMWText, &request)); + + auto maybe_response = table->ReadModifyWriteRow(request); + ASSERT_EQ(false, maybe_response.ok()); + + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_1", + far_future_us, "prefix")); +} + +// Test that the RPC does the right thing when the latest cell in the +// column has a older timestamp than system time, and we need to roll +// back. In particular, the added cell should be deleted (no +// additional cell should be available after the failed transaction). 
+TEST(ReadModifyWrite, RollbackOlderTimestamp) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + + std::vector column_families = {"column_family"}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto& table = maybe_table.value(); + + auto usecs_in_day = (static_cast(24) * 60 * 60 * 1000 * 1000); + + auto far_past_us = (std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count() * + 1000) - + usecs_in_day; + ASSERT_LT(far_past_us, + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + + std::vector p = { + {"column_family", "column_1", far_past_us, "old"}, + }; + + auto status = SetCells(table, table_name, "0", p); + ASSERT_STATUS_OK(status); + + // The rules are evaluated in order. In this case, the 2nd rule + // refers to a column family that does not exist and should trigger + // a rollback. + auto constexpr kRMWText = R"pb( + table_name: "projects/test/instances/test/tables/test" + row_key: "0" + rules: + [ { + family_name: "column_family" + column_qualifier: "column_1" + append_value: "_with_suffix" + } + , { + family_name: "does_not_exist" + column_qualifier: "column_2" + increment_amount: 1 + }] + )pb"; + + google::bigtable::v2::ReadModifyWriteRowRequest request; + ASSERT_TRUE(TextFormat::ParseFromString(kRMWText, &request)); + + auto maybe_response = table->ReadModifyWriteRow(request); + ASSERT_EQ(false, maybe_response.ok()); + + ASSERT_STATUS_OK( + HasCell(table, "column_family", "0", "column_1", far_past_us, "old")); +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index f3d4a46cd5414..f95717822a6a2 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -31,6 +31,7 @@ #include #include #include +#include #include namespace google { 
@@ -731,7 +732,35 @@ void ProcessReadModifyWriteResult( std::move(result.value)); } -// NOLINTBEGIN(readability-function-cognitive-complexity) +google::bigtable::v2::ReadModifyWriteRowResponse +FamiliesToReadModifyWriteResponse( + std::string const& row_key, + std::map const& families) { + google::bigtable::v2::ReadModifyWriteRowResponse resp; + auto* row = resp.mutable_row(); + row->set_key(row_key); + + for (auto const& fam : families) { + auto* family = row->add_families(); + family->set_name(fam.first); + for (auto const& r : fam.second) { + for (auto const& cfr : r.second) { + auto* col = family->add_columns(); + col->set_qualifier(cfr.first); + for (auto const& cr : cfr.second) { + auto* cell = col->add_cells(); + cell->set_timestamp_micros( + std::chrono::duration_cast(cr.first) + .count()); + cell->set_value(std::move(cr.second)); + } + } + } + } + + return resp; +} + StatusOr<::google::bigtable::v2::ReadModifyWriteRowResponse> RowTransaction::ReadModifyWriteRow( google::bigtable::v2::ReadModifyWriteRowRequest const& request) { @@ -780,30 +809,8 @@ RowTransaction::ReadModifyWriteRow( } // Now assemble the returned value. 
- google::bigtable::v2::ReadModifyWriteRowResponse resp; - auto* row = resp.mutable_row(); - - for (auto& fam : tmp_families) { - auto* family = row->add_families(); - family->set_name(fam.first); - for (auto& row : fam.second) { - for (auto const& cfr : row.second) { - auto* col = family->add_columns(); - col->set_qualifier(cfr.first); - for (auto const& cr : cfr.second) { - auto* cell = col->add_cells(); - cell->set_timestamp_micros( - std::chrono::duration_cast(cr.first) - .count()); - cell->set_value(std::move(cr.second)); - } - } - } - } - - return resp; + return FamiliesToReadModifyWriteResponse(row_key_, tmp_families); } -// NOLINTEND(readability-function-cognitive-complexity) void RowTransaction::Undo() { auto row_key = row_key_; diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index aaf0d270efcf7..0af050bfd6476 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -36,6 +36,7 @@ #include #include #include +#include namespace google { namespace cloud { @@ -181,6 +182,11 @@ class RowTransaction { std::string const& row_key_; }; +google::bigtable::v2::ReadModifyWriteRowResponse +FamiliesToReadModifyWriteResponse( + std::string const& row_key, + std::map const& families); + /** * A `AbstractCellStreamImpl` which streams filtered contents of the table. * From f8d303b40dd466832d38cb0627b2af0b2a96aecb Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 18 Jun 2025 21:20:03 +0300 Subject: [PATCH 162/195] emulator: tests: rename rollback_test.cc to something more appropriate. (#20) It now has tests well beyond the initial rollback tests, dealing with all manner of bigtable mutations, so call it mutations_test.cc. 
--- google/cloud/bigtable/emulator/CMakeLists.txt | 2 +- google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl | 2 +- .../bigtable/emulator/{rollback_test.cc => mutations_test.cc} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename google/cloud/bigtable/emulator/{rollback_test.cc => mutations_test.cc} (100%) diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index ecf00ea70bf4a..5a656edc1932a 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -59,8 +59,8 @@ if (BUILD_TESTING) drop_row_range_test.cc filter_test.cc filtered_map_test.cc + mutations_test.cc range_set_test.cc - rollback_test.cc server_test.cc table_test.cc) export_list_to_bazel("bigtable_emulator_unit_tests.bzl" diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl index ffffeab82539e..cf47318619e60 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -22,8 +22,8 @@ bigtable_emulator_unit_tests = [ "drop_row_range_test.cc", "filter_test.cc", "filtered_map_test.cc", + "mutations_test.cc", "range_set_test.cc", - "rollback_test.cc", "server_test.cc", "table_test.cc", ] diff --git a/google/cloud/bigtable/emulator/rollback_test.cc b/google/cloud/bigtable/emulator/mutations_test.cc similarity index 100% rename from google/cloud/bigtable/emulator/rollback_test.cc rename to google/cloud/bigtable/emulator/mutations_test.cc From 4db993a797b4271cd80e66f27659db6f2d76f312 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 18 Jun 2025 21:56:02 +0300 Subject: [PATCH 163/195] emulator: mechanical fixes by CI tool. 
--- google/cloud/bigtable/emulator/table.cc | 2 +- google/cloud/bigtable/emulator/table.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index ae9eab1a46fe8..8f851b7b6087d 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -565,7 +565,7 @@ Status Table::SampleRowKeys( column_family_size_map.emplace(stream->column_family(), stream->column_family().size()); column_qualifier_size_map.emplace(stream->column_qualifier(), - stream->column_qualifier().size()); + stream->column_qualifier().size()); timestamp_total_row_size += sizeof(stream->timestamp()); value_total_row_size += stream->value().size(); } diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 04891f5136054..08c310f4334e7 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -91,7 +91,9 @@ class Table : public std::enable_shared_from_this
{ return column_families_.find(column_family); } - Status SampleRowKeys(double pass_probability, grpc::ServerWriter* writer); + Status SampleRowKeys( + double pass_probability, + grpc::ServerWriter* writer); std::shared_ptr
get() { return shared_from_this(); } From e2155d939cd1af5c8ccc1126cd11ec5a119c151f Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 24 Jun 2025 18:57:15 +0300 Subject: [PATCH 164/195] emulator: Complex Types: Implement AddToCell mutation This implements the AddToCell mutation. * Unit tested and tests passing * Supports atomic rollback * Support Sum, Min and Max aggregations for non-negative Int64 numbers * It is now possible to create an aggregate column family (supporting AddToCell or MergeToCell) --- .../cloud/bigtable/emulator/column_family.cc | 83 ++++- .../cloud/bigtable/emulator/column_family.h | 112 ++++++ .../bigtable/emulator/column_family_test.cc | 2 + .../cloud/bigtable/emulator/mutations_test.cc | 330 +++++++++++++++--- google/cloud/bigtable/emulator/table.cc | 182 +++++++++- google/cloud/bigtable/emulator/table.h | 3 +- 6 files changed, 657 insertions(+), 55 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 2d2313acf143f..0a8475869fcf6 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -14,10 +14,15 @@ #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/internal/big_endian.h" +#include #include +#include #include #include +#include #include +#include +#include namespace google { namespace cloud { @@ -118,9 +123,32 @@ ReadModifyWriteCellResult ColumnRow::ReadModifyWrite( absl::optional ColumnRow::SetCell( std::chrono::milliseconds timestamp, std::string const& value) { absl::optional ret = absl::nullopt; + + auto cell_it = cells_.find(timestamp); + if (!(cell_it == cells_.end())) { + ret = std::move(cell_it->second); + } + + cells_[timestamp] = value; + + return ret; +} + +StatusOr> ColumnRow::UpdateCell( + std::chrono::milliseconds timestamp, std::string& value, + std::function(std::string const&, + std::string&&)> const& update_fn) { + absl::optional ret = 
absl::nullopt; + auto cell_it = cells_.find(timestamp); if (!(cell_it == cells_.end())) { + auto maybe_update_value = update_fn(cell_it->second, std::move(value)); + if (!maybe_update_value) { + return maybe_update_value.status(); + } ret = std::move(cell_it->second); + maybe_update_value.value().swap(cell_it->second); + return ret; } cells_[timestamp] = value; @@ -166,6 +194,15 @@ absl::optional ColumnFamilyRow::SetCell( return columns_[column_qualifier].SetCell(timestamp, value); } +StatusOr> ColumnFamilyRow::UpdateCell( + std::string const& column_qualifier, std::chrono::milliseconds timestamp, + std::string& value, + std::function(std::string const&, + std::string&&)> const& update_fn) { + return columns_[column_qualifier].UpdateCell(timestamp, value, + std::move(update_fn)); +} + std::vector ColumnFamilyRow::DeleteColumn( std::string const& column_qualifier, ::google::bigtable::v2::TimestampRange const& time_range) { @@ -201,6 +238,13 @@ absl::optional ColumnFamily::SetCell( return rows_[row_key].SetCell(column_qualifier, timestamp, value); } +StatusOr> ColumnFamily::UpdateCell( + std::string const& row_key, std::string const& column_qualifier, + std::chrono::milliseconds timestamp, std::string& value) { + return rows_[row_key].UpdateCell(column_qualifier, timestamp, value, + update_cell_); +} + std::map> ColumnFamily::DeleteRow( std::string const& row_key) { std::map> res; @@ -215,11 +259,11 @@ std::map> ColumnFamily::DeleteRow( ::google::bigtable::v2::TimestampRange time_range; auto deleted_cells = column.second.DeleteTimeRange(time_range); if (!deleted_cells.empty()) { - res[std::move(column.first)] = std::move(deleted_cells); + res[column.first] = std::move(deleted_cells); } } - rows_.erase(row_key); + rows_.erase(row_it); return res; } @@ -385,6 +429,41 @@ bool FilteredColumnFamilyStream::PointToFirstCellAfterRowChange() const { return false; } +StatusOr> +ColumnFamily::ConstructAggregateColumnFamily( + google::bigtable::admin::v2::Type value_type) { + 
auto cf = std::make_shared(); + + if (value_type.has_aggregate_type()) { + auto const& aggregate_type = value_type.aggregate_type(); + switch (aggregate_type.aggregator_case()) { + case google::bigtable::admin::v2::Type::Aggregate::kSum: + cf->update_cell_ = cf->SumUpdateCellBEInt64; + break; + case google::bigtable::admin::v2::Type::Aggregate::kMin: + cf->update_cell_ = cf->MinUpdateCellBEInt64; + break; + case google::bigtable::admin::v2::Type::Aggregate::kMax: + cf->update_cell_ = cf->MaxUpdateCellBEInt64; + break; + default: + return InvalidArgumentError( + "unsupported aggregation type", + GCP_ERROR_INFO().WithMetadata( + "aggregation case", + absl::StrFormat("%d", aggregate_type.aggregator_case()))); + } + + cf->value_type_ = std::move(value_type); + + return cf; + } + + return InvalidArgumentError( + "no aggregate type set in the supplied value_type", + GCP_ERROR_INFO().WithMetadata("supplied value type", + value_type.DebugString())); +} } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index ac8b498760107..7becdb5a29d8e 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -15,6 +15,7 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H +#include "google/cloud/bigtable/cell.h" #include "google/cloud/bigtable/emulator/cell_view.h" #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" @@ -22,10 +23,14 @@ #include "google/cloud/bigtable/read_modify_write_rule.h" #include "google/cloud/internal/big_endian.h" #include "google/cloud/internal/make_status.h" +#include "google/cloud/status_or.h" #include "absl/types/optional.h" #include #include +#include +#include #include +#include #include #include #include @@ -88,6 +93,12 @@ class 
ColumnRow { */ absl::optional SetCell(std::chrono::milliseconds timestamp, std::string const& value); + + StatusOr> UpdateCell( + std::chrono::milliseconds timestamp, std::string& value, + std::function(std::string const&, + std::string&&)> const& update_fn); + /** * Delete cells falling into a given timestamp range. * @@ -173,6 +184,13 @@ class ColumnFamilyRow { absl::optional SetCell(std::string const& column_qualifier, std::chrono::milliseconds timestamp, std::string const& value); + + StatusOr> UpdateCell( + std::string const& column_qualifier, std::chrono::milliseconds timestamp, + std::string& value, + std::function(std::string const&, + std::string&&)> const& update_fn); + /** * Delete cells falling into a given timestamp range in one column. * @@ -237,6 +255,13 @@ class ColumnFamilyRow { class ColumnFamily { public: ColumnFamily() = default; + // ConstructAggregateColumnFamily can be used to return an aggregate + // ColumnFamily that can support AddToCell or MergeToCell and + // similar aggregate complex types. To construct an ordinary + // ColumnFamily, use the default constructor ColumnFamily(). + static StatusOr> ConstructAggregateColumnFamily( + google::bigtable::admin::v2::Type value_type); + // Disable copying. ColumnFamily(ColumnFamily const&) = delete; ColumnFamily& operator=(ColumnFamily const&) = delete; @@ -274,6 +299,21 @@ class ColumnFamily { std::string const& column_qualifier, std::chrono::milliseconds timestamp, std::string const& value); + + /** + * UpdateCell is like SetCell except that, when a cell exists with + * the same timestamp, an update function (that depends on the column + * family type) is called to derive a new value from the new and + * existing value, and that is the value that is written. + * + * Simple (non-aggregate) column families have a default update + * function that just returns the new value. 
+ * + */ + StatusOr> UpdateCell( + std::string const& row_key, std::string const& column_qualifier, + std::chrono::milliseconds timestamp, std::string& value); + /** * Delete the whole row from this column family. * @@ -351,9 +391,81 @@ class ColumnFamily { } void clear() { rows_.clear(); } + absl::optional GetValueType() { + return value_type_; + }; private: std::map rows_; + + // Support for aggregate and other complex types. + absl::optional value_type_ = absl::nullopt; + + static StatusOr DefaultUpdateCell( + std::string const& /*existing_value*/, std::string&& new_value) { + return new_value; + }; + + static StatusOr SumUpdateCellBEInt64( + std::string const& existing_value, std::string&& new_value) { + auto existing_value_int = + google::cloud::internal::DecodeBigEndian(existing_value); + if (!existing_value_int) { + return existing_value_int.status(); + } + + auto new_value_int = + google::cloud::internal::DecodeBigEndian(new_value); + if (!new_value_int) { + return new_value_int.status(); + } + + return google::cloud::internal::EncodeBigEndian(existing_value_int.value() + + new_value_int.value()); + }; + + static StatusOr MaxUpdateCellBEInt64( + std::string const& existing_value, std::string&& new_value) { + auto existing_int = + google::cloud::internal::DecodeBigEndian(existing_value); + if (!existing_int) { + return existing_int.status(); + } + auto new_int = google::cloud::internal::DecodeBigEndian( + std::move(new_value)); + if (!new_int) { + return new_int.status(); + } + + if (existing_int.value() > new_int.value()) { + return existing_value; + } + + return new_value; + }; + + static StatusOr MinUpdateCellBEInt64( + std::string const& existing_value, std::string&& new_value) { + auto existing_int = + google::cloud::internal::DecodeBigEndian(existing_value); + if (!existing_int) { + return existing_int.status(); + } + auto new_int = google::cloud::internal::DecodeBigEndian( + std::move(new_value)); + if (!new_int) { + return new_int.status(); + } + + 
if (existing_int.value() < new_int.value()) { + return existing_value; + } + + return new_value; + }; + + std::function(std::string const&, std::string&&)> + update_cell_ = DefaultUpdateCell; }; /** diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index d96d93d69dd2d..ff76b03290080 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -18,6 +18,8 @@ #include "google/cloud/testing_util/is_proto_equal.h" #include #include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/mutations_test.cc b/google/cloud/bigtable/emulator/mutations_test.cc index e1e1cb240082f..1a49c8be79d80 100644 --- a/google/cloud/bigtable/emulator/mutations_test.cc +++ b/google/cloud/bigtable/emulator/mutations_test.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -61,6 +62,55 @@ StatusOr> CreateTable( return Table::Create(schema); } +::google::bigtable::admin::v2::ColumnFamily MakeBEAggregateCFProto( + ::google::bigtable::admin::v2::Type_Aggregate::AggregatorCase aggregator) { + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto* value_type = column_family.mutable_value_type(); + auto* kind_aggregate_type = value_type->mutable_aggregate_type(); + switch (aggregator) { + case google::bigtable::admin::v2::Type::Aggregate::kSum: + kind_aggregate_type->mutable_sum(); + break; + case google::bigtable::admin::v2::Type::Aggregate::kMax: + kind_aggregate_type->mutable_max(); + break; + case google::bigtable::admin::v2::Type::Aggregate::kMin: + kind_aggregate_type->mutable_min(); + break; + default: + std::abort(); + } + auto* input_type = kind_aggregate_type->mutable_input_type(); + auto* int64_type = input_type->mutable_int64_type(); + // We need to set the encoding + auto* encoding = int64_type->mutable_encoding(); + encoding->mutable_big_endian_bytes(); 
+ + // What do we do about the state_type? + // FIXME: Is this correct? + auto* state_type = kind_aggregate_type->mutable_state_type(); + int64_type = state_type->mutable_int64_type(); + encoding = int64_type->mutable_encoding(); + encoding->mutable_big_endian_bytes(); + + return column_family; +} + +::google::bigtable::admin::v2::Table CreateSchema( + std::string const& table_name, + std::map const& + column_families) { + ::google::bigtable::admin::v2::Table schema; + + schema.set_name(table_name); + for (auto const& cf : column_families) { + (*schema.mutable_column_families())[cf.first] = cf.second; + } + + return schema; +} + Status DeleteFromFamilies( std::shared_ptr& table, std::string const& table_name, std::string const& row_key, @@ -189,11 +239,10 @@ Status HasColumn( auto const& cf = column_family_it->second; auto column_family_row_it = cf->find(row_key); if (column_family_row_it == cf->end()) { - return internal::NotFoundError( - "row key not found in column family", - GCP_ERROR_INFO() - .WithMetadata("row key", row_key) - .WithMetadata("column family", column_family)); + return NotFoundError("row key not found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); } auto& column_family_row = column_family_row_it->second; @@ -221,11 +270,10 @@ StatusOr> GetColumn( auto const& cf = column_family_it->second; auto column_family_row_it = cf->find(row_key); if (column_family_row_it == cf->end()) { - return internal::NotFoundError( - "row key not found in column family", - GCP_ERROR_INFO() - .WithMetadata("row key", row_key) - .WithMetadata("column family", column_family)); + return NotFoundError("row key not found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); } auto& column_family_row = column_family_row_it->second; @@ -871,6 +919,200 @@ TEST(TransactionRollback, DeleteFromRowBasicFunction) { .ok()); } +// Does 
AddToCell reject requests to add to a cell in a column family +// not provisioned for aggregation? +TEST(TransactionRollback, AddToCellRejectsRequestsToNonAggregateColumnFamily) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "column_family_1"; + auto const* const column_qualifier = "column_qualifier"; + auto const timestamp_micros = 1000; + + auto maybe_table = Table::Create( + CreateSchema(table_name, {{column_family_name, column_family}})); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* add_to_cell_mutation = mutation_request_mutation->mutable_add_to_cell(); + + add_to_cell_mutation->set_family_name(column_family_name); + auto* mutable_column_qualifier = + add_to_cell_mutation->mutable_column_qualifier(); + mutable_column_qualifier->set_raw_value(column_qualifier); + auto* mutable_timestamp = add_to_cell_mutation->mutable_timestamp(); + mutable_timestamp->set_raw_timestamp_micros(timestamp_micros); + auto* mutable_input = add_to_cell_mutation->mutable_input(); + mutable_input->set_int_value(100); + + // Should fail because `column_family' has not been provisioned for + // aggregation. i.e. its value_type is not set all, in this case (it + // would need to be set to `Aggregate'. + ASSERT_EQ(false, table->MutateRow(mutation_request).ok()); +} + +// Test basic functionality of AddToCell Sum aggregation. 
+TEST(TransactionRollback, AddToCellTestSum) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "column_family_1"; + auto const* const column_qualifier = "column_qualifier"; + auto const timestamp_micros = 1000; + + auto maybe_table = Table::Create(CreateSchema( + table_name, {{column_family_name, + MakeBEAggregateCFProto( + google::bigtable::admin::v2::Type::Aggregate::kSum)}})); + ASSERT_STATUS_OK(maybe_table); + + auto table = maybe_table.value(); + + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* add_to_cell_mutation = mutation_request_mutation->mutable_add_to_cell(); + + add_to_cell_mutation->set_family_name(column_family_name); + auto* mutable_column_qualifier = + add_to_cell_mutation->mutable_column_qualifier(); + mutable_column_qualifier->set_raw_value(column_qualifier); + auto* mutable_timestamp = add_to_cell_mutation->mutable_timestamp(); + mutable_timestamp->set_raw_timestamp_micros(timestamp_micros); + auto* mutable_input = add_to_cell_mutation->mutable_input(); + mutable_input->set_int_value(100); + + ASSERT_EQ(true, table->MutateRow(mutation_request).ok()); + ASSERT_EQ(true, + HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, + google::cloud::internal::EncodeBigEndian(100)) + .ok()); + + // Try and add 200 + mutable_input->set_int_value(200); + ASSERT_EQ(true, table->MutateRow(mutation_request).ok()); + ASSERT_EQ(true, + HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, + google::cloud::internal::EncodeBigEndian(300)) + .ok()); + + // Try and subtract 50 + mutable_input->set_int_value(-50); + ASSERT_EQ(true, table->MutateRow(mutation_request).ok()); + ASSERT_EQ(true, + HasCell(table, column_family_name, 
row_key, column_qualifier, + timestamp_micros, + google::cloud::internal::EncodeBigEndian(250)) + .ok()); +} + +// Test basic functionality of AddToCell Max aggregation. +TEST(TransactionRollback, AddToCellTestMax) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "column_family_1"; + auto const* const column_qualifier = "column_qualifier"; + auto const timestamp_micros = 1000; + + auto maybe_table = Table::Create(CreateSchema( + table_name, {{column_family_name, + MakeBEAggregateCFProto( + google::bigtable::admin::v2::Type::Aggregate::kMax)}})); + ASSERT_STATUS_OK(maybe_table); + + auto table = maybe_table.value(); + + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* add_to_cell_mutation = mutation_request_mutation->mutable_add_to_cell(); + + add_to_cell_mutation->set_family_name(column_family_name); + auto* mutable_column_qualifier = + add_to_cell_mutation->mutable_column_qualifier(); + mutable_column_qualifier->set_raw_value(column_qualifier); + auto* mutable_timestamp = add_to_cell_mutation->mutable_timestamp(); + mutable_timestamp->set_raw_timestamp_micros(timestamp_micros); + auto* mutable_input = add_to_cell_mutation->mutable_input(); + mutable_input->set_int_value(100); + + ASSERT_EQ(true, table->MutateRow(mutation_request).ok()); + ASSERT_EQ(true, + HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, + google::cloud::internal::EncodeBigEndian(100)) + .ok()); + + mutable_input->set_int_value(200); + ASSERT_EQ(true, table->MutateRow(mutation_request).ok()); + ASSERT_EQ(true, + HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, + google::cloud::internal::EncodeBigEndian(200)) + .ok()); +} + +// Test basic functionality of 
AddToCell Min aggregation. +TEST(TransactionRollback, AddToCellTestMin) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "column_family_1"; + auto const* const column_qualifier = "column_qualifier"; + auto const timestamp_micros = 1000; + + auto maybe_table = Table::Create(CreateSchema( + table_name, {{column_family_name, + MakeBEAggregateCFProto( + google::bigtable::admin::v2::Type::Aggregate::kMin)}})); + ASSERT_STATUS_OK(maybe_table); + + auto table = maybe_table.value(); + + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* add_to_cell_mutation = mutation_request_mutation->mutable_add_to_cell(); + + add_to_cell_mutation->set_family_name(column_family_name); + auto* mutable_column_qualifier = + add_to_cell_mutation->mutable_column_qualifier(); + mutable_column_qualifier->set_raw_value(column_qualifier); + auto* mutable_timestamp = add_to_cell_mutation->mutable_timestamp(); + mutable_timestamp->set_raw_timestamp_micros(timestamp_micros); + auto* mutable_input = add_to_cell_mutation->mutable_input(); + mutable_input->set_int_value(100); + + ASSERT_EQ(true, table->MutateRow(mutation_request).ok()); + ASSERT_EQ(true, + HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, + google::cloud::internal::EncodeBigEndian(100)) + .ok()); + + mutable_input->set_int_value(50); + ASSERT_EQ(true, table->MutateRow(mutation_request).ok()); + ASSERT_EQ(true, + HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, + google::cloud::internal::EncodeBigEndian(50)) + .ok()); +} + StatusOr GetColumn( google::bigtable::v2::ReadModifyWriteRowResponse const& resp, std::string const& row_key, int family_index, std::string const& qual) { @@ -894,7 +1136,7 @@ 
StatusOr GetColumn( } if (family_index > resp.row().families_size() - 1) { - return internal::InvalidArgumentError( + return InvalidArgumentError( "supplied family index is out of range", GCP_ERROR_INFO().WithMetadata("family index", absl::StrFormat("%d", family_index))); @@ -909,7 +1151,7 @@ StatusOr GetColumn( // repeated. Neither should the column qualifiers be empty or // repeated. if (ret.first->empty() || !ret.second) { - return internal::InvalidArgumentError( + return InvalidArgumentError( "empty or repeated family name", GCP_ERROR_INFO().WithMetadata("ReadModifyWriteRowResponse", resp.DebugString())); @@ -919,7 +1161,7 @@ StatusOr GetColumn( for (auto const& col : resp.row().families(i).columns()) { auto ret = column_qualifiers.emplace(col.qualifier()); if (ret.first->empty() || !ret.second) { - return internal::InvalidArgumentError( + return InvalidArgumentError( "empty or repeated column qualifier", GCP_ERROR_INFO().WithMetadata("ReadModifyWriteRowResponse", resp.DebugString())); @@ -989,8 +1231,8 @@ TEST(ReadModifyWrite, Unsetcase) { ASSERT_GE(std::chrono::duration_cast( std::chrono::microseconds(col.cells(0).timestamp_micros())), system_time_ms_before); - ASSERT_EQ(col.cells(0).value(), - internal::EncodeBigEndian(static_cast(1))); + ASSERT_EQ(col.cells(0).value(), ::google::cloud::internal::EncodeBigEndian( + static_cast(1))); auto maybe_column_2 = GetColumn(response, "0", 0, "column_2"); ASSERT_STATUS_OK(maybe_column_2); @@ -1007,8 +1249,8 @@ TEST(ReadModifyWrite, Unsetcase) { ASSERT_EQ(cells.size(), 1); auto cell_it = cells.begin(); ASSERT_GE(cell_it->first, system_time_ms_before); - ASSERT_EQ(cell_it->second, - internal::EncodeBigEndian(static_cast(1))); + ASSERT_EQ(cell_it->second, ::google::cloud::internal::EncodeBigEndian( + static_cast(1))); auto maybe_cells_2 = GetColumn(table, "column_family", "0", "column_2"); ASSERT_STATUS_OK(maybe_cells_2); @@ -1051,9 +1293,11 @@ TEST(ReadModifyWrite, SetAndNewerTimestampCase) { {"column_family", 
"column_1", far_future_us, "older"}, {"column_family", "column_1", far_future_us_latest, "latest"}, {"column_family", "column_2", far_future_us, - internal::EncodeBigEndian(static_cast(100))}, + ::google::cloud::internal::EncodeBigEndian( + static_cast(100))}, {"column_family", "column_2", far_future_us_latest, - internal::EncodeBigEndian(static_cast(200))}, + ::google::cloud::internal::EncodeBigEndian( + static_cast(200))}, }; auto status = SetCells(table, table_name, "0", p); @@ -1099,20 +1343,22 @@ TEST(ReadModifyWrite, SetAndNewerTimestampCase) { col = maybe_column_2.value(); ASSERT_EQ(col.cells_size(), 1); ASSERT_EQ(col.cells(0).timestamp_micros(), far_future_us_latest); - ASSERT_EQ(col.cells(0).value(), - internal::EncodeBigEndian(static_cast(201))); + ASSERT_EQ(col.cells(0).value(), ::google::cloud::internal::EncodeBigEndian( + static_cast(201))); ASSERT_STATUS_OK( HasCell(table, "column_family", "0", "column_1", far_future_us, "older")); ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_1", far_future_us_latest, "latest_with_suffix")); - ASSERT_STATUS_OK( - HasCell(table, "column_family", "0", "column_2", far_future_us, - internal::EncodeBigEndian(static_cast(100)))); - ASSERT_STATUS_OK( - HasCell(table, "column_family", "0", "column_2", far_future_us_latest, - internal::EncodeBigEndian(static_cast(201)))); + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_2", + far_future_us, + ::google::cloud::internal::EncodeBigEndian( + static_cast(100)))); + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_2", + far_future_us_latest, + ::google::cloud::internal::EncodeBigEndian( + static_cast(201)))); } // Test that the RPC does the right thing when the latest cell in the @@ -1145,9 +1391,11 @@ TEST(ReadModifyWrite, SetAndOlderTimestampCase) { {"column_family", "column_1", far_past_us, "old"}, {"column_family", "column_1", far_past_us_oldest, "oldest"}, {"column_family", "column_2", far_past_us, - 
internal::EncodeBigEndian(static_cast(100))}, + ::google::cloud::internal::EncodeBigEndian( + static_cast(100))}, {"column_family", "column_2", far_past_us_oldest, - internal::EncodeBigEndian(static_cast(200))}, + ::google::cloud::internal::EncodeBigEndian( + static_cast(200))}, }; auto status = SetCells(table, table_name, "0", p); @@ -1200,7 +1448,8 @@ TEST(ReadModifyWrite, SetAndOlderTimestampCase) { ASSERT_EQ(integer_col.cells_size(), 1); ASSERT_GE(integer_col.cells(0).timestamp_micros(), system_time_us_before); ASSERT_EQ(integer_col.cells(0).value(), - internal::EncodeBigEndian(static_cast(101))); + ::google::cloud::internal::EncodeBigEndian( + static_cast(101))); ASSERT_STATUS_OK( HasCell(table, "column_family", "0", "column_1", far_past_us, "old")); @@ -1209,16 +1458,17 @@ TEST(ReadModifyWrite, SetAndOlderTimestampCase) { ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_1", col.cells(0).timestamp_micros(), "old_with_suffix")); - ASSERT_STATUS_OK( - HasCell(table, "column_family", "0", "column_2", far_past_us, - internal::EncodeBigEndian(static_cast(100)))); - ASSERT_STATUS_OK( - HasCell(table, "column_family", "0", "column_2", far_past_us_oldest, - internal::EncodeBigEndian(static_cast(200)))); - ASSERT_STATUS_OK( - HasCell(table, "column_family", "0", "column_2", - integer_col.cells(0).timestamp_micros(), - internal::EncodeBigEndian(static_cast(101)))); + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_2", far_past_us, + ::google::cloud::internal::EncodeBigEndian( + static_cast(100)))); + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_2", + far_past_us_oldest, + ::google::cloud::internal::EncodeBigEndian( + static_cast(200)))); + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_2", + integer_col.cells(0).timestamp_micros(), + ::google::cloud::internal::EncodeBigEndian( + static_cast(101)))); } // Test that the RPC does the right thing when the latest cell in the diff --git 
a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index f95717822a6a2..598ff4595352d 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -18,9 +18,12 @@ #include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/bigtable/internal/google_bytes_traits.h" +#include "google/cloud/internal/big_endian.h" #include "google/cloud/internal/make_status.h" +#include "google/cloud/status.h" #include "google/protobuf/util/field_mask_util.h" #include +#include #include #include #include @@ -28,9 +31,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -82,10 +87,29 @@ Status Table::Construct(google::bigtable::admin::v2::Table schema) { "`automated_backup_policy` not empty.", GCP_ERROR_INFO().WithMetadata("schema", schema_.DebugString())); } + for (auto const& column_family_def : schema_.column_families()) { - column_families_.emplace(column_family_def.first, - std::make_shared()); + absl::optional opt_value_type = + absl::nullopt; + + // Support for complex types (AddToCell aggregations, e.t.c.). 
+ if (column_family_def.second.has_value_type()) { + opt_value_type = column_family_def.second.value_type(); + } + + if (opt_value_type.has_value()) { + auto cf = + ColumnFamily::ConstructAggregateColumnFamily(opt_value_type.value()); + if (!cf) { + return cf.status(); + } + column_families_.emplace(column_family_def.first, cf.value()); + } else { + column_families_.emplace(column_family_def.first, + std::make_shared()); + } } + return Status(); } @@ -122,6 +146,7 @@ StatusOr Table::ModifyColumnFamilies( GCP_ERROR_INFO().WithMetadata( "modification", modification.DebugString())); } + using google::protobuf::util::FieldMaskUtil; using google::protobuf::util::FieldMaskUtil; @@ -144,13 +169,36 @@ StatusOr Table::ModifyColumnFamilies( "mask", effective_mask.DebugString())); } } + + // Disallow the modification of the type of data stored in the + // column family (the aggregate type -- which is currently the + // only supported type -- can always be set during column family + // creation). + if (FieldMaskUtil::IsPathInFieldMask("value_type", effective_mask)) { + return InvalidArgumentError( + "The value_type cannot be changed after column family creation", + GCP_ERROR_INFO().WithMetadata("mask", + effective_mask.DebugString())); + } + FieldMaskUtil::MergeMessageTo(modification.update(), effective_mask, FieldMaskUtil::MergeOptions(), &(cf_it->second)); } else if (modification.has_create()) { - if (!new_column_families - .emplace(modification.id(), std::make_shared()) - .second) { + std::shared_ptr cf; + // Have we been asked to create an aggregate column family? 
+ if (modification.create().has_value_type()) { + auto value_type = modification.create().value_type(); + auto maybe_cf = + ColumnFamily::ConstructAggregateColumnFamily(value_type); + if (!maybe_cf) { + return maybe_cf.status(); + } + cf = std::move(maybe_cf.value()); + } else { + cf = std::make_shared(); + } + if (!new_column_families.emplace(modification.id(), cf).second) { return AlreadyExistsError( "Column family already exists.", GCP_ERROR_INFO().WithMetadata("modification", @@ -264,9 +312,31 @@ Status Table::DoMutationsWithPossibleRollback( return status; } } else if (mutation.has_add_to_cell()) { - return UnimplementedError( - "Unsupported mutation type.", - GCP_ERROR_INFO().WithMetadata("mutation", mutation.DebugString())); + auto const& add_to_cell = mutation.add_to_cell(); + + absl::optional timestamp_override = + absl::nullopt; + + std::chrono::milliseconds timestamp = std::chrono::milliseconds::zero(); + + if (add_to_cell.has_timestamp() && + add_to_cell.timestamp().has_raw_timestamp_micros()) { + timestamp = std::chrono::duration_cast( + std::chrono::microseconds( + add_to_cell.timestamp().raw_timestamp_micros())); + } + + // If no valid timestamp is provided, override with the system time. 
+ if (timestamp <= std::chrono::milliseconds::zero()) { + timestamp = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); + timestamp_override.emplace(std::move(timestamp)); + } + + auto status = row_transaction.AddToCell(add_to_cell, timestamp_override); + if (!status.ok()) { + return status; + } } else if (mutation.has_merge_to_cell()) { return UnimplementedError( "Unsupported mutation type.", @@ -561,10 +631,98 @@ Table::ReadModifyWriteRow( // NOLINTBEGIN(readability-convert-member-functions-to-static) Status RowTransaction::AddToCell( - ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell) { - return UnimplementedError( - "Unsupported mutation type.", - GCP_ERROR_INFO().WithMetadata("mutation", add_to_cell.DebugString())); + ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell, + absl::optional timestamp_override) { + auto status = table_->FindColumnFamily(add_to_cell); + if (!status.ok()) { + return status.status(); + } + + auto& cf = status->get(); + auto cf_value_type = cf.GetValueType(); + if (!cf_value_type.has_value() || + !cf_value_type.value().has_aggregate_type()) { + return InvalidArgumentError( + "column family is not configured to contain aggregation cells or " + "aggregation type not properly configured", + GCP_ERROR_INFO().WithMetadata("column family", + add_to_cell.family_name())); + } + + // Ensure that we support the aggregation that is configured in the + // column family. 
+ switch (cf_value_type.value().aggregate_type().aggregator_case()) { + case google::bigtable::admin::v2::Type::Aggregate::kSum: + case google::bigtable::admin::v2::Type::Aggregate::kMin: + case google::bigtable::admin::v2::Type::Aggregate::kMax: + break; + default: + return UnimplementedError( + "column family configured with unimplemented aggregation", + GCP_ERROR_INFO() + .WithMetadata("column family", add_to_cell.family_name()) + .WithMetadata("configured aggregation", + absl::StrFormat("%d", cf_value_type.value() + .aggregate_type() + .aggregator_case()))); + } + + if (!add_to_cell.has_input()) { + return InvalidArgumentError( + "input not set", + GCP_ERROR_INFO().WithMetadata("mutation", add_to_cell.DebugString())); + } + + switch (add_to_cell.input().kind_case()) { + case google::bigtable::v2::Value::kIntValue: + if (!add_to_cell.input().has_int_value()) { + return InvalidArgumentError("input value not set", + GCP_ERROR_INFO().WithMetadata( + "mutation", add_to_cell.DebugString())); + } + break; + default: + return InvalidArgumentError( + "only int64 values are supported", + GCP_ERROR_INFO().WithMetadata("mutation", add_to_cell.DebugString())); + } + auto int64_input = add_to_cell.input().int_value(); + + auto value = google::cloud::internal::EncodeBigEndian(int64_input); + auto row_key = row_key_; + + std::chrono::milliseconds ts_ms; + if (timestamp_override.has_value()) { + ts_ms = timestamp_override.value(); + } else { + ts_ms = std::chrono::duration_cast( + std::chrono::microseconds( + add_to_cell.timestamp().raw_timestamp_micros())); + } + + if (!add_to_cell.has_column_qualifier() || + !add_to_cell.column_qualifier().has_raw_value()) { + return InvalidArgumentError( + "column qualifier not set", + GCP_ERROR_INFO().WithMetadata("mutation", add_to_cell.DebugString())); + } + auto column_qualifier = add_to_cell.column_qualifier().raw_value(); + + auto maybe_old_value = cf.UpdateCell(row_key, column_qualifier, ts_ms, value); + if (!maybe_old_value) { + 
return maybe_old_value.status(); + } + + if (!maybe_old_value.value()) { + DeleteValue delete_value{cf, std::move(column_qualifier), ts_ms}; + undo_.emplace(std::move(delete_value)); + } else { + RestoreValue restore_value{cf, std::move(column_qualifier), ts_ms, + std::move(maybe_old_value.value().value())}; + undo_.emplace(std::move(restore_value)); + } + + return Status(); } Status RowTransaction::MergeToCell( diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 0af050bfd6476..186c90ae2c6c4 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -154,7 +154,8 @@ class RowTransaction { absl::optional timestamp_override = absl::nullopt); Status AddToCell( - ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell); + ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell, + absl::optional timestamp_override); Status MergeToCell( ::google::bigtable::v2::Mutation_MergeToCell const& merge_to_cell); Status DeleteFromColumn( From ca0ed08ce4318ddb7f4ffd37bfea6142fb705a2e Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 26 Jun 2025 18:49:28 +0300 Subject: [PATCH 165/195] emulator: tests: prepare to reuse some code from mutations_test.cc. 
--- google/cloud/bigtable/emulator/CMakeLists.txt | 2 + .../emulator/bigtable_emulator_common.bzl | 2 + .../cloud/bigtable/emulator/mutations_test.cc | 40 +------------------ 3 files changed, 5 insertions(+), 39 deletions(-) diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index 5a656edc1932a..c58856c8c9cf7 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -33,6 +33,8 @@ add_library( server.h table.cc table.h + test_util.h + test_util.cc to_grpc_status.cc to_grpc_status.h) diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl index 1b733901fde4a..29519b5409272 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl @@ -26,6 +26,7 @@ bigtable_emulator_common_hdrs = [ "row_streamer.h", "server.h", "table.h", + "test_util.h", "to_grpc_status.h", ] @@ -38,5 +39,6 @@ bigtable_emulator_common_srcs = [ "row_streamer.cc", "server.cc", "table.cc", + "test_util.cc", "to_grpc_status.cc", ] diff --git a/google/cloud/bigtable/emulator/mutations_test.cc b/google/cloud/bigtable/emulator/mutations_test.cc index 245f82ad07fc2..219bc069053c0 100644 --- a/google/cloud/bigtable/emulator/mutations_test.cc +++ b/google/cloud/bigtable/emulator/mutations_test.cc @@ -14,6 +14,7 @@ #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/emulator/test_util.h" #include "google/cloud/internal/big_endian.h" #include "google/cloud/internal/make_status.h" #include "google/cloud/status.h" @@ -41,25 +42,6 @@ namespace emulator { using ::google::protobuf::TextFormat; using std::string; -struct SetCellParams { - std::string column_family_name; - std::string column_qualifier; - int64_t timestamp_micros; - std::string data; -}; - 
-StatusOr> CreateTable( - std::string const& table_name, std::vector& column_families) { - ::google::bigtable::admin::v2::Table schema; - schema.set_name(table_name); - for (auto& column_family_name : column_families) { - (*schema.mutable_column_families())[column_family_name] = - ::google::bigtable::admin::v2::ColumnFamily(); - } - - return Table::Create(schema); -} - ::google::bigtable::admin::v2::ColumnFamily MakeBEAggregateCFProto( ::google::bigtable::admin::v2::Type_Aggregate::AggregatorCase aggregator) { ::google::bigtable::admin::v2::ColumnFamily column_family; @@ -154,26 +136,6 @@ Status DeleteFromColumns( return table->MutateRow(mutation_request); } -Status SetCells( - std::shared_ptr& table, - std::string const& table_name, std::string const& row_key, - std::vector& set_cell_params) { - ::google::bigtable::v2::MutateRowRequest mutation_request; - mutation_request.set_table_name(table_name); - mutation_request.set_row_key(row_key); - - for (auto m : set_cell_params) { - auto* mutation_request_mutation = mutation_request.add_mutations(); - auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); - set_cell_mutation->set_family_name(m.column_family_name); - set_cell_mutation->set_column_qualifier(m.column_qualifier); - set_cell_mutation->set_timestamp_micros(m.timestamp_micros); - set_cell_mutation->set_value(m.data); - } - - return table->MutateRow(mutation_request); -} - Status HasCell(std::shared_ptr& table, std::string const& column_family, std::string const& row_key, std::string const& column_qualifier, int64_t timestamp_micros, From df894e813c6c70937fe77dc0b2ccc9d0e1a139fd Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 26 Jun 2025 19:02:00 +0300 Subject: [PATCH 166/195] emulator: fix api usage and compilation. 
--- google/cloud/bigtable/emulator/column_family.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 648c3b0db0333..49fbc78dc3243 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -33,7 +33,8 @@ namespace emulator { // timestamps. Remove when that is fixed and they are in decreasing // order, at which point we can just pick the first element. std::map::iterator latest( - std::map& cells_not_empty) { + std::map>& + cells_not_empty) { assert(!cells_not_empty.empty()); auto first_it = cells_not_empty.begin(); From d446920f93a74dd4df3767fc03615cb6c0100e3d Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 26 Jun 2025 19:05:19 +0300 Subject: [PATCH 167/195] emulator: refactor to reuse existing testing code. --- google/cloud/bigtable/emulator/test_util.cc | 44 +++++++++++++++++++++ google/cloud/bigtable/emulator/test_util.h | 28 +++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 google/cloud/bigtable/emulator/test_util.cc create mode 100644 google/cloud/bigtable/emulator/test_util.h diff --git a/google/cloud/bigtable/emulator/test_util.cc b/google/cloud/bigtable/emulator/test_util.cc new file mode 100644 index 0000000000000..458595e8f9a2a --- /dev/null +++ b/google/cloud/bigtable/emulator/test_util.cc @@ -0,0 +1,44 @@ +#include "google/cloud/bigtable/emulator/test_util.h" +#include "google/cloud/bigtable/emulator/table.h" + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +StatusOr> CreateTable( + std::string const& table_name, std::vector& column_families) { + ::google::bigtable::admin::v2::Table schema; + schema.set_name(table_name); + for (auto& column_family_name : column_families) { + (*schema.mutable_column_families())[column_family_name] = + ::google::bigtable::admin::v2::ColumnFamily(); + } + + return 
Table::Create(schema); +} + +Status SetCells( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector& set_cell_params) { + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + for (auto m : set_cell_params) { + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); + set_cell_mutation->set_family_name(m.column_family_name); + set_cell_mutation->set_column_qualifier(m.column_qualifier); + set_cell_mutation->set_timestamp_micros(m.timestamp_micros); + set_cell_mutation->set_value(m.data); + } + + return table->MutateRow(mutation_request); +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/test_util.h b/google/cloud/bigtable/emulator/test_util.h new file mode 100644 index 0000000000000..04bb2f0caf6f0 --- /dev/null +++ b/google/cloud/bigtable/emulator/test_util.h @@ -0,0 +1,28 @@ +#include "google/cloud/bigtable/emulator/table.h" +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +struct SetCellParams { + std::string column_family_name; + std::string column_qualifier; + int64_t timestamp_micros; + std::string data; +}; + +Status SetCells( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector& set_cell_params); + +StatusOr> CreateTable( + std::string const& table_name, std::vector& column_families); + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google From fa5e09ec0a29a7933921b8598d10b62f9826a684 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 26 Jun 2025 21:38:08 +0300 Subject: [PATCH 168/195] emulator: filters: ColumnRange: implement end-to-end test. 
--- google/cloud/bigtable/emulator/filter_test.cc | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/google/cloud/bigtable/emulator/filter_test.cc b/google/cloud/bigtable/emulator/filter_test.cc index 010adbd51bfff..976bde6f92ba4 100644 --- a/google/cloud/bigtable/emulator/filter_test.cc +++ b/google/cloud/bigtable/emulator/filter_test.cc @@ -13,11 +13,19 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/emulator/test_util.h" #include "google/cloud/testing_util/chrono_literals.h" #include "google/cloud/testing_util/status_matchers.h" #include "gmock/gmock.h" +#include #include +#include +#include #include +#include +#include namespace google { namespace cloud { @@ -1883,6 +1891,71 @@ TEST_F(FilterWorkTest, ConditionBranchFilterNextDifferentThanCell) { EXPECT_EQ(expected, *maybe_output); } +// Test our implementation of the ColumnRange filter, by actually +// streaming cells from actual table data (hence end to end). 
+TEST(FiltersEndToEnd, ColumnRange) { + std::vector column_families = {"family1", "family2", "family3"}; + auto maybe_table = CreateTable("table", column_families); + ASSERT_STATUS_OK(maybe_table); + auto& table = maybe_table.value(); + + std::vector created = { + {"family1", "a00", 0, "bar"}, {"family1", "b00", 0, "bar"}, + {"family1", "b01", 0, "bar"}, {"family1", "b02", 0, "bar"}, + {"family2", "a00", 0, "bar"}, {"family2", "b01", 0, "bar"}, + {"family2", "b00", 0, "bar"}, {"family3", "a00", 0, "bar"}, + }; + + std::string row_key = "column-range-row-key"; + + auto status = SetCells(table, "table", row_key, created); + ASSERT_STATUS_OK(status); + + auto all_rows_set = std::make_shared(StringRangeSet::All()); + + RowFilter filter; + filter.mutable_column_range_filter()->set_family_name("family1"); + filter.mutable_column_range_filter()->set_start_qualifier_closed("b00"); + filter.mutable_column_range_filter()->set_end_qualifier_open("b02"); + + struct Cell { + std::string row_key; + std::string column_family; + std::string column_qualifier; + std::int64_t timestamp_micros; + std::string value; + + bool operator==(Cell const& other) const { + return this->row_key == other.row_key && + this->column_family == other.column_family && + this->column_qualifier == other.column_qualifier && + this->timestamp_micros == other.timestamp_micros && + this->value == other.value; + } + }; + + auto maybe_stream = table->CreateCellStream(all_rows_set, filter); + ASSERT_STATUS_OK(maybe_stream); + + std::vector expected = { + {row_key, "family1", "b00", 0, "bar"}, + {row_key, "family1", "b01", 0, "bar"}, + }; + + std::vector actual; + auto& stream = *maybe_stream; + for (; stream; ++stream) { + actual.push_back({stream->row_key(), stream->column_family(), + stream->column_qualifier(), + stream->timestamp().count() * 1000, stream->value()}); + } + + ASSERT_EQ(expected.size(), actual.size()); + + ASSERT_TRUE( + std::is_permutation(expected.begin(), expected.end(), actual.begin())); +} 
+ } // namespace emulator } // namespace bigtable } // namespace cloud From 37353162848b08d912103a6c231f0c3a46cd0b27 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 26 Jun 2025 22:54:05 +0300 Subject: [PATCH 169/195] emulator: make CreateCellStream public. We need it for tests of filters and ReadRow that do not have to or cannot start up the GRPC network server infrastructure. --- google/cloud/bigtable/emulator/table.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 186c90ae2c6c4..f281eaa17d500 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -71,6 +71,10 @@ class Table : public std::enable_shared_from_this
{ return DoMutationsWithPossibleRollback(row_key, mutations); } + StatusOr CreateCellStream( + std::shared_ptr range_set, + absl::optional) const; + Status ReadRows(google::bigtable::v2::ReadRowsRequest const& request, RowStreamer& row_streamer) const; @@ -104,9 +108,6 @@ class Table : public std::enable_shared_from_this
{ MESSAGE const& message) const; bool IsDeleteProtectedNoLock() const; Status Construct(google::bigtable::admin::v2::Table schema); - StatusOr CreateCellStream( - std::shared_ptr range_set, - absl::optional) const; Status DoMutationsWithPossibleRollback( std::string const& row_key, google::protobuf::RepeatedPtrField const& From a9cc374b1a904f498c61ef78158a1720ea0ec0fb Mon Sep 17 00:00:00 2001 From: Adam Czajkowski Date: Fri, 27 Jun 2025 11:01:29 +0200 Subject: [PATCH 170/195] test: add blackbox gRPC server tests (#11) --- google/cloud/bigtable/emulator/server_test.cc | 200 +++++++++++++++++- 1 file changed, 199 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/server_test.cc b/google/cloud/bigtable/emulator/server_test.cc index 76b970d819d33..ceba09422f381 100644 --- a/google/cloud/bigtable/emulator/server_test.cc +++ b/google/cloud/bigtable/emulator/server_test.cc @@ -13,11 +13,209 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/server.h" +#include +#include +#include +#include +#include +#include +#include namespace google { namespace cloud { namespace bigtable { -namespace emulator {} // namespace emulator +namespace emulator { + +class ServerTest : public ::testing::Test { + protected: + std::unique_ptr server_; + std::shared_ptr channel_; + grpc::ClientContext ctx_; + + void SetUp() override { + server_ = CreateDefaultEmulatorServer("127.0.0.1", 0); + channel_ = grpc::CreateChannel( + "localhost:" + std::to_string(server_->bound_port()), + grpc::InsecureChannelCredentials()); + } + + std::unique_ptr DataClient() { + return google::bigtable::v2::Bigtable::NewStub(channel_); + } + + std::unique_ptr + TableAdminClient() { + return google::bigtable::admin::v2::BigtableTableAdmin::NewStub(channel_); + } +}; + +TEST_F(ServerTest, DataCheckAndMutateRow) { + google::bigtable::v2::CheckAndMutateRowRequest request; + google::bigtable::v2::CheckAndMutateRowResponse response; + + grpc::Status status = + 
DataClient()->CheckAndMutateRow(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataExecuteQuery) { + google::bigtable::v2::ExecuteQueryRequest request; + + grpc::Status status = DataClient()->ExecuteQuery(&ctx_, request)->Finish(); + GTEST_SKIP() << "Data API's ExecuteQuery is not supported by the emulator."; + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataGenerateInitialChangeStreamPartitions) { + google::bigtable::v2::GenerateInitialChangeStreamPartitionsRequest request; + + grpc::Status status = + DataClient() + ->GenerateInitialChangeStreamPartitions(&ctx_, request) + ->Finish(); + GTEST_SKIP() << "Data API's GenerateInitialChangeStreamPartitions is not " + "supported by the emulator."; + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataMutateRow) { + google::bigtable::v2::MutateRowRequest request; + google::bigtable::v2::MutateRowResponse response; + + grpc::Status status = DataClient()->MutateRow(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataMutateRows) { + google::bigtable::v2::MutateRowsRequest request; + + grpc::Status status = DataClient()->MutateRows(&ctx_, request)->Finish(); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataPingAndWarm) { + google::bigtable::v2::PingAndWarmRequest request; + google::bigtable::v2::PingAndWarmResponse response; + + grpc::Status status = DataClient()->PingAndWarm(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataReadChangeStream) { + google::bigtable::v2::ReadChangeStreamRequest request; + + grpc::Status status = + DataClient()->ReadChangeStream(&ctx_, request)->Finish(); + GTEST_SKIP() + << "Data API's ReadChangeStream is not supported by the emulator."; + 
EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataReadModifyWriteRow) { + google::bigtable::v2::ReadModifyWriteRowRequest request; + google::bigtable::v2::ReadModifyWriteRowResponse response; + + grpc::Status status = + DataClient()->ReadModifyWriteRow(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataReadRows) { + google::bigtable::v2::ReadRowsRequest request; + + grpc::Status status = DataClient()->ReadRows(&ctx_, request)->Finish(); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataSampleRowKeys) { + google::bigtable::v2::SampleRowKeysRequest request; + google::bigtable::v2::SampleRowKeysResponse response; + + grpc::Status status = DataClient()->SampleRowKeys(&ctx_, request)->Finish(); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminCheckConsistency) { + google::bigtable::admin::v2::CheckConsistencyRequest request; + google::bigtable::admin::v2::CheckConsistencyResponse response; + + grpc::Status status = + TableAdminClient()->CheckConsistency(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminCreateTable) { + google::bigtable::admin::v2::CreateTableRequest request; + google::bigtable::admin::v2::Table response; + + grpc::Status status = + TableAdminClient()->CreateTable(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminDeleteTable) { + google::bigtable::admin::v2::DeleteTableRequest request; + google::protobuf::Empty response; + + grpc::Status status = + TableAdminClient()->DeleteTable(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminDropRowRange) { + google::bigtable::admin::v2::DropRowRangeRequest request; + 
google::protobuf::Empty response; + + grpc::Status status = + TableAdminClient()->DropRowRange(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminGenerateConsistencyToken) { + google::bigtable::admin::v2::GenerateConsistencyTokenRequest request; + google::bigtable::admin::v2::GenerateConsistencyTokenResponse response; + + grpc::Status status = + TableAdminClient()->GenerateConsistencyToken(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminGetTable) { + google::bigtable::admin::v2::GetTableRequest request; + google::bigtable::admin::v2::Table response; + + grpc::Status status = TableAdminClient()->GetTable(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminListTables) { + google::bigtable::admin::v2::ListTablesRequest request; + google::bigtable::admin::v2::ListTablesResponse response; + + grpc::Status status = + TableAdminClient()->ListTables(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminModifyColumnFamilies) { + google::bigtable::admin::v2::ModifyColumnFamiliesRequest request; + google::bigtable::admin::v2::Table response; + + grpc::Status status = + TableAdminClient()->ModifyColumnFamilies(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminUpdateTable) { + google::bigtable::admin::v2::UpdateTableRequest request; + google::longrunning::Operation response; + + grpc::Status status = + TableAdminClient()->UpdateTable(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +} // namespace emulator } // namespace bigtable } // namespace cloud } // namespace google From ab313def1378f4ca8f82d8c33c74e82f7d29e8f8 Mon Sep 17 00:00:00 
2001 From: Adam Czajkowski Date: Tue, 1 Jul 2025 09:35:17 +0200 Subject: [PATCH 171/195] test: add emulator filter tests and fix the issues encountered (#12) --- .../cloud/bigtable/emulator/column_family.cc | 58 +- .../cloud/bigtable/emulator/column_family.h | 52 +- .../bigtable/emulator/column_family_test.cc | 24 +- .../bigtable/emulator/drop_row_range_test.cc | 2 - google/cloud/bigtable/emulator/filter.cc | 35 +- google/cloud/bigtable/emulator/filter.h | 2 +- google/cloud/bigtable/emulator/filter_test.cc | 741 ++++++++++++++++-- google/cloud/bigtable/emulator/filtered_map.h | 187 ++++- .../bigtable/emulator/filtered_map_test.cc | 123 ++- .../cloud/bigtable/emulator/mutations_test.cc | 2 - google/cloud/bigtable/emulator/range_set.h | 9 - .../cloud/bigtable/emulator/range_set_test.cc | 10 - google/cloud/bigtable/emulator/table.cc | 1 - google/cloud/bigtable/emulator/table_test.cc | 3 - 14 files changed, 1029 insertions(+), 220 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 0a8475869fcf6..61eb9dc328810 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -29,20 +29,6 @@ namespace cloud { namespace bigtable { namespace emulator { -// FIXME: Workaround our current incorrect ordering of -// timestamps. Remove when that is fixed and they are in decreasing -// order, at which point we can just pick the first element. -std::map::iterator latest( - std::map& cells_not_empty) { - assert(!cells_not_empty.empty()); - - auto first_it = cells_not_empty.begin(); - auto last_it = std::prev(cells_not_empty.end()); - auto latest_it = first_it->first >= last_it->first ? 
first_it : last_it; - - return latest_it; -} - StatusOr ColumnRow::ReadModifyWrite( std::int64_t inc_value) { auto system_ms = std::chrono::duration_cast( @@ -56,10 +42,7 @@ StatusOr ColumnRow::ReadModifyWrite( absl::nullopt}; } - // FIXME: Workaround our current incorrect ordering of - // timestamps. Remove when that is fixed and they are in decreasing - // order, at which point we can just pick the first element. - auto latest_it = latest(cells_); + auto latest_it = cells_.begin(); auto maybe_old_value = google::cloud::internal::DecodeBigEndian(latest_it->second); @@ -97,10 +80,7 @@ ReadModifyWriteCellResult ColumnRow::ReadModifyWrite( absl::nullopt}; } - // FIXME: Workaround our current incorrect ordering of - // timestamps. Remove when that is fixed and they are in decreasing - // order, at which point we can just pick the first element. - auto latest_it = latest(cells_); + auto latest_it = cells_.begin(); auto value = latest_it->second + append_value; @@ -159,14 +139,21 @@ StatusOr> ColumnRow::UpdateCell( std::vector ColumnRow::DeleteTimeRange( ::google::bigtable::v2::TimestampRange const& time_range) { std::vector deleted_cells; - for (auto cell_it = cells_.lower_bound( - std::chrono::duration_cast( - std::chrono::microseconds(time_range.start_timestamp_micros()))); + absl::optional maybe_end_micros = + time_range.end_timestamp_micros(); + if (maybe_end_micros.value_or(0) == 0) { + maybe_end_micros.reset(); + } + for (auto cell_it = + maybe_end_micros + ? 
upper_bound( + std::chrono::duration_cast( + std::chrono::microseconds(*maybe_end_micros))) + : begin(); cell_it != cells_.end() && - (time_range.end_timestamp_micros() == 0 || - cell_it->first < std::chrono::duration_cast( + cell_it->first >= std::chrono::duration_cast( std::chrono::microseconds( - time_range.end_timestamp_micros())));) { + time_range.start_timestamp_micros()));) { Cell cell = {std::move(cell_it->first), std::move(cell_it->second)}; deleted_cells.emplace_back(std::move(cell)); cells_.erase(cell_it++); @@ -346,9 +333,9 @@ FilteredColumnFamilyStream::FilteredColumnFamilyStream( row_ranges_(std::move(row_set)), column_ranges_(StringRangeSet::All()), timestamp_ranges_(TimestampRangeSet::All()), - rows_(RangeFilteredMapView(column_family, - *row_ranges_), - std::cref(row_regexes_)) {} + rows_( + StringRangeFilteredMapView(column_family, *row_ranges_), + std::cref(row_regexes_)) {} bool FilteredColumnFamilyStream::ApplyFilter( InternalFilter const& internal_filter) { @@ -404,7 +391,7 @@ void FilteredColumnFamilyStream::InitializeIfNeeded() const { bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() const { for (; column_it_.value() != columns_.value().end(); ++(column_it_.value())) { - cells_ = RangeFilteredMapView( + cells_ = TimestampRangeFilteredMapView( column_it_.value()->second, timestamp_ranges_); cell_it_ = cells_.value().begin(); if (cell_it_.value() != cells_.value().end()) { @@ -416,10 +403,9 @@ bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() const { bool FilteredColumnFamilyStream::PointToFirstCellAfterRowChange() const { for (; (*row_it_) != rows_.end(); ++(*row_it_)) { - columns_ = RegexFiteredMapView< - RangeFilteredMapView>( - RangeFilteredMapView( - (*row_it_)->second, column_ranges_), + columns_ = RegexFiteredMapView>( + StringRangeFilteredMapView((*row_it_)->second, + column_ranges_), column_regexes_); column_it_ = columns_.value().begin(); if (PointToFirstCellAfterColumnChange()) { diff --git 
a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 7becdb5a29d8e..6bb1a5ddc14ea 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -84,8 +84,7 @@ class ColumnRow { * Insert or update and existing cell at a given timestamp. * * @param timestamp the time stamp at which the value will be inserted or - * updated. If it equals zero then number of milliseconds since epoch will - * be used instead. + * updated. * @param value the value to insert/update. * * @return no value if the timestamp had no value before, otherwise @@ -120,13 +119,12 @@ class ColumnRow { absl::optional DeleteTimeStamp(std::chrono::milliseconds timestamp); bool HasCells() const { return !cells_.empty(); } - using const_iterator = - std::map::const_iterator; - using iterator = std::map::iterator; + + using const_iterator = std::map>::const_iterator; + const_iterator begin() const { return cells_.begin(); } const_iterator end() const { return cells_.end(); } - iterator begin() { return cells_.begin(); } - iterator end() { return cells_.end(); } const_iterator lower_bound(std::chrono::milliseconds timestamp) const { return cells_.lower_bound(timestamp); } @@ -134,18 +132,15 @@ class ColumnRow { return cells_.upper_bound(timestamp); } - std::map::iterator find( - std::chrono::milliseconds const& timestamp) { + const_iterator find(std::chrono::milliseconds const& timestamp) { return cells_.find(timestamp); } - void erase( - std::map::iterator timestamp_it) { - cells_.erase(timestamp_it); - } + void erase(const_iterator timestamp_it) { cells_.erase(timestamp_it); } private: - std::map cells_; + // Note the order - the iterator return the freshest cells first. + std::map> cells_; }; /** @@ -173,8 +168,7 @@ class ColumnFamilyRow { * * @param column_qualifier the column qualifier at which to update the value. * @param timestamp the time stamp at which the value will be inserted or - * updated. 
If it equals zero then number of milliseconds since epoch will - * be used instead. + * updated. * @param value the value to insert/update. * * @return no value if the timestamp had no value before, otherwise @@ -287,8 +281,7 @@ class ColumnFamily { * @param row_key the row key at which to update the value. * @param column_qualifier the column qualifier at which to update the value. * @param timestamp the time stamp at which the value will be inserted or - * updated. If it equals zero then number of milliseconds since epoch will - * be used instead. + * updated. * @param value the value to insert/update. * * @return no value if the timestamp had no value before, otherwise @@ -431,8 +424,8 @@ class ColumnFamily { if (!existing_int) { return existing_int.status(); } - auto new_int = google::cloud::internal::DecodeBigEndian( - std::move(new_value)); + auto new_int = + google::cloud::internal::DecodeBigEndian(new_value); if (!new_int) { return new_int.status(); } @@ -451,8 +444,8 @@ class ColumnFamily { if (!existing_int) { return existing_int.status(); } - auto new_int = google::cloud::internal::DecodeBigEndian( - std::move(new_value)); + auto new_int = + google::cloud::internal::DecodeBigEndian(new_value); if (!new_int) { return new_int.status(); } @@ -539,25 +532,24 @@ class FilteredColumnFamilyStream : public AbstractCellStreamImpl { std::vector> column_regexes_; mutable TimestampRangeSet timestamp_ranges_; - RegexFiteredMapView> rows_; - mutable absl::optional>> + RegexFiteredMapView> rows_; + mutable absl::optional< + RegexFiteredMapView>> columns_; - mutable absl::optional> - cells_; + mutable absl::optional> cells_; // If row_it_ == rows_.end() we've reached the end. // We maintain the following invariant: // if (row_it_ != rows_.end()) then // cell_it_ != cells.end() && column_it_ != columns_.end(). 
mutable absl::optional>::const_iterator> + StringRangeFilteredMapView>::const_iterator> row_it_; mutable absl::optional>::const_iterator> + StringRangeFilteredMapView>::const_iterator> column_it_; mutable absl::optional< - RangeFilteredMapView::const_iterator> + TimestampRangeFilteredMapView::const_iterator> cell_it_; mutable absl::optional cur_value_; mutable bool initialized_{false}; diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index ff76b03290080..e529d1a5e2f60 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -13,9 +13,7 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/column_family.h" -#include "google/cloud/bigtable/row_range.h" #include "google/cloud/testing_util/chrono_literals.h" -#include "google/cloud/testing_util/is_proto_equal.h" #include #include #include @@ -69,8 +67,14 @@ TEST(ColumnRow, Trivial) { col_row.SetCell(0_ms, "baz"); col_row.SetCell(20_ms, "qux"); + EXPECT_EQ("qux", col_row.lower_bound(30_ms)->second); + EXPECT_EQ("qux", col_row.lower_bound(20_ms)->second); EXPECT_EQ("bar", col_row.lower_bound(10_ms)->second); - EXPECT_EQ("qux", col_row.upper_bound(10_ms)->second); + EXPECT_EQ("baz", col_row.lower_bound(0_ms)->second); + EXPECT_EQ("qux", col_row.upper_bound(30_ms)->second); + EXPECT_EQ("bar", col_row.upper_bound(20_ms)->second); + EXPECT_EQ("baz", col_row.upper_bound(10_ms)->second); + EXPECT_EQ(col_row.end(), col_row.upper_bound(0_ms)); } TEST(ColumnRow, DeleteTimeRangeFinite) { @@ -225,14 +229,14 @@ TEST(FilteredColumnFamilyStream, Unfiltered) { FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); EXPECT_EQ(R"""( row0 cf1:col0 @10ms: foo -row0 cf1:col1 @20ms: bar row0 cf1:col1 @30ms: baz +row0 cf1:col1 @20ms: bar row1 cf1:col0 @10ms: foo -row1 cf1:col1 @20ms: foo row1 cf1:col1 @30ms: foo +row1 cf1:col1 @20ms: foo row2 cf1:col0 @10ms: qux 
-row2 cf1:col2 @40ms: qux row2 cf1:col2 @50ms: qux +row2 cf1:col2 @40ms: qux )""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); } @@ -272,12 +276,12 @@ TEST(FilteredColumnFamilyStream, FilterByTimestampRange) { TimestampRange{TimestampRangeSet::Range(100_ms, 200_ms)}); EXPECT_EQ(R"""( row0 cf1:col0 @100ms: foo -row0 cf1:col2 @100ms: foo -row0 cf1:col2 @120ms: foo row0 cf1:col2 @140ms: foo -row1 cf1:col2 @100ms: foo -row1 cf1:col2 @120ms: foo +row0 cf1:col2 @120ms: foo +row0 cf1:col2 @100ms: foo row1 cf1:col2 @140ms: foo +row1 cf1:col2 @120ms: foo +row1 cf1:col2 @100ms: foo )""", "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); } diff --git a/google/cloud/bigtable/emulator/drop_row_range_test.cc b/google/cloud/bigtable/emulator/drop_row_range_test.cc index 9ee7905daa551..7bb62616f86e9 100644 --- a/google/cloud/bigtable/emulator/drop_row_range_test.cc +++ b/google/cloud/bigtable/emulator/drop_row_range_test.cc @@ -13,13 +13,11 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/column_family.h" -#include "google/cloud/bigtable/emulator/row_streamer.h" #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/internal/make_status.h" #include "google/cloud/status.h" #include "google/cloud/status_or.h" #include "google/cloud/testing_util/status_matchers.h" -#include "gmock/gmock.h" #include #include #include diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index 7e62ed87b08ce..6fcdacca49100 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ b/google/cloud/bigtable/emulator/filter.cc @@ -19,7 +19,6 @@ #include "google/cloud/internal/make_status.h" #include "google/cloud/status_or.h" #include -#include #include namespace google { @@ -325,8 +324,8 @@ class TrivialFilter : public AbstractCellStreamImpl { * @param filter_filter a functor which given an `InternalFilter` decides * whether filtering this cell stream's results and then applying the * 
`InternalFilter` would yield the same results as applying - * `InternalFilter` to the underlying stream and the perform this stream's - * filtering. + * `InternalFilter` to the underlying stream and then performing this + * stream's filtering. */ template CellStream MakeTrivialFilter( @@ -539,14 +538,14 @@ class ConditionStream : public AbstractCellStreamImpl { if (condition_true_) { true_stream_.Next(mode); if (!true_stream_ || - !internal::CompareRowKey(current_row_, true_stream_->row_key())) { + internal::CompareRowKey(current_row_, true_stream_->row_key()) != 0) { source_.Next(NextMode::kRow); OnNewRow(); } } else { false_stream_.Next(mode); - if (!false_stream_ || - !internal::CompareRowKey(current_row_, false_stream_->row_key())) { + if (!false_stream_ || internal::CompareRowKey( + current_row_, false_stream_->row_key()) != 0) { source_.Next(NextMode::kRow); OnNewRow(); } @@ -870,7 +869,7 @@ StatusOr CreateFilterImpl( if (per_row_state-- <= 0) { return {}; } - return NextMode::kRow; + return NextMode::kCell; }, [cells_per_row_offset]() { return cells_per_row_offset; }, [](InternalFilter const& internal_filter) { @@ -971,10 +970,14 @@ StatusOr CreateFilterImpl( } CellStreamConstructor res = [source_ctor = std::move(source_ctor)] { auto source = source_ctor(); - return MakeTrivialTransformer(std::move(source), [](CellView cell_view) { - cell_view.SetValue(""); - return cell_view; - }); + // We need to ensure that the value outlives the reference. 
+ std::string const stripped_value; + return MakeTrivialTransformer( + std::move(source), + [stripped_value = std::move(stripped_value)](CellView cell_view) { + cell_view.SetValue(stripped_value); + return cell_view; + }); }; return res; } @@ -1046,6 +1049,16 @@ StatusOr CreateFilterImpl( "`condition` must have a `predicate_filter` set.", GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); } + if (!filter.condition().has_true_filter() && + !filter.condition().has_false_filter()) { + return InvalidArgumentError( + "`condition` must have `true_filter` or `false_filter` set.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + // FIXME: validate that `sink` is not present in condition's predicate. + // Expected error: + // INVALID_ARGUMENT: Error in field 'condition filter predicate' : sink + // cannot be nested in a condition filter auto maybe_predicate_stream_ctor = CreateFilterImpl( filter.condition().predicate_filter(), source_ctor, direct_sinks); diff --git a/google/cloud/bigtable/emulator/filter.h b/google/cloud/bigtable/emulator/filter.h index c01062a57ee8e..1addb61939db7 100644 --- a/google/cloud/bigtable/emulator/filter.h +++ b/google/cloud/bigtable/emulator/filter.h @@ -44,7 +44,7 @@ namespace emulator { // uninteresting columns and avoid passing all the values around. Most of the // filters can be applied in any order, which makes our filtering task easy. // -// Unfortunately, some filters (e.g. `cells_per_row_limit_filter`) prevents us +// Unfortunately, some filters (e.g. `cells_per_row_limit_filter`) prevent us // from moving filters applied later in the chain to its beginning. Hence, we // need to keep the naive (object-per-graph-node) approach at least as a backup // option. 
diff --git a/google/cloud/bigtable/emulator/filter_test.cc b/google/cloud/bigtable/emulator/filter_test.cc index 8e2fb2d78aa68..010adbd51bfff 100644 --- a/google/cloud/bigtable/emulator/filter_test.cc +++ b/google/cloud/bigtable/emulator/filter_test.cc @@ -13,13 +13,11 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/filter.h" -#include "google/cloud/bigtable/data_connection.h" -#include "google/cloud/bigtable/table.h" #include "google/cloud/testing_util/chrono_literals.h" -#include "google/cloud/testing_util/is_proto_equal.h" #include "google/cloud/testing_util/status_matchers.h" -#include +#include "gmock/gmock.h" #include +#include namespace google { namespace cloud { @@ -62,13 +60,15 @@ class TestCell { public: TestCell(std::string row_key, std::string column_family, std::string column_qualifier, std::chrono::milliseconds timestamp, - std::string value) + std::string value, absl::optional label = {}) : row_key_(std::move(row_key)), column_family_(std::move(column_family)), column_qualifier_(std::move(column_qualifier)), timestamp_(std::move(timestamp)), value_(std::move(value)), - view_(row_key_, column_family_, column_qualifier_, timestamp_, value_) { + view_(row_key_, column_family_, column_qualifier_, timestamp_, value_), + label_(std::move(label)) { + maybe_label_view(); } TestCell(TestCell const& other) @@ -77,24 +77,44 @@ class TestCell { column_qualifier_(other.column_qualifier_), timestamp_(other.timestamp_), value_(other.value_), - view_(row_key_, column_family_, column_qualifier_, timestamp_, value_) { + view_(row_key_, column_family_, column_qualifier_, timestamp_, value_), + label_(other.label_) { + maybe_label_view(); } + TestCell(TestCell&& other) noexcept : row_key_(std::move(other.row_key_)), column_family_(std::move(other.column_family_)), column_qualifier_(std::move(other.column_qualifier_)), timestamp_(std::move(other.timestamp_)), value_(std::move(other.value_)), - view_(row_key_, column_family_, 
column_qualifier_, timestamp_, value_) { + view_(row_key_, column_family_, column_qualifier_, timestamp_, value_), + label_(std::move(other.label_)) { + maybe_label_view(); + } + + TestCell Labeled(std::string const& label) { + TestCell labeled_copy = *this; + labeled_copy.label_ = label; + labeled_copy.maybe_label_view(); + return labeled_copy; } CellView const& AsCellView() const { return view_; } bool operator==(CellView const& cell_view) const { + bool labels_equal = (!label_.has_value() && !cell_view.HasLabel()) || + (label_.has_value() && cell_view.HasLabel() && + label_.value() == cell_view.label()); return row_key_ == cell_view.row_key() && column_family_ == cell_view.column_family() && column_qualifier_ == cell_view.column_qualifier() && - timestamp_ == cell_view.timestamp() && value_ == cell_view.value(); + timestamp_ == cell_view.timestamp() && value_ == cell_view.value() && + labels_equal; + } + + bool operator==(TestCell const& other) const { + return operator==(other.AsCellView()); } private: @@ -104,6 +124,13 @@ class TestCell { std::chrono::milliseconds timestamp_; std::string value_; CellView view_; + absl::optional label_; + + void maybe_label_view() { + if (label_) { + view_.SetLabel(label_.value()); + } + } }; std::ostream& operator<<(std::ostream& stream, TestCell const& test_cell) { @@ -181,50 +208,6 @@ TEST(CellStream, NextColumnNotSupported) { ASSERT_FALSE(cell_stream.HasValue()); } -TEST(CellStream, NextRowNotSupported) { - std::vector cells{ - TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, - TestCell{"row1", "cf1", "col1", 1_ms, "val2"}, - TestCell{"row1", "cf1", "col2", 0_ms, "val3"}, // column changed - TestCell{"row1", "cf1", "col2", 1_ms, "val4"}, - TestCell{"row1", "cf2", "col2", 0_ms, "val5"}, // column family changed - TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, - TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed - TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; - auto cur_cell = cells.begin(); - - auto mock_impl = 
std::make_unique(); - EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)) - .WillRepeatedly(Return(false)); - EXPECT_CALL(*mock_impl, Value).WillRepeatedly([&]() -> CellView const& { - return cur_cell->AsCellView(); - }); - EXPECT_CALL(*mock_impl, HasValue).WillRepeatedly([&] { - return cur_cell != cells.end(); - }); - EXPECT_CALL(*mock_impl, Next(NextMode::kCell)).WillRepeatedly([&] { - ++cur_cell; - return true; - }); - - CellStream cell_stream(std::move(mock_impl)); - - cell_stream.Next(NextMode::kColumn); - ASSERT_TRUE(cell_stream.HasValue()); - EXPECT_EQ(cells[2], cell_stream.Value()); - - cell_stream.Next(NextMode::kColumn); - ASSERT_TRUE(cell_stream.HasValue()); - EXPECT_EQ(cells[4], cell_stream.Value()); - - cell_stream.Next(NextMode::kColumn); - ASSERT_TRUE(cell_stream.HasValue()); - EXPECT_EQ(cells[6], cell_stream.Value()); - - cell_stream.Next(NextMode::kColumn); - ASSERT_FALSE(cell_stream.HasValue()); -} - TEST(CellStream, NextRowUnsupported) { std::vector cells{ TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, @@ -834,6 +817,34 @@ TEST_F(InvalidFilterProtoTest, ConditionNoPredicate) { "`condition` must have a `predicate_filter` set."))); } +TEST_F(InvalidFilterProtoTest, ConditionNeitherTrueNorFalse) { + filter_.mutable_condition()->mutable_predicate_filter()->set_pass_all_filter( + true); + + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs( + StatusCode::kInvalidArgument, + testing::HasSubstr( + "`condition` must have `true_filter` or `false_filter` set."))); +} + +TEST_F(InvalidFilterProtoTest, ConditionPredicateSink) { + filter_.mutable_condition()->mutable_predicate_filter()->set_sink(true); + filter_.mutable_condition()->mutable_true_filter()->pass_all_filter(); + filter_.mutable_condition()->mutable_false_filter()->pass_all_filter(); + + auto maybe_stream = TryCreate(); + + // FIXME unskip this test after fixing condition validation. 
+ GTEST_SKIP() << "Searching filter graph for sink nodes unimplemented."; + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "sink cannot be nested in a condition filter"))); +} + TEST_F(InvalidFilterProtoTest, SinkFalse) { filter_.set_sink(false); auto maybe_stream = TryCreate(); @@ -1013,6 +1024,13 @@ TEST_F(FilterApplicationPropagation, BlockAll) { } } +TEST_F(FilterApplicationPropagation, Sink) { + RowFilter filter; + filter.set_sink(true); + + TestPropagation(filter, 0); +} + TEST_F(FilterApplicationPropagation, RowKeyRegex) { RowFilter filter; filter.set_row_key_regex_filter("foo.*"); @@ -1176,6 +1194,7 @@ TEST_F(FilterApplicationPropagation, Condition) { } } } + class InternalFiltersAreApplied : public ::testing::Test { protected: RowFilter filter_; @@ -1248,6 +1267,622 @@ TEST_F(InternalFiltersAreApplied, TimestampRange) { }); } +class VectorCellStream : public AbstractCellStreamImpl { + public: + explicit VectorCellStream(std::vector const& cells) + : cells_{cells}, current_cell_{cells_.begin()} {} + bool ApplyFilter(InternalFilter const&) override { return false; } + bool HasValue() const override { return current_cell_ != cells_.end(); } + CellView const& Value() const override { return current_cell_->AsCellView(); } + bool Next(NextMode mode) override { + if (mode != NextMode::kCell) { + return false; + } + ++current_cell_; + return true; + } + + private: + std::vector cells_; + std::vector::const_iterator current_cell_; +}; + +class FilterWorkTest : public ::testing::Test { + public: + protected: + static StatusOr> GetFilterOutput( + std::vector const& input_cells, RowFilter const& filter) { + auto maybe_stream = CreateFilter(filter, [input_cells] { + return CellStream(std::make_unique(input_cells)); + }); + if (!maybe_stream.status().ok()) { + return maybe_stream.status(); + } + + std::vector filter_output; + while (maybe_stream->HasValue()) { + auto& v = maybe_stream.value(); + filter_output.emplace_back( + 
v->row_key(), v->column_family(), v->column_qualifier(), + v->timestamp(), v->value(), + v->HasLabel() ? absl::optional{v->label()} + : absl::optional{}); + maybe_stream->Next(); + } + return filter_output; + } +}; + +TEST_F(FilterWorkTest, Pass) { + RowFilter filter; + filter.set_pass_all_filter(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, PassLabels) { + RowFilter filter; + filter.set_pass_all_filter(true); + + std::vector cells{ + TestCell{"r", "cf", "q", 0_ms, "v", "label1"}, + TestCell{"r", "cf", "q", 0_ms, "v", "label2"}, + TestCell{"r", "cf", "q", 0_ms, "v", "label3"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, Sink) { + RowFilter filter; + filter.set_sink(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + // Next row + TestCell{"r2", "cf", "q", 0_ms, "v"}, + // Next cell + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, Block) { + RowFilter filter; + filter.set_block_all_filter(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r1", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_TRUE(maybe_output->empty()); +} + +TEST_F(FilterWorkTest, RowRegex) { + RowFilter filter; + filter.set_row_key_regex_filter("r2"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r3", "cf", "q", 0_ms, "v"}, + }; + auto 
maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, ValueRegex) { + RowFilter filter; + filter.set_value_regex_filter("v2"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v1"}, + TestCell{"r2", "cf", "q", 0_ms, "v2"}, + TestCell{"r2", "cf", "q", 0_ms, "v3"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(1, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); +} + +TEST_F(FilterWorkTest, SampleRows) { + RowFilter filter; + filter.set_row_sample_filter(0.5); + + size_t samples = 100; + std::vector cells; + cells.reserve(samples); + for (size_t i = 0; i < samples; i++) { + cells.emplace_back("r" + std::to_string(i), "cf", "q", 0_ms, "v"); + } + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_NE(0, maybe_output->size()); + EXPECT_NE(samples, maybe_output->size()); +} + +TEST_F(FilterWorkTest, FamilyNameRegex) { + RowFilter filter; + filter.set_family_name_regex_filter("cf2"); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r2", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf3", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, QualifierRegex) { + RowFilter filter; + filter.set_column_qualifier_regex_filter("q2"); + + std::vector cells{ + TestCell{"r1", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r2", "cf", "q3", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + 
ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, ColumnRange) { + RowFilter filter; + filter.mutable_column_range_filter()->set_family_name("cf"); + filter.mutable_column_range_filter()->set_start_qualifier_open("q1"); + filter.mutable_column_range_filter()->set_end_qualifier_closed("q2"); + + std::vector cells{ + TestCell{"r1", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r2", "cf", "q3", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, ValueRange) { + RowFilter filter; + filter.mutable_value_range_filter()->set_start_value_open("v1"); + filter.mutable_value_range_filter()->set_end_value_closed("v2"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v1"}, + TestCell{"r2", "cf", "q", 0_ms, "v2"}, + TestCell{"r2", "cf", "q", 0_ms, "v2"}, + TestCell{"r3", "cf", "q", 0_ms, "v3"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, CellsPerRowOffset) { + RowFilter filter; + filter.set_cells_per_row_offset_filter(1); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r1", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r3", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, 
filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(5, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[3], maybe_output->at(1)); + EXPECT_EQ(cells[5], maybe_output->at(2)); + EXPECT_EQ(cells[7], maybe_output->at(3)); + EXPECT_EQ(cells[8], maybe_output->at(4)); +} + +TEST_F(FilterWorkTest, CellsPerRowLimit) { + RowFilter filter; + filter.set_cells_per_row_limit_filter(1); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r1", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r3", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(4, maybe_output->size()); + EXPECT_EQ(cells[0], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); + EXPECT_EQ(cells[4], maybe_output->at(2)); + EXPECT_EQ(cells[6], maybe_output->at(3)); +} + +TEST_F(FilterWorkTest, LatestCellsPerColumnLimit) { + RowFilter filter; + filter.set_cells_per_column_limit_filter(1); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r1", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r3", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(6, maybe_output->size()); + EXPECT_EQ(cells[0], maybe_output->at(0)); + EXPECT_EQ(cells[1], maybe_output->at(1)); + EXPECT_EQ(cells[2], maybe_output->at(2)); + EXPECT_EQ(cells[3], maybe_output->at(3)); + EXPECT_EQ(cells[4], maybe_output->at(4)); + EXPECT_EQ(cells[6], maybe_output->at(5)); +} + 
+TEST_F(FilterWorkTest, TimestampRange) { + RowFilter filter; + filter.mutable_timestamp_range_filter()->set_start_timestamp_micros(2000); + filter.mutable_timestamp_range_filter()->set_end_timestamp_micros(3000); + + std::vector cells{ + TestCell{"r1", "cf", "q", 3_ms, "v"}, + TestCell{"r2", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(1, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); +} + +TEST_F(FilterWorkTest, Label) { + RowFilter filter; + std::string label = "lbl"; + filter.set_apply_label_transformer(label); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r1", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + TestCell expected{"r1", "cf", "q", 0_ms, "v", label}; + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(expected, maybe_output->at(0)); + EXPECT_EQ(expected, maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, StripValue) { + RowFilter filter; + filter.set_strip_value_transformer(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r1", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + TestCell expected{"r1", "cf", "q", 0_ms, ""}; + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(expected, maybe_output->at(0)); + EXPECT_EQ(expected, maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, Chain) { + RowFilter filter; + filter.mutable_chain()->add_filters()->set_cells_per_row_offset_filter(1); + filter.mutable_chain()->add_filters()->set_cells_per_row_limit_filter(1); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r1", "cf2", "q", 0_ms, "v"}, + TestCell{"r1", "cf3", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r2", "cf", "q3", 
0_ms, "v"}, + TestCell{"r3", "cf", "q", 3_ms, "v"}, + TestCell{"r3", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(3, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[4], maybe_output->at(1)); + EXPECT_EQ(cells[7], maybe_output->at(2)); +} + +TEST_F(FilterWorkTest, ChainEmpty) { + RowFilter filter; + filter.mutable_chain()->Clear(); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, ChainSink) { + RowFilter filter; + filter.mutable_chain()->add_filters()->set_sink(true); + filter.mutable_chain()->add_filters()->set_sink(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, Interleave) { + RowFilter filter; + filter.mutable_interleave()->add_filters()->set_family_name_regex_filter( + "cf1"); + filter.mutable_interleave()->add_filters()->set_family_name_regex_filter( + "cf2"); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r2", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf2", "q", 0_ms, "v"}, + TestCell{"r3", "cf1", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, InterleaveEmpty) { + RowFilter filter; + filter.mutable_interleave()->Clear(); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto 
maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(0, maybe_output->size()); +} + +TEST_F(FilterWorkTest, InterleaveSink) { + RowFilter filter; + filter.mutable_interleave()->add_filters()->set_sink(true); + filter.mutable_interleave()->add_filters()->set_block_all_filter(true); + filter.mutable_interleave()->add_filters()->set_sink(true); + filter.mutable_interleave()->add_filters()->set_pass_all_filter(true); + filter.mutable_interleave()->add_filters()->set_sink(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(cells.size() * 4, maybe_output->size()); + for (size_t i = 0; i < maybe_output->size() / 3; i++) { + EXPECT_EQ(cells[0], maybe_output->at(i)); + EXPECT_EQ(cells[1], maybe_output->at(i + maybe_output->size() / 3)); + EXPECT_EQ(cells[2], maybe_output->at(i + 2 * maybe_output->size() / 3)); + } +} + +// The test case from the example given next to `sink` protobuf definition. 
+TEST_F(FilterWorkTest, RegexInterleaveChainLabelSinkRegex) { + RowFilter filter; + + RowFilter* c0 = filter.mutable_chain()->add_filters(); + RowFilter* c1 = filter.mutable_chain()->add_filters(); + RowFilter* c2 = filter.mutable_chain()->add_filters(); + + RowFilter* c1i0 = c1->mutable_interleave()->add_filters(); + RowFilter* c1i1 = c1->mutable_interleave()->add_filters(); + + RowFilter* c1i1c0 = c1i1->mutable_chain()->add_filters(); + RowFilter* c1i1c1 = c1i1->mutable_chain()->add_filters(); + + c0->set_family_name_regex_filter("A"); + + c1i0->set_pass_all_filter(true); + c1i1c0->set_apply_label_transformer("foo"); + c1i1c1->set_sink(true); + + c2->set_column_qualifier_regex_filter("B"); + + std::vector cells{ + TestCell("r", "A", "A", 1_ms, "w"), + TestCell("r", "A", "B", 2_ms, "x"), + TestCell("r", "B", "B", 4_ms, "z"), + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + TestCell labeled0 = cells[0].Labeled("foo"); + TestCell labeled1 = cells[1].Labeled("foo"); + + ASSERT_EQ(3, maybe_output->size()); + EXPECT_EQ(labeled0, maybe_output->at(0)); + EXPECT_TRUE(maybe_output->at(1) == labeled1 || + maybe_output->at(1) == cells[1]); + EXPECT_TRUE(maybe_output->at(2) == labeled1 || + maybe_output->at(2) == cells[1]); + EXPECT_NE(maybe_output->at(1).AsCellView().HasLabel(), + maybe_output->at(2).AsCellView().HasLabel()); +} + +TEST_F(FilterWorkTest, ConditionEmptyNonempty) { + RowFilter filter; + filter.mutable_condition() + ->mutable_predicate_filter() + ->set_value_regex_filter("t"); + filter.mutable_condition() + ->mutable_true_filter() + ->set_apply_label_transformer("TRUE"); + filter.mutable_condition() + ->mutable_false_filter() + ->set_apply_label_transformer("FALSE"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 3_ms, "t"}, + TestCell{"r1", "cf", "q", 2_ms, "t"}, + TestCell{"r1", "cf", "q", 1_ms, "t"}, + TestCell{"r2", "cf", "q", 3_ms, "f"}, + TestCell{"r2", "cf", "q", 2_ms, "t"}, + TestCell{"r2", "cf", "q", 
1_ms, "f"}, + TestCell{"r3", "cf", "q", 3_ms, "f"}, + TestCell{"r3", "cf", "q", 2_ms, "f"}, + TestCell{"r3", "cf", "q", 1_ms, "f"}, + TestCell{"r4", "cf", "q", 3_ms, "t"}, + TestCell{"r4", "cf", "q", 2_ms, "f"}, + TestCell{"r4", "cf", "q", 1_ms, "t"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(cells.size(), maybe_output->size()); + EXPECT_EQ(cells[1].Labeled("TRUE"), maybe_output->at(1)); + EXPECT_EQ(cells[2].Labeled("TRUE"), maybe_output->at(2)); + EXPECT_EQ(cells[3].Labeled("TRUE"), maybe_output->at(3)); + EXPECT_EQ(cells[4].Labeled("TRUE"), maybe_output->at(4)); + EXPECT_EQ(cells[5].Labeled("TRUE"), maybe_output->at(5)); + EXPECT_EQ(cells[6].Labeled("FALSE"), maybe_output->at(6)); + EXPECT_EQ(cells[7].Labeled("FALSE"), maybe_output->at(7)); + EXPECT_EQ(cells[8].Labeled("FALSE"), maybe_output->at(8)); + EXPECT_EQ(cells[9].Labeled("TRUE"), maybe_output->at(9)); + EXPECT_EQ(cells[10].Labeled("TRUE"), maybe_output->at(10)); + EXPECT_EQ(cells[11].Labeled("TRUE"), maybe_output->at(11)); +} + +TEST_F(FilterWorkTest, ConditionBranchFilterNextDifferentThanCell) { + RowFilter filter; + filter.mutable_condition() + ->mutable_predicate_filter() + ->set_value_regex_filter("t"); + filter.mutable_condition() + ->mutable_true_filter() + ->mutable_chain() + ->add_filters() + ->set_apply_label_transformer("TRUE"); + filter.mutable_condition() + ->mutable_true_filter() + ->mutable_chain() + ->add_filters() + ->set_cells_per_column_limit_filter(1); + filter.mutable_condition() + ->mutable_false_filter() + ->mutable_chain() + ->add_filters() + ->set_apply_label_transformer("FALSE"); + filter.mutable_condition() + ->mutable_false_filter() + ->mutable_chain() + ->add_filters() + ->set_column_qualifier_regex_filter("q2"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 3_ms, "t"}, + TestCell{"r1", "cf", "q", 2_ms, "t"}, + TestCell{"r1", "cf", "q", 1_ms, "t"}, + TestCell{"r2", "cf", "q", 3_ms, "f"}, + TestCell{"r2", 
"cf", "q", 2_ms, "t"}, + TestCell{"r2", "cf", "q", 1_ms, "f"}, + TestCell{"r3", "cf1", "q2", 1_ms, "f"}, + TestCell{"r3", "cf2", "q1", 2_ms, "f"}, + TestCell{"r3", "cf3", "q2", 3_ms, "f"}, + TestCell{"r4", "cf", "q", 3_ms, "f"}, + TestCell{"r4", "cf", "q", 2_ms, "f"}, + TestCell{"r4", "cf", "q", 1_ms, "t"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + std::vector expected{ + TestCell{"r1", "cf", "q", 3_ms, "t", "TRUE"}, + TestCell{"r2", "cf", "q", 3_ms, "f", "TRUE"}, + TestCell{"r3", "cf1", "q2", 1_ms, "f", "FALSE"}, + TestCell{"r3", "cf3", "q2", 3_ms, "f", "FALSE"}, + TestCell{"r4", "cf", "q", 3_ms, "f", "TRUE"}, + }; + EXPECT_EQ(expected, *maybe_output); +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/filtered_map.h b/google/cloud/bigtable/emulator/filtered_map.h index a6f369ba38fc3..e94359eaf16fe 100644 --- a/google/cloud/bigtable/emulator/filtered_map.h +++ b/google/cloud/bigtable/emulator/filtered_map.h @@ -26,23 +26,21 @@ namespace bigtable { namespace emulator { /** - * A map view filtering elements by whether their keys fall into a range set. + * A map view filtering elements by whether their keys fall into a string range + * set. * - * Objects of this type provide a lightweight wrapper around `std::map`, which - * provides a iterator, which will skip over unwanted elements. + * Objects of this type provide a lightweight wrapper around `std::map`-like + * object, which provides a iterator, which will skip over unwanted elements. * * This class is not very generic. It should be thought of as a crude way of * deduplicating code. * - * The unfiltered elements' keys should fall into a given range set - either - * `StringRangeSet` or by `TimestampRangeSet`. + * The unfiltered elements' keys should fall into a given string range set. 
* - * @tparam Map the type of the map to be wrapped, an instantiation of `std::map` - * @tparam PermittedRanges the type of the filter, either `StringRangeSet` or - * `TimestampRangeSet` + * @tparam Map the type of the map-like object to be wrapped. */ -template -class RangeFilteredMapView { +template +class StringRangeFilteredMapView { public: // NOLINTNEXTLINE(readability-identifier-naming) class const_iterator { @@ -56,10 +54,10 @@ class RangeFilteredMapView { using pointer = value_type const*; const_iterator( - RangeFilteredMapView const& parent, + StringRangeFilteredMapView const& parent, typename Map::const_iterator unfiltered_pos, - typename std::set:: + typename std::set:: const_iterator filter_pos) : parent_(std::cref(parent)), unfiltered_pos_(std::move(unfiltered_pos)), @@ -132,11 +130,11 @@ class RangeFilteredMapView { } } - std::reference_wrapper parent_; + std::reference_wrapper parent_; typename Map::const_iterator unfiltered_pos_; - typename std::set< - typename PermittedRanges::Range, - typename PermittedRanges::Range::StartLess>::const_iterator filter_pos_; + typename std::set::const_iterator + filter_pos_; }; /** @@ -150,7 +148,8 @@ class RangeFilteredMapView { * @unfiltered the map whose elements need to be filtered. * @filter the range set which dictates which ranges should remain unfiltered. */ - RangeFilteredMapView(Map const& unfiltered, PermittedRanges const& filter) + StringRangeFilteredMapView(Map const& unfiltered, + StringRangeSet const& filter) : unfiltered_(std::cref(unfiltered)), filter_(std::cref(filter)) {} const_iterator begin() const { @@ -164,21 +163,165 @@ class RangeFilteredMapView { private: std::reference_wrapper unfiltered_; - std::reference_wrapper filter_; + std::reference_wrapper filter_; +}; + +/** + * A map view filtering elements by whether their keys fall into a timestamp + * range set. 
+ * + * Objects of this type provide a lightweight wrapper around `std::map`-like + * object, which provides a iterator, which will skip over unwanted elements. + * + * This class is not very generic. It should be thought of as a crude way of + * deduplicating code. + * + * The unfiltered elements' keys should fall into a given timestamp range set. + * + * Note that the implementation assumes that the input `std::map`-like object's + * iterator is sorted high-to-low. + * + * @tparam Map the type of the map-like object to be wrapped. + */ +template +class TimestampRangeFilteredMapView { + public: + // NOLINTNEXTLINE(readability-identifier-naming) + class const_iterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = + typename std::iterator_traits::value_type; + using difference_type = typename std::iterator_traits< + typename Map::const_iterator>::difference_type; + using reference = value_type const&; + using pointer = value_type const*; + + // Note that the set whose iterator is received here is sorted + // "earliest-start-first", whereas we need to have the iterator sorted + // "latest-end-first". Fortunately, the set is disjoint, so we can simply + // use reverse iterator. 
+ const_iterator( + TimestampRangeFilteredMapView const& parent, + typename Map::const_iterator unfiltered_pos, + typename std::set:: + const_reverse_iterator const& filter_pos) + : parent_(std::cref(parent)), + unfiltered_pos_(std::move(unfiltered_pos)), + filter_pos_(filter_pos) { + AdvanceToNextRange(); + EnsureIteratorValid(); + } + + const_iterator& operator++() { + ++unfiltered_pos_; + EnsureIteratorValid(); + return *this; + } + + const_iterator operator++(int) { + const_iterator retval = *this; + ++(*this); + return retval; + } + + bool operator==(const_iterator const& other) const { + return unfiltered_pos_ == other.unfiltered_pos_; + } + + bool operator!=(const_iterator const& other) const { + return !(*this == other); + } + + reference operator*() const { return *unfiltered_pos_; } + pointer operator->() const { return &*unfiltered_pos_; } + + private: + // Adjust `unfiltered_pos_` after we transition to a different range. + void AdvanceToNextRange() { + if (filter_pos_ == + parent_.get().filter_.get().disjoint_ranges().crend()) { + // We've reached the end. + unfiltered_pos_ = parent_.get().unfiltered_.get().end(); + return; + } + if (unfiltered_pos_ == parent_.get().unfiltered_.get().end()) { + // unfiltered_pos_ is already pointing far enough. + return; + } + if (!filter_pos_->IsAboveEnd(unfiltered_pos_->first)) { + // unfiltered_pos_ is already pointing far enough. + return; + } + // Timestamp ranges always have end open, so we always use upper_bound(). + unfiltered_pos_ = + parent_.get().unfiltered_.get().upper_bound(filter_pos_->end()); + } + + // After `unfiltered_pos_` was increased, make sure it's within a valid + // range. + void EnsureIteratorValid() { + // `unfiltered_pos_` may point to a row which is past the end of the range + // pointed by filter_pos_. Make sure this only happens when the iteration + // reaches its end. 
+ while (unfiltered_pos_ != parent_.get().unfiltered_.get().end() && + filter_pos_ != + parent_.get().filter_.get().disjoint_ranges().crend() && + filter_pos_->IsBelowStart(unfiltered_pos_->first)) { + ++filter_pos_; + AdvanceToNextRange(); + } + } + + std::reference_wrapper parent_; + typename Map::const_iterator unfiltered_pos_; + typename std::set:: + const_reverse_iterator filter_pos_; + }; + + /** + * Create a new object. + * + * Objects of this class store references to arguments passed in the + * constructor. The user is responsible for making sure that the referenced + * objects continue to exist throughout the lifetime of this object. They + * should also not change. + * + * @unfiltered the map whose elements need to be filtered. + * @filter the range set which dictates which ranges should remain unfiltered. + */ + TimestampRangeFilteredMapView(Map const& unfiltered, + TimestampRangeSet const& filter) + : unfiltered_(std::cref(unfiltered)), filter_(std::cref(filter)) {} + + const_iterator begin() const { + return const_iterator(*this, unfiltered_.get().begin(), + filter_.get().disjoint_ranges().crbegin()); + } + const_iterator end() const { + return const_iterator(*this, unfiltered_.get().end(), + filter_.get().disjoint_ranges().crend()); + } + + private: + std::reference_wrapper unfiltered_; + std::reference_wrapper filter_; }; /** * A map view filtering elements by whether their keys match a regex. * - * Objects of this type provide a lightweight wrapper around `std::map`, which - * provides a iterator, which will skip over unwanted elements. + * Objects of this type provide a lightweight wrapper around `std::map`-like + * object, which provides a iterator, which will skip over unwanted elements. * * This class is not very generic. It should be thought of as a crude way of * deduplicating code. * * Elements whose keys match all regexes are not filtered out. 
* - * @tparam Map the type of the map to be wrapped, an instantiation of `std::map` + * @tparam Map the type of the map-like object to be wrapped. */ template class RegexFiteredMapView { diff --git a/google/cloud/bigtable/emulator/filtered_map_test.cc b/google/cloud/bigtable/emulator/filtered_map_test.cc index 69dd3856d5b98..e1b3edfcbf483 100644 --- a/google/cloud/bigtable/emulator/filtered_map_test.cc +++ b/google/cloud/bigtable/emulator/filtered_map_test.cc @@ -13,10 +13,7 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/filtered_map.h" -#include "google/cloud/bigtable/row_range.h" #include "google/cloud/testing_util/chrono_literals.h" -#include "google/cloud/testing_util/is_proto_equal.h" -#include "google/cloud/testing_util/status_matchers.h" #include #include @@ -25,6 +22,8 @@ namespace cloud { namespace bigtable { namespace emulator { +using testing_util::chrono_literals::operator""_ms; + bool const kOpen = true; bool const kClosed = false; @@ -46,83 +45,85 @@ std::vector Vec(std::initializer_list const& v) { return res; } -TEST(RangeFilteredMapView, NoFilter) { +template +std::vector TSKeys(Map const& map) { + std::vector res; + std::transform(map.begin(), map.end(), std::back_inserter(res), + [](typename Map::const_iterator::value_type const& elem) { + return elem.first; + }); + return res; +} + +TEST(StringRangeFilteredMapView, NoFilter) { std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; auto filter = StringRangeSet::All(); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"zero", "one", "two"}), Keys(filtered)); } -TEST(RangeFilteredMapView, EmptyFilter) { +TEST(StringRangeFilteredMapView, EmptyFilter) { std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; auto filter = StringRangeSet::Empty(); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({}), 
Keys(filtered)); } -TEST(RangeFilteredMapView, OneOpen) { +TEST(StringRangeFilteredMapView, OneOpen) { std::map unfiltered{{"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; auto filter = StringRangeSet::Empty(); filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); } -TEST(RangeFilteredMapView, OneClosed) { +TEST(StringRangeFilteredMapView, OneClosed) { std::map unfiltered{{"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; auto filter = StringRangeSet::Empty(); filter.Sum(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb", "AAB"}), Keys(filtered)); } -TEST(RangeFilteredMapView, NoEntriesAfterClosedFilter) { +TEST(StringRangeFilteredMapView, NoEntriesAfterClosedFilter) { std::map unfiltered{ {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}}; auto filter = StringRangeSet::Empty(); filter.Sum(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb"}), Keys(filtered)); } -TEST(RangeFilteredMapView, NoEntriesAfterOpenFilter) { +TEST(StringRangeFilteredMapView, NoEntriesAfterOpenFilter) { std::map unfiltered{ {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}}; auto filter = StringRangeSet::Empty(); filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); } -TEST(RangeFilteredMapView, NoEntriesBeforeClosedFilter) { +TEST(StringRangeFilteredMapView, NoEntriesBeforeClosedFilter) { std::map 
unfiltered{ {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; auto filter = StringRangeSet::Empty(); filter.Sum(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb", "AAB"}), Keys(filtered)); } -TEST(RangeFilteredMapView, NoEntriesBeforeOpenFilter) { +TEST(StringRangeFilteredMapView, NoEntriesBeforeOpenFilter) { std::map unfiltered{ {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; auto filter = StringRangeSet::Empty(); filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); } -TEST(RangeFilteredMapView, MultipleFilters) { +TEST(StringRangeFilteredMapView, MultipleFilters) { std::map unfiltered{ {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}, {"BB", 0}, {"BBB", 0}, {"BBBb", 0}, {"CCCa", 0}, @@ -131,13 +132,75 @@ TEST(RangeFilteredMapView, MultipleFilters) { filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kClosed)); filter.Sum(StringRangeSet::Range("BBB", kClosed, "BBC", kOpen)); filter.Sum(StringRangeSet::Range("CCC", kClosed, "CCD", kOpen)); - RangeFilteredMapView filtered( - unfiltered, filter); + StringRangeFilteredMapView filtered(unfiltered, filter); EXPECT_EQ(Vec({"AAAa", "AAAb", "AAB", "BBB", "BBBb", "CCCa", "CCCb"}), Keys(filtered)); } +TEST(TimestampRangeFilteredMapView, NoFilter) { + std::map> unfiltered{ + {0_ms, 0}, {1_ms, 1}, {2_ms, 2}}; + auto filter = TimestampRangeSet::All(); + TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({2_ms, 1_ms, 0_ms}), + TSKeys(filtered)); +} + +TEST(TimestampRangeFilteredMapView, EmptyFilter) { + std::map> unfiltered{ + {0_ms, 0}, {1_ms, 1}, {2_ms, 2}}; + auto filter = TimestampRangeSet::Empty(); + 
TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({}), TSKeys(filtered)); +} + +TEST(TimestampRangeFilteredMapView, FiniteRange) { + std::map> unfiltered{ + {0_ms, 0}, {1_ms, 0}, {2_ms, 0}, {3_ms, 0}, {4_ms, 0}}; + auto filter = TimestampRangeSet::Empty(); + filter.Sum(TimestampRangeSet::Range(1_ms, 3_ms)); + TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({2_ms, 1_ms}), + TSKeys(filtered)); +} + +TEST(TimestampRangeFilteredMapView, InfiniteRange) { + std::map> unfiltered{ + {0_ms, 0}, {1_ms, 0}, {2_ms, 0}, {3_ms, 0}, {4_ms, 0}}; + auto filter = TimestampRangeSet::Empty(); + filter.Sum(TimestampRangeSet::Range(1_ms, 0_ms)); + TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({4_ms, 3_ms, 2_ms, 1_ms}), + TSKeys(filtered)); +} + +TEST(TimestampRangeFilteredMapView, MultipleFilters) { + std::chrono::milliseconds max_millis(std::numeric_limits::max()); + std::map> unfiltered{ + {0_ms, 0}, {1_ms, 0}, {2_ms, 0}, {3_ms, 0}, + {4_ms, 0}, {5_ms, 0}, {6_ms, 0}, {7_ms, 0}, + {8_ms, 0}, {9_ms, 0}, {10_ms, 0}, {11_ms, 0}, + {12_ms, 0}, {13_ms, 0}, {14_ms, 0}, {max_millis, 0}, + }; + auto filter = TimestampRangeSet::Empty(); + filter.Sum(TimestampRangeSet::Range(1_ms, 3_ms)); + filter.Sum(TimestampRangeSet::Range(3_ms, 5_ms)); + filter.Sum(TimestampRangeSet::Range(6_ms, 8_ms)); + filter.Sum(TimestampRangeSet::Range(10_ms, 12_ms)); + filter.Sum(TimestampRangeSet::Range(13_ms, 0_ms)); + TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({max_millis, 14_ms, 13_ms, + 11_ms, 10_ms, 7_ms, 6_ms, + 4_ms, 3_ms, 2_ms, 1_ms}), + TSKeys(filtered)); +} + TEST(RegexFiteredMapView, NoFilter) { std::vector> patterns; std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; diff --git a/google/cloud/bigtable/emulator/mutations_test.cc b/google/cloud/bigtable/emulator/mutations_test.cc index 1a49c8be79d80..245f82ad07fc2 100644 --- 
a/google/cloud/bigtable/emulator/mutations_test.cc +++ b/google/cloud/bigtable/emulator/mutations_test.cc @@ -13,14 +13,12 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/column_family.h" -#include "google/cloud/bigtable/emulator/row_streamer.h" #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/internal/big_endian.h" #include "google/cloud/internal/make_status.h" #include "google/cloud/status.h" #include "google/cloud/status_or.h" #include "google/cloud/testing_util/status_matchers.h" -#include "gmock/gmock.h" #include #include #include diff --git a/google/cloud/bigtable/emulator/range_set.h b/google/cloud/bigtable/emulator/range_set.h index dd5e87cdd69ef..0495543766a79 100644 --- a/google/cloud/bigtable/emulator/range_set.h +++ b/google/cloud/bigtable/emulator/range_set.h @@ -146,18 +146,9 @@ class TimestampRangeSet { google::bigtable::v2::TimestampRange const& timestamp_range); Value start() const { return start_; } - Value start_finite() const { return start_; } - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - bool start_open() const { return false; } - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - bool start_closed() const { return true; } void set_start(Range const& source) { start_ = source.start_; } Value end() const { return end_; } - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - bool end_open() const { return true; } - // NOLINTNEXTLINE(readability-convert-member-functions-to-static) - bool end_closed() const { return false; } void set_end(Range const& source) { end_ = source.end_; } bool IsBelowStart(Value value) const { return value < start_; } diff --git a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc index df635e6c9c9d3..b214d753d7c7f 100644 --- a/google/cloud/bigtable/emulator/range_set_test.cc +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -528,12 +528,7 @@ 
TEST(TimestampRangeSet, FromInfiniteTimestampRange) { google::bigtable::v2::TimestampRange{}); ASSERT_STATUS_OK(infinite); EXPECT_EQ(0_ms, infinite->start()); - EXPECT_EQ(0_ms, infinite->start_finite()); EXPECT_EQ(0_ms, infinite->end()); - EXPECT_TRUE(infinite->start_closed()); - EXPECT_TRUE(infinite->end_open()); - EXPECT_FALSE(infinite->start_open()); - EXPECT_FALSE(infinite->end_closed()); } TEST(TimestampRangeSet, FromFiniteTimestampRange) { @@ -544,12 +539,7 @@ TEST(TimestampRangeSet, FromFiniteTimestampRange) { auto finite = TimestampRangeSet::Range::FromTimestampRange(proto); ASSERT_STATUS_OK(finite); EXPECT_EQ(1_ms, finite->start()); - EXPECT_EQ(1_ms, finite->start_finite()); EXPECT_EQ(123456_ms, finite->end()); - EXPECT_TRUE(finite->start_closed()); - EXPECT_TRUE(finite->end_open()); - EXPECT_FALSE(finite->start_open()); - EXPECT_FALSE(finite->end_closed()); } TEST(TimestampRangeSet, RangeStartLess) { diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 598ff4595352d..8df6751a21f54 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -15,7 +15,6 @@ #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/filter.h" -#include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/bigtable/internal/google_bytes_traits.h" #include "google/cloud/internal/big_endian.h" diff --git a/google/cloud/bigtable/emulator/table_test.cc b/google/cloud/bigtable/emulator/table_test.cc index 1b251a7604200..04f115a229002 100644 --- a/google/cloud/bigtable/emulator/table_test.cc +++ b/google/cloud/bigtable/emulator/table_test.cc @@ -13,10 +13,7 @@ // limitations under the License. 
#include "google/cloud/bigtable/emulator/table.h" -#include "google/cloud/bigtable/row_range.h" #include "google/cloud/testing_util/chrono_literals.h" -#include "google/cloud/testing_util/is_proto_equal.h" -#include "google/cloud/testing_util/status_matchers.h" #include namespace google { From d6238e5230365ef6caa9d160ba052a683141c4a5 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 1 Jul 2025 12:00:43 +0300 Subject: [PATCH 172/195] emulator: filters: ColumnRange: Don't return cells from every column family. - There is a passing unit test that tests for this (which is fixed and shows up in this diff). - The CI test (bigtable_filters_integration_test) which revealed this bug now passes. --- google/cloud/bigtable/emulator/table.cc | 39 ++++++++++++++++---- google/cloud/bigtable/emulator/table_test.cc | 4 +- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 8df6751a21f54..dc81a04312da8 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -395,26 +395,51 @@ StatusOr Table::CreateCellStream( } bool FilteredTableStream::ApplyFilter(InternalFilter const& internal_filter) { - if (!absl::holds_alternative(internal_filter)) { + if (!absl::holds_alternative(internal_filter) && + !absl::holds_alternative(internal_filter)) { return MergeCellStreams::ApplyFilter(internal_filter); } + // internal_filter is either FamilyNameRegex or ColumnRange for (auto stream_it = unfinished_streams_.begin(); - stream_it != unfinished_streams_.end(); ++stream_it) { + stream_it != unfinished_streams_.end(); ) { auto const* cf_stream = dynamic_cast(&(*stream_it)->impl()); assert(cf_stream); - if (!re2::RE2::PartialMatch( - cf_stream->column_family_name(), - *absl::get(internal_filter).regex)) { + + // We need to call ApplyFilter on the column family stream. 
But + // ApplyFilter changes the data of the calling object so it cannot + // be const. + auto* cf_stream_mutable = + const_cast(cf_stream); + assert(cf_stream_mutable); + + if ((absl::holds_alternative(internal_filter) && + !re2::RE2::PartialMatch( + cf_stream_mutable->column_family_name(), + *absl::get(internal_filter).regex)) || + (absl::holds_alternative(internal_filter) && + absl::get(internal_filter).column_family != + cf_stream_mutable->column_family_name())) { auto last_it = std::prev(unfinished_streams_.end()); if (stream_it == last_it) { unfinished_streams_.pop_back(); break; } - stream_it->swap(unfinished_streams_.back()); - unfinished_streams_.pop_back(); + + stream_it = unfinished_streams_.erase(stream_it); + continue; + } + + if (absl::holds_alternative(internal_filter) && + absl::get(internal_filter).column_family == + cf_stream_mutable->column_family_name()) { + cf_stream_mutable->ApplyFilter(internal_filter); } + + stream_it++; + } + return true; } diff --git a/google/cloud/bigtable/emulator/table_test.cc b/google/cloud/bigtable/emulator/table_test.cc index 04f115a229002..64c3c1cd5b96e 100644 --- a/google/cloud/bigtable/emulator/table_test.cc +++ b/google/cloud/bigtable/emulator/table_test.cc @@ -91,8 +91,8 @@ TEST(FilteredTableStream, OnlyRightFamilyColumnsAreFiltered) { FilteredTableStream stream(std::move(fams)); stream.ApplyFilter( - ColumnRange{"fam2", StringRangeSet::Range("a", false, "b", false)}); - EXPECT_EQ("row0 fam1:col0 @10ms: foo\n", DumpStream(stream)); + ColumnRange{"fam2", StringRangeSet::Range("col0", false, "col1", true)}); + EXPECT_EQ("row0 fam2:col0 @10ms: foo\n", DumpStream(stream)); } TEST(FilteredTableStream, OtherFiltersArePropagated) { From 023fbd83693e600f77167b52aaed2cbe27174120 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 1 Jul 2025 12:15:50 +0300 Subject: [PATCH 173/195] emulator: tests: add license and header include guards. 
--- google/cloud/bigtable/emulator/test_util.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/google/cloud/bigtable/emulator/test_util.h b/google/cloud/bigtable/emulator/test_util.h index 04bb2f0caf6f0..09188068df120 100644 --- a/google/cloud/bigtable/emulator/test_util.h +++ b/google/cloud/bigtable/emulator/test_util.h @@ -1,3 +1,20 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TEST_UTIL_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TEST_UTIL_H + #include "google/cloud/bigtable/emulator/table.h" #include #include @@ -26,3 +43,5 @@ StatusOr> CreateTable( } // namespace bigtable } // namespace cloud } // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TEST_UTIL_H From cc648489ee4f6d883879c86e43a4fa24d47efe4c Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 1 Jul 2025 12:18:50 +0300 Subject: [PATCH 174/195] emulator: automatic formatting fixes by CI tool. 
--- google/cloud/bigtable/emulator/CMakeLists.txt | 2 +- google/cloud/bigtable/emulator/table.cc | 3 +-- google/cloud/bigtable/emulator/test_util.h | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index c58856c8c9cf7..e6258241e372f 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -33,8 +33,8 @@ add_library( server.h table.cc table.h - test_util.h test_util.cc + test_util.h to_grpc_status.cc to_grpc_status.h) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index dc81a04312da8..f79f49690d962 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -401,7 +401,7 @@ bool FilteredTableStream::ApplyFilter(InternalFilter const& internal_filter) { } // internal_filter is either FamilyNameRegex or ColumnRange for (auto stream_it = unfinished_streams_.begin(); - stream_it != unfinished_streams_.end(); ) { + stream_it != unfinished_streams_.end();) { auto const* cf_stream = dynamic_cast(&(*stream_it)->impl()); assert(cf_stream); @@ -437,7 +437,6 @@ bool FilteredTableStream::ApplyFilter(InternalFilter const& internal_filter) { } stream_it++; - } return true; diff --git a/google/cloud/bigtable/emulator/test_util.h b/google/cloud/bigtable/emulator/test_util.h index 09188068df120..8ed8bf58ad776 100644 --- a/google/cloud/bigtable/emulator/test_util.h +++ b/google/cloud/bigtable/emulator/test_util.h @@ -44,4 +44,4 @@ StatusOr> CreateTable( } // namespace cloud } // namespace google -#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TEST_UTIL_H +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TEST_UTIL_H From 8ac5df7bd0b1c1e8250d0eb744ae8c2d50ddc5a8 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 1 Jul 2025 12:30:11 +0300 Subject: [PATCH 175/195] emulator: mechanical formatting fixes 
by CI tool. --- google/cloud/bigtable/emulator/filter_test.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/google/cloud/bigtable/emulator/filter_test.cc b/google/cloud/bigtable/emulator/filter_test.cc index adebe2e639b39..976bde6f92ba4 100644 --- a/google/cloud/bigtable/emulator/filter_test.cc +++ b/google/cloud/bigtable/emulator/filter_test.cc @@ -26,11 +26,6 @@ #include #include #include -#include "google/cloud/testing_util/chrono_literals.h" -#include "google/cloud/testing_util/status_matchers.h" -#include "gmock/gmock.h" -#include -#include namespace google { namespace cloud { From e89a4dda895bb9908cc069f9fa4d6cd7130eb9bd Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 2 Jul 2025 14:14:53 +0300 Subject: [PATCH 176/195] fix: Fix several ReadRow bugs revealed by the bigtable_data_integration_test test - ReadRows: An empty RowSet should mean stream all the rows, not none of them - ReadRows: Implement rows_limit - server.cc: Increase maximum receive message size to 256MiB to accommodate mutations with large row keys (one client test tries to set several large row keys and sends a message that is 128MiB in size). 
References: TBL-57 Fixes: TBL-57 --- google/cloud/bigtable/emulator/server.cc | 1 + google/cloud/bigtable/emulator/table.cc | 24 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index 045daa55a42bf..88d65d673737b 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -325,6 +325,7 @@ class DefaultEmulatorServer : public EmulatorServer { table_service_(cluster_) { builder_.AddListeningPort(host + ":" + std::to_string(port), grpc::InsecureServerCredentials(), &bound_port_); + builder_.SetMaxReceiveMessageSize(256 * 1024 * 1024); builder_.RegisterService(&bt_service_); builder_.RegisterService(&table_service_); server_ = builder_.BuildAndStart(); diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 8df6751a21f54..62e3cbd232209 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -517,11 +517,15 @@ Table::CheckAndMutateRow( Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, RowStreamer& row_streamer) const { std::shared_ptr row_set; - if (request.has_rows()) { + // We need to check that, not only do we have rows, but that it is + // not empty (i.e. at least one of row_range or rows is specified). 
+ if (request.has_rows() && (request.rows().row_ranges_size() > 0 || + request.rows().row_keys_size() > 0)) { auto maybe_row_set = CreateStringRangeSet(request.rows()); if (!maybe_row_set) { return maybe_row_set.status(); } + row_set = std::make_shared(*std::move(maybe_row_set)); } else { row_set = std::make_shared(StringRangeSet::All()); @@ -538,6 +542,10 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, if (!maybe_stream) { return maybe_stream.status(); } + + std::int64_t rows_count = 0; + absl::optional current_row_key; + CellStream& stream = *maybe_stream; for (; stream; ++stream) { std::cout << "Row: " << stream->row_key() @@ -547,11 +555,25 @@ Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, << " column_value: " << stream->value() << " label: " << (stream->HasLabel() ? stream->label() : std::string("unset")) << std::endl; + + if (request.rows_limit() > 0) { + if (!current_row_key.has_value() || + stream->row_key() != current_row_key.value()) { + rows_count++; + current_row_key = stream->row_key(); + } + + if (rows_count > request.rows_limit()) { + break; + } + } + if (!row_streamer.Stream(*stream)) { std::cout << "HOW?" << std::endl; return AbortedError("Stream closed by the client.", GCP_ERROR_INFO()); } } + if (!row_streamer.Flush(true)) { std::cout << "Flush failed?" << std::endl; return AbortedError("Stream closed by the client.", GCP_ERROR_INFO()); From 1e747c9042bc4acd7c2af7ac16a9a45aa26baf10 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 4 Jul 2025 11:35:54 +0300 Subject: [PATCH 177/195] fix: emulator: Filters: Fix the ColumnRange Filter This fixes the ColumnRange filter bug in which the filter would let through cells from all other column families. The failing bigtable_filters_integration_test now passes and there is a unit test (now fixed in this PR) that checks for this specific bug and which is now passing. 
An end-to-end test - in which we store data in an actual emulator table in multiple column families and apply the filter - is also now implemented in this PR and passing. References: TBL-58 Fixes: TBL-58 --- google/cloud/bigtable/emulator/CMakeLists.txt | 2 + .../emulator/bigtable_emulator_common.bzl | 2 + google/cloud/bigtable/emulator/filter.h | 2 +- google/cloud/bigtable/emulator/filter_test.cc | 54 +++++++++++++++++++ .../cloud/bigtable/emulator/mutations_test.cc | 40 +------------- google/cloud/bigtable/emulator/table.cc | 39 +++++++++----- google/cloud/bigtable/emulator/table.h | 7 +-- google/cloud/bigtable/emulator/table_test.cc | 4 +- google/cloud/bigtable/emulator/test_util.cc | 44 +++++++++++++++ google/cloud/bigtable/emulator/test_util.h | 47 ++++++++++++++++ 10 files changed, 182 insertions(+), 59 deletions(-) create mode 100644 google/cloud/bigtable/emulator/test_util.cc create mode 100644 google/cloud/bigtable/emulator/test_util.h diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index 5a656edc1932a..e6258241e372f 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -33,6 +33,8 @@ add_library( server.h table.cc table.h + test_util.cc + test_util.h to_grpc_status.cc to_grpc_status.h) diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl index 1b733901fde4a..29519b5409272 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl @@ -26,6 +26,7 @@ bigtable_emulator_common_hdrs = [ "row_streamer.h", "server.h", "table.h", + "test_util.h", "to_grpc_status.h", ] @@ -38,5 +39,6 @@ bigtable_emulator_common_srcs = [ "row_streamer.cc", "server.cc", "table.cc", + "test_util.cc", "to_grpc_status.cc", ] diff --git a/google/cloud/bigtable/emulator/filter.h 
b/google/cloud/bigtable/emulator/filter.h index 1addb61939db7..0b8a9a8487ea0 100644 --- a/google/cloud/bigtable/emulator/filter.h +++ b/google/cloud/bigtable/emulator/filter.h @@ -214,7 +214,7 @@ class CellStream { CellView const* operator->() const { return &Value(); } /// equivalent to `HasValue()` explicit operator bool() const { return HasValue(); } - AbstractCellStreamImpl const& impl() const { return *impl_; } + AbstractCellStreamImpl& impl() const { return *impl_; } private: void NextColumn(); diff --git a/google/cloud/bigtable/emulator/filter_test.cc b/google/cloud/bigtable/emulator/filter_test.cc index 010adbd51bfff..9579457362edb 100644 --- a/google/cloud/bigtable/emulator/filter_test.cc +++ b/google/cloud/bigtable/emulator/filter_test.cc @@ -13,11 +13,19 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/emulator/test_util.h" #include "google/cloud/testing_util/chrono_literals.h" #include "google/cloud/testing_util/status_matchers.h" #include "gmock/gmock.h" +#include #include +#include +#include #include +#include +#include namespace google { namespace cloud { @@ -1883,6 +1891,52 @@ TEST_F(FilterWorkTest, ConditionBranchFilterNextDifferentThanCell) { EXPECT_EQ(expected, *maybe_output); } +// Test our implementation of the ColumnRange filter, by actually +// streaming cells from actual table data (hence end to end). 
+TEST(FiltersEndToEnd, ColumnRange) { + std::vector column_families = {"family1", "family2", "family3"}; + auto maybe_table = CreateTable("table", column_families); + ASSERT_STATUS_OK(maybe_table); + auto& table = maybe_table.value(); + + std::vector created = { + {"family1", "a00", 0, "bar"}, {"family1", "b00", 0, "bar"}, + {"family1", "b01", 0, "bar"}, {"family1", "b02", 0, "bar"}, + {"family2", "a00", 0, "bar"}, {"family2", "b01", 0, "bar"}, + {"family2", "b00", 0, "bar"}, {"family3", "a00", 0, "bar"}, + }; + + std::string row_key = "column-range-row-key"; + + auto status = SetCells(table, "table", row_key, created); + ASSERT_STATUS_OK(status); + + auto all_rows_set = std::make_shared(StringRangeSet::All()); + + RowFilter filter; + filter.mutable_column_range_filter()->set_family_name("family1"); + filter.mutable_column_range_filter()->set_start_qualifier_closed("b00"); + filter.mutable_column_range_filter()->set_end_qualifier_open("b02"); + + auto maybe_stream = table->CreateCellStream(all_rows_set, filter); + ASSERT_STATUS_OK(maybe_stream); + + std::vector expected = { + {row_key, "family1", "b00", 0_ms, "bar"}, + {row_key, "family1", "b01", 0_ms, "bar"}, + }; + + std::vector actual; + auto& stream = *maybe_stream; + for (; stream; ++stream) { + actual.emplace_back(stream->row_key(), stream->column_family(), + stream->column_qualifier(), stream->timestamp(), + stream->value()); + } + + ASSERT_EQ(expected, actual); +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/mutations_test.cc b/google/cloud/bigtable/emulator/mutations_test.cc index 245f82ad07fc2..219bc069053c0 100644 --- a/google/cloud/bigtable/emulator/mutations_test.cc +++ b/google/cloud/bigtable/emulator/mutations_test.cc @@ -14,6 +14,7 @@ #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/emulator/test_util.h" #include 
"google/cloud/internal/big_endian.h" #include "google/cloud/internal/make_status.h" #include "google/cloud/status.h" @@ -41,25 +42,6 @@ namespace emulator { using ::google::protobuf::TextFormat; using std::string; -struct SetCellParams { - std::string column_family_name; - std::string column_qualifier; - int64_t timestamp_micros; - std::string data; -}; - -StatusOr> CreateTable( - std::string const& table_name, std::vector& column_families) { - ::google::bigtable::admin::v2::Table schema; - schema.set_name(table_name); - for (auto& column_family_name : column_families) { - (*schema.mutable_column_families())[column_family_name] = - ::google::bigtable::admin::v2::ColumnFamily(); - } - - return Table::Create(schema); -} - ::google::bigtable::admin::v2::ColumnFamily MakeBEAggregateCFProto( ::google::bigtable::admin::v2::Type_Aggregate::AggregatorCase aggregator) { ::google::bigtable::admin::v2::ColumnFamily column_family; @@ -154,26 +136,6 @@ Status DeleteFromColumns( return table->MutateRow(mutation_request); } -Status SetCells( - std::shared_ptr& table, - std::string const& table_name, std::string const& row_key, - std::vector& set_cell_params) { - ::google::bigtable::v2::MutateRowRequest mutation_request; - mutation_request.set_table_name(table_name); - mutation_request.set_row_key(row_key); - - for (auto m : set_cell_params) { - auto* mutation_request_mutation = mutation_request.add_mutations(); - auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); - set_cell_mutation->set_family_name(m.column_family_name); - set_cell_mutation->set_column_qualifier(m.column_qualifier); - set_cell_mutation->set_timestamp_micros(m.timestamp_micros); - set_cell_mutation->set_value(m.data); - } - - return table->MutateRow(mutation_request); -} - Status HasCell(std::shared_ptr& table, std::string const& column_family, std::string const& row_key, std::string const& column_qualifier, int64_t timestamp_micros, diff --git a/google/cloud/bigtable/emulator/table.cc 
b/google/cloud/bigtable/emulator/table.cc index 62e3cbd232209..70e3115b308a8 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -395,26 +395,37 @@ StatusOr Table::CreateCellStream( } bool FilteredTableStream::ApplyFilter(InternalFilter const& internal_filter) { - if (!absl::holds_alternative(internal_filter)) { + if (!absl::holds_alternative(internal_filter) && + !absl::holds_alternative(internal_filter)) { return MergeCellStreams::ApplyFilter(internal_filter); } + // internal_filter is either FamilyNameRegex or ColumnRange for (auto stream_it = unfinished_streams_.begin(); - stream_it != unfinished_streams_.end(); ++stream_it) { - auto const* cf_stream = - dynamic_cast(&(*stream_it)->impl()); + stream_it != unfinished_streams_.end();) { + auto* cf_stream = + dynamic_cast(&(*stream_it)->impl()); assert(cf_stream); - if (!re2::RE2::PartialMatch( - cf_stream->column_family_name(), - *absl::get(internal_filter).regex)) { - auto last_it = std::prev(unfinished_streams_.end()); - if (stream_it == last_it) { - unfinished_streams_.pop_back(); - break; - } - stream_it->swap(unfinished_streams_.back()); - unfinished_streams_.pop_back(); + + if ((absl::holds_alternative(internal_filter) && + !re2::RE2::PartialMatch( + cf_stream->column_family_name(), + *absl::get(internal_filter).regex)) || + (absl::holds_alternative(internal_filter) && + absl::get(internal_filter).column_family != + cf_stream->column_family_name())) { + stream_it = unfinished_streams_.erase(stream_it); + continue; } + + if (absl::holds_alternative(internal_filter) && + absl::get(internal_filter).column_family == + cf_stream->column_family_name()) { + cf_stream->ApplyFilter(internal_filter); + } + + stream_it++; } + return true; } diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index 186c90ae2c6c4..f281eaa17d500 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ 
-71,6 +71,10 @@ class Table : public std::enable_shared_from_this<Table>
{ return DoMutationsWithPossibleRollback(row_key, mutations); } + StatusOr CreateCellStream( + std::shared_ptr range_set, + absl::optional) const; + Status ReadRows(google::bigtable::v2::ReadRowsRequest const& request, RowStreamer& row_streamer) const; @@ -104,9 +108,6 @@ class Table : public std::enable_shared_from_this
{ MESSAGE const& message) const; bool IsDeleteProtectedNoLock() const; Status Construct(google::bigtable::admin::v2::Table schema); - StatusOr CreateCellStream( - std::shared_ptr range_set, - absl::optional) const; Status DoMutationsWithPossibleRollback( std::string const& row_key, google::protobuf::RepeatedPtrField const& diff --git a/google/cloud/bigtable/emulator/table_test.cc b/google/cloud/bigtable/emulator/table_test.cc index 04f115a229002..64c3c1cd5b96e 100644 --- a/google/cloud/bigtable/emulator/table_test.cc +++ b/google/cloud/bigtable/emulator/table_test.cc @@ -91,8 +91,8 @@ TEST(FilteredTableStream, OnlyRightFamilyColumnsAreFiltered) { FilteredTableStream stream(std::move(fams)); stream.ApplyFilter( - ColumnRange{"fam2", StringRangeSet::Range("a", false, "b", false)}); - EXPECT_EQ("row0 fam1:col0 @10ms: foo\n", DumpStream(stream)); + ColumnRange{"fam2", StringRangeSet::Range("col0", false, "col1", true)}); + EXPECT_EQ("row0 fam2:col0 @10ms: foo\n", DumpStream(stream)); } TEST(FilteredTableStream, OtherFiltersArePropagated) { diff --git a/google/cloud/bigtable/emulator/test_util.cc b/google/cloud/bigtable/emulator/test_util.cc new file mode 100644 index 0000000000000..458595e8f9a2a --- /dev/null +++ b/google/cloud/bigtable/emulator/test_util.cc @@ -0,0 +1,44 @@ +#include "google/cloud/bigtable/emulator/test_util.h" +#include "google/cloud/bigtable/emulator/table.h" + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +StatusOr> CreateTable( + std::string const& table_name, std::vector& column_families) { + ::google::bigtable::admin::v2::Table schema; + schema.set_name(table_name); + for (auto& column_family_name : column_families) { + (*schema.mutable_column_families())[column_family_name] = + ::google::bigtable::admin::v2::ColumnFamily(); + } + + return Table::Create(schema); +} + +Status SetCells( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector& set_cell_params) { + 
::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + for (auto m : set_cell_params) { + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); + set_cell_mutation->set_family_name(m.column_family_name); + set_cell_mutation->set_column_qualifier(m.column_qualifier); + set_cell_mutation->set_timestamp_micros(m.timestamp_micros); + set_cell_mutation->set_value(m.data); + } + + return table->MutateRow(mutation_request); +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/test_util.h b/google/cloud/bigtable/emulator/test_util.h new file mode 100644 index 0000000000000..8ed8bf58ad776 --- /dev/null +++ b/google/cloud/bigtable/emulator/test_util.h @@ -0,0 +1,47 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TEST_UTIL_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TEST_UTIL_H + +#include "google/cloud/bigtable/emulator/table.h" +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +struct SetCellParams { + std::string column_family_name; + std::string column_qualifier; + int64_t timestamp_micros; + std::string data; +}; + +Status SetCells( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector& set_cell_params); + +StatusOr> CreateTable( + std::string const& table_name, std::vector& column_families); + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TEST_UTIL_H From fd7f0fe0d2eae85c43dd102b523edc31ac8bf528 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 7 Jul 2025 17:38:23 +0300 Subject: [PATCH 178/195] feat: emulator: server: Implement port and address command-line options. 
--- google/cloud/bigtable/emulator/CMakeLists.txt | 2 ++ google/cloud/bigtable/emulator/emulator.cc | 22 +++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index e6258241e372f..c8ccfccef6c8c 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -101,6 +101,8 @@ foreach (fname ${bigtable_emulator_programs}) target_link_libraries( ${target} PRIVATE bigtable_emulator_common + absl::flags + absl::flags_parse google-cloud-cpp::bigtable google-cloud-cpp::bigtable_protos google-cloud-cpp::grpc_utils diff --git a/google/cloud/bigtable/emulator/emulator.cc b/google/cloud/bigtable/emulator/emulator.cc index 3501b6a6f753f..08d4d034e0c58 100644 --- a/google/cloud/bigtable/emulator/emulator.cc +++ b/google/cloud/bigtable/emulator/emulator.cc @@ -13,11 +13,29 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/server.h" +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl/flags/usage.h" +#include +#include +#include +#include #include +#include + +ABSL_FLAG(std::string, host, "localhost", + "the address to bind to on the local machine"); +ABSL_FLAG(std::uint16_t, port, 8888, + "the port to bind to on the local machine"); + +int main(int argc, char* argv[]) { + absl::SetProgramUsageMessage( + absl::StrCat("Usage: %s -h -p ", argv[0])); + absl::ParseCommandLine(argc, argv); -int main() { auto server = google::cloud::bigtable::emulator::CreateDefaultEmulatorServer( - "[::]", 8888); + absl::GetFlag(FLAGS_host), absl::GetFlag(FLAGS_port)); + std::cout << "Server running on port " << server->bound_port() << "\n"; server->Wait(); return 0; From f5e14cf2d0ba52da140e717b65c17d2638dba1ff Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 7 Jul 2025 19:15:39 +0300 Subject: [PATCH 179/195] fix: fix segmentation fault due to the use of an invalid 
grpc server_ object. BuildAndStart() can fail for any number of reasons (e.g. when passed a port reserved for privileged use or a non-existent local address). We were not checking for this and would try to use the server unconditionally, causing a segmentation fault (e.g. when emulator was called by a non-root user as: ./emulator -p 100). This adds a fix so that we can check that we have a valid server object before trying to use it. Also added a unit test for the failing path. --------- Signed-off-by: Brian Gitonga Marete --- google/cloud/bigtable/emulator/emulator.cc | 13 +++++++++++-- google/cloud/bigtable/emulator/server.cc | 13 +++++++++++-- google/cloud/bigtable/emulator/server.h | 3 ++- google/cloud/bigtable/emulator/server_test.cc | 12 +++++++++++- 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/emulator/emulator.cc b/google/cloud/bigtable/emulator/emulator.cc index 08d4d034e0c58..a514a143807aa 100644 --- a/google/cloud/bigtable/emulator/emulator.cc +++ b/google/cloud/bigtable/emulator/emulator.cc @@ -33,8 +33,17 @@ int main(int argc, char* argv[]) { absl::StrCat("Usage: %s -h -p ", argv[0])); absl::ParseCommandLine(argc, argv); - auto server = google::cloud::bigtable::emulator::CreateDefaultEmulatorServer( - absl::GetFlag(FLAGS_host), absl::GetFlag(FLAGS_port)); + auto maybe_server = + google::cloud::bigtable::emulator::CreateDefaultEmulatorServer( + absl::GetFlag(FLAGS_host), absl::GetFlag(FLAGS_port)); + if (!maybe_server) { + std::cerr << "CreateDefaultEmulatorServer() failed. 
See logs for " + "possible reason" + << std::endl; + return 1; + } + + auto& server = maybe_server.value(); std::cout << "Server running on port " << server->bound_port() << "\n"; server->Wait(); diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index 88d65d673737b..42486ff896840 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -333,6 +333,7 @@ class DefaultEmulatorServer : public EmulatorServer { int bound_port() override { return bound_port_; } void Shutdown() override { server_->Shutdown(); } void Wait() override { server_->Wait(); } + bool HasValidServer() { return static_cast(server_); } private: int bound_port_; @@ -343,9 +344,17 @@ class DefaultEmulatorServer : public EmulatorServer { std::unique_ptr server_; }; -std::unique_ptr CreateDefaultEmulatorServer( +StatusOr> CreateDefaultEmulatorServer( std::string const& host, std::uint16_t port) { - return std::unique_ptr(new DefaultEmulatorServer(host, port)); + auto* default_emulator_server = new DefaultEmulatorServer(host, port); + if (!default_emulator_server->HasValidServer()) { + return UnknownError("An unknown error occurred when starting server", + GCP_ERROR_INFO() + .WithMetadata("host", host) + .WithMetadata("port", absl::StrCat("%d", port))); + } + + return std::unique_ptr(default_emulator_server); } } // namespace emulator diff --git a/google/cloud/bigtable/emulator/server.h b/google/cloud/bigtable/emulator/server.h index 890c7c341ab6e..dbc29cfd6393b 100644 --- a/google/cloud/bigtable/emulator/server.h +++ b/google/cloud/bigtable/emulator/server.h @@ -15,6 +15,7 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_SERVER_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_SERVER_H +#include "google/cloud/status_or.h" #include #include #include @@ -36,7 +37,7 @@ class EmulatorServer { virtual void Wait() = 0; }; -std::unique_ptr CreateDefaultEmulatorServer( +StatusOr> 
CreateDefaultEmulatorServer( std::string const& host, std::uint16_t port); } // namespace emulator diff --git a/google/cloud/bigtable/emulator/server_test.cc b/google/cloud/bigtable/emulator/server_test.cc index ceba09422f381..2fcfcc9c57694 100644 --- a/google/cloud/bigtable/emulator/server_test.cc +++ b/google/cloud/bigtable/emulator/server_test.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/server.h" +#include "google/cloud/testing_util/status_matchers.h" #include #include #include @@ -20,6 +21,7 @@ #include #include #include +#include namespace google { namespace cloud { @@ -33,7 +35,9 @@ class ServerTest : public ::testing::Test { grpc::ClientContext ctx_; void SetUp() override { - server_ = CreateDefaultEmulatorServer("127.0.0.1", 0); + auto maybe_server = CreateDefaultEmulatorServer("127.0.0.1", 0); + ASSERT_STATUS_OK(maybe_server); + server_ = std::move(maybe_server.value()); channel_ = grpc::CreateChannel( "localhost:" + std::to_string(server_->bound_port()), grpc::InsecureChannelCredentials()); @@ -215,6 +219,12 @@ TEST_F(ServerTest, TableAdminUpdateTable) { EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); } +// Test that the failure path for server creation does not crash. +TEST(ServerCreationTest, TestServerCreationFailurePath) { + auto maybe_server = CreateDefaultEmulatorServer("invalid_host_address", 0); + ASSERT_EQ(false, maybe_server.ok()); +} + } // namespace emulator } // namespace bigtable } // namespace cloud From 530d2ea122e2824283f5743cc12612e23f009516 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 8 Jul 2025 16:09:42 +0300 Subject: [PATCH 180/195] emulator: Implement SampleRowKeys. This implements the SampleRowKeys bigtable RPC. This change (when combined with the MutateRows PR in an integration branch) fixes the bigtable_table_sample_rows_integration_test tests which were all failing, and thus fixes TBL-60. 
Care has been taken to ensure that the RPC can return any number of sample rows using constant memory. TESTED=bigtable_table_sample_rows_integration_test now passes Fixes: TBL-60 References: TBL-60 --- .../cloud/bigtable/emulator/column_family.h | 3 + google/cloud/bigtable/emulator/server.cc | 15 +- google/cloud/bigtable/emulator/table.cc | 149 +++++++++++++++++- google/cloud/bigtable/emulator/table.h | 12 +- 4 files changed, 169 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 6bb1a5ddc14ea..666d6a1749cd9 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -374,6 +375,8 @@ class ColumnFamily { return rows_.upper_bound(row_key); } + std::size_t size() { return rows_.size(); } + std::map::iterator find( std::string const& row_key) { return rows_.find(row_key); diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index 42486ff896840..bb2e8dee47ce2 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -27,6 +27,7 @@ #include #include #include +#include #include namespace google { @@ -56,10 +57,16 @@ class EmulatorService final : public btproto::Bigtable::Service { grpc::Status SampleRowKeys( grpc::ServerContext* /* context */, - btproto::SampleRowKeysRequest const* /* request */, - grpc::ServerWriter* /* writer */) - override { - return grpc::Status::OK; + btproto::SampleRowKeysRequest const* request, + grpc::ServerWriter* writer) override { + auto maybe_table = cluster_->FindTable(request->table_name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + + auto& table = maybe_table.value(); + + return ToGrpcStatus(table->SampleRowKeys(0.0001, writer)); } grpc::Status MutateRow(grpc::ServerContext* /* context */, 
diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 70e3115b308a8..14951c2730c5b 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -16,27 +16,42 @@ #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/range_set.h" -#include "google/cloud/bigtable/internal/google_bytes_traits.h" +#include "google/cloud/bigtable/emulator/row_streamer.h" #include "google/cloud/internal/big_endian.h" #include "google/cloud/internal/make_status.h" #include "google/cloud/status.h" +#include "google/cloud/status_or.h" #include "google/protobuf/util/field_mask_util.h" -#include +#include +#include #include #include #include +#include #include #include #include +#include +#include #include +#include #include +#include +#include +#include #include #include +#include +#include +#include #include #include -#include +#include +#include #include #include +#include +#include namespace google { namespace cloud { @@ -602,6 +617,134 @@ bool Table::IsDeleteProtectedNoLock() const { return schema_.deletion_protection(); } +Status Table::SampleRowKeys( + double pass_probability, + grpc::ServerWriter* writer) { + if (pass_probability <= 0.0) { + return InvalidArgumentError( + "The sampling probabality must be positive", + GCP_ERROR_INFO().WithMetadata("provided sampling probability", + absl::StrFormat("%f", pass_probability))); + } + + auto sample_every = + static_cast(std::ceil(1.0 / pass_probability)); + + std::lock_guard lock(mu_); + + // First, stream all rows and cells and compute the offsets. 
+ auto all_rows_set = std::make_shared(StringRangeSet::All()); + auto maybe_all_rows_stream = CreateCellStream(all_rows_set, absl::nullopt); + if (!maybe_all_rows_stream) { + return maybe_all_rows_stream.status(); + } + + auto& stream = *maybe_all_rows_stream; + + absl::optional first_row_key; + // The first row read will be used as a constant estimate of row + // sizes. If we are sampling 1/n rows, the value added to the offset + // (which is to be regarded as the size of all the rows before the + // sampled one) will be (n * row_size_estimate). + // + // That is every time a row is sampled, we do: offset += (n * + // row_size_estimate). + std::size_t row_size_estimate = 0; + + for (; stream; ++stream) { + if (first_row_key.has_value() && + stream->row_key() != first_row_key.value()) { + break; + } + + first_row_key = stream->row_key(); + + row_size_estimate += stream->row_key().size(); + row_size_estimate += stream->column_qualifier().size(); + row_size_estimate += stream->value().size(); + row_size_estimate += sizeof(stream->timestamp()); + } + + if (!first_row_key.has_value()) { + // No rows in the table + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key(""); + resp.set_offset_bytes(0); + + auto opts = grpc::WriteOptions(); + opts.set_last_message(); + + writer->WriteLast(std::move(resp), opts); + return Status(); + } + + std::int64_t offset_delta = sample_every * row_size_estimate; + + google::bigtable::v2::RowFilter sample_filter; + sample_filter.set_row_sample_filter(pass_probability); + + auto maybe_stream = CreateCellStream(all_rows_set, sample_filter); + if (!maybe_stream) { + return maybe_stream.status(); + } + + auto& sampled_stream = *maybe_stream; + + std::int64_t offset = 0; + + bool wrote_a_sample; + + for (; sampled_stream; sampled_stream.Next(NextMode::kRow)) { + google::bigtable::v2::SampleRowKeysResponse resp; + offset += offset_delta; + resp.set_row_key(sampled_stream->row_key()); + resp.set_offset_bytes(offset); + + 
writer->Write(std::move(resp)); + + wrote_a_sample = true; + } + + // Cloud bigtable client tests expect that, if they populated the + // table with at least one row, then at least one row sample is + // returned. + // + // In such a case, return any string that represents the last key, + // and an offset that is the estimated row size * the number of rows + // in the largest column family. We can return any string because + // the keys returned need not be in the table. See the proto + // specification. + if (!wrote_a_sample) { + std::size_t row_count_estimate = 0; + + for (auto const& cf : *get()) { + if (cf.second->size() > row_count_estimate) { + row_count_estimate = cf.second->size(); + } + } + + std::int64_t this_offset = row_count_estimate * row_size_estimate; + + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key("last_key"); + resp.set_offset_bytes(this_offset); + writer->Write(std::move(resp)); + + offset += this_offset; + } + + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key(""); + // Client test code expects offset_bytes to be strictly + // increasing. 
+ resp.set_offset_bytes(offset + 1); + auto opts = grpc::WriteOptions(); + opts.set_last_message(); + writer->WriteLast(std::move(resp), opts); + + return Status(); +} + Status Table::DropRowRange( ::google::bigtable::admin::v2::DropRowRangeRequest const& request) { std::lock_guard lock(mu_); diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h index f281eaa17d500..123537913c634 100644 --- a/google/cloud/bigtable/emulator/table.h +++ b/google/cloud/bigtable/emulator/table.h @@ -23,20 +23,22 @@ #include "google/cloud/status_or.h" #include "absl/types/variant.h" #include "google/protobuf/repeated_ptr_field.h" -#include +#include #include -#include #include #include #include -#include #include +#include #include +#include #include #include #include #include #include +#include +#include namespace google { namespace cloud { @@ -93,6 +95,10 @@ class Table : public std::enable_shared_from_this
{ return column_families_.find(column_family); } + Status SampleRowKeys( + double pass_probability, + grpc::ServerWriter<google::bigtable::v2::SampleRowKeysResponse>* writer); + std::shared_ptr<Table>
get() { return shared_from_this(); } Status DropRowRange( From f5043257e57cdc3be883bf57b41ac049d3d93568 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 8 Jul 2025 18:06:16 +0300 Subject: [PATCH 181/195] feat: bigtable emulator: Use new emulator (written in C++) for tests. This swaps out the old emulator for the new one for tests. --- .../bigtable/ci/run_integration_tests_emulator_cmake.sh | 6 ++++++ google/cloud/bigtable/tools/run_emulator_utils.sh | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh b/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh index 9621925f22514..f988e1045b00b 100755 --- a/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh +++ b/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh @@ -41,6 +41,12 @@ fi CBT_INSTANCE_ADMIN_EMULATOR_START=( "${BINARY_DIR}/google/cloud/bigtable/tests/instance_admin_emulator" ) + +# Configure run_emulators_utils.sh to find the cbt emulator. +CBT_EMULATOR_CMD=( + "${BINARY_DIR}/google/cloud/bigtable/emulator/emulator" +) + source module /google/cloud/bigtable/tools/run_emulator_utils.sh cd "${BINARY_DIR}" diff --git a/google/cloud/bigtable/tools/run_emulator_utils.sh b/google/cloud/bigtable/tools/run_emulator_utils.sh index 807ae6efb41a9..6a7c4dd1ad68b 100755 --- a/google/cloud/bigtable/tools/run_emulator_utils.sh +++ b/google/cloud/bigtable/tools/run_emulator_utils.sh @@ -92,7 +92,6 @@ function start_emulators() { io::log "Launching Cloud Bigtable emulators in the background" trap kill_emulators EXIT - local -r CBT_EMULATOR_CMD="/usr/local/google-cloud-sdk/platform/bigtable-emulator/cbtemulator" "${CBT_EMULATOR_CMD}" -port "${emulator_port}" >emulator.log 2>&1 Date: Wed, 9 Jul 2025 12:24:07 +0300 Subject: [PATCH 182/195] fix: DeleteFromColumn: reject requests with an empty or reversed time range. 
This fixes the failure of a couple of tests in the bigtable_mutations_integration_test suite that check for the rejection of empty or reversed time ranges by the server. References: TBL-59 Fixes: TBL-59 --- google/cloud/bigtable/emulator/table.cc | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 14951c2730c5b..25e4b1dd4f2d3 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -916,6 +916,30 @@ Status RowTransaction::DeleteFromColumn( return maybe_column_family.status(); } + // We need to check if the given timerange is empty or reversed, but + // only up to the server's time accuracy (in our case, milliseconds) + // - For example a time range of [1000, 1200] would be empty. + if (delete_from_column.has_time_range()) { + auto start = std::chrono::duration_cast( + std::chrono::microseconds( + delete_from_column.time_range().start_timestamp_micros())); + auto end = std::chrono::duration_cast( + std::chrono::microseconds( + delete_from_column.time_range().end_timestamp_micros())); + + // An end timestamp micros of 0 is to be interpreted as infinity, + // so we allow that. + if (end <= start && + delete_from_column.time_range().end_timestamp_micros() != 0) { + return InvalidArgumentError( + "empty or reversed time range: the end timestamp must be more than " + "the start timestamp when they are truncated to the server's time " + "precision (milliseconds)", + GCP_ERROR_INFO().WithMetadata("delete_from_column proto", + delete_from_column.DebugString())); + } + } + auto& column_family = maybe_column_family->get(); auto deleted_cells = column_family.DeleteColumn( From 52936ca943e5e8208df563170eae814808ee2949 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 9 Jul 2025 21:52:39 +0300 Subject: [PATCH 183/195] style: emulator: mechanical header inclusion fixes by clang-tidy and checkers CI. 
* style: emulator: mechanical header inclusion fixes by clang-tidy. * style: emulator: further mechanical header fixes by checkers CI script. --- google/cloud/bigtable/emulator/cluster.cc | 10 ++++++++++ google/cloud/bigtable/emulator/cluster.h | 7 +++++++ google/cloud/bigtable/emulator/column_family.cc | 16 +++++++++++++--- google/cloud/bigtable/emulator/column_family.h | 12 ++++-------- .../bigtable/emulator/column_family_test.cc | 14 ++++++++++---- .../emulator/conditional_mutations_test.cc | 8 ++++++++ .../bigtable/emulator/drop_row_range_test.cc | 3 +-- google/cloud/bigtable/emulator/emulator.cc | 1 - google/cloud/bigtable/emulator/filter.cc | 14 ++++++++++++++ google/cloud/bigtable/emulator/filter.h | 7 +++++++ google/cloud/bigtable/emulator/filter_test.cc | 15 ++++++++++++++- google/cloud/bigtable/emulator/filtered_map.h | 3 +++ .../cloud/bigtable/emulator/filtered_map_test.cc | 16 ++++++++++++++-- google/cloud/bigtable/emulator/mutations_test.cc | 7 ++++--- google/cloud/bigtable/emulator/range_set.cc | 8 ++++++++ google/cloud/bigtable/emulator/range_set_test.cc | 10 ++++++++-- google/cloud/bigtable/emulator/row_streamer.cc | 7 ++++++- google/cloud/bigtable/emulator/row_streamer.h | 6 ++++-- google/cloud/bigtable/emulator/server.cc | 13 +++++++++++-- google/cloud/bigtable/emulator/server_test.cc | 8 +++++++- google/cloud/bigtable/emulator/table_test.cc | 12 +++++++++++- google/cloud/bigtable/emulator/test_util.cc | 7 +++++++ google/cloud/bigtable/emulator/test_util.h | 4 ++++ google/cloud/bigtable/emulator/to_grpc_status.cc | 5 +++++ 24 files changed, 180 insertions(+), 33 deletions(-) diff --git a/google/cloud/bigtable/emulator/cluster.cc b/google/cloud/bigtable/emulator/cluster.cc index fe3397174b53c..a6aac1570198d 100644 --- a/google/cloud/bigtable/emulator/cluster.cc +++ b/google/cloud/bigtable/emulator/cluster.cc @@ -13,10 +13,20 @@ // limitations under the License. 
#include "google/cloud/bigtable/emulator/cluster.h" +#include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/internal/make_status.h" #include "google/cloud/status.h" #include "google/cloud/status_or.h" #include "absl/strings/match.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/cluster.h b/google/cloud/bigtable/emulator/cluster.h index 11baa11387c24..a6a6481042f51 100644 --- a/google/cloud/bigtable/emulator/cluster.h +++ b/google/cloud/bigtable/emulator/cluster.h @@ -16,7 +16,14 @@ #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CLUSTER_H #include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/status.h" #include "google/cloud/status_or.h" +#include +#include +#include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc index 61eb9dc328810..e501141ea05c7 100644 --- a/google/cloud/bigtable/emulator/column_family.cc +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -13,16 +13,26 @@ // limitations under the License. 
#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/cell_view.h" +#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/internal/big_endian.h" -#include +#include "google/cloud/internal/make_status.h" +#include "google/cloud/status_or.h" +#include +#include +#include #include -#include +#include +#include #include #include -#include +#include #include +#include #include #include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h index 666d6a1749cd9..3aabc2f6145f5 100644 --- a/google/cloud/bigtable/emulator/column_family.h +++ b/google/cloud/bigtable/emulator/column_family.h @@ -15,27 +15,23 @@ #ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H -#include "google/cloud/bigtable/cell.h" #include "google/cloud/bigtable/emulator/cell_view.h" #include "google/cloud/bigtable/emulator/filter.h" #include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/bigtable/emulator/range_set.h" -#include "google/cloud/bigtable/read_modify_write_rule.h" #include "google/cloud/internal/big_endian.h" -#include "google/cloud/internal/make_status.h" #include "google/cloud/status_or.h" #include "absl/types/optional.h" -#include +#include #include -#include -#include #include #include #include +#include #include #include -#include -#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc index e529d1a5e2f60..9300391290f7e 100644 --- a/google/cloud/bigtable/emulator/column_family_test.cc +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -13,11 +13,17 @@ // limitations under the License. 
#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/testing_util/chrono_literals.h" -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/conditional_mutations_test.cc b/google/cloud/bigtable/emulator/conditional_mutations_test.cc index 540213cac1c93..97520fd28f31f 100644 --- a/google/cloud/bigtable/emulator/conditional_mutations_test.cc +++ b/google/cloud/bigtable/emulator/conditional_mutations_test.cc @@ -1,10 +1,18 @@ #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/internal/make_status.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" #include "google/cloud/testing_util/status_matchers.h" +#include #include #include #include #include +#include +#include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/drop_row_range_test.cc b/google/cloud/bigtable/emulator/drop_row_range_test.cc index 7bb62616f86e9..eb9255e9ce00d 100644 --- a/google/cloud/bigtable/emulator/drop_row_range_test.cc +++ b/google/cloud/bigtable/emulator/drop_row_range_test.cc @@ -18,10 +18,8 @@ #include "google/cloud/status.h" #include "google/cloud/status_or.h" #include "google/cloud/testing_util/status_matchers.h" -#include #include #include -#include #include #include #include @@ -29,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/google/cloud/bigtable/emulator/emulator.cc b/google/cloud/bigtable/emulator/emulator.cc index a514a143807aa..291057dd705a0 100644 --- a/google/cloud/bigtable/emulator/emulator.cc +++ b/google/cloud/bigtable/emulator/emulator.cc @@ -17,7 +17,6 @@ #include "absl/flags/parse.h" #include "absl/flags/usage.h" #include -#include #include #include 
#include diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index 6fcdacca49100..3657d6770b735 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ b/google/cloud/bigtable/emulator/filter.cc @@ -18,8 +18,22 @@ #include "google/cloud/internal/invoke_result.h" #include "google/cloud/internal/make_status.h" #include "google/cloud/status_or.h" +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/filter.h b/google/cloud/bigtable/emulator/filter.h index 0b8a9a8487ea0..10df364d9cd8a 100644 --- a/google/cloud/bigtable/emulator/filter.h +++ b/google/cloud/bigtable/emulator/filter.h @@ -17,7 +17,14 @@ #include "google/cloud/bigtable/emulator/cell_view.h" #include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/status_or.h" #include +#include +#include +#include +#include +#include +#include namespace re2 { class RE2; diff --git a/google/cloud/bigtable/emulator/filter_test.cc b/google/cloud/bigtable/emulator/filter_test.cc index 9579457362edb..8ce833f9f29db 100644 --- a/google/cloud/bigtable/emulator/filter_test.cc +++ b/google/cloud/bigtable/emulator/filter_test.cc @@ -13,18 +13,31 @@ // limitations under the License. 
#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/cell_view.h" #include "google/cloud/bigtable/emulator/range_set.h" -#include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/bigtable/emulator/test_util.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" #include "google/cloud/testing_util/chrono_literals.h" #include "google/cloud/testing_util/status_matchers.h" #include "gmock/gmock.h" +#include +#include +#include #include #include #include #include +#include +#include +#include +#include +#include #include +#include #include +#include +#include #include namespace google { diff --git a/google/cloud/bigtable/emulator/filtered_map.h b/google/cloud/bigtable/emulator/filtered_map.h index e94359eaf16fe..90d8ed991d55b 100644 --- a/google/cloud/bigtable/emulator/filtered_map.h +++ b/google/cloud/bigtable/emulator/filtered_map.h @@ -19,6 +19,9 @@ #include #include #include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/filtered_map_test.cc b/google/cloud/bigtable/emulator/filtered_map_test.cc index e1b3edfcbf483..cf6b386a75f1e 100644 --- a/google/cloud/bigtable/emulator/filtered_map_test.cc +++ b/google/cloud/bigtable/emulator/filtered_map_test.cc @@ -14,8 +14,20 @@ #include "google/cloud/bigtable/emulator/filtered_map.h" #include "google/cloud/testing_util/chrono_literals.h" -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/mutations_test.cc b/google/cloud/bigtable/emulator/mutations_test.cc index 219bc069053c0..d3d858aecc628 100644 --- a/google/cloud/bigtable/emulator/mutations_test.cc +++ b/google/cloud/bigtable/emulator/mutations_test.cc @@ -20,11 +20,8 @@ #include "google/cloud/status.h" #include "google/cloud/status_or.h" #include 
"google/cloud/testing_util/status_matchers.h" -#include -#include #include #include -#include #include #include #include @@ -32,7 +29,11 @@ #include #include #include +#include +#include #include +#include +#include #include namespace google { diff --git a/google/cloud/bigtable/emulator/range_set.cc b/google/cloud/bigtable/emulator/range_set.cc index 9f1012a3b38b5..798e87f41108f 100644 --- a/google/cloud/bigtable/emulator/range_set.cc +++ b/google/cloud/bigtable/emulator/range_set.cc @@ -14,7 +14,15 @@ #include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/bigtable/internal/google_bytes_traits.h" +#include "google/cloud/internal/make_status.h" +#include "google/cloud/status_or.h" #include +#include +#include +#include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc index b214d753d7c7f..02a615f358c65 100644 --- a/google/cloud/bigtable/emulator/range_set_test.cc +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -16,8 +16,14 @@ #include "google/cloud/bigtable/row_range.h" #include "google/cloud/testing_util/chrono_literals.h" #include "google/cloud/testing_util/status_matchers.h" -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/row_streamer.cc b/google/cloud/bigtable/emulator/row_streamer.cc index f511be77060d6..40a9e1873644e 100644 --- a/google/cloud/bigtable/emulator/row_streamer.cc +++ b/google/cloud/bigtable/emulator/row_streamer.cc @@ -13,7 +13,12 @@ // limitations under the License. 
#include "google/cloud/bigtable/emulator/row_streamer.h" -#include +#include "google/cloud/bigtable/emulator/cell_view.h" +#include +#include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/row_streamer.h b/google/cloud/bigtable/emulator/row_streamer.h index ae22d18c5aea0..d8d9ed6a73280 100644 --- a/google/cloud/bigtable/emulator/row_streamer.h +++ b/google/cloud/bigtable/emulator/row_streamer.h @@ -17,8 +17,10 @@ #include "google/cloud/bigtable/emulator/cell_view.h" #include "absl/types/optional.h" -#include -#include +#include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index bb2e8dee47ce2..0001fb3f63045 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -14,21 +14,30 @@ #include "google/cloud/bigtable/emulator/server.h" #include "google/cloud/bigtable/emulator/cluster.h" +#include "google/cloud/bigtable/emulator/row_streamer.h" #include "google/cloud/bigtable/emulator/to_grpc_status.h" #include "google/cloud/internal/make_status.h" +#include "google/cloud/status_or.h" #include +#include +#include #include #include +#include +#include #include #include -#include #include +#include #include #include -#include #include +#include #include #include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/server_test.cc b/google/cloud/bigtable/emulator/server_test.cc index 2fcfcc9c57694..31579b69a3603 100644 --- a/google/cloud/bigtable/emulator/server_test.cc +++ b/google/cloud/bigtable/emulator/server_test.cc @@ -19,9 +19,15 @@ #include #include #include -#include +#include +#include #include +#include +#include #include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/table_test.cc 
b/google/cloud/bigtable/emulator/table_test.cc index 64c3c1cd5b96e..d0f4eac00c924 100644 --- a/google/cloud/bigtable/emulator/table_test.cc +++ b/google/cloud/bigtable/emulator/table_test.cc @@ -13,8 +13,18 @@ // limitations under the License. #include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/testing_util/chrono_literals.h" -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/test_util.cc b/google/cloud/bigtable/emulator/test_util.cc index 458595e8f9a2a..ac26cb23d96e5 100644 --- a/google/cloud/bigtable/emulator/test_util.cc +++ b/google/cloud/bigtable/emulator/test_util.cc @@ -1,5 +1,12 @@ #include "google/cloud/bigtable/emulator/test_util.h" #include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include +#include +#include +#include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/test_util.h b/google/cloud/bigtable/emulator/test_util.h index 8ed8bf58ad776..1d45ec56767a4 100644 --- a/google/cloud/bigtable/emulator/test_util.h +++ b/google/cloud/bigtable/emulator/test_util.h @@ -16,8 +16,12 @@ #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TEST_UTIL_H #include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include #include #include +#include namespace google { namespace cloud { diff --git a/google/cloud/bigtable/emulator/to_grpc_status.cc b/google/cloud/bigtable/emulator/to_grpc_status.cc index 13b049ab5c2b5..bac9b04c2e0e4 100644 --- a/google/cloud/bigtable/emulator/to_grpc_status.cc +++ b/google/cloud/bigtable/emulator/to_grpc_status.cc @@ -13,9 +13,14 @@ // limitations under the 
License. #include "google/cloud/bigtable/emulator/to_grpc_status.h" +#include "google/cloud/status.h" #include "google/rpc/status.pb.h" #include #include +#include +#include +#include +#include namespace google { namespace cloud { From 041a27dd230183356baf5d89e08b3f8a01109a28 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Wed, 9 Jul 2025 21:55:55 +0300 Subject: [PATCH 184/195] fix: DeleteFromColumn: reject requests with an empty or reversed time range. (#27) This fixes the failure of a couple of tests in the bigtable_mutations_integration_test suite that check for the rejection of empty or reversed time ranges by the server. References: TBL-59 Fixes: TBL-59 --- google/cloud/bigtable/emulator/table.cc | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 14951c2730c5b..25e4b1dd4f2d3 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -916,6 +916,30 @@ Status RowTransaction::DeleteFromColumn( return maybe_column_family.status(); } + // We need to check if the given timerange is empty or reversed, but + // only up to the server's time accuracy (in our case, milliseconds) + // - For example a time range of [1000, 1200] would be empty. + if (delete_from_column.has_time_range()) { + auto start = std::chrono::duration_cast( + std::chrono::microseconds( + delete_from_column.time_range().start_timestamp_micros())); + auto end = std::chrono::duration_cast( + std::chrono::microseconds( + delete_from_column.time_range().end_timestamp_micros())); + + // An end timestamp micros of 0 is to be interpreted as infinity, + // so we allow that. 
+ if (end <= start && + delete_from_column.time_range().end_timestamp_micros() != 0) { + return InvalidArgumentError( + "empty or reversed time range: the end timestamp must be more than " + "the start timestamp when they are truncated to the server's time " + "precision (milliseconds)", + GCP_ERROR_INFO().WithMetadata("delete_from_column proto", + delete_from_column.DebugString())); + } + } + auto& column_family = maybe_column_family->get(); auto deleted_cells = column_family.DeleteColumn( From 7ef5b5f803f8c81ea9fb1eb2ae25590f0970c117 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 10 Jul 2025 20:58:47 +0300 Subject: [PATCH 185/195] fix: emulator: fix MSVC compilation error due to storing an int64_t in a size_t. The compiler spits out an error in pedantic mode because on 32 bit platforms, there is usually the possibility of an overflow. However, the quantity is guaranteed to be 32 bit (https://github.com/googleapis/googleapis/blob/74657e8a6690b249c048f685124ee3b8473b70b4/google/bigtable/v2/data.proto#L497) so use int32_t to store it and fix the MSVC compiler warning and failure. --- google/cloud/bigtable/emulator/filter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index 3657d6770b735..1afb37a36f2e2 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ b/google/cloud/bigtable/emulator/filter.cc @@ -919,7 +919,7 @@ StatusOr CreateFilterImpl( return res; } if (filter.has_cells_per_column_limit_filter()) { - std::int64_t cells_per_column_limit = + std::int32_t cells_per_column_limit = filter.cells_per_column_limit_filter(); if (cells_per_column_limit < 0) { return InvalidArgumentError( From 5efa4046c221d2dc2b626ebe2f192cbef8165338 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 10 Jul 2025 21:46:10 +0300 Subject: [PATCH 186/195] fix: MSVC (windows): Prevent GetCurrentTime macro expansion in windows.
Without this, GetCurrentTime which exists as a macro in Windows is expanded, leading to this compilation error: https://github.com/googleapis/google-cloud-cpp/actions/runs/16149454809/job/45577395445#step:12:3259 This method of fixing this problem is documented and recommended here: https://protobuf.dev/support/migration/#getcurrenttime --- google/cloud/bigtable/emulator/server.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index 0001fb3f63045..313f389f17a0e 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -245,9 +245,9 @@ class EmulatorTableService final : public btadmin::BigtableTableAdmin::Service { btadmin::UpdateTableMetadata res_md; res_md.set_name(request->table().name()); *res_md.mutable_start_time() = - google::protobuf::util::TimeUtil::GetCurrentTime(); + (google::protobuf::util::TimeUtil::GetCurrentTime)(); *res_md.mutable_end_time() = - google::protobuf::util::TimeUtil::GetCurrentTime(); + (google::protobuf::util::TimeUtil::GetCurrentTime)(); response->set_name("UpdateTable"); response->mutable_metadata()->PackFrom(std::move(res_md)); response->set_done(true); From c10571b57157712f4b9dd287194cf12894dda8ad Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 11 Jul 2025 11:23:14 +0300 Subject: [PATCH 187/195] fix: MSVC (windows): Prevent GetCurrentTime macro expansion in windows. 
Without this, GetCurrentTime which exists as a macro in Windows is expanded, leading to this compilation error: https://github.com/googleapis/google-cloud-cpp/actions/runs/16149454809/job/45577395445#step:12:3259 This method of fixing this problem is documented and recommended here: https://protobuf.dev/support/migration/#getcurrenttime --- google/cloud/bigtable/emulator/server.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc index 0001fb3f63045..313f389f17a0e 100644 --- a/google/cloud/bigtable/emulator/server.cc +++ b/google/cloud/bigtable/emulator/server.cc @@ -245,9 +245,9 @@ class EmulatorTableService final : public btadmin::BigtableTableAdmin::Service { btadmin::UpdateTableMetadata res_md; res_md.set_name(request->table().name()); *res_md.mutable_start_time() = - google::protobuf::util::TimeUtil::GetCurrentTime(); + (google::protobuf::util::TimeUtil::GetCurrentTime)(); *res_md.mutable_end_time() = - google::protobuf::util::TimeUtil::GetCurrentTime(); + (google::protobuf::util::TimeUtil::GetCurrentTime)(); response->set_name("UpdateTable"); response->mutable_metadata()->PackFrom(std::move(res_md)); response->set_done(true); From 173f00beb4d4f69db10b848760f15c9f8ab15ef5 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Fri, 11 Jul 2025 11:23:57 +0300 Subject: [PATCH 188/195] fix: emulator: fix MSVC compilation error due to storing an int64_t in a size_t. The compiler spits out an error in pedantic mode because on 32 bit platforms, there is usually the possibility of an overflow. However, the quantity is guaranteed to be 32 bit (https://github.com/googleapis/googleapis/blob/74657e8a6690b249c048f685124ee3b8473b70b4/google/bigtable/v2/data.proto#L497) so use int32_t to store it and fix the MSVC compiler warning and failure.
--- google/cloud/bigtable/emulator/filter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc index 3657d6770b735..1afb37a36f2e2 100644 --- a/google/cloud/bigtable/emulator/filter.cc +++ b/google/cloud/bigtable/emulator/filter.cc @@ -919,7 +919,7 @@ StatusOr CreateFilterImpl( return res; } if (filter.has_cells_per_column_limit_filter()) { - std::int64_t cells_per_column_limit = + std::int32_t cells_per_column_limit = filter.cells_per_column_limit_filter(); if (cells_per_column_limit < 0) { return InvalidArgumentError( From 00c42328a665f5f55c706b7fb7ed04f6aa3eb27a Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 15 Jul 2025 22:28:38 +0300 Subject: [PATCH 189/195] feat: Implement a function to determine if 2 strings of a maximum length are adjacent. This will be used in a subsequent PR to correctly determine if 2 Bigtable row keys are adjacent, given that a Bigtable row key must be at most 4KiB in length. 
--- google/cloud/bigtable/emulator/range_set.cc | 47 +++++++++++++++++++ google/cloud/bigtable/emulator/range_set.h | 3 ++ .../cloud/bigtable/emulator/range_set_test.cc | 47 +++++++++++++++++++ 3 files changed, 97 insertions(+) diff --git a/google/cloud/bigtable/emulator/range_set.cc b/google/cloud/bigtable/emulator/range_set.cc index 798e87f41108f..d5d3e249805ca 100644 --- a/google/cloud/bigtable/emulator/range_set.cc +++ b/google/cloud/bigtable/emulator/range_set.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -573,6 +574,52 @@ std::ostream& operator<<(std::ostream& os, return os; } +bool ConsecutiveStringsOfMaxLen(std::string const& a, std::string const& b, + std::size_t max_len) { + assert(a.length() <= max_len); + assert(b.length() <= max_len); + + if (max_len == 0) { + return false; + } + + if (a.length() < max_len) { + return internal::ConsecutiveRowKeys(a, b); + } + + // Note that at this point we are guaranteed that a is not empty but + // let us be sure. + assert(!a.empty()); + + // What is the rightmost index that we can increment by 1 to get + // the next string in lexicographic order. + absl::optional index_to_inc = absl::nullopt; + + for (auto it = a.rbegin(); it != a.rend(); it++) { + char c = *it; + if (c < CHAR_MAX) { + index_to_inc = (a.length() - 1) - std::distance(a.rbegin(), it); + break; + } + } + + if (!index_to_inc.has_value()) { + // a is the last string in lexicographical order for strings of + // max_len chars, so it has no successor. + return false; + } + + std::string successor_to_a; + // A copy but if we want we could fix that by a comparison in 3 + // parts. However, row keys that are at maximum length are the rare + // exception, presumably. 
+ successor_to_a = a; + + successor_to_a[index_to_inc.value()] += 1; + + return successor_to_a == b; +} + } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/range_set.h b/google/cloud/bigtable/emulator/range_set.h index 0495543766a79..df857be5ff78a 100644 --- a/google/cloud/bigtable/emulator/range_set.h +++ b/google/cloud/bigtable/emulator/range_set.h @@ -191,6 +191,9 @@ bool operator==(TimestampRangeSet::Range const& lhs, std::ostream& operator<<(std::ostream& os, TimestampRangeSet::Range const& range); +bool ConsecutiveStringsOfMaxLen(std::string const& a, std::string const& b, + std::size_t max_len); + // For testing only. namespace detail { diff --git a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc index 02a615f358c65..7dfbca4c3a8d9 100644 --- a/google/cloud/bigtable/emulator/range_set_test.cc +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -816,6 +817,52 @@ TEST(StringRangeSet, IntersectDistinct) { ASSERT_EQ(empty, srs.disjoint_ranges()); } +TEST(ConsecutiveStringsOfMaxLen, ZeroLen) { + std::string a; + std::string b; + + ASSERT_FALSE(ConsecutiveStringsOfMaxLen(a, b, 0)); +} + +TEST(ConsecutiveStringsOfMaxLen, ShorterThanMaxLen) { + std::string a; + std::string b; + b.push_back('\0'); + + ASSERT_TRUE(ConsecutiveStringsOfMaxLen(a, b, 1)); + + a = "a"; + b = "a"; + b.push_back('\0'); + + ASSERT_TRUE(ConsecutiveStringsOfMaxLen(a, b, 2)); +} + +TEST(ConsecutiveStringsOfMaxLen, FirstHasMaxLen) { + std::string a = "a"; + std::string b = "b"; + + ASSERT_TRUE(ConsecutiveStringsOfMaxLen(a, b, 1)); + + a = "a"; + b = "c"; + + ASSERT_FALSE(ConsecutiveStringsOfMaxLen(a, b, 1)); + + // Test the case where the first string is at max length and the + // terminal char or chars cannot be incremented because they are at + // maximum value. 
+ a = "a"; + a.push_back(CHAR_MAX); + a.push_back(CHAR_MAX); + + b = "b"; + b.push_back(CHAR_MAX); + b.push_back(CHAR_MAX); + + ASSERT_TRUE(ConsecutiveStringsOfMaxLen(a, b, 3)); +} + } // anonymous namespace } // namespace emulator } // namespace bigtable From 1001bdaf8e4db1e000aa13ac9146222b0f3211d6 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Thu, 17 Jul 2025 20:35:03 +0300 Subject: [PATCH 190/195] fix: Reject row_keys larger than 4KiB for mutations (and range set creation). --- google/cloud/bigtable/emulator/CMakeLists.txt | 1 + .../emulator/bigtable_emulator_common.bzl | 1 + google/cloud/bigtable/emulator/limits.h | 29 ++++++++++++ google/cloud/bigtable/emulator/range_set.cc | 47 ------------------- google/cloud/bigtable/emulator/range_set.h | 3 -- .../cloud/bigtable/emulator/range_set_test.cc | 47 ------------------- google/cloud/bigtable/emulator/table.cc | 42 +++++++++++++++++ .../bigtable/google_cloud_cpp_bigtable.bzl | 1 - 8 files changed, 73 insertions(+), 98 deletions(-) create mode 100644 google/cloud/bigtable/emulator/limits.h diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index c8ccfccef6c8c..ea98e1b6b2024 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -25,6 +25,7 @@ add_library( filter.cc filter.h filtered_map.h + limits.h range_set.cc range_set.h row_streamer.cc diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl index 29519b5409272..6854ddbb4ae2a 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl @@ -22,6 +22,7 @@ bigtable_emulator_common_hdrs = [ "column_family.h", "filter.h", "filtered_map.h", + "limits.h", "range_set.h", "row_streamer.h", "server.h", diff --git a/google/cloud/bigtable/emulator/limits.h 
b/google/cloud/bigtable/emulator/limits.h new file mode 100644 index 0000000000000..46760d693232e --- /dev/null +++ b/google/cloud/bigtable/emulator/limits.h @@ -0,0 +1,29 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_LIMITS_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_LIMITS_H + +#include +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +constexpr std::size_t kMaxRowLen = 2 << 21; +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_LIMITS_H diff --git a/google/cloud/bigtable/emulator/range_set.cc b/google/cloud/bigtable/emulator/range_set.cc index d5d3e249805ca..798e87f41108f 100644 --- a/google/cloud/bigtable/emulator/range_set.cc +++ b/google/cloud/bigtable/emulator/range_set.cc @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -574,52 +573,6 @@ std::ostream& operator<<(std::ostream& os, return os; } -bool ConsecutiveStringsOfMaxLen(std::string const& a, std::string const& b, - std::size_t max_len) { - assert(a.length() <= max_len); - assert(b.length() <= max_len); - - if (max_len == 0) { - return false; - } - - if (a.length() < max_len) { - return internal::ConsecutiveRowKeys(a, b); - } - - // Note that at this point we are guaranteed that a is not empty but - // let us be sure. 
- assert(!a.empty()); - - // What is the rightmost index that we can increment by 1 to get - // the next string in lexicographic order. - absl::optional index_to_inc = absl::nullopt; - - for (auto it = a.rbegin(); it != a.rend(); it++) { - char c = *it; - if (c < CHAR_MAX) { - index_to_inc = (a.length() - 1) - std::distance(a.rbegin(), it); - break; - } - } - - if (!index_to_inc.has_value()) { - // a is the last string in lexicographical order for strings of - // max_len chars, so it has no successor. - return false; - } - - std::string successor_to_a; - // A copy but if we want we could fix that by a comparison in 3 - // parts. However, row keys that are at maximum length are the rare - // exception, presumably. - successor_to_a = a; - - successor_to_a[index_to_inc.value()] += 1; - - return successor_to_a == b; -} - } // namespace emulator } // namespace bigtable } // namespace cloud diff --git a/google/cloud/bigtable/emulator/range_set.h b/google/cloud/bigtable/emulator/range_set.h index df857be5ff78a..0495543766a79 100644 --- a/google/cloud/bigtable/emulator/range_set.h +++ b/google/cloud/bigtable/emulator/range_set.h @@ -191,9 +191,6 @@ bool operator==(TimestampRangeSet::Range const& lhs, std::ostream& operator<<(std::ostream& os, TimestampRangeSet::Range const& range); -bool ConsecutiveStringsOfMaxLen(std::string const& a, std::string const& b, - std::size_t max_len); - // For testing only. 
namespace detail { diff --git a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc index 7dfbca4c3a8d9..02a615f358c65 100644 --- a/google/cloud/bigtable/emulator/range_set_test.cc +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -817,52 +816,6 @@ TEST(StringRangeSet, IntersectDistinct) { ASSERT_EQ(empty, srs.disjoint_ranges()); } -TEST(ConsecutiveStringsOfMaxLen, ZeroLen) { - std::string a; - std::string b; - - ASSERT_FALSE(ConsecutiveStringsOfMaxLen(a, b, 0)); -} - -TEST(ConsecutiveStringsOfMaxLen, ShorterThanMaxLen) { - std::string a; - std::string b; - b.push_back('\0'); - - ASSERT_TRUE(ConsecutiveStringsOfMaxLen(a, b, 1)); - - a = "a"; - b = "a"; - b.push_back('\0'); - - ASSERT_TRUE(ConsecutiveStringsOfMaxLen(a, b, 2)); -} - -TEST(ConsecutiveStringsOfMaxLen, FirstHasMaxLen) { - std::string a = "a"; - std::string b = "b"; - - ASSERT_TRUE(ConsecutiveStringsOfMaxLen(a, b, 1)); - - a = "a"; - b = "c"; - - ASSERT_FALSE(ConsecutiveStringsOfMaxLen(a, b, 1)); - - // Test the case where the first string is at max length and the - // terminal char or chars cannot be incremented because they are at - // maximum value. 
- a = "a"; - a.push_back(CHAR_MAX); - a.push_back(CHAR_MAX); - - b = "b"; - b.push_back(CHAR_MAX); - b.push_back(CHAR_MAX); - - ASSERT_TRUE(ConsecutiveStringsOfMaxLen(a, b, 3)); -} - } // anonymous namespace } // namespace emulator } // namespace bigtable diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 25e4b1dd4f2d3..c820358edb0f6 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -15,6 +15,7 @@ #include "google/cloud/bigtable/emulator/table.h" #include "google/cloud/bigtable/emulator/column_family.h" #include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/limits.h" #include "google/cloud/bigtable/emulator/range_set.h" #include "google/cloud/bigtable/emulator/row_streamer.h" #include "google/cloud/internal/big_endian.h" @@ -300,6 +301,13 @@ Status Table::DoMutationsWithPossibleRollback( std::string const& row_key, google::protobuf::RepeatedPtrField const& mutations) { + if (row_key.size() > kMaxRowLen) { + return InvalidArgumentError( + "The row_key is longer than 4KiB", + GCP_ERROR_INFO().WithMetadata("row_key size", + absl::StrFormat("%zu", row_key.size()))); + } + RowTransaction row_transaction(this->get(), row_key); for (auto const& mutation : mutations) { @@ -458,6 +466,15 @@ StatusOr CreateStringRangeSet( google::bigtable::v2::RowSet const& row_set) { StringRangeSet res; for (auto const& row_key : row_set.row_keys()) { + if (row_key.size() > kMaxRowLen) { + return InvalidArgumentError( + "The row_key in row_set is longer than 4KiB", + GCP_ERROR_INFO() + .WithMetadata("row_key size", + absl::StrFormat("%zu", row_key.size())) + .WithMetadata("row_set", row_set.DebugString())); + } + if (row_key.empty()) { return InvalidArgumentError( "`row_key` empty", @@ -484,6 +501,16 @@ Table::CheckAndMutateRow( std::lock_guard lock(mu_); auto const& row_key = request.row_key(); + + if (row_key.size() > kMaxRowLen) { + return 
InvalidArgumentError( + "The row_key is longer than 4KiB", + GCP_ERROR_INFO() + .WithMetadata("row_key size", + absl::StrFormat("%zu", row_key.size())) + .WithMetadata("CheckAndMutateRequest", request.DebugString())); + } + if (row_key.empty()) { return InvalidArgumentError( "row key required", @@ -766,6 +793,14 @@ Status Table::DropRowRange( } auto const& row_prefix = request.row_key_prefix(); + if (request.row_key_prefix().size() > kMaxRowLen) { + return InvalidArgumentError( + "The row_key_prefix is longer than 4KiB", + GCP_ERROR_INFO().WithMetadata( + "row_key_prefix size", + absl::StrFormat("%zu", request.row_key_prefix().size()))); + } + if (row_prefix.empty()) { return InvalidArgumentError( "Row prefix provided is empty.", @@ -790,6 +825,13 @@ Status Table::DropRowRange( StatusOr<::google::bigtable::v2::ReadModifyWriteRowResponse> Table::ReadModifyWriteRow( google::bigtable::v2::ReadModifyWriteRowRequest const& request) { + if (request.row_key().size() > kMaxRowLen) { + return InvalidArgumentError( + "The row_key is longer than 4KiB", + GCP_ERROR_INFO().WithMetadata( + "row_key size", absl::StrFormat("%zu", request.row_key().size()))); + } + std::lock_guard lock(mu_); RowTransaction row_transaction(this->get(), request.row_key()); diff --git a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl index 0d395fd997ff1..818a75601c799 100644 --- a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl +++ b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl @@ -204,7 +204,6 @@ google_cloud_cpp_bigtable_srcs = [ "internal/prefix_range_end.cc", "internal/rate_limiter.cc", "internal/readrowsparser.cc", - "internal/retry_context.cc", "internal/row_range_helpers.cc", "internal/traced_row_reader.cc", "metadata_update_policy.cc", From 83456530d1285fe0cb5042449e80327ba18df6bb Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 21 Jul 2025 16:04:52 +0300 Subject: [PATCH 191/195] fix: windows build: fix build 
failure due to undefined type. This fixes the following build failure on Windows: https://github.com/googleapis/google-cloud-cpp/actions/runs/16345973765/job/46188441753#step:12:3215 --- google/cloud/bigtable/emulator/table.cc | 1 + google/cloud/bigtable/google_cloud_cpp_bigtable.bzl | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 25e4b1dd4f2d3..3fb116753512e 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl index 0d395fd997ff1..818a75601c799 100644 --- a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl +++ b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl @@ -204,7 +204,6 @@ google_cloud_cpp_bigtable_srcs = [ "internal/prefix_range_end.cc", "internal/rate_limiter.cc", "internal/readrowsparser.cc", - "internal/retry_context.cc", "internal/row_range_helpers.cc", "internal/traced_row_reader.cc", "metadata_update_policy.cc", From b2cc85a629e8e03e8aa2d51f94887383d25e3623 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 22 Jul 2025 14:16:56 +0300 Subject: [PATCH 192/195] fix: Windows Build: Fix remaining compile and linkage errors. 
--- google/cloud/bigtable/emulator/CMakeLists.txt | 1 - google/cloud/bigtable/emulator/bigtable_emulator_common.bzl | 1 - google/cloud/bigtable/emulator/row_streamer.cc | 1 - google/cloud/bigtable/emulator/row_streamer.h | 2 ++ google/cloud/bigtable/emulator/table.cc | 1 - 5 files changed, 2 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index c8ccfccef6c8c..4c90ae7974e83 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -21,7 +21,6 @@ add_library( cluster.h column_family.cc column_family.h - column_family_test.cc filter.cc filter.h filtered_map.h diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl index 29519b5409272..eca5acfaa860d 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl @@ -33,7 +33,6 @@ bigtable_emulator_common_hdrs = [ bigtable_emulator_common_srcs = [ "cluster.cc", "column_family.cc", - "column_family_test.cc", "filter.cc", "range_set.cc", "row_streamer.cc", diff --git a/google/cloud/bigtable/emulator/row_streamer.cc b/google/cloud/bigtable/emulator/row_streamer.cc index 40a9e1873644e..3dc243faba1ca 100644 --- a/google/cloud/bigtable/emulator/row_streamer.cc +++ b/google/cloud/bigtable/emulator/row_streamer.cc @@ -16,7 +16,6 @@ #include "google/cloud/bigtable/emulator/cell_view.h" #include #include -#include #include #include diff --git a/google/cloud/bigtable/emulator/row_streamer.h b/google/cloud/bigtable/emulator/row_streamer.h index d8d9ed6a73280..0700b58cd3df1 100644 --- a/google/cloud/bigtable/emulator/row_streamer.h +++ b/google/cloud/bigtable/emulator/row_streamer.h @@ -17,6 +17,8 @@ #include "google/cloud/bigtable/emulator/cell_view.h" #include "absl/types/optional.h" +#include +#include #include #include #include diff 
--git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc index 3fb116753512e..25e4b1dd4f2d3 100644 --- a/google/cloud/bigtable/emulator/table.cc +++ b/google/cloud/bigtable/emulator/table.cc @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include From 5fd1768040b842f179fc856b8f3f5fe48861c838 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 22 Jul 2025 14:31:52 +0300 Subject: [PATCH 193/195] style: mechanical header fixes by checkers CI script. --- google/cloud/bigtable/emulator/row_streamer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/emulator/row_streamer.h b/google/cloud/bigtable/emulator/row_streamer.h index 0700b58cd3df1..c670f6322613a 100644 --- a/google/cloud/bigtable/emulator/row_streamer.h +++ b/google/cloud/bigtable/emulator/row_streamer.h @@ -18,8 +18,8 @@ #include "google/cloud/bigtable/emulator/cell_view.h" #include "absl/types/optional.h" #include -#include #include +#include #include #include #include From b71328dd0b1170f51d4c667152fd5ca3e5ec2618 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Mon, 21 Jul 2025 16:04:52 +0300 Subject: [PATCH 194/195] fix: windows build: fix windows build. The failures of the build were due to: - Missing includes leading to errors in template type instantiation - column_family_test.cc was included in the common library causing duplicate definitions at link time. Not sure why these problems did not manifest on Linux which builds and tests cleanly before this change, even with -pedantic and -pedantic-errors turned on. 
--- google/cloud/bigtable/emulator/CMakeLists.txt | 1 - google/cloud/bigtable/emulator/bigtable_emulator_common.bzl | 1 - google/cloud/bigtable/emulator/row_streamer.cc | 1 - google/cloud/bigtable/emulator/row_streamer.h | 2 ++ google/cloud/bigtable/google_cloud_cpp_bigtable.bzl | 1 - 5 files changed, 2 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index c8ccfccef6c8c..4c90ae7974e83 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -21,7 +21,6 @@ add_library( cluster.h column_family.cc column_family.h - column_family_test.cc filter.cc filter.h filtered_map.h diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl index 29519b5409272..eca5acfaa860d 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl @@ -33,7 +33,6 @@ bigtable_emulator_common_hdrs = [ bigtable_emulator_common_srcs = [ "cluster.cc", "column_family.cc", - "column_family_test.cc", "filter.cc", "range_set.cc", "row_streamer.cc", diff --git a/google/cloud/bigtable/emulator/row_streamer.cc b/google/cloud/bigtable/emulator/row_streamer.cc index 40a9e1873644e..3dc243faba1ca 100644 --- a/google/cloud/bigtable/emulator/row_streamer.cc +++ b/google/cloud/bigtable/emulator/row_streamer.cc @@ -16,7 +16,6 @@ #include "google/cloud/bigtable/emulator/cell_view.h" #include #include -#include #include #include diff --git a/google/cloud/bigtable/emulator/row_streamer.h b/google/cloud/bigtable/emulator/row_streamer.h index d8d9ed6a73280..c670f6322613a 100644 --- a/google/cloud/bigtable/emulator/row_streamer.h +++ b/google/cloud/bigtable/emulator/row_streamer.h @@ -17,7 +17,9 @@ #include "google/cloud/bigtable/emulator/cell_view.h" #include "absl/types/optional.h" +#include #include +#include #include 
#include #include diff --git a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl index 0d395fd997ff1..818a75601c799 100644 --- a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl +++ b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl @@ -204,7 +204,6 @@ google_cloud_cpp_bigtable_srcs = [ "internal/prefix_range_end.cc", "internal/rate_limiter.cc", "internal/readrowsparser.cc", - "internal/retry_context.cc", "internal/row_range_helpers.cc", "internal/traced_row_reader.cc", "metadata_update_policy.cc", From e7735102b5b1f9795615c71ecbf2e5b1ca6ccf15 Mon Sep 17 00:00:00 2001 From: Brian Gitonga Marete Date: Tue, 22 Jul 2025 16:16:37 +0300 Subject: [PATCH 195/195] fix: windows build: fix windows build. The failures of the build were due to: - Missing includes leading to errors in template type instantiation - column_family_test.cc was included in the common library causing duplicate definitions at link time. Not sure why these problems did not manifest on Linux which builds and tests cleanly before this change, even with -pedantic and -pedantic-errors turned on. 
--- google/cloud/bigtable/emulator/CMakeLists.txt | 1 - google/cloud/bigtable/emulator/bigtable_emulator_common.bzl | 1 - google/cloud/bigtable/emulator/row_streamer.cc | 1 - google/cloud/bigtable/emulator/row_streamer.h | 2 ++ google/cloud/bigtable/google_cloud_cpp_bigtable.bzl | 1 - 5 files changed, 2 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt index c8ccfccef6c8c..4c90ae7974e83 100644 --- a/google/cloud/bigtable/emulator/CMakeLists.txt +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -21,7 +21,6 @@ add_library( cluster.h column_family.cc column_family.h - column_family_test.cc filter.cc filter.h filtered_map.h diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl index 29519b5409272..eca5acfaa860d 100644 --- a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl +++ b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl @@ -33,7 +33,6 @@ bigtable_emulator_common_hdrs = [ bigtable_emulator_common_srcs = [ "cluster.cc", "column_family.cc", - "column_family_test.cc", "filter.cc", "range_set.cc", "row_streamer.cc", diff --git a/google/cloud/bigtable/emulator/row_streamer.cc b/google/cloud/bigtable/emulator/row_streamer.cc index 40a9e1873644e..3dc243faba1ca 100644 --- a/google/cloud/bigtable/emulator/row_streamer.cc +++ b/google/cloud/bigtable/emulator/row_streamer.cc @@ -16,7 +16,6 @@ #include "google/cloud/bigtable/emulator/cell_view.h" #include #include -#include #include #include diff --git a/google/cloud/bigtable/emulator/row_streamer.h b/google/cloud/bigtable/emulator/row_streamer.h index d8d9ed6a73280..c670f6322613a 100644 --- a/google/cloud/bigtable/emulator/row_streamer.h +++ b/google/cloud/bigtable/emulator/row_streamer.h @@ -17,7 +17,9 @@ #include "google/cloud/bigtable/emulator/cell_view.h" #include "absl/types/optional.h" +#include #include +#include #include 
#include #include diff --git a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl index 0d395fd997ff1..818a75601c799 100644 --- a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl +++ b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl @@ -204,7 +204,6 @@ google_cloud_cpp_bigtable_srcs = [ "internal/prefix_range_end.cc", "internal/rate_limiter.cc", "internal/readrowsparser.cc", - "internal/retry_context.cc", "internal/row_range_helpers.cc", "internal/traced_row_reader.cc", "metadata_update_policy.cc",