diff --git a/.gitignore b/.gitignore index b8e44a997ef9b..c9097511f9df2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,11 @@ +# Code coverage output artefacts +code_coverage.info +code_coverage_report/ + # Common build output directory names .build/ _build/ +build/ build-output/ build-out/ cmake-out/ diff --git a/google/cloud/bigtable/CMakeLists.txt b/google/cloud/bigtable/CMakeLists.txt index e9003a19e810a..9a5149d91ee53 100644 --- a/google/cloud/bigtable/CMakeLists.txt +++ b/google/cloud/bigtable/CMakeLists.txt @@ -26,6 +26,7 @@ set(DOXYGEN_EXCLUDE_SYMBOLS "benchmarks" "bigtable_admin_internal" "bigtable_internal" + "emulator" "internal" "testing" "examples" @@ -204,6 +205,8 @@ add_library( internal/readrowsparser.cc internal/readrowsparser.h internal/retry_traits.h + internal/row_range_helpers.cc + internal/row_range_helpers.h internal/row_reader_impl.h internal/rpc_policy_parameters.h internal/rpc_policy_parameters.inc @@ -512,6 +515,8 @@ if (BUILD_TESTING) add_subdirectory(tests) endif () +add_subdirectory(emulator) + # Examples are enabled if possible, but package maintainers may want to disable # compilation to speed up their builds. if (GOOGLE_CLOUD_CPP_ENABLE_EXAMPLES) diff --git a/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh b/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh index 9621925f22514..f988e1045b00b 100755 --- a/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh +++ b/google/cloud/bigtable/ci/run_integration_tests_emulator_cmake.sh @@ -41,6 +41,12 @@ fi CBT_INSTANCE_ADMIN_EMULATOR_START=( "${BINARY_DIR}/google/cloud/bigtable/tests/instance_admin_emulator" ) + +# Configure run_emulators_utils.sh to find the cbt emulator. 
+CBT_EMULATOR_CMD=( + "${BINARY_DIR}/google/cloud/bigtable/emulator/emulator" +) + source module /google/cloud/bigtable/tools/run_emulator_utils.sh cd "${BINARY_DIR}" diff --git a/google/cloud/bigtable/emulator/CMakeLists.txt b/google/cloud/bigtable/emulator/CMakeLists.txt new file mode 100644 index 0000000000000..d3352793bfb20 --- /dev/null +++ b/google/cloud/bigtable/emulator/CMakeLists.txt @@ -0,0 +1,114 @@ +# ~~~ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ~~~ + +add_library( + bigtable_emulator_common # cmake-format: sort + cell_view.h + cluster.cc + cluster.h + column_family.cc + column_family.h + filter.cc + filter.h + filtered_map.h + limits.h + range_set.cc + range_set.h + row_streamer.cc + row_streamer.h + server.cc + server.h + table.cc + table.h + test_util.cc + test_util.h + to_grpc_status.cc + to_grpc_status.h) + +target_link_libraries( + bigtable_emulator_common + google-cloud-cpp::bigtable + google-cloud-cpp::bigtable_protos + google-cloud-cpp::common + google-cloud-cpp::grpc_utils + gRPC::grpc++ + gRPC::grpc + protobuf::libprotobuf) +google_cloud_cpp_add_common_options(bigtable_emulator_common) + +include(CreateBazelConfig) +create_bazel_config(bigtable_emulator_common YEAR 2024) + +if (BUILD_TESTING) + # List the unit tests, then setup the targets and dependencies. 
+ set(bigtable_emulator_unit_tests + # cmake-format: sort + column_family_test.cc + conditional_mutations_test.cc + drop_row_range_test.cc + filter_test.cc + filtered_map_test.cc + mutations_test.cc + range_set_test.cc + server_test.cc + table_test.cc) + export_list_to_bazel("bigtable_emulator_unit_tests.bzl" + "bigtable_emulator_unit_tests" YEAR "2024") + + foreach (fname ${bigtable_emulator_unit_tests}) + google_cloud_cpp_add_executable(target "bigtable_emulator" "${fname}") + target_link_libraries( + ${target} + PRIVATE bigtable_emulator_common + bigtable_client_testing + google_cloud_cpp_testing + google_cloud_cpp_testing_grpc + google-cloud-cpp::bigtable + google-cloud-cpp::bigtable_protos + google-cloud-cpp::common + google-cloud-cpp::grpc_utils + GTest::gmock_main + GTest::gmock + GTest::gtest + gRPC::grpc++ + gRPC::grpc + protobuf::libprotobuf) + google_cloud_cpp_add_common_options(${target}) + add_test(NAME ${target} COMMAND ${target}) + endforeach () +endif () + +set(bigtable_emulator_programs # cmake-format: sort + emulator.cc) +export_list_to_bazel("bigtable_emulator_programs.bzl" + "bigtable_emulator_programs" YEAR "2024") + +foreach (fname ${bigtable_emulator_programs}) + google_cloud_cpp_add_executable(target "bigtable" "${fname}") + target_link_libraries( + ${target} + PRIVATE bigtable_emulator_common + absl::flags + absl::flags_parse + google-cloud-cpp::bigtable + google-cloud-cpp::bigtable_protos + google-cloud-cpp::grpc_utils + google_cloud_cpp_testing + gRPC::grpc++ + gRPC::grpc + protobuf::libprotobuf) + google_cloud_cpp_add_common_options(${target}) +endforeach () diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl new file mode 100644 index 0000000000000..00346be211c32 --- /dev/null +++ b/google/cloud/bigtable/emulator/bigtable_emulator_common.bzl @@ -0,0 +1,44 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); 
+# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# DO NOT EDIT -- GENERATED BY CMake -- Change the CMakeLists.txt file if needed + +"""Automatically generated source lists for bigtable_emulator_common - DO NOT EDIT.""" + +bigtable_emulator_common_hdrs = [ + "cell_view.h", + "cluster.h", + "column_family.h", + "filter.h", + "filtered_map.h", + "limits.h", + "range_set.h", + "row_streamer.h", + "server.h", + "table.h", + "test_util.h", + "to_grpc_status.h", +] + +bigtable_emulator_common_srcs = [ + "cluster.cc", + "column_family.cc", + "filter.cc", + "range_set.cc", + "row_streamer.cc", + "server.cc", + "table.cc", + "test_util.cc", + "to_grpc_status.cc", +] diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_programs.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_programs.bzl new file mode 100644 index 0000000000000..f0260e80cf394 --- /dev/null +++ b/google/cloud/bigtable/emulator/bigtable_emulator_programs.bzl @@ -0,0 +1,21 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# DO NOT EDIT -- GENERATED BY CMake -- Change the CMakeLists.txt file if needed + +"""Automatically generated unit tests list - DO NOT EDIT.""" + +bigtable_emulator_programs = [ + "emulator.cc", +] diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_test_common.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_test_common.bzl new file mode 100644 index 0000000000000..553f19d2de95f --- /dev/null +++ b/google/cloud/bigtable/emulator/bigtable_emulator_test_common.bzl @@ -0,0 +1,25 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# DO NOT EDIT -- GENERATED BY CMake -- Change the CMakeLists.txt file if needed + +"""Automatically generated source lists for bigtable_emulator_test_common - DO NOT EDIT.""" + +bigtable_emulator_test_common_hdrs = [ + "test_util.h", +] + +bigtable_emulator_test_common_srcs = [ + "test_util.cc", +] diff --git a/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl new file mode 100644 index 0000000000000..cf47318619e60 --- /dev/null +++ b/google/cloud/bigtable/emulator/bigtable_emulator_unit_tests.bzl @@ -0,0 +1,29 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# DO NOT EDIT -- GENERATED BY CMake -- Change the CMakeLists.txt file if needed + +"""Automatically generated unit tests list - DO NOT EDIT.""" + +bigtable_emulator_unit_tests = [ + "column_family_test.cc", + "conditional_mutations_test.cc", + "drop_row_range_test.cc", + "filter_test.cc", + "filtered_map_test.cc", + "mutations_test.cc", + "range_set_test.cc", + "server_test.cc", + "table_test.cc", +] diff --git a/google/cloud/bigtable/emulator/cell_view.h b/google/cloud/bigtable/emulator/cell_view.h new file mode 100644 index 0000000000000..b0cb9db253b49 --- /dev/null +++ b/google/cloud/bigtable/emulator/cell_view.h @@ -0,0 +1,71 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CELL_VIEW_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CELL_VIEW_H + +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +/** + * A class used to represent values when scanning a table. 
+ * + * It is transient - it should never be stored as it only contains references to + * data which will likely become invalidated on first update. + */ +class CellView { + public: + CellView(std::string const& row_key, std::string const& column_family, + std::string const& column_qualifier, + std::chrono::milliseconds timestamp, std::string const& value) + : row_key_(row_key), + column_family_(column_family), + column_qualifier_(column_qualifier), + timestamp_(timestamp), + value_(value) {} + + std::string const& row_key() const { return row_key_.get(); } + std::string const& column_family() const { return column_family_.get(); } + std::string const& column_qualifier() const { + return column_qualifier_.get(); + } + std::chrono::milliseconds timestamp() const { return timestamp_; } + std::string const& value() const { return value_.get(); } + bool HasLabel() const { return label_.has_value(); } + std::string const& label() const { return label_.value().get(); } + void SetLabel(std::string const& label) { label_ = label; } + void SetValue(std::string const& value) { value_ = value; } + + private: + std::reference_wrapper row_key_; + std::reference_wrapper column_family_; + std::reference_wrapper column_qualifier_; + std::chrono::milliseconds timestamp_; + std::reference_wrapper value_; + absl::optional> label_; +}; + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CELL_VIEW_H diff --git a/google/cloud/bigtable/emulator/cluster.cc b/google/cloud/bigtable/emulator/cluster.cc new file mode 100644 index 0000000000000..a6aac1570198d --- /dev/null +++ b/google/cloud/bigtable/emulator/cluster.cc @@ -0,0 +1,193 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/emulator/cluster.h" +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/internal/make_status.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include "absl/strings/match.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +namespace { + +namespace btadmin = google::bigtable::admin::v2; + +/** + * Obtain a limited view of a `Table`'s schema, by applying a `TableView`. + * + * @param table_name table name in the form of + * `/projects/{}/instances/{}/tables/{}` to be returned in the result. + * @param table the table in question + * @param view the view to apply + * @param default_view the view to fall back to in case `view` is set to + * `btadmin::Table::VIEW_UNSPECIFIED`. `default_view` should not be set to + * `btadmin::Table::VIEW_UNSPECIFIED`.
+ * @return the trimmed schema or error + */ +StatusOr ApplyView(std::string const& table_name, + Table const& table, btadmin::Table_View view, + btadmin::Table_View default_view) { + if (view == btadmin::Table::VIEW_UNSPECIFIED) { + view = default_view; + } + switch (view) { + case btadmin::Table::VIEW_UNSPECIFIED: + return google::cloud::internal::InternalError( + "VIEW_UNSPECIFIED cannot be the default view"); + case btadmin::Table::NAME_ONLY: { + btadmin::Table res; + res.set_name(table_name); + return res; + } + case btadmin::Table::SCHEMA_VIEW: { + btadmin::Table res; + res.set_name(table_name); + auto before_view = table.GetSchema(); + *res.mutable_column_families() = + std::move(*before_view.mutable_column_families()); + res.set_granularity(before_view.granularity()); + return res; + } + case btadmin::Table::REPLICATION_VIEW: + case btadmin::Table::ENCRYPTION_VIEW: { + btadmin::Table res; + res.set_name(table_name); + auto before_view = table.GetSchema(); + *res.mutable_cluster_states() = + std::move(*before_view.mutable_cluster_states()); + return res; + } + case btadmin::Table::FULL: + return table.GetSchema(); + default: + return google::cloud::internal::UnimplementedError( + "Unsupported view.", + GCP_ERROR_INFO().WithMetadata("view", Table_View_Name(view))); + } +} + +} // anonymous namespace + +StatusOr Cluster::CreateTable(std::string const& table_name, + btadmin::Table schema) { + schema.set_name(table_name); + std::cout << "Creating table " << table_name << std::endl; + auto maybe_table = Table::Create(std::move(schema)); + if (!maybe_table) { + return maybe_table.status(); + } + { + std::lock_guard lock(mu_); + if (!table_by_name_.emplace(table_name, *maybe_table).second) { + return google::cloud::internal::AlreadyExistsError( + "Table already exists.", + GCP_ERROR_INFO().WithMetadata("table_name", table_name)); + } + } + return (*maybe_table)->GetSchema(); +} + +StatusOr> Cluster::ListTables( + std::string const& instance_name, 
btadmin::Table_View view) const { + std::map> table_by_name_copy; + { + std::lock_guard lock(mu_); + table_by_name_copy = table_by_name_; + } + std::vector res; + std::string const prefix = instance_name + "/tables/"; + std::cout << "Listing tables with prefix " << prefix << std::endl; + for (auto name_and_table_it = table_by_name_copy.upper_bound(prefix); + name_and_table_it != table_by_name_copy.end() && + absl::StartsWith(name_and_table_it->first, prefix); + ++name_and_table_it) { + auto maybe_view = + ApplyView(name_and_table_it->first, *name_and_table_it->second, view, + btadmin::Table::NAME_ONLY); + if (!maybe_view) { + return maybe_view.status(); + } + res.emplace_back(*maybe_view); + } + return res; +} + +StatusOr Cluster::GetTable(std::string const& table_name, + btadmin::Table_View view) const { + std::shared_ptr found_table; + { + std::lock_guard lock(mu_); + auto it = table_by_name_.find(table_name); + if (it == table_by_name_.end()) { + return NotFoundError("No such table.", GCP_ERROR_INFO().WithMetadata( + "table_name", table_name)); + } + found_table = it->second; + } + return ApplyView(table_name, *found_table, view, btadmin::Table::SCHEMA_VIEW); +} + +Status Cluster::DeleteTable(std::string const& table_name) { + { + std::lock_guard lock(mu_); + auto it = table_by_name_.find(table_name); + if (it == table_by_name_.end()) { + return NotFoundError("No such table.", GCP_ERROR_INFO().WithMetadata( + "table_name", table_name)); + } + if (it->second->IsDeleteProtected()) { + return FailedPreconditionError( + "The table has deletion protection.", + GCP_ERROR_INFO().WithMetadata("table_name", table_name)); + } + table_by_name_.erase(it); + } + return Status(); +} + +bool Cluster::HasTable(std::string const& table_name) const { + std::lock_guard lock(mu_); + return table_by_name_.find(table_name) != table_by_name_.end(); +} + +StatusOr> Cluster::FindTable( + std::string const& table_name) { + { + std::lock_guard lock(mu_); + auto it = 
table_by_name_.find(table_name); + if (it == table_by_name_.end()) { + return NotFoundError("No such table.", GCP_ERROR_INFO().WithMetadata( + "table_name", table_name)); + } + return it->second; + } +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/cluster.h b/google/cloud/bigtable/emulator/cluster.h new file mode 100644 index 0000000000000..a6a6481042f51 --- /dev/null +++ b/google/cloud/bigtable/emulator/cluster.h @@ -0,0 +1,126 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CLUSTER_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CLUSTER_H + +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +/** + * An emulated cluster, which manages the lifecycle of all tables. + * + * This emulated cluster holds tables from all projects and instances - they are + * merely components of table names. + */ +class Cluster { + public: + /** + * Create a new table according to schema. + * + * @param table_name table's name in the form of + * `/projects/{}/instances/{}/tables/{}`. + * @param schema the schema of the newly created table.
+ * @return the schema of the newly created table. + */ + StatusOr CreateTable( + std::string const& table_name, google::bigtable::admin::v2::Table schema); + + /** + * List tables in the cluster. + * + * @param instance_name instance's name in the form of + * `/projects/{}/instances/{}`. + * @param view a view to limit the amount of information returned about + * tables. + * @return a vector of tables' schemas present in the instance trimmed + * according to `view`. + */ + StatusOr> ListTables( + std::string const& instance_name, + google::bigtable::admin::v2::Table_View view) const; + + /** + * Get details about a given table. + * + * @param table_name table's name in the form of + * `/projects/{}/instances/{}/tables/{}`. + * @param view a view to limit the amount of information returned about + * table. + * @return table's schema trimmed according to `view`. + */ + StatusOr GetTable( + std::string const& table_name, + google::bigtable::admin::v2::Table_View view) const; + + /** + * Delete a table by its name. + * + * @param table_name table's name in the form of + * `/projects/{}/instances/{}/tables/{}`. + * @return whether deletion succeeded. Apart from failing to remove a + * non-existent table it might also fail if the table has deletion + * protection set. + */ + Status DeleteTable(std::string const& table_name); + + /** + * Check if a table exists. + * @param table_name table's name in the form of + * `/projects/{}/instances/{}/tables/{}`. + * @return true if table exists. + */ + bool HasTable(std::string const& table_name) const; + + /** + * Find a table by name. + * + * @param table_name table's name in the form of + * `/projects/{}/instances/{}/tables/{}`. + * @return a pointer to the table or error if it doesn't exist. + */ + StatusOr> FindTable(std::string const& table_name); + + private: + mutable std::mutex mu_; + + /** + * All the tables indexed by their names. + * + * The names are in the form `/projects/{}/instances/{}/tables/{}`.
We're + * holding the tables by `shared_ptr`s in order to be able to allow for more + * concurrency - every access to a table should start with creating a copy of + * the shared pointer. + */ + std::map> table_by_name_; +}; + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_CLUSTER_H diff --git a/google/cloud/bigtable/emulator/column_family.cc b/google/cloud/bigtable/emulator/column_family.cc new file mode 100644 index 0000000000000..e501141ea05c7 --- /dev/null +++ b/google/cloud/bigtable/emulator/column_family.cc @@ -0,0 +1,466 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/cell_view.h" +#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/filtered_map.h" +#include "google/cloud/internal/big_endian.h" +#include "google/cloud/internal/make_status.h" +#include "google/cloud/status_or.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +StatusOr ColumnRow::ReadModifyWrite( + std::int64_t inc_value) { + auto system_ms = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); + + if (cells_.empty()) { + std::string value = google::cloud::internal::EncodeBigEndian(inc_value); + cells_[system_ms] = value; + + return ReadModifyWriteCellResult{system_ms, std::move(value), + absl::nullopt}; + } + + auto latest_it = cells_.begin(); + + auto maybe_old_value = + google::cloud::internal::DecodeBigEndian(latest_it->second); + if (!maybe_old_value) { + return maybe_old_value.status(); + } + + auto value = google::cloud::internal::EncodeBigEndian( + inc_value + maybe_old_value.value()); + + if (latest_it->first < system_ms) { + // We need to add a cell with the current system timestamp + cells_[system_ms] = value; + + return ReadModifyWriteCellResult{system_ms, std::move(value), + absl::nullopt}; + } + + // Latest timestamp is >= system time. 
Overwrite latest timestamp + auto old_value = std::move(latest_it->second); + latest_it->second = value; + + return ReadModifyWriteCellResult{latest_it->first, std::move(value), + std::move(old_value)}; +} + +ReadModifyWriteCellResult ColumnRow::ReadModifyWrite( + std::string const& append_value) { + auto system_ms = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); + if (cells_.empty()) { + cells_[system_ms] = append_value; + + return ReadModifyWriteCellResult{system_ms, std::move(append_value), + absl::nullopt}; + } + + auto latest_it = cells_.begin(); + + auto value = latest_it->second + append_value; + + if (latest_it->first < system_ms) { + // We need to add a cell with the current system timestamp + cells_[system_ms] = value; + + return ReadModifyWriteCellResult{system_ms, std::move(value), + absl::nullopt}; + } + + // Latest timestamp is >= system time. Overwrite latest timestamp + auto old_value = std::move(latest_it->second); + latest_it->second = value; + + return ReadModifyWriteCellResult{latest_it->first, value, + std::move(old_value)}; +} + +absl::optional ColumnRow::SetCell( + std::chrono::milliseconds timestamp, std::string const& value) { + absl::optional ret = absl::nullopt; + + auto cell_it = cells_.find(timestamp); + if (!(cell_it == cells_.end())) { + ret = std::move(cell_it->second); + } + + cells_[timestamp] = value; + + return ret; +} + +StatusOr> ColumnRow::UpdateCell( + std::chrono::milliseconds timestamp, std::string& value, + std::function(std::string const&, + std::string&&)> const& update_fn) { + absl::optional ret = absl::nullopt; + + auto cell_it = cells_.find(timestamp); + if (!(cell_it == cells_.end())) { + auto maybe_update_value = update_fn(cell_it->second, std::move(value)); + if (!maybe_update_value) { + return maybe_update_value.status(); + } + ret = std::move(cell_it->second); + maybe_update_value.value().swap(cell_it->second); + return ret; + } + + cells_[timestamp] = value; + + return ret; 
+} + +std::vector ColumnRow::DeleteTimeRange( + ::google::bigtable::v2::TimestampRange const& time_range) { + std::vector deleted_cells; + absl::optional maybe_end_micros = + time_range.end_timestamp_micros(); + if (maybe_end_micros.value_or(0) == 0) { + maybe_end_micros.reset(); + } + for (auto cell_it = + maybe_end_micros + ? upper_bound( + std::chrono::duration_cast( + std::chrono::microseconds(*maybe_end_micros))) + : begin(); + cell_it != cells_.end() && + cell_it->first >= std::chrono::duration_cast( + std::chrono::microseconds( + time_range.start_timestamp_micros()));) { + Cell cell = {std::move(cell_it->first), std::move(cell_it->second)}; + deleted_cells.emplace_back(std::move(cell)); + cells_.erase(cell_it++); + } + return deleted_cells; +} + +absl::optional ColumnRow::DeleteTimeStamp( + std::chrono::milliseconds timestamp) { + absl::optional ret = absl::nullopt; + + auto cell_it = cells_.find(timestamp); + if (cell_it != cells_.end()) { + Cell cell = {std::move(cell_it->first), std::move(cell_it->second)}; + ret.emplace(std::move(cell)); + cells_.erase(cell_it); + } + + return ret; +} + +absl::optional ColumnFamilyRow::SetCell( + std::string const& column_qualifier, std::chrono::milliseconds timestamp, + std::string const& value) { + return columns_[column_qualifier].SetCell(timestamp, value); +} + +StatusOr> ColumnFamilyRow::UpdateCell( + std::string const& column_qualifier, std::chrono::milliseconds timestamp, + std::string& value, + std::function(std::string const&, + std::string&&)> const& update_fn) { + return columns_[column_qualifier].UpdateCell(timestamp, value, + std::move(update_fn)); +} + +std::vector ColumnFamilyRow::DeleteColumn( + std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range) { + auto column_it = columns_.find(column_qualifier); + if (column_it == columns_.end()) { + return {}; + } + auto res = column_it->second.DeleteTimeRange(time_range); + if (!column_it->second.HasCells()) { + 
columns_.erase(column_it); + } + return res; +} + +absl::optional ColumnFamilyRow::DeleteTimeStamp( + std::string const& column_qualifier, std::chrono::milliseconds timestamp) { + auto column_it = columns_.find(column_qualifier); + if (column_it == columns_.end()) { + return absl::nullopt; + } + + auto ret = column_it->second.DeleteTimeStamp(timestamp); + if (!column_it->second.HasCells()) { + columns_.erase(column_it); + } + + return ret; +} + +absl::optional ColumnFamily::SetCell( + std::string const& row_key, std::string const& column_qualifier, + std::chrono::milliseconds timestamp, std::string const& value) { + return rows_[row_key].SetCell(column_qualifier, timestamp, value); +} + +StatusOr> ColumnFamily::UpdateCell( + std::string const& row_key, std::string const& column_qualifier, + std::chrono::milliseconds timestamp, std::string& value) { + return rows_[row_key].UpdateCell(column_qualifier, timestamp, value, + update_cell_); +} + +std::map> ColumnFamily::DeleteRow( + std::string const& row_key) { + std::map> res; + + auto row_it = rows_.find(row_key); + if (row_it == rows_.end()) { + return {}; + } + + for (auto& column : row_it->second.columns_) { + // Not setting start and end timestamps will select all cells for deletion + ::google::bigtable::v2::TimestampRange time_range; + auto deleted_cells = column.second.DeleteTimeRange(time_range); + if (!deleted_cells.empty()) { + res[column.first] = std::move(deleted_cells); + } + } + + rows_.erase(row_it); + + return res; +} + +std::vector ColumnFamily::DeleteColumn( + std::string const& row_key, std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range) { + auto row_it = rows_.find(row_key); + + return DeleteColumn(row_it, column_qualifier, time_range); +} + +std::vector ColumnFamily::DeleteColumn( + std::map::iterator row_it, + std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range) { + if (row_it != rows_.end()) { + auto 
erased_cells = + row_it->second.DeleteColumn(column_qualifier, time_range); + if (!row_it->second.HasColumns()) { + rows_.erase(row_it); + } + return erased_cells; + } + return {}; +} + +absl::optional ColumnFamily::DeleteTimeStamp( + std::string const& row_key, std::string const& column_qualifier, + std::chrono::milliseconds timestamp) { + auto row_it = rows_.find(row_key); + if (row_it == rows_.end()) { + return absl::nullopt; + } + + auto ret = row_it->second.DeleteTimeStamp(column_qualifier, timestamp); + if (!row_it->second.HasColumns()) { + rows_.erase(row_it); + } + + return ret; +} + +class FilteredColumnFamilyStream::FilterApply { + public: + explicit FilterApply(FilteredColumnFamilyStream& parent) : parent_(parent) {} + + bool operator()(ColumnRange const& column_range) { + if (column_range.column_family == parent_.column_family_name_) { + parent_.column_ranges_.Intersect(column_range.range); + } + return true; + } + + bool operator()(TimestampRange const& timestamp_range) { + parent_.timestamp_ranges_.Intersect(timestamp_range.range); + return true; + } + + bool operator()(RowKeyRegex const& row_key_regex) { + parent_.row_regexes_.emplace_back(row_key_regex.regex); + return true; + } + + bool operator()(FamilyNameRegex const&) { return false; } + + bool operator()(ColumnRegex const& column_regex) { + parent_.column_regexes_.emplace_back(column_regex.regex); + return true; + } + + private: + FilteredColumnFamilyStream& parent_; +}; + +FilteredColumnFamilyStream::FilteredColumnFamilyStream( + ColumnFamily const& column_family, std::string column_family_name, + std::shared_ptr row_set) + : column_family_name_(std::move(column_family_name)), + row_ranges_(std::move(row_set)), + column_ranges_(StringRangeSet::All()), + timestamp_ranges_(TimestampRangeSet::All()), + rows_( + StringRangeFilteredMapView(column_family, *row_ranges_), + std::cref(row_regexes_)) {} + +bool FilteredColumnFamilyStream::ApplyFilter( + InternalFilter const& internal_filter) { + 
assert(!initialized_); + return absl::visit(FilterApply(*this), internal_filter); +} + +bool FilteredColumnFamilyStream::HasValue() const { + InitializeIfNeeded(); + return *row_it_ != rows_.end(); +} +CellView const& FilteredColumnFamilyStream::Value() const { + InitializeIfNeeded(); + if (!cur_value_) { + cur_value_ = CellView((*row_it_)->first, column_family_name_, + column_it_.value()->first, cell_it_.value()->first, + cell_it_.value()->second); + } + return cur_value_.value(); +} + +bool FilteredColumnFamilyStream::Next(NextMode mode) { + InitializeIfNeeded(); + cur_value_.reset(); + assert(*row_it_ != rows_.end()); + assert(column_it_.value() != columns_.value().end()); + assert(cell_it_.value() != cells_.value().end()); + + if (mode == NextMode::kCell) { + ++(cell_it_.value()); + if (cell_it_.value() != cells_.value().end()) { + return true; + } + } + if (mode == NextMode::kCell || mode == NextMode::kColumn) { + ++(column_it_.value()); + if (PointToFirstCellAfterColumnChange()) { + return true; + } + } + ++(*row_it_); + PointToFirstCellAfterRowChange(); + return true; +} + +void FilteredColumnFamilyStream::InitializeIfNeeded() const { + if (!initialized_) { + row_it_ = rows_.begin(); + PointToFirstCellAfterRowChange(); + initialized_ = true; + } +} + +bool FilteredColumnFamilyStream::PointToFirstCellAfterColumnChange() const { + for (; column_it_.value() != columns_.value().end(); ++(column_it_.value())) { + cells_ = TimestampRangeFilteredMapView( + column_it_.value()->second, timestamp_ranges_); + cell_it_ = cells_.value().begin(); + if (cell_it_.value() != cells_.value().end()) { + return true; + } + } + return false; +} + +bool FilteredColumnFamilyStream::PointToFirstCellAfterRowChange() const { + for (; (*row_it_) != rows_.end(); ++(*row_it_)) { + columns_ = RegexFiteredMapView>( + StringRangeFilteredMapView((*row_it_)->second, + column_ranges_), + column_regexes_); + column_it_ = columns_.value().begin(); + if (PointToFirstCellAfterColumnChange()) { + 
return true; + } + } + return false; +} + +StatusOr> +ColumnFamily::ConstructAggregateColumnFamily( + google::bigtable::admin::v2::Type value_type) { + auto cf = std::make_shared(); + + if (value_type.has_aggregate_type()) { + auto const& aggregate_type = value_type.aggregate_type(); + switch (aggregate_type.aggregator_case()) { + case google::bigtable::admin::v2::Type::Aggregate::kSum: + cf->update_cell_ = cf->SumUpdateCellBEInt64; + break; + case google::bigtable::admin::v2::Type::Aggregate::kMin: + cf->update_cell_ = cf->MinUpdateCellBEInt64; + break; + case google::bigtable::admin::v2::Type::Aggregate::kMax: + cf->update_cell_ = cf->MaxUpdateCellBEInt64; + break; + default: + return InvalidArgumentError( + "unsupported aggregation type", + GCP_ERROR_INFO().WithMetadata( + "aggregation case", + absl::StrFormat("%d", aggregate_type.aggregator_case()))); + } + + cf->value_type_ = std::move(value_type); + + return cf; + } + + return InvalidArgumentError( + "no aggregate type set in the supplied value_type", + GCP_ERROR_INFO().WithMetadata("supplied value type", + value_type.DebugString())); +} +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/column_family.h b/google/cloud/bigtable/emulator/column_family.h new file mode 100644 index 0000000000000..3aabc2f6145f5 --- /dev/null +++ b/google/cloud/bigtable/emulator/column_family.h @@ -0,0 +1,562 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H + +#include "google/cloud/bigtable/emulator/cell_view.h" +#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/filtered_map.h" +#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/internal/big_endian.h" +#include "google/cloud/status_or.h" +#include "absl/types/optional.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +struct Cell { + std::chrono::milliseconds timestamp; + std::string value; +}; + +// ReadModifyWriteCellResult supports undo and return value +// construction for the ReadModifyWrite RPC. +// +// The timestamp and value written are always returned in timestamp +// and value and will be used to construct the Row returned by the +// RPC. +// +// If maybe_old_value has a value, then a timestamp was overwritten +// and the ReadModifyWriteCellResult will be used to create a +// RestoreValue for the undo log. Otherwise, a new cell was added and the +// ReadModifyWriteCellResult will be used to create a DeleteValue for +// the undo log. +struct ReadModifyWriteCellResult { + std::chrono::milliseconds timestamp; + std::string value; + absl::optional maybe_old_value; +}; + +/** + * Objects of this class hold contents of a specific column in a specific row. + * + * This is essentially a blessed map from timestamps to values. + */ +class ColumnRow { + public: + ColumnRow() = default; + // Disable copying. 
+ ColumnRow(ColumnRow const&) = delete; + ColumnRow& operator=(ColumnRow const&) = delete; + + StatusOr ReadModifyWrite(std::int64_t inc_value); + + ReadModifyWriteCellResult ReadModifyWrite(std::string const& append_value); + + /** + * Insert or update an existing cell at a given timestamp. + * + * @param timestamp the time stamp at which the value will be inserted or + * updated. + * @param value the value to insert/update. + * + * @return no value if the timestamp had no value before, otherwise + * the previous value of the timestamp. + */ + absl::optional SetCell(std::chrono::milliseconds timestamp, + std::string const& value); + + StatusOr> UpdateCell( + std::chrono::milliseconds timestamp, std::string& value, + std::function(std::string const&, + std::string&&)> const& update_fn); + + /** + * Delete cells falling into a given timestamp range. + * + * @param time_range the timestamp range dictating which values to delete. + * @return vector of deleted cells. + */ + std::vector DeleteTimeRange( + ::google::bigtable::v2::TimestampRange const& time_range); + + /** + * Delete a cell with the given timestamp. + * + * @param timestamp the std::chrono::milliseconds timestamp of the + * cell to delete. + * + * @return Cell representing deleted cell, if there + * was a cell with that timestamp, otherwise absl::nullopt. 
+ */ + absl::optional DeleteTimeStamp(std::chrono::milliseconds timestamp); + + bool HasCells() const { return !cells_.empty(); } + + using const_iterator = std::map>::const_iterator; + + const_iterator begin() const { return cells_.begin(); } + const_iterator end() const { return cells_.end(); } + const_iterator lower_bound(std::chrono::milliseconds timestamp) const { + return cells_.lower_bound(timestamp); + } + const_iterator upper_bound(std::chrono::milliseconds timestamp) const { + return cells_.upper_bound(timestamp); + } + + const_iterator find(std::chrono::milliseconds const& timestamp) { + return cells_.find(timestamp); + } + + void erase(const_iterator timestamp_it) { cells_.erase(timestamp_it); } + + private: + // Note the order - the iterators return the freshest cells first. + std::map> cells_; +}; + +/** + * Objects of this class hold contents of a specific row in a column family. + * + * The users of this class may access the columns for a given row via + * references to `ColumnRow`. + * + * It is guaranteed that every returned `ColumnRow` contains at least one cell. + */ +class ColumnFamilyRow { + public: + StatusOr ReadModifyWrite( + std::string const& column_qualifier, std::int64_t inc_value) { + return columns_[column_qualifier].ReadModifyWrite(inc_value); + }; + + ReadModifyWriteCellResult ReadModifyWrite(std::string const& column_qualifier, + std::string const& append_value) { + return columns_[column_qualifier].ReadModifyWrite(append_value); + } + + /** + * Insert or update an existing cell at a given column and timestamp. + * + * @param column_qualifier the column qualifier at which to update the value. + * @param timestamp the time stamp at which the value will be inserted or + * updated. + * @param value the value to insert/update. + * + * @return no value if the timestamp had no value before, otherwise + * the previous value of the timestamp. 
+ * + */ + absl::optional SetCell(std::string const& column_qualifier, + std::chrono::milliseconds timestamp, + std::string const& value); + + StatusOr> UpdateCell( + std::string const& column_qualifier, std::chrono::milliseconds timestamp, + std::string& value, + std::function(std::string const&, + std::string&&)> const& update_fn); + + /** + * Delete cells falling into a given timestamp range in one column. + * + * @param column_qualifier the column qualifier from which to delete the + * values. + * @param time_range the timestamp range dictating which values to delete. + * @return vector of deleted cells. + */ + std::vector DeleteColumn( + std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range); + /** + * Delete a cell with the given timestamp from the column given by + * the given column qualifier. + * + * @param column_qualifier the column from which to delete the cell. + * + * @param timestamp the std::chrono::milliseconds timestamp of the + * cell to delete. + * + * @return Cell representing deleted cell, if there was a cell with + * that timestamp in the given column, otherwise absl::nullopt. 
+ */ + absl::optional DeleteTimeStamp(std::string const& column_qualifier, + std::chrono::milliseconds timestamp); + + bool HasColumns() { return !columns_.empty(); } + using const_iterator = std::map::const_iterator; + const_iterator begin() const { return columns_.begin(); } + const_iterator end() const { return columns_.end(); } + const_iterator lower_bound(std::string const& column_qualifier) const { + return columns_.lower_bound(column_qualifier); + } + const_iterator upper_bound(std::string const& column_qualifier) const { + return columns_.upper_bound(column_qualifier); + } + + std::map::iterator find( + std::string const& column_qualifier) { + return columns_.find(column_qualifier); + } + + void erase(std::map::iterator column_it) { + columns_.erase(column_it); + } + + private: + friend class ColumnFamily; + + std::map columns_; +}; + +/** + * Objects of this class hold contents of a column family indexed by rows. + * + * The users of this class may access individual rows via references to + * `ColumnFamilyRow`. + * + * It is guaranteed that every returned `ColumnFamilyRow` contains at least one + * `ColumnRow`. + */ +class ColumnFamily { + public: + ColumnFamily() = default; + // ConstructAggregateColumnFamily can be used to return an aggregate + // ColumnFamily that can support AddToCell or MergeToCell and + // similar aggregate complex types. To construct an ordinary + // ColumnFamily, use the default constructor ColumnFamily(). + static StatusOr> ConstructAggregateColumnFamily( + google::bigtable::admin::v2::Type value_type); + + // Disable copying. 
+ ColumnFamily(ColumnFamily const&) = delete; + ColumnFamily& operator=(ColumnFamily const&) = delete; + + using const_iterator = std::map::const_iterator; + using iterator = std::map::iterator; + + StatusOr ReadModifyWrite( + std::string const& row_key, std::string const& column_qualifier, + std::int64_t inc_value) { + return rows_[row_key].ReadModifyWrite(column_qualifier, inc_value); + }; + + ReadModifyWriteCellResult ReadModifyWrite(std::string const& row_key, + std::string const& column_qualifier, + std::string const& append_value) { + return rows_[row_key].ReadModifyWrite(column_qualifier, append_value); + }; + + /** + * Insert or update an existing cell at a given row, column and timestamp. + * + * @param row_key the row key at which to update the value. + * @param column_qualifier the column qualifier at which to update the value. + * @param timestamp the time stamp at which the value will be inserted or + * updated. + * @param value the value to insert/update. + * + * @return no value if the timestamp had no value before, otherwise + * the previous value of the timestamp. + * + */ + absl::optional SetCell(std::string const& row_key, + std::string const& column_qualifier, + std::chrono::milliseconds timestamp, + std::string const& value); + + /** + * UpdateCell is like SetCell except that, when a cell exists with + * the same timestamp, an update function (that depends on the column + * family type) is called to derive a new value from the new and + * existing value, and that is the value that is written. + * + * Simple (non-aggregate) column families have a default update + * function that just returns the new value. + * + */ + StatusOr> UpdateCell( + std::string const& row_key, std::string const& column_qualifier, + std::chrono::milliseconds timestamp, std::string& value); + + /** + * Delete the whole row from this column family. + * + * @param row_key the row key to remove. + * @return map from deleted column qualifiers to deleted cells. 
+ */ + std::map> DeleteRow( + std::string const& row_key); + /** + * Delete cells from a row falling into a given timestamp range in one column. + * + * @param row_key the row key to remove the cells from (or the + * iterator to the row - row_it - in the 2nd overloaded form of the + * function). + * + * @param column_qualifier the column qualifier from which to delete + * the values. + * + * @param time_range the timestamp range dictating which values to + * delete. + * @return vector of deleted cells. + */ + std::vector DeleteColumn( + std::string const& row_key, std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range); + + std::vector DeleteColumn( + std::map::iterator row_it, + std::string const& column_qualifier, + ::google::bigtable::v2::TimestampRange const& time_range); + + /** + * Delete a cell with the given timestamp from the column given by + * the given column qualifier from the row given by row_key. + * + * @param row_key the row from which to delete the cell + * + * @param column_qualifier the column from which to delete the cell. + * + * @param timestamp the std::chrono::milliseconds timestamp of the + * cell to delete. + * + * @return Cell representing deleted cell, if there was a cell with + * that timestamp in the given column in the given row, + * otherwise absl::nullopt. 
+ */ + absl::optional DeleteTimeStamp(std::string const& row_key, + std::string const& column_qualifier, + std::chrono::milliseconds timestamp); + + const_iterator begin() const { return rows_.begin(); } + iterator begin() { return rows_.begin(); } + const_iterator end() const { return rows_.end(); } + iterator end() { return rows_.end(); } + const_iterator lower_bound(std::string const& row_key) const { + return rows_.lower_bound(row_key); + } + iterator lower_bound(std::string const& row_key) { + return rows_.lower_bound(row_key); + } + const_iterator upper_bound(std::string const& row_key) const { + return rows_.upper_bound(row_key); + } + iterator upper_bound(std::string const& row_key) { + return rows_.upper_bound(row_key); + } + + std::size_t size() { return rows_.size(); } + + std::map::iterator find( + std::string const& row_key) { + return rows_.find(row_key); + } + + iterator erase(std::map::iterator row_it) { + return rows_.erase(row_it); + } + + void clear() { rows_.clear(); } + absl::optional GetValueType() { + return value_type_; + }; + + private: + std::map rows_; + + // Support for aggregate and other complex types. 
+ absl::optional value_type_ = absl::nullopt; + + static StatusOr DefaultUpdateCell( + std::string const& /*existing_value*/, std::string&& new_value) { + return new_value; + }; + + static StatusOr SumUpdateCellBEInt64( + std::string const& existing_value, std::string&& new_value) { + auto existing_value_int = + google::cloud::internal::DecodeBigEndian(existing_value); + if (!existing_value_int) { + return existing_value_int.status(); + } + + auto new_value_int = + google::cloud::internal::DecodeBigEndian(new_value); + if (!new_value_int) { + return new_value_int.status(); + } + + return google::cloud::internal::EncodeBigEndian(existing_value_int.value() + + new_value_int.value()); + }; + + static StatusOr MaxUpdateCellBEInt64( + std::string const& existing_value, std::string&& new_value) { + auto existing_int = + google::cloud::internal::DecodeBigEndian(existing_value); + if (!existing_int) { + return existing_int.status(); + } + auto new_int = + google::cloud::internal::DecodeBigEndian(new_value); + if (!new_int) { + return new_int.status(); + } + + if (existing_int.value() > new_int.value()) { + return existing_value; + } + + return new_value; + }; + + static StatusOr MinUpdateCellBEInt64( + std::string const& existing_value, std::string&& new_value) { + auto existing_int = + google::cloud::internal::DecodeBigEndian(existing_value); + if (!existing_int) { + return existing_int.status(); + } + auto new_int = + google::cloud::internal::DecodeBigEndian(new_value); + if (!new_int) { + return new_int.status(); + } + + if (existing_int.value() < new_int.value()) { + return existing_value; + } + + return new_value; + }; + + std::function(std::string const&, std::string&&)> + update_cell_ = DefaultUpdateCell; +}; + +/** + * A stream of cells which allows for filtering unwanted ones. + * + * In absence of any filters, objects of this class stream the contents of a + * whole `ColumnFamily` just like true `Bigtable` would. 
+ * + * The users can apply the following filters: + * * row sets - to only stream cells for relevant rows + * * row regexes - ditto + * * column ranges - to only stream cells with given column qualifiers + * * column regexes - ditto + * * timestamp ranges - to only stream cells with timestamps in given ranges + * + * Objects of this class are not thread safe. Their users need to ensure that + * the underlying `ColumnFamily` object tree doesn't change. + */ +class FilteredColumnFamilyStream : public AbstractCellStreamImpl { + public: + /** + * Construct a new object. + * + * @param column_family the family to iterate over. It should not change over + * this object's lifetime. + * @param column_family_name the name of this column family. It will be used to + * populate the returned `CellView`s. + * @param row_set the row set indicating which row keys to include in the + * returned values. + */ + FilteredColumnFamilyStream(ColumnFamily const& column_family, + std::string column_family_name, + std::shared_ptr row_set); + bool ApplyFilter(InternalFilter const& internal_filter) override; + bool HasValue() const override; + CellView const& Value() const override; + bool Next(NextMode mode) override; + std::string const& column_family_name() const { return column_family_name_; } + + private: + class FilterApply; + + void InitializeIfNeeded() const; + /** + * Adjust the internal iterators after `column_it_` advanced. + * + * We need to make sure that either we reach the end of the column family or: + * * `column_it_` doesn't point to `end()` + * * `cell_it_` points to a cell in the column family pointed to by + * `column_it_` + * + * @return whether we've managed to find another cell in the currently + * pointed-to row. + */ + bool PointToFirstCellAfterColumnChange() const; + /** + * Adjust the internal iterators after `row_it_` advanced. + * + * Similarly to `PointToFirstCellAfterColumnChange()` it ensures that all + * internal iterators are valid (or we've reached `end()`). 
+ * + * @return whether we've managed to find another cell + */ + bool PointToFirstCellAfterRowChange() const; + + std::string column_family_name_; + + std::shared_ptr row_ranges_; + std::vector> row_regexes_; + mutable StringRangeSet column_ranges_; + std::vector> column_regexes_; + mutable TimestampRangeSet timestamp_ranges_; + + RegexFiteredMapView> rows_; + mutable absl::optional< + RegexFiteredMapView>> + columns_; + mutable absl::optional> cells_; + + // If row_it_ == rows_.end() we've reached the end. + // We maintain the following invariant: + // if (row_it_ != rows_.end()) then + // cell_it_ != cells.end() && column_it_ != columns_.end(). + mutable absl::optional>::const_iterator> + row_it_; + mutable absl::optional>::const_iterator> + column_it_; + mutable absl::optional< + TimestampRangeFilteredMapView::const_iterator> + cell_it_; + mutable absl::optional cur_value_; + mutable bool initialized_{false}; +}; + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_COLUMN_FAMILY_H diff --git a/google/cloud/bigtable/emulator/column_family_test.cc b/google/cloud/bigtable/emulator/column_family_test.cc new file mode 100644 index 0000000000000..9300391290f7e --- /dev/null +++ b/google/cloud/bigtable/emulator/column_family_test.cc @@ -0,0 +1,398 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/testing_util/chrono_literals.h" +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +namespace { + +std::string DumpColumnRow(ColumnRow const& col_row, + std::string const& prefix = "") { + std::stringstream ss; + for (auto const& cell : col_row) { + ss << prefix << "@" << cell.first.count() << "ms: " << cell.second + << std::endl; + } + return ss.str(); +} + +std::string DumpColumnFamilyRow(ColumnFamilyRow const& fam_row, + std::string const& prefix = "") { + std::stringstream ss; + for (auto const& col_row : fam_row) { + ss << DumpColumnRow(col_row.second, prefix + col_row.first + " "); + } + return ss.str(); +} + +std::string DumpColumnFamily(ColumnFamily const& fam, + std::string const& cf_name = "") { + std::stringstream ss; + for (auto const& fam_row : fam) { + ss << DumpColumnFamilyRow(fam_row.second, + fam_row.first + " " + cf_name + ":"); + } + return ss.str(); +} + +TEST(ColumnRow, Trivial) { + using testing_util::chrono_literals::operator""_ms; + + ColumnRow col_row; + EXPECT_FALSE(col_row.HasCells()); + col_row.SetCell(10_ms, "foo"); + EXPECT_TRUE(col_row.HasCells()); + col_row.SetCell(10_ms, "bar"); + EXPECT_EQ(std::next(col_row.begin()), col_row.end()); + EXPECT_EQ("bar", col_row.begin()->second); + + col_row.SetCell(0_ms, "baz"); + col_row.SetCell(20_ms, "qux"); + EXPECT_EQ("qux", col_row.lower_bound(30_ms)->second); + EXPECT_EQ("qux", col_row.lower_bound(20_ms)->second); + EXPECT_EQ("bar", col_row.lower_bound(10_ms)->second); + EXPECT_EQ("baz", col_row.lower_bound(0_ms)->second); + EXPECT_EQ("qux", col_row.upper_bound(30_ms)->second); + EXPECT_EQ("bar", col_row.upper_bound(20_ms)->second); + EXPECT_EQ("baz", col_row.upper_bound(10_ms)->second); + 
EXPECT_EQ(col_row.end(), col_row.upper_bound(0_ms)); +} + +TEST(ColumnRow, DeleteTimeRangeFinite) { + using testing_util::chrono_literals::operator""_ms; + + ColumnRow col_row; + col_row.SetCell(10_ms, "foo"); + col_row.SetCell(20_ms, "bar"); + col_row.SetCell(30_ms, "baz"); + col_row.SetCell(40_ms, "qux"); + google::bigtable::v2::TimestampRange range; + range.set_start_timestamp_micros(5000); + range.set_end_timestamp_micros(40000); + col_row.DeleteTimeRange(range); + + EXPECT_EQ("@40ms: qux\n", DumpColumnRow(col_row)); +} + +TEST(ColumnRow, DeleteTimeRangeInfinite) { + using testing_util::chrono_literals::operator""_ms; + + ColumnRow col_row; + col_row.SetCell(10_ms, "foo"); + col_row.SetCell(20_ms, "bar"); + col_row.SetCell(30_ms, "baz"); + col_row.SetCell(40_ms, "qux"); + google::bigtable::v2::TimestampRange range; + range.set_start_timestamp_micros(20000); + col_row.DeleteTimeRange(range); + + EXPECT_EQ("@10ms: foo\n", DumpColumnRow(col_row)); +} + +TEST(ColumnFamilyRow, Trivial) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamilyRow fam_row; + EXPECT_FALSE(fam_row.HasColumns()); + fam_row.SetCell("col1", 10_ms, "foo"); + EXPECT_TRUE(fam_row.HasColumns()); + fam_row.SetCell("col1", 10_ms, "bar"); + EXPECT_EQ(std::next(fam_row.begin()), fam_row.end()); + EXPECT_EQ("bar", fam_row.begin()->second.begin()->second); + + fam_row.SetCell("col0", 10_ms, "baz"); + fam_row.SetCell("col2", 10_ms, "qux"); + + EXPECT_EQ(R"""( +col0 @10ms: baz +col1 @10ms: bar +col2 @10ms: qux +)""", + "\n" + DumpColumnFamilyRow(fam_row)); + + EXPECT_EQ("bar", fam_row.lower_bound("col1")->second.begin()->second); + EXPECT_EQ("qux", fam_row.upper_bound("col1")->second.begin()->second); + + EXPECT_EQ( + 1, fam_row.DeleteColumn("col1", ::google::bigtable::v2::TimestampRange{}) + .size()); + + // Verify that there is no empty column. 
+ EXPECT_EQ(2, std::distance(fam_row.begin(), fam_row.end())); + + google::bigtable::v2::TimestampRange not_matching_range; + not_matching_range.set_start_timestamp_micros(10); + not_matching_range.set_end_timestamp_micros(20); + EXPECT_EQ(0, fam_row.DeleteColumn("col2", not_matching_range).size()); + + EXPECT_EQ(R"""( +col0 @10ms: baz +col2 @10ms: qux +)""", + "\n" + DumpColumnFamilyRow(fam_row)); +} + +TEST(ColumnFamily, Trivial) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam; + fam.SetCell("row1", "col0", 10_ms, "foo"); + fam.SetCell("row1", "col0", 10_ms, "bar"); + EXPECT_EQ("row1 :col0 @10ms: bar\n", DumpColumnFamily(fam)); + + fam.SetCell("row0", "col0", 10_ms, "baz"); + fam.SetCell("row2", "col0", 10_ms, "qux"); + + EXPECT_EQ(R"""( +row0 :col0 @10ms: baz +row1 :col0 @10ms: bar +row2 :col0 @10ms: qux +)""", + "\n" + DumpColumnFamily(fam)); + + EXPECT_EQ("col0 @10ms: bar\n", + DumpColumnFamilyRow(fam.lower_bound("row1")->second)); + EXPECT_EQ("col0 @10ms: qux\n", + DumpColumnFamilyRow(fam.upper_bound("row1")->second)); + + EXPECT_EQ(1, fam.DeleteColumn("row1", "col0", + ::google::bigtable::v2::TimestampRange{}) + .size()); + + // Verify that there is no empty row + EXPECT_EQ(2, std::distance(fam.begin(), fam.end())); + + EXPECT_EQ(R"""( +row0 :col0 @10ms: baz +row2 :col0 @10ms: qux +)""", + "\n" + DumpColumnFamily(fam)); + + EXPECT_GT(fam.DeleteRow("row2").size(), 0); + EXPECT_TRUE(fam.DeleteRow("row_nonexistent").empty()); + + EXPECT_EQ("row0 :col0 @10ms: baz\n", DumpColumnFamily(fam)); +} + +std::string DumpFilteredColumnFamilyStream( + AbstractCellStreamImpl& stream, NextMode next_mode = NextMode::kCell) { + std::stringstream ss; + for (; stream.HasValue(); stream.Next(next_mode)) { + auto const& cell = stream.Value(); + ss << cell.row_key() << " " << cell.column_family() << ":" + << cell.column_qualifier() << " @" << cell.timestamp().count() + << "ms: " << cell.value() << std::endl; + } + return ss.str(); +} + 
+TEST(FilteredColumnFamilyStream, Empty) { + ColumnFamily fam; + auto included_rows = std::make_shared(StringRangeSet::All()); + FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); + EXPECT_EQ("", DumpFilteredColumnFamilyStream(filtered_stream)); +} + +TEST(FilteredColumnFamilyStream, Unfiltered) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam; + fam.SetCell("row0", "col0", 10_ms, "foo"); + fam.SetCell("row0", "col1", 20_ms, "bar"); + fam.SetCell("row0", "col1", 30_ms, "baz"); + fam.SetCell("row1", "col0", 10_ms, "foo"); + fam.SetCell("row1", "col1", 20_ms, "foo"); + fam.SetCell("row1", "col1", 30_ms, "foo"); + fam.SetCell("row2", "col0", 10_ms, "qux"); + fam.SetCell("row2", "col2", 40_ms, "qux"); + fam.SetCell("row2", "col2", 50_ms, "qux"); + auto included_rows = std::make_shared(StringRangeSet::All()); + FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); + EXPECT_EQ(R"""( +row0 cf1:col0 @10ms: foo +row0 cf1:col1 @30ms: baz +row0 cf1:col1 @20ms: bar +row1 cf1:col0 @10ms: foo +row1 cf1:col1 @30ms: foo +row1 cf1:col1 @20ms: foo +row2 cf1:col0 @10ms: qux +row2 cf1:col2 @50ms: qux +row2 cf1:col2 @40ms: qux +)""", + "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +} + +TEST(FilteredColumnFamilyStream, FilterByTimestampRange) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam; + fam.SetCell("row0", "col0", 100_ms, "foo"); + fam.SetCell("row0", "col0", 300_ms, "bar"); // Filter out + + fam.SetCell("row0", "col1", 200_ms, "foo"); // Filter out + fam.SetCell("row0", "col1", 250_ms, "foo"); // Filter out + fam.SetCell("row0", "col1", 290_ms, "foo"); // Filter out + fam.SetCell("row0", "col2", 100_ms, "foo"); + fam.SetCell("row0", "col2", 120_ms, "foo"); + fam.SetCell("row0", "col2", 140_ms, "foo"); + fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter 
out + fam.SetCell("row1", "col0", 20_ms, "bar"); // Filter out + fam.SetCell("row1", "col0", 10_ms, "baz"); + fam.SetCell("row1", "col1", 200_ms, "foo"); // Filter out + fam.SetCell("row1", "col1", 250_ms, "foo"); // Filter out + fam.SetCell("row1", "col1", 290_ms, "foo"); // Filter out + fam.SetCell("row1", "col2", 100_ms, "foo"); + fam.SetCell("row1", "col2", 120_ms, "foo"); + fam.SetCell("row1", "col2", 140_ms, "foo"); + fam.SetCell("row1", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row1", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row1", "col3", 300_ms, "foo"); // Filter out + auto included_rows = std::make_shared(StringRangeSet::All()); + FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); + filtered_stream.ApplyFilter( + TimestampRange{TimestampRangeSet::Range(0_ms, 300_ms)}); + filtered_stream.ApplyFilter( + TimestampRange{TimestampRangeSet::Range(100_ms, 200_ms)}); + EXPECT_EQ(R"""( +row0 cf1:col0 @100ms: foo +row0 cf1:col2 @140ms: foo +row0 cf1:col2 @120ms: foo +row0 cf1:col2 @100ms: foo +row1 cf1:col2 @140ms: foo +row1 cf1:col2 @120ms: foo +row1 cf1:col2 @100ms: foo +)""", + "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +} + +TEST(FilteredColumnFamilyStream, FilterByColumnRange) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam; + fam.SetCell("row0", "col0", 10_ms, "foo"); // Filter out + fam.SetCell("row0", "col1", 100_ms, "foo"); + fam.SetCell("row0", "col2", 200_ms, "foo"); + fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row2", "col1", 300_ms, "foo"); + auto included_rows = std::make_shared(StringRangeSet::All()); + FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); + filtered_stream.ApplyFilter(ColumnRange{ + "dummy", StringRangeSet::Range("col1", false, "col4", false)}); + filtered_stream.ApplyFilter( + ColumnRange{"cf1", StringRangeSet::Range("col1", false, "col2", 
false)}); + EXPECT_EQ(R"""( +row0 cf1:col1 @100ms: foo +row0 cf1:col2 @200ms: foo +row2 cf1:col1 @300ms: foo +)""", + "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +} + +TEST(FilteredColumnFamilyStream, FilterByColumnRegex) { + using testing_util::chrono_literals::operator""_ms; + auto pattern1 = std::make_shared("col"); + ASSERT_TRUE(pattern1->ok()); + auto pattern2 = std::make_shared("[02]"); + ASSERT_TRUE(pattern2->ok()); + + ColumnFamily fam; + fam.SetCell("row0", "col0", 10_ms, "foo"); + fam.SetCell("row0", "col1", 100_ms, "foo"); // Filter out + fam.SetCell("row0", "col2", 200_ms, "foo"); + fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row0", "col3", 300_ms, "foo"); // Filter out + fam.SetCell("row1", "col2", 300_ms, "foo"); + fam.SetCell("row2", "col0", 300_ms, "foo"); + auto included_rows = std::make_shared(StringRangeSet::All()); + FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); + filtered_stream.ApplyFilter(ColumnRegex{pattern1}); + filtered_stream.ApplyFilter(ColumnRegex{pattern2}); + EXPECT_EQ(R"""( +row0 cf1:col0 @10ms: foo +row0 cf1:col2 @200ms: foo +row1 cf1:col2 @300ms: foo +row2 cf1:col0 @300ms: foo +)""", + "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +} + +TEST(FilteredColumnFamilyStream, FilterRowKeyRegex) { + using testing_util::chrono_literals::operator""_ms; + auto pattern1 = std::make_shared("row"); + ASSERT_TRUE(pattern1->ok()); + auto pattern2 = std::make_shared("[02]"); + ASSERT_TRUE(pattern2->ok()); + + ColumnFamily fam; + fam.SetCell("row0", "col0", 10_ms, "foo"); + fam.SetCell("row1", "col1", 100_ms, "foo"); // Filter out + fam.SetCell("row2", "col2", 200_ms, "foo"); + fam.SetCell("row3", "col3", 300_ms, "foo"); // Filter out + auto included_rows = std::make_shared(StringRangeSet::All()); + FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); + filtered_stream.ApplyFilter(RowKeyRegex{pattern1}); + 
filtered_stream.ApplyFilter(RowKeyRegex{pattern2}); + EXPECT_EQ(R"""( +row0 cf1:col0 @10ms: foo +row2 cf1:col2 @200ms: foo +)""", + "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +} + +TEST(FilteredColumnFamilyStream, FilterRowSet) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam; + fam.SetCell("row0", "col0", 10_ms, "foo"); + fam.SetCell("row1", "col1", 100_ms, "foo"); // Filter out + fam.SetCell("row2", "col2", 200_ms, "foo"); + fam.SetCell("row3", "col3", 300_ms, "foo"); // Filter out + auto included_rows = + std::make_shared(StringRangeSet::Empty()); + included_rows->Sum(StringRangeSet::Range("row0", false, "row2", true)); + included_rows->Sum(StringRangeSet::Range( + "row3", false, StringRangeSet::Range::Infinity{}, false)); + FilteredColumnFamilyStream filtered_stream(fam, "cf1", included_rows); + EXPECT_EQ(R"""( +row0 cf1:col0 @10ms: foo +row1 cf1:col1 @100ms: foo +row3 cf1:col3 @300ms: foo +)""", + "\n" + DumpFilteredColumnFamilyStream(filtered_stream)); +} + +// Add Next Column, Next Row tests + +} // anonymous namespace +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/conditional_mutations_test.cc b/google/cloud/bigtable/emulator/conditional_mutations_test.cc new file mode 100644 index 0000000000000..97520fd28f31f --- /dev/null +++ b/google/cloud/bigtable/emulator/conditional_mutations_test.cc @@ -0,0 +1,232 @@ +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/internal/make_status.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include "google/cloud/testing_util/status_matchers.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +struct SetCellParams { + std::string column_family_name; + std::string column_qualifier; + int64_t 
timestamp_micros; + std::string data; +}; + +StatusOr> CreateTable( + std::string const& table_name, std::vector& column_families) { + ::google::bigtable::admin::v2::Table schema; + schema.set_name(table_name); + for (auto& column_family_name : column_families) { + (*schema.mutable_column_families())[column_family_name] = + ::google::bigtable::admin::v2::ColumnFamily(); + } + + return Table::Create(schema); +} + +Status HasCell(std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier, int64_t timestamp_micros, + std::string const& value) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return NotFoundError("no row key found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); + } + + auto& column_family_row = column_family_row_it->second; + auto column_row_it = column_family_row.find(column_qualifier); + if (column_row_it == column_family_row.end()) { + return NotFoundError( + "no column found with qualifier", + GCP_ERROR_INFO().WithMetadata("column qualifier", column_qualifier)); + } + + auto& column_row = column_row_it->second; + auto timestamp_it = + column_row.find(std::chrono::duration_cast( + std::chrono::microseconds(timestamp_micros))); + if (timestamp_it == column_row.end()) { + return NotFoundError( + "timestamp not found", + GCP_ERROR_INFO().WithMetadata("timestamp", + absl::StrFormat("%d", timestamp_micros))); + } + + if (timestamp_it->second != value) { + return NotFoundError("wrong value", + GCP_ERROR_INFO() + .WithMetadata("expected", value) + .WithMetadata("found", timestamp_it->second)); + } + + 
return Status(); +} + +Status SetCells( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector& set_cell_params) { + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + for (auto m : set_cell_params) { + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); + set_cell_mutation->set_family_name(m.column_family_name); + set_cell_mutation->set_column_qualifier(m.column_qualifier); + set_cell_mutation->set_timestamp_micros(m.timestamp_micros); + set_cell_mutation->set_value(m.data); + } + + return table->MutateRow(mutation_request); +} + +TEST(ConditionalMutations, TestTrueMutations) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const column_family_name = "test_column_family"; + auto const* const row_key = "0"; + auto const* const column_qualifier = "column_1"; + auto timestamp_micros = 1000; + auto const* const true_mutation_value = "set by a true mutation"; + auto const* const false_mutation_value = "set by a false mutation"; + + std::vector column_families = {column_family_name}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + ::google::bigtable::v2::Mutation true_mutation; + auto* set_cell_mutation = true_mutation.mutable_set_cell(); + set_cell_mutation->set_family_name(column_family_name); + set_cell_mutation->set_column_qualifier(column_qualifier); + set_cell_mutation->set_timestamp_micros(timestamp_micros); + set_cell_mutation->set_value(true_mutation_value); + + std::vector true_mutations = {true_mutation}; + + ::google::bigtable::v2::Mutation false_mutation; + set_cell_mutation = false_mutation.mutable_set_cell(); + set_cell_mutation->set_family_name(column_family_name); + 
set_cell_mutation->set_column_qualifier(column_qualifier); + set_cell_mutation->set_timestamp_micros(timestamp_micros); + set_cell_mutation->set_value(false_mutation_value); + + std::vector false_mutations = { + false_mutation}; + + std::vector v = { + {column_family_name, "column_2", 1000, "some_value"}}; + ASSERT_STATUS_OK(SetCells(table, table_name, row_key, v)); + ASSERT_STATUS_OK(HasCell(table, v[0].column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, + v[0].data)); + + google::bigtable::v2::CheckAndMutateRowRequest cond_mut_with_pass_all; + + cond_mut_with_pass_all.set_row_key(row_key); + cond_mut_with_pass_all.set_table_name(table_name); + cond_mut_with_pass_all.mutable_predicate_filter()->set_pass_all_filter(true); + cond_mut_with_pass_all.mutable_true_mutations()->Assign( + true_mutations.begin(), true_mutations.end()); + cond_mut_with_pass_all.mutable_false_mutations()->Assign( + false_mutations.begin(), false_mutations.end()); + + auto status_or = table->CheckAndMutateRow(cond_mut_with_pass_all); + ASSERT_STATUS_OK(status_or); + + // pass_all_filter means that true_mutation should have succeeded, + // so check for the true_mutation cell value e.t.c. + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, true_mutation_value)); + + // And just for good measure, ensure that false_mutation was not written. 
+  ASSERT_EQ(false, HasCell(table, column_family_name, row_key, column_qualifier,
+                           timestamp_micros, false_mutation_value)
+                       .ok());
+}
+
+// Checks that CheckAndMutateRow() refuses malformed requests: one that lacks
+// a row key, and one that carries neither true nor false mutations.
+TEST(ConditionalMutations, RejectInvalidRequest) {
+  auto const* const table_name = "projects/test/instances/test/tables/test";
+  auto const* const column_family_name = "test_column_family";
+  auto const* const row_key = "0";
+  auto const* const column_qualifier = "column_1";
+  auto timestamp_micros = 1000;
+  auto const* const true_mutation_value = "set by a true mutation";
+  auto const* const false_mutation_value = "set by a false mutation";
+
+  std::vector column_families = {column_family_name};
+  auto maybe_table = CreateTable(table_name, column_families);
+  ASSERT_STATUS_OK(maybe_table);
+  auto table = maybe_table.value();
+
+  ::google::bigtable::v2::Mutation true_mutation;
+  auto* set_cell_mutation = true_mutation.mutable_set_cell();
+  set_cell_mutation->set_family_name(column_family_name);
+  set_cell_mutation->set_column_qualifier(column_qualifier);
+  set_cell_mutation->set_timestamp_micros(timestamp_micros);
+  set_cell_mutation->set_value(true_mutation_value);
+
+  std::vector true_mutations = {true_mutation};
+
+  ::google::bigtable::v2::Mutation false_mutation;
+  set_cell_mutation = false_mutation.mutable_set_cell();
+  set_cell_mutation->set_family_name(column_family_name);
+  set_cell_mutation->set_column_qualifier(column_qualifier);
+  set_cell_mutation->set_timestamp_micros(timestamp_micros);
+  set_cell_mutation->set_value(false_mutation_value);
+
+  std::vector false_mutations = {
+      false_mutation};
+
+  // Case 1: the table and both mutation lists are set, the row key is not.
+  google::bigtable::v2::CheckAndMutateRowRequest cond_mutation_no_row_key;
+  cond_mutation_no_row_key.set_table_name(table_name);
+  cond_mutation_no_row_key.mutable_true_mutations()->Assign(
+      true_mutations.begin(), true_mutations.end());
+  cond_mutation_no_row_key.mutable_false_mutations()->Assign(
+      false_mutations.begin(), false_mutations.end());
+
+  auto status_or = table->CheckAndMutateRow(cond_mutation_no_row_key);
+  ASSERT_EQ(false, status_or.ok());
+
+  // Case 2: the table and the row key are set; both mutation lists stay empty.
+  // (Naming the table on this request keeps an empty name from masking that.)
+  google::bigtable::v2::CheckAndMutateRowRequest cond_mutation_no_mutations;
+  cond_mutation_no_mutations.set_row_key(row_key);
+  cond_mutation_no_mutations.set_table_name(table_name);
+  ASSERT_EQ(false, table->CheckAndMutateRow(cond_mutation_no_mutations).ok());
+}
+
+}  // namespace emulator
+}  // namespace bigtable
+}  // namespace cloud
+}  // namespace google
diff --git a/google/cloud/bigtable/emulator/drop_row_range_test.cc b/google/cloud/bigtable/emulator/drop_row_range_test.cc
new file mode 100644
index 0000000000000..eb9255e9ce00d
--- /dev/null
+++ b/google/cloud/bigtable/emulator/drop_row_range_test.cc
@@ -0,0 +1,251 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/internal/make_status.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include "google/cloud/testing_util/status_matchers.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +struct SetCellParams { + std::string column_family_name; + std::string column_qualifier; + int64_t timestamp_micros; + std::string data; +}; + +StatusOr> CreateTable( + std::string const& table_name, std::vector& column_families) { + ::google::bigtable::admin::v2::Table schema; + schema.set_name(table_name); + for (auto& column_family_name : column_families) { + (*schema.mutable_column_families())[column_family_name] = + ::google::bigtable::admin::v2::ColumnFamily(); + } + + return Table::Create(schema); +} + +Status SetCells( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector& set_cell_params) { + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + for (auto m : set_cell_params) { + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); + set_cell_mutation->set_family_name(m.column_family_name); + set_cell_mutation->set_column_qualifier(m.column_qualifier); + set_cell_mutation->set_timestamp_micros(m.timestamp_micros); + set_cell_mutation->set_value(m.data); + } + + return table->MutateRow(mutation_request); +} + +Status SetCellsInMultipleRows( + std::shared_ptr table, + std::string const& table_name, + std::map> params) { + for (auto& p : params) { + auto status = SetCells(table, table_name, p.first, p.second); + if (!status.ok()) { + 
return status; + } + } + + return Status(); +} + +Status HasCell(std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier, int64_t timestamp_micros, + std::string const& value) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return NotFoundError("no row key found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); + } + + auto& column_family_row = column_family_row_it->second; + auto column_row_it = column_family_row.find(column_qualifier); + if (column_row_it == column_family_row.end()) { + return NotFoundError( + "no column found with qualifier", + GCP_ERROR_INFO().WithMetadata("column qualifier", column_qualifier)); + } + + auto& column_row = column_row_it->second; + auto timestamp_it = + column_row.find(std::chrono::duration_cast( + std::chrono::microseconds(timestamp_micros))); + if (timestamp_it == column_row.end()) { + return NotFoundError( + "timestamp not found", + GCP_ERROR_INFO().WithMetadata("timestamp", + absl::StrFormat("%d", timestamp_micros))); + } + + if (timestamp_it->second != value) { + return NotFoundError("wrong value", + GCP_ERROR_INFO() + .WithMetadata("expected", value) + .WithMetadata("found", timestamp_it->second)); + } + + return Status(); +} + +StatusOr HasRow( + std::shared_ptr& table, + std::string const& column_family, std::string const& row_key) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + 
auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return false; + } + + return true; +} + +TEST(DropRowRange, DropAll) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + std::vector column_families = {"column_family_1", + "column_family_2"}; + + auto maybe_table = CreateTable(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + + auto table = maybe_table.value(); + + std::map> params = { + {"0", + {{column_families[0], "column_1", 1000, "data_0"}, + {column_families[1], "column_1", 3000, "data_2"}}}, + {"1", + {{column_families[0], "column_1", 2000, "data_1"}, + {column_families[1], "column_1", 4000, "data_3"}}}}; + + ASSERT_STATUS_OK(SetCellsInMultipleRows(table, table_name, params)); + + ::google::bigtable::admin::v2::DropRowRangeRequest request; + request.set_name(table_name); + request.set_delete_all_data_from_table(true); + + auto status = table->DropRowRange(request); + ASSERT_STATUS_OK(status); + + for (auto& p : params) { + for (auto& set_cell_params : p.second) { + auto status_or = + HasRow(table, set_cell_params.column_family_name, p.first); + ASSERT_STATUS_OK(status_or); + ASSERT_FALSE(status_or.value()); + } + } +} + +TEST(DropRowRange, DropSome) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + std::vector column_families = {"column_family_1", + "column_family_2"}; + + auto maybe_table = CreateTable(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + + auto table = maybe_table.value(); + + std::map> params = { + {"a", + { + {column_families[0], "column_1", 1000, "data_0"}, + }}, + {"aa", + {{column_families[0], "column_1", 2000, "data_1"}, + {column_families[1], "column_1", 5000, "data_5"}}}, + {"aaa", {{column_families[0], "column_1", 3000, "data_2"}}}, + {"aab", {{column_families[0], "column_1", 4000, "data_3"}}}, + {"ab", {{column_families[1], "column_1", 6000, "data_6"}}}, + }; 
+ + ASSERT_STATUS_OK(SetCellsInMultipleRows(table, table_name, params)); + + ::google::bigtable::admin::v2::DropRowRangeRequest request; + request.set_name(table_name); + std::string prefix = "aa"; + request.set_row_key_prefix(prefix); + + auto status = table->DropRowRange(request); + ASSERT_STATUS_OK(status); + + for (auto& p : params) { + for (auto& set_cell_params : p.second) { + if (absl::StartsWith(p.first, prefix)) { + auto status_or = + HasRow(table, set_cell_params.column_family_name, p.first); + ASSERT_STATUS_OK(status_or); + ASSERT_FALSE(status_or.value()); + } else { + auto status_or = + HasRow(table, set_cell_params.column_family_name, p.first); + ASSERT_STATUS_OK(status_or); + ASSERT_TRUE(status_or.value()); + } + } + } +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/emulator.cc b/google/cloud/bigtable/emulator/emulator.cc new file mode 100644 index 0000000000000..291057dd705a0 --- /dev/null +++ b/google/cloud/bigtable/emulator/emulator.cc @@ -0,0 +1,50 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "google/cloud/bigtable/emulator/server.h"
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "absl/flags/usage.h"
+#include
+#include
+#include
+#include
+#include
+
+ABSL_FLAG(std::string, host, "localhost",
+          "the address to bind to on the local machine");
+ABSL_FLAG(std::uint16_t, port, 8888,
+          "the port to bind to on the local machine");
+
+// Runs the emulator: binds to --host/--port, then blocks in `Wait()`.
+int main(int argc, char* argv[]) {
+  // StrCat() only concatenates; it never expands printf-style directives.
+  absl::SetProgramUsageMessage(
+      absl::StrCat("Usage: ", argv[0], " --host=HOST --port=PORT"));
+  absl::ParseCommandLine(argc, argv);
+
+  auto maybe_server =
+      google::cloud::bigtable::emulator::CreateDefaultEmulatorServer(
+          absl::GetFlag(FLAGS_host), absl::GetFlag(FLAGS_port));
+  if (!maybe_server) {
+    std::cerr << "CreateDefaultEmulatorServer() failed. See logs for "
+                 "possible reason\n";
+    return 1;
+  }
+  auto& server = maybe_server.value();
+  // Flush now: Wait() blocks, and a piped stdout is otherwise fully buffered.
+  std::cout << "Server running on port " << server->bound_port() << std::endl;
+  server->Wait();
+  return 0;
+}
diff --git a/google/cloud/bigtable/emulator/filter.cc b/google/cloud/bigtable/emulator/filter.cc
new file mode 100644
index 0000000000000..6cc4ba6f1738d
--- /dev/null
+++ b/google/cloud/bigtable/emulator/filter.cc
@@ -0,0 +1,1172 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/bigtable/internal/google_bytes_traits.h" +#include "google/cloud/internal/invoke_result.h" +#include "google/cloud/internal/make_status.h" +#include "google/cloud/status_or.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +namespace { + +bool PassAllFilters(InternalFilter const&) { return true; } + +// We need to ensure that the value outlives the reference stored in CellView. +std::string const kStrippedValue; + +} // namespace + +void CellStream::Next(NextMode mode) { + if (impl_->Next(mode)) { + return; + } + if (mode == NextMode::kColumn) { + EmulateNextColumn(); + return; + } + assert(mode == NextMode::kRow); + EmulateNextRow(); +} + +void CellStream::NextColumn() { + if (!impl_->Next(NextMode::kColumn)) { + EmulateNextColumn(); + } +} + +void CellStream::EmulateNextColumn() { + std::string cur_row_key = impl_->Value().row_key(); + std::string cur_column_family = impl_->Value().column_family(); + std::string cur_column_qualifier = impl_->Value().column_qualifier(); + for (impl_->Next(NextMode::kCell); + impl_->HasValue() && cur_row_key == impl_->Value().row_key() && + cur_column_family == impl_->Value().column_family() && + cur_column_qualifier == impl_->Value().column_qualifier(); + impl_->Next(NextMode::kCell)); +} + +void CellStream::EmulateNextRow() { + std::string cur_row_key = impl_->Value().row_key(); + for (NextColumn(); + impl_->HasValue() && cur_row_key == impl_->Value().row_key(); + NextColumn()); +} + +/** + * A meta functor useful for building filters which act on whole rows. + * + * Some filters (e.g. `row_sample_filter`) have a per-row state (in this + * example, the state is either to filter a row out or not). 
This state is + * reset every time a new row is encountered. Hence, this meta functor allows + * its users to specify two underlying functors: + * * `FilterFunctor` which given the per-row state and a cell, decides whether + * to filter it out or not (if not, also how far to advance the underlying + * cell stream). + * * `StateResetFunctor` which creates a new state for every row. + * + * @tparam FilterFunctor a functor which accepts the per-row state and a cell as + * input and returns whether this cell should be included in the result. + * @tparam StateResetFunctor a zero-argument functor which creates a new per-row + * state. + */ +template +class PerRowStateFilter { + static_assert(google::cloud::internal::is_invocable::value, + "StateResetFunctor must be invocable with no arguments."); + using State = + std::decay_t>; + static_assert(std::is_default_constructible::value, + "State must be default constructible."); + static_assert(std::is_assignable::value, + "State must be assignable."); + static_assert(std::is_same, + absl::optional>::value, + "Invalid result of `FilterFunctor` invocation."); + + public: + /** + * Create a new object. + * + * @param filter a functor which accepts the per-row state and a cell as + * input and returns whether this cell should be included in the result. + * @param reset a zero-argument functor which creates a new per-row + * state. + */ + PerRowStateFilter(FilterFunctor filter, StateResetFunctor reset) + : filter_(std::move(filter)), reset_(std::move(reset)) {} + + /** + * Decide on what to do with a cell. + * + * @param cell_view the cell in question + * @return if empty - include the cell in the result; if not empty - instruct + * the caller by how much to advance the underlying stream. 
+ */ + absl::optional operator()(CellView const& cell_view) { + if (!prev_row_ || prev_row_.value() != cell_view.row_key()) { + state_ = reset_(); + prev_row_ = cell_view.row_key(); + } + return filter_(state_, cell_view); + } + + private: + absl::optional prev_row_; + State state_; + FilterFunctor filter_; + StateResetFunctor reset_; +}; + +/// A functor for filtering cell streams to return only first X cells per col. +class CellsPerColumnFilter { + public: + explicit CellsPerColumnFilter(std::size_t cells_per_column_limit) + : cells_per_column_limit_(cells_per_column_limit), + cells_per_column_left_(cells_per_column_limit) {} + + absl::optional operator()(CellView const& cell_view) { + if (!prev_ || !prev_->Matches(cell_view)) { + cells_per_column_left_ = cells_per_column_limit_; + prev_ = Prev(cell_view); + } + if (cells_per_column_left_ > 0) { + --cells_per_column_left_; + return {}; + } + return NextMode::kColumn; + } + + private: + class Prev { + public: + explicit Prev(CellView const& cell_view) + : row_key_(cell_view.row_key()), + column_family_(cell_view.column_family()), + column_qualifier_(cell_view.column_qualifier()) {} + + bool Matches(CellView const& cell_view) { + return row_key_ == cell_view.row_key() && + column_family_ == cell_view.column_family() && + column_qualifier_ == cell_view.column_qualifier(); + } + + private: + std::string row_key_; + std::string column_family_; + std::string column_qualifier_; + }; + absl::optional prev_; + std::size_t cells_per_column_limit_; + std::size_t cells_per_column_left_; +}; + +/** + * A meta cell stream, which is created from a cell transforming functor. + * + * @tparam Transformer an unary functor which should accept a `CellView` and + * return a transformed version of it. + */ +template +class TrivialTransformer : public AbstractCellStreamImpl { + public: + /** + * Create a new object. + * + * @param source underlying cell stream to be transformed. 
+   * @param transformer functor, which accepts a `CellView` and returns a
+   *     transformed `CellView` to be returned from this stream.
+   */
+  TrivialTransformer(CellStream source, Transformer transformer)
+      : source_(std::move(source)), transformer_(std::move(transformer)) {}
+
+  bool ApplyFilter(InternalFilter const& internal_filter) override {
+    return source_.ApplyFilter(internal_filter);
+  }
+
+  bool HasValue() const override { return source_.HasValue(); }
+
+  CellView const& Value() const override {
+    if (!transformed_) {
+      transformed_ = absl::optional(transformer_(source_.Value()));
+    }
+    return transformed_.value();
+  }
+
+  bool Next(NextMode mode) override {
+    source_.Next(mode);
+    transformed_.reset();
+    return true;
+  }
+
+ private:
+  CellStream source_;
+  Transformer transformer_;
+  mutable absl::optional transformed_;
+};
+
+/**
+ * Create a cell stream from an underlying stream and a transforming functor.
+ *
+ * @tparam Transformer an unary functor which should accept a `CellView` and
+ *     return a transformed version of it.
+ */
+template
+CellStream MakeTrivialTransformer(CellStream source, Transformer transformer) {
+  return CellStream(std::make_unique>(
+      std::move(source), std::move(transformer)));
+}
+
+/**
+ * A meta cell stream filtering an underlying stream according to a functor.
+ *
+ * @tparam Filter a functor which, given a cell, returns an empty optional to
+ *     keep it, or the `NextMode` by which to advance the stream to skip it.
+ */
+template
+class TrivialFilter : public AbstractCellStreamImpl {
+  static_assert(
+      std::is_same<
+          google::cloud::internal::invoke_result_t,
+          absl::optional>::value,
+      "Invalid filter return type.");
+
+ public:
+  /**
+   * Create a new object.
+   *
+   * @param source underlying cell stream to be filtered.
+   * @param filter functor, which accepts a `CellView` and returns an empty
+   *     optional to keep the cell, or how far to advance the underlying cell
+   *     stream in order to skip it.
+   * @param filter_filter a functor which given an `InternalFilter` decides
+   *     whether filtering this cell stream's results and then applying the
+   *     `InternalFilter` would yield the same results as applying
+   *     `InternalFilter` to the underlying stream and then performing this
+   *     stream's filtering.
+   */
+  TrivialFilter(CellStream source, Filter filter,
+                std::function filter_filter)
+      : source_(std::move(source)),
+        filter_(std::move(filter)),
+        filter_filter_(std::move(filter_filter)) {}
+
+  bool ApplyFilter(InternalFilter const& filter) override {
+    return filter_filter_(filter) && source_.ApplyFilter(filter);
+  }
+
+  bool HasValue() const override {
+    InitializeIfNeeded();
+    return source_.HasValue();
+  }
+
+  CellView const& Value() const override {
+    InitializeIfNeeded();
+    return source_.Value();
+  }
+
+  bool Next(NextMode mode) override {
+    // Settle on the first unfiltered cell before advancing; otherwise a later
+    // lazy initialization would run the stateful `filter_` on a cell twice.
+    InitializeIfNeeded();
+    source_.Next(mode);
+    EnsureCurrentNotFiltered();
+    return true;
+  }
+
+ private:
+  /// Consume the underlying stream until an unfiltered cell is encountered.
+  void EnsureCurrentNotFiltered() const {
+    while (source_.HasValue()) {
+      auto maybe_next_mode = filter_(*source_);
+      if (!maybe_next_mode) {
+        return;
+      }
+      source_.Next(*maybe_next_mode);
+    }
+  }
+
+  void InitializeIfNeeded() const {
+    if (!initialized_) {
+      EnsureCurrentNotFiltered();
+      initialized_ = true;
+    }
+  }
+
+  mutable bool initialized_{false};
+  mutable CellStream source_;
+  mutable Filter filter_;
+  std::function filter_filter_;
+};
+
+/**
+ * Create a cell stream from an underlying stream and a cell filtering functor.
+ *
+ * @param source underlying cell stream to be filtered.
+ * @param filter functor, which accepts a `CellView` and returns an empty
+ *     optional to keep the cell, or how far to advance the underlying cell
+ *     stream in order to skip it.
+ * @param filter_filter a functor which given an `InternalFilter` decides + * whether filtering this cell stream's results and then applying the + * `InternalFilter` would yield the same results as applying + * `InternalFilter` to the underlying stream and then performing this + * stream's filtering. + */ +template +CellStream MakeTrivialFilter( + CellStream source, Filter filter, + std::function filter_filter = PassAllFilters) { + return CellStream(std::make_unique>( + std::move(source), std::move(filter), std::move(filter_filter))); +} + +/** + * Create a cell stream filtering underlying stream, which has a per-row state. + * + * @param source underlying cell stream to be filtered. + * @param filter a functor which accepts the per-row state and a cell as + * input and returns whether this cell should be included in the result. + * @param reset a zero-argument functor which creates a new per-row + * state. + * @param filter_filter a functor which given an `InternalFilter` decides + * whether filtering this cell stream's results and then applying the + * `InternalFilter` would yield the same results as applying + * `InternalFilter` to the underlying stream and the perform this stream's + * filtering. 
+ */ +template +CellStream MakePerRowStateFilter( + CellStream source, FilterFunctor filter, StateResetFunctor state_reset, + std::function filter_filter = PassAllFilters) { + return MakeTrivialFilter(std::move(source), + PerRowStateFilter( + std::move(filter), std::move(state_reset)), + std::move(filter_filter)); +} + +bool MergeCellStreams::CellStreamGreater::operator()( + std::unique_ptr const& lhs, + std::unique_ptr const& rhs) const { + auto row_key_cmp = + internal::CompareRowKey((*lhs)->row_key(), (*rhs)->row_key()); + if (row_key_cmp != 0) { + return row_key_cmp > 0; + } + auto cf_cmp = internal::CompareColumnQualifiers((*lhs)->column_family(), + (*rhs)->column_family()); + if (cf_cmp != 0) { + return cf_cmp > 0; + } + auto col_cmp = internal::CompareColumnQualifiers((*lhs)->column_qualifier(), + (*rhs)->column_qualifier()); + if (col_cmp != 0) { + return col_cmp > 0; + } + return (*lhs)->timestamp() > (*rhs)->timestamp(); +} + +MergeCellStreams::MergeCellStreams(std::vector streams) { + for (auto& stream : streams) { + unfinished_streams_.emplace_back( + std::make_unique(std::move(stream))); + } +} + +bool MergeCellStreams::ApplyFilter(InternalFilter const& internal_filter) { + assert(!initialized_); + bool res = true; + for (auto& stream : unfinished_streams_) { + res = stream->ApplyFilter(internal_filter) && res; + } + return res; +} + +bool MergeCellStreams::HasValue() const { + InitializeIfNeeded(); + return !unfinished_streams_.empty(); +} + +CellView const& MergeCellStreams::Value() const { + InitializeIfNeeded(); + return unfinished_streams_.front()->Value(); +} + +bool MergeCellStreams::Next(NextMode mode) { + InitializeIfNeeded(); + assert(!unfinished_streams_.empty()); + + // If we're skipping to the next column/row, we need to advance all streams + // that currently point to that column/row. + // + // To do this, we temporarily remove those streams from the heap + // (since advancing them would require re-adjusting the heap). 
+ // These streams remain at the end of the `unfinished_streams_` vector, + // but are not considered part of the heap. The `to_readd_begin` iterator + // marks the start of the range in `unfinished_streams_` that is outside + // the heap. + + std::pop_heap(unfinished_streams_.begin(), unfinished_streams_.end(), + CellStreamGreater()); + auto first_to_advance = std::prev(unfinished_streams_.end()); + auto to_readd_begin = first_to_advance; + + auto all_streams_to_advance_removed_from_heap = [&]() { + if (unfinished_streams_.begin() == to_readd_begin) { + // All streams removed. + return true; + } + if (mode == NextMode::kCell) { + // We only need to remove one stream, which we already did. + return true; + } + if (mode == NextMode::kRow) { + return unfinished_streams_.front()->Value().row_key() != + (*first_to_advance)->Value().row_key(); + } + assert(mode == NextMode::kColumn); + return unfinished_streams_.front()->Value().column_qualifier() != + (*first_to_advance)->Value().column_qualifier() || + unfinished_streams_.front()->Value().column_family() != + (*first_to_advance)->Value().column_family() || + unfinished_streams_.front()->Value().row_key() != + (*first_to_advance)->Value().row_key(); + }; + while (!all_streams_to_advance_removed_from_heap()) { + std::pop_heap(unfinished_streams_.begin(), to_readd_begin, + CellStreamGreater()); + --to_readd_begin; + } + while (to_readd_begin != unfinished_streams_.end()) { + (*to_readd_begin)->Next(mode); + if ((*to_readd_begin)->HasValue()) { + ++to_readd_begin; + std::push_heap(unfinished_streams_.begin(), to_readd_begin, + CellStreamGreater()); + continue; + } + // The stream is finished, delete it. + to_readd_begin->swap(unfinished_streams_.back()); + unfinished_streams_.pop_back(); + // Don't advance `to_readd_begin` since it points to a different stream + // after `swap()`. 
+ } + return true; +} + +void MergeCellStreams::InitializeIfNeeded() const { + if (!initialized_) { + for (auto stream_it = unfinished_streams_.begin(); + stream_it != unfinished_streams_.end();) { + if (!(*stream_it)->HasValue()) { + stream_it->swap(unfinished_streams_.back()); + unfinished_streams_.pop_back(); + } else { + ++stream_it; + } + } + std::make_heap(unfinished_streams_.begin(), unfinished_streams_.end(), + CellStreamGreater()); + initialized_ = true; + } +} + +/// A cell stream for handling a Condition filter. +class ConditionStream : public AbstractCellStreamImpl { + public: + /** + * Create a new object. + * + * @param source the underlying cell stream + * @param predicate_stream the stream deciding whether for a given row the + * true branch or false branch should be selected + * @param true_stream the stream generating cells for the true branch + * @param false_stream the stream generating cells for the false branch + */ + ConditionStream(CellStream source, CellStream predicate, + CellStream true_stream, CellStream false_stream) + : source_(std::move(source)), + predicate_stream_(std::move(predicate)), + true_stream_(std::move(true_stream)), + false_stream_(std::move(false_stream)) {} + + bool ApplyFilter(InternalFilter const& internal_filter) override { + bool res = true; + if (absl::holds_alternative(internal_filter)) { + // If we're skipping whole rows we may apply it to all four streams. + // If we fail to apply to `source_` or `predicate_stream` but succeed with + // both `false_stream` and `true_stream` we should still return true + // because the stream will not yield the unwanted cells. 
+ source_.ApplyFilter(internal_filter); + predicate_stream_.ApplyFilter(internal_filter); + } + res = true_stream_.ApplyFilter(internal_filter) && res; + res = false_stream_.ApplyFilter(internal_filter) && res; + return res; + } + + bool HasValue() const override { + InitializeIfNeeded(); + return source_.HasValue(); + } + + CellView const& Value() const override { + InitializeIfNeeded(); + if (condition_true_) { + return *true_stream_; + } + return *false_stream_; + } + + bool Next(NextMode mode) override { + InitializeIfNeeded(); + assert(source_); + if (condition_true_) { + true_stream_.Next(mode); + if (!true_stream_ || + internal::CompareRowKey(current_row_, true_stream_->row_key()) != 0) { + source_.Next(NextMode::kRow); + OnNewRow(); + } + } else { + false_stream_.Next(mode); + if (!false_stream_ || internal::CompareRowKey( + current_row_, false_stream_->row_key()) != 0) { + source_.Next(NextMode::kRow); + OnNewRow(); + } + } + return true; + } + + private: + void OnNewRow() const { + while (true) { + if (!source_) { + return; + } + auto cell_view = *source_; + current_row_ = cell_view.row_key(); + + // Let's test if the predicate stream returned something for this row. + for (; predicate_stream_ && + internal::CompareRowKey(predicate_stream_->row_key(), + cell_view.row_key()) < 0; + predicate_stream_.Next(NextMode::kRow)); + if (predicate_stream_ && + internal::CompareRowKey(predicate_stream_->row_key(), + cell_view.row_key()) == 0) { + // Predicate stream did return something for this row. + condition_true_ = true; + // Fast-forward the true stream to start at current row. + for (; true_stream_ && internal::CompareRowKey(true_stream_->row_key(), + cell_view.row_key()) < 0; + true_stream_.Next(NextMode::kRow)); + } else { + // Predicate stream did not return anything for this row. + condition_true_ = false; + // Fast-forward the false stream to start at current row. 
+ for (; + false_stream_ && internal::CompareRowKey(false_stream_->row_key(), + cell_view.row_key()) < 0; + false_stream_.Next(NextMode::kRow)); + } + if (condition_true_ && true_stream_ && + internal::CompareRowKey(true_stream_->row_key(), + cell_view.row_key()) == 0) { + return; + } + if (!condition_true_ && false_stream_ && + internal::CompareRowKey(false_stream_->row_key(), + cell_view.row_key()) == 0) { + return; + } + // True/false stream exhausted, fast-forward source. + source_.Next(NextMode::kRow); + } + } + + void InitializeIfNeeded() const { + if (initialized_) { + return; + } + OnNewRow(); + initialized_ = true; + } + + mutable CellStream source_; + mutable CellStream predicate_stream_; + mutable CellStream true_stream_; + mutable CellStream false_stream_; + mutable bool initialized_{false}; + mutable bool condition_true_; + mutable std::string current_row_; +}; + +/// A cell stream not generating any cells. +class EmptyCellStreamImpl : public AbstractCellStreamImpl { + bool ApplyFilter(InternalFilter const&) override { return true; } + bool HasValue() const override { return false; } + CellView const& Value() const override { + assert(false); + // The code below makes no sense but it should be dead. It's to silence + // compiler warnings. + static CellView dummy{"row", "cf", "col", std::chrono::milliseconds(0), + "val"}; + return dummy; + } + bool Next(NextMode) override { return true; } +}; + +// NOLINTBEGIN(misc-no-recursion,readability-function-cognitive-complexity) +/** + * Create a filter DAG constructor based on the proto definition. + * + * @param filter the protobuf definition of the filter DAG to be created + * @param source_ctor a zero-argument functor which can be used to create the + * underlying cell stream, which this filter will work on. + * @param direct_sinks an accumulator which will be filled by zero-argument + * functors which will create branches of the DAG whose output should bypass + * any other filters (the `sink` filter). 
+ * @return a zero-argument functor which will return a DAG described by + * `filter`. + */ +StatusOr CreateFilterImpl( + ::google::bigtable::v2::RowFilter const& filter, + CellStreamConstructor source_ctor, + std::vector& direct_sinks) { + if (filter.has_pass_all_filter()) { + if (!filter.pass_all_filter()) { + return InvalidArgumentError( + "`pass_all_filter` explicitly set to `false`.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + return source_ctor; + } + if (filter.has_block_all_filter()) { + if (!filter.block_all_filter()) { + return InvalidArgumentError( + "`block_all_filter` explicitly set to `false`.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + CellStreamConstructor res = [] { + return CellStream(std::make_unique()); + }; + return res; + } + if (filter.has_row_key_regex_filter()) { + auto pattern = std::make_shared(filter.row_key_regex_filter()); + if (!pattern->ok()) { + return InvalidArgumentError( + "`row_key_regex_filter` is not a valid RE2 regex.", + GCP_ERROR_INFO() + .WithMetadata("filter", filter.DebugString()) + .WithMetadata("description", pattern->error())); + } + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + pattern = std::move(pattern)] { + auto source = source_ctor(); + if (source.ApplyFilter(RowKeyRegex{pattern})) { + return source; + } + return MakeTrivialFilter( + std::move(source), + [pattern = pattern]( + CellView const& cell_view) mutable -> absl::optional { + if (re2::RE2::PartialMatch(cell_view.row_key(), *pattern)) { + return {}; + } + return NextMode::kCell; + }); + }; + return res; + } + if (filter.has_value_regex_filter()) { + auto pattern = std::make_shared(filter.value_regex_filter()); + if (!pattern->ok()) { + return InvalidArgumentError( + "`value_regex_filter` is not a valid RE2 regex.", + GCP_ERROR_INFO() + .WithMetadata("filter", filter.DebugString()) + .WithMetadata("description", pattern->error())); + } + CellStreamConstructor res = 
[source_ctor = std::move(source_ctor), + pattern = std::move(pattern)] { + auto source = source_ctor(); + return MakeTrivialFilter( + std::move(source), + [pattern = pattern]( + CellView const& cell_view) mutable -> absl::optional { + if (re2::RE2::PartialMatch(cell_view.value(), *pattern)) { + return {}; + } + return NextMode::kCell; + }); + }; + return res; + } + if (filter.has_row_sample_filter()) { + double pass_prob = filter.row_sample_filter(); + if (pass_prob + std::numeric_limits::epsilon() < 0 || + pass_prob - std::numeric_limits::epsilon() > 1) { + return InvalidArgumentError( + "`row_sample_filter` is not a valid probability.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + pass_prob] { + auto source = source_ctor(); + return MakePerRowStateFilter( + std::move(source), + [](bool& should_pass, CellView const&) -> absl::optional { + if (should_pass) { + return {}; + } + return NextMode::kRow; + }, + [gen = std::mt19937(), pass_prob]() mutable { + std::uniform_real_distribution dis(0.0, 1.0); + return dis(gen) < pass_prob; + }); + }; + return res; + } + if (filter.has_family_name_regex_filter()) { + auto pattern = + std::make_shared(filter.family_name_regex_filter()); + if (!pattern->ok()) { + return InvalidArgumentError( + "`family_name_regex_filter` is not a valid RE2 regex.", + GCP_ERROR_INFO() + .WithMetadata("filter", filter.DebugString()) + .WithMetadata("description", pattern->error())); + } + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + pattern = std::move(pattern)] { + auto source = source_ctor(); + if (source.ApplyFilter(FamilyNameRegex{pattern})) { + return source; + } + return MakeTrivialFilter( + std::move(source), + [pattern = pattern]( + CellView const& cell_view) mutable -> absl::optional { + if (re2::RE2::PartialMatch(cell_view.column_family(), *pattern)) { + return {}; + } + // FIXME we could introduce even column family 
skipping + return NextMode::kColumn; + }); + }; + return res; + } + if (filter.has_column_qualifier_regex_filter()) { + auto pattern = + std::make_shared(filter.column_qualifier_regex_filter()); + if (!pattern->ok()) { + return InvalidArgumentError( + "`column_qualifier_regex_filter` is not a valid RE2 regex.", + GCP_ERROR_INFO() + .WithMetadata("filter", filter.DebugString()) + .WithMetadata("description", pattern->error())); + } + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + pattern = std::move(pattern)] { + auto source = source_ctor(); + if (source.ApplyFilter(ColumnRegex{pattern})) { + return source; + } + return MakeTrivialFilter( + std::move(source), + [pattern]( + CellView const& cell_view) mutable -> absl::optional { + if (re2::RE2::PartialMatch(cell_view.column_qualifier(), + *pattern)) { + return {}; + } + return NextMode::kColumn; + }); + }; + return res; + } + if (filter.has_column_range_filter()) { + auto maybe_range = + StringRangeSet::Range::FromColumnRange(filter.column_range_filter()); + if (!maybe_range) { + return maybe_range.status(); + } + std::string family_name = filter.column_range_filter().family_name(); + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + family_name = std::move(family_name), + range = *std::move(maybe_range)] { + auto source = source_ctor(); + if (source.ApplyFilter(ColumnRange{family_name, range})) { + return source; + } + return MakeTrivialFilter( + std::move(source), + [range, + family_name](CellView const& cell_view) -> absl::optional { + if (cell_view.column_family() == family_name && + range.IsWithin(cell_view.column_qualifier())) { + return {}; + } + // FIXME - we might know that we should skip the whole column + // family + return NextMode::kColumn; + }); + }; + return res; + } + if (filter.has_value_range_filter()) { + auto maybe_range = + StringRangeSet::Range::FromValueRange(filter.value_range_filter()); + if (!maybe_range) { + return maybe_range.status(); + } + 
CellStreamConstructor res = [source_ctor = std::move(source_ctor), + range = *std::move(maybe_range)] { + auto source = source_ctor(); + return MakeTrivialFilter( + std::move(source), + [range](CellView const& cell_view) -> absl::optional { + if (range.IsWithin(cell_view.value())) { + return {}; + } + return NextMode::kCell; + }); + }; + return res; + } + if (filter.has_cells_per_row_offset_filter()) { + std::int64_t cells_per_row_offset = filter.cells_per_row_offset_filter(); + if (cells_per_row_offset < 0) { + return InvalidArgumentError( + "`cells_per_row_offset_filter` is negative.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + cells_per_row_offset] { + auto source = source_ctor(); + return MakePerRowStateFilter( + std::move(source), + [](std::int64_t& per_row_state, + CellView const&) -> absl::optional { + if (per_row_state-- <= 0) { + return {}; + } + return NextMode::kCell; + }, + [cells_per_row_offset]() { return cells_per_row_offset; }, + [](InternalFilter const& internal_filter) { + return absl::holds_alternative(internal_filter); + }); + }; + return res; + } + if (filter.has_cells_per_row_limit_filter()) { + std::int64_t cells_per_row_limit = filter.cells_per_row_limit_filter(); + if (cells_per_row_limit < 0) { + return InvalidArgumentError( + "`cells_per_row_limit_filter` is negative.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + cells_per_row_limit] { + auto source = source_ctor(); + return MakePerRowStateFilter( + std::move(source), + [cells_per_row_limit](std::int64_t& per_row_state, + CellView const&) -> absl::optional { + if (per_row_state++ < cells_per_row_limit) { + return {}; + } + return NextMode::kRow; + }, + []() -> std::int64_t { return 0; }, + [](InternalFilter const& internal_filter) { + return absl::holds_alternative(internal_filter); + }); + }; + 
return res; + } + if (filter.has_cells_per_column_limit_filter()) { + std::int32_t cells_per_column_limit = + filter.cells_per_column_limit_filter(); + if (cells_per_column_limit < 0) { + return InvalidArgumentError( + "`cells_per_column_limit_filter` is negative.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + cells_per_column_limit] { + auto source = source_ctor(); + return MakeTrivialFilter( + std::move(source), CellsPerColumnFilter(cells_per_column_limit), + [](InternalFilter const& internal_filter) { + return !absl::holds_alternative(internal_filter); + }); + }; + return res; + } + if (filter.has_timestamp_range_filter()) { + auto maybe_range = TimestampRangeSet::Range::FromTimestampRange( + filter.timestamp_range_filter()); + if (!maybe_range) { + return maybe_range.status(); + } + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + range = *std::move(maybe_range)] { + auto source = source_ctor(); + if (source.ApplyFilter(TimestampRange{range})) { + return source; + } + return MakeTrivialFilter( + std::move(source), + [range](CellView const& cell_view) -> absl::optional { + if (range.IsBelowStart(cell_view.timestamp())) { + return NextMode::kCell; + } + if (range.IsAboveEnd(cell_view.timestamp())) { + return NextMode::kColumn; + } + return {}; + }); + }; + return res; + } + if (filter.has_apply_label_transformer()) { + std::string label = filter.apply_label_transformer(); + CellStreamConstructor res = [source_ctor = std::move(source_ctor), + label = std::move(label)] { + auto source = source_ctor(); + return MakeTrivialTransformer(std::move(source), + [label](CellView cell_view) { + cell_view.SetLabel(label); + return cell_view; + }); + }; + return res; + } + if (filter.has_strip_value_transformer()) { + if (!filter.strip_value_transformer()) { + return InvalidArgumentError( + "`strip_value_transformer` explicitly set to `false`.", + 
GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + CellStreamConstructor res = [source_ctor = std::move(source_ctor)] { + auto source = source_ctor(); + // We need to ensure that the value outlives the reference. + std::string const stripped_value; + return MakeTrivialTransformer( + std::move(source), + [stripped_value = std::move(stripped_value)](CellView cell_view) { + cell_view.SetValue(stripped_value); + return cell_view; + }); + }; + return res; + } + if (filter.has_chain()) { + CellStreamConstructor res = std::move(source_ctor); + for (auto const& subfilter : filter.chain().filters()) { + if (subfilter.has_sink()) { + if (!subfilter.sink()) { + return InvalidArgumentError( + "`sink` explicitly set to `false`.", + GCP_ERROR_INFO().WithMetadata("filter", subfilter.DebugString())); + } + direct_sinks.emplace_back(std::move(res)); + res = [] { + return CellStream(std::make_unique()); + }; + return res; + } + auto maybe_res = + CreateFilterImpl(subfilter, std::move(res), direct_sinks); + if (!maybe_res) { + return maybe_res.status(); + } + res = *std::move(maybe_res); + } + return res; + } + if (filter.has_interleave()) { + std::vector parallel_stream_ctors; + for (auto const& subfilter : filter.interleave().filters()) { + if (subfilter.has_sink()) { + if (!subfilter.sink()) { + return InvalidArgumentError( + "`sink` explicitly set to `false`.", + GCP_ERROR_INFO().WithMetadata("filter", subfilter.DebugString())); + } + direct_sinks.emplace_back(source_ctor); + continue; + } + auto maybe_filter = + CreateFilterImpl(subfilter, source_ctor, direct_sinks); + if (!maybe_filter) { + return maybe_filter.status(); + } + parallel_stream_ctors.emplace_back(*maybe_filter); + } + if (parallel_stream_ctors.empty()) { + CellStreamConstructor res = [] { + return CellStream(std::make_unique()); + }; + return res; + } + CellStreamConstructor res = [parallel_stream_ctors = + std::move(parallel_stream_ctors)] { + std::vector parallel_streams; + 
std::transform(parallel_stream_ctors.begin(), parallel_stream_ctors.end(), + std::back_inserter(parallel_streams), + [](CellStreamConstructor const& stream_ctor) { + return stream_ctor(); + }); + return CellStream( + std::make_unique(std::move(parallel_streams))); + }; + return res; + } + if (filter.has_condition()) { + if (!filter.condition().has_predicate_filter()) { + return InvalidArgumentError( + "`condition` must have a `predicate_filter` set.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + if (!filter.condition().has_true_filter() && + !filter.condition().has_false_filter()) { + return InvalidArgumentError( + "`condition` must have `true_filter` or `false_filter` set.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + // FIXME: validate that `sink` is not present in condition's predicate. + // Expected error: + // INVALID_ARGUMENT: Error in field 'condition filter predicate' : sink + // cannot be nested in a condition filter + + auto maybe_predicate_stream_ctor = CreateFilterImpl( + filter.condition().predicate_filter(), source_ctor, direct_sinks); + if (!maybe_predicate_stream_ctor) { + return maybe_predicate_stream_ctor.status(); + } + auto maybe_true_stream_ctor = + filter.condition().has_true_filter() + ? CreateFilterImpl(filter.condition().true_filter(), source_ctor, + direct_sinks) + : StatusOr([] { + return CellStream(std::make_unique()); + }); + if (!maybe_true_stream_ctor) { + return maybe_true_stream_ctor.status(); + } + auto maybe_false_stream_ctor = + filter.condition().has_false_filter() + ? 
CreateFilterImpl(filter.condition().false_filter(), source_ctor, + direct_sinks) + : StatusOr([] { + return CellStream(std::make_unique()); + }); + if (!maybe_false_stream_ctor) { + return maybe_false_stream_ctor.status(); + } + + CellStreamConstructor res = + [source_ctor = std::move(source_ctor), + predicate_stream_ctor = *std::move(maybe_predicate_stream_ctor), + true_stream_ctor = *std::move(maybe_true_stream_ctor), + false_stream_ctor = *std::move(maybe_false_stream_ctor)] { + // The test FilterApplicationPropagation.Condition relies on the + // order of creating those streams. + auto source = source_ctor(); + auto predicate_stream = predicate_stream_ctor(); + auto true_stream = true_stream_ctor(); + auto false_stream = false_stream_ctor(); + return CellStream(std::make_unique( + std::move(source), std::move(predicate_stream), + std::move(true_stream), std::move(false_stream))); + }; + return res; + } + return UnimplementedError( + "Unsupported filter.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); +} +// NOLINTEND(misc-no-recursion,readability-function-cognitive-complexity) + +/** + * Create a filter DAG based on the proto definition. + * + * @param filter the protobuf definition of the filter DAG to be created + * @param source_ctor a zero-argument functor which can be used to create the + * underlying cell stream, which this filter will work on. + * @return DAG described by `filter`. 
+ */ +StatusOr CreateFilter( + ::google::bigtable::v2::RowFilter const& filter, + CellStreamConstructor source_ctor) { + std::vector direct_sink_ctors; + if (filter.has_sink()) { + if (!filter.sink()) { + return InvalidArgumentError( + "`sink` explicitly set to `false`.", + GCP_ERROR_INFO().WithMetadata("filter", filter.DebugString())); + } + return source_ctor(); + } + auto maybe_filter_ctor = + CreateFilterImpl(filter, std::move(source_ctor), direct_sink_ctors); + if (!maybe_filter_ctor) { + return maybe_filter_ctor.status(); + } + if (direct_sink_ctors.empty()) { + return (*maybe_filter_ctor)(); + } + std::vector direct_sinks; + + std::transform( + direct_sink_ctors.begin(), direct_sink_ctors.end(), + std::back_inserter(direct_sinks), + [](CellStreamConstructor const& stream_ctor) { return stream_ctor(); }); + + direct_sinks.emplace_back((*maybe_filter_ctor)()); + return CellStream( + std::make_unique(std::move(direct_sinks))); +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/filter.h b/google/cloud/bigtable/emulator/filter.h new file mode 100644 index 0000000000000..10df364d9cd8a --- /dev/null +++ b/google/cloud/bigtable/emulator/filter.h @@ -0,0 +1,282 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTER_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTER_H + +#include "google/cloud/bigtable/emulator/cell_view.h" +#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/status_or.h" +#include +#include +#include +#include +#include +#include +#include + +namespace re2 { +class RE2; +} // namespace re2 + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +// The code declared in this file is used to construct filters according to +// `::google::bigtable::v2::RowFilter` protobuf definition. +// It describes a DAG through which cells should be routed (and potentially +// copied in case of the `Interleave` filter). +// +// The simplest way of implementing such a DAG is to create an object for every +// node of the graph, which would filter/transform the result. This, however, +// could be very inefficient. For example, if we're only interested in the last +// version of a cell of a specific column, the lowermost layers of the graph +// would have to scan the whole table. +// +// This example shows that we should apply the filters as close to the beginning +// of the graph as possible. The in-memory implementation could jump over +// uninteresting columns and avoid passing all the values around. Most of the +// filters can be applied in any order, which makes our filtering task easy. +// +// Unfortunately, some filters (e.g. `cells_per_row_limit_filter`) prevent us +// from moving filters applied later in the chain to its beginning. Hence, we +// need to keep the naive (object-per-graph-node) approach at least as a backup +// option. +// +// We do attempt to apply the filtering as close to the root as possible, +// though. It is performed via the `AbstractCellStreamImpl::ApplyFilter()` function. +// This operation has different implementations for different filters. 
+// +// The algorithm looks as follows: +// * we try to build the DAG according to the proto, from the ground up +// * every time we're about to add a new node, we first try applying the filter +// to the graph we built so far by calling `ApplyFilter()` on the last node we +// added; +// * these `ApplyFilter()` calls are propagated through the graph all the way to the +// root +// * if the `ApplyFilter()` call fails (e.g. because there is a +// `cells_per_row_limit_filter` in the DAG), we will continue with adding a +// new node to the graph +// * if the `ApplyFilter()` call succeeds then we know that the lower layers will filter +// out the unwanted data, so we can skip adding the node to the graph. + +/// Only return cells from rows whose keys match `regex`. +struct RowKeyRegex { + std::shared_ptr regex; +}; +/// Only return cells from column families whose names match `regex`. +struct FamilyNameRegex { + std::shared_ptr regex; +}; +/// Only return cells from columns whose qualifiers match `regex`. +struct ColumnRegex { + std::shared_ptr regex; +}; +/// Only return cells from columns which fall into `range`. +struct ColumnRange { + std::string column_family; + StringRangeSet::Range range; +}; +/// Only return cells whose timestamps fall into `range`. +struct TimestampRange { + TimestampRangeSet::Range range; +}; + +using InternalFilter = absl::variant; +enum class NextMode { + // Advance a stream to the next available cell. + kCell = 0, + // Advance a stream to the first cell which is in a different column. + kColumn, + // Advance a stream to the first cell which is in a different row. + kRow, +}; + +/** + * An interface for `CellView` stream implementations. + * + * Objects of classes implementing this abstract class represent a stream of + * `CellView`. They should all guarantee that returned `CellView`s are sorted by + * (row_key, column_family, column_qualifier, timestamp). 
+ * + * Depending on the implementation, objects of this class may support filtering + * of the returned `CellView`. The users may request filtering via `Apply()`. It + * should be used only before first access to the actual stream (i.e. functions + * `HasValue()`, `Value()` and `Next()`). + * + * Objects of derived classes should be assumed to be not thread safe. + */ +class AbstractCellStreamImpl { + public: + virtual ~AbstractCellStreamImpl() = default; + + /** + * Attempt to apply a filter on the stream. + * + * It should not be called after `HasValue()`, `Value()` or `Next()` have been + * called. + * + * Depending on the implementation the application may succeed or not. If it + * doesn't, the stream is unchanged. + * + * @param internal_filter a filter to apply on the stream. + * @return whether the filter application succeeded. If it didn't, the stream + * is unchanged. + */ + virtual bool ApplyFilter(InternalFilter const& internal_filter) = 0; + /// Whether the stream is pointing to a cell (`true`) or has finished (`false`). + virtual bool HasValue() const = 0; + /** + * The first "unconsumed" value. + * + * \pre{One should not call this member function if `HasValue() == false`.} + * + * @return the cell the stream currently points to + */ + virtual CellView const& Value() const = 0; + /** + * Advance the stream to the next `CellView`. + * + * \pre{One should not call this member function if `HasValue() == false`.} + * + * Specific implementations have to support `mode == NextMode::kCell` but may + * not support others. If the requested `mode` is not supported, `false` is + * returned. + * + * @param mode how far to advance - it may be any next cell, or the first cell + * which is in a different column or the first cell which is in a + * different row. + * @return whether `mode` is supported; the returned value is unrelated to + * what `HasValue()` will return. + */ + virtual bool Next(NextMode mode) = 0; +}; + +/** + * A convenience wrapper around `AbstractCellStreamImpl`. 
+ * + * The purpose of this class is to provide what `AbstractCellStreamImpl` + * implementations do but with a more convenient interface. + */ +class CellStream { + public: + explicit CellStream(std::unique_ptr impl) + : impl_(std::move(impl)) {} + + /** + * Attempt to apply a filter on the stream. + * + * It should not be called after `HasValue()`, `Value()` or `Next()` have been + * called. + * + * Depending on the implementation the application may succeed or not. If it + * doesn't, the stream is unchanged. + * + * @param internal_filter a filter to apply on the stream. + * @return whether the filter application succeeded. If it didn't, the stream + * is unchanged. + */ + bool ApplyFilter(InternalFilter const& internal_filter) { + return impl_->ApplyFilter(internal_filter); + } + /// Whether the stream is pointing to a cell (`true`) or has finished (`false`). + bool HasValue() const { return impl_->HasValue(); } + /** + * The first "unconsumed" value. + * + * \pre{One should not call this member function if `HasValue() == false`.} + * + * @return the cell the stream currently points to + */ + CellView const& Value() const { return impl_->Value(); } + /** + * Advance the stream to the next `CellView`. + * + * \pre{One should not call this member function if `HasValue() == false`.} + * + * @param mode how far to advance - it may be any next cell, or the first cell + * which is in a different column or the first cell which is in a + * different row. 
+ */ + void Next(NextMode mode = NextMode::kCell); + /// Equivalent to `Next(NextMode::kCell)`. + void operator++() { Next(); } + /// Equivalent to `Next(NextMode::kCell)`. + CellView operator++(int); + CellView const& operator*() const { return Value(); } + CellView const* operator->() const { return &Value(); } + /// Equivalent to `HasValue()`. + explicit operator bool() const { return HasValue(); } + AbstractCellStreamImpl& impl() const { return *impl_; } + + private: + void NextColumn(); + void EmulateNextColumn(); + void EmulateNextRow(); + std::unique_ptr impl_; +}; + +/** + * A stream which merges multiple streams while maintaining ordering. + */ +class MergeCellStreams : public AbstractCellStreamImpl { + public: + class CellStreamGreater { + public: + bool operator()(std::unique_ptr const& lhs, + std::unique_ptr const& rhs) const; + }; + + explicit MergeCellStreams(std::vector streams); + bool ApplyFilter(InternalFilter const& internal_filter) override; + bool HasValue() const override; + CellView const& Value() const override; + bool Next(NextMode mode) override; + + private: + void InitializeIfNeeded() const; + + mutable bool initialized_{false}; + + protected: + // A priority queue of streams which still have data. + // `std::priority_queue` can't be used because it cannot be iterated over. + mutable std::vector> unfinished_streams_; +}; + +/** + * Create a filter hierarchy according to a protobuf description. + * + * The filter hierarchy is essentially a DAG with specific filters in nodes. + * + * @param filter the protobuf description of the filter hierarchy + * @param source_ctor a zero argument function to create the unfiltered stream + * to be filtered. Depending on `filter` it may be called multiple times and + * it should return separate streams each time. + * @return the filtered stream or an error. 
+ */ +using CellStreamConstructor = std::function; +StatusOr CreateFilter( + ::google::bigtable::v2::RowFilter const& filter, + CellStreamConstructor source_ctor); + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTER_H diff --git a/google/cloud/bigtable/emulator/filter_test.cc b/google/cloud/bigtable/emulator/filter_test.cc new file mode 100644 index 0000000000000..8ce833f9f29db --- /dev/null +++ b/google/cloud/bigtable/emulator/filter_test.cc @@ -0,0 +1,1956 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/cell_view.h" +#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/bigtable/emulator/test_util.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include "google/cloud/testing_util/chrono_literals.h" +#include "google/cloud/testing_util/status_matchers.h" +#include "gmock/gmock.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +using ::google::bigtable::v2::RowFilter; +using ::testing::Return; +using testing_util::StatusIs; +using testing_util::chrono_literals::operator""_ms; + +class MockStream : public AbstractCellStreamImpl { + public: + MOCK_METHOD(bool, ApplyFilter, (InternalFilter const& internal_filter), + (override)); + MOCK_METHOD(bool, HasValue, (), (const, override)); + MOCK_METHOD(CellView const&, Value, (), (const, override)); + MOCK_METHOD(bool, Next, (NextMode mode), (override)); +}; + +TEST(CellStream, NextAllSupported) { + { + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kCell)).WillOnce(Return(true)); + CellStream(std::move(mock_impl)).Next(); + } + { + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)).WillOnce(Return(true)); + CellStream(std::move(mock_impl)).Next(NextMode::kColumn); + } + { + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kRow)).WillOnce(Return(true)); + CellStream(std::move(mock_impl)).Next(NextMode::kRow); + } +} + +class TestCell { + public: + TestCell(std::string row_key, std::string column_family, + std::string column_qualifier, std::chrono::milliseconds timestamp, + std::string value, absl::optional label = {}) + : row_key_(std::move(row_key)), + 
column_family_(std::move(column_family)), + column_qualifier_(std::move(column_qualifier)), + timestamp_(std::move(timestamp)), + value_(std::move(value)), + view_(row_key_, column_family_, column_qualifier_, timestamp_, value_), + label_(std::move(label)) { + maybe_label_view(); + } + + TestCell(TestCell const& other) + : row_key_(other.row_key_), + column_family_(other.column_family_), + column_qualifier_(other.column_qualifier_), + timestamp_(other.timestamp_), + value_(other.value_), + view_(row_key_, column_family_, column_qualifier_, timestamp_, value_), + label_(other.label_) { + maybe_label_view(); + } + + TestCell(TestCell&& other) noexcept + : row_key_(std::move(other.row_key_)), + column_family_(std::move(other.column_family_)), + column_qualifier_(std::move(other.column_qualifier_)), + timestamp_(std::move(other.timestamp_)), + value_(std::move(other.value_)), + view_(row_key_, column_family_, column_qualifier_, timestamp_, value_), + label_(std::move(other.label_)) { + maybe_label_view(); + } + + TestCell Labeled(std::string const& label) { + TestCell labeled_copy = *this; + labeled_copy.label_ = label; + labeled_copy.maybe_label_view(); + return labeled_copy; + } + + CellView const& AsCellView() const { return view_; } + + bool operator==(CellView const& cell_view) const { + bool labels_equal = (!label_.has_value() && !cell_view.HasLabel()) || + (label_.has_value() && cell_view.HasLabel() && + label_.value() == cell_view.label()); + return row_key_ == cell_view.row_key() && + column_family_ == cell_view.column_family() && + column_qualifier_ == cell_view.column_qualifier() && + timestamp_ == cell_view.timestamp() && value_ == cell_view.value() && + labels_equal; + } + + bool operator==(TestCell const& other) const { + return operator==(other.AsCellView()); + } + + private: + std::string row_key_; + std::string column_family_; + std::string column_qualifier_; + std::chrono::milliseconds timestamp_; + std::string value_; + CellView view_; + 
absl::optional label_; + + void maybe_label_view() { + if (label_) { + view_.SetLabel(label_.value()); + } + } +}; + +std::ostream& operator<<(std::ostream& stream, TestCell const& test_cell) { + auto const& cell_view = test_cell.AsCellView(); + stream << "Cell(" << cell_view.row_key() << " " << cell_view.column_family() + << ":" << cell_view.column_qualifier() << " @" + << cell_view.timestamp().count() << "ms: " << cell_view.value() << ")"; + return stream; +} + +TEST(CellStream, NextColumnNotSupportedNoMoreData) { + std::vector cells{TestCell{"row1", "cf1", "col1", 0_ms, "val1"}}; + auto cur_cell = cells.begin(); + + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)).WillOnce(Return(false)); + EXPECT_CALL(*mock_impl, Value).WillRepeatedly([&]() -> CellView const& { + return cur_cell->AsCellView(); + }); + EXPECT_CALL(*mock_impl, HasValue).WillRepeatedly([&] { + return cur_cell != cells.end(); + }); + EXPECT_CALL(*mock_impl, Next(NextMode::kCell)).WillOnce([&] { + ++cur_cell; + return true; + }); + CellStream cell_stream(std::move(mock_impl)); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[0], cell_stream.Value()); + cell_stream.Next(NextMode::kColumn); + ASSERT_FALSE(cell_stream.HasValue()); +} + +TEST(CellStream, NextColumnNotSupported) { + std::vector cells{ + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 1_ms, "val2"}, + TestCell{"row1", "cf1", "col2", 0_ms, "val3"}, // column changed + TestCell{"row1", "cf1", "col2", 1_ms, "val4"}, + TestCell{"row1", "cf2", "col2", 0_ms, "val5"}, // column family changed + TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, + TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed + TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; + auto cur_cell = cells.begin(); + + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)) + .WillRepeatedly(Return(false)); + EXPECT_CALL(*mock_impl, Value).WillRepeatedly([&]() -> 
CellView const& { + return cur_cell->AsCellView(); + }); + EXPECT_CALL(*mock_impl, HasValue).WillRepeatedly([&] { + return cur_cell != cells.end(); + }); + EXPECT_CALL(*mock_impl, Next(NextMode::kCell)).WillRepeatedly([&] { + ++cur_cell; + return true; + }); + + CellStream cell_stream(std::move(mock_impl)); + + cell_stream.Next(NextMode::kColumn); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[2], cell_stream.Value()); + + cell_stream.Next(NextMode::kColumn); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[4], cell_stream.Value()); + + cell_stream.Next(NextMode::kColumn); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[6], cell_stream.Value()); + + cell_stream.Next(NextMode::kColumn); + ASSERT_FALSE(cell_stream.HasValue()); +} + +TEST(CellStream, NextRowUnsupported) { + std::vector cells{ + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 1_ms, "val2"}, + TestCell{"row1", "cf1", "col2", 0_ms, "val3"}, // column changed + TestCell{"row1", "cf1", "col2", 1_ms, "val4"}, + TestCell{"row1", "cf2", "col2", 0_ms, "val5"}, // column family changed + TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, + TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed + TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; + auto cur_cell = cells.begin(); + + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kRow)).WillRepeatedly(Return(false)); + EXPECT_CALL(*mock_impl, Value).WillRepeatedly([&]() -> CellView const& { + return cur_cell->AsCellView(); + }); + EXPECT_CALL(*mock_impl, HasValue).WillRepeatedly([&] { + return cur_cell != cells.end(); + }); + EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)).WillRepeatedly([&] { + cur_cell = std::find_if(cur_cell, cells.end(), [&](TestCell const& cell) { + return cell.AsCellView().row_key() != cur_cell->AsCellView().row_key() || + cell.AsCellView().column_family() != + cur_cell->AsCellView().column_family() || + cell.AsCellView().column_qualifier() != + 
cur_cell->AsCellView().column_qualifier(); + }); + return true; + }); + + CellStream cell_stream(std::move(mock_impl)); + + cell_stream.Next(NextMode::kRow); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[6], cell_stream.Value()); + + cell_stream.Next(NextMode::kRow); + ASSERT_FALSE(cell_stream.HasValue()); +} + +TEST(CellStream, NextRowAndColumnUnsupported) { + std::vector cells{ + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 1_ms, "val2"}, + TestCell{"row1", "cf1", "col2", 0_ms, "val3"}, // column changed + TestCell{"row1", "cf1", "col2", 1_ms, "val4"}, + TestCell{"row1", "cf2", "col2", 0_ms, "val5"}, // column family changed + TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, + TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed + TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; + auto cur_cell = cells.begin(); + + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kRow)).WillRepeatedly(Return(false)); + EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)) + .WillRepeatedly(Return(false)); + EXPECT_CALL(*mock_impl, Next(NextMode::kCell)).WillRepeatedly([&] { + ++cur_cell; + return true; + }); + EXPECT_CALL(*mock_impl, Value).WillRepeatedly([&]() -> CellView const& { + return cur_cell->AsCellView(); + }); + EXPECT_CALL(*mock_impl, HasValue).WillRepeatedly([&] { + return cur_cell != cells.end(); + }); + + CellStream cell_stream(std::move(mock_impl)); + + cell_stream.Next(NextMode::kRow); + ASSERT_TRUE(cell_stream.HasValue()); + EXPECT_EQ(cells[6], cell_stream.Value()); + + cell_stream.Next(NextMode::kRow); + ASSERT_FALSE(cell_stream.HasValue()); +} + +class CellStreamOrderTest : public ::testing::Test, + public ::testing::WithParamInterface< + // Expectation, lhs, rhs. 
+ std::tuple> {}; + +INSTANTIATE_TEST_SUITE_P( + , CellStreamOrderTest, + ::testing::Values( + std::make_tuple(false, TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}), + std::make_tuple(false, TestCell{"row0", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}), + std::make_tuple(true, TestCell{"row2", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}), + std::make_tuple(false, TestCell{"row", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row", "cf1", "col1", 0_ms, "val1"}), + std::make_tuple(false, TestCell{"row", "cf0", "col1", 0_ms, "val1"}, + TestCell{"row", "cf1", "col1", 0_ms, "val1"}), + std::make_tuple(true, TestCell{"row2", "cf2", "col1", 0_ms, "val1"}, + TestCell{"row", "cf1", "col1", 0_ms, "val1"}), + std::make_tuple(false, TestCell{"row", "cf", "col1", 0_ms, "val1"}, + TestCell{"row", "cf", "col1", 0_ms, "val1"}), + std::make_tuple(false, TestCell{"row", "cf", "col0", 0_ms, "val1"}, + TestCell{"row", "cf", "col1", 0_ms, "val1"}), + std::make_tuple(true, TestCell{"row", "cf", "col2", 0_ms, "val1"}, + TestCell{"row", "cf", "col1", 0_ms, "val1"}), + std::make_tuple(false, TestCell{"row", "cf", "col", 0_ms, "val1"}, + TestCell{"row", "cf", "col", 0_ms, "val1"}), + std::make_tuple(false, TestCell{"row", "cf", "col", 0_ms, "val1"}, + TestCell{"row", "cf", "col", 1_ms, "val1"}), + std::make_tuple(true, TestCell{"row", "cf", "col", 1_ms, "val1"}, + TestCell{"row", "cf", "col", 0_ms, "val1"}))); + +TEST_P(CellStreamOrderTest, Order) { + auto mock_impl_left = std::make_unique(); + auto left_cell = std::get<1>(GetParam()); + auto right_cell = std::get<2>(GetParam()); + EXPECT_CALL(*mock_impl_left, Value).WillRepeatedly([&]() -> CellView const& { + return left_cell.AsCellView(); + }); + EXPECT_CALL(*mock_impl_left, HasValue).WillRepeatedly([&] { return true; }); + + auto mock_impl_right = std::make_unique(); + EXPECT_CALL(*mock_impl_right, Value).WillRepeatedly([&]() 
-> CellView const& { + return right_cell.AsCellView(); + }); + EXPECT_CALL(*mock_impl_right, HasValue).WillRepeatedly([&] { return true; }); + auto left = std::make_unique(std::move(mock_impl_left)); + auto right = std::make_unique(std::move(mock_impl_right)); + EXPECT_EQ(std::get<0>(GetParam()), + MergeCellStreams::CellStreamGreater()(left, right)); +} + +TEST(MergeCellStreams, NoStreams) { + CellStream stream( + std::make_unique(std::vector{})); + EXPECT_FALSE(stream.HasValue()); +} + +TEST(MergeCellStreams, OnlyEmptyStreams) { + auto empty_impl_1 = std::make_unique(); + EXPECT_CALL(*empty_impl_1, HasValue).WillRepeatedly(Return(false)); + auto empty_impl_2 = std::make_unique(); + EXPECT_CALL(*empty_impl_2, HasValue).WillRepeatedly(Return(false)); + CellStream empty_1(std::move(empty_impl_1)); + CellStream empty_2(std::move(empty_impl_2)); + std::vector streams; + streams.emplace_back(std::move(empty_1)); + streams.emplace_back(std::move(empty_2)); + CellStream stream(std::make_unique(std::move(streams))); + EXPECT_FALSE(stream.HasValue()); +} + +TEST(MergeCellStreams, OneStream) { + std::vector cells{ + TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf1", "col1", 1_ms, "val2"}, + TestCell{"row1", "cf1", "col2", 0_ms, "val3"}, // column changed + TestCell{"row1", "cf1", "col2", 1_ms, "val4"}, + TestCell{"row1", "cf2", "col2", 0_ms, "val5"}, // column family changed + TestCell{"row1", "cf2", "col2", 1_ms, "val6"}, + TestCell{"row2", "cf2", "col2", 0_ms, "val7"}, // row changed + TestCell{"row2", "cf2", "col2", 1_ms, "val8"}}; + + auto cur_cell = cells.begin(); + + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, Next(NextMode::kColumn)).WillOnce([&]() { + cur_cell = std::next(cells.begin(), 2); + return true; + }); + EXPECT_CALL(*mock_impl, Next(NextMode::kRow)).WillOnce([&]() { + cur_cell = std::next(cells.begin(), 6); + return true; + }); + EXPECT_CALL(*mock_impl, Next(NextMode::kCell)).WillRepeatedly([&]() { + ++cur_cell; + 
return true; + }); + EXPECT_CALL(*mock_impl, Value).WillRepeatedly([&]() -> CellView const& { + return cur_cell->AsCellView(); + }); + EXPECT_CALL(*mock_impl, HasValue).WillRepeatedly([&] { + return cur_cell != cells.end(); + }); + + std::vector streams; + streams.emplace_back(std::move(mock_impl)); + CellStream stream(std::make_unique(std::move(streams))); + + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(cells[0], stream.Value()); + + stream.Next(NextMode::kColumn); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(cells[2], stream.Value()); + + stream.Next(NextMode::kRow); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(cells[6], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(cells[7], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_FALSE(stream.HasValue()); +} + +struct TestStreamData { + explicit TestStreamData(std::vector data) + : cells(std::move(data)), + cur_cell(cells.begin()), + stream(std::make_unique()) {} + + std::vector cells; + std::vector::iterator cur_cell; + std::unique_ptr stream; +}; + +TEST(MergeCellStreams, ThreeStreams) { + TestStreamData stream_data_1( + std::vector{TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row1", "cf2", "col1", 2_ms, "val2"}}); + + TestStreamData stream_data_2( + std::vector{TestCell{"row1", "cf1", "col1", 1_ms, "val1"}, + TestCell{"row2", "cf1", "col1", 1_ms, "val2"}, + TestCell{"row2", "cf1", "col2", 0_ms, "val3"}}); + + TestStreamData stream_data_3( + std::vector{TestCell{"row1", "cf1", "col1", 3_ms, "val1"}, + TestCell{"row2", "cf0", "col1", 1_ms, "val2"}}); + + auto prepare_stream = [](TestStreamData& stream_data) { + EXPECT_CALL(*stream_data.stream, Next(NextMode::kCell)) + .WillRepeatedly([&]() { + ++stream_data.cur_cell; + return true; + }); + EXPECT_CALL(*stream_data.stream, Value) + .WillRepeatedly([&]() -> CellView const& { + return stream_data.cur_cell->AsCellView(); + }); + EXPECT_CALL(*stream_data.stream, HasValue).WillRepeatedly([&] { + 
return stream_data.cur_cell != stream_data.cells.end(); + }); + return CellStream(std::move(stream_data.stream)); + }; + + std::vector streams; + streams.push_back(prepare_stream(stream_data_1)); + streams.push_back(prepare_stream(stream_data_2)); + streams.push_back(prepare_stream(stream_data_3)); + CellStream stream(std::make_unique(std::move(streams))); + + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_1.cells[0], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_2.cells[0], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_3.cells[0], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_1.cells[1], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_3.cells[1], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_2.cells[1], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_2.cells[2], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_FALSE(stream.HasValue()); +} + +TEST(MergeCellStreams, AdvancingRowAdvancesAllRelevantStreams) { + // When calling Next(NextMode::kRow), all streams currently pointing to the + // same row as the first stream should be advanced. 
+ TestStreamData stream_data_1( + std::vector{TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row2", "cf2", "col1", 2_ms, "val2"}}); + + TestStreamData stream_data_2( + std::vector{TestCell{"row2", "cf1", "col1", 1_ms, "val2"}, + TestCell{"row2", "cf1", "col2", 10_ms, "val3"}}); + + TestStreamData stream_data_3( + std::vector{TestCell{"row1", "cf1", "col1", 3_ms, "val1"}, + TestCell{"row2", "cf0", "col1", 1_ms, "val2"}}); + + TestStreamData stream_data_4( + std::vector{TestCell{"row1", "cf1", "col1", 3_ms, "val1"}}); + + auto prepare_stream = [](TestStreamData& stream_data) { + EXPECT_CALL(*stream_data.stream, Value) + .WillRepeatedly([&]() -> CellView const& { + return stream_data.cur_cell->AsCellView(); + }); + EXPECT_CALL(*stream_data.stream, HasValue).WillRepeatedly([&] { + return stream_data.cur_cell != stream_data.cells.end(); + }); + }; + prepare_stream(stream_data_1); + prepare_stream(stream_data_2); + prepare_stream(stream_data_3); + prepare_stream(stream_data_4); + + EXPECT_CALL(*stream_data_1.stream, Next(NextMode::kRow)).WillOnce([&]() { + stream_data_1.cur_cell = std::next(stream_data_1.cells.begin()); + return true; + }); + EXPECT_CALL(*stream_data_3.stream, Next(NextMode::kRow)).WillOnce([&]() { + stream_data_3.cur_cell = std::next(stream_data_3.cells.begin()); + return true; + }); + EXPECT_CALL(*stream_data_4.stream, Next(NextMode::kRow)).WillOnce([&]() { + stream_data_4.cur_cell = stream_data_4.cells.end(); + return true; + }); + + EXPECT_CALL(*stream_data_1.stream, Next(NextMode::kCell)).WillOnce([&]() { + ++stream_data_1.cur_cell; + return true; + }); + + EXPECT_CALL(*stream_data_2.stream, Next(NextMode::kCell)) + .Times(2) + .WillRepeatedly([&]() { + ++stream_data_2.cur_cell; + return true; + }); + + EXPECT_CALL(*stream_data_3.stream, Next(NextMode::kCell)).WillOnce([&]() { + ++stream_data_3.cur_cell; + return true; + }); + + std::vector streams; + streams.emplace_back(std::move(stream_data_1.stream)); + 
streams.emplace_back(std::move(stream_data_2.stream)); + streams.emplace_back(std::move(stream_data_3.stream)); + streams.emplace_back(std::move(stream_data_4.stream)); + CellStream stream(std::make_unique(std::move(streams))); + + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_1.cells[0], stream.Value()); + + stream.Next(NextMode::kRow); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_3.cells[1], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_2.cells[0], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_2.cells[1], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_1.cells[1], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_FALSE(stream.HasValue()); +} + +TEST(MergeCellStreams, AdvancingColumnAdvancesAllRelevantStreams) { + // When calling Next(NextMode::kColumn), all streams currently pointing to the + // same column as the first stream should be advanced. 
+ TestStreamData stream_data( + std::vector{TestCell{"row1", "cf1", "col1", 0_ms, "val1"}, + TestCell{"row2", "cf2", "col1", 2_ms, "val2"}}); + + TestStreamData stream_data_different_column_family( + std::vector{TestCell{"row1", "cf2", "col1", 1_ms, "val2"}}); + + TestStreamData stream_data_different_column_qualifier( + std::vector{TestCell{"row1", "cf1", "col2", 1_ms, "val2"}}); + + TestStreamData stream_data_different_row( + std::vector{TestCell{"row2", "cf1", "col1", 1_ms, "val2"}}); + + TestStreamData stream_data_same_column_different_timestamp( + std::vector{TestCell{"row1", "cf1", "col1", 10_ms, "val2"}}); + + auto prepare_stream = [](TestStreamData& stream_data) { + EXPECT_CALL(*stream_data.stream, Value) + .WillRepeatedly([&]() -> CellView const& { + return stream_data.cur_cell->AsCellView(); + }); + EXPECT_CALL(*stream_data.stream, HasValue).WillRepeatedly([&] { + return stream_data.cur_cell != stream_data.cells.end(); + }); + }; + prepare_stream(stream_data); + prepare_stream(stream_data_different_column_family); + prepare_stream(stream_data_different_column_qualifier); + prepare_stream(stream_data_different_row); + prepare_stream(stream_data_same_column_different_timestamp); + + EXPECT_CALL(*stream_data.stream, Next(NextMode::kColumn)).WillOnce([&]() { + ++stream_data.cur_cell; + return true; + }); + EXPECT_CALL(*stream_data.stream, Next(NextMode::kCell)).WillOnce([&]() { + ++stream_data.cur_cell; + return true; + }); + EXPECT_CALL(*stream_data_same_column_different_timestamp.stream, + Next(NextMode::kColumn)) + .WillOnce([&]() { + ++stream_data_same_column_different_timestamp.cur_cell; + return true; + }); + + EXPECT_CALL(*stream_data_different_column_family.stream, + Next(NextMode::kCell)) + .WillOnce([&]() { + ++stream_data_different_column_family.cur_cell; + return true; + }); + + EXPECT_CALL(*stream_data_different_column_qualifier.stream, + Next(NextMode::kCell)) + .WillOnce([&]() { + ++stream_data_different_column_qualifier.cur_cell; + return true; 
+ }); + + EXPECT_CALL(*stream_data_different_row.stream, Next(NextMode::kCell)) + .WillOnce([&]() { + ++stream_data_different_row.cur_cell; + return true; + }); + + std::vector streams; + streams.emplace_back(std::move(stream_data.stream)); + streams.emplace_back(std::move(stream_data_different_column_family.stream)); + streams.emplace_back( + std::move(stream_data_different_column_qualifier.stream)); + streams.emplace_back(std::move(stream_data_different_row.stream)); + streams.emplace_back( + std::move(stream_data_same_column_different_timestamp.stream)); + CellStream stream(std::make_unique(std::move(streams))); + + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data.cells[0], stream.Value()); + + stream.Next(NextMode::kColumn); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_different_column_qualifier.cells[0], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_different_column_family.cells[0], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data_different_row.cells[0], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_TRUE(stream.HasValue()); + EXPECT_EQ(stream_data.cells[1], stream.Value()); + + stream.Next(NextMode::kCell); + ASSERT_FALSE(stream.HasValue()); +} + +class InvalidFilterProtoTest : public ::testing::Test { + protected: + ::google::bigtable::v2::RowFilter filter_; + StatusOr TryCreate() { + return CreateFilter( + filter_, [] { return CellStream(std::make_unique()); }); + } +}; + +TEST_F(InvalidFilterProtoTest, PassAll) { + filter_.set_pass_all_filter(false); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`pass_all_filter` explicitly set to `false`"))); +} + +TEST_F(InvalidFilterProtoTest, BlockAll) { + filter_.set_block_all_filter(false); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + 
StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`block_all_filter` explicitly set to `false`"))); +} + +TEST_F(InvalidFilterProtoTest, RowKeyRegex) { + filter_.set_row_key_regex_filter("["); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`row_key_regex_filter` is not a valid RE2 regex"))); +} + +TEST_F(InvalidFilterProtoTest, ValueRegex) { + filter_.set_value_regex_filter("["); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`value_regex_filter` is not a valid RE2 regex."))); +} + +TEST_F(InvalidFilterProtoTest, RowSampleNegative) { + filter_.set_row_sample_filter(-1); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`row_sample_filter` is not a valid probability."))); +} + +TEST_F(InvalidFilterProtoTest, RowSampleTooLarge) { + filter_.set_row_sample_filter(10); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`row_sample_filter` is not a valid probability."))); +} + +TEST_F(InvalidFilterProtoTest, FamilyNameRegex) { + filter_.set_family_name_regex_filter("["); + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`family_name_regex_filter` is not a valid RE2 regex."))); +} + +TEST_F(InvalidFilterProtoTest, ColumnQualifierRegex) { + filter_.set_column_qualifier_regex_filter("["); + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs( + StatusCode::kInvalidArgument, + testing::HasSubstr( + "`column_qualifier_regex_filter` is not a valid RE2 regex."))); +} + +TEST_F(InvalidFilterProtoTest, PerRowOffset) { + filter_.set_cells_per_row_offset_filter(-1); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + 
+ StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`cells_per_row_offset_filter` is negative."))); +} + +TEST_F(InvalidFilterProtoTest, PerRowLimit) { + filter_.set_cells_per_row_limit_filter(-1); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`cells_per_row_limit_filter` is negative."))); +} + +TEST_F(InvalidFilterProtoTest, PerColumnLimit) { + filter_.set_cells_per_column_limit_filter(-1); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`cells_per_column_limit_filter` is negative."))); +} + +TEST_F(InvalidFilterProtoTest, StripValue) { + filter_.set_strip_value_transformer(false); + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`strip_value_transformer` explicitly set to `false`."))); +} + +TEST_F(InvalidFilterProtoTest, ConditionNoPredicate) { + filter_.mutable_condition(); + auto maybe_stream = TryCreate(); + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "`condition` must have a `predicate_filter` set."))); +} + +TEST_F(InvalidFilterProtoTest, ConditionNeitherTrueNorFalse) { + filter_.mutable_condition()->mutable_predicate_filter()->set_pass_all_filter( + true); + + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs( + StatusCode::kInvalidArgument, + testing::HasSubstr( + "`condition` must have `true_filter` or `false_filter` set."))); +} + +TEST_F(InvalidFilterProtoTest, ConditionPredicateSink) { + filter_.mutable_condition()->mutable_predicate_filter()->set_sink(true); + // Give both branches a real pass-all filter, so that the sink nested in the + // predicate is the only invalid part of this condition. 
+ filter_.mutable_condition()->mutable_true_filter()->set_pass_all_filter(true); + filter_.mutable_condition()->mutable_false_filter()->set_pass_all_filter(true); + + auto maybe_stream = TryCreate(); + + // FIXME unskip this test after fixing condition validation. 
+ GTEST_SKIP() << "Searching filter graph for sink nodes unimplemented."; + EXPECT_THAT(maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr( + "sink cannot be nested in a condition filter"))); +} + +TEST_F(InvalidFilterProtoTest, SinkFalse) { + filter_.set_sink(false); + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr("`sink` explicitly set to `false`."))); +} + +TEST_F(InvalidFilterProtoTest, ChainSinkFalse) { + filter_.mutable_chain()->add_filters()->set_sink(false); + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr("`sink` explicitly set to `false`."))); +} + +TEST_F(InvalidFilterProtoTest, InterleaveSinkFalse) { + filter_.mutable_interleave()->add_filters()->set_sink(false); + auto maybe_stream = TryCreate(); + EXPECT_THAT( + maybe_stream, + StatusIs(StatusCode::kInvalidArgument, + testing::HasSubstr("`sink` explicitly set to `false`."))); +} + +TEST(FilterTest, BlockAll) { + RowFilter filter; + filter.set_block_all_filter(true); + + auto maybe_stream = CreateFilter( + filter, [] { return CellStream(std::make_unique()); }); + + ASSERT_STATUS_OK(maybe_stream); + EXPECT_FALSE(maybe_stream->HasValue()); +} + +bool operator==(RowKeyRegex const& lhs, RowKeyRegex const& rhs) { + return lhs.regex == rhs.regex; +} +bool operator==(FamilyNameRegex const& lhs, FamilyNameRegex const& rhs) { + return lhs.regex == rhs.regex; +} +bool operator==(ColumnRegex const& lhs, ColumnRegex const& rhs) { + return lhs.regex == rhs.regex; +} +bool operator==(ColumnRange const& lhs, ColumnRange const& rhs) { + return lhs.range == rhs.range; +} +bool operator==(TimestampRange const& lhs, TimestampRange const& rhs) { + return lhs.range == rhs.range; +} + +class FilterPrinter { + public: + explicit FilterPrinter(std::ostream& stream) : stream_(stream) {} + void operator()(RowKeyRegex const& to_print) const { + 
+ stream_ << "RowKeyRegex(" << to_print.regex->pattern() << ")"; + } + void operator()(FamilyNameRegex const& to_print) const { + stream_ << "FamilyNameRegex(" << to_print.regex->pattern() << ")"; + } + void operator()(ColumnRegex const& to_print) const { + stream_ << "ColumnRegex(" << to_print.regex->pattern() << ")"; + } + void operator()(ColumnRange const& to_print) const { + stream_ << "ColumnRange(" << to_print.column_family << "," << to_print.range + << ")"; + } + void operator()(TimestampRange const& to_print) const { + stream_ << "TimestampRange(" << to_print.range << ")"; + } + + private: + std::ostream& stream_; +}; + +// Prints `filter` by visiting the variant with a `FilterPrinter`. +std::ostream& operator<<(std::ostream& os, InternalFilter const& filter) { + absl::visit(FilterPrinter(os), filter); + return os; +} + +class FilterApplicationPropagation : public ::testing::Test { + protected: + struct InternalFilterType { + InternalFilter internal_filter; + bool should_propagate; + }; + + // Registers one sample of every internal filter kind, each initially marked + // as expected to propagate. 
+ FilterApplicationPropagation() + : sample_regex_(std::make_shared("foo.*")), + sample_string_range_("a", true, "b", false), + sample_ts_range_(std::chrono::milliseconds(10), + std::chrono::milliseconds(20)) { + internal_filters_.emplace( + "row_key_regex", InternalFilterType{RowKeyRegex{sample_regex_}, true}); + internal_filters_.emplace( + "family_name_regex", + InternalFilterType{FamilyNameRegex{sample_regex_}, true}); + internal_filters_.emplace( + "column_regex", InternalFilterType{ColumnRegex{sample_regex_}, true}); + internal_filters_.emplace( + "column_range", + InternalFilterType{ColumnRange{"fam", sample_string_range_}, true}); + internal_filters_.emplace( + "timestamp_range", + InternalFilterType{TimestampRange{sample_ts_range_}, true}); + } + + // Clears the `should_propagate` flag of the sample registered under + // `filter_type`; fails the test if no such sample exists. 
+ void PropagationNotExpected(std::string const& filter_type) { + auto filter_type_it = internal_filters_.find(filter_type); + ASSERT_NE(internal_filters_.end(), filter_type_it); + filter_type_it->second.should_propagate = false; + } + + std::shared_ptr sample_regex_; + StringRangeSet::Range sample_string_range_; 
+ TimestampRangeSet::Range sample_ts_range_; + std::map internal_filters_; + + void TestPropagation(RowFilter const& filter, int num_applies_to_ignore) { + for (bool underlying_supports_filter : {false, true}) { + for (auto const& internal_filter_type : internal_filters_) { + auto maybe_stream = CreateFilter(filter, [&] { + auto mock_impl = std::make_unique(); + if (num_applies_to_ignore) { + // Creating the filter might trigger some `ApplyFilter` calls which + // we're not interested in in this test. Let's ignore them. + EXPECT_CALL(*mock_impl, ApplyFilter) + .Times(num_applies_to_ignore) + .WillRepeatedly(Return(false)); + } + if (internal_filter_type.second.should_propagate) { + EXPECT_CALL( + *mock_impl, + ApplyFilter(internal_filter_type.second.internal_filter)) + .WillOnce(Return(underlying_supports_filter)); + } + return CellStream(std::move(mock_impl)); + }); + ASSERT_STATUS_OK(maybe_stream); + + if (underlying_supports_filter) { + EXPECT_EQ(internal_filter_type.second.should_propagate, + maybe_stream->ApplyFilter( + internal_filter_type.second.internal_filter)) + << "for filter " << internal_filter_type.first; + } else { + EXPECT_FALSE(maybe_stream->ApplyFilter( + internal_filter_type.second.internal_filter)) + << "for filter " << internal_filter_type.first; + } + } + } + } +}; + +TEST_F(FilterApplicationPropagation, PassAll) { + RowFilter filter; + filter.set_pass_all_filter(true); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, BlockAll) { + RowFilter filter; + filter.set_block_all_filter(true); + + for (auto& internal_filter : internal_filters_) { + auto maybe_stream = + CreateFilter(filter, [&] { return CellStream(nullptr); }); + ASSERT_STATUS_OK(maybe_stream); + EXPECT_EQ(true, + maybe_stream->ApplyFilter(internal_filter.second.internal_filter)) + << " for filter " << internal_filter.first; + } +} + +TEST_F(FilterApplicationPropagation, Sink) { + RowFilter filter; + filter.set_sink(true); + + TestPropagation(filter, 0); +} + 
+TEST_F(FilterApplicationPropagation, RowKeyRegex) { + RowFilter filter; + filter.set_row_key_regex_filter("foo.*"); + + TestPropagation(filter, 1); +} + +TEST_F(FilterApplicationPropagation, RowSample) { + RowFilter filter; + filter.set_row_sample_filter(0.5); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, FamilyNameRegex) { + RowFilter filter; + filter.set_family_name_regex_filter("foo.*"); + + TestPropagation(filter, 1); +} + +TEST_F(FilterApplicationPropagation, ColumnQualifierRegex) { + RowFilter filter; + filter.set_column_qualifier_regex_filter("foo.*"); + + TestPropagation(filter, 1); +} + +TEST_F(FilterApplicationPropagation, ColumnRange) { + RowFilter filter; + filter.mutable_column_range_filter()->set_family_name("fam1"); + filter.mutable_column_range_filter()->set_start_qualifier_open("q1"); + filter.mutable_column_range_filter()->set_end_qualifier_closed("q4"); + + TestPropagation(filter, 1); +} + +TEST_F(FilterApplicationPropagation, TimestampRange) { + RowFilter filter; + filter.mutable_timestamp_range_filter()->set_start_timestamp_micros(1000); + filter.mutable_timestamp_range_filter()->set_end_timestamp_micros(2000); + + TestPropagation(filter, 1); +} + +TEST_F(FilterApplicationPropagation, ValueRegex) { + RowFilter filter; + filter.set_value_regex_filter("foo.*"); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, ValueRange) { + RowFilter filter; + filter.mutable_value_range_filter()->set_start_value_open("q1"); + filter.mutable_value_range_filter()->set_end_value_closed("q4"); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, PerRowOffset) { + RowFilter filter; + filter.set_cells_per_row_offset_filter(10); + + for (auto const& filter_type : {"family_name_regex", "column_regex", + "column_range", "timestamp_range"}) { + PropagationNotExpected(filter_type); + } + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, PerRowLimit) { + RowFilter filter; + 
filter.set_cells_per_row_limit_filter(10); + + for (auto const& filter_type : {"family_name_regex", "column_regex", + "column_range", "timestamp_range"}) { + PropagationNotExpected(filter_type); + } + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, PerColumnLimit) { + RowFilter filter; + filter.set_cells_per_column_limit_filter(10); + + PropagationNotExpected("timestamp_range"); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, StripValue) { + RowFilter filter; + filter.set_strip_value_transformer(true); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, ApplyLabel) { + RowFilter filter; + filter.set_apply_label_transformer("foo"); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, InterleaveAllSupport) { + RowFilter filter; + auto& interleave = *filter.mutable_interleave(); + interleave.add_filters()->set_pass_all_filter(true); + interleave.add_filters()->set_pass_all_filter(true); + + TestPropagation(filter, 0); +} + +TEST_F(FilterApplicationPropagation, Condition) { + RowFilter filter; + auto& condition = *filter.mutable_condition(); + condition.mutable_predicate_filter()->set_pass_all_filter(true); + condition.mutable_true_filter()->set_pass_all_filter(true); + condition.mutable_false_filter()->set_pass_all_filter(true); + + for (bool underlying_supports_filter : {false, true}) { + for (auto& internal_filter_type : internal_filters_) { + // For lack of a better idea this test relies on the fact that the + // implementation calls the mocked source stream ctor in the following + // order: + // * for the source data + // * for the predicate stream + // * for the true branch stream + // * for the false branch stream + std::int32_t num_streams_created = 0; + auto maybe_stream = CreateFilter(filter, [&] { + auto mock_impl = std::make_unique(); + if (num_streams_created < 2 && + internal_filter_type.first == "row_key_regex") { + // source or predicate stream - they should 
only pass the row regexes + EXPECT_CALL(*mock_impl, + ApplyFilter(internal_filter_type.second.internal_filter)) + .WillOnce(Return(false)); // this should have no effect on the + // result. + } + if (num_streams_created >= 2) { + // true or false branch stream - they should propagate all filters + if (internal_filter_type.second.should_propagate) { + EXPECT_CALL( + *mock_impl, + ApplyFilter(internal_filter_type.second.internal_filter)) + .WillOnce(Return(underlying_supports_filter)); + } + } + ++num_streams_created; + return CellStream(std::move(mock_impl)); + }); + ASSERT_STATUS_OK(maybe_stream); + EXPECT_EQ(underlying_supports_filter, + maybe_stream->ApplyFilter( + internal_filter_type.second.internal_filter)) + << " for filter " << internal_filter_type.first; + } + } +} + +class InternalFiltersAreApplied : public ::testing::Test { + protected: + RowFilter filter_; + + template + void PerformTest(std::function onApply) { + auto maybe_stream = CreateFilter(filter_, [&] { + auto mock_impl = std::make_unique(); + EXPECT_CALL(*mock_impl, ApplyFilter) + .WillOnce([onApply](InternalFilter const& internal_filter) -> bool { + auto const* maybe_regex = absl::get_if(&internal_filter); + EXPECT_NE(nullptr, maybe_regex); + onApply(*maybe_regex); + return true; + }); + return CellStream(std::move(mock_impl)); + }); + ASSERT_STATUS_OK(maybe_stream); + // Verify that no separate CellStream object was created when filter is + // applied internally. 
+ EXPECT_NE(nullptr, dynamic_cast(&maybe_stream->impl())); + } +}; + +TEST_F(InternalFiltersAreApplied, RowKeyRegex) { + filter_.set_row_key_regex_filter("foo.*"); + + PerformTest([](RowKeyRegex const& row_key_regex) { + EXPECT_EQ("foo.*", row_key_regex.regex->pattern()); + }); +} + +TEST_F(InternalFiltersAreApplied, FamilyNameRegex) { + filter_.set_family_name_regex_filter("foo.*"); + + PerformTest([](FamilyNameRegex const& family_name_regex) { + EXPECT_EQ("foo.*", family_name_regex.regex->pattern()); + }); +} + +TEST_F(InternalFiltersAreApplied, ColumnRegex) { + filter_.set_column_qualifier_regex_filter("foo.*"); + + PerformTest([](ColumnRegex const& column_qualifier_regex) { + EXPECT_EQ("foo.*", column_qualifier_regex.regex->pattern()); + }); +} + +TEST_F(InternalFiltersAreApplied, ColumnRange) { + filter_.mutable_column_range_filter()->set_family_name("fam1"); + filter_.mutable_column_range_filter()->set_start_qualifier_open("q1"); + filter_.mutable_column_range_filter()->set_end_qualifier_closed("q4"); + + PerformTest([](ColumnRange const& column_range) { + EXPECT_EQ("fam1", column_range.column_family); + EXPECT_EQ("q1", column_range.range.start()); + EXPECT_TRUE(column_range.range.start_open()); + EXPECT_EQ("q4", column_range.range.end()); + EXPECT_TRUE(column_range.range.end_closed()); + }); +} + +TEST_F(InternalFiltersAreApplied, TimestampRange) { + filter_.mutable_timestamp_range_filter()->set_start_timestamp_micros(1000); + filter_.mutable_timestamp_range_filter()->set_end_timestamp_micros(2000); + + PerformTest([](TimestampRange const& timestamp_range) { + EXPECT_EQ(std::chrono::milliseconds(1), timestamp_range.range.start()); + EXPECT_EQ(std::chrono::milliseconds(2), timestamp_range.range.end()); + }); +} + +class VectorCellStream : public AbstractCellStreamImpl { + public: + explicit VectorCellStream(std::vector const& cells) + : cells_{cells}, current_cell_{cells_.begin()} {} + bool ApplyFilter(InternalFilter const&) override { return false; } + bool 
HasValue() const override { return current_cell_ != cells_.end(); } + CellView const& Value() const override { return current_cell_->AsCellView(); } + bool Next(NextMode mode) override { + if (mode != NextMode::kCell) { + return false; + } + ++current_cell_; + return true; + } + + private: + std::vector cells_; + std::vector::const_iterator current_cell_; +}; + +class FilterWorkTest : public ::testing::Test { + public: + protected: + static StatusOr> GetFilterOutput( + std::vector const& input_cells, RowFilter const& filter) { + auto maybe_stream = CreateFilter(filter, [input_cells] { + return CellStream(std::make_unique(input_cells)); + }); + if (!maybe_stream.status().ok()) { + return maybe_stream.status(); + } + + std::vector filter_output; + while (maybe_stream->HasValue()) { + auto& v = maybe_stream.value(); + filter_output.emplace_back( + v->row_key(), v->column_family(), v->column_qualifier(), + v->timestamp(), v->value(), + v->HasLabel() ? absl::optional{v->label()} + : absl::optional{}); + maybe_stream->Next(); + } + return filter_output; + } +}; + +TEST_F(FilterWorkTest, Pass) { + RowFilter filter; + filter.set_pass_all_filter(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, PassLabels) { + RowFilter filter; + filter.set_pass_all_filter(true); + + std::vector cells{ + TestCell{"r", "cf", "q", 0_ms, "v", "label1"}, + TestCell{"r", "cf", "q", 0_ms, "v", "label2"}, + TestCell{"r", "cf", "q", 0_ms, "v", "label3"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, Sink) { + RowFilter filter; + filter.set_sink(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + // Next row + 
TestCell{"r2", "cf", "q", 0_ms, "v"}, + // Next cell + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, Block) { + RowFilter filter; + filter.set_block_all_filter(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r1", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_TRUE(maybe_output->empty()); +} + +TEST_F(FilterWorkTest, RowRegex) { + RowFilter filter; + filter.set_row_key_regex_filter("r2"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r3", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, ValueRegex) { + RowFilter filter; + filter.set_value_regex_filter("v2"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v1"}, + TestCell{"r2", "cf", "q", 0_ms, "v2"}, + TestCell{"r2", "cf", "q", 0_ms, "v3"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(1, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); +} + +TEST_F(FilterWorkTest, SampleRows) { + RowFilter filter; + filter.set_row_sample_filter(0.5); + + size_t samples = 100; + std::vector cells; + cells.reserve(samples); + for (size_t i = 0; i < samples; i++) { + cells.emplace_back("r" + std::to_string(i), "cf", "q", 0_ms, "v"); + } + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_NE(0, maybe_output->size()); + EXPECT_NE(samples, maybe_output->size()); +} + +TEST_F(FilterWorkTest, FamilyNameRegex) { + 
RowFilter filter; + filter.set_family_name_regex_filter("cf2"); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r2", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf3", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, QualifierRegex) { + RowFilter filter; + filter.set_column_qualifier_regex_filter("q2"); + + std::vector cells{ + TestCell{"r1", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r2", "cf", "q3", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, ColumnRange) { + RowFilter filter; + filter.mutable_column_range_filter()->set_family_name("cf"); + filter.mutable_column_range_filter()->set_start_qualifier_open("q1"); + filter.mutable_column_range_filter()->set_end_qualifier_closed("q2"); + + std::vector cells{ + TestCell{"r1", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r2", "cf", "q3", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, ValueRange) { + RowFilter filter; + filter.mutable_value_range_filter()->set_start_value_open("v1"); + filter.mutable_value_range_filter()->set_end_value_closed("v2"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v1"}, + TestCell{"r2", "cf", "q", 0_ms, "v2"}, + TestCell{"r2", 
"cf", "q", 0_ms, "v2"}, + TestCell{"r3", "cf", "q", 0_ms, "v3"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, CellsPerRowOffset) { + RowFilter filter; + filter.set_cells_per_row_offset_filter(1); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r1", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r3", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(5, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[3], maybe_output->at(1)); + EXPECT_EQ(cells[5], maybe_output->at(2)); + EXPECT_EQ(cells[7], maybe_output->at(3)); + EXPECT_EQ(cells[8], maybe_output->at(4)); +} + +TEST_F(FilterWorkTest, CellsPerRowLimit) { + RowFilter filter; + filter.set_cells_per_row_limit_filter(1); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r1", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r3", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(4, maybe_output->size()); + EXPECT_EQ(cells[0], maybe_output->at(0)); + EXPECT_EQ(cells[2], maybe_output->at(1)); + EXPECT_EQ(cells[4], maybe_output->at(2)); + EXPECT_EQ(cells[6], maybe_output->at(3)); +} + +TEST_F(FilterWorkTest, LatestCellsPerColumnLimit) { + RowFilter filter; + 
filter.set_cells_per_column_limit_filter(1); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r1", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r3", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + TestCell{"r4", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(6, maybe_output->size()); + EXPECT_EQ(cells[0], maybe_output->at(0)); + EXPECT_EQ(cells[1], maybe_output->at(1)); + EXPECT_EQ(cells[2], maybe_output->at(2)); + EXPECT_EQ(cells[3], maybe_output->at(3)); + EXPECT_EQ(cells[4], maybe_output->at(4)); + EXPECT_EQ(cells[6], maybe_output->at(5)); +} + +TEST_F(FilterWorkTest, TimestampRange) { + RowFilter filter; + filter.mutable_timestamp_range_filter()->set_start_timestamp_micros(2000); + filter.mutable_timestamp_range_filter()->set_end_timestamp_micros(3000); + + std::vector cells{ + TestCell{"r1", "cf", "q", 3_ms, "v"}, + TestCell{"r2", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(1, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); +} + +TEST_F(FilterWorkTest, Label) { + RowFilter filter; + std::string label = "lbl"; + filter.set_apply_label_transformer(label); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r1", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + TestCell expected{"r1", "cf", "q", 0_ms, "v", label}; + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(expected, maybe_output->at(0)); + EXPECT_EQ(expected, maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, StripValue) { + RowFilter filter; + filter.set_strip_value_transformer(true); + + 
std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r1", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + TestCell expected{"r1", "cf", "q", 0_ms, ""}; + + ASSERT_EQ(2, maybe_output->size()); + EXPECT_EQ(expected, maybe_output->at(0)); + EXPECT_EQ(expected, maybe_output->at(1)); +} + +TEST_F(FilterWorkTest, Chain) { + RowFilter filter; + filter.mutable_chain()->add_filters()->set_cells_per_row_offset_filter(1); + filter.mutable_chain()->add_filters()->set_cells_per_row_limit_filter(1); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r1", "cf2", "q", 0_ms, "v"}, + TestCell{"r1", "cf3", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q1", 0_ms, "v"}, + TestCell{"r2", "cf", "q2", 0_ms, "v"}, + TestCell{"r2", "cf", "q3", 0_ms, "v"}, + TestCell{"r3", "cf", "q", 3_ms, "v"}, + TestCell{"r3", "cf", "q", 2_ms, "v"}, + TestCell{"r3", "cf", "q", 1_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(3, maybe_output->size()); + EXPECT_EQ(cells[1], maybe_output->at(0)); + EXPECT_EQ(cells[4], maybe_output->at(1)); + EXPECT_EQ(cells[7], maybe_output->at(2)); +} + +TEST_F(FilterWorkTest, ChainEmpty) { + RowFilter filter; + filter.mutable_chain()->Clear(); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, ChainSink) { + RowFilter filter; + filter.mutable_chain()->add_filters()->set_sink(true); + filter.mutable_chain()->add_filters()->set_sink(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + 
ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, Interleave) { + RowFilter filter; + filter.mutable_interleave()->add_filters()->set_family_name_regex_filter( + "cf1"); + filter.mutable_interleave()->add_filters()->set_family_name_regex_filter( + "cf2"); + + std::vector cells{ + TestCell{"r1", "cf1", "q", 0_ms, "v"}, + TestCell{"r2", "cf2", "q", 0_ms, "v"}, + TestCell{"r2", "cf2", "q", 0_ms, "v"}, + TestCell{"r3", "cf1", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(cells, *maybe_output); +} + +TEST_F(FilterWorkTest, InterleaveEmpty) { + RowFilter filter; + filter.mutable_interleave()->Clear(); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + EXPECT_EQ(0, maybe_output->size()); +} + +TEST_F(FilterWorkTest, InterleaveSink) { + RowFilter filter; + filter.mutable_interleave()->add_filters()->set_sink(true); + filter.mutable_interleave()->add_filters()->set_block_all_filter(true); + filter.mutable_interleave()->add_filters()->set_sink(true); + filter.mutable_interleave()->add_filters()->set_pass_all_filter(true); + filter.mutable_interleave()->add_filters()->set_sink(true); + + std::vector cells{ + TestCell{"r1", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + TestCell{"r2", "cf", "q", 0_ms, "v"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(cells.size() * 4, maybe_output->size()); + for (size_t i = 0; i < maybe_output->size() / 3; i++) { + EXPECT_EQ(cells[0], maybe_output->at(i)); + EXPECT_EQ(cells[1], maybe_output->at(i + maybe_output->size() / 3)); + EXPECT_EQ(cells[2], maybe_output->at(i + 2 * maybe_output->size() / 3)); + } +} + +// The test case from the example given next to `sink` protobuf definition. 
+TEST_F(FilterWorkTest, RegexInterleaveChainLabelSinkRegex) { + RowFilter filter; + + RowFilter* c0 = filter.mutable_chain()->add_filters(); + RowFilter* c1 = filter.mutable_chain()->add_filters(); + RowFilter* c2 = filter.mutable_chain()->add_filters(); + + RowFilter* c1i0 = c1->mutable_interleave()->add_filters(); + RowFilter* c1i1 = c1->mutable_interleave()->add_filters(); + + RowFilter* c1i1c0 = c1i1->mutable_chain()->add_filters(); + RowFilter* c1i1c1 = c1i1->mutable_chain()->add_filters(); + + c0->set_family_name_regex_filter("A"); + + c1i0->set_pass_all_filter(true); + c1i1c0->set_apply_label_transformer("foo"); + c1i1c1->set_sink(true); + + c2->set_column_qualifier_regex_filter("B"); + + std::vector cells{ + TestCell("r", "A", "A", 1_ms, "w"), + TestCell("r", "A", "B", 2_ms, "x"), + TestCell("r", "B", "B", 4_ms, "z"), + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + TestCell labeled0 = cells[0].Labeled("foo"); + TestCell labeled1 = cells[1].Labeled("foo"); + + ASSERT_EQ(3, maybe_output->size()); + EXPECT_EQ(labeled0, maybe_output->at(0)); + EXPECT_TRUE(maybe_output->at(1) == labeled1 || + maybe_output->at(1) == cells[1]); + EXPECT_TRUE(maybe_output->at(2) == labeled1 || + maybe_output->at(2) == cells[1]); + EXPECT_NE(maybe_output->at(1).AsCellView().HasLabel(), + maybe_output->at(2).AsCellView().HasLabel()); +} + +TEST_F(FilterWorkTest, ConditionEmptyNonempty) { + RowFilter filter; + filter.mutable_condition() + ->mutable_predicate_filter() + ->set_value_regex_filter("t"); + filter.mutable_condition() + ->mutable_true_filter() + ->set_apply_label_transformer("TRUE"); + filter.mutable_condition() + ->mutable_false_filter() + ->set_apply_label_transformer("FALSE"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 3_ms, "t"}, + TestCell{"r1", "cf", "q", 2_ms, "t"}, + TestCell{"r1", "cf", "q", 1_ms, "t"}, + TestCell{"r2", "cf", "q", 3_ms, "f"}, + TestCell{"r2", "cf", "q", 2_ms, "t"}, + TestCell{"r2", "cf", "q", 
1_ms, "f"}, + TestCell{"r3", "cf", "q", 3_ms, "f"}, + TestCell{"r3", "cf", "q", 2_ms, "f"}, + TestCell{"r3", "cf", "q", 1_ms, "f"}, + TestCell{"r4", "cf", "q", 3_ms, "t"}, + TestCell{"r4", "cf", "q", 2_ms, "f"}, + TestCell{"r4", "cf", "q", 1_ms, "t"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + ASSERT_EQ(cells.size(), maybe_output->size()); + EXPECT_EQ(cells[1].Labeled("TRUE"), maybe_output->at(1)); + EXPECT_EQ(cells[2].Labeled("TRUE"), maybe_output->at(2)); + EXPECT_EQ(cells[3].Labeled("TRUE"), maybe_output->at(3)); + EXPECT_EQ(cells[4].Labeled("TRUE"), maybe_output->at(4)); + EXPECT_EQ(cells[5].Labeled("TRUE"), maybe_output->at(5)); + EXPECT_EQ(cells[6].Labeled("FALSE"), maybe_output->at(6)); + EXPECT_EQ(cells[7].Labeled("FALSE"), maybe_output->at(7)); + EXPECT_EQ(cells[8].Labeled("FALSE"), maybe_output->at(8)); + EXPECT_EQ(cells[9].Labeled("TRUE"), maybe_output->at(9)); + EXPECT_EQ(cells[10].Labeled("TRUE"), maybe_output->at(10)); + EXPECT_EQ(cells[11].Labeled("TRUE"), maybe_output->at(11)); +} + +TEST_F(FilterWorkTest, ConditionBranchFilterNextDifferentThanCell) { + RowFilter filter; + filter.mutable_condition() + ->mutable_predicate_filter() + ->set_value_regex_filter("t"); + filter.mutable_condition() + ->mutable_true_filter() + ->mutable_chain() + ->add_filters() + ->set_apply_label_transformer("TRUE"); + filter.mutable_condition() + ->mutable_true_filter() + ->mutable_chain() + ->add_filters() + ->set_cells_per_column_limit_filter(1); + filter.mutable_condition() + ->mutable_false_filter() + ->mutable_chain() + ->add_filters() + ->set_apply_label_transformer("FALSE"); + filter.mutable_condition() + ->mutable_false_filter() + ->mutable_chain() + ->add_filters() + ->set_column_qualifier_regex_filter("q2"); + + std::vector cells{ + TestCell{"r1", "cf", "q", 3_ms, "t"}, + TestCell{"r1", "cf", "q", 2_ms, "t"}, + TestCell{"r1", "cf", "q", 1_ms, "t"}, + TestCell{"r2", "cf", "q", 3_ms, "f"}, + TestCell{"r2", 
"cf", "q", 2_ms, "t"}, + TestCell{"r2", "cf", "q", 1_ms, "f"}, + TestCell{"r3", "cf1", "q2", 1_ms, "f"}, + TestCell{"r3", "cf2", "q1", 2_ms, "f"}, + TestCell{"r3", "cf3", "q2", 3_ms, "f"}, + TestCell{"r4", "cf", "q", 3_ms, "f"}, + TestCell{"r4", "cf", "q", 2_ms, "f"}, + TestCell{"r4", "cf", "q", 1_ms, "t"}, + }; + auto maybe_output = GetFilterOutput(cells, filter); + ASSERT_STATUS_OK(maybe_output); + + std::vector expected{ + TestCell{"r1", "cf", "q", 3_ms, "t", "TRUE"}, + TestCell{"r2", "cf", "q", 3_ms, "f", "TRUE"}, + TestCell{"r3", "cf1", "q2", 1_ms, "f", "FALSE"}, + TestCell{"r3", "cf3", "q2", 3_ms, "f", "FALSE"}, + TestCell{"r4", "cf", "q", 3_ms, "f", "TRUE"}, + }; + EXPECT_EQ(expected, *maybe_output); +} + +// Test our implementation of the ColumnRange filter, by actually +// streaming cells from actual table data (hence end to end). +TEST(FiltersEndToEnd, ColumnRange) { + std::vector column_families = {"family1", "family2", "family3"}; + auto maybe_table = CreateTable("table", column_families); + ASSERT_STATUS_OK(maybe_table); + auto& table = maybe_table.value(); + + std::vector created = { + {"family1", "a00", 0, "bar"}, {"family1", "b00", 0, "bar"}, + {"family1", "b01", 0, "bar"}, {"family1", "b02", 0, "bar"}, + {"family2", "a00", 0, "bar"}, {"family2", "b01", 0, "bar"}, + {"family2", "b00", 0, "bar"}, {"family3", "a00", 0, "bar"}, + }; + + std::string row_key = "column-range-row-key"; + + auto status = SetCells(table, "table", row_key, created); + ASSERT_STATUS_OK(status); + + auto all_rows_set = std::make_shared(StringRangeSet::All()); + + RowFilter filter; + filter.mutable_column_range_filter()->set_family_name("family1"); + filter.mutable_column_range_filter()->set_start_qualifier_closed("b00"); + filter.mutable_column_range_filter()->set_end_qualifier_open("b02"); + + auto maybe_stream = table->CreateCellStream(all_rows_set, filter); + ASSERT_STATUS_OK(maybe_stream); + + std::vector expected = { + {row_key, "family1", "b00", 0_ms, "bar"}, + {row_key, 
"family1", "b01", 0_ms, "bar"}, + }; + + std::vector actual; + auto& stream = *maybe_stream; + for (; stream; ++stream) { + actual.emplace_back(stream->row_key(), stream->column_family(), + stream->column_qualifier(), stream->timestamp(), + stream->value()); + } + + ASSERT_EQ(expected, actual); +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/filtered_map.h b/google/cloud/bigtable/emulator/filtered_map.h new file mode 100644 index 0000000000000..90d8ed991d55b --- /dev/null +++ b/google/cloud/bigtable/emulator/filtered_map.h @@ -0,0 +1,426 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTERED_MAP_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTERED_MAP_H + +#include "google/cloud/bigtable/emulator/range_set.h" +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +/** + * A map view filtering elements by whether their keys fall into a string range + * set. + * + * Objects of this type provide a lightweight wrapper around `std::map`-like + * object, which provides a iterator, which will skip over unwanted elements. + * + * This class is not very generic. It should be thought of as a crude way of + * deduplicating code. 
+ * + * The unfiltered elements' keys should fall into a given string range set. + * + * @tparam Map the type of the map-like object to be wrapped. + */ +template +class StringRangeFilteredMapView { + public: + // NOLINTNEXTLINE(readability-identifier-naming) + class const_iterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = + typename std::iterator_traits::value_type; + using difference_type = typename std::iterator_traits< + typename Map::const_iterator>::difference_type; + using reference = value_type const&; + using pointer = value_type const*; + + const_iterator( + StringRangeFilteredMapView const& parent, + typename Map::const_iterator unfiltered_pos, + typename std::set:: + const_iterator filter_pos) + : parent_(std::cref(parent)), + unfiltered_pos_(std::move(unfiltered_pos)), + filter_pos_(std::move(filter_pos)) { + AdvanceToNextRange(); + EnsureIteratorValid(); + } + + const_iterator& operator++() { + ++unfiltered_pos_; + EnsureIteratorValid(); + return *this; + } + + const_iterator operator++(int) { + const_iterator retval = *this; + ++(*this); + return retval; + } + + bool operator==(const_iterator const& other) const { + return unfiltered_pos_ == other.unfiltered_pos_; + } + + bool operator!=(const_iterator const& other) const { + return !(*this == other); + } + + reference operator*() const { return *unfiltered_pos_; } + pointer operator->() const { return &*unfiltered_pos_; } + + private: + // Adjust `unfiltered_pos_` after we transition to a different range. + void AdvanceToNextRange() { + if (filter_pos_ == parent_.get().filter_.get().disjoint_ranges().end()) { + // No ranges are left, so the iteration is over. + unfiltered_pos_ = parent_.get().unfiltered_.get().end(); + return; + } + if (unfiltered_pos_ == parent_.get().unfiltered_.get().end()) { + // The map is exhausted; there is nothing to advance. + return; + } + if (!filter_pos_->IsBelowStart(unfiltered_pos_->first)) { + // unfiltered_pos_ is not below the start of the current range.
+ return; + } + + if (filter_pos_->start_closed()) { + unfiltered_pos_ = parent_.get().unfiltered_.get().lower_bound( + filter_pos_->start_finite()); + } else { + unfiltered_pos_ = parent_.get().unfiltered_.get().upper_bound( + filter_pos_->start_finite()); + } + } + + // After `unfiltered_pos_` was increased, make sure it's within a valid + // range. + void EnsureIteratorValid() { + // `unfiltered_pos_` may point to a row which is past the end of the range + // pointed by filter_pos_. Make sure this only happens when the iteration + // reaches its end. + while (unfiltered_pos_ != parent_.get().unfiltered_.get().end() && + filter_pos_ != + parent_.get().filter_.get().disjoint_ranges().end() && + filter_pos_->IsAboveEnd(unfiltered_pos_->first)) { + ++filter_pos_; + AdvanceToNextRange(); + } + } + + std::reference_wrapper parent_; + typename Map::const_iterator unfiltered_pos_; + typename std::set::const_iterator + filter_pos_; + }; + + /** + * Create a new object. + * + * Objects of this class store references to arguments passed in the + * constructor. The user is responsible for making sure that the referenced + * objects continue to exist throughout the lifetime of this object. They + * should also not change. + * + * @param unfiltered the map whose elements need to be filtered. + * @param filter the range set which dictates which ranges remain unfiltered. + */ + StringRangeFilteredMapView(Map const& unfiltered, + StringRangeSet const& filter) + : unfiltered_(std::cref(unfiltered)), filter_(std::cref(filter)) {} + + const_iterator begin() const { + return const_iterator(*this, unfiltered_.get().begin(), + filter_.get().disjoint_ranges().begin()); + } + const_iterator end() const { + return const_iterator(*this, unfiltered_.get().end(), + filter_.get().disjoint_ranges().end()); + } + + private: + std::reference_wrapper unfiltered_; + std::reference_wrapper filter_; +}; + +/** + * A map view filtering elements by whether their keys fall into a timestamp + * range set.
+ * + * Objects of this type provide a lightweight wrapper around `std::map`-like + * object, which provides a iterator, which will skip over unwanted elements. + * + * This class is not very generic. It should be thought of as a crude way of + * deduplicating code. + * + * The unfiltered elements' keys should fall into a given timestamp range set. + * + * Note that the implementation assumes that the input `std::map`-like object's + * iterator is sorted high-to-low. + * + * @tparam Map the type of the map-like object to be wrapped. + */ +template +class TimestampRangeFilteredMapView { + public: + // NOLINTNEXTLINE(readability-identifier-naming) + class const_iterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = + typename std::iterator_traits::value_type; + using difference_type = typename std::iterator_traits< + typename Map::const_iterator>::difference_type; + using reference = value_type const&; + using pointer = value_type const*; + + // Note that the set whose iterator is received here is sorted + // "earliest-start-first", whereas we need to have the iterator sorted + // "latest-end-first". Fortunately, the set is disjoint, so we can simply + // use a reverse iterator.
+ const_iterator( + TimestampRangeFilteredMapView const& parent, + typename Map::const_iterator unfiltered_pos, + typename std::set:: + const_reverse_iterator const& filter_pos) + : parent_(std::cref(parent)), + unfiltered_pos_(std::move(unfiltered_pos)), + filter_pos_(filter_pos) { + AdvanceToNextRange(); + EnsureIteratorValid(); + } + + const_iterator& operator++() { + ++unfiltered_pos_; + EnsureIteratorValid(); + return *this; + } + + const_iterator operator++(int) { + const_iterator retval = *this; + ++(*this); + return retval; + } + + bool operator==(const_iterator const& other) const { + return unfiltered_pos_ == other.unfiltered_pos_; + } + + bool operator!=(const_iterator const& other) const { + return !(*this == other); + } + + reference operator*() const { return *unfiltered_pos_; } + pointer operator->() const { return &*unfiltered_pos_; } + + private: + // Adjust `unfiltered_pos_` after we transition to a different range. + void AdvanceToNextRange() { + if (filter_pos_ == + parent_.get().filter_.get().disjoint_ranges().crend()) { + // No ranges are left, so the iteration is over. + unfiltered_pos_ = parent_.get().unfiltered_.get().end(); + return; + } + if (unfiltered_pos_ == parent_.get().unfiltered_.get().end()) { + // The map is exhausted; there is nothing to advance. + return; + } + if (!filter_pos_->IsAboveEnd(unfiltered_pos_->first)) { + // unfiltered_pos_ is not above the end of the current range. + return; + } + // Timestamp ranges always have end open, so we always use upper_bound(). + unfiltered_pos_ = + parent_.get().unfiltered_.get().upper_bound(filter_pos_->end()); + } + + // After `unfiltered_pos_` was increased, make sure it's within a valid + // range. + void EnsureIteratorValid() { + // `unfiltered_pos_` may point to a key which is below the start of the + // range pointed to by filter_pos_. Make sure this only happens when the + // iteration reaches its end.
+ while (unfiltered_pos_ != parent_.get().unfiltered_.get().end() && + filter_pos_ != + parent_.get().filter_.get().disjoint_ranges().crend() && + filter_pos_->IsBelowStart(unfiltered_pos_->first)) { + ++filter_pos_; + AdvanceToNextRange(); + } + } + + std::reference_wrapper parent_; + typename Map::const_iterator unfiltered_pos_; + typename std::set:: + const_reverse_iterator filter_pos_; + }; + + /** + * Create a new object. + * + * Objects of this class store references to arguments passed in the + * constructor. The user is responsible for making sure that the referenced + * objects continue to exist throughout the lifetime of this object. They + * should also not change. + * + * @param unfiltered the map whose elements need to be filtered. + * @param filter the range set which dictates which ranges remain unfiltered. + */ + TimestampRangeFilteredMapView(Map const& unfiltered, + TimestampRangeSet const& filter) + : unfiltered_(std::cref(unfiltered)), filter_(std::cref(filter)) {} + + const_iterator begin() const { + return const_iterator(*this, unfiltered_.get().begin(), + filter_.get().disjoint_ranges().crbegin()); + } + const_iterator end() const { + return const_iterator(*this, unfiltered_.get().end(), + filter_.get().disjoint_ranges().crend()); + } + + private: + std::reference_wrapper unfiltered_; + std::reference_wrapper filter_; +}; + +/** + * A map view filtering elements by whether their keys match a regex. + * + * Objects of this type provide a lightweight wrapper around `std::map`-like + * object, which provides an iterator, which will skip over unwanted elements. + * + * This class is not very generic. It should be thought of as a crude way of + * deduplicating code. + * + * Elements whose keys match all regexes are not filtered out. + * + * @tparam Map the type of the map-like object to be wrapped.
+ */ +template +class RegexFiteredMapView { + public: + // NOLINTNEXTLINE(readability-identifier-naming) + class const_iterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = + typename std::iterator_traits::value_type; + using difference_type = typename std::iterator_traits< + typename Map::const_iterator>::difference_type; + using reference = value_type const&; + using pointer = value_type const*; + + const_iterator(RegexFiteredMapView const& parent, + typename Map::const_iterator unfiltered_pos) + : parent_(std::cref(parent)), + unfiltered_pos_(std::move(unfiltered_pos)) { + EnsureIteratorValid(); + } + + const_iterator& operator++() { + ++unfiltered_pos_; + EnsureIteratorValid(); + return *this; + } + + const_iterator operator++(int) { + const_iterator retval = *this; + ++(*this); + return retval; + } + + bool operator==(const_iterator const& other) const { + return unfiltered_pos_ == other.unfiltered_pos_; + } + + bool operator!=(const_iterator const& other) const { + return !(*this == other); + } + + reference operator*() const { return *unfiltered_pos_; } + pointer operator->() const { return &*unfiltered_pos_; } + + private: + // Advance `unfiltered_pos_` to the next key matching every regex, or end(). + void EnsureIteratorValid() { + for (; unfiltered_pos_ != parent_.get().unfiltered_.end() && + std::any_of(parent_.get().filters_.get().begin(), + parent_.get().filters_.get().end(), + [&](std::shared_ptr const& filter) { + return !re2::RE2::PartialMatch( + unfiltered_pos_->first, *filter); + }); + ++unfiltered_pos_) { + } + } + + std::reference_wrapper parent_; + typename Map::const_iterator unfiltered_pos_; + }; + + /** + * Create a new object. + * + * Objects of this class store `unfiltered` by value and only a reference to + * `filters`. The user is responsible for making sure that `filters` continues + * to exist throughout the lifetime of this object. It should also not + * change.
+ * + * @unfiltered the map whose elements need to be filtered. + * @filters the regexes which element's keys have match to not be filtered + * out. + */ + RegexFiteredMapView( + Map unfiltered, + std::vector> const& filters) + : unfiltered_(std::move(unfiltered)), filters_(std::cref(filters)) {} + + const_iterator begin() const { + return const_iterator(*this, unfiltered_.begin()); + } + const_iterator end() const { + return const_iterator(*this, unfiltered_.end()); + } + + private: + Map unfiltered_; + std::reference_wrapper> const> + filters_; +}; + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_FILTERED_MAP_H diff --git a/google/cloud/bigtable/emulator/filtered_map_test.cc b/google/cloud/bigtable/emulator/filtered_map_test.cc new file mode 100644 index 0000000000000..cf6b386a75f1e --- /dev/null +++ b/google/cloud/bigtable/emulator/filtered_map_test.cc @@ -0,0 +1,265 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "google/cloud/bigtable/emulator/filtered_map.h" +#include "google/cloud/testing_util/chrono_literals.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +using testing_util::chrono_literals::operator""_ms; + +bool const kOpen = true; /* Range bound flag: the bound is excluded. */ +bool const kClosed = false; /* Range bound flag: the bound is included. */ + +template +std::vector Keys(Map const& map) { + std::vector res; + std::transform(map.begin(), map.end(), std::back_inserter(res), + [](typename Map::const_iterator::value_type const& elem) { + return elem.first; + }); + return res; +} + +std::vector Vec(std::initializer_list const& v) { + std::vector res; + std::transform(v.begin(), v.end(), std::back_inserter(res), + [](char const* s) { return std::string(s); }); + std::sort(res.begin(), res.end()); /* Same order as std::map keys. */ + return res; +} + +template +std::vector TSKeys(Map const& map) { + std::vector res; + std::transform(map.begin(), map.end(), std::back_inserter(res), + [](typename Map::const_iterator::value_type const& elem) { + return elem.first; + }); + return res; +} + +TEST(StringRangeFilteredMapView, NoFilter) { + std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; + auto filter = StringRangeSet::All(); + StringRangeFilteredMapView filtered(unfiltered, filter); + EXPECT_EQ(Vec({"zero", "one", "two"}), Keys(filtered)); +} + +TEST(StringRangeFilteredMapView, EmptyFilter) { + std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; + auto filter = StringRangeSet::Empty(); + StringRangeFilteredMapView filtered(unfiltered, filter); + EXPECT_EQ(Vec({}), Keys(filtered)); +} + +TEST(StringRangeFilteredMapView, OneOpen) { + std::map unfiltered{{"AA", 0}, {"AAA", 0}, {"AAAa", 0}, + {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; + auto filter = StringRangeSet::Empty(); + filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); + StringRangeFilteredMapView filtered(unfiltered, filter); +
EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); +} + +TEST(StringRangeFilteredMapView, OneClosed) { + std::map unfiltered{{"AA", 0}, {"AAA", 0}, {"AAAa", 0}, + {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; + auto filter = StringRangeSet::Empty(); + filter.Sum(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); + StringRangeFilteredMapView filtered(unfiltered, filter); + EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb", "AAB"}), Keys(filtered)); +} + +TEST(StringRangeFilteredMapView, NoEntriesAfterClosedFilter) { + std::map unfiltered{ + {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}}; + auto filter = StringRangeSet::Empty(); + filter.Sum(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); + StringRangeFilteredMapView filtered(unfiltered, filter); + EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb"}), Keys(filtered)); +} + +TEST(StringRangeFilteredMapView, NoEntriesAfterOpenFilter) { + std::map unfiltered{ + {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}}; + auto filter = StringRangeSet::Empty(); + filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); + StringRangeFilteredMapView filtered(unfiltered, filter); + EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); +} + +TEST(StringRangeFilteredMapView, NoEntriesBeforeClosedFilter) { + std::map unfiltered{ + {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; + auto filter = StringRangeSet::Empty(); + filter.Sum(StringRangeSet::Range("AAA", kClosed, "AAB", kClosed)); + StringRangeFilteredMapView filtered(unfiltered, filter); + EXPECT_EQ(Vec({"AAA", "AAAa", "AAAb", "AAB"}), Keys(filtered)); +} + +TEST(StringRangeFilteredMapView, NoEntriesBeforeOpenFilter) { + std::map unfiltered{ + {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, {"AAC", 0}}; + auto filter = StringRangeSet::Empty(); + filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kOpen)); + StringRangeFilteredMapView filtered(unfiltered, filter); + EXPECT_EQ(Vec({"AAAa", "AAAb"}), Keys(filtered)); +} + +TEST(StringRangeFilteredMapView, MultipleFilters) { + std::map
unfiltered{ + {"AA", 0}, {"AAA", 0}, {"AAAa", 0}, {"AAAb", 0}, {"AAB", 0}, + {"AAC", 0}, {"BB", 0}, {"BBB", 0}, {"BBBb", 0}, {"CCCa", 0}, + {"CCCb", 0}, {"CCD", 0}, {"CCE", 0}}; + auto filter = StringRangeSet::Empty(); + filter.Sum(StringRangeSet::Range("AAA", kOpen, "AAB", kClosed)); + filter.Sum(StringRangeSet::Range("BBB", kClosed, "BBC", kOpen)); + filter.Sum(StringRangeSet::Range("CCC", kClosed, "CCD", kOpen)); + StringRangeFilteredMapView filtered(unfiltered, filter); + + EXPECT_EQ(Vec({"AAAa", "AAAb", "AAB", "BBB", "BBBb", "CCCa", "CCCb"}), + Keys(filtered)); +} + +TEST(TimestampRangeFilteredMapView, NoFilter) { + std::map> unfiltered{ + {0_ms, 0}, {1_ms, 1}, {2_ms, 2}}; + auto filter = TimestampRangeSet::All(); + TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({2_ms, 1_ms, 0_ms}), + TSKeys(filtered)); +} + +TEST(TimestampRangeFilteredMapView, EmptyFilter) { + std::map> unfiltered{ + {0_ms, 0}, {1_ms, 1}, {2_ms, 2}}; + auto filter = TimestampRangeSet::Empty(); + TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({}), TSKeys(filtered)); +} + +TEST(TimestampRangeFilteredMapView, FiniteRange) { + std::map> unfiltered{ + {0_ms, 0}, {1_ms, 0}, {2_ms, 0}, {3_ms, 0}, {4_ms, 0}}; + auto filter = TimestampRangeSet::Empty(); + filter.Sum(TimestampRangeSet::Range(1_ms, 3_ms)); + TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({2_ms, 1_ms}), + TSKeys(filtered)); +} + +TEST(TimestampRangeFilteredMapView, InfiniteRange) { + std::map> unfiltered{ + {0_ms, 0}, {1_ms, 0}, {2_ms, 0}, {3_ms, 0}, {4_ms, 0}}; + auto filter = TimestampRangeSet::Empty(); + filter.Sum(TimestampRangeSet::Range(1_ms, 0_ms)); + TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({4_ms, 3_ms, 2_ms, 1_ms}), + TSKeys(filtered)); +} + +TEST(TimestampRangeFilteredMapView, MultipleFilters) { + std::chrono::milliseconds max_millis(std::numeric_limits::max()); +
std::map> unfiltered{ + {0_ms, 0}, {1_ms, 0}, {2_ms, 0}, {3_ms, 0}, + {4_ms, 0}, {5_ms, 0}, {6_ms, 0}, {7_ms, 0}, + {8_ms, 0}, {9_ms, 0}, {10_ms, 0}, {11_ms, 0}, + {12_ms, 0}, {13_ms, 0}, {14_ms, 0}, {max_millis, 0}, + }; + auto filter = TimestampRangeSet::Empty(); + filter.Sum(TimestampRangeSet::Range(1_ms, 3_ms)); + filter.Sum(TimestampRangeSet::Range(3_ms, 5_ms)); + filter.Sum(TimestampRangeSet::Range(6_ms, 8_ms)); + filter.Sum(TimestampRangeSet::Range(10_ms, 12_ms)); + filter.Sum(TimestampRangeSet::Range(13_ms, 0_ms)); + TimestampRangeFilteredMapView filtered(unfiltered, + filter); + EXPECT_EQ(std::vector({max_millis, 14_ms, 13_ms, + 11_ms, 10_ms, 7_ms, 6_ms, + 4_ms, 3_ms, 2_ms, 1_ms}), + TSKeys(filtered)); +} + +TEST(RegexFiteredMapView, NoFilter) { + std::vector> patterns; + std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; + auto filter = StringRangeSet::All(); + + RegexFiteredMapView filtered(unfiltered, patterns); + EXPECT_EQ(Vec({"zero", "one", "two"}), Keys(filtered)); +} + +TEST(RegexFiteredMapView, EmptyFilter) { + auto pattern = std::make_shared("this_will_not_be_matched"); + ASSERT_TRUE(pattern->ok()); + std::vector> patterns({std::move(pattern)}); + + std::map unfiltered{{"zero", 0}, {"one", 1}, {"two", 2}}; + RegexFiteredMapView filtered(unfiltered, patterns); + EXPECT_EQ(Vec({}), Keys(filtered)); +} + +TEST(RegexFiteredMapView, OneFilter) { + auto pattern = std::make_shared("^[a-z_]*$"); + ASSERT_TRUE(pattern->ok()); + std::vector> patterns({std::move(pattern)}); + + std::map unfiltered{ + {"NO_MATCH", 0}, {"match", 1}, {"another_match", 2}}; + RegexFiteredMapView filtered(unfiltered, patterns); + EXPECT_EQ(Vec({"match", "another_match"}), Keys(filtered)); +} + +TEST(RegexFiteredMapView, MultipleFilters) { + auto has_a = std::make_shared("a"); + ASSERT_TRUE(has_a->ok()); + auto has_b = std::make_shared("b"); + ASSERT_TRUE(has_b->ok()); + auto has_c = std::make_shared("c"); + ASSERT_TRUE(has_c->ok()); + std::vector> patterns( +
{std::move(has_a), std::move(has_b), std::move(has_c)}); + + std::map unfiltered{ + {"abc", 0}, {"ab", 1}, {"a", 2}, {"QQ b QQ c QQ a QQ", 4}, {"ac", 5}}; + RegexFiteredMapView filtered(unfiltered, patterns); + EXPECT_EQ(Vec({"abc", "QQ b QQ c QQ a QQ"}), Keys(filtered)); +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/limits.h b/google/cloud/bigtable/emulator/limits.h new file mode 100644 index 0000000000000..46760d693232e --- /dev/null +++ b/google/cloud/bigtable/emulator/limits.h @@ -0,0 +1,29 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_LIMITS_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_LIMITS_H + +#include +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +constexpr std::size_t kMaxRowLen = 2 << 21; /* == 4194304 (4 * 1024 * 1024). NOTE(review): confirm this is the intended limit and its unit. */ +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_LIMITS_H diff --git a/google/cloud/bigtable/emulator/mutations_test.cc b/google/cloud/bigtable/emulator/mutations_test.cc new file mode 100644 index 0000000000000..d3d858aecc628 --- /dev/null +++ b/google/cloud/bigtable/emulator/mutations_test.cc @@ -0,0 +1,1561 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/emulator/test_util.h" +#include "google/cloud/internal/big_endian.h" +#include "google/cloud/internal/make_status.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include "google/cloud/testing_util/status_matchers.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +using ::google::protobuf::TextFormat; +using std::string; + +::google::bigtable::admin::v2::ColumnFamily MakeBEAggregateCFProto( + ::google::bigtable::admin::v2::Type_Aggregate::AggregatorCase aggregator) { + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto* value_type = column_family.mutable_value_type(); + auto* kind_aggregate_type = value_type->mutable_aggregate_type(); + switch (aggregator) { + case google::bigtable::admin::v2::Type::Aggregate::kSum: + kind_aggregate_type->mutable_sum(); + break; + case google::bigtable::admin::v2::Type::Aggregate::kMax: + kind_aggregate_type->mutable_max(); + break; + case google::bigtable::admin::v2::Type::Aggregate::kMin: + kind_aggregate_type->mutable_min(); + break; + default: + std::abort(); + } + auto* input_type = kind_aggregate_type->mutable_input_type(); + auto* int64_type = input_type->mutable_int64_type(); + // We need to set the encoding + auto* encoding = int64_type->mutable_encoding(); + encoding->mutable_big_endian_bytes(); + + // What do we do about the state_type? + // FIXME: Is this correct?
+ auto* state_type = kind_aggregate_type->mutable_state_type(); + int64_type = state_type->mutable_int64_type(); + encoding = int64_type->mutable_encoding(); + encoding->mutable_big_endian_bytes(); + + return column_family; +} + +::google::bigtable::admin::v2::Table CreateSchema( + std::string const& table_name, + std::map const& + column_families) { + ::google::bigtable::admin::v2::Table schema; + + schema.set_name(table_name); + for (auto const& cf : column_families) { + (*schema.mutable_column_families())[cf.first] = cf.second; + } + + return schema; +} + +Status DeleteFromFamilies( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector const& column_families) { + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + for (auto const& column_family : column_families) { + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* delete_from_family_mutation = + mutation_request_mutation->mutable_delete_from_family(); + delete_from_family_mutation->set_family_name(column_family); + } + + return table->MutateRow(mutation_request); +} + +struct DeleteFromColumnParams { + std::string column_family; + std::string column_qualifier; + ::google::bigtable::v2::TimestampRange* timestamp_range; +}; + +Status DeleteFromColumns( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector v) { + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + for (auto& param : v) { + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* delete_from_column_mutation = + mutation_request_mutation->mutable_delete_from_column(); + delete_from_column_mutation->set_family_name(param.column_family); + delete_from_column_mutation->set_column_qualifier(param.column_qualifier); +
delete_from_column_mutation->set_allocated_time_range( + param.timestamp_range); // The request takes ownership. + } + + return table->MutateRow(mutation_request); +} + +Status HasCell(std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier, int64_t timestamp_micros, + std::string const& value) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return NotFoundError("no row key found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); + } + + auto& column_family_row = column_family_row_it->second; + auto column_row_it = column_family_row.find(column_qualifier); + if (column_row_it == column_family_row.end()) { + return NotFoundError( + "no column found with qualifier", + GCP_ERROR_INFO().WithMetadata("column qualifier", column_qualifier)); + } + + auto& column_row = column_row_it->second; + auto timestamp_it = + column_row.find(std::chrono::duration_cast( + std::chrono::microseconds(timestamp_micros))); + if (timestamp_it == column_row.end()) { + return NotFoundError( + "timestamp not found", + GCP_ERROR_INFO().WithMetadata("timestamp", + absl::StrFormat("%d", timestamp_micros))); + } + + if (timestamp_it->second != value) { + return NotFoundError("wrong value", + GCP_ERROR_INFO() + .WithMetadata("expected", value) + .WithMetadata("found", timestamp_it->second)); + } + + return Status(); +} + +Status HasColumn( + std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return
NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return NotFoundError("row key not found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); + } + + auto& column_family_row = column_family_row_it->second; + auto column_row_it = column_family_row.find(column_qualifier); + if (column_row_it == column_family_row.end()) { + return NotFoundError( + "no column found with supplied qualifier", + GCP_ERROR_INFO().WithMetadata("column qualifier", column_qualifier)); + } + + return Status(); +} + +StatusOr> GetColumn( + std::shared_ptr& table, + std::string const& column_family, std::string const& row_key, + std::string const& column_qualifier) { + auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return NotFoundError("row key not found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); + } + + auto& column_family_row = column_family_row_it->second; + auto column_row_it = column_family_row.find(column_qualifier); + if (column_row_it == column_family_row.end()) { + return NotFoundError( + "no column found with supplied qualifier", + GCP_ERROR_INFO().WithMetadata("column qualifier", column_qualifier)); + } + + std::map ret( + column_row_it->second.begin(), column_row_it->second.end()); + + return ret; +} + +Status HasRow(std::shared_ptr& table, + std::string const& column_family, std::string const& row_key) { +
auto column_family_it = table->find(column_family); + if (column_family_it == table->end()) { + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", column_family)); + } + + auto const& cf = column_family_it->second; + auto column_family_row_it = cf->find(row_key); + if (column_family_row_it == cf->end()) { + return NotFoundError("row key not found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key) + .WithMetadata("column family", column_family)); + } + + return Status(); +} + +// Test that SetCell does the right thing when it receives a zero or +// negative timestamp, and that the cell created can be correctly +// deleted if rollback occurs. +// +// In particular: +// +// Supplied with a timestamp of -1, it should store the current system time as +// timestamp. +// +// Supplied with a timestamp of 0, it should store it as is. +// +// Supplied with a timestamp < -1, it should return an error and fail the entire +// mutation chain.
+TEST(TransactionRollback, ZeroOrNegativeTimestampHandling) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifier = "test"; + auto const timestamp_micros = 0; + auto const* data = "test"; + + std::vector column_families = {column_family_name}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, + data}; + v.push_back(p); + + auto status = SetCells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + + auto status_or = + GetColumn(table, column_family_name, row_key, column_qualifier); + ASSERT_STATUS_OK(status_or.status()); + auto column = status_or.value(); + ASSERT_EQ(1, column.size()); + for (auto const& cell : column) { + ASSERT_EQ(cell.first.count(), 0); + ASSERT_EQ(data, cell.second); + } + + // Test that a mutation with timestamp 0 can be rolled back. + v.clear(); + v = {{column_family_name, column_qualifier, 0, data}, + {"non_existent_column_family_name_causes_tx_rollbaclk", column_qualifier, + 1000, data}}; + auto const* const row_key_2 = "1"; + status = SetCells(table, table_name, row_key_2, v); + // The second SetCell names a column family the table was not created + // with, so the whole call must fail and leave no row "1" behind. + ASSERT_NE(true, status.ok()); + ASSERT_FALSE(HasRow(table, column_family_name, row_key_2).ok()); + + // Test that a mutation with timestamp 0 succeeds and stores 0 as + // the timestamp.
+ v.clear(); + v = { + {column_family_name, column_qualifier, 0, data}, + }; + auto const* const row_key_3 = "2"; + status = SetCells(table, table_name, row_key_3, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(HasCell(table, v[0].column_family_name, row_key_3, + v[0].column_qualifier, 0, v[0].data)); + + // Test that a mutation with timestamp < -1 fails + v.clear(); + v = { + {column_family_name, column_qualifier, -2, data}, + }; + auto const* const row_key_4 = "3"; + status = SetCells(table, table_name, row_key_4, v); + ASSERT_FALSE(status.ok()); + + // Test that a mutation with timestamp -1 succeeds and stores the + // system time. + v.clear(); + v = { + {column_family_name, column_qualifier, -1, data}, + }; + auto const* const row_key_5 = "4"; + auto system_time_ms_before = + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); + status = SetCells(table, table_name, row_key_5, v); + ASSERT_STATUS_OK(status); + auto column_or = GetColumn(table, v[0].column_family_name, row_key_5, + v[0].column_qualifier); + ASSERT_STATUS_OK(column_or.status()); + auto col = column_or.value(); + ASSERT_EQ(col.size(), 1); + auto cell_it = col.begin(); + ASSERT_NE(cell_it, col.end()); + ASSERT_EQ(cell_it->second, v[0].data); + // The stored timestamp must not be earlier than the clock reading + // sampled just before the write. + ASSERT_GE(cell_it->first, system_time_ms_before); +} + +// Does the SetCell mutation work to set a cell to a specific value?
+TEST(TransactionRollback, SetCellBasicFunction) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifier = "test"; + auto const timestamp_micros = 1234; + auto const* data = "test"; + + std::vector column_families = {column_family_name}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, + data}; + v.push_back(p); + + auto status = SetCells(table, table_name, row_key, v); + + ASSERT_STATUS_OK(status); + + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, data)); +} + +// Test that an old value is correctly restored in a pre-populated +// cell, when one of a set of SetCell mutations fails after the cell +// had been updated with a new value. +TEST(TransactionRollback, TestRestoreValue) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + // The table will be set up with a schema with + // valid_column_family_name and mutations with this column family + // name are expected to succeed. We will simulate a transaction + // failure by setting some other not-pre-provisioned column family + // name.
+ auto const* const valid_column_family_name = "test"; + auto const* const column_qualifier = "test"; + int64_t good_mutation_timestamp_micros = 1000; + auto const* const good_mutation_data = "expected to succeed"; + + std::vector column_families = {valid_column_family_name}; + auto maybe_table = CreateTable(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {valid_column_family_name, column_qualifier, + good_mutation_timestamp_micros, good_mutation_data}; + v.push_back(p); + + auto status = SetCells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(HasCell(table, valid_column_family_name, row_key, + column_qualifier, good_mutation_timestamp_micros, + good_mutation_data)); + + // Now atomically try 2 mutations. One modifies the above set cell, + // and the other one is expected to fail. The test is that + // RestoreValue will restore the previous value in cell with + // timestamp 1000. + std::vector w; + // Everything is the same but we try and modify the value in the cell set + // above. + p.data = "new data"; + w.push_back(p); + + // Because "invalid_column_family" does not exist in the table + // schema, a mutation with these SetCell parameters is expected to + // fail. + p = {"invalid_column_family", "test2", 1000, "expected to fail"}; + w.push_back(p); + + status = SetCells(table, table_name, row_key, w); + ASSERT_NE(status.ok(), true); // The whole mutation chain should + // fail because the 2nd mutation + // contains an invalid column family. + + // And the first mutation should have been rolled back by + // RestoreValue and so should contain the old value, and not "new + // data".
+ ASSERT_STATUS_OK(HasCell(table, valid_column_family_name, row_key, + column_qualifier, good_mutation_timestamp_micros, + good_mutation_data)); +} + +// Test that a new cell introduced in a chain of SetCell mutations is +// deleted on rollback if a subsequent mutation fails. +TEST(TransactionRollback, DeleteValue) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + // The table will be set up with a schema with + // valid_column_family_name and mutations with this column family + // name are expected to succeed. We will simulate a transaction + // failure by setting some other not-pre-provisioned column family + // name. + auto const* const valid_column_family_name = "test"; + std::vector column_families = {valid_column_family_name}; + auto maybe_table = CreateTable(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + // To test that we do not delete a row or column that we should not, + // let us first commit a transaction on the same row where we will + // do the DeleteValue test. + std::vector v = { + {valid_column_family_name, "test", 1000, "data"}}; + auto status = SetCells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(HasCell(table, valid_column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, + v[0].data)); + + // We then setup a transaction chain with 2 SetCells, the first one + // should succeed to add a new cell and the second one should fail + // (because it assumes an invalid schema in column family name). We + // expect the first cell to not exist after the rollback (and of + // course also no data from the 2nd failing SetCell mutation should + // exist either).
+ v = {{valid_column_family_name, "test", 2000, "new data"}, + {"invalid_column_family_name", "test", 3000, "more new data"}}; + + status = SetCells(table, table_name, row_key, v); + ASSERT_NE(status.ok(), true); // We expect the chain of mutations to + // fail altogether. + status = HasCell(table, v[0].column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, v[0].data); + ASSERT_NE(status.ok(), true); // Undo should delete the cell + status = HasCell(table, v[1].column_family_name, row_key, + v[1].column_qualifier, v[1].timestamp_micros, v[1].data); + ASSERT_NE(status.ok(), true); // Also the SetCell with invalid schema + // should not have set anything. +} + +// Test that if a successful SetCell mutation in a chain of SetCell +// mutations in one transaction introduces a new column but a +// subsequent SetCell mutation fails (we simulate this by passing a +// column family name that is not in the table schema) then the column +// and any of the cells introduced is deleted in the rollback, but +// that any pre-transaction-attempt data in the row is unaffected. +TEST(TransactionRollback, DeleteColumn) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + // The table will be set up with a schema with + // valid_column_family_name and mutations with this column family + // name are expected to succeed. We will simulate a transaction + // failure by setting some other not-pre-provisioned column family + // name.
+ auto const* const valid_column_family_name = "test"; + std::vector column_families = {valid_column_family_name}; + auto maybe_table = CreateTable(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v = { + {valid_column_family_name, "test", 1000, "data"}}; + auto status = SetCells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(HasCell(table, valid_column_family_name, row_key, + v[0].column_qualifier, v[0].timestamp_micros, + v[0].data)); + + // Introduce a new column in a chain of SetCell mutations, a + // subsequent one of which must fail due to an invalid schema + // assumption (bad column family name). + v = {{valid_column_family_name, "new_column", 2000, "new data"}, + {"invalid_column_family_name", "test", 3000, "more new data"}}; + + status = SetCells(table, table_name, row_key, v); + ASSERT_NE(status.ok(), + true); // We expect the chain of mutations to + // fail altogether because the last one must fail. + + // The original column ("test") should still exist. + status = HasColumn(table, valid_column_family_name, row_key, "test"); + ASSERT_STATUS_OK(status); + + // But the new column introduced should have been rolled back. + status = + HasColumn(table, v[0].column_family_name, row_key, v[0].column_qualifier); + ASSERT_NE(status.ok(), true); +} + +// Test that a chain of SetCell mutations that initially introduces a +// new row, but one of which eventually fails, will end with the whole +// row rolled back. +TEST(TransactionRollback, DeleteRow) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + // The table will be set up with a schema with + // valid_column_family_name and mutations with this column family + // name are expected to succeed.
We will simulate a transaction + // failure by setting some other not-pre-provisioned column family + // name. + auto const* const valid_column_family_name = "test"; + std::vector column_families = {valid_column_family_name}; + auto maybe_table = CreateTable(table_name, column_families); + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + // First SetCell should succeed and introduce a new row with key + // "0". The second one will fail due to bad schema settings. We + // expect not to find the row after the row mutation call returns. + std::vector v = { + {valid_column_family_name, "test", 1000, "data"}, + {"invalid_column_family_name", "test", 2000, + "more new data which should never be written"}}; + + auto status = SetCells(table, table_name, row_key, v); + ASSERT_NE(status.ok(), + true); // We expect the chain of mutations to + // fail altogether because the last one must fail. + + status = HasRow(table, valid_column_family_name, row_key); + ASSERT_NE(status.ok(), true); +} + +// Does the DeleteFromFamily mutation work to delete a row from a +// specific family and does it leave rows with the same row key in other +// column families alone?
+TEST(TransactionRollback, DeleteFromFamilyBasicFunction) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifier = "test"; + auto const timestamp_micros = 1234; + auto const* data = "test"; + + auto const* const second_column_family_name = "test2"; + + std::vector column_families = {column_family_name, + second_column_family_name}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, + data}; + v.push_back(p); + + p = {second_column_family_name, column_qualifier, timestamp_micros, data}; + v.push_back(p); + + auto status = SetCells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, data)); + ASSERT_STATUS_OK( + HasColumn(table, column_family_name, row_key, column_qualifier)); + ASSERT_STATUS_OK(HasRow(table, column_family_name, row_key)); + + // Having established that the data is there, test the basic + // functionality of the DeleteFromFamily mutation by trying to + // delete it. + ASSERT_STATUS_OK( + DeleteFromFamilies(table, table_name, row_key, {column_family_name})); + ASSERT_NE(true, HasRow(table, column_family_name, row_key).ok()); + + // Ensure that we did not delete a row in another column family. + ASSERT_EQ(true, HasRow(table, second_column_family_name, row_key).ok()); +} + +// Test that DeleteFromFamily can be rolled back in case a subsequent +// mutation fails.
+TEST(TransactionRollback, DeleteFromFamilyRollback) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifier = "test"; + auto const timestamp_micros = 1234; + auto const* data = "test"; + + // Failure of one of the mutations is simulated by having a mutation + // with this column family, which has not been provisioned. Previous + // successful mutations should be rolled back when RowTransaction + // sees a mutation with this invalid column family name. + auto const* const column_family_not_in_schema = + "i_do_not_exist_in_the_schema"; + + std::vector column_families = {column_family_name}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, + data}; + v.push_back(p); + + auto status = SetCells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, data)); + ASSERT_STATUS_OK( + HasColumn(table, column_family_name, row_key, column_qualifier)); + ASSERT_STATUS_OK(HasRow(table, column_family_name, row_key)); + + // Set up two DeleteFromFamily mutations: The first one uses the + // correct table schema (a column family that exists) and is expected + // to succeed in deleting the row saved above. The second one uses a + // column family not provisioned and should fail, which should + // trigger a rollback of the previous row deletion. In the end, the + // above row should still exist and all its data should be intact.
+ status = + DeleteFromFamilies(table, table_name, row_key, + {column_family_name, column_family_not_in_schema}); + ASSERT_NE(true, status.ok()); // The overall chain of mutations should fail. + + // Check that the row deleted by the first mutation is restored, + // with all its data. + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, data)); + ASSERT_STATUS_OK( + HasColumn(table, column_family_name, row_key, column_qualifier)); + ASSERT_STATUS_OK(HasRow(table, column_family_name, row_key)); +} + +::google::bigtable::v2::TimestampRange* NewTimestampRange(int64_t start, + int64_t end) { + auto* range = new (::google::bigtable::v2::TimestampRange); + range->set_start_timestamp_micros(start); + range->set_end_timestamp_micros(end); + + return range; +} + +// Does DeleteFromColumn basically work? +TEST(TransactionRollback, DeleteFromColumnBasicFunction) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifier = "test"; + auto const* data = "test"; + + std::vector column_families = {column_family_name}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v = { + {column_family_name, column_qualifier, 1000, data}, + {column_family_name, column_qualifier, 2000, data}, + {column_family_name, column_qualifier, 3000, data}, + }; + + auto status = SetCells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 1000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 2000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key,
column_qualifier, + 3000, data)); + + std::vector dv = { + {column_family_name, column_qualifier, + NewTimestampRange(v[0].timestamp_micros, v[2].timestamp_micros + 1000)}}; + + ASSERT_STATUS_OK(DeleteFromColumns(table, table_name, row_key, dv)); + + status = HasColumn(table, column_family_name, row_key, column_qualifier); + ASSERT_EQ(false, status.ok()); +} + +// Does DeleteFromColumn rollback work? +TEST(TransactionRollback, DeleteFromColumnRollback) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "test"; + auto const* const column_qualifier = "test"; + // Simulate mutation failure and cause rollback by attempting a + // mutation with a non-existent column family name. + auto const* const bad_column_family_name = + "this_column_family_does_not_exist"; + auto const* data = "test"; + + std::vector column_families = {column_family_name}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v = { + {column_family_name, column_qualifier, 1000, data}, + {column_family_name, column_qualifier, 2000, data}, + {column_family_name, column_qualifier, 3000, data}, + }; + + auto status = SetCells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 1000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 2000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 3000, data)); + + // The first mutation will succeed. The second assumes a schema that + // does not exist - it should fail and cause rollback of the column + // deletion in the first mutation.
+ std::vector dv = { + {column_family_name, column_qualifier, + NewTimestampRange(v[0].timestamp_micros, v[2].timestamp_micros + 1000)}, + {bad_column_family_name, column_qualifier, NewTimestampRange(1000, 2000)}, + }; + // The mutation chain should fail and rollback should occur. + ASSERT_EQ(false, DeleteFromColumns(table, table_name, row_key, dv).ok()); + + // The column should have been restored. + ASSERT_STATUS_OK( + HasColumn(table, column_family_name, row_key, column_qualifier)); + // Check that the data is where and what we expect. + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 1000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 2000, data)); + ASSERT_STATUS_OK(HasCell(table, column_family_name, row_key, column_qualifier, + 3000, data)); +} + +// Can we delete a row from all column families? +TEST(TransactionRollback, DeleteFromRowBasicFunction) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "column_family_1"; + auto const* const column_qualifier = "column_qualifier"; + auto const timestamp_micros = 1000; + auto const* data = "value"; + auto const* const second_column_family_name = "column_family_2"; + + std::vector column_families = {column_family_name, + second_column_family_name}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + std::vector v; + SetCellParams p = {column_family_name, column_qualifier, timestamp_micros, + data}; + v.push_back(p); + + p = {second_column_family_name, column_qualifier, timestamp_micros, data}; + v.push_back(p); + + auto status = SetCells(table, table_name, row_key, v); + ASSERT_STATUS_OK(status); + ASSERT_STATUS_OK(HasCell(table,
column_family_name, row_key, column_qualifier, + timestamp_micros, data)); + ASSERT_STATUS_OK( + HasColumn(table, second_column_family_name, row_key, column_qualifier)); + ASSERT_STATUS_OK(HasRow(table, column_family_name, row_key)); + + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + auto* mutation_request_mutation = mutation_request.add_mutations(); + mutation_request_mutation->mutable_delete_from_row(); + + ASSERT_STATUS_OK(table->MutateRow(mutation_request)); + ASSERT_EQ(false, HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, data) + .ok()); + ASSERT_EQ(false, HasColumn(table, second_column_family_name, row_key, + column_qualifier) + .ok()); +} + +// Does AddToCell reject requests to add to a cell in a column family +// not provisioned for aggregation? +TEST(TransactionRollback, AddToCellRejectsRequestsToNonAggregateColumnFamily) { + ::google::bigtable::admin::v2::Table schema; + ::google::bigtable::admin::v2::ColumnFamily column_family; + + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "column_family_1"; + auto const* const column_qualifier = "column_qualifier"; + auto const timestamp_micros = 1000; + + auto maybe_table = Table::Create( + CreateSchema(table_name, {{column_family_name, column_family}})); + + ASSERT_STATUS_OK(maybe_table); + auto table = maybe_table.value(); + + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* add_to_cell_mutation = mutation_request_mutation->mutable_add_to_cell(); + + add_to_cell_mutation->set_family_name(column_family_name); + auto* mutable_column_qualifier = + add_to_cell_mutation->mutable_column_qualifier(); +
mutable_column_qualifier->set_raw_value(column_qualifier); + auto* mutable_timestamp = add_to_cell_mutation->mutable_timestamp(); + mutable_timestamp->set_raw_timestamp_micros(timestamp_micros); + auto* mutable_input = add_to_cell_mutation->mutable_input(); + mutable_input->set_int_value(100); + + // Should fail because `column_family' has not been provisioned for + // aggregation, i.e. its value_type is not set at all in this case (it + // would need to be set to `Aggregate'). + ASSERT_EQ(false, table->MutateRow(mutation_request).ok()); +} + +// Test basic functionality of AddToCell Sum aggregation. +TEST(TransactionRollback, AddToCellTestSum) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "column_family_1"; + auto const* const column_qualifier = "column_qualifier"; + auto const timestamp_micros = 1000; + + auto maybe_table = Table::Create(CreateSchema( + table_name, {{column_family_name, + MakeBEAggregateCFProto( + google::bigtable::admin::v2::Type::Aggregate::kSum)}})); + ASSERT_STATUS_OK(maybe_table); + + auto table = maybe_table.value(); + + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* add_to_cell_mutation = mutation_request_mutation->mutable_add_to_cell(); + + add_to_cell_mutation->set_family_name(column_family_name); + auto* mutable_column_qualifier = + add_to_cell_mutation->mutable_column_qualifier(); + mutable_column_qualifier->set_raw_value(column_qualifier); + auto* mutable_timestamp = add_to_cell_mutation->mutable_timestamp(); + mutable_timestamp->set_raw_timestamp_micros(timestamp_micros); + auto* mutable_input = add_to_cell_mutation->mutable_input(); + mutable_input->set_int_value(100); + + ASSERT_EQ(true, table->MutateRow(mutation_request).ok()); + ASSERT_EQ(true, +
HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, + google::cloud::internal::EncodeBigEndian(100)) + .ok()); + + // Try and add 200 + mutable_input->set_int_value(200); + ASSERT_EQ(true, table->MutateRow(mutation_request).ok()); + ASSERT_EQ(true, + HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, + google::cloud::internal::EncodeBigEndian(300)) + .ok()); + + // Try and subtract 50 + mutable_input->set_int_value(-50); + ASSERT_EQ(true, table->MutateRow(mutation_request).ok()); + ASSERT_EQ(true, + HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, + google::cloud::internal::EncodeBigEndian(250)) + .ok()); +} + +// Test basic functionality of AddToCell Max aggregation. +TEST(TransactionRollback, AddToCellTestMax) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "column_family_1"; + auto const* const column_qualifier = "column_qualifier"; + auto const timestamp_micros = 1000; + + auto maybe_table = Table::Create(CreateSchema( + table_name, {{column_family_name, + MakeBEAggregateCFProto( + google::bigtable::admin::v2::Type::Aggregate::kMax)}})); + ASSERT_STATUS_OK(maybe_table); + + auto table = maybe_table.value(); + + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* add_to_cell_mutation = mutation_request_mutation->mutable_add_to_cell(); + + add_to_cell_mutation->set_family_name(column_family_name); + auto* mutable_column_qualifier = + add_to_cell_mutation->mutable_column_qualifier(); + mutable_column_qualifier->set_raw_value(column_qualifier); + auto* mutable_timestamp = add_to_cell_mutation->mutable_timestamp(); + mutable_timestamp->set_raw_timestamp_micros(timestamp_micros); + auto*
mutable_input = add_to_cell_mutation->mutable_input(); + mutable_input->set_int_value(100); + + ASSERT_EQ(true, table->MutateRow(mutation_request).ok()); + ASSERT_EQ(true, + HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, + google::cloud::internal::EncodeBigEndian(100)) + .ok()); + + mutable_input->set_int_value(200); + ASSERT_EQ(true, table->MutateRow(mutation_request).ok()); + ASSERT_EQ(true, + HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, + google::cloud::internal::EncodeBigEndian(200)) + .ok()); +} + +// Test basic functionality of AddToCell Min aggregation. +TEST(TransactionRollback, AddToCellTestMin) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + auto const* const row_key = "0"; + auto const* const column_family_name = "column_family_1"; + auto const* const column_qualifier = "column_qualifier"; + auto const timestamp_micros = 1000; + + auto maybe_table = Table::Create(CreateSchema( + table_name, {{column_family_name, + MakeBEAggregateCFProto( + google::bigtable::admin::v2::Type::Aggregate::kMin)}})); + ASSERT_STATUS_OK(maybe_table); + + auto table = maybe_table.value(); + + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* add_to_cell_mutation = mutation_request_mutation->mutable_add_to_cell(); + + add_to_cell_mutation->set_family_name(column_family_name); + auto* mutable_column_qualifier = + add_to_cell_mutation->mutable_column_qualifier(); + mutable_column_qualifier->set_raw_value(column_qualifier); + auto* mutable_timestamp = add_to_cell_mutation->mutable_timestamp(); + mutable_timestamp->set_raw_timestamp_micros(timestamp_micros); + auto* mutable_input = add_to_cell_mutation->mutable_input(); + mutable_input->set_int_value(100); + + ASSERT_EQ(true,
table->MutateRow(mutation_request).ok()); + ASSERT_EQ(true, + HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, + google::cloud::internal::EncodeBigEndian(100)) + .ok()); + + mutable_input->set_int_value(50); + ASSERT_EQ(true, table->MutateRow(mutation_request).ok()); + ASSERT_EQ(true, + HasCell(table, column_family_name, row_key, column_qualifier, + timestamp_micros, + google::cloud::internal::EncodeBigEndian(50)) + .ok()); +} + +StatusOr GetColumn( + google::bigtable::v2::ReadModifyWriteRowResponse const& resp, + std::string const& row_key, int family_index, std::string const& qual) { + if (!resp.has_row()) { + return NotFoundError( + "response has no row", + GCP_ERROR_INFO().WithMetadata("response message", resp.DebugString())); + } + + if (resp.row().key() != row_key) { + return InvalidArgumentError( + "row key does not match", + GCP_ERROR_INFO().WithMetadata(row_key, resp.row().key())); + } + + if (family_index < 0) { + return InvalidArgumentError( + "supplied family index < 0", + GCP_ERROR_INFO().WithMetadata("family_index", + absl::StrFormat("%d", family_index))); + } + + if (family_index > resp.row().families_size() - 1) { + return InvalidArgumentError( + "supplied family index is out of range", + GCP_ERROR_INFO().WithMetadata("family index", + absl::StrFormat("%d", family_index))); + } + + // Check that column families and column qualifiers in the response + // are neither empty nor repeated. + std::set families; + for (int i = 0; i < resp.row().families_size(); i++) { + auto ret = families.emplace(resp.row().families(i).name()); + // The family name should not be empty and should not be + // repeated. Neither should the column qualifiers be empty or + // repeated.
+ if (ret.first->empty() || !ret.second) { + return InvalidArgumentError( + "empty or repeated family name", + GCP_ERROR_INFO().WithMetadata("ReadModifyWriteRowResponse", + resp.DebugString())); + } + + std::set column_qualifiers; + for (auto const& col : resp.row().families(i).columns()) { + auto ret = column_qualifiers.emplace(col.qualifier()); + if (ret.first->empty() || !ret.second) { + return InvalidArgumentError( + "empty or repeated column qualifier", + GCP_ERROR_INFO().WithMetadata("ReadModifyWriteRowResponse", + resp.DebugString())); + } + } + } + + for (auto const& col : resp.row().families(family_index).columns()) { + if (col.qualifier() == qual) { + return col; + } + } + + return NotFoundError("column not found", + GCP_ERROR_INFO().WithMetadata("qualifier", qual)); +} + +// Test that ReadModifyWrite does the correct thing when the row +// and/or the column is unset (it should introduce new cells with the +// timestamp of current system time and assume the missing values are +// 0 or an empty string).
+TEST(ReadModifyWrite, Unsetcase) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + + std::vector column_families = {"column_family"}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto& table = maybe_table.value(); + + auto constexpr kRMWText = R"pb( + table_name: "projects/test/instances/test/tables/test" + row_key: "0" + rules: + [ { + family_name: "column_family" + column_qualifier: "column_1" + increment_amount: 1 + } + , { + family_name: "column_family" + column_qualifier: "column_2" + append_value: "a string" + }] + )pb"; + + google::bigtable::v2::ReadModifyWriteRowRequest request; + ASSERT_TRUE(TextFormat::ParseFromString(kRMWText, &request)); + + auto system_time_ms_before = + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); + + auto maybe_response = table->ReadModifyWriteRow(request); + ASSERT_STATUS_OK(maybe_response); + + auto& response = maybe_response.value(); + ASSERT_EQ(response.row().key(), "0"); + ASSERT_EQ(response.row().families_size(), 1); + ASSERT_EQ(response.row().families(0).name(), "column_family"); + ASSERT_EQ(response.row().families(0).columns_size(), 2); + + auto maybe_column = GetColumn(response, "0", 0, "column_1"); + ASSERT_STATUS_OK(maybe_column); + auto& col = maybe_column.value(); + ASSERT_EQ(col.cells_size(), 1); + ASSERT_GE(std::chrono::duration_cast( + std::chrono::microseconds(col.cells(0).timestamp_micros())), + system_time_ms_before); + ASSERT_EQ(col.cells(0).value(), ::google::cloud::internal::EncodeBigEndian( + static_cast(1))); + + auto maybe_column_2 = GetColumn(response, "0", 0, "column_2"); + ASSERT_STATUS_OK(maybe_column_2); + col = maybe_column_2.value(); + ASSERT_EQ(col.cells_size(), 1); + ASSERT_GE(std::chrono::duration_cast( + std::chrono::microseconds(col.cells(0).timestamp_micros())), + system_time_ms_before); + ASSERT_EQ(col.cells(0).value(), "a string"); + + auto maybe_cells = GetColumn(table, 
"column_family", "0", "column_1"); + ASSERT_STATUS_OK(maybe_cells); + auto& cells = maybe_cells.value(); + ASSERT_EQ(cells.size(), 1); + auto cell_it = cells.begin(); + ASSERT_GE(cell_it->first, system_time_ms_before); + ASSERT_EQ(cell_it->second, ::google::cloud::internal::EncodeBigEndian( + static_cast(1))); + + auto maybe_cells_2 = GetColumn(table, "column_family", "0", "column_2"); + ASSERT_STATUS_OK(maybe_cells_2); + cells = maybe_cells_2.value(); + ASSERT_EQ(cells.size(), 1); + cell_it = cells.begin(); + ASSERT_GE(cell_it->first, system_time_ms_before); + ASSERT_EQ(cell_it->second, "a string"); +} + +// Test that the RPC does the right thing when the latest cell in the +// column has a newer timestamp than system time. In particular, it +// should update the latest cell with a new value (and not create a +// new cell). This also tests that the RPC chooses the latest cell to +// update (and will catch bugs in cell ordering). +TEST(ReadModifyWrite, SetAndNewerTimestampCase) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + + std::vector column_families = {"column_family"}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto& table = maybe_table.value(); + + auto usecs_in_day = (static_cast(24) * 60 * 60 * 1000 * 1000); + + auto far_future_us = (std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count() * + 1000) + + usecs_in_day; + ASSERT_GT(far_future_us, + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + + auto far_future_us_latest = far_future_us + 1000; + + std::vector p = { + {"column_family", "column_1", far_future_us, "older"}, + {"column_family", "column_1", far_future_us_latest, "latest"}, + {"column_family", "column_2", far_future_us, + ::google::cloud::internal::EncodeBigEndian( + static_cast(100))}, + {"column_family", "column_2", far_future_us_latest, + 
::google::cloud::internal::EncodeBigEndian( + static_cast(200))}, + }; + + auto status = SetCells(table, table_name, "0", p); + ASSERT_STATUS_OK(status); + + auto constexpr kRMWText = R"pb( + table_name: "projects/test/instances/test/tables/test" + row_key: "0" + rules: + [ { + family_name: "column_family" + column_qualifier: "column_1" + append_value: "_with_suffix" + } + , { + family_name: "column_family" + column_qualifier: "column_2" + increment_amount: 1 + }] + )pb"; + + google::bigtable::v2::ReadModifyWriteRowRequest request; + ASSERT_TRUE(TextFormat::ParseFromString(kRMWText, &request)); + + auto maybe_response = table->ReadModifyWriteRow(request); + ASSERT_STATUS_OK(maybe_response); + + auto& response = maybe_response.value(); + ASSERT_EQ(response.row().key(), "0"); + ASSERT_EQ(response.row().families_size(), 1); + ASSERT_EQ(response.row().families(0).name(), "column_family"); + ASSERT_EQ(response.row().families(0).columns_size(), 2); + + auto maybe_column = GetColumn(response, "0", 0, "column_1"); + ASSERT_STATUS_OK(maybe_column); + auto& col = maybe_column.value(); + ASSERT_EQ(col.cells_size(), 1); + ASSERT_EQ(col.cells(0).timestamp_micros(), far_future_us_latest); + ASSERT_EQ(col.cells(0).value(), "latest_with_suffix"); + + auto maybe_column_2 = GetColumn(response, "0", 0, "column_2"); + ASSERT_STATUS_OK(maybe_column_2); + col = maybe_column_2.value(); + ASSERT_EQ(col.cells_size(), 1); + ASSERT_EQ(col.cells(0).timestamp_micros(), far_future_us_latest); + ASSERT_EQ(col.cells(0).value(), ::google::cloud::internal::EncodeBigEndian( + static_cast(201))); + + ASSERT_STATUS_OK( + HasCell(table, "column_family", "0", "column_1", far_future_us, "older")); + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_1", + far_future_us_latest, "latest_with_suffix")); + + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_2", + far_future_us, + ::google::cloud::internal::EncodeBigEndian( + static_cast(100)))); + ASSERT_STATUS_OK(HasCell(table, 
"column_family", "0", "column_2", + far_future_us_latest, + ::google::cloud::internal::EncodeBigEndian( + static_cast(201)))); +} + +// Test that the RPC does the right thing when the latest cell in the +// column has an older timestamp than system time. In particular, a +// new cell with the current system time should be added to the cell +// to contain the value after adding or appending. +TEST(ReadModifyWrite, SetAndOlderTimestampCase) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + + std::vector column_families = {"column_family"}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto& table = maybe_table.value(); + + auto usecs_in_day = (static_cast(24) * 60 * 60 * 1000 * 1000); + + auto far_past_us = (std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count() * + 1000) - + usecs_in_day; + ASSERT_LT(far_past_us, + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + auto far_past_us_oldest = far_past_us - 1000; + + std::vector p = { + {"column_family", "column_1", far_past_us, "old"}, + {"column_family", "column_1", far_past_us_oldest, "oldest"}, + {"column_family", "column_2", far_past_us, + ::google::cloud::internal::EncodeBigEndian( + static_cast(100))}, + {"column_family", "column_2", far_past_us_oldest, + ::google::cloud::internal::EncodeBigEndian( + static_cast(200))}, + }; + + auto status = SetCells(table, table_name, "0", p); + ASSERT_STATUS_OK(status); + + auto constexpr kRMWText = R"pb( + table_name: "projects/test/instances/test/tables/test" + row_key: "0" + rules: + [ { + family_name: "column_family" + column_qualifier: "column_1" + append_value: "_with_suffix" + } + , { + family_name: "column_family" + column_qualifier: "column_2" + increment_amount: 1 + }] + )pb"; + + google::bigtable::v2::ReadModifyWriteRowRequest request; + ASSERT_TRUE(TextFormat::ParseFromString(kRMWText, 
&request)); + + auto system_time_us_before = + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count() * + 1000; + + auto maybe_response = table->ReadModifyWriteRow(request); + ASSERT_STATUS_OK(maybe_response); + + auto& response = maybe_response.value(); + ASSERT_EQ(response.row().key(), "0"); + ASSERT_EQ(response.row().families_size(), 1); + ASSERT_EQ(response.row().families(0).name(), "column_family"); + ASSERT_EQ(response.row().families(0).columns_size(), 2); + + auto maybe_column = GetColumn(response, "0", 0, "column_1"); + ASSERT_STATUS_OK(maybe_column); + auto& col = maybe_column.value(); + ASSERT_EQ(col.cells_size(), 1); + ASSERT_GE(col.cells(0).timestamp_micros(), system_time_us_before); + ASSERT_EQ(col.cells(0).value(), "old_with_suffix"); + + auto maybe_column_2 = GetColumn(response, "0", 0, "column_2"); + ASSERT_STATUS_OK(maybe_column_2); + auto& integer_col = maybe_column_2.value(); + ASSERT_EQ(integer_col.cells_size(), 1); + ASSERT_GE(integer_col.cells(0).timestamp_micros(), system_time_us_before); + ASSERT_EQ(integer_col.cells(0).value(), + ::google::cloud::internal::EncodeBigEndian( + static_cast(101))); + + ASSERT_STATUS_OK( + HasCell(table, "column_family", "0", "column_1", far_past_us, "old")); + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_1", + far_past_us_oldest, "oldest")); + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_1", + col.cells(0).timestamp_micros(), "old_with_suffix")); + + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_2", far_past_us, + ::google::cloud::internal::EncodeBigEndian( + static_cast(100)))); + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_2", + far_past_us_oldest, + ::google::cloud::internal::EncodeBigEndian( + static_cast(200)))); + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_2", + integer_col.cells(0).timestamp_micros(), + ::google::cloud::internal::EncodeBigEndian( + static_cast(101)))); +} + +// 
Test that the RPC does the right thing when the latest cell in the +// column has a newer timestamp than system time, and we need to roll +// back. In particular the changes to the latest cell should be rolled +// back. +TEST(ReadModifyWrite, RollbackNewerTimestamp) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + + std::vector column_families = {"column_family"}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto& table = maybe_table.value(); + + auto usecs_in_day = (static_cast(24) * 60 * 60 * 1000 * 1000); + + auto far_future_us = (std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count() * + 1000) + + usecs_in_day; + + ASSERT_GT(far_future_us, + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + + std::vector p = { + {"column_family", "column_1", far_future_us, "prefix"}, + }; + + auto status = SetCells(table, table_name, "0", p); + ASSERT_STATUS_OK(status); + + // The rules are evaluated in order. In this case, the 2nd rule + // refers to a column family that does not exist and should trigger + // a rollback. 
+ auto constexpr kRMWText = R"pb( + table_name: "projects/test/instances/test/tables/test" + row_key: "0" + rules: + [ { + family_name: "column_family" + column_qualifier: "column_1" + append_value: "_with_suffix" + } + , { + family_name: "does_not_exist" + column_qualifier: "column_2" + increment_amount: 1 + }] + )pb"; + + google::bigtable::v2::ReadModifyWriteRowRequest request; + ASSERT_TRUE(TextFormat::ParseFromString(kRMWText, &request)); + + auto maybe_response = table->ReadModifyWriteRow(request); + ASSERT_EQ(false, maybe_response.ok()); + + ASSERT_STATUS_OK(HasCell(table, "column_family", "0", "column_1", + far_future_us, "prefix")); +} + +// Test that the RPC does the right thing when the latest cell in the +// column has a older timestamp than system time, and we need to roll +// back. In particular, the added cell should be deleted (no +// additional cell should be available after the failed transaction). +TEST(ReadModifyWrite, RollbackOlderTimestamp) { + auto const* const table_name = "projects/test/instances/test/tables/test"; + + std::vector column_families = {"column_family"}; + auto maybe_table = CreateTable(table_name, column_families); + + ASSERT_STATUS_OK(maybe_table); + auto& table = maybe_table.value(); + + auto usecs_in_day = (static_cast(24) * 60 * 60 * 1000 * 1000); + + auto far_past_us = (std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count() * + 1000) - + usecs_in_day; + ASSERT_LT(far_past_us, + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + + std::vector p = { + {"column_family", "column_1", far_past_us, "old"}, + }; + + auto status = SetCells(table, table_name, "0", p); + ASSERT_STATUS_OK(status); + + // The rules are evaluated in order. In this case, the 2nd rule + // refers to a column family that does not exist and should trigger + // a rollback. 
+ auto constexpr kRMWText = R"pb( + table_name: "projects/test/instances/test/tables/test" + row_key: "0" + rules: + [ { + family_name: "column_family" + column_qualifier: "column_1" + append_value: "_with_suffix" + } + , { + family_name: "does_not_exist" + column_qualifier: "column_2" + increment_amount: 1 + }] + )pb"; + + google::bigtable::v2::ReadModifyWriteRowRequest request; + ASSERT_TRUE(TextFormat::ParseFromString(kRMWText, &request)); + + auto maybe_response = table->ReadModifyWriteRow(request); + ASSERT_EQ(false, maybe_response.ok()); + + ASSERT_STATUS_OK( + HasCell(table, "column_family", "0", "column_1", far_past_us, "old")); +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/range_set.cc b/google/cloud/bigtable/emulator/range_set.cc new file mode 100644 index 0000000000000..798e87f41108f --- /dev/null +++ b/google/cloud/bigtable/emulator/range_set.cc @@ -0,0 +1,579 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/bigtable/internal/google_bytes_traits.h" +#include "google/cloud/internal/make_status.h" +#include "google/cloud/status_or.h" +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +namespace detail { + +/// Return -1 for lhs < rhs, 0 for lhs == rhs, 1 for lhs > rhs. +int CompareRangeValues(StringRangeSet::Range::Value const& lhs, + StringRangeSet::Range::Value const& rhs) { + if (absl::holds_alternative(lhs)) { + return absl::holds_alternative(rhs) ? 0 + : 1; + } + if (absl::holds_alternative(rhs)) { + return -1; + } + return internal::CompareRowKey(absl::get(lhs), + absl::get(rhs)); +} + +/** + * Check whether there exists a row key between `lhs` and `rhs`. + * + * @param `lhs` the first (in order) of the two ranges. + * @param `rhs` the second (in order) of the two ranges. + * @return if `rhs` directly follows `lhs` in the row key order. + */ +bool ConsecutiveRowKeys(StringRangeSet::Range::Value const& lhs, + StringRangeSet::Range::Value const& rhs) { + if (absl::holds_alternative(lhs) || + absl::holds_alternative(rhs)) { + return false; + } + return internal::ConsecutiveRowKeys(absl::get(lhs), + absl::get(rhs)); +} + +/// Checks whether there exists a string which belongs to both `lhs` and `rhs`. +bool HasOverlap(StringRangeSet::Range const& lhs, + StringRangeSet::Range const& rhs) { + auto const start_cmp = CompareRangeValues(lhs.start(), rhs.start()); + StringRangeSet::Range const* intersect_start; + if (start_cmp == 0) { + intersect_start = lhs.start_open() ? &lhs : &rhs; + } else { + intersect_start = (start_cmp > 0) ? &lhs : &rhs; + } + auto const end_cmp = CompareRangeValues(lhs.end(), rhs.end()); + StringRangeSet::Range const* intersect_end; + if (end_cmp == 0) { + intersect_end = lhs.end_open() ? &lhs : &rhs; + } else { + intersect_end = (end_cmp < 0) ? 
&lhs : &rhs; + } + return !StringRangeSet::Range::IsEmpty( + intersect_start->start(), intersect_start->start_open(), + intersect_end->end(), intersect_end->end_open()); +} + +/// Checks if there exists a timestamp which belongs to both `lhs` and `rhs`. +bool HasOverlap(TimestampRangeSet::Range const& lhs, + TimestampRangeSet::Range const& rhs) { + TimestampRangeSet::Range::Value overlap_start = + std::max(lhs.start(), rhs.start()); + TimestampRangeSet::Range::Value overlap_end = + TimestampRangeSet::Range::EndLess()(lhs, rhs) ? lhs.end() : rhs.end(); + return !TimestampRangeSet::Range::IsEmpty(overlap_start, overlap_end); +} + +/** + * Checks if two disjoint ranges are adjacent. + * + * In other words, whether there doesn't exist a string, which could be squeezed + * between the ranges. + * + * @param `lhs` the first (in order) of the two ranges. + * @param `rhs` the second (in order) of the two ranges. + * @return if `rhs` directly follows `lhs` in the row key order. + */ +bool DisjointAndSortedRangesAdjacent(StringRangeSet::Range const& lhs, + StringRangeSet::Range const& rhs) { + assert(!HasOverlap(lhs, rhs)); + assert(StringRangeSet::Range::StartLess()(lhs, rhs)); + if (lhs.end_closed() && rhs.start_open() && lhs.end() == rhs.start()) { + return true; + } + if (lhs.end_open() && rhs.start_closed() && lhs.end() == rhs.start()) { + return true; + } + // FIXME - ConsecutiveRowKeys should somehow take into account the allowed + // length of the strings. + if (lhs.end_closed() && rhs.start_closed() && + detail::ConsecutiveRowKeys(lhs.end(), rhs.start())) { + return true; + } + return false; +} + +/** + * Checks if two disjoint ranges are adjacent. + * + * In other words, whether there doesn't exist a timestamp, which could be + * squeezed between the ranges. + * + * @param `lhs` the first (in order) of the two ranges. + * @param `rhs` the second (in order) of the two ranges. + * @return if `rhs` directly follows `lhs` in the row key order. 
+ */ +bool DisjointAndSortedRangesAdjacent(TimestampRangeSet::Range const& lhs, + TimestampRangeSet::Range const& rhs) { + assert(!HasOverlap(lhs, rhs)); + assert(TimestampRangeSet::Range::StartLess()(lhs, rhs)); + return lhs.end() == rhs.start(); +} + +/** + * A generic implementation of adding a range to a set of disjoint ranges. + * + * The word "Sum" in the function name shall be understood as a set sum. + * + * Every range in the set which overlaps with or is adjacent to + * `inserted_range` is removed and merged into it; ranges which neither + * overlap with nor are adjacent to `inserted_range` are left untouched. + * + * @pre The ranges should be disjoint. + * + * @tparam RangeSetType the type of a set containing disjoint ranges. This will + * be a carefully ordered `std::set`. + * @tparam RangeType the type of a single range. + * @param disjoint_ranges the set of disjoint ranges to which the + * `inserted_range` should be added. + * @param `inserted_range` the range being added. + */ +template <typename RangeSetType, typename RangeType> +void RangeSetSumImpl(RangeSetType& disjoint_ranges, RangeType inserted_range) { + // Remove all ranges which either have an overlap with `inserted_range` or are + // adjacent to it. Then add `inserted_range` with `start` and `end` + // adjusted to cover what the removed ranges used to cover. + + auto first_to_remove = disjoint_ranges.upper_bound(inserted_range); + // `*first_to_remove` starts strictly after `inserted_range`'s start. + // The previous range is the first to have a chance for an overlap (or being + // adjacent) - it is the last one, which starts at or before `inserted_range` + // start. 
+ if (first_to_remove != disjoint_ranges.begin()) { + auto const previous = std::prev(first_to_remove); + if (detail::HasOverlap(*previous, inserted_range) || + detail::DisjointAndSortedRangesAdjacent(*previous, inserted_range)) { + // Merge the previous range into `inserted_range`. For a `std::set`, + // erasing it does not invalidate `first_to_remove`, which points at a + // different element. + if (typename RangeType::StartLess()(*previous, inserted_range)) { + inserted_range.set_start(*previous); + } + if (typename RangeType::EndLess()(inserted_range, *previous)) { + inserted_range.set_end(*previous); + } + disjoint_ranges.erase(previous); + } + } + // Merge the following ranges only for as long as they overlap with or are + // adjacent to `inserted_range`. The condition must be checked before the + // first removal as well, otherwise a disjoint range which starts after + // `inserted_range` would be swallowed. + while (first_to_remove != disjoint_ranges.end() && + (detail::HasOverlap(*first_to_remove, inserted_range) || + detail::DisjointAndSortedRangesAdjacent(inserted_range, + *first_to_remove))) { + if (typename RangeType::EndLess()(inserted_range, *first_to_remove)) { + inserted_range.set_end(*first_to_remove); + } + first_to_remove = disjoint_ranges.erase(first_to_remove); + } + disjoint_ranges.insert(std::move(inserted_range)); +} + +/** + * An implementation of intersecting a set of disjoint ranges with a range. + * + * @pre The range set should be disjoint. + * + * @tparam RangeSetType the type of a set containing disjoint ranges. This will + * be a carefully ordered `std::set`. + * @tparam RangeType the type of a single range. + * @param disjoint_ranges the set of disjoint ranges which will be modified + * to only cover points also present in `intersected_range`. + * @param `intersected_range` the range with which the range set will be + * intersected. + */ +template <typename RangeSetType, typename RangeType> +void RangeSetIntersectImpl(RangeSetType& disjoint_ranges, + RangeType const& intersected_range) { + // Intersect with every range. This could be optimized but it's unlikely that + // the extra code complexity would be offset by any performance gains in real + // life. 
+ for (auto range_it = disjoint_ranges.begin(); + range_it != disjoint_ranges.end();) { + if (!detail::HasOverlap(*range_it, intersected_range)) { + disjoint_ranges.erase(range_it++); + continue; + } + if (typename RangeType::StartLess()(*range_it, intersected_range)) { + RangeType to_update = std::move(*range_it); + disjoint_ranges.erase(range_it); + to_update.set_start(intersected_range); + range_it = disjoint_ranges.emplace(std::move(to_update)).first; + } + if (typename RangeType::EndLess()(intersected_range, *range_it)) { + RangeType to_update = std::move(*range_it); + disjoint_ranges.erase(range_it); + to_update.set_end(intersected_range); + range_it = disjoint_ranges.emplace(std::move(to_update)).first; + } + ++range_it; + } +} + +} // namespace detail + +StringRangeSet::Range::Range(Value start, bool start_open, Value end, + bool end_open) + : start_(std::move(start)), + start_open_(start_open), + end_(std::move(end)), + end_open_(end_open) { + assert(!Range::ValueLess()(end_, start_)); + assert(!absl::holds_alternative(start_) || + !start_open_); + assert(!absl::holds_alternative(end_) || + !end_open_); + assert(!absl::holds_alternative(start_) || + absl::holds_alternative(end_)); +} + +StatusOr StringRangeSet::Range::FromRowRange( + google::bigtable::v2::RowRange const& row_range) { + StringRangeSet::Range::Value start; + bool start_open; + if (row_range.has_start_key_open() && !row_range.start_key_open().empty()) { + start = StringRangeSet::Range::Value(row_range.start_key_open()); + start_open = true; + } else if (row_range.has_start_key_closed() && + !row_range.start_key_closed().empty()) { + start = StringRangeSet::Range::Value(row_range.start_key_closed()); + start_open = false; + } else { + start = StringRangeSet::Range::Value(""); + start_open = false; + } + StringRangeSet::Range::Value end; + bool end_open; + if (row_range.has_end_key_open() && !row_range.end_key_open().empty()) { + end = StringRangeSet::Range::Value(row_range.end_key_open()); + 
end_open = true; + } else if (row_range.has_end_key_closed() && + !row_range.end_key_closed().empty()) { + end = StringRangeSet::Range::Value(row_range.end_key_closed()); + end_open = false; + } else { + end = StringRangeSet::Range::Value(StringRangeSet::Range::Infinity{}); + end_open = false; + } + if (StringRangeSet::Range::ValueLess()(end, start)) { + return InvalidArgumentError( + "reversed `row_range`", + GCP_ERROR_INFO().WithMetadata("row_range", row_range.DebugString())); + } + return StringRangeSet::Range(std::move(start), start_open, std::move(end), + end_open); +} + +StatusOr StringRangeSet::Range::FromValueRange( + google::bigtable::v2::ValueRange const& value_range) { + StringRangeSet::Range::Value start; + bool start_open; + if (value_range.has_start_value_open() && + !value_range.start_value_open().empty()) { + start = StringRangeSet::Range::Value(value_range.start_value_open()); + start_open = true; + } else if (value_range.has_start_value_closed() && + !value_range.start_value_closed().empty()) { + start = StringRangeSet::Range::Value(value_range.start_value_closed()); + start_open = false; + } else { + start = StringRangeSet::Range::Value(""); + start_open = false; + } + StringRangeSet::Range::Value end; + bool end_open; + if (value_range.has_end_value_open() && + !value_range.end_value_open().empty()) { + end = StringRangeSet::Range::Value(value_range.end_value_open()); + end_open = true; + } else if (value_range.has_end_value_closed() && + !value_range.end_value_closed().empty()) { + end = StringRangeSet::Range::Value(value_range.end_value_closed()); + end_open = false; + } else { + end = StringRangeSet::Range::Value(StringRangeSet::Range::Infinity{}); + end_open = false; + } + if (StringRangeSet::Range::ValueLess()(end, start)) { + return InvalidArgumentError("reversed `value_range`", + GCP_ERROR_INFO().WithMetadata( + "value_range", value_range.DebugString())); + } + return StringRangeSet::Range(std::move(start), start_open, std::move(end), + 
end_open); +} + +StatusOr StringRangeSet::Range::FromColumnRange( + google::bigtable::v2::ColumnRange const& column_range) { + StringRangeSet::Range::Value start; + bool start_open; + if (column_range.has_start_qualifier_open() && + !column_range.start_qualifier_open().empty()) { + start = StringRangeSet::Range::Value(column_range.start_qualifier_open()); + start_open = true; + } else if (column_range.has_start_qualifier_closed() && + !column_range.start_qualifier_closed().empty()) { + start = StringRangeSet::Range::Value(column_range.start_qualifier_closed()); + start_open = false; + } else { + start = StringRangeSet::Range::Value(""); + start_open = false; + } + StringRangeSet::Range::Value end; + bool end_open; + if (column_range.has_end_qualifier_open() && + !column_range.end_qualifier_open().empty()) { + end = StringRangeSet::Range::Value(column_range.end_qualifier_open()); + end_open = true; + } else if (column_range.has_end_qualifier_closed() && + !column_range.end_qualifier_closed().empty()) { + end = StringRangeSet::Range::Value(column_range.end_qualifier_closed()); + end_open = false; + } else { + end = StringRangeSet::Range::Value(StringRangeSet::Range::Infinity{}); + end_open = false; + } + if (StringRangeSet::Range::ValueLess()(end, start)) { + return InvalidArgumentError( + "reversed `column_range`", + GCP_ERROR_INFO().WithMetadata("column_range", + column_range.DebugString())); + } + return StringRangeSet::Range(std::move(start), start_open, std::move(end), + end_open); +} + +void StringRangeSet::Range::set_start(Range const& source) { + start_ = source.start(); + start_open_ = source.start_open(); +} + +void StringRangeSet::Range::set_end(Range const& source) { + end_ = source.end(); + end_open_ = source.end_open(); +} + +bool StringRangeSet::Range::IsBelowStart(Value const& value) const { + auto const cmp = detail::CompareRangeValues(value, start_); + if (cmp != 0) { + return cmp < 0; + } + return start_open_; +} + +bool 
StringRangeSet::Range::IsEmpty(StringRangeSet::Range::Value const& start, + bool start_open, + StringRangeSet::Range::Value const& end, + bool end_open) { + auto const res_cmp = detail::CompareRangeValues(start, end); + if (res_cmp > 0) { + return true; + } + if (res_cmp == 0) { + return start_open || end_open || + absl::holds_alternative(start); + } + if (start_open && end_open) { + // FIXME - ConsecutiveRowKeys should somehow take into account the allowed + // length of the strings. + return detail::ConsecutiveRowKeys(start, end); + } + return false; +} + +bool StringRangeSet::Range::IsAboveEnd(Value const& value) const { + auto const cmp = detail::CompareRangeValues(value, end_); + if (cmp != 0) { + return cmp > 0; + } + return end_open_; +} + +bool StringRangeSet::Range::IsWithin(Value const& value) const { + return !IsAboveEnd(value) && !IsBelowStart(value); +} + +bool StringRangeSet::Range::IsEmpty() const { + return Range::IsEmpty(start_, start_open_, end_, end_open_); +} + +bool StringRangeSet::Range::ValueLess::operator()( + Range::Value const& lhs, Range::Value const& rhs) const { + return detail::CompareRangeValues(lhs, rhs) < 0; +} + +bool StringRangeSet::Range::StartLess::operator()(Range const& lhs, + Range const& rhs) const { + auto res = detail::CompareRangeValues(lhs.start(), rhs.start()); + if (res == 0) { + return lhs.start_closed() && rhs.start_open(); + } + return res < 0; +} + +bool StringRangeSet::Range::EndLess::operator()(Range const& lhs, + Range const& rhs) const { + auto res = detail::CompareRangeValues(lhs.end(), rhs.end()); + if (res == 0) { + return lhs.end_open() && rhs.end_closed(); + } + return res < 0; +} + +StringRangeSet StringRangeSet::All() { + StringRangeSet res; + res.Sum(Range("", false, StringRangeSet::Range::Infinity{}, false)); + return res; +} + +StringRangeSet StringRangeSet::Empty() { return StringRangeSet{}; } + +void StringRangeSet::Sum(StringRangeSet::Range inserted_range) { + 
detail::RangeSetSumImpl(disjoint_ranges_, std::move(inserted_range)); +} + +void StringRangeSet::Intersect(StringRangeSet::Range const& intersected_range) { + detail::RangeSetIntersectImpl(disjoint_ranges_, intersected_range); +} + +bool operator==(StringRangeSet::Range::Value const& lhs, + StringRangeSet::Range::Value const& rhs) { + if (absl::holds_alternative(lhs)) { + return absl::holds_alternative(rhs); + } + if (absl::holds_alternative(rhs)) { + return false; + } + return absl::get(lhs) == absl::get(rhs); +} + +std::ostream& operator<<(std::ostream& os, + StringRangeSet::Range::Value const& value) { + if (absl::holds_alternative(value)) { + os << "inf"; + return os; + } + os << absl::get(value); + return os; +} + +bool operator==(StringRangeSet::Range const& lhs, + StringRangeSet::Range const& rhs) { + return lhs.start() == rhs.start() && lhs.start_open() == rhs.start_open() && + lhs.end() == rhs.end() && lhs.end_open() == rhs.end_open(); +} + +std::ostream& operator<<(std::ostream& os, StringRangeSet::Range const& range) { + os << (range.start_closed() ? "[" : "(") << range.start() << "," + << range.end() << (range.end_closed() ? 
"]" : ")"); + return os; +} + +TimestampRangeSet::Range::Range(Value start, Value end) + : start_(std::move(start)), end_(std::move(end)) { + assert(end_ == std::chrono::milliseconds::zero() || start_ <= end_); +} + +StatusOr TimestampRangeSet::Range::FromTimestampRange( + google::bigtable::v2::TimestampRange const& timestamp_range) { + auto start = std::chrono::duration_cast( + std::chrono::microseconds(timestamp_range.start_timestamp_micros())); + auto end = std::chrono::duration_cast( + std::chrono::microseconds(timestamp_range.end_timestamp_micros())); + if (end != std::chrono::milliseconds::zero() && start > end) { + return InvalidArgumentError( + "reversed `timestamp_range`", + GCP_ERROR_INFO().WithMetadata("timestamp_range", + timestamp_range.DebugString())); + } + return Range(start, end); +} + +bool TimestampRangeSet::Range::IsAboveEnd(Value value) const { + return end_ != std::chrono::milliseconds::zero() && value >= end_; +} + +bool TimestampRangeSet::Range::IsWithin(Value value) const { + return !IsAboveEnd(value) && !IsBelowStart(value); +} + +bool TimestampRangeSet::Range::IsEmpty(TimestampRangeSet::Range::Value start, + TimestampRangeSet::Range::Value end) { + if (end == std::chrono::milliseconds::zero()) { + return false; + } + return start >= end; +} + +bool TimestampRangeSet::Range::StartLess::operator()(Range const& lhs, + Range const& rhs) const { + return lhs.start() < rhs.start(); +} + +bool TimestampRangeSet::Range::EndLess::operator()(Range const& lhs, + Range const& rhs) const { + if (lhs.end() == std::chrono::milliseconds::zero()) { + return false; + } + if (rhs.end() == std::chrono::milliseconds::zero()) { + return true; + } + return lhs.end() < rhs.end(); +} + +TimestampRangeSet TimestampRangeSet::All() { + TimestampRangeSet res; + res.Sum(Range(std::chrono::milliseconds(0), std::chrono::milliseconds(0))); + return res; +} + +TimestampRangeSet TimestampRangeSet::Empty() { return TimestampRangeSet{}; } + +void 
TimestampRangeSet::Sum(TimestampRangeSet::Range inserted_range) { + detail::RangeSetSumImpl(disjoint_ranges_, std::move(inserted_range)); +} + +void TimestampRangeSet::Intersect( + TimestampRangeSet::Range const& intersected_range) { + detail::RangeSetIntersectImpl(disjoint_ranges_, intersected_range); +} + +bool operator==(TimestampRangeSet::Range const& lhs, + TimestampRangeSet::Range const& rhs) { + return lhs.start() == rhs.start() && lhs.end() == rhs.end(); +} + +std::ostream& operator<<(std::ostream& os, + TimestampRangeSet::Range const& range) { + os << "[" << range.start().count() << "ms,"; + if (range.end() == std::chrono::milliseconds::zero()) { + os << "inf"; + } else { + os << range.end().count() << "ms"; + } + os << ")"; + return os; +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/range_set.h b/google/cloud/bigtable/emulator/range_set.h new file mode 100644 index 0000000000000..0495543766a79 --- /dev/null +++ b/google/cloud/bigtable/emulator/range_set.h @@ -0,0 +1,216 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_RANGE_SET_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_RANGE_SET_H + +#include "google/cloud/status_or.h" +#include "absl/types/variant.h" +#include +#include +#include +#include + +namespace google { +namespace bigtable { +namespace v2 { +class RowRange; +class ValueRange; +class ColumnRange; +class TimestampRange; +} // namespace v2 +} // namespace bigtable +namespace cloud { +namespace bigtable { +namespace emulator { + +/** + * Objects of this class hold a sorted, disjoint set of string ranges. + * + * Users of this class can intersect and sum the ranges held by this structure. + * + * The ranges can be open or closed on each end and the end may hold a special + * value - infinity. + * + * Ranges starts are never larger than ends. + */ +class StringRangeSet { + public: + class Range { + public: + struct Infinity {}; + using Value = absl::variant; + + Range(Value start, bool start_open, Value end, bool end_open); + static StatusOr FromRowRange( + google::bigtable::v2::RowRange const& row_range); + static StatusOr FromValueRange( + google::bigtable::v2::ValueRange const& value_range); + static StatusOr FromColumnRange( + google::bigtable::v2::ColumnRange const& column_range); + + Value const& start() const& { return start_; } + std::string const& start_finite() const& { + return absl::get(start_); + } + bool start_open() const { return start_open_; } + bool start_closed() const { return !start_open_; } + void set_start(Range const& source); + + Value const& end() const& { return end_; } + bool end_open() const { return end_open_; } + bool end_closed() const { return !end_open_; } + void set_end(Range const& source); + + bool IsBelowStart(Value const& value) const; + bool IsAboveEnd(Value const& value) const; + bool IsWithin(Value const& value) const; + bool IsEmpty() const; + + static bool IsEmpty(StringRangeSet::Range::Value const& start, + bool start_open, + StringRangeSet::Range::Value 
const& end, bool end_open); + + struct ValueLess { + bool operator()(Range::Value const& lhs, Range::Value const& rhs) const; + }; + + struct StartLess { + bool operator()(Range const& lhs, Range const& rhs) const; + }; + + struct EndLess { + bool operator()(Range const& lhs, Range const& rhs) const; + }; + + private: + Value start_; + bool start_open_; + Value end_; + bool end_open_; + }; + + static StringRangeSet All(); + static StringRangeSet Empty(); + void Sum(Range inserted_range); + void Intersect(Range const& intersected_range); + + std::set const& disjoint_ranges() const { + return disjoint_ranges_; + }; + + private: + std::set disjoint_ranges_; +}; + +bool operator==(StringRangeSet::Range::Value const& lhs, + StringRangeSet::Range::Value const& rhs); + +std::ostream& operator<<(std::ostream& os, + StringRangeSet::Range::Value const& value); + +bool operator==(StringRangeSet::Range const& lhs, + StringRangeSet::Range const& rhs); + +std::ostream& operator<<(std::ostream& os, StringRangeSet::Range const& range); + +/** + * Objects of this class hold a sorted, disjoint set of timestamp ranges. + * + * Users of this class can intersect and sum the ranges held by this structure. + * + * The ranges are closed on the left and open on the right. A value zero on + * the end is treated as infinity. + * + * Range starts are never larger than ends. 
+ */ +class TimestampRangeSet { + public: + class Range { + public: + using Value = std::chrono::milliseconds; + + Range(Value start, Value end); + static StatusOr FromTimestampRange( + google::bigtable::v2::TimestampRange const& timestamp_range); + + Value start() const { return start_; } + void set_start(Range const& source) { start_ = source.start_; } + + Value end() const { return end_; } + void set_end(Range const& source) { end_ = source.end_; } + + bool IsBelowStart(Value value) const { return value < start_; } + bool IsAboveEnd(Value value) const; + bool IsWithin(Value value) const; + + static bool IsEmpty(TimestampRangeSet::Range::Value start, + TimestampRangeSet::Range::Value end); + bool IsEmpty() const { return IsEmpty(start_, end_); } + + struct StartLess { + bool operator()(Range const& lhs, Range const& rhs) const; + }; + + struct EndLess { + bool operator()(Range const& lhs, Range const& rhs) const; + }; + + private: + Value start_; + Value end_; + }; + + static TimestampRangeSet All(); + static TimestampRangeSet Empty(); + void Sum(Range inserted_range); + void Intersect(Range const& intersected_range); + + std::set const& disjoint_ranges() const { + return disjoint_ranges_; + }; + + private: + std::set disjoint_ranges_; +}; + +bool operator==(TimestampRangeSet::Range const& lhs, + TimestampRangeSet::Range const& rhs); + +std::ostream& operator<<(std::ostream& os, + TimestampRangeSet::Range const& range); + +// For testing only. 
+namespace detail { + +int CompareRangeValues(StringRangeSet::Range::Value const& lhs, + StringRangeSet::Range::Value const& rhs); +bool ConsecutiveRowKeys(StringRangeSet::Range::Value const& lhs, + StringRangeSet::Range::Value const& rhs); +bool HasOverlap(StringRangeSet::Range const& lhs, + StringRangeSet::Range const& rhs); +bool HasOverlap(TimestampRangeSet::Range const& lhs, + TimestampRangeSet::Range const& rhs); +bool DisjointAndSortedRangesAdjacent(StringRangeSet::Range const& lhs, + StringRangeSet::Range const& rhs); +bool DisjointAndSortedRangesAdjacent(TimestampRangeSet::Range const& lhs, + TimestampRangeSet::Range const& rhs); + +} // namespace detail +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_RANGE_SET_H diff --git a/google/cloud/bigtable/emulator/range_set_test.cc b/google/cloud/bigtable/emulator/range_set_test.cc new file mode 100644 index 0000000000000..02a615f358c65 --- /dev/null +++ b/google/cloud/bigtable/emulator/range_set_test.cc @@ -0,0 +1,823 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/bigtable/row_range.h" +#include "google/cloud/testing_util/chrono_literals.h" +#include "google/cloud/testing_util/status_matchers.h" +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +namespace { + +bool const kOpen = true; +bool const kClosed = false; +bool const kWhatever = true; // to indicate it's unimportant in the test + +TEST(StringRangeValueOrder, Simple) { + EXPECT_EQ(-1, detail::CompareRangeValues("A", "B")); + EXPECT_EQ(0, detail::CompareRangeValues("A", "A")); + EXPECT_EQ(1, detail::CompareRangeValues("B", "A")); +} + +TEST(StringRangeValueOrder, Empty) { + EXPECT_EQ(-1, detail::CompareRangeValues("", "A")); + EXPECT_EQ(0, detail::CompareRangeValues("", "")); + EXPECT_EQ(1, detail::CompareRangeValues("A", "")); +} + +TEST(StringRangeValueOrder, Infinite) { + EXPECT_EQ(-1, + detail::CompareRangeValues("A", StringRangeSet::Range::Infinity{})); + EXPECT_EQ(0, detail::CompareRangeValues(StringRangeSet::Range::Infinity{}, + StringRangeSet::Range::Infinity{})); + EXPECT_EQ(1, + detail::CompareRangeValues(StringRangeSet::Range::Infinity{}, "A")); + + EXPECT_EQ(-1, + detail::CompareRangeValues("", StringRangeSet::Range::Infinity{})); + EXPECT_EQ(1, + detail::CompareRangeValues(StringRangeSet::Range::Infinity{}, "")); +} + +// FIXME add invalid data tests +TEST(StringRangeSet, FromRowRangeClosed) { + auto closed = StringRangeSet::Range::FromRowRange( + RowRange::Closed("A", "B").as_proto()); + EXPECT_STATUS_OK(closed); + EXPECT_EQ("A", closed->start()); + EXPECT_EQ("B", closed->end()); + EXPECT_TRUE(closed->start_closed()); + EXPECT_TRUE(closed->end_closed()); + EXPECT_FALSE(closed->start_open()); + EXPECT_FALSE(closed->end_open()); +} + +TEST(StringRangeSet, FromRowRangeOpen) { + auto open = + StringRangeSet::Range::FromRowRange(RowRange::Open("A", "B").as_proto()); + 
EXPECT_STATUS_OK(open); + EXPECT_EQ("A", open->start()); + EXPECT_EQ("B", open->end()); + EXPECT_FALSE(open->start_closed()); + EXPECT_FALSE(open->end_closed()); + EXPECT_TRUE(open->start_open()); + EXPECT_TRUE(open->end_open()); +} + +TEST(StringRangeSet, FromRowRangeImplicitlyInfinite) { + auto range = + StringRangeSet::Range::FromRowRange(google::bigtable::v2::RowRange{}); + + EXPECT_STATUS_OK(range); + EXPECT_EQ("", range->start()); + EXPECT_EQ(StringRangeSet::Range::Infinity{}, range->end()); + EXPECT_TRUE(range->start_closed()); + EXPECT_TRUE(range->end_closed()); + EXPECT_FALSE(range->start_open()); + EXPECT_FALSE(range->end_open()); +} + +TEST(StringRangeSet, FromRowRangeExplicitlyInfinite) { + for (bool end_open : {true, false}) { + google::bigtable::v2::RowRange proto_range; + proto_range.set_start_key_closed(""); + if (end_open) { + proto_range.set_end_key_open(""); + } else { + proto_range.set_end_key_closed(""); + } + + auto range = StringRangeSet::Range::FromRowRange(proto_range); + EXPECT_STATUS_OK(range); + EXPECT_EQ("", range->start()); + EXPECT_EQ(StringRangeSet::Range::Infinity{}, range->end()); + EXPECT_TRUE(range->start_closed()); + EXPECT_TRUE(range->end_closed()); + EXPECT_FALSE(range->start_open()); + EXPECT_FALSE(range->end_open()); + } +} + +TEST(StringRangeSet, FromColumnRangeClosed) { + google::bigtable::v2::ColumnRange proto_range; + proto_range.set_start_qualifier_closed("A"); + proto_range.set_end_qualifier_closed("B"); + auto closed = StringRangeSet::Range::FromColumnRange(proto_range); + EXPECT_STATUS_OK(closed); + EXPECT_EQ("A", closed->start()); + EXPECT_EQ("B", closed->end()); + EXPECT_TRUE(closed->start_closed()); + EXPECT_TRUE(closed->end_closed()); + EXPECT_FALSE(closed->start_open()); + EXPECT_FALSE(closed->end_open()); +} + +TEST(StringRangeSet, FromColumnRangeOpen) { + google::bigtable::v2::ColumnRange proto_range; + proto_range.set_start_qualifier_open("A"); + proto_range.set_end_qualifier_open("B"); + auto open = 
StringRangeSet::Range::FromColumnRange(proto_range); + EXPECT_STATUS_OK(open); + EXPECT_EQ("A", open->start()); + EXPECT_EQ("B", open->end()); + EXPECT_FALSE(open->start_closed()); + EXPECT_FALSE(open->end_closed()); + EXPECT_TRUE(open->start_open()); + EXPECT_TRUE(open->end_open()); +} + +TEST(StringRangeSet, FromColumnRangeImplicitlyInfinite) { + auto range = StringRangeSet::Range::FromColumnRange( + google::bigtable::v2::ColumnRange{}); + + EXPECT_STATUS_OK(range); + EXPECT_EQ("", range->start()); + EXPECT_EQ(StringRangeSet::Range::Infinity{}, range->end()); + EXPECT_TRUE(range->start_closed()); + EXPECT_TRUE(range->end_closed()); + EXPECT_FALSE(range->start_open()); + EXPECT_FALSE(range->end_open()); +} + +TEST(StringRangeSet, FromColumnRangeExplicitlyInfinite) { + for (bool end_open : {true, false}) { + google::bigtable::v2::ColumnRange proto_range; + proto_range.set_start_qualifier_closed(""); + if (end_open) { + proto_range.set_end_qualifier_open(""); + } else { + proto_range.set_end_qualifier_closed(""); + } + + auto range = StringRangeSet::Range::FromColumnRange(proto_range); + EXPECT_STATUS_OK(range); + EXPECT_EQ("", range->start()); + EXPECT_EQ(StringRangeSet::Range::Infinity{}, range->end()); + EXPECT_TRUE(range->start_closed()); + EXPECT_TRUE(range->end_closed()); + EXPECT_FALSE(range->start_open()); + EXPECT_FALSE(range->end_open()); + } +} + +TEST(StringRangeSet, FromValueRangeClosed) { + google::bigtable::v2::ValueRange proto_range; + proto_range.set_start_value_closed("A"); + proto_range.set_end_value_closed("B"); + auto closed = StringRangeSet::Range::FromValueRange(proto_range); + EXPECT_STATUS_OK(closed); + EXPECT_EQ("A", closed->start()); + EXPECT_EQ("B", closed->end()); + EXPECT_TRUE(closed->start_closed()); + EXPECT_TRUE(closed->end_closed()); + EXPECT_FALSE(closed->start_open()); + EXPECT_FALSE(closed->end_open()); +} + +TEST(StringRangeSet, FromValueRangeOpen) { + google::bigtable::v2::ValueRange proto_range; + 
proto_range.set_start_value_open("A"); + proto_range.set_end_value_open("B"); + auto open = StringRangeSet::Range::FromValueRange(proto_range); + EXPECT_STATUS_OK(open); + EXPECT_EQ("A", open->start()); + EXPECT_EQ("B", open->end()); + EXPECT_FALSE(open->start_closed()); + EXPECT_FALSE(open->end_closed()); + EXPECT_TRUE(open->start_open()); + EXPECT_TRUE(open->end_open()); +} + +TEST(StringRangeSet, FromValueRangeImplicitlyInfinite) { + auto range = + StringRangeSet::Range::FromValueRange(google::bigtable::v2::ValueRange{}); + + EXPECT_STATUS_OK(range); + EXPECT_EQ("", range->start()); + EXPECT_EQ(StringRangeSet::Range::Infinity{}, range->end()); + EXPECT_TRUE(range->start_closed()); + EXPECT_TRUE(range->end_closed()); + EXPECT_FALSE(range->start_open()); + EXPECT_FALSE(range->end_open()); +} + +TEST(StringRangeSet, FromValueRangeExplicitlyInfinite) { + for (bool end_open : {true, false}) { + google::bigtable::v2::ValueRange proto_range; + proto_range.set_start_value_closed(""); + if (end_open) { + proto_range.set_end_value_open(""); + } else { + proto_range.set_end_value_closed(""); + } + + auto range = StringRangeSet::Range::FromValueRange(proto_range); + EXPECT_STATUS_OK(range); + EXPECT_EQ("", range->start()); + EXPECT_EQ(StringRangeSet::Range::Infinity{}, range->end()); + EXPECT_TRUE(range->start_closed()); + EXPECT_TRUE(range->end_closed()); + EXPECT_FALSE(range->start_open()); + EXPECT_FALSE(range->end_open()); + } +} + +TEST(StringRangeSet, RangeValueLess) { + EXPECT_TRUE(StringRangeSet::Range::ValueLess()("A", "B")); + EXPECT_FALSE(StringRangeSet::Range::ValueLess()("A", "A")); + EXPECT_FALSE(StringRangeSet::Range::ValueLess()("B", "A")); +} + +TEST(StringRangeSet, RangeStartLess) { + EXPECT_TRUE(StringRangeSet::Range::StartLess()( + StringRangeSet::Range("A", kOpen, "unimportant", kWhatever), + StringRangeSet::Range("B", kOpen, "unimportant", kWhatever))); + EXPECT_FALSE(StringRangeSet::Range::StartLess()( + StringRangeSet::Range("B", kOpen, 
"unimportant", kWhatever), + StringRangeSet::Range("A", kOpen, "unimportant", kWhatever))); + EXPECT_FALSE(StringRangeSet::Range::StartLess()( + StringRangeSet::Range("A", kOpen, "unimportant", kWhatever), + StringRangeSet::Range("A", kOpen, "unimportant", kWhatever))); + + EXPECT_TRUE(StringRangeSet::Range::StartLess()( + StringRangeSet::Range("A", kClosed, "unimportant", kWhatever), + StringRangeSet::Range("B", kClosed, "unimportant", kWhatever))); + EXPECT_FALSE(StringRangeSet::Range::StartLess()( + StringRangeSet::Range("B", kClosed, "unimportant", kWhatever), + StringRangeSet::Range("A", kClosed, "unimportant", kWhatever))); + EXPECT_FALSE(StringRangeSet::Range::StartLess()( + StringRangeSet::Range("A", kClosed, "unimportant", kWhatever), + StringRangeSet::Range("A", kClosed, "unimportant", kWhatever))); + + EXPECT_FALSE(StringRangeSet::Range::StartLess()( + StringRangeSet::Range("A", kOpen, "unimportant", kWhatever), + StringRangeSet::Range("A", kClosed, "unimportant", kWhatever))); + EXPECT_TRUE(StringRangeSet::Range::StartLess()( + StringRangeSet::Range("A", kClosed, "unimportant", kWhatever), + StringRangeSet::Range("A", kOpen, "unimportant", kWhatever))); +} + +TEST(StringRangeSet, RangeEndLess) { + EXPECT_TRUE(StringRangeSet::Range::EndLess()( + StringRangeSet::Range("A", kWhatever, "A", kOpen), + StringRangeSet::Range("A", kWhatever, "B", kOpen))); + EXPECT_FALSE(StringRangeSet::Range::EndLess()( + StringRangeSet::Range("A", kWhatever, "B", kOpen), + StringRangeSet::Range("A", kWhatever, "A", kOpen))); + EXPECT_FALSE(StringRangeSet::Range::EndLess()( + StringRangeSet::Range("A", kWhatever, "A", kOpen), + StringRangeSet::Range("A", kWhatever, "A", kOpen))); + + EXPECT_TRUE(StringRangeSet::Range::EndLess()( + StringRangeSet::Range("A", kWhatever, "A", kClosed), + StringRangeSet::Range("A", kWhatever, "B", kClosed))); + EXPECT_FALSE(StringRangeSet::Range::EndLess()( + StringRangeSet::Range("A", kWhatever, "B", kClosed), + StringRangeSet::Range("A", 
kWhatever, "A", kClosed))); + EXPECT_FALSE(StringRangeSet::Range::EndLess()( + StringRangeSet::Range("A", kWhatever, "A", kClosed), + StringRangeSet::Range("A", kWhatever, "A", kClosed))); + + EXPECT_FALSE(StringRangeSet::Range::EndLess()( + StringRangeSet::Range("A", kWhatever, "A", kClosed), + StringRangeSet::Range("A", kWhatever, "A", kOpen))); + EXPECT_TRUE(StringRangeSet::Range::EndLess()( + StringRangeSet::Range("A", kWhatever, "A", kOpen), + StringRangeSet::Range("A", kWhatever, "A", kClosed))); +} + +TEST(StringRangeSet, BelowStart) { + StringRangeSet::Range const open("B", kOpen, "unimportant", kWhatever); + StringRangeSet::Range const closed("B", kClosed, "unimportant", kWhatever); + StringRangeSet::Range const infinite( + StringRangeSet::Range::Infinity{}, kClosed, + StringRangeSet::Range::Infinity{}, kClosed); + + EXPECT_TRUE(open.IsBelowStart("A")); + EXPECT_TRUE(closed.IsBelowStart("A")); + EXPECT_TRUE(open.IsBelowStart("B")); + EXPECT_FALSE(closed.IsBelowStart("B")); + EXPECT_FALSE(open.IsBelowStart("C")); + EXPECT_FALSE(closed.IsBelowStart("C")); + EXPECT_FALSE(open.IsBelowStart(StringRangeSet::Range::Infinity{})); + EXPECT_FALSE(closed.IsBelowStart(StringRangeSet::Range::Infinity{})); + EXPECT_TRUE(infinite.IsBelowStart("whatever_string")); + EXPECT_FALSE(infinite.IsBelowStart(StringRangeSet::Range::Infinity{})); +} + +TEST(StringRangeSet, AboveEnd) { + StringRangeSet::Range const open("A", kWhatever, "B", kOpen); + StringRangeSet::Range const closed("A", kWhatever, "B", kClosed); + StringRangeSet::Range const infinite( + "unimportant", kWhatever, StringRangeSet::Range::Infinity{}, kClosed); + + EXPECT_FALSE(open.IsAboveEnd("A")); + EXPECT_FALSE(closed.IsAboveEnd("A")); + EXPECT_TRUE(open.IsAboveEnd("B")); + EXPECT_FALSE(closed.IsAboveEnd("B")); + EXPECT_TRUE(open.IsAboveEnd("C")); + EXPECT_TRUE(closed.IsAboveEnd("C")); + EXPECT_FALSE(infinite.IsAboveEnd("whatever_string")); + EXPECT_FALSE(infinite.IsAboveEnd(StringRangeSet::Range::Infinity{})); +} 
+ +TEST(StringRangeSet, IsWithin) { + StringRangeSet::Range const closed("A", kClosed, "C", kClosed); + EXPECT_FALSE(closed.IsWithin("")); + EXPECT_TRUE(closed.IsWithin("A")); + EXPECT_TRUE(closed.IsWithin("B")); + EXPECT_TRUE(closed.IsWithin("C")); + EXPECT_FALSE(closed.IsWithin("D")); + EXPECT_FALSE(closed.IsWithin(StringRangeSet::Range::Infinity{})); + + StringRangeSet::Range const open("A", kOpen, "C", kOpen); + EXPECT_FALSE(open.IsWithin("")); + EXPECT_FALSE(open.IsWithin("A")); + EXPECT_TRUE(open.IsWithin("B")); + EXPECT_FALSE(open.IsWithin("C")); + EXPECT_FALSE(open.IsWithin("D")); + EXPECT_FALSE(open.IsWithin(StringRangeSet::Range::Infinity{})); +} + +TEST(StringRangeSet, RangeEqality) { + EXPECT_EQ(StringRangeSet::Range("A", kClosed, "B", kOpen), + StringRangeSet::Range("A", kClosed, "B", kOpen)); + + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, "B", kOpen) == + StringRangeSet::Range("B", kClosed, "B", kOpen)); + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, "B", kOpen) == + StringRangeSet::Range("A", kOpen, "B", kOpen)); + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, "B", kOpen) == + StringRangeSet::Range("A", kClosed, "C", kOpen)); + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, "B", kOpen) == + StringRangeSet::Range("A", kClosed, "B", kClosed)); +} + +TEST(StringRangeSet, RangePrint) { + { + std::stringstream os; + os << StringRangeSet::Range("A", kClosed, "B", kOpen); + EXPECT_EQ("[A,B)", os.str()); + } + { + std::stringstream os; + os << StringRangeSet::Range("A", kOpen, "B", kClosed); + EXPECT_EQ("(A,B]", os.str()); + } + { + std::stringstream os; + os << StringRangeSet::Range("", kOpen, "", kClosed); + EXPECT_EQ("(,]", os.str()); + } + { + std::stringstream os; + os << StringRangeSet::Range(StringRangeSet::Range::Infinity{}, kClosed, + StringRangeSet::Range::Infinity{}, kClosed); + EXPECT_EQ("[inf,inf]", os.str()); + } +} + +// FIXME - test ConsecutiveRowKeys + +TEST(StringRangeSet, IsEmpty) { + EXPECT_FALSE(StringRangeSet::Range("A", 
kClosed, "A", kClosed).IsEmpty()); + EXPECT_TRUE(StringRangeSet::Range("A", kClosed, "A", kOpen).IsEmpty()); + EXPECT_TRUE(StringRangeSet::Range("A", kOpen, "A", kClosed).IsEmpty()); + EXPECT_TRUE(StringRangeSet::Range("A", kOpen, "A", kOpen).IsEmpty()); + + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, "C", kClosed).IsEmpty()); + EXPECT_FALSE(StringRangeSet::Range("A", kOpen, "C", kClosed).IsEmpty()); + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, "C", kOpen).IsEmpty()); + EXPECT_FALSE(StringRangeSet::Range("A", kOpen, "C", kOpen).IsEmpty()); + + EXPECT_FALSE( + StringRangeSet::Range("A", kClosed, std::string("A\0", 2), kClosed) + .IsEmpty()); + EXPECT_FALSE(StringRangeSet::Range("A", kOpen, std::string("A\0", 2), kClosed) + .IsEmpty()); + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, std::string("A\0", 2), kOpen) + .IsEmpty()); + EXPECT_TRUE(StringRangeSet::Range("A", kOpen, std::string("A\0", 2), kOpen) + .IsEmpty()); + + EXPECT_FALSE(StringRangeSet::Range("A", kClosed, + StringRangeSet::Range::Infinity{}, kClosed) + .IsEmpty()); + EXPECT_FALSE(StringRangeSet::Range("", kClosed, + StringRangeSet::Range::Infinity{}, kClosed) + .IsEmpty()); + EXPECT_TRUE(StringRangeSet::Range(StringRangeSet::Range::Infinity{}, kClosed, + StringRangeSet::Range::Infinity{}, kClosed) + .IsEmpty()); +} + +TEST(StringRangeSet, HasOverlap) { + EXPECT_FALSE( + detail::HasOverlap(StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("A", kClosed, "A", kClosed))); + EXPECT_FALSE( + detail::HasOverlap(StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("A", kClosed, "B", kOpen))); + EXPECT_FALSE( + detail::HasOverlap(StringRangeSet::Range("B", kOpen, "D", kClosed), + StringRangeSet::Range("A", kClosed, "B", kClosed))); + EXPECT_FALSE(detail::HasOverlap( + StringRangeSet::Range("B", kOpen, "D", kClosed), + StringRangeSet::Range("A", kClosed, std::string("B\0", 2), kOpen))); + EXPECT_TRUE(detail::HasOverlap( + StringRangeSet::Range("B", 
kClosed, "D", kClosed), + StringRangeSet::Range("A", kClosed, std::string("B\0", 2), kOpen))); + EXPECT_TRUE(detail::HasOverlap( + StringRangeSet::Range("B", kOpen, "D", kClosed), + StringRangeSet::Range("A", kClosed, std::string("B\0", 2), kClosed))); + EXPECT_TRUE( + detail::HasOverlap(StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("A", kClosed, "B", kClosed))); + EXPECT_TRUE( + detail::HasOverlap(StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("A", kClosed, "C", kOpen))); + EXPECT_TRUE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("A", kClosed, StringRangeSet::Range::Infinity{}, + kClosed))); + + EXPECT_FALSE( + detail::HasOverlap(StringRangeSet::Range("B", kClosed, "D", kOpen), + StringRangeSet::Range("D", kClosed, "E", kOpen))); + EXPECT_FALSE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, std::string("D\0", 2), kOpen), + StringRangeSet::Range("D", kOpen, "E", kOpen))); + EXPECT_TRUE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, std::string("D\0", 2), kClosed), + StringRangeSet::Range("D", kOpen, "E", kOpen))); + EXPECT_TRUE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, std::string("D\0", 2), kOpen), + StringRangeSet::Range("D", kClosed, "E", kOpen))); + EXPECT_TRUE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("D", kClosed, StringRangeSet::Range::Infinity{}, + kClosed))); + EXPECT_FALSE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("D", kOpen, StringRangeSet::Range::Infinity{}, + kClosed))); + EXPECT_FALSE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, "D", kOpen), + StringRangeSet::Range("D", kClosed, StringRangeSet::Range::Infinity{}, + kClosed))); + EXPECT_FALSE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, "D", kOpen), + StringRangeSet::Range("D", kOpen, StringRangeSet::Range::Infinity{}, + 
kClosed))); + EXPECT_FALSE(detail::HasOverlap( + StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("E", kClosed, StringRangeSet::Range::Infinity{}, + kClosed))); + EXPECT_FALSE( + detail::HasOverlap(StringRangeSet::Range("B", kClosed, "D", kClosed), + StringRangeSet::Range("D", kOpen, "E", kOpen))); +} + +TEST(StringRangeSet, DisjointAdjacent) { + EXPECT_FALSE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "B", kOpen), + StringRangeSet::Range("C", kOpen, "D", kWhatever))); + EXPECT_FALSE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "C", kOpen), + StringRangeSet::Range("C", kOpen, "D", kWhatever))); + EXPECT_TRUE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "C", kClosed), + StringRangeSet::Range("C", kOpen, "D", kWhatever))); + EXPECT_TRUE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "C", kOpen), + StringRangeSet::Range("C", kClosed, "D", kWhatever))); + EXPECT_FALSE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "C", kOpen), + StringRangeSet::Range(std::string("C\0", 2), kOpen, "D", kWhatever))); + EXPECT_FALSE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "C", kClosed), + StringRangeSet::Range(std::string("C\0", 2), kOpen, "D", kWhatever))); + EXPECT_FALSE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "C", kOpen), + StringRangeSet::Range(std::string("C\0", 2), kClosed, "D", kWhatever))); + EXPECT_TRUE(detail::DisjointAndSortedRangesAdjacent( + StringRangeSet::Range("A", kWhatever, "C", kClosed), + StringRangeSet::Range(std::string("C\0", 2), kClosed, "D", kWhatever))); +} + +// FIXME test invalid data +TEST(TimestampRangeSet, FromInfiniteTimestampRange) { + using testing_util::chrono_literals::operator""_ms; + auto infinite = TimestampRangeSet::Range::FromTimestampRange( + 
google::bigtable::v2::TimestampRange{}); + ASSERT_STATUS_OK(infinite); + EXPECT_EQ(0_ms, infinite->start()); + EXPECT_EQ(0_ms, infinite->end()); +} + +TEST(TimestampRangeSet, FromFiniteTimestampRange) { + using testing_util::chrono_literals::operator""_ms; + google::bigtable::v2::TimestampRange proto; + proto.set_start_timestamp_micros(1234); + proto.set_end_timestamp_micros(123456789); + auto finite = TimestampRangeSet::Range::FromTimestampRange(proto); + ASSERT_STATUS_OK(finite); + EXPECT_EQ(1_ms, finite->start()); + EXPECT_EQ(123456_ms, finite->end()); +} + +TEST(TimestampRangeSet, RangeStartLess) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_TRUE(TimestampRangeSet::Range::StartLess()( + TimestampRangeSet::Range(3_ms, 7_ms), + TimestampRangeSet::Range(4_ms, 7_ms))); + EXPECT_FALSE(TimestampRangeSet::Range::StartLess()( + TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(4_ms, 7_ms))); + EXPECT_FALSE(TimestampRangeSet::Range::StartLess()( + TimestampRangeSet::Range(5_ms, 7_ms), + TimestampRangeSet::Range(4_ms, 7_ms))); +} + +TEST(TimestampRangeSet, RangeEndLess) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_TRUE(TimestampRangeSet::Range::EndLess()( + TimestampRangeSet::Range(3_ms, 7_ms), + TimestampRangeSet::Range(4_ms, 8_ms))); + EXPECT_FALSE(TimestampRangeSet::Range::EndLess()( + TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(4_ms, 7_ms))); + EXPECT_FALSE(TimestampRangeSet::Range::EndLess()( + TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(4_ms, 6_ms))); + EXPECT_TRUE(TimestampRangeSet::Range::EndLess()( + TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(4_ms, 0_ms))); +} + +TEST(TimestampRangeSet, BelowStart) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 7_ms).IsBelowStart(0_ms)); + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 7_ms).IsBelowStart(2_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 
7_ms).IsBelowStart(3_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 7_ms).IsBelowStart(4_ms)); +} + +TEST(TimestampRangeSet, AboveEnd) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 7_ms).IsAboveEnd(0_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 7_ms).IsAboveEnd(6_ms)); + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 7_ms).IsAboveEnd(7_ms)); + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 7_ms).IsAboveEnd(8_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 0_ms).IsAboveEnd(4_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 0_ms).IsAboveEnd(0_ms)); +} + +TEST(TimestampRangeSet, IsWithin) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 5_ms).IsWithin(0_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 5_ms).IsWithin(2_ms)); + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 5_ms).IsWithin(3_ms)); + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 5_ms).IsWithin(4_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 5_ms).IsWithin(5_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 5_ms).IsWithin(6_ms)); + + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 0_ms).IsWithin(0_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 0_ms).IsWithin(2_ms)); + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 0_ms).IsWithin(3_ms)); + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 0_ms).IsWithin(4_ms)); +} + +TEST(TimestampRangeSet, RangeEqality) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_EQ(TimestampRangeSet::Range(3_ms, 5_ms), + TimestampRangeSet::Range(3_ms, 5_ms)); + EXPECT_EQ(TimestampRangeSet::Range(3_ms, 0_ms), + TimestampRangeSet::Range(3_ms, 0_ms)); + + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 5_ms) == + TimestampRangeSet::Range(4_ms, 5_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 5_ms) == + TimestampRangeSet::Range(3_ms, 6_ms)); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 0_ms) == + TimestampRangeSet::Range(4_ms, 0_ms)); +
EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 0_ms) == + TimestampRangeSet::Range(3_ms, 10_ms)); +} + +TEST(TimestampRangeSet, RangePrint) { + using testing_util::chrono_literals::operator""_ms; + { + std::stringstream os; + os << TimestampRangeSet::Range(1_ms, 3_ms); + EXPECT_EQ("[1ms,3ms)", os.str()); + } + { + std::stringstream os; + os << TimestampRangeSet::Range(1_ms, 0_ms); + EXPECT_EQ("[1ms,inf)", os.str()); + } +} + +TEST(TimestampRangeSet, IsEmpty) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_TRUE(TimestampRangeSet::Range(3_ms, 3_ms).IsEmpty()); + EXPECT_FALSE(TimestampRangeSet::Range(3_ms, 0_ms).IsEmpty()); + EXPECT_FALSE(TimestampRangeSet::Range(0_ms, 0_ms).IsEmpty()); + EXPECT_FALSE(TimestampRangeSet::Range(1_ms, 0_ms).IsEmpty()); + EXPECT_FALSE(TimestampRangeSet::Range(1_ms, 2_ms).IsEmpty()); +} + +TEST(TimestampRangeSet, HasOverlap) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_FALSE(detail::HasOverlap(TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(0_ms, 4_ms))); + EXPECT_TRUE(detail::HasOverlap(TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(0_ms, 5_ms))); + EXPECT_TRUE(detail::HasOverlap(TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(6_ms, 9_ms))); + EXPECT_FALSE(detail::HasOverlap(TimestampRangeSet::Range(4_ms, 7_ms), + TimestampRangeSet::Range(7_ms, 9_ms))); + EXPECT_TRUE(detail::HasOverlap(TimestampRangeSet::Range(4_ms, 0_ms), + TimestampRangeSet::Range(7_ms, 9_ms))); + EXPECT_FALSE(detail::HasOverlap(TimestampRangeSet::Range(4_ms, 0_ms), + TimestampRangeSet::Range(3_ms, 4_ms))); + EXPECT_TRUE(detail::HasOverlap(TimestampRangeSet::Range(4_ms, 0_ms), + TimestampRangeSet::Range(3_ms, 5_ms))); +} + +TEST(TimestampRangeSet, DisjointAdjacent) { + using testing_util::chrono_literals::operator""_ms; + EXPECT_TRUE(detail::DisjointAndSortedRangesAdjacent( + TimestampRangeSet::Range(0_ms, 1_ms), + TimestampRangeSet::Range(1_ms, 2_ms))); + 
EXPECT_FALSE(detail::DisjointAndSortedRangesAdjacent( + TimestampRangeSet::Range(0_ms, 1_ms), + TimestampRangeSet::Range(2_ms, 2_ms))); +} + +TEST(StringRangeSet, SingleRange) { + StringRangeSet srs; + srs.Sum(StringRangeSet::Range("a", kClosed, "b", kClosed)); + ASSERT_EQ(1, srs.disjoint_ranges().size()); + ASSERT_EQ(StringRangeSet::Range("a", kClosed, "b", kClosed), + *srs.disjoint_ranges().begin()); +} + +std::set +TSRanges(std::vector> const& ranges) { + std::set res; + std::transform(ranges.begin(), ranges.end(), std::inserter(res, res.begin()), + [](std::pair const& range) { + return TimestampRangeSet::Range(range.first, range.second); + }); + return res; +} + +TEST(TimestampRangeSet, ThreeDisjointIntervals) { + using testing_util::chrono_literals::operator""_ms; + TimestampRangeSet trs; + trs.Sum(TimestampRangeSet::Range(1_ms, 2_ms)); + trs.Sum(TimestampRangeSet::Range(3_ms, 5_ms)); + trs.Sum(TimestampRangeSet::Range(6_ms, 8_ms)); + ASSERT_EQ(TSRanges({{1_ms, 2_ms}, {3_ms, 5_ms}, {6_ms, 8_ms}}), + trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, MergingAdjacentPreceding) { + using testing_util::chrono_literals::operator""_ms; + TimestampRangeSet trs; + trs.Sum(TimestampRangeSet::Range(7_ms, 8_ms)); + trs.Sum(TimestampRangeSet::Range(8_ms, 9_ms)); + ASSERT_EQ(TSRanges({{7_ms, 9_ms}}), trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, MergingOverlappingPreceding) { + using testing_util::chrono_literals::operator""_ms; + TimestampRangeSet trs; + trs.Sum(TimestampRangeSet::Range(7_ms, 9_ms)); + trs.Sum(TimestampRangeSet::Range(8_ms, 10_ms)); + ASSERT_EQ(TSRanges({{7_ms, 10_ms}}), trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, RemovingOverlapping) { + using testing_util::chrono_literals::operator""_ms; + TimestampRangeSet trs; + trs.Sum(TimestampRangeSet::Range(1_ms, 2_ms)); + trs.Sum(TimestampRangeSet::Range(3_ms, 4_ms)); + trs.Sum(TimestampRangeSet::Range(5_ms, 6_ms)); + trs.Sum(TimestampRangeSet::Range(7_ms, 8_ms)); + 
trs.Sum(TimestampRangeSet::Range(1_ms, 8_ms)); + ASSERT_EQ(TSRanges({{1_ms, 8_ms}}), trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, RemovingOverlappingExtendEnd) { + using testing_util::chrono_literals::operator""_ms; + TimestampRangeSet trs; + trs.Sum(TimestampRangeSet::Range(1_ms, 2_ms)); + trs.Sum(TimestampRangeSet::Range(3_ms, 4_ms)); + trs.Sum(TimestampRangeSet::Range(5_ms, 6_ms)); + trs.Sum(TimestampRangeSet::Range(7_ms, 8_ms)); + trs.Sum(TimestampRangeSet::Range(1_ms, 9_ms)); + ASSERT_EQ(TSRanges({{1_ms, 9_ms}}), trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, RemovingOverlappingEarlyEnd) { + using testing_util::chrono_literals::operator""_ms; + TimestampRangeSet trs; + trs.Sum(TimestampRangeSet::Range(1_ms, 2_ms)); + trs.Sum(TimestampRangeSet::Range(3_ms, 4_ms)); + trs.Sum(TimestampRangeSet::Range(5_ms, 6_ms)); + trs.Sum(TimestampRangeSet::Range(7_ms, 9_ms)); + trs.Sum(TimestampRangeSet::Range(1_ms, 8_ms)); + ASSERT_EQ(TSRanges({{1_ms, 9_ms}}), trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, PluggingGap) { + using testing_util::chrono_literals::operator""_ms; + TimestampRangeSet trs; + trs.Sum(TimestampRangeSet::Range(1_ms, 2_ms)); + trs.Sum(TimestampRangeSet::Range(3_ms, 5_ms)); + ASSERT_EQ(TSRanges({{1_ms, 2_ms}, {3_ms, 5_ms}}), trs.disjoint_ranges()); + trs.Sum(TimestampRangeSet::Range(2_ms, 3_ms)); + ASSERT_EQ(TSRanges({{1_ms, 5_ms}}), trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, IntersectAll) { + using testing_util::chrono_literals::operator""_ms; + + auto trs = TimestampRangeSet::All(); + trs.Intersect(TimestampRangeSet::Range(3_ms, 5_ms)); + ASSERT_EQ(TSRanges({{3_ms, 5_ms}}), trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, IntersectPartialShorter) { + using testing_util::chrono_literals::operator""_ms; + + auto trs = TimestampRangeSet::Empty(); + trs.Sum(TimestampRangeSet::Range(1_ms, 4_ms)); + trs.Sum(TimestampRangeSet::Range(5_ms, 6_ms)); + trs.Sum(TimestampRangeSet::Range(7_ms, 10_ms)); + 
trs.Intersect(TimestampRangeSet::Range(3_ms, 8_ms)); + ASSERT_EQ(TSRanges({{3_ms, 4_ms}, {5_ms, 6_ms}, {7_ms, 8_ms}}), + trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, IntersectPartialLonger) { + using testing_util::chrono_literals::operator""_ms; + + auto trs = TimestampRangeSet::Empty(); + trs.Sum(TimestampRangeSet::Range(3_ms, 4_ms)); + trs.Sum(TimestampRangeSet::Range(5_ms, 6_ms)); + trs.Sum(TimestampRangeSet::Range(7_ms, 8_ms)); + trs.Intersect(TimestampRangeSet::Range(1_ms, 10_ms)); + ASSERT_EQ(TSRanges({{3_ms, 4_ms}, {5_ms, 6_ms}, {7_ms, 8_ms}}), + trs.disjoint_ranges()); +} + +TEST(TimestampRangeSet, IntersectDistinct) { + using testing_util::chrono_literals::operator""_ms; + + auto trs = TimestampRangeSet::Empty(); + trs.Sum(TimestampRangeSet::Range(3_ms, 4_ms)); + trs.Intersect(TimestampRangeSet::Range(7_ms, 10_ms)); + ASSERT_EQ(TSRanges({}), trs.disjoint_ranges()); +} + +TEST(StringRangeSet, IntersectDistinct) { + auto srs = StringRangeSet::All(); + srs.Intersect({StringRangeSet::Range("col0", false, "col0", false)}); + srs.Intersect({StringRangeSet::Range("col2", false, "col2", false)}); + std::set empty; + ASSERT_EQ(empty, srs.disjoint_ranges()); +} + +} // anonymous namespace +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/row_streamer.cc b/google/cloud/bigtable/emulator/row_streamer.cc new file mode 100644 index 0000000000000..3dc243faba1ca --- /dev/null +++ b/google/cloud/bigtable/emulator/row_streamer.cc @@ -0,0 +1,98 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "google/cloud/bigtable/emulator/row_streamer.h"
+#include "google/cloud/bigtable/emulator/cell_view.h"
+#include
+#include
+#include
+#include
+
+namespace google {
+namespace cloud {
+namespace bigtable {
+namespace emulator {
+
+namespace btproto = ::google::bigtable::v2;
+
+RowStreamer::RowStreamer(grpc::ServerWriter& writer)
+    : writer_(writer) {}
+
+// Converts `cell` into a `CellChunk` and appends it to the pending buffer.
+//
+// The row key, family name and qualifier are written on a chunk only when they
+// differ from those of the previously streamed cell. Returns `false` only when
+// the buffer grew past 200 chunks and the resulting flush failed.
+bool RowStreamer::Stream(CellView const& cell) {
+  btproto::ReadRowsResponse::CellChunk chunk;
+  if (!current_row_key_ || current_row_key_ != cell.row_key()) {
+    // A new row starts here, so the last buffered chunk closes the previous
+    // row.
+    if (!pending_chunks_.empty()) {
+      pending_chunks_.back().set_commit_row(true);
+    }
+    current_row_key_ = cell.row_key();
+    current_column_family_ = cell.column_family();
+    current_column_qualifier_ = cell.column_qualifier();
+    chunk.set_row_key(cell.row_key());
+    chunk.mutable_family_name()->set_value(cell.column_family());
+    chunk.mutable_qualifier()->set_value(cell.column_qualifier());
+  }
+  // Compare against the tracked family, not the row key: otherwise a row whose
+  // key equals the new family name would never announce the family change.
+  if (current_column_family_ != cell.column_family()) {
+    current_column_family_ = cell.column_family();
+    current_column_qualifier_ = cell.column_qualifier();
+    chunk.mutable_family_name()->set_value(cell.column_family());
+    chunk.mutable_qualifier()->set_value(cell.column_qualifier());
+  }
+  // Likewise, a qualifier change is detected against the tracked qualifier.
+  if (current_column_qualifier_ != cell.column_qualifier()) {
+    current_column_qualifier_ = cell.column_qualifier();
+    chunk.mutable_qualifier()->set_value(cell.column_qualifier());
+  }
+  chunk.set_timestamp_micros(
+      std::chrono::duration_cast(cell.timestamp())
+          .count());
+  chunk.set_value(cell.value());
+  if (cell.HasLabel()) {
+    *chunk.add_labels() = cell.label();
+  }
+  pending_chunks_.emplace_back(std::move(chunk));
+  if (pending_chunks_.size() > 200) {
+    return Flush(false);
+  }
+  return true;
+}
+
+// Writes the buffered chunks to the gRPC stream as one `ReadRowsResponse`.
+//
+// With `stream_finished == true` the last buffered chunk is marked as
+// committing its row and the tracked row/family/qualifier are reset. Otherwise
+// the most recent chunk is held back in the buffer, so that a later call can
+// still mark it as the end of its row. Returns the result of the write.
+bool RowStreamer::Flush(bool stream_finished) {
+  absl::optional dont_flush_this;
+  if (stream_finished) {
+    if (!pending_chunks_.empty()) {
+      pending_chunks_.back().set_commit_row(true);
+    }
+    current_row_key_.reset();
+    current_column_family_.reset();
+    current_column_qualifier_.reset();
+  } else {
+    if (!pending_chunks_.empty()) {
+      dont_flush_this = std::move(pending_chunks_.back());
+      pending_chunks_.resize(pending_chunks_.size() - 1);
+    }
+  }
+  btproto::ReadRowsResponse resp;
+  for (auto& chunk : pending_chunks_) {
+    *resp.add_chunks() = std::move(chunk);
+  }
+  pending_chunks_.resize(0);
+  if (dont_flush_this) {
+    pending_chunks_.emplace_back(*std::move(dont_flush_this));
+  }
+  return writer_.Write(resp);
+}
+
+}  // namespace emulator
+}  // namespace bigtable
+}  // namespace cloud
+}  // namespace google
diff --git a/google/cloud/bigtable/emulator/row_streamer.h b/google/cloud/bigtable/emulator/row_streamer.h
new file mode 100644
index 0000000000000..c670f6322613a
--- /dev/null
+++ b/google/cloud/bigtable/emulator/row_streamer.h
@@ -0,0 +1,77 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_STREAMER_H
+#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_STREAMER_H
+
+#include "google/cloud/bigtable/emulator/cell_view.h"
+#include "absl/types/optional.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace google {
+namespace cloud {
+namespace bigtable {
+namespace emulator {
+
+/**
+ * Objects of this class implement the ReadRows response protocol.
+ *
+ * Incoming cells are converted to chunks and collected in an internal buffer;
+ * the buffered chunks are batched into messages written to a gRPC stream.
+ */
+class RowStreamer {
+ public:
+  /**
+   * Create a new object.
+   *
+   * @param writer the gRPC stream to be written to. It is held by reference,
+   *     so the caller must ensure it outlives this object.
+   */
+  explicit RowStreamer(
+      grpc::ServerWriter& writer);
+  /// Buffer a cell; returns `false` only if an automatic flush failed.
+  bool Stream(CellView const& cell_view);
+
+  /**
+   * Manually write buffered chunks to the stream.
+   *
+   * One should call `Flush(true)` before destroying this object.
+   *
+   * @param stream_finished if `true` the last buffered row is committed and
+   *     every buffered chunk is sent. If `false`, every buffered chunk except
+   *     the most recent one is sent immediately.
+   * @return whether writing to the stream succeeded
+   */
+  bool Flush(bool stream_finished);
+
+ private:
+  grpc::ServerWriter& writer_;  // Destination stream; not owned.
+  absl::optional current_row_key_;  // Row of the last streamed cell, if any.
+  absl::optional current_column_family_;  // Family of the last streamed cell.
+  absl::optional current_column_qualifier_;  // Qualifier of the last cell.
+  std::vector
+      pending_chunks_;  // Chunks buffered but not yet written to `writer_`.
+};
+
+}  // namespace emulator
+}  // namespace bigtable
+}  // namespace cloud
+}  // namespace google
+
+#endif  // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_ROW_STREAMER_H
diff --git a/google/cloud/bigtable/emulator/server.cc b/google/cloud/bigtable/emulator/server.cc
new file mode 100644
index 0000000000000..313f389f17a0e
--- /dev/null
+++ b/google/cloud/bigtable/emulator/server.cc
@@ -0,0 +1,379 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "google/cloud/bigtable/emulator/server.h" +#include "google/cloud/bigtable/emulator/cluster.h" +#include "google/cloud/bigtable/emulator/row_streamer.h" +#include "google/cloud/bigtable/emulator/to_grpc_status.h" +#include "google/cloud/internal/make_status.h" +#include "google/cloud/status_or.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +namespace btproto = ::google::bigtable::v2; +namespace btadmin = ::google::bigtable::admin::v2; + +class EmulatorService final : public btproto::Bigtable::Service { + public: + explicit EmulatorService(std::shared_ptr cluster) + : cluster_(std::move(cluster)) {} + + grpc::Status ReadRows( + grpc::ServerContext* /* context */, + btproto::ReadRowsRequest const* request, + grpc::ServerWriter* writer) override { + auto maybe_table = cluster_->FindTable(request->table_name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + RowStreamer row_streamer(*writer); + return ToGrpcStatus((*maybe_table)->ReadRows(*request, row_streamer)); + } + + grpc::Status SampleRowKeys( + grpc::ServerContext* /* context */, + btproto::SampleRowKeysRequest const* request, + grpc::ServerWriter* writer) override { + auto maybe_table = cluster_->FindTable(request->table_name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + + auto& table = maybe_table.value(); + + return ToGrpcStatus(table->SampleRowKeys(0.0001, writer)); + } + + grpc::Status MutateRow(grpc::ServerContext* /* context */, + btproto::MutateRowRequest const* request, + btproto::MutateRowResponse* /* response */) override { + auto maybe_table = cluster_->FindTable(request->table_name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + return 
ToGrpcStatus((*maybe_table)->MutateRow(*request)); + } + + grpc::Status MutateRows( + grpc::ServerContext* /* context */, + btproto::MutateRowsRequest const* request, + grpc::ServerWriter* writer) override { + auto maybe_table = cluster_->FindTable(request->table_name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + + int64_t index = 0; + google::bigtable::v2::MutateRowsResponse response; + + for (auto const& entry : request->entries()) { + response.Clear(); + + auto status = (*maybe_table) + ->DoMutationsWithPossibleRollbackLocked( + entry.row_key(), entry.mutations()); + + auto* response_entry = response.add_entries(); + response_entry->set_index(index++); + auto* s = response_entry->mutable_status(); + *s = ToGoogleRPCStatus(status); + + if (index == request->entries_size()) { + auto opts = grpc::WriteOptions(); + opts.set_last_message(); + writer->WriteLast(response, opts); + } else { + writer->Write(response); + } + } + + return grpc::Status::OK; + } + + grpc::Status CheckAndMutateRow( + grpc::ServerContext* /* context */, + btproto::CheckAndMutateRowRequest const* request, + btproto::CheckAndMutateRowResponse* response) override { + auto maybe_table = cluster_->FindTable(request->table_name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + + auto maybe_response = (*maybe_table)->CheckAndMutateRow(*request); + if (!maybe_response.ok()) { + return ToGrpcStatus(maybe_response.status()); + } + + *response = std::move(maybe_response.value()); + + return grpc::Status::OK; + } + + grpc::Status PingAndWarm( + grpc::ServerContext* /* context */, + btproto::PingAndWarmRequest const* /* request */, + btproto::PingAndWarmResponse* /* response */) override { + return grpc::Status::OK; + } + + grpc::Status ReadModifyWriteRow( + grpc::ServerContext* /* context */, + btproto::ReadModifyWriteRowRequest const* request, + btproto::ReadModifyWriteRowResponse* response) override { + auto maybe_table = 
cluster_->FindTable(request->table_name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + + auto maybe_response = (*maybe_table)->ReadModifyWriteRow(*request); + if (!maybe_response) { + return ToGrpcStatus(maybe_response.status()); + } + + *response = std::move(maybe_response.value()); + + return grpc::Status::OK; + } + + private: + std::shared_ptr cluster_; +}; + +class EmulatorTableService final : public btadmin::BigtableTableAdmin::Service { + public: + explicit EmulatorTableService(std::shared_ptr cluster) + : cluster_(std::move(cluster)) {} + grpc::Status CreateTable(grpc::ServerContext* /* context */, + btadmin::CreateTableRequest const* request, + btadmin::Table* response) override { + auto table_name = request->parent() + "/tables/" + request->table_id(); + auto maybe_table = cluster_->CreateTable(table_name, request->table()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + *response = *std::move(maybe_table); + return grpc::Status::OK; + } + + grpc::Status ListTables(grpc::ServerContext* /* context */, + btadmin::ListTablesRequest const* request, + btadmin::ListTablesResponse* response) override { + if (!request->page_token().empty()) { + return ToGrpcStatus(UnimplementedError( + "Pagination is not supported.", + GCP_ERROR_INFO().WithMetadata("page_token", request->page_token()))); + } + auto maybe_tables = + cluster_->ListTables(request->parent(), request->view()); + if (!maybe_tables) { + return ToGrpcStatus(maybe_tables.status()); + } + if (request->page_size() < 0) { + return ToGrpcStatus(InvalidArgumentError( + "Negative page size.", + GCP_ERROR_INFO().WithMetadata("page_size", + std::to_string(request->page_size())))); + } + if (request->page_size() > 0 && + maybe_tables->size() > static_cast(request->page_size())) { + response->set_next_page_token("unsupported"); + maybe_tables->resize(request->page_size()); + } + for (auto& table : *maybe_tables) { + *response->add_tables() = std::move(table); + 
} + return grpc::Status::OK; + } + + grpc::Status GetTable(grpc::ServerContext* /* context */, + btadmin::GetTableRequest const* request, + btadmin::Table* response) override { + auto maybe_table = cluster_->GetTable(request->name(), request->view()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + *response = *std::move(maybe_table); + return grpc::Status::OK; + } + + grpc::Status UpdateTable(grpc::ServerContext* /* context */, + btadmin::UpdateTableRequest const* request, + google::longrunning::Operation* response) override { + auto maybe_table = cluster_->FindTable(request->table().name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + auto status = + (*maybe_table)->Update(request->table(), request->update_mask()); + if (!status.ok()) { + return ToGrpcStatus(status); + } + btadmin::UpdateTableMetadata res_md; + res_md.set_name(request->table().name()); + *res_md.mutable_start_time() = + (google::protobuf::util::TimeUtil::GetCurrentTime)(); + *res_md.mutable_end_time() = + (google::protobuf::util::TimeUtil::GetCurrentTime)(); + response->set_name("UpdateTable"); + response->mutable_metadata()->PackFrom(std::move(res_md)); + response->set_done(true); + google::protobuf::Empty empty_response; + response->mutable_response()->PackFrom(std::move(empty_response)); + return grpc::Status::OK; + } + + grpc::Status DeleteTable(grpc::ServerContext* /* context */, + btadmin::DeleteTableRequest const* request, + google::protobuf::Empty* /* response */) override { + return ToGrpcStatus(cluster_->DeleteTable(request->name())); + } + + grpc::Status ModifyColumnFamilies( + grpc::ServerContext* /* context */, + btadmin::ModifyColumnFamiliesRequest const* request, + btadmin::Table* response) override { + auto maybe_table = cluster_->FindTable(request->name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + auto maybe_table_res = (*maybe_table)->ModifyColumnFamilies(*request); + if (!maybe_table_res) { + 
return ToGrpcStatus(maybe_table_res.status()); + } + *response = *std::move(maybe_table_res); + + return grpc::Status::OK; + } + + grpc::Status DropRowRange(grpc::ServerContext* /* context */, + btadmin::DropRowRangeRequest const* request, + google::protobuf::Empty* /* response */) override { + auto maybe_table = cluster_->FindTable(request->name()); + if (!maybe_table) { + return ToGrpcStatus(maybe_table.status()); + } + + auto status = (*maybe_table)->DropRowRange(*request); + if (!status.ok()) { + return ToGrpcStatus(status); + } + + return grpc::Status::OK; + } + + grpc::Status GenerateConsistencyToken( + grpc::ServerContext* /* context */, + btadmin::GenerateConsistencyTokenRequest const* request, + btadmin::GenerateConsistencyTokenResponse* response) override { + if (!cluster_->HasTable(request->name())) { + return ToGrpcStatus(NotFoundError( + "Table does not exist.", + GCP_ERROR_INFO().WithMetadata("table_name", request->name()))); + } + response->set_consistency_token("some fake token"); + return grpc::Status::OK; + } + + grpc::Status CheckConsistency( + grpc::ServerContext* /* context */, + btadmin::CheckConsistencyRequest const* request, + btadmin::CheckConsistencyResponse* response) override { + if (!cluster_->HasTable(request->name())) { + return ToGrpcStatus(NotFoundError( + "Table does not exist.", + GCP_ERROR_INFO().WithMetadata("table_name", request->name()))); + } + if (request->consistency_token() != "some fake token") { + return ToGrpcStatus(NotFoundError( + "Unknown consistency token.", + GCP_ERROR_INFO().WithMetadata("consistency_token", + request->consistency_token()))); + } + // Emulator is always consistent. 
+    response->set_consistent(true);
+    return grpc::Status::OK;
+  }
+
+ private:
+  std::shared_ptr cluster_;
+};
+
+class DefaultEmulatorServer : public EmulatorServer {
+ public:
+  DefaultEmulatorServer(std::string const& host, std::uint16_t port)
+      : bound_port_(port),
+        cluster_(std::make_shared()),
+        bt_service_(cluster_),
+        table_service_(cluster_) {
+    builder_.AddListeningPort(host + ":" + std::to_string(port),
+                              grpc::InsecureServerCredentials(), &bound_port_);
+    builder_.SetMaxReceiveMessageSize(256 * 1024 * 1024);
+    builder_.RegisterService(&bt_service_);
+    builder_.RegisterService(&table_service_);
+    server_ = builder_.BuildAndStart();
+  }
+  int bound_port() override { return bound_port_; }
+  void Shutdown() override { server_->Shutdown(); }
+  void Wait() override { server_->Wait(); }
+  bool HasValidServer() { return static_cast(server_); }
+
+ private:
+  int bound_port_;
+  std::shared_ptr cluster_;
+  EmulatorService bt_service_;
+  EmulatorTableService table_service_;
+  grpc::ServerBuilder builder_;
+  std::unique_ptr server_;
+};
+
+// Builds and starts a `DefaultEmulatorServer` listening on `host`:`port`.
+//
+// Returns the running server, or an `UnknownError` carrying the host and port
+// as metadata when the underlying gRPC server could not be started.
+StatusOr> CreateDefaultEmulatorServer(
+    std::string const& host, std::uint16_t port) {
+  // Own the server from the start so the failure path below cannot leak it.
+  auto default_emulator_server =
+      std::make_unique<DefaultEmulatorServer>(host, port);
+  if (!default_emulator_server->HasValidServer()) {
+    // `absl::StrCat` does not interpret format specifiers, so the port is
+    // converted with `std::to_string` rather than prefixed with "%d".
+    return UnknownError("An unknown error occurred when starting server",
+                        GCP_ERROR_INFO()
+                            .WithMetadata("host", host)
+                            .WithMetadata("port", std::to_string(port)));
+  }
+
+  return std::unique_ptr<EmulatorServer>(std::move(default_emulator_server));
+}
+
+}  // namespace emulator
+}  // namespace bigtable
+}  // namespace cloud
+}  // namespace google
diff --git a/google/cloud/bigtable/emulator/server.h b/google/cloud/bigtable/emulator/server.h
new file mode 100644
index 0000000000000..dbc29cfd6393b
--- /dev/null
+++ b/google/cloud/bigtable/emulator/server.h
@@ -0,0 +1,48 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_SERVER_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_SERVER_H + +#include "google/cloud/status_or.h" +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +class EmulatorServer { + public: + virtual ~EmulatorServer() = default; + + /// Return the port to which the server bound. + virtual int bound_port() = 0; + /// Initiate shutting the server down. + virtual void Shutdown() = 0; + /// Wait until the server shuts down. + virtual void Wait() = 0; +}; + +StatusOr> CreateDefaultEmulatorServer( + std::string const& host, std::uint16_t port); + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_SERVER_H diff --git a/google/cloud/bigtable/emulator/server_test.cc b/google/cloud/bigtable/emulator/server_test.cc new file mode 100644 index 0000000000000..31579b69a3603 --- /dev/null +++ b/google/cloud/bigtable/emulator/server_test.cc @@ -0,0 +1,237 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/emulator/server.h" +#include "google/cloud/testing_util/status_matchers.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +class ServerTest : public ::testing::Test { + protected: + std::unique_ptr server_; + std::shared_ptr channel_; + grpc::ClientContext ctx_; + + void SetUp() override { + auto maybe_server = CreateDefaultEmulatorServer("127.0.0.1", 0); + ASSERT_STATUS_OK(maybe_server); + server_ = std::move(maybe_server.value()); + channel_ = grpc::CreateChannel( + "localhost:" + std::to_string(server_->bound_port()), + grpc::InsecureChannelCredentials()); + } + + std::unique_ptr DataClient() { + return google::bigtable::v2::Bigtable::NewStub(channel_); + } + + std::unique_ptr + TableAdminClient() { + return google::bigtable::admin::v2::BigtableTableAdmin::NewStub(channel_); + } +}; + +TEST_F(ServerTest, DataCheckAndMutateRow) { + google::bigtable::v2::CheckAndMutateRowRequest request; + google::bigtable::v2::CheckAndMutateRowResponse response; + + grpc::Status status = + DataClient()->CheckAndMutateRow(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataExecuteQuery) { + google::bigtable::v2::ExecuteQueryRequest request; + + grpc::Status status = DataClient()->ExecuteQuery(&ctx_, request)->Finish(); + GTEST_SKIP() << "Data API's ExecuteQuery is not supported by 
the emulator."; + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataGenerateInitialChangeStreamPartitions) { + google::bigtable::v2::GenerateInitialChangeStreamPartitionsRequest request; + + grpc::Status status = + DataClient() + ->GenerateInitialChangeStreamPartitions(&ctx_, request) + ->Finish(); + GTEST_SKIP() << "Data API's GenerateInitialChangeStreamPartitions is not " + "supported by the emulator."; + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataMutateRow) { + google::bigtable::v2::MutateRowRequest request; + google::bigtable::v2::MutateRowResponse response; + + grpc::Status status = DataClient()->MutateRow(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataMutateRows) { + google::bigtable::v2::MutateRowsRequest request; + + grpc::Status status = DataClient()->MutateRows(&ctx_, request)->Finish(); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataPingAndWarm) { + google::bigtable::v2::PingAndWarmRequest request; + google::bigtable::v2::PingAndWarmResponse response; + + grpc::Status status = DataClient()->PingAndWarm(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataReadChangeStream) { + google::bigtable::v2::ReadChangeStreamRequest request; + + grpc::Status status = + DataClient()->ReadChangeStream(&ctx_, request)->Finish(); + GTEST_SKIP() + << "Data API's ReadChangeStream is not supported by the emulator."; + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataReadModifyWriteRow) { + google::bigtable::v2::ReadModifyWriteRowRequest request; + google::bigtable::v2::ReadModifyWriteRowResponse response; + + grpc::Status status = + DataClient()->ReadModifyWriteRow(&ctx_, request, &response); + EXPECT_NE(status.error_code(), 
grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataReadRows) { + google::bigtable::v2::ReadRowsRequest request; + + grpc::Status status = DataClient()->ReadRows(&ctx_, request)->Finish(); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, DataSampleRowKeys) { + google::bigtable::v2::SampleRowKeysRequest request; + google::bigtable::v2::SampleRowKeysResponse response; + + grpc::Status status = DataClient()->SampleRowKeys(&ctx_, request)->Finish(); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminCheckConsistency) { + google::bigtable::admin::v2::CheckConsistencyRequest request; + google::bigtable::admin::v2::CheckConsistencyResponse response; + + grpc::Status status = + TableAdminClient()->CheckConsistency(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminCreateTable) { + google::bigtable::admin::v2::CreateTableRequest request; + google::bigtable::admin::v2::Table response; + + grpc::Status status = + TableAdminClient()->CreateTable(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminDeleteTable) { + google::bigtable::admin::v2::DeleteTableRequest request; + google::protobuf::Empty response; + + grpc::Status status = + TableAdminClient()->DeleteTable(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminDropRowRange) { + google::bigtable::admin::v2::DropRowRangeRequest request; + google::protobuf::Empty response; + + grpc::Status status = + TableAdminClient()->DropRowRange(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminGenerateConsistencyToken) { + google::bigtable::admin::v2::GenerateConsistencyTokenRequest request; + 
google::bigtable::admin::v2::GenerateConsistencyTokenResponse response; + + grpc::Status status = + TableAdminClient()->GenerateConsistencyToken(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminGetTable) { + google::bigtable::admin::v2::GetTableRequest request; + google::bigtable::admin::v2::Table response; + + grpc::Status status = TableAdminClient()->GetTable(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminListTables) { + google::bigtable::admin::v2::ListTablesRequest request; + google::bigtable::admin::v2::ListTablesResponse response; + + grpc::Status status = + TableAdminClient()->ListTables(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminModifyColumnFamilies) { + google::bigtable::admin::v2::ModifyColumnFamiliesRequest request; + google::bigtable::admin::v2::Table response; + + grpc::Status status = + TableAdminClient()->ModifyColumnFamilies(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +TEST_F(ServerTest, TableAdminUpdateTable) { + google::bigtable::admin::v2::UpdateTableRequest request; + google::longrunning::Operation response; + + grpc::Status status = + TableAdminClient()->UpdateTable(&ctx_, request, &response); + EXPECT_NE(status.error_code(), grpc::StatusCode::UNIMPLEMENTED); +} + +// Test that the failure path for server creation does not crash. 
+TEST(ServerCreationTest, TestServerCreationFailurePath) { + auto maybe_server = CreateDefaultEmulatorServer("invalid_host_address", 0); + ASSERT_EQ(false, maybe_server.ok()); +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/table.cc b/google/cloud/bigtable/emulator/table.cc new file mode 100644 index 0000000000000..c820358edb0f6 --- /dev/null +++ b/google/cloud/bigtable/emulator/table.cc @@ -0,0 +1,1246 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/limits.h" +#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/bigtable/emulator/row_streamer.h" +#include "google/cloud/internal/big_endian.h" +#include "google/cloud/internal/make_status.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include "google/protobuf/util/field_mask_util.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +namespace btadmin = ::google::bigtable::admin::v2; + +StatusOr> Table::Create( + google::bigtable::admin::v2::Table schema) { + std::shared_ptr
res(new Table); + auto status = res->Construct(std::move(schema)); + if (!status.ok()) { + return status; + } + return res; +} + +Status Table::Construct(google::bigtable::admin::v2::Table schema) { + // Normally the constructor acts as a synchronization point. We don't have + // that luxury here, so we need to make sure that the changes performed in + // this member function are reflected in other threads. The simplest way to do + // this is the mutex. + std::lock_guard lock(mu_); + schema_ = std::move(schema); + if (schema_.granularity() == + btadmin::Table::TIMESTAMP_GRANULARITY_UNSPECIFIED) { + schema_.set_granularity(btadmin::Table::MILLIS); + } + if (schema_.cluster_states_size() > 0) { + return InvalidArgumentError( + "`cluster_states` not empty.", + GCP_ERROR_INFO().WithMetadata("schema", schema_.DebugString())); + } + if (schema_.has_restore_info()) { + return InvalidArgumentError( + "`restore_info` not empty.", + GCP_ERROR_INFO().WithMetadata("schema", schema_.DebugString())); + } + if (schema_.has_change_stream_config()) { + return UnimplementedError( + "`change_stream_config` not empty.", + GCP_ERROR_INFO().WithMetadata("schema", schema_.DebugString())); + } + if (schema_.has_automated_backup_policy()) { + return UnimplementedError( + "`automated_backup_policy` not empty.", + GCP_ERROR_INFO().WithMetadata("schema", schema_.DebugString())); + } + + for (auto const& column_family_def : schema_.column_families()) { + absl::optional opt_value_type = + absl::nullopt; + + // Support for complex types (AddToCell aggregations, e.t.c.). 
+ if (column_family_def.second.has_value_type()) { + opt_value_type = column_family_def.second.value_type(); + } + + if (opt_value_type.has_value()) { + auto cf = + ColumnFamily::ConstructAggregateColumnFamily(opt_value_type.value()); + if (!cf) { + return cf.status(); + } + column_families_.emplace(column_family_def.first, cf.value()); + } else { + column_families_.emplace(column_family_def.first, + std::make_shared()); + } + } + + return Status(); +} + +// NOLINTBEGIN(readability-function-cognitive-complexity) +StatusOr Table::ModifyColumnFamilies( + btadmin::ModifyColumnFamiliesRequest const& request) { + std::cout << "Modify column families: " << request.DebugString() << std::endl; + std::unique_lock lock(mu_); + // Work on copies so that a failing modification leaves the table untouched. + auto new_schema = schema_; + auto new_column_families = column_families_; + for (auto const& modification : request.modifications()) { + if (modification.drop()) { + if (IsDeleteProtectedNoLock()) { + return FailedPreconditionError( + "The table has deletion protection.", + GCP_ERROR_INFO().WithMetadata("modification", + modification.DebugString())); + } + if (new_column_families.erase(modification.id()) == 0) { + return NotFoundError("No such column family.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); + } + if (new_schema.mutable_column_families()->erase(modification.id()) == 0) { + return InternalError("Column family with no schema.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); + } + } else if (modification.has_update()) { + auto& cfs = *new_schema.mutable_column_families(); + auto cf_it = cfs.find(modification.id()); + if (cf_it == cfs.end()) { + return NotFoundError("No such column family.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); + } + + using google::protobuf::util::FieldMaskUtil; + google::protobuf::FieldMask effective_mask; + if (modification.has_update_mask()) { + effective_mask = 
modification.update_mask(); + if (!FieldMaskUtil::IsValidFieldMask< + google::bigtable::admin::v2::ColumnFamily>(effective_mask)) { + return InvalidArgumentError( + "Update mask is invalid.", + GCP_ERROR_INFO().WithMetadata("modification", + modification.DebugString())); + } + } else { + FieldMaskUtil::FromString("gc_rule", &effective_mask); + if (!FieldMaskUtil::IsValidFieldMask< + google::bigtable::admin::v2::ColumnFamily>(effective_mask)) { + return InternalError("Default update mask is invalid.", + GCP_ERROR_INFO().WithMetadata( + "mask", effective_mask.DebugString())); + } + } + + // Disallow the modification of the type of data stored in the + // column family (the aggregate type -- which is currently the + // only supported type -- can always be set during column family + // creation). + if (FieldMaskUtil::IsPathInFieldMask("value_type", effective_mask)) { + return InvalidArgumentError( + "The value_type cannot be changed after column family creation", + GCP_ERROR_INFO().WithMetadata("mask", + effective_mask.DebugString())); + } + + FieldMaskUtil::MergeMessageTo(modification.update(), effective_mask, + FieldMaskUtil::MergeOptions(), + &(cf_it->second)); + } else if (modification.has_create()) { + std::shared_ptr cf; + // Have we been asked to create an aggregate column family? 
+ if (modification.create().has_value_type()) { + auto value_type = modification.create().value_type(); + auto maybe_cf = + ColumnFamily::ConstructAggregateColumnFamily(value_type); + if (!maybe_cf) { + return maybe_cf.status(); + } + cf = std::move(maybe_cf.value()); + } else { + cf = std::make_shared(); + } + if (!new_column_families.emplace(modification.id(), cf).second) { + return AlreadyExistsError( + "Column family already exists.", + GCP_ERROR_INFO().WithMetadata("modification", + modification.DebugString())); + } + if (!new_schema.mutable_column_families() + ->emplace(modification.id(), modification.create()) + .second) { + return InternalError("Column family with schema but no data.", + GCP_ERROR_INFO().WithMetadata( + "modification", modification.DebugString())); + } + } else { + return UnimplementedError( + "Unsupported modification.", + GCP_ERROR_INFO().WithMetadata("modification", + modification.DebugString())); + } + } + // Defer destroying potentially large objects to after releasing the lock. 
+ column_families_.swap(new_column_families); + schema_ = new_schema; + lock.unlock(); + return new_schema; +} +// NOLINTEND(readability-function-cognitive-complexity) + +google::bigtable::admin::v2::Table Table::GetSchema() const { + std::lock_guard lock(mu_); + return schema_; +} + +Status Table::Update(google::bigtable::admin::v2::Table const& new_schema, + google::protobuf::FieldMask const& to_update) { + std::cout << "Update schema: " << new_schema.DebugString() + << " mask: " << to_update.DebugString() << std::endl; + using google::protobuf::util::FieldMaskUtil; + google::protobuf::FieldMask allowed_mask; + FieldMaskUtil::FromString( + "change_stream_config," + "change_stream_config.retention_period," + "deletion_protection", + &allowed_mask); + if (!FieldMaskUtil::IsValidFieldMask( + to_update)) { + return InvalidArgumentError( + "Update mask is invalid.", + GCP_ERROR_INFO().WithMetadata("mask", to_update.DebugString())); + } + google::protobuf::FieldMask disallowed_mask; + FieldMaskUtil::Subtract( + to_update, allowed_mask, &disallowed_mask); + if (disallowed_mask.paths_size() > 0) { + return UnimplementedError( + "Update mask contains disallowed fields.", + GCP_ERROR_INFO().WithMetadata("mask", disallowed_mask.DebugString())); + } + std::lock_guard lock(mu_); + FieldMaskUtil::MergeMessageTo(new_schema, to_update, + FieldMaskUtil::MergeOptions(), &schema_); + return Status(); +} + +template +StatusOr> Table::FindColumnFamily( + MESSAGE const& message) const { + auto column_family_it = column_families_.find(message.family_name()); + if (column_family_it == column_families_.end()) { + return NotFoundError( + "No such column family.", + GCP_ERROR_INFO().WithMetadata("mutation", message.DebugString())); + } + return std::ref(*column_family_it->second); +} + +Status Table::MutateRow(google::bigtable::v2::MutateRowRequest const& request) { + std::lock_guard lock(mu_); + + return DoMutationsWithPossibleRollback(request.row_key(), + request.mutations()); +} + +// 
NOLINTBEGIN(readability-function-cognitive-complexity) +Status Table::DoMutationsWithPossibleRollback( + std::string const& row_key, + google::protobuf::RepeatedPtrField const& + mutations) { + if (row_key.size() > kMaxRowLen) { + return InvalidArgumentError( + "The row_key is longer than 4KiB", + GCP_ERROR_INFO().WithMetadata("row_key size", + absl::StrFormat("%zu", row_key.size()))); + } + + RowTransaction row_transaction(this->get(), row_key); + + for (auto const& mutation : mutations) { + if (mutation.has_set_cell()) { + auto const& set_cell = mutation.set_cell(); + + absl::optional timestamp_override = + absl::nullopt; + + if (set_cell.timestamp_micros() < -1) { + return InvalidArgumentError( + "Timestamp micros cannot be < -1.", + GCP_ERROR_INFO().WithMetadata("mutation", mutation.DebugString())); + } + + if (set_cell.timestamp_micros() == -1) { + timestamp_override.emplace( + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch())); + } + + auto status = row_transaction.SetCell(set_cell, timestamp_override); + if (!status.ok()) { + return status; + } + } else if (mutation.has_add_to_cell()) { + auto const& add_to_cell = mutation.add_to_cell(); + + absl::optional timestamp_override = + absl::nullopt; + + std::chrono::milliseconds timestamp = std::chrono::milliseconds::zero(); + + if (add_to_cell.has_timestamp() && + add_to_cell.timestamp().has_raw_timestamp_micros()) { + timestamp = std::chrono::duration_cast( + std::chrono::microseconds( + add_to_cell.timestamp().raw_timestamp_micros())); + } + + // If no valid timestamp is provided, override with the system time. 
+ if (timestamp <= std::chrono::milliseconds::zero()) { + timestamp = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()); + timestamp_override.emplace(std::move(timestamp)); + } + + auto status = row_transaction.AddToCell(add_to_cell, timestamp_override); + if (!status.ok()) { + return status; + } + } else if (mutation.has_merge_to_cell()) { + return UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", mutation.DebugString())); + } else if (mutation.has_delete_from_column()) { + auto const& delete_from_column = mutation.delete_from_column(); + auto status = row_transaction.DeleteFromColumn(delete_from_column); + if (!status.ok()) { + return status; + } + } else if (mutation.has_delete_from_family()) { + auto const& delete_from_family = mutation.delete_from_family(); + auto status = row_transaction.DeleteFromFamily(delete_from_family); + if (!status.ok()) { + return status; + } + } else if (mutation.has_delete_from_row()) { + auto status = row_transaction.DeleteFromRow(); + if (!status.ok()) { + return status; + } + } else { + return UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", mutation.DebugString())); + } + } + + // If we get here, all mutations on the row have succeeded. We can + // commit and return which will prevent the destructor from undoing + // the transaction. 
+ row_transaction.commit(); + + return Status(); +} +// NOLINTEND(readability-function-cognitive-complexity) + +StatusOr Table::CreateCellStream( + std::shared_ptr range_set, + absl::optional maybe_row_filter) const { + auto table_stream_ctor = [range_set = std::move(range_set), this] { + std::vector> per_cf_streams; + per_cf_streams.reserve(column_families_.size()); + for (auto const& column_family : column_families_) { + per_cf_streams.emplace_back(std::make_unique( + *column_family.second, column_family.first, range_set)); + } + return CellStream( + std::make_unique(std::move(per_cf_streams))); + }; + + if (maybe_row_filter.has_value()) { + return CreateFilter(maybe_row_filter.value(), table_stream_ctor); + } + + return table_stream_ctor(); +} + +bool FilteredTableStream::ApplyFilter(InternalFilter const& internal_filter) { + if (!absl::holds_alternative(internal_filter) && + !absl::holds_alternative(internal_filter)) { + return MergeCellStreams::ApplyFilter(internal_filter); + } + // internal_filter is either FamilyNameRegex or ColumnRange + for (auto stream_it = unfinished_streams_.begin(); + stream_it != unfinished_streams_.end();) { + auto* cf_stream = + dynamic_cast(&(*stream_it)->impl()); + assert(cf_stream); + + if ((absl::holds_alternative(internal_filter) && + !re2::RE2::PartialMatch( + cf_stream->column_family_name(), + *absl::get(internal_filter).regex)) || + (absl::holds_alternative(internal_filter) && + absl::get(internal_filter).column_family != + cf_stream->column_family_name())) { + stream_it = unfinished_streams_.erase(stream_it); + continue; + } + + if (absl::holds_alternative(internal_filter) && + absl::get(internal_filter).column_family == + cf_stream->column_family_name()) { + cf_stream->ApplyFilter(internal_filter); + } + + stream_it++; + } + + return true; +} + +std::vector FilteredTableStream::CreateCellStreams( + std::vector> cf_streams) { + std::vector res; + res.reserve(cf_streams.size()); + for (auto& stream : cf_streams) { + 
res.emplace_back(std::move(stream)); + } + return res; +} + +StatusOr CreateStringRangeSet( + google::bigtable::v2::RowSet const& row_set) { + StringRangeSet res; + for (auto const& row_key : row_set.row_keys()) { + if (row_key.size() > kMaxRowLen) { + return InvalidArgumentError( + "The row_key in row_set is longer than 4KiB", + GCP_ERROR_INFO() + .WithMetadata("row_key size", + absl::StrFormat("%zu", row_key.size())) + .WithMetadata("row_set", row_set.DebugString())); + } + + if (row_key.empty()) { + return InvalidArgumentError( + "`row_key` empty", + GCP_ERROR_INFO().WithMetadata("row_set", row_set.DebugString())); + } + res.Sum(StringRangeSet::Range(row_key, false, row_key, false)); + } + for (auto const& row_range : row_set.row_ranges()) { + auto maybe_range = StringRangeSet::Range::FromRowRange(row_range); + if (!maybe_range) { + return maybe_range.status(); + } + if (maybe_range->IsEmpty()) { + continue; + } + res.Sum(*std::move(maybe_range)); + } + return res; +} + +StatusOr +Table::CheckAndMutateRow( + google::bigtable::v2::CheckAndMutateRowRequest const& request) { + std::lock_guard lock(mu_); + + auto const& row_key = request.row_key(); + + if (row_key.size() > kMaxRowLen) { + return InvalidArgumentError( + "The row_key is longer than 4KiB", + GCP_ERROR_INFO() + .WithMetadata("row_key size", + absl::StrFormat("%zu", row_key.size())) + .WithMetadata("CheckAndMutateRequest", request.DebugString())); + } + + if (row_key.empty()) { + return InvalidArgumentError( + "row key required", + GCP_ERROR_INFO().WithMetadata("CheckAndMutateRowRequest", + request.DebugString())); + } + + if (request.true_mutations_size() == 0 && + request.false_mutations_size() == 0) { + return InvalidArgumentError( + "both true mutations and false mutations are empty", + GCP_ERROR_INFO().WithMetadata("CheckAndMutateRowRequest", + request.DebugString())); + } + + auto range_set = std::make_shared(); + range_set->Sum(StringRangeSet::Range(row_key, false, row_key, false)); + + StatusOr 
maybe_stream; + if (request.has_predicate_filter()) { + maybe_stream = + CreateCellStream(range_set, std::move(request.predicate_filter())); + } else { + maybe_stream = CreateCellStream(range_set, absl::nullopt); + } + + if (!maybe_stream) { + return maybe_stream.status(); + } + + bool a_cell_is_found = false; + + CellStream& stream = *maybe_stream; + if (stream) { // At least one cell/value found when filter is applied + a_cell_is_found = true; + } + + Status status; + if (a_cell_is_found) { + status = DoMutationsWithPossibleRollback(request.row_key(), + request.true_mutations()); + } else { + status = DoMutationsWithPossibleRollback(request.row_key(), + request.false_mutations()); + } + + if (!status.ok()) { + return status; + } + + google::bigtable::v2::CheckAndMutateRowResponse success_response; + success_response.set_predicate_matched(a_cell_is_found); + + return success_response; +} + +Status Table::ReadRows(google::bigtable::v2::ReadRowsRequest const& request, + RowStreamer& row_streamer) const { + std::shared_ptr row_set; + // We need to check that, not only do we have rows, but that it is + // not empty (i.e. at least one of row_range or rows is specified). 
+ if (request.has_rows() && (request.rows().row_ranges_size() > 0 || + request.rows().row_keys_size() > 0)) { + auto maybe_row_set = CreateStringRangeSet(request.rows()); + if (!maybe_row_set) { + return maybe_row_set.status(); + } + + row_set = std::make_shared(*std::move(maybe_row_set)); + } else { + row_set = std::make_shared(StringRangeSet::All()); + } + std::lock_guard lock(mu_); + + StatusOr maybe_stream; + if (request.has_filter()) { + maybe_stream = CreateCellStream(row_set, std::move(request.filter())); + } else { + maybe_stream = CreateCellStream(row_set, absl::nullopt); + } + + if (!maybe_stream) { + return maybe_stream.status(); + } + + std::int64_t rows_count = 0; + absl::optional current_row_key; + + CellStream& stream = *maybe_stream; + for (; stream; ++stream) { + std::cout << "Row: " << stream->row_key() + << " column_family: " << stream->column_family() + << " column_qualifier: " << stream->column_qualifier() + << " column_timestamp: " << stream->timestamp().count() + << " column_value: " << stream->value() << " label: " + << (stream->HasLabel() ? stream->label() : std::string("unset")) + << std::endl; + + if (request.rows_limit() > 0) { + if (!current_row_key.has_value() || + stream->row_key() != current_row_key.value()) { + rows_count++; + current_row_key = stream->row_key(); + } + + if (rows_count > request.rows_limit()) { + break; + } + } + + if (!row_streamer.Stream(*stream)) { + std::cout << "HOW?" << std::endl; + return AbortedError("Stream closed by the client.", GCP_ERROR_INFO()); + } + } + + if (!row_streamer.Flush(true)) { + std::cout << "Flush failed?" 
<< std::endl; + return AbortedError("Stream closed by the client.", GCP_ERROR_INFO()); + } + std::cout << "Print stop" << std::endl; + return Status(); +} + +bool Table::IsDeleteProtected() const { + std::lock_guard lock(mu_); + return IsDeleteProtectedNoLock(); +} + +bool Table::IsDeleteProtectedNoLock() const { + return schema_.deletion_protection(); +} + +// Writes row key samples with estimated byte offsets to `writer`. +// +// A non-positive `pass_probability` is rejected with an invalid argument +// error; otherwise roughly one row in `ceil(1 / pass_probability)` is sampled. +Status Table::SampleRowKeys( + double pass_probability, + grpc::ServerWriter* writer) { + if (pass_probability <= 0.0) { + return InvalidArgumentError( + "The sampling probability must be positive", + GCP_ERROR_INFO().WithMetadata("provided sampling probability", + absl::StrFormat("%f", pass_probability))); + } + + auto sample_every = + static_cast(std::ceil(1.0 / pass_probability)); + + std::lock_guard lock(mu_); + + // First, stream all rows and cells and compute the offsets. + auto all_rows_set = std::make_shared(StringRangeSet::All()); + auto maybe_all_rows_stream = CreateCellStream(all_rows_set, absl::nullopt); + if (!maybe_all_rows_stream) { + return maybe_all_rows_stream.status(); + } + + auto& stream = *maybe_all_rows_stream; + + absl::optional first_row_key; + // The first row read will be used as a constant estimate of row + // sizes. If we are sampling 1/n rows, the value added to the offset + // (which is to be regarded as the size of all the rows before the + // sampled one) will be (n * row_size_estimate). + // + // That is every time a row is sampled, we do: offset += (n * + // row_size_estimate). 
+ std::size_t row_size_estimate = 0; + + for (; stream; ++stream) { + if (first_row_key.has_value() && + stream->row_key() != first_row_key.value()) { + break; + } + + first_row_key = stream->row_key(); + + row_size_estimate += stream->row_key().size(); + row_size_estimate += stream->column_qualifier().size(); + row_size_estimate += stream->value().size(); + row_size_estimate += sizeof(stream->timestamp()); + } + + if (!first_row_key.has_value()) { + // No rows in the table + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key(""); + resp.set_offset_bytes(0); + + auto opts = grpc::WriteOptions(); + opts.set_last_message(); + + writer->WriteLast(std::move(resp), opts); + return Status(); + } + + std::int64_t offset_delta = sample_every * row_size_estimate; + + google::bigtable::v2::RowFilter sample_filter; + sample_filter.set_row_sample_filter(pass_probability); + + auto maybe_stream = CreateCellStream(all_rows_set, sample_filter); + if (!maybe_stream) { + return maybe_stream.status(); + } + + auto& sampled_stream = *maybe_stream; + + std::int64_t offset = 0; + + // Must start as false: the sampling loop below may not execute at all, and + // the flag is read unconditionally afterwards. + bool wrote_a_sample = false; + + for (; sampled_stream; sampled_stream.Next(NextMode::kRow)) { + google::bigtable::v2::SampleRowKeysResponse resp; + offset += offset_delta; + resp.set_row_key(sampled_stream->row_key()); + resp.set_offset_bytes(offset); + + writer->Write(std::move(resp)); + + wrote_a_sample = true; + } + + // Cloud bigtable client tests expect that, if they populated the + // table with at least one row, then at least one row sample is + // returned. + // + // In such a case, return any string that represents the last key, + // and an offset that is the estimated row size * the number of rows + // in the largest column family. We can return any string because + // the keys returned need not be in the table. See the proto + // specification. 
+ if (!wrote_a_sample) { + std::size_t row_count_estimate = 0; + + for (auto const& cf : *get()) { + if (cf.second->size() > row_count_estimate) { + row_count_estimate = cf.second->size(); + } + } + + std::int64_t this_offset = row_count_estimate * row_size_estimate; + + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key("last_key"); + resp.set_offset_bytes(this_offset); + writer->Write(std::move(resp)); + + offset += this_offset; + } + + google::bigtable::v2::SampleRowKeysResponse resp; + resp.set_row_key(""); + // Client test code expects offset_bytes to be strictly + // increasing. + resp.set_offset_bytes(offset + 1); + auto opts = grpc::WriteOptions(); + opts.set_last_message(); + writer->WriteLast(std::move(resp), opts); + + return Status(); +} + +Status Table::DropRowRange( + ::google::bigtable::admin::v2::DropRowRangeRequest const& request) { + std::lock_guard lock(mu_); + + if (!request.has_row_key_prefix() && + !request.has_delete_all_data_from_table()) { + return InvalidArgumentError( + "Neither row prefix nor deleted all data from table is set", + GCP_ERROR_INFO().WithMetadata("DropRowRange request", + request.DebugString())); + } + + if (request.has_delete_all_data_from_table()) { + for (auto& column_family : column_families_) { + column_family.second->clear(); + } + + return Status(); + } + + auto const& row_prefix = request.row_key_prefix(); + if (request.row_key_prefix().size() > kMaxRowLen) { + return InvalidArgumentError( + "The row_key_prefix is longer than 4KiB", + GCP_ERROR_INFO().WithMetadata( + "row_key_prefix size", + absl::StrFormat("%zu", request.row_key_prefix().size()))); + } + + if (row_prefix.empty()) { + return InvalidArgumentError( + "Row prefix provided is empty.", + GCP_ERROR_INFO().WithMetadata("DropRowRange request", + request.DebugString())); + } + + for (auto& cf : column_families_) { + for (auto row_it = cf.second->lower_bound(row_prefix); + row_it != cf.second->end();) { + if (absl::StartsWith(row_it->first, 
row_prefix)) { + row_it = cf.second->erase(row_it); + } else { + break; + } + } + } + + return Status(); +} + +StatusOr<::google::bigtable::v2::ReadModifyWriteRowResponse> +Table::ReadModifyWriteRow( + google::bigtable::v2::ReadModifyWriteRowRequest const& request) { + if (request.row_key().size() > kMaxRowLen) { + return InvalidArgumentError( + "The row_key is longer than 4KiB", + GCP_ERROR_INFO().WithMetadata( + "row_key size", absl::StrFormat("%zu", request.row_key().size()))); + } + + std::lock_guard lock(mu_); + + RowTransaction row_transaction(this->get(), request.row_key()); + + auto maybe_response = row_transaction.ReadModifyWriteRow(request); + if (!maybe_response) { + return maybe_response.status(); + } + + row_transaction.commit(); + + return std::move(maybe_response.value()); +} + +// NOLINTBEGIN(readability-convert-member-functions-to-static) +Status RowTransaction::AddToCell( + ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell, + absl::optional timestamp_override) { + auto status = table_->FindColumnFamily(add_to_cell); + if (!status.ok()) { + return status.status(); + } + + auto& cf = status->get(); + auto cf_value_type = cf.GetValueType(); + if (!cf_value_type.has_value() || + !cf_value_type.value().has_aggregate_type()) { + return InvalidArgumentError( + "column family is not configured to contain aggregation cells or " + "aggregation type not properly configured", + GCP_ERROR_INFO().WithMetadata("column family", + add_to_cell.family_name())); + } + + // Ensure that we support the aggregation that is configured in the + // column family. 
+ switch (cf_value_type.value().aggregate_type().aggregator_case()) { + case google::bigtable::admin::v2::Type::Aggregate::kSum: + case google::bigtable::admin::v2::Type::Aggregate::kMin: + case google::bigtable::admin::v2::Type::Aggregate::kMax: + break; + default: + return UnimplementedError( + "column family configured with unimplemented aggregation", + GCP_ERROR_INFO() + .WithMetadata("column family", add_to_cell.family_name()) + .WithMetadata("configured aggregation", + absl::StrFormat("%d", cf_value_type.value() + .aggregate_type() + .aggregator_case()))); + } + + if (!add_to_cell.has_input()) { + return InvalidArgumentError( + "input not set", + GCP_ERROR_INFO().WithMetadata("mutation", add_to_cell.DebugString())); + } + + switch (add_to_cell.input().kind_case()) { + case google::bigtable::v2::Value::kIntValue: + if (!add_to_cell.input().has_int_value()) { + return InvalidArgumentError("input value not set", + GCP_ERROR_INFO().WithMetadata( + "mutation", add_to_cell.DebugString())); + } + break; + default: + return InvalidArgumentError( + "only int64 values are supported", + GCP_ERROR_INFO().WithMetadata("mutation", add_to_cell.DebugString())); + } + auto int64_input = add_to_cell.input().int_value(); + + auto value = google::cloud::internal::EncodeBigEndian(int64_input); + auto row_key = row_key_; + + std::chrono::milliseconds ts_ms; + if (timestamp_override.has_value()) { + ts_ms = timestamp_override.value(); + } else { + ts_ms = std::chrono::duration_cast( + std::chrono::microseconds( + add_to_cell.timestamp().raw_timestamp_micros())); + } + + if (!add_to_cell.has_column_qualifier() || + !add_to_cell.column_qualifier().has_raw_value()) { + return InvalidArgumentError( + "column qualifier not set", + GCP_ERROR_INFO().WithMetadata("mutation", add_to_cell.DebugString())); + } + auto column_qualifier = add_to_cell.column_qualifier().raw_value(); + + auto maybe_old_value = cf.UpdateCell(row_key, column_qualifier, ts_ms, value); + if (!maybe_old_value) { + 
return maybe_old_value.status(); + } + + if (!maybe_old_value.value()) { + DeleteValue delete_value{cf, std::move(column_qualifier), ts_ms}; + undo_.emplace(std::move(delete_value)); + } else { + RestoreValue restore_value{cf, std::move(column_qualifier), ts_ms, + std::move(maybe_old_value.value().value())}; + undo_.emplace(std::move(restore_value)); + } + + return Status(); +} + +Status RowTransaction::MergeToCell( + ::google::bigtable::v2::Mutation_MergeToCell const& merge_to_cell) { + return UnimplementedError( + "Unsupported mutation type.", + GCP_ERROR_INFO().WithMetadata("mutation", merge_to_cell.DebugString())); +} +// NOLINTEND(readability-convert-member-functions-to-static) + +Status RowTransaction::DeleteFromColumn( + ::google::bigtable::v2::Mutation_DeleteFromColumn const& + delete_from_column) { + auto maybe_column_family = table_->FindColumnFamily(delete_from_column); + if (!maybe_column_family.ok()) { + return maybe_column_family.status(); + } + + // We need to check if the given timerange is empty or reversed, but + // only up to the server's time accuracy (in our case, milliseconds) + // - For example a time range of [1000, 1200] would be empty. + if (delete_from_column.has_time_range()) { + auto start = std::chrono::duration_cast( + std::chrono::microseconds( + delete_from_column.time_range().start_timestamp_micros())); + auto end = std::chrono::duration_cast( + std::chrono::microseconds( + delete_from_column.time_range().end_timestamp_micros())); + + // An end timestamp micros of 0 is to be interpreted as infinity, + // so we allow that. 
+ if (end <= start && + delete_from_column.time_range().end_timestamp_micros() != 0) { + return InvalidArgumentError( + "empty or reversed time range: the end timestamp must be more than " + "the start timestamp when they are truncated to the server's time " + "precision (milliseconds)", + GCP_ERROR_INFO().WithMetadata("delete_from_column proto", + delete_from_column.DebugString())); + } + } + + auto& column_family = maybe_column_family->get(); + + auto deleted_cells = column_family.DeleteColumn( + row_key_, delete_from_column.column_qualifier(), + delete_from_column.time_range()); + + for (auto& cell : deleted_cells) { + RestoreValue restore_value{ + column_family, delete_from_column.column_qualifier(), + std::move(cell.timestamp), std::move(cell.value)}; + undo_.emplace(std::move(restore_value)); + } + + return Status(); +} + +Status RowTransaction::DeleteFromRow() { + bool row_existed; + for (auto& column_family : table_->column_families_) { + auto deleted_columns = column_family.second->DeleteRow(row_key_); + + for (auto& column : deleted_columns) { + for (auto& cell : column.second) { + RestoreValue restrore_value = {*column_family.second, + std::move(column.first), cell.timestamp, + std::move(cell.value)}; + undo_.emplace(std::move(restrore_value)); + row_existed = true; + } + } + } + + if (row_existed) { + return Status(); + } + + return NotFoundError("row not found in table", + GCP_ERROR_INFO().WithMetadata("row", row_key_)); +} + +Status RowTransaction::DeleteFromFamily( + ::google::bigtable::v2::Mutation_DeleteFromFamily const& + delete_from_family) { + // If the request references an incorrect schema (non-existent + // column family) then return a failure status error immediately. 
+ auto maybe_column_family = table_->FindColumnFamily(delete_from_family); + if (!maybe_column_family.ok()) { + return maybe_column_family.status(); + } + + auto column_family_it = table_->find(delete_from_family.family_name()); + if (column_family_it == table_->end()) { + return NotFoundError( + "column family not found in table", + GCP_ERROR_INFO().WithMetadata("column family", + delete_from_family.family_name())); + } + + std::map::iterator column_family_row_it; + if (column_family_it->second->find(row_key_) == + column_family_it->second->end()) { + // The row does not exist + return NotFoundError( + "row key is not found in column family", + GCP_ERROR_INFO() + .WithMetadata("row key", row_key_) + .WithMetadata("column family", column_family_it->first)); + } + + auto deleted = column_family_it->second->DeleteRow(row_key_); + for (auto const& column : deleted) { + for (auto const& cell : column.second) { + RestoreValue restore_value{*column_family_it->second, + std::move(column.first), cell.timestamp, + std::move(cell.value)}; + undo_.emplace(std::move(restore_value)); + } + } + + return Status(); +} + +// timestamp_override, if provided, will be used instead of +// set_cell.timestamp. The override is used to set the timestamp to +// the server time in case a timestamp <= 0 is provided. 
+Status RowTransaction::SetCell( + ::google::bigtable::v2::Mutation_SetCell const& set_cell, + absl::optional timestamp_override) { + auto maybe_column_family = table_->FindColumnFamily(set_cell); + if (!maybe_column_family) { + return maybe_column_family.status(); + } + + auto& column_family = maybe_column_family->get(); + + auto timestamp = std::chrono::duration_cast( + std::chrono::microseconds(set_cell.timestamp_micros())); + + if (timestamp_override.has_value()) { + timestamp = timestamp_override.value(); + } + + auto maybe_old_value = column_family.SetCell( + row_key_, set_cell.column_qualifier(), timestamp, set_cell.value()); + + if (!maybe_old_value) { + DeleteValue delete_value{column_family, + std::move(set_cell.column_qualifier()), timestamp}; + undo_.emplace(std::move(delete_value)); + } else { + RestoreValue restore_value{column_family, + std::move(set_cell.column_qualifier()), + timestamp, std::move(maybe_old_value.value())}; + undo_.emplace(std::move(restore_value)); + } + + return Status(); +} + +// ProcessReadModifyWriteRuleResult records the result of a +// ReadModifyWriteRule computation for possible undo in the undo log +// and also updates the tmp_families temporary table (containing only +// one row) with the modified cell for later return. 
+void ProcessReadModifyWriteResult( + ColumnFamily& column_family, std::string const& row_key, + std::stack>& undo, + google::bigtable::v2::ReadModifyWriteRule const& rule, + ReadModifyWriteCellResult& result, + std::map& tmp_families) { + if (result.maybe_old_value.has_value()) { + // We overwrote a cell, we need to record a RestoreValue in the undo log + RestoreValue restore_value{column_family, rule.column_qualifier(), + result.timestamp, + std::move(result.maybe_old_value.value())}; + undo.emplace(std::move(restore_value)); + } else { + // We created a new cell -- we would need to delete it in any rollback + DeleteValue delete_value{column_family, rule.column_qualifier(), + result.timestamp}; + undo.emplace(std::move(delete_value)); + } + + // Record the cell in our local mini table here to use in + // assembling a row of changed cells for return. + tmp_families[rule.family_name()].SetCell(row_key, rule.column_qualifier(), + result.timestamp, + std::move(result.value)); +} + +google::bigtable::v2::ReadModifyWriteRowResponse +FamiliesToReadModifyWriteResponse( + std::string const& row_key, + std::map const& families) { + google::bigtable::v2::ReadModifyWriteRowResponse resp; + auto* row = resp.mutable_row(); + row->set_key(row_key); + + for (auto const& fam : families) { + auto* family = row->add_families(); + family->set_name(fam.first); + for (auto const& r : fam.second) { + for (auto const& cfr : r.second) { + auto* col = family->add_columns(); + col->set_qualifier(cfr.first); + for (auto const& cr : cfr.second) { + auto* cell = col->add_cells(); + cell->set_timestamp_micros( + std::chrono::duration_cast(cr.first) + .count()); + cell->set_value(std::move(cr.second)); + } + } + } + } + + return resp; +} + +StatusOr<::google::bigtable::v2::ReadModifyWriteRowResponse> +RowTransaction::ReadModifyWriteRow( + google::bigtable::v2::ReadModifyWriteRowRequest const& request) { + if (row_key_.empty()) { + return InvalidArgumentError( + "row key not set", + 
GCP_ERROR_INFO().WithMetadata("request", request.DebugString())); + } + + // tmp_families is a small one row mini table used to accumulate + // changed cells efficiently for later return in the row returned by + // the RPC. + std::map tmp_families; + + for (auto const& rule : request.rules()) { + auto maybe_column_family = table_->FindColumnFamily(rule); + if (!maybe_column_family) { + return maybe_column_family.status(); + } + + auto& column_family = maybe_column_family->get(); + if (rule.has_append_value()) { + auto result = column_family.ReadModifyWrite( + row_key_, rule.column_qualifier(), rule.append_value()); + + ProcessReadModifyWriteResult(column_family, row_key_, undo_, rule, result, + tmp_families); + + } else if (rule.has_increment_amount()) { + auto maybe_result = column_family.ReadModifyWrite( + row_key_, rule.column_qualifier(), rule.increment_amount()); + if (!maybe_result) { + return maybe_result.status(); + } + + auto& result = maybe_result.value(); + + ProcessReadModifyWriteResult(column_family, row_key_, undo_, rule, result, + tmp_families); + + } else { + return InvalidArgumentError( + "either append value or increment amount must be set", + GCP_ERROR_INFO().WithMetadata("rule", rule.DebugString())); + } + } + + // Now assemble the returned value. 
+ return FamiliesToReadModifyWriteResponse(row_key_, tmp_families); +} + +void RowTransaction::Undo() { + auto row_key = row_key_; + + while (!undo_.empty()) { + auto op = undo_.top(); + undo_.pop(); + + auto* restore_value = absl::get_if(&op); + if (restore_value) { + restore_value->column_family.SetCell( + row_key, std::move(restore_value->column_qualifier), + restore_value->timestamp, std::move(restore_value->value)); + continue; + } + + auto* delete_value = absl::get_if(&op); + if (delete_value) { + delete_value->column_family.DeleteTimeStamp( + row_key, std::move(delete_value->column_qualifier), + delete_value->timestamp); + continue; + } + + // If we get here, there is an type of undo log that has not been + // implemented! + std::abort(); + } +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/table.h b/google/cloud/bigtable/emulator/table.h new file mode 100644 index 0000000000000..123537913c634 --- /dev/null +++ b/google/cloud/bigtable/emulator/table.h @@ -0,0 +1,226 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TABLE_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TABLE_H + +#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/bigtable/emulator/row_streamer.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include "absl/types/variant.h" +#include "google/protobuf/repeated_ptr_field.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +/// Objects of this class represent Bigtable tables. +class Table : public std::enable_shared_from_this
{ + public: + static StatusOr> Create( + google::bigtable::admin::v2::Table schema); + + google::bigtable::admin::v2::Table GetSchema() const; + + Status Update(google::bigtable::admin::v2::Table const& new_schema, + google::protobuf::FieldMask const& to_update); + + StatusOr ModifyColumnFamilies( + google::bigtable::admin::v2::ModifyColumnFamiliesRequest const& request); + + bool IsDeleteProtected() const; + + StatusOr CheckAndMutateRow( + google::bigtable::v2::CheckAndMutateRowRequest const& request); + Status MutateRow(google::bigtable::v2::MutateRowRequest const& request); + Status DoMutationsWithPossibleRollbackLocked( + std::string const& row_key, + google::protobuf::RepeatedPtrField const& + mutations) { + std::lock_guard lock(mu_); + + return DoMutationsWithPossibleRollback(row_key, mutations); + } + + StatusOr CreateCellStream( + std::shared_ptr range_set, + absl::optional) const; + + Status ReadRows(google::bigtable::v2::ReadRowsRequest const& request, + RowStreamer& row_streamer) const; + + StatusOr<::google::bigtable::v2::ReadModifyWriteRowResponse> + ReadModifyWriteRow( + google::bigtable::v2::ReadModifyWriteRowRequest const& request); + + std::map>::iterator begin() { + return column_families_.begin(); + } + std::map>::iterator end() { + return column_families_.end(); + } + std::map>::iterator find( + std::string const& column_family) { + return column_families_.find(column_family); + } + + Status SampleRowKeys( + double pass_probability, + grpc::ServerWriter* writer); + + std::shared_ptr
// Undo-log record for a cell whose previous contents must be written back on
// rollback. `RowTransaction::Undo()` replays it by calling
// `column_family.SetCell()` with the remaining fields.
//
// The record is built with aggregate initialization, so the field order is
// part of its interface.
struct RestoreValue {
  // Column family that holds the cell; stored by reference, not owned.
  ColumnFamily& column_family;
  // Qualifier of the column the cell belongs to.
  std::string column_qualifier;
  // Timestamp of the cell, in milliseconds.
  std::chrono::milliseconds timestamp;
  // Contents to write back into the cell.
  std::string value;
};
table, + std::string const& row_key) + : row_key_(row_key) { + table_ = std::move(table); + committed_ = false; + }; + + ~RowTransaction() { + if (!committed_) { + Undo(); + } + }; + + void commit() { committed_ = true; } + + // timestamp_override, if provided, will be used instead of + // set_cell.timestamp. The override is used to set the timestamp to + // the server time in case a timestamp <= 0 is provided. + Status SetCell(::google::bigtable::v2::Mutation_SetCell const& set_cell, + absl::optional timestamp_override = + absl::nullopt); + Status AddToCell( + ::google::bigtable::v2::Mutation_AddToCell const& add_to_cell, + absl::optional timestamp_override); + Status MergeToCell( + ::google::bigtable::v2::Mutation_MergeToCell const& merge_to_cell); + Status DeleteFromColumn( + ::google::bigtable::v2::Mutation_DeleteFromColumn const& + delete_from_column); + Status DeleteFromFamily( + ::google::bigtable::v2::Mutation_DeleteFromFamily const& + delete_from_family); + Status DeleteFromRow(); + + StatusOr<::google::bigtable::v2::ReadModifyWriteRowResponse> + ReadModifyWriteRow( + google::bigtable::v2::ReadModifyWriteRowRequest const& request); + + private: + void Undo(); + + bool committed_; + std::shared_ptr
table_; + std::stack> undo_; + // row_key_ is initialized from the request proto and therefore it + // is safe to access it while the mutation request is ongoing. We + // store a reference to it to avoid copying a potentially very large + // (up to 4KB) value. + std::string const& row_key_; +}; + +google::bigtable::v2::ReadModifyWriteRowResponse +FamiliesToReadModifyWriteResponse( + std::string const& row_key, + std::map const& families); + +/** + * A `AbstractCellStreamImpl` which streams filtered contents of the table. + * + * Underneath is essentially a collection of `FilteredColumnFamilyStream`s. + * All filters applied to `FilteredColumnFamilyStream` are propagated to the + * underlying `FilteredColumnFamilyStream`, except for `FamilyNameRegex`, which + * is handled by this subclass. + * + * This class is public only to enable testing. + */ +class FilteredTableStream : public MergeCellStreams { + public: + explicit FilteredTableStream( + std::vector> cf_streams) + : MergeCellStreams(CreateCellStreams(std::move(cf_streams))) {} + + bool ApplyFilter(InternalFilter const& internal_filter) override; + + private: + static std::vector CreateCellStreams( + std::vector> cf_streams); +}; + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TABLE_H diff --git a/google/cloud/bigtable/emulator/table_test.cc b/google/cloud/bigtable/emulator/table_test.cc new file mode 100644 index 0000000000000..d0f4eac00c924 --- /dev/null +++ b/google/cloud/bigtable/emulator/table_test.cc @@ -0,0 +1,153 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/bigtable/emulator/column_family.h" +#include "google/cloud/bigtable/emulator/filter.h" +#include "google/cloud/bigtable/emulator/range_set.h" +#include "google/cloud/testing_util/chrono_literals.h" +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { +namespace { + +std::string DumpStream(AbstractCellStreamImpl& stream, + NextMode next_mode = NextMode::kCell) { + std::stringstream ss; + for (; stream.HasValue(); stream.Next(next_mode)) { + auto const& cell = stream.Value(); + ss << cell.row_key() << " " << cell.column_family() << ":" + << cell.column_qualifier() << " @" << cell.timestamp().count() + << "ms: " << cell.value() << std::endl; + } + return ss.str(); +} + +TEST(FilteredTableStream, Empty) { + FilteredTableStream stream({}); + EXPECT_EQ("", DumpStream(stream)); +} + +TEST(FilteredTableStream, EmptyColumnFamilies) { + ColumnFamily fam1; + ColumnFamily fam2; + auto ffam1 = std::make_unique( + fam1, "fam1", std::make_unique(StringRangeSet::All())); + auto ffam2 = std::make_unique( + fam2, "fam2", std::make_unique(StringRangeSet::All())); + std::vector> fams; + fams.emplace_back(std::move(ffam1)); + fams.emplace_back(std::move(ffam2)); + FilteredTableStream stream(std::move(fams)); + EXPECT_EQ("", DumpStream(stream)); +} + +TEST(FilteredTableStream, ColumnFamiliesAreFiltered) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam1; + 
ColumnFamily fam2; + fam1.SetCell("row0", "col0", 10_ms, "foo"); + fam2.SetCell("row0", "col0", 10_ms, "foo"); + auto ffam1 = std::make_unique( + fam1, "fam1", std::make_unique(StringRangeSet::All())); + auto ffam2 = std::make_unique( + fam2, "fam2", std::make_unique(StringRangeSet::All())); + std::vector> fams; + fams.emplace_back(std::move(ffam1)); + fams.emplace_back(std::move(ffam2)); + FilteredTableStream stream(std::move(fams)); + auto family_pattern = std::make_shared("fam1"); + ASSERT_TRUE(family_pattern->ok()); + stream.ApplyFilter(FamilyNameRegex{family_pattern}); + EXPECT_EQ("row0 fam1:col0 @10ms: foo\n", DumpStream(stream)); +} + +TEST(FilteredTableStream, OnlyRightFamilyColumnsAreFiltered) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam1; + ColumnFamily fam2; + fam1.SetCell("row0", "col0", 10_ms, "foo"); + fam2.SetCell("row0", "col0", 10_ms, "foo"); + auto ffam1 = std::make_unique( + fam1, "fam1", std::make_unique(StringRangeSet::All())); + auto ffam2 = std::make_unique( + fam2, "fam2", std::make_unique(StringRangeSet::All())); + std::vector> fams; + fams.emplace_back(std::move(ffam1)); + fams.emplace_back(std::move(ffam2)); + FilteredTableStream stream(std::move(fams)); + + stream.ApplyFilter( + ColumnRange{"fam2", StringRangeSet::Range("col0", false, "col1", true)}); + EXPECT_EQ("row0 fam2:col0 @10ms: foo\n", DumpStream(stream)); +} + +TEST(FilteredTableStream, OtherFiltersArePropagated) { + using testing_util::chrono_literals::operator""_ms; + + ColumnFamily fam1; + ColumnFamily fam2; + fam1.SetCell("row1", "col1", 10_ms, "foo"); + fam1.SetCell("row0", "col1", 10_ms, "foo"); // row key regex + fam2.SetCell("row1", "col1", 10_ms, "foo"); // column family regex + fam1.SetCell("row1", "col2", 10_ms, "foo"); // column qualifier regex + fam1.SetCell("row1", "a1", 10_ms, "foo"); // column range + fam1.SetCell("row1", "col1", 1000_ms, "foo"); // timestamp range + auto ffam1 = std::make_unique( + fam1, "fam1", 
std::make_unique(StringRangeSet::All())); + auto ffam2 = std::make_unique( + fam2, "fam2", std::make_unique(StringRangeSet::All())); + std::vector> fams; + fams.emplace_back(std::move(ffam1)); + fams.emplace_back(std::move(ffam2)); + FilteredTableStream stream(std::move(fams)); + + auto row_key_pattern = std::make_shared("row1"); + ASSERT_TRUE(row_key_pattern->ok()); + EXPECT_TRUE(stream.ApplyFilter(RowKeyRegex{row_key_pattern})); + + auto family_pattern = std::make_shared("fam1"); + ASSERT_TRUE(family_pattern->ok()); + EXPECT_TRUE(stream.ApplyFilter(FamilyNameRegex{family_pattern})); + + auto qualifier_pattern = std::make_shared("1$"); + ASSERT_TRUE(qualifier_pattern->ok()); + EXPECT_TRUE(stream.ApplyFilter(ColumnRegex{qualifier_pattern})); + + EXPECT_TRUE(stream.ApplyFilter( + ColumnRange{"fam1", StringRangeSet::Range("co", false, "com", false)})); + + EXPECT_TRUE(stream.ApplyFilter( + TimestampRange{TimestampRangeSet::Range(0_ms, 300_ms)})); + + EXPECT_EQ("row1 fam1:col1 @10ms: foo\n", DumpStream(stream)); +} + +} // anonymous namespace +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/test_util.cc b/google/cloud/bigtable/emulator/test_util.cc new file mode 100644 index 0000000000000..ac26cb23d96e5 --- /dev/null +++ b/google/cloud/bigtable/emulator/test_util.cc @@ -0,0 +1,51 @@ +#include "google/cloud/bigtable/emulator/test_util.h" +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +StatusOr> CreateTable( + std::string const& table_name, std::vector& column_families) { + ::google::bigtable::admin::v2::Table schema; + schema.set_name(table_name); + for (auto& column_family_name : column_families) { + (*schema.mutable_column_families())[column_family_name] = + 
::google::bigtable::admin::v2::ColumnFamily(); + } + + return Table::Create(schema); +} + +Status SetCells( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector& set_cell_params) { + ::google::bigtable::v2::MutateRowRequest mutation_request; + mutation_request.set_table_name(table_name); + mutation_request.set_row_key(row_key); + + for (auto m : set_cell_params) { + auto* mutation_request_mutation = mutation_request.add_mutations(); + auto* set_cell_mutation = mutation_request_mutation->mutable_set_cell(); + set_cell_mutation->set_family_name(m.column_family_name); + set_cell_mutation->set_column_qualifier(m.column_qualifier); + set_cell_mutation->set_timestamp_micros(m.timestamp_micros); + set_cell_mutation->set_value(m.data); + } + + return table->MutateRow(mutation_request); +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/test_util.h b/google/cloud/bigtable/emulator/test_util.h new file mode 100644 index 0000000000000..1d45ec56767a4 --- /dev/null +++ b/google/cloud/bigtable/emulator/test_util.h @@ -0,0 +1,51 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TEST_UTIL_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TEST_UTIL_H + +#include "google/cloud/bigtable/emulator/table.h" +#include "google/cloud/status.h" +#include "google/cloud/status_or.h" +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +struct SetCellParams { + std::string column_family_name; + std::string column_qualifier; + int64_t timestamp_micros; + std::string data; +}; + +Status SetCells( + std::shared_ptr& table, + std::string const& table_name, std::string const& row_key, + std::vector& set_cell_params); + +StatusOr> CreateTable( + std::string const& table_name, std::vector& column_families); + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TEST_UTIL_H diff --git a/google/cloud/bigtable/emulator/to_grpc_status.cc b/google/cloud/bigtable/emulator/to_grpc_status.cc new file mode 100644 index 0000000000000..bac9b04c2e0e4 --- /dev/null +++ b/google/cloud/bigtable/emulator/to_grpc_status.cc @@ -0,0 +1,119 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
/// Translate google::cloud::StatusCode into grpc::StatusCode.
///
/// Each enumerator listed below maps to the gRPC code of the same name. Any
/// value not listed falls through to the `default` label and is reported as
/// `grpc::StatusCode::UNKNOWN`.
grpc::StatusCode MapStatusCode(google::cloud::StatusCode code) {
  switch (code) {
    case google::cloud::StatusCode::kOk:
      return grpc::StatusCode::OK;
    case google::cloud::StatusCode::kCancelled:
      return grpc::StatusCode::CANCELLED;
    case google::cloud::StatusCode::kUnknown:
      return grpc::StatusCode::UNKNOWN;
    case google::cloud::StatusCode::kInvalidArgument:
      return grpc::StatusCode::INVALID_ARGUMENT;
    case google::cloud::StatusCode::kDeadlineExceeded:
      return grpc::StatusCode::DEADLINE_EXCEEDED;
    case google::cloud::StatusCode::kNotFound:
      return grpc::StatusCode::NOT_FOUND;
    case google::cloud::StatusCode::kAlreadyExists:
      return grpc::StatusCode::ALREADY_EXISTS;
    case google::cloud::StatusCode::kPermissionDenied:
      return grpc::StatusCode::PERMISSION_DENIED;
    case google::cloud::StatusCode::kUnauthenticated:
      return grpc::StatusCode::UNAUTHENTICATED;
    case google::cloud::StatusCode::kResourceExhausted:
      return grpc::StatusCode::RESOURCE_EXHAUSTED;
    case google::cloud::StatusCode::kFailedPrecondition:
      return grpc::StatusCode::FAILED_PRECONDITION;
    case google::cloud::StatusCode::kAborted:
      return grpc::StatusCode::ABORTED;
    case google::cloud::StatusCode::kOutOfRange:
      return grpc::StatusCode::OUT_OF_RANGE;
    case google::cloud::StatusCode::kUnimplemented:
      return grpc::StatusCode::UNIMPLEMENTED;
    case google::cloud::StatusCode::kInternal:
      return grpc::StatusCode::INTERNAL;
    case google::cloud::StatusCode::kUnavailable:
      return grpc::StatusCode::UNAVAILABLE;
    case google::cloud::StatusCode::kDataLoss:
      return grpc::StatusCode::DATA_LOSS;
    // Fallback for any value without an explicit case above.
    default:
      return grpc::StatusCode::UNKNOWN;
  }
}
+google::rpc::ErrorInfo ErrorInfoFromStatus(Status const& to_convert) { + google::rpc::ErrorInfo error_info; + error_info.set_reason(to_convert.error_info().reason()); + error_info.set_domain(to_convert.error_info().domain()); + for (auto const& md_name_value : to_convert.error_info().metadata()) { + (*error_info.mutable_metadata())[md_name_value.first] = + md_name_value.second; + } + + return error_info; +} + +google::rpc::Status RPCStatusFromStatusAndErrorInfo( + Status const& to_convert, google::rpc::ErrorInfo const& error_info) { + google::rpc::Status rpc_status; + rpc_status.set_code(static_cast(to_convert.code())); + rpc_status.set_message(to_convert.message()); + auto& rpc_status_details = *rpc_status.add_details(); + rpc_status_details.PackFrom(error_info); + + return rpc_status; +} + +::grpc::Status ToGrpcStatus(Status const& to_convert) { + google::rpc::ErrorInfo error_info = ErrorInfoFromStatus(to_convert); + + google::rpc::Status rpc_status = + RPCStatusFromStatusAndErrorInfo(to_convert, std::move(error_info)); + + std::string serialized_rpc_status; + rpc_status.SerializeToString(&serialized_rpc_status); + return ::grpc::Status(MapStatusCode(to_convert.code()), to_convert.message(), + std::move(serialized_rpc_status)); +} + +::google::rpc::Status ToGoogleRPCStatus(Status const& to_convert) { + google::rpc::ErrorInfo error_info = ErrorInfoFromStatus(to_convert); + + google::rpc::Status rpc_status = + RPCStatusFromStatusAndErrorInfo(to_convert, std::move(error_info)); + + return rpc_status; +} + +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/emulator/to_grpc_status.h b/google/cloud/bigtable/emulator/to_grpc_status.h new file mode 100644 index 0000000000000..c561db22b46b2 --- /dev/null +++ b/google/cloud/bigtable/emulator/to_grpc_status.h @@ -0,0 +1,37 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use 
this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TO_GRPC_STATUS_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TO_GRPC_STATUS_H + +#include "google/cloud/status.h" +#include "google/rpc/status.pb.h" +#include + +namespace google { +namespace cloud { +namespace bigtable { +namespace emulator { + +/// Convert a google::cloud::Status to grpc::Status. +::grpc::Status ToGrpcStatus(Status const& to_convert); + +// Convert a gooogle::cloud::Status to a google::rpc::Status +::google::rpc::Status ToGoogleRPCStatus(Status const& to_convert); +} // namespace emulator +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_EMULATOR_TO_GRPC_STATUS_H diff --git a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl index ac309347f1d29..818a75601c799 100644 --- a/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl +++ b/google/cloud/bigtable/google_cloud_cpp_bigtable.bzl @@ -102,6 +102,7 @@ google_cloud_cpp_bigtable_hdrs = [ "internal/rate_limiter.h", "internal/readrowsparser.h", "internal/retry_traits.h", + "internal/row_range_helpers.h", "internal/row_reader_impl.h", "internal/rpc_policy_parameters.h", "internal/rpc_policy_parameters.inc", @@ -203,6 +204,7 @@ google_cloud_cpp_bigtable_srcs = [ "internal/prefix_range_end.cc", "internal/rate_limiter.cc", "internal/readrowsparser.cc", + "internal/row_range_helpers.cc", "internal/traced_row_reader.cc", 
"metadata_update_policy.cc", "mutation_batcher.cc", diff --git a/google/cloud/bigtable/internal/row_range_helpers.cc b/google/cloud/bigtable/internal/row_range_helpers.cc new file mode 100644 index 0000000000000..bb7a5a0fd2811 --- /dev/null +++ b/google/cloud/bigtable/internal/row_range_helpers.cc @@ -0,0 +1,254 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/bigtable/internal/row_range_helpers.h" +#include "google/cloud/bigtable/internal/google_bytes_traits.h" + +namespace google { +namespace cloud { +namespace bigtable { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +namespace internal { + +namespace btproto = ::google::bigtable::v2; + +btproto::RowRange RowRangeHelpers::Empty() { + btproto::RowRange result; + // Return an open interval that contains no key, using "\0" for the end key. + // We can't use "", because when appearing as the end it means 'infinity'. + result.set_start_key_open(""); + result.set_end_key_open(std::string("\0", 1)); + return result; +} + +bool RowRangeHelpers::IsEmpty(btproto::RowRange const& row_range) { + RowKeyType unused; + // We do not want to copy the strings unnecessarily, so initialize a reference + // pointing to *_key_closed() or *_key_open(), as needed. 
+ auto const* start = &unused; + bool start_open = false; + switch (row_range.start_key_case()) { + case btproto::RowRange::kStartKeyClosed: + start = &row_range.start_key_closed(); + break; + case btproto::RowRange::kStartKeyOpen: + start = &row_range.start_key_open(); + start_open = true; + break; + case btproto::RowRange::START_KEY_NOT_SET: + break; + } + // We need to initialize this to something to make g++ happy, but it cannot + // be a value that is discarded in all switch() cases to make Clang happy. + auto const* end = &row_range.end_key_closed(); + bool end_open = false; + switch (row_range.end_key_case()) { + case btproto::RowRange::kEndKeyClosed: + // Already initialized. + break; + case btproto::RowRange::kEndKeyOpen: + end = &row_range.end_key_open(); + end_open = true; + break; + case btproto::RowRange::END_KEY_NOT_SET: + // A range ending at +infinity is never empty. + return false; + } + + // Special case of an open interval of two consecutive strings. + if (start_open && end_open && internal::ConsecutiveRowKeys(*start, *end)) { + return true; + } + + // Compare the strings as byte vectors (careful with unsigned chars). 
+ int cmp = internal::CompareRowKey(*start, *end); + if (cmp == 0) { + return start_open || end_open; + } + return cmp > 0; +} + +bool RowRangeHelpers::BelowStart(btproto::RowRange const& row_range, + RowKeyType const& key) { + switch (row_range.start_key_case()) { + case btproto::RowRange::START_KEY_NOT_SET: + break; + case btproto::RowRange::kStartKeyClosed: + return key < row_range.start_key_closed(); + case btproto::RowRange::kStartKeyOpen: + return key <= row_range.start_key_open(); + } + return false; +} + +bool RowRangeHelpers::AboveEnd(btproto::RowRange const& row_range, + RowKeyType const& key) { + switch (row_range.end_key_case()) { + case btproto::RowRange::END_KEY_NOT_SET: + break; + case btproto::RowRange::kEndKeyClosed: + return key > row_range.end_key_closed(); + case btproto::RowRange::kEndKeyOpen: + return key >= row_range.end_key_open(); + } + return false; +} + +std::pair RowRangeHelpers::Intersect( + btproto::RowRange const& lhs, btproto::RowRange const& rhs) { + if (IsEmpty(rhs)) { + return std::make_pair(false, Empty()); + } + std::string empty; + + // The algorithm is simple: start with lhs as a the resulting range. Update + // both endpoints based on the value of @p range. If the resulting range is + // empty there is no intersection. + btproto::RowRange intersection(lhs); + + switch (rhs.start_key_case()) { + case btproto::RowRange::START_KEY_NOT_SET: + break; + case btproto::RowRange::kStartKeyClosed: { + auto const& start = rhs.start_key_closed(); + // If `range` starts above the current range then there is no + // intersection. + if (AboveEnd(intersection, start)) { + return std::make_pair(false, Empty()); + } + // If `start` is inside the intersection (as computed so far), then the + // intersection must start at `start`, and it would be closed if `range` + // is closed at the start. 
+ if (Contains(intersection, start)) { + intersection.set_start_key_closed(start); + } + break; + } + case btproto::RowRange::kStartKeyOpen: { + // The case where `range` is open on the start point is analogous. + auto const& start = rhs.start_key_open(); + if (AboveEnd(intersection, start)) { + return std::make_pair(false, Empty()); + } + if (Contains(intersection, start)) { + intersection.set_start_key_open(start); + } + } break; + } + + // Then check if the end limit of @p range is below *this. + switch (rhs.end_key_case()) { + case btproto::RowRange::END_KEY_NOT_SET: + break; + case btproto::RowRange::kEndKeyClosed: { + // If `range` ends before the start of the intersection there is no + // intersection and we can return immediately. + auto const& end = rhs.end_key_closed(); + if (BelowStart(intersection, end)) { + return std::make_pair(false, Empty()); + } + // If `end` is inside the intersection as computed so far, then the + // intersection must end at `end` and it is closed if `range` is closed + // at the end. + if (Contains(intersection, end)) { + intersection.set_end_key_closed(end); + } + } break; + case btproto::RowRange::kEndKeyOpen: { + // Do the analogous thing for `end` being a open endpoint. + auto const& end = rhs.end_key_open(); + if (BelowStart(intersection, end)) { + return std::make_pair(false, Empty()); + } + if (Contains(intersection, end)) { + intersection.set_end_key_open(end); + } + } break; + } + + bool is_empty = IsEmpty(intersection); + return std::make_pair(!is_empty, std::move(intersection)); +} + +void RowRangeHelpers::SanitizeEmptyEndKeys( + google::bigtable::v2::RowRange& row_range) { + // The service treats an empty end key as end of table. Some of our + // intersection logic does not, though. So we are best off sanitizing the + // input, by clearing the end key if it is empty. 
+ if (row_range.has_end_key_closed()) { + if (IsEmptyRowKey(row_range.end_key_closed())) { + row_range.clear_end_key_closed(); + } + } + if (row_range.has_end_key_open()) { + if (IsEmptyRowKey(row_range.end_key_open())) { + row_range.clear_end_key_open(); + } + } +} + +bool RowRangeHelpers::StartLess::operator()( + btproto::RowRange const& left, btproto::RowRange const& right) const { + if (!left.has_start_key_open() && !left.has_start_key_closed()) { + // left is empty + return right.has_start_key_open() || right.has_start_key_closed(); + } + // left is non-empty + if (!right.has_start_key_open() && !right.has_start_key_closed()) { + return false; + } + // both are non-empty + auto const& left_start = left.has_start_key_closed() ? left.start_key_closed() + : left.start_key_open(); + auto const& right_start = right.has_start_key_closed() + ? right.start_key_closed() + : right.start_key_open(); + + auto cmp = internal::CompareRowKey(left_start, right_start); + if (cmp != 0) { + return cmp < 0; + } + // same row key in both + return left.has_start_key_closed() && right.has_start_key_open(); +} + +bool RowRangeHelpers::EndLess::operator()( + btproto::RowRange const& left, btproto::RowRange const& right) const { + if (!right.has_end_key_open() && !right.has_end_key_closed()) { + // right is infinite + return left.has_end_key_open() || left.has_end_key_closed(); + } + // right is finite + if (!left.has_end_key_open() && !left.has_end_key_closed()) { + return false; + } + // both are finite + auto const& left_end = + left.has_end_key_closed() ? left.end_key_closed() : left.end_key_open(); + auto const& right_end = right.has_end_key_closed() ? 
right.end_key_closed() + : right.end_key_open(); + + auto cmp = internal::CompareRowKey(left_end, right_end); + if (cmp != 0) { + return cmp < 0; + } + // same row key in both + return left.has_end_key_open() && right.has_end_key_closed(); +} + +} // namespace internal +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace bigtable +} // namespace cloud +} // namespace google diff --git a/google/cloud/bigtable/internal/row_range_helpers.h b/google/cloud/bigtable/internal/row_range_helpers.h new file mode 100644 index 0000000000000..39beaa6dddee5 --- /dev/null +++ b/google/cloud/bigtable/internal/row_range_helpers.h @@ -0,0 +1,66 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_ROW_RANGE_HELPERS_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_ROW_RANGE_HELPERS_H + +#include "google/cloud/bigtable/row_key.h" +#include "google/cloud/bigtable/row_range.h" +#include + +namespace google { +namespace cloud { +namespace bigtable { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +namespace internal { + +class RowRangeHelpers { + public: + static google::bigtable::v2::RowRange Empty(); + static bool IsEmpty(google::bigtable::v2::RowRange const& row_range); + static bool BelowStart(google::bigtable::v2::RowRange const& row_range, + RowKeyType const& key); + static bool AboveEnd(google::bigtable::v2::RowRange const& row_range, + RowKeyType const& key); + static std::pair Intersect( + google::bigtable::v2::RowRange const& lhs, + google::bigtable::v2::RowRange const& rhs); + /// Return true if @p key is in @p row_range. + template + static bool Contains(google::bigtable::v2::RowRange const& row_range, + T const& key) { + return !BelowStart(row_range, key) && !AboveEnd(row_range, key); + } + static void SanitizeEmptyEndKeys(google::bigtable::v2::RowRange& row_range); + + /// Orders ranges by start key; an unset start sorts before any set start. + struct StartLess { + bool operator()(google::bigtable::v2::RowRange const& left, + google::bigtable::v2::RowRange const& right) const; + }; + + /// Orders ranges by end key; an unset end sorts after any set end.
+ struct EndLess { + bool operator()(google::bigtable::v2::RowRange const& left, + google::bigtable::v2::RowRange const& right) const; + }; +}; + +} // namespace internal +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace bigtable +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_BIGTABLE_INTERNAL_ROW_RANGE_HELPERS_H diff --git a/google/cloud/bigtable/row_range.cc b/google/cloud/bigtable/row_range.cc index f92cafd4870a3..304dd5981952a 100644 --- a/google/cloud/bigtable/row_range.cc +++ b/google/cloud/bigtable/row_range.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "google/cloud/bigtable/row_range.h" +#include "google/cloud/bigtable/internal/row_range_helpers.h" namespace google { namespace cloud { @@ -22,164 +23,28 @@ namespace btproto = ::google::bigtable::v2; RowRange::RowRange(::google::bigtable::v2::RowRange rhs) : row_range_(std::move(rhs)) { - // The service treats an empty end key as end of table. Some of our - // intersection logic does not, though. So we are best off sanitizing the - // input, by clearing the end key if it is empty. - if (row_range_.has_end_key_closed()) { - if (internal::IsEmptyRowKey(row_range_.end_key_closed())) { - row_range_.clear_end_key_closed(); - } - } - if (row_range_.has_end_key_open()) { - if (internal::IsEmptyRowKey(row_range_.end_key_open())) { - row_range_.clear_end_key_open(); - } - } + internal::RowRangeHelpers::SanitizeEmptyEndKeys(row_range_); } -bool RowRange::IsEmpty() const { - RowKeyType unused; - // We do not want to copy the strings unnecessarily, so initialize a reference - // pointing to *_key_closed() or *_key_open(), as needed.
- auto const* start = &unused; - bool start_open = false; - switch (row_range_.start_key_case()) { - case btproto::RowRange::kStartKeyClosed: - start = &row_range_.start_key_closed(); - break; - case btproto::RowRange::kStartKeyOpen: - start = &row_range_.start_key_open(); - start_open = true; - break; - case btproto::RowRange::START_KEY_NOT_SET: - break; - } - // We need to initialize this to something to make g++ happy, but it cannot - // be a value that is discarded in all switch() cases to make Clang happy. - auto const* end = &row_range_.end_key_closed(); - bool end_open = false; - switch (row_range_.end_key_case()) { - case btproto::RowRange::kEndKeyClosed: - // Already initialized. - break; - case btproto::RowRange::kEndKeyOpen: - end = &row_range_.end_key_open(); - end_open = true; - break; - case btproto::RowRange::END_KEY_NOT_SET: - // A range ending at +infinity is never empty. - return false; - } - - // Special case of an open interval of two consecutive strings. - if (start_open && end_open && internal::ConsecutiveRowKeys(*start, *end)) { - return true; - } +RowRange RowRange::Empty() { + return RowRange(internal::RowRangeHelpers::Empty()); +} - // Compare the strings as byte vectors (careful with unsigned chars).
- int cmp = internal::CompareRowKey(*start, *end); - if (cmp == 0) { - return start_open || end_open; - } - return cmp > 0; +bool RowRange::IsEmpty() const { + return internal::RowRangeHelpers::IsEmpty(row_range_); } bool RowRange::BelowStart(RowKeyType const& key) const { - switch (row_range_.start_key_case()) { - case btproto::RowRange::START_KEY_NOT_SET: - break; - case btproto::RowRange::kStartKeyClosed: - return key < row_range_.start_key_closed(); - case btproto::RowRange::kStartKeyOpen: - return key <= row_range_.start_key_open(); - } - return false; + return internal::RowRangeHelpers::BelowStart(row_range_, key); } bool RowRange::AboveEnd(RowKeyType const& key) const { - switch (row_range_.end_key_case()) { - case btproto::RowRange::END_KEY_NOT_SET: - break; - case btproto::RowRange::kEndKeyClosed: - return key > row_range_.end_key_closed(); - case btproto::RowRange::kEndKeyOpen: - return key >= row_range_.end_key_open(); - } - return false; + return internal::RowRangeHelpers::AboveEnd(row_range_, key); } std::pair RowRange::Intersect(RowRange const& range) const { - if (range.IsEmpty()) { - return std::make_pair(false, RowRange::Empty()); - } - std::string empty; - - // The algorithm is simple: start with *this as a the resulting range. Update - // both endpoints based on the value of @p range. If the resulting range is - // empty there is no intersection. - RowRange intersection(*this); - - switch (range.row_range_.start_key_case()) { - case btproto::RowRange::START_KEY_NOT_SET: - break; - case btproto::RowRange::kStartKeyClosed: { - auto const& start = range.row_range_.start_key_closed(); - // If `range` starts above the current range then there is no - // intersection. - if (intersection.AboveEnd(start)) { - return std::make_pair(false, Empty()); - } - // If `start` is inside the intersection (as computed so far), then the - // intersection must start at `start`, and it would be closed if `range` - // is closed at the start.
- if (intersection.Contains(start)) { - intersection.row_range_.set_start_key_closed(start); - } - } break; - case btproto::RowRange::kStartKeyOpen: { - // The case where `range` is open on the start point is analogous. - auto const& start = range.row_range_.start_key_open(); - if (intersection.AboveEnd(start)) { - return std::make_pair(false, Empty()); - } - if (intersection.Contains(start)) { - intersection.row_range_.set_start_key_open(start); - } - } break; - } - - // Then check if the end limit of @p range is below *this. - switch (range.row_range_.end_key_case()) { - case btproto::RowRange::END_KEY_NOT_SET: - break; - case btproto::RowRange::kEndKeyClosed: { - // If `range` ends before the start of the intersection there is no - // intersection and we can return immediately. - auto const& end = range.row_range_.end_key_closed(); - if (intersection.BelowStart(end)) { - return std::make_pair(false, Empty()); - } - // If `end` is inside the intersection as computed so far, then the - // intersection must end at `end` and it is closed if `range` is closed - // at the end. - if (intersection.Contains(end)) { - intersection.row_range_.set_end_key_closed(end); - } - } break; - case btproto::RowRange::kEndKeyOpen: { - // Do the analogous thing for `end` being a open endpoint.
- auto const& end = range.row_range_.end_key_open(); - if (intersection.BelowStart(end)) { - return std::make_pair(false, Empty()); - } - if (intersection.Contains(end)) { - intersection.row_range_.set_end_key_open(end); - } - } break; - } - - bool is_empty = intersection.IsEmpty(); - return std::make_pair(!is_empty, std::move(intersection)); + auto res = internal::RowRangeHelpers::Intersect(row_range_, range.row_range_); + return std::make_pair(res.first, RowRange(std::move(res.second))); } bool operator==(RowRange const& lhs, RowRange const& rhs) { diff --git a/google/cloud/bigtable/row_range.h b/google/cloud/bigtable/row_range.h index e39ae676c3703..47f43d9402138 100644 --- a/google/cloud/bigtable/row_range.h +++ b/google/cloud/bigtable/row_range.h @@ -65,14 +65,7 @@ class RowRange { } /// Return an empty range. - static RowRange Empty() { - RowRange result; - // Return an open interval that contains no key, using "\0" for the end key. - // We can't use "", because when appearing as the end it means 'infinity'. - result.row_range_.set_start_key_open(""); - result.row_range_.set_end_key_open(std::string("\0", 1)); - return result; - } + static RowRange Empty(); /// Return the range representing the interval [@p begin, @p end). template diff --git a/google/cloud/bigtable/tools/run_emulator_utils.sh b/google/cloud/bigtable/tools/run_emulator_utils.sh index 807ae6efb41a9..6a7c4dd1ad68b 100755 --- a/google/cloud/bigtable/tools/run_emulator_utils.sh +++ b/google/cloud/bigtable/tools/run_emulator_utils.sh @@ -92,7 +92,6 @@ function start_emulators() { io::log "Launching Cloud Bigtable emulators in the background" trap kill_emulators EXIT - local -r CBT_EMULATOR_CMD="/usr/local/google-cloud-sdk/platform/bigtable-emulator/cbtemulator" "${CBT_EMULATOR_CMD}" -port "${emulator_port}" >emulator.log 2>&1