|
28 | 28 | #include <absl/strings/match.h> |
29 | 29 | #include <absl/strings/str_format.h> |
30 | 30 | #include <absl/types/optional.h> |
| 31 | +#include <grpcpp/support/sync_stream.h> |
31 | 32 | #include <re2/re2.h> |
32 | 33 | #include <chrono> |
| 34 | +#include <climits> |
| 35 | +#include <cstddef> |
33 | 36 | #include <cstdint> |
34 | 37 | #include <cstdlib> |
| 38 | +#include <exception> |
35 | 39 | #include <memory> |
36 | 40 | #include <mutex> |
| 41 | +#include <random> |
| 42 | +#include <ratio> |
37 | 43 | #include <optional> |
38 | 44 | #include <string> |
39 | 45 | #include <type_traits> |
@@ -593,6 +599,119 @@ bool Table::IsDeleteProtectedNoLock() const { |
593 | 599 | return schema_.deletion_protection(); |
594 | 600 | } |
595 | 601 |
|
| 602 | +Status Table::SampleRowKeys( |
| 603 | + double pass_probability, |
| 604 | + grpc::ServerWriter<google::bigtable::v2::SampleRowKeysResponse>* writer) { |
| 605 | + std::lock_guard<std::mutex> lock(mu_); |
| 606 | + |
| 607 | + // First, stream all rows and cells and compute the offsets. |
| 608 | + auto all_rows_set = std::make_shared<StringRangeSet>(StringRangeSet::All()); |
| 609 | + auto maybe_all_rows_steam = CreateCellStream(all_rows_set, absl::nullopt); |
| 610 | + if (!maybe_all_rows_steam) { |
| 611 | + return maybe_all_rows_steam.status(); |
| 612 | + } |
| 613 | + |
| 614 | + auto& stream = *maybe_all_rows_steam; |
| 615 | + |
| 616 | + std::map<std::string, std::size_t> row_offset_map; |
| 617 | + size_t row_offset = 0; |
| 618 | + |
| 619 | + std::string current_row_key; |
| 620 | + bool first_row = true; |
| 621 | + |
| 622 | + std::map<std::string, std::size_t> column_family_size_map; |
| 623 | + std::map<std::string, std::size_t> column_qualifier_size_map; |
| 624 | + size_t timestamp_total_row_size = 0; |
| 625 | + size_t value_total_row_size = 0; |
| 626 | + |
| 627 | + for (; stream; ++stream) { |
| 628 | + auto row_key = stream->row_key(); |
| 629 | + |
| 630 | + if ((row_key != current_row_key) || first_row) { |
| 631 | + row_offset += current_row_key.size(); |
| 632 | + |
| 633 | + for (auto const& cf : column_family_size_map) { |
| 634 | + row_offset += cf.second; |
| 635 | + } |
| 636 | + |
| 637 | + for (auto const& cq : column_qualifier_size_map) { |
| 638 | + row_offset += cq.second; |
| 639 | + } |
| 640 | + |
| 641 | + row_offset += timestamp_total_row_size; |
| 642 | + row_offset += value_total_row_size; |
| 643 | + |
| 644 | + // The rows before this (row_key) have this size in total. |
| 645 | + row_offset_map[row_key] = row_offset; |
| 646 | + |
| 647 | + current_row_key = row_key; |
| 648 | + |
| 649 | + first_row = false; |
| 650 | + |
| 651 | + column_family_size_map.clear(); |
| 652 | + column_qualifier_size_map.clear(); |
| 653 | + timestamp_total_row_size = 0; |
| 654 | + value_total_row_size = 0; |
| 655 | + } |
| 656 | + |
| 657 | + column_family_size_map.emplace(stream->column_family(), |
| 658 | + stream->column_family().size()); |
| 659 | + column_qualifier_size_map.emplace(stream->column_qualifier(), |
| 660 | + stream->column_qualifier().size()); |
| 661 | + timestamp_total_row_size += sizeof(stream->timestamp()); |
| 662 | + value_total_row_size += stream->value().size(); |
| 663 | + } |
| 664 | + |
| 665 | + google::bigtable::v2::RowFilter sample_filter; |
| 666 | + sample_filter.set_row_sample_filter(pass_probability); |
| 667 | + |
| 668 | + auto maybe_stream = CreateCellStream(all_rows_set, sample_filter); |
| 669 | + if (!maybe_stream) { |
| 670 | + return maybe_stream.status(); |
| 671 | + } |
| 672 | + |
| 673 | + auto& sampled_stream = *maybe_stream; |
| 674 | + |
| 675 | + bool wrote_a_sample = false; |
| 676 | + |
| 677 | + for (; sampled_stream; ++sampled_stream) { |
| 678 | + google::bigtable::v2::SampleRowKeysResponse resp; |
| 679 | + resp.set_row_key(sampled_stream->row_key()); |
| 680 | + resp.set_offset_bytes(row_offset_map[sampled_stream->row_key()]); |
| 681 | + |
| 682 | + writer->Write(std::move(resp)); |
| 683 | + |
| 684 | + wrote_a_sample = true; |
| 685 | + } |
| 686 | + |
| 687 | + // Cloud bigtable client tests expect that, if they populated the |
| 688 | + // table with at least one row, then at least one row sampele is |
| 689 | + // returned. |
| 690 | + // |
| 691 | + // In such a case, return the last row key. |
| 692 | + if (!wrote_a_sample && !row_offset_map.empty()) { |
| 693 | + auto it = std::prev(row_offset_map.end()); |
| 694 | + |
| 695 | + google::bigtable::v2::SampleRowKeysResponse resp; |
| 696 | + resp.set_row_key(it->first); |
| 697 | + resp.set_offset_bytes(it->second); |
| 698 | + } |
| 699 | + |
| 700 | + // Client code expects the last response to be an empty row key |
| 701 | + // and moreover it also expects the offset for the last response |
| 702 | + // to be more than every other offset. |
| 703 | + google::bigtable::v2::SampleRowKeysResponse resp; |
| 704 | + resp.set_row_key(""); |
| 705 | + // Client test code expects offset_bytes to be strictly |
| 706 | + // increasing. |
| 707 | + resp.set_offset_bytes(row_offset + 1); |
| 708 | + auto opts = grpc::WriteOptions(); |
| 709 | + opts.set_last_message(); |
| 710 | + writer->WriteLast(std::move(resp), opts); |
| 711 | + |
| 712 | + return Status(); |
| 713 | +} |
| 714 | + |
596 | 715 | Status Table::DropRowRange( |
597 | 716 | ::google::bigtable::admin::v2::DropRowRangeRequest const& request) { |
598 | 717 | std::lock_guard<std::mutex> lock(mu_); |
|
0 commit comments