Skip to content

Commit 1735f56

Browse files
authored
feat(bigtable): add AttemptLatency2 metric and populate peer info labels (googleapis#16095)
* feat(bigtable): add AttemptLatency2 metric and populate peer info labels. This introduces the `AttemptLatency2` metric for DirectPath, which records attempt latencies together with fields extracted from the decoded `PeerInfo` server metadata (read from the initial metadata first, falling back to the trailing metadata). The extracted fields populate `peer_info_labels_`, which is forwarded to `IntoLabelMap`. Also adds `AttemptLatency2Test` to test the newly populated peer info labels, and refactors `SetClusterZone` to use the new helper function `CreateServerMetadata`.
1 parent f0bb606 commit 1735f56

4 files changed

Lines changed: 577 additions & 27 deletions

File tree

google/cloud/bigtable/internal/metrics.cc

Lines changed: 91 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "google/cloud/bigtable/internal/metrics.h"
1818
#include "google/cloud/bigtable/version.h"
1919
#include "absl/strings/charconv.h"
20+
#include "absl/strings/escaping.h"
2021
#include "absl/strings/match.h"
2122
#include "absl/strings/numbers.h"
2223
#include "absl/strings/str_split.h"
@@ -38,13 +39,23 @@ auto constexpr kMeterInstrumentationScopeVersion = "v1";
3839
// to the map should be more performant than performing a set_difference every
3940
// time.
4041
LabelMap IntoLabelMap(ResourceLabels const& r, DataLabels const& d,
41-
std::set<std::string> const& filtered_data_labels) {
42+
std::set<std::string> const& filtered_data_labels,
43+
std::optional<PeerInfoLabels> const& peer_info_labels) {
4244
LabelMap labels = {
4345
{"project_id", r.project_id},
4446
{"instance", r.instance},
4547
{"table", r.table},
4648
{"cluster", r.cluster.empty() ? "<unspecified>" : r.cluster},
4749
{"zone", r.zone.empty() ? "global" : r.zone}};
50+
51+
if (peer_info_labels) {
52+
labels.insert({
53+
{"transport_type", peer_info_labels->transport_type},
54+
{"transport_region", peer_info_labels->transport_region},
55+
{"transport_subzone", peer_info_labels->transport_subzone},
56+
});
57+
}
58+
4859
std::map<std::string, std::string> data = {{
4960
{"method", d.method},
5061
{"streaming", d.streaming},
@@ -74,6 +85,7 @@ LabelMap IntoLabelMap(ResourceLabels const& r, DataLabels const& d,
7485
std::set_difference(data.begin(), data.end(), filtered_data_labels.begin(),
7586
filtered_data_labels.end(),
7687
std::inserter(labels, labels.begin()), Compare());
88+
7789
return labels;
7890
}
7991

@@ -103,6 +115,33 @@ GetResponseParamsFromTrailingMetadata(
103115
return absl::nullopt;
104116
}
105117

118+
// Extracts the `bigtable-peer-info` entry from the server metadata of
// `client_context` and parses it into a `PeerInfo` proto.
//
// Lookup order: the server initial metadata is tried first; the server
// trailing metadata is consulted only when the key is missing from the initial
// metadata or its value fails WebSafeBase64 decoding.
//
// Returns `std::nullopt` when neither source yields a decodable value, or when
// the decoded bytes do not parse as a `PeerInfo`. A value that decodes but
// fails to parse is not retried against the other metadata source.
std::optional<google::bigtable::v2::PeerInfo> GetPeerInfoFromServerMetadata(
119+
grpc::ClientContext const& client_context) {
120+
// The peer info value is WebSafeBase64-encoded. Try the initial metadata
// first; `decoded` receives the raw serialized proto bytes on success.
121+
std::string decoded;
122+
auto const& init_metadata = client_context.GetServerInitialMetadata();
123+
auto iter_init = init_metadata.find("bigtable-peer-info");
124+
if (iter_init == init_metadata.end() ||
125+
!absl::WebSafeBase64Unescape(
126+
absl::string_view{iter_init->second.data(), iter_init->second.size()},
127+
&decoded)) {
128+
// Find it in trailing metadata if not found in initial metadata or failed
129+
// to decode.
130+
auto const& trailing_metadata = client_context.GetServerTrailingMetadata();
131+
auto iter_trailing = trailing_metadata.find("bigtable-peer-info");
132+
if (iter_trailing == trailing_metadata.end() ||
133+
!absl::WebSafeBase64Unescape(
134+
absl::string_view{iter_trailing->second.data(),
135+
iter_trailing->second.size()},
136+
&decoded)) {
137+
// Neither metadata source produced a decodable value.
return std::nullopt;
138+
}
139+
}
140+
// `decoded` now holds the bytes from whichever source decoded successfully.
google::bigtable::v2::PeerInfo p;
141+
if (p.ParseFromString(decoded)) return p;
142+
return std::nullopt;
143+
}
144+
106145
absl::optional<double> GetServerLatencyFromInitialMetadata(
107146
grpc::ClientContext const& client_context) {
108147
auto const& initial_metadata = client_context.GetServerInitialMetadata();
@@ -199,7 +238,7 @@ AttemptLatency::AttemptLatency(
199238

200239
// Stores the attempt start time from `p` in `attempt_start_`. The context
// argument is unused.
//
// Diff view: the statement after the `-` gutter line is the removed version
// and the statement after the `+` gutter line is its replacement.
void AttemptLatency::PreCall(opentelemetry::context::Context const&,
201240
PreCallParams const& p) {
202-
// Removed: `p` is a const reference, so `std::move` could not actually move
// from its member here.
attempt_start_ = std::move(p.attempt_start);
241+
// Replacement: plain copy of the start time.
attempt_start_ = p.attempt_start;
203242
}
204243

205244
void AttemptLatency::PostCall(opentelemetry::context::Context const& context,
@@ -225,6 +264,56 @@ std::unique_ptr<Metric> AttemptLatency::clone(ResourceLabels resource_labels,
225264
return m;
226265
}
227266

267+
// Constructs the metric by creating the `attempt_latencies2` double histogram
// on the meter that `provider` returns for `instrumentation_scope` and
// `kMeterInstrumentationScopeVersion`. The label members and `attempt_start_`
// are left default-initialized.
AttemptLatency2::AttemptLatency2(
268+
std::string const& instrumentation_scope,
269+
opentelemetry::nostd::shared_ptr<
270+
opentelemetry::metrics::MeterProvider> const& provider)
271+
: attempt_latencies2_(provider
272+
->GetMeter(instrumentation_scope,
273+
kMeterInstrumentationScopeVersion)
274+
->CreateDoubleHistogram("attempt_latencies2")) {}
275+
276+
// Stores the attempt start time from `p` in `attempt_start_`; `PostCall`
// subtracts it from the attempt end time. The context argument is unused.
void AttemptLatency2::PreCall(opentelemetry::context::Context const&,
277+
PreCallParams const& p) {
278+
attempt_start_ = p.attempt_start;
279+
}
280+
281+
// Records one attempt latency sample in the `attempt_latencies2` histogram.
//
// Steps:
//   1. Update the cluster/zone resource labels from the response params found
//      in the trailing metadata, if any.
//   2. Update the peer info labels from the `bigtable-peer-info` metadata.
//   3. Set the status label from `p.attempt_status`.
//   4. Record `p.attempt_end - attempt_start_` with the combined label map.
//
// `context` is forwarded to the histogram's `Record` call; `client_context`
// supplies the server metadata.
void AttemptLatency2::PostCall(opentelemetry::context::Context const& context,
282+
grpc::ClientContext const& client_context,
283+
PostCallParams const& p) {
284+
auto response_params = GetResponseParamsFromTrailingMetadata(client_context);
285+
// NOTE(review): cluster/zone are only overwritten when response params are
// present; otherwise they keep their current values. Confirm this is intended
// if this object can see more than one PostCall.
if (response_params) {
286+
resource_labels_.cluster = response_params->cluster_id();
287+
resource_labels_.zone = response_params->zone_id();
288+
}
289+
290+
auto peer_info = GetPeerInfoFromServerMetadata(client_context);
291+
// transport_type is always assigned: the lower-cased enum name of the
// reported transport type, or of TRANSPORT_TYPE_UNKNOWN when no peer info was
// found.
peer_info_labels_.transport_type =
292+
absl::AsciiStrToLower(google::bigtable::v2::PeerInfo::TransportType_Name(
293+
peer_info ? peer_info->transport_type()
294+
: google::bigtable::v2::PeerInfo::TRANSPORT_TYPE_UNKNOWN));
295+
// NOTE(review): unlike transport_type, region and subzone are not reset when
// peer info is absent, so they keep whatever they already hold (empty by
// default, or a previous call's values). Confirm whether they should be
// cleared in that case.
if (peer_info) {
296+
peer_info_labels_.transport_region =
297+
peer_info->application_frontend_region();
298+
peer_info_labels_.transport_subzone =
299+
peer_info->application_frontend_subzone();
300+
}
301+
302+
data_labels_.status = StatusCodeToString(p.attempt_status.code());
303+
auto attempt_elapsed = std::chrono::duration_cast<LatencyDuration>(
304+
p.attempt_end - attempt_start_);
305+
// No data labels are filtered out (`{}`); the peer info labels are always
// passed, so the transport_* keys are always part of the label map.
auto m = IntoLabelMap(resource_labels_, data_labels_, {}, peer_info_labels_);
306+
attempt_latencies2_->Record(attempt_elapsed.count(), std::move(m), context);
307+
}
308+
309+
// Returns a copy of this metric with its resource and data labels replaced by
// the supplied values. All other members (the histogram handle, the peer info
// labels and the attempt start time) are copied from `*this`.
std::unique_ptr<Metric> AttemptLatency2::clone(ResourceLabels resource_labels,
310+
DataLabels data_labels) const {
311+
auto m = std::make_unique<AttemptLatency2>(*this);
312+
// The label arguments are taken by value, so they can be moved into place.
m->resource_labels_ = std::move(resource_labels);
313+
m->data_labels_ = std::move(data_labels);
314+
return m;
315+
}
316+
228317
RetryCount::RetryCount(
229318
std::string const& instrumentation_scope,
230319
opentelemetry::nostd::shared_ptr<

google/cloud/bigtable/internal/metrics.h

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "google/cloud/bigtable/internal/operation_context.h"
2121
#include "google/cloud/bigtable/version.h"
2222
#include "google/cloud/status.h"
23+
#include "google/bigtable/v2/peer_info.pb.h"
2324
#include "google/bigtable/v2/response_params.pb.h"
2425
#include <grpcpp/grpcpp.h>
2526
#include <opentelemetry/context/context.h>
@@ -52,17 +53,30 @@ struct DataLabels {
5253
std::string status;
5354
};
5455

56+
// Labels populated from the `bigtable-peer-info` server metadata. When passed
// to `IntoLabelMap`, each field is emitted under a label key equal to the
// field name.
56+
57+
struct PeerInfoLabels {
58+
// Lower-cased name of the `PeerInfo` transport type enum value.
std::string transport_type;
59+
// Taken from `PeerInfo::application_frontend_region()`.
std::string transport_region;
60+
// Taken from `PeerInfo::application_frontend_subzone()`.
std::string transport_subzone;
61+
};
62+
5563
using LabelMap = std::unordered_map<std::string, std::string>;
56-
LabelMap IntoLabelMap(ResourceLabels const& r, DataLabels const& d,
57-
std::set<std::string> const& filtered_data_labels = {});
64+
// Builds the label map attached to a recorded metric sample from the resource
// labels `r` and the data labels `d`. Data labels whose key appears in
// `filtered_data_labels` are left out of the result.
//
// `peer_info_labels` is optional because only AttemptLatency2 populates it.
// When it holds a value, the `transport_type`, `transport_region` and
// `transport_subzone` labels are added to the map as well.
65+
LabelMap IntoLabelMap(
66+
ResourceLabels const& r, DataLabels const& d,
67+
std::set<std::string> const& filtered_data_labels = {},
68+
std::optional<PeerInfoLabels> const& peer_info_labels = std::nullopt);
5869

5970
bool HasServerTiming(grpc::ClientContext const& client_context);
6071
bool IsConnectivityError(google::cloud::Status const& status,
6172
grpc::ClientContext const& client_context);
6273
absl::optional<google::bigtable::v2::ResponseParams>
6374
GetResponseParamsFromTrailingMetadata(
6475
grpc::ClientContext const& client_context);
65-
76+
// Retrieve the peer info from server headers or trailers. Returns nullopt if
77+
// not found or decoding or parsing fails.
78+
std::optional<google::bigtable::v2::PeerInfo> GetPeerInfoFromServerMetadata(
79+
grpc::ClientContext const& client_context);
6680
absl::optional<double> GetServerLatencyFromInitialMetadata(
6781
grpc::ClientContext const& client_context);
6882

@@ -154,6 +168,29 @@ class AttemptLatency : public Metric {
154168
OperationContext::Clock::time_point attempt_start_;
155169
};
156170

171+
// Similar to AttemptLatency and also populates the peer info: each sample is
// recorded in the `attempt_latencies2` histogram with the extra transport_*
// labels taken from the `bigtable-peer-info` server metadata.
172+
class AttemptLatency2 : public Metric {
173+
public:
174+
// Creates the `attempt_latencies2` histogram on the meter obtained from
// `provider` for `instrumentation_scope`.
AttemptLatency2(std::string const& instrumentation_scope,
175+
opentelemetry::nostd::shared_ptr<
176+
opentelemetry::metrics::MeterProvider> const& provider);
177+
// Stores the attempt start time.
void PreCall(opentelemetry::context::Context const&,
178+
PreCallParams const& p) override;
179+
// Updates the labels from the server metadata and attempt status, then
// records the elapsed attempt time.
void PostCall(opentelemetry::context::Context const& context,
180+
grpc::ClientContext const& client_context,
181+
PostCallParams const& p) override;
182+
// Returns a copy of this metric that uses the supplied labels.
std::unique_ptr<Metric> clone(ResourceLabels resource_labels,
183+
DataLabels data_labels) const override;
184+
185+
private:
186+
// Set by `clone`; cluster and zone are updated in `PostCall` when response
// params are present.
ResourceLabels resource_labels_;
187+
// Set by `clone`; `status` is overwritten on every `PostCall`.
DataLabels data_labels_;
188+
// Filled in by `PostCall` from the decoded peer info.
PeerInfoLabels peer_info_labels_;
189+
// Histogram handle; copying the metric copies this shared pointer.
opentelemetry::nostd::shared_ptr<opentelemetry::metrics::Histogram<double>>
190+
attempt_latencies2_;
191+
// Start of the current attempt, set in `PreCall`.
OperationContext::Clock::time_point attempt_start_;
192+
};
193+
157194
class RetryCount : public Metric {
158195
public:
159196
RetryCount(std::string const& instrumentation_scope,

0 commit comments

Comments
 (0)