Skip to content

Commit 76fb55f

Browse files
authored
Report gRPC status code in client-computed stats (#10805)
fix(ccs): Reports gRPC status code in CCS # Conflicts: # dd-trace-core/src/main/java/datadog/trace/common/metrics/MetricKey.java fix(ccs): Use captured gRPC status code numeric in metric aggregation When client-computed stats (CCS) are enabled, the agent **merges** stats it computes itself from raw spans with stats pre-computed by the tracer. For gRPC spans, without Client Computed Stats (metrics) the agent resolves the status code from the span's tags via [`getGRPCStatusCode()`](https://github.com/DataDog/datadog-agent/blob/47938ea8c9b9894dcb03dc3f81cf2c6e408f1b6c/pkg/trace/stats/aggregation.go#L167-L221), which always returns a numeric string (e.g. `4`) or an empty string. With CCS enabled, the code uses [`GRPCStatusCode`](https://github.com/DataDog/datadog-agent/blob/47938ea8c9b9894dcb03dc3f81cf2c6e408f1b6c/pkg/trace/stats/aggregation.go#L160) without translation. This change mimics the aggregation of the agent, and what is expected from the agent, in [`NewAggregationFromGroup`](https://github.com/DataDog/datadog-agent/blob/47938ea8c9b9894dcb03dc3f81cf2c6e408f1b6c/pkg/trace/stats/aggregation.go#L146-L165). Protocol-wise, [ClientGroupedStats.GRPC_status_code](https://github.com/DataDog/datadog-agent/blob/47938ea8c9b9894dcb03dc3f81cf2c6e408f1b6c/pkg/proto/datadog/trace/stats.proto#L103) is a `string`. chore(ccs): Use InstrumentationTags Merge branch 'master' into bdu/report-grpc-status-code-to-tracer-client-computed-stats Merge branch 'master' into bdu/report-grpc-status-code-to-tracer-client-computed-stats Co-authored-by: brice.dutheil <brice.dutheil@datadoghq.com>
1 parent 8b4302a commit 76fb55f

File tree

18 files changed

+252
-30
lines changed

18 files changed

+252
-30
lines changed

dd-java-agent/instrumentation/armeria/armeria-grpc-0.84/src/main/java/datadog/trace/instrumentation/armeria/grpc/client/GrpcClientDecorator.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import datadog.trace.api.datastreams.DataStreamsTags;
1616
import datadog.trace.api.naming.SpanNaming;
1717
import datadog.trace.bootstrap.instrumentation.api.AgentSpan;
18+
import datadog.trace.bootstrap.instrumentation.api.InstrumentationTags;
1819
import datadog.trace.bootstrap.instrumentation.api.InternalSpanTypes;
1920
import datadog.trace.bootstrap.instrumentation.api.Tags;
2021
import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
@@ -116,6 +117,7 @@ public AgentSpan onClose(final AgentSpan span, final Status status) {
116117

117118
span.setTag("status.code", status.getCode().name());
118119
span.setTag("grpc.status.code", status.getCode().name());
120+
span.setTag(InstrumentationTags.GRPC_STATUS_CODE, status.getCode().value());
119121
span.setTag("status.description", status.getDescription());
120122

121123
// TODO why is there a mismatch between client / server for calling the onError method?

dd-java-agent/instrumentation/armeria/armeria-grpc-0.84/src/main/java/datadog/trace/instrumentation/armeria/grpc/server/GrpcServerDecorator.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import datadog.trace.api.naming.SpanNaming;
1010
import datadog.trace.bootstrap.instrumentation.api.AgentSpan;
1111
import datadog.trace.bootstrap.instrumentation.api.ErrorPriorities;
12+
import datadog.trace.bootstrap.instrumentation.api.InstrumentationTags;
1213
import datadog.trace.bootstrap.instrumentation.api.InternalSpanTypes;
1314
import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
1415
import datadog.trace.bootstrap.instrumentation.decorator.ServerDecorator;
@@ -97,6 +98,7 @@ public <RespT, ReqT> AgentSpan onCall(final AgentSpan span, ServerCall<ReqT, Res
9798
public AgentSpan onStatus(final AgentSpan span, final Status status) {
9899
span.setTag("status.code", status.getCode().name());
99100
span.setTag("grpc.status.code", status.getCode().name());
101+
span.setTag(InstrumentationTags.GRPC_STATUS_CODE, status.getCode().value());
100102
span.setTag("status.description", status.getDescription());
101103
return span.setError(
102104
SERVER_ERROR_STATUSES.get(status.getCode().value()), ErrorPriorities.HTTP_SERVER_DECORATOR);

dd-java-agent/instrumentation/armeria/armeria-grpc-0.84/src/test/groovy/ArmeriaGrpcStreamingTest.groovy

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import com.linecorp.armeria.server.grpc.GrpcService
77
import com.linecorp.armeria.testing.junit4.server.ServerRule
88
import datadog.trace.agent.test.naming.VersionedNamingTestBase
99
import datadog.trace.api.DDSpanTypes
10+
import datadog.trace.bootstrap.instrumentation.api.InstrumentationTags
1011
import datadog.trace.bootstrap.instrumentation.api.Tags
1112
import example.GreeterGrpc
1213
import example.Helloworld
@@ -178,6 +179,7 @@ abstract class ArmeriaGrpcStreamingTest extends VersionedNamingTestBase {
178179
"$Tags.RPC_SERVICE" "example.Greeter"
179180
"status.code" "OK"
180181
"grpc.status.code" "OK"
182+
"$InstrumentationTags.GRPC_STATUS_CODE" 0
181183
"request.type" "example.Helloworld\$Response"
182184
"response.type" "example.Helloworld\$Response"
183185
peerServiceFrom(Tags.RPC_SERVICE)
@@ -215,6 +217,7 @@ abstract class ArmeriaGrpcStreamingTest extends VersionedNamingTestBase {
215217
"$Tags.SPAN_KIND" Tags.SPAN_KIND_SERVER
216218
"status.code" "OK"
217219
"grpc.status.code" "OK"
220+
"$InstrumentationTags.GRPC_STATUS_CODE" 0
218221
defaultTags(true)
219222
}
220223
}

dd-java-agent/instrumentation/armeria/armeria-grpc-0.84/src/test/groovy/ArmeriaGrpcTest.groovy

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import datadog.trace.api.gateway.RequestContext
1818
import datadog.trace.api.gateway.RequestContextSlot
1919
import datadog.trace.bootstrap.instrumentation.api.AgentPropagation
2020
import datadog.trace.bootstrap.instrumentation.api.AgentTracer
21+
import datadog.trace.bootstrap.instrumentation.api.InstrumentationTags
2122
import datadog.trace.bootstrap.instrumentation.api.Tags
2223
import datadog.trace.core.datastreams.StatsGroup
2324
import datadog.trace.instrumentation.armeria.grpc.server.GrpcExtractAdapter
@@ -182,6 +183,7 @@ abstract class ArmeriaGrpcTest extends VersionedNamingTestBase {
182183
"$Tags.RPC_SERVICE" "example.Greeter"
183184
"status.code" "OK"
184185
"grpc.status.code" "OK"
186+
"$InstrumentationTags.GRPC_STATUS_CODE" 0
185187
"request.type" "example.Helloworld\$Request"
186188
"response.type" "example.Helloworld\$Response"
187189
if ({ isDataStreamsEnabled() }) {
@@ -221,6 +223,7 @@ abstract class ArmeriaGrpcTest extends VersionedNamingTestBase {
221223
"$Tags.SPAN_KIND" Tags.SPAN_KIND_SERVER
222224
"status.code" "OK"
223225
"grpc.status.code" "OK"
226+
"$InstrumentationTags.GRPC_STATUS_CODE" 0
224227
if ({ isDataStreamsEnabled() }) {
225228
"$DDTags.PATHWAY_HASH" { String }
226229
}
@@ -319,6 +322,7 @@ abstract class ArmeriaGrpcTest extends VersionedNamingTestBase {
319322
"$Tags.RPC_SERVICE" "example.Greeter"
320323
"status.code" "${status.code.name()}"
321324
"grpc.status.code" "${status.code.name()}"
325+
"$InstrumentationTags.GRPC_STATUS_CODE" status.code.value()
322326
"status.description" description
323327
"request.type" "example.Helloworld\$Request"
324328
"response.type" "example.Helloworld\$Response"
@@ -343,6 +347,7 @@ abstract class ArmeriaGrpcTest extends VersionedNamingTestBase {
343347
"$Tags.SPAN_KIND" Tags.SPAN_KIND_SERVER
344348
"status.code" "${status.code.name()}"
345349
"grpc.status.code" "${status.code.name()}"
350+
"$InstrumentationTags.GRPC_STATUS_CODE" status.code.value()
346351
"status.description" description
347352
"canceled" { true } // 1.0.0 handles cancellation incorrectly so accepting any value
348353
if (status.cause != null) {
@@ -432,6 +437,7 @@ abstract class ArmeriaGrpcTest extends VersionedNamingTestBase {
432437
"$Tags.RPC_SERVICE" "example.Greeter"
433438
"status.code" status.code.name()
434439
"grpc.status.code" status.code.name()
440+
"$InstrumentationTags.GRPC_STATUS_CODE" status.code.value()
435441
if (status.description != null) {
436442
"status.description" status.description
437443
}
@@ -459,6 +465,7 @@ abstract class ArmeriaGrpcTest extends VersionedNamingTestBase {
459465
errorTags error.class, error.message
460466
"status.code" "${status.code.name()}"
461467
"grpc.status.code" "${status.code.name()}"
468+
"$InstrumentationTags.GRPC_STATUS_CODE" status.code.value()
462469
"status.description" { it == null || String}
463470
"canceled" { true } // 1.0.0 handles cancellation incorrectly so accepting any value
464471
if ({ isDataStreamsEnabled() }) {
@@ -574,6 +581,7 @@ abstract class ArmeriaGrpcTest extends VersionedNamingTestBase {
574581
"$Tags.SPAN_KIND" Tags.SPAN_KIND_SERVER
575582
"status.code" "OK"
576583
"grpc.status.code" "OK"
584+
"$InstrumentationTags.GRPC_STATUS_CODE" 0
577585
if ({ isDataStreamsEnabled() }) {
578586
"$DDTags.PATHWAY_HASH" { String }
579587
}
@@ -650,6 +658,7 @@ abstract class ArmeriaGrpcTest extends VersionedNamingTestBase {
650658
"$Tags.RPC_SERVICE" "example.Greeter"
651659
"status.code" "OK"
652660
"grpc.status.code" "OK"
661+
"$InstrumentationTags.GRPC_STATUS_CODE" 0
653662
"request.type" "example.Helloworld\$Request"
654663
"response.type" "example.Helloworld\$Response"
655664
if ({ isDataStreamsEnabled() }) {

dd-java-agent/instrumentation/google-pubsub-1.116/src/test/groovy/PubSubTest.groovy

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import datadog.trace.api.DDSpanTypes
2727
import datadog.trace.api.DDTags
2828
import datadog.trace.api.config.GeneralConfig
2929
import datadog.trace.api.config.TraceInstrumentationConfig
30+
import datadog.trace.bootstrap.instrumentation.api.InstrumentationTags
3031
import datadog.trace.bootstrap.instrumentation.api.Tags
3132
import datadog.trace.core.DDSpan
3233
import datadog.trace.core.datastreams.StatsGroup
@@ -284,6 +285,7 @@ abstract class PubSubTest extends VersionedNamingTestBase {
284285
"$Tags.RPC_SERVICE" { String }
285286
"status.code" { String }
286287
"grpc.status.code" { String }
288+
"$InstrumentationTags.GRPC_STATUS_CODE" { Integer }
287289
if ({ isDataStreamsEnabled() }) {
288290
"$DDTags.PATHWAY_HASH" { String }
289291
}

dd-java-agent/instrumentation/grpc-1.5/src/main/java/datadog/trace/instrumentation/grpc/client/GrpcClientDecorator.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import datadog.trace.api.datastreams.DataStreamsContext;
1717
import datadog.trace.api.naming.SpanNaming;
1818
import datadog.trace.bootstrap.instrumentation.api.AgentSpan;
19+
import datadog.trace.bootstrap.instrumentation.api.InstrumentationTags;
1920
import datadog.trace.bootstrap.instrumentation.api.InternalSpanTypes;
2021
import datadog.trace.bootstrap.instrumentation.api.Tags;
2122
import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
@@ -116,6 +117,7 @@ public <C> void injectContext(Context context, final C request, CarrierSetter<C>
116117
public AgentSpan onClose(final AgentSpan span, final Status status) {
117118
span.setTag("status.code", status.getCode().name());
118119
span.setTag("grpc.status.code", status.getCode().name());
120+
span.setTag(InstrumentationTags.GRPC_STATUS_CODE, status.getCode().value());
119121
span.setTag("status.description", status.getDescription());
120122

121123
// TODO why is there a mismatch between client / server for calling the onError method?

dd-java-agent/instrumentation/grpc-1.5/src/main/java/datadog/trace/instrumentation/grpc/server/GrpcServerDecorator.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import datadog.trace.api.naming.SpanNaming;
1111
import datadog.trace.bootstrap.instrumentation.api.AgentSpan;
1212
import datadog.trace.bootstrap.instrumentation.api.ErrorPriorities;
13+
import datadog.trace.bootstrap.instrumentation.api.InstrumentationTags;
1314
import datadog.trace.bootstrap.instrumentation.api.InternalSpanTypes;
1415
import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
1516
import datadog.trace.bootstrap.instrumentation.decorator.ServerDecorator;
@@ -98,6 +99,7 @@ public <RespT, ReqT> AgentSpan onCall(final AgentSpan span, ServerCall<ReqT, Res
9899
public AgentSpan onStatus(final AgentSpan span, final Status status) {
99100
span.setTag("status.code", status.getCode().name());
100101
span.setTag("grpc.status.code", status.getCode().name());
102+
span.setTag(InstrumentationTags.GRPC_STATUS_CODE, status.getCode().value());
101103
span.setTag("status.description", status.getDescription());
102104
return span.setError(
103105
SERVER_ERROR_STATUSES.get(status.getCode().value()), ErrorPriorities.HTTP_SERVER_DECORATOR);

dd-java-agent/instrumentation/grpc-1.5/src/test/groovy/GrpcStreamingTest.groovy

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import com.google.common.util.concurrent.MoreExecutors
22
import datadog.trace.agent.test.naming.VersionedNamingTestBase
33
import datadog.trace.api.DDSpanTypes
4+
import datadog.trace.bootstrap.instrumentation.api.InstrumentationTags
45
import datadog.trace.bootstrap.instrumentation.api.Tags
56
import example.GreeterGrpc
67
import example.Helloworld
@@ -162,6 +163,7 @@ abstract class GrpcStreamingTest extends VersionedNamingTestBase {
162163
"$Tags.RPC_SERVICE" "example.Greeter"
163164
"status.code" "OK"
164165
"grpc.status.code" "OK"
166+
"$InstrumentationTags.GRPC_STATUS_CODE" 0
165167
"request.type" "example.Helloworld\$Response"
166168
"response.type" "example.Helloworld\$Response"
167169
peerServiceFrom(Tags.RPC_SERVICE)
@@ -198,6 +200,7 @@ abstract class GrpcStreamingTest extends VersionedNamingTestBase {
198200
"$Tags.SPAN_KIND" Tags.SPAN_KIND_SERVER
199201
"status.code" "OK"
200202
"grpc.status.code" "OK"
203+
"$InstrumentationTags.GRPC_STATUS_CODE" 0
201204
defaultTags(true)
202205
}
203206
}

dd-java-agent/instrumentation/grpc-1.5/src/test/groovy/GrpcTest.groovy

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import datadog.trace.api.gateway.RequestContext
1313
import datadog.trace.api.gateway.RequestContextSlot
1414
import datadog.trace.bootstrap.instrumentation.api.AgentPropagation
1515
import datadog.trace.bootstrap.instrumentation.api.AgentTracer
16+
import datadog.trace.bootstrap.instrumentation.api.InstrumentationTags
1617
import datadog.trace.bootstrap.instrumentation.api.Tags
1718
import datadog.trace.core.datastreams.StatsGroup
1819
import datadog.trace.instrumentation.grpc.server.GrpcExtractAdapter
@@ -173,6 +174,7 @@ abstract class GrpcTest extends VersionedNamingTestBase {
173174
"$Tags.PEER_PORT" server.port
174175
"status.code" "OK"
175176
"grpc.status.code" "OK"
177+
"$InstrumentationTags.GRPC_STATUS_CODE" 0
176178
"request.type" "example.Helloworld\$Request"
177179
"response.type" "example.Helloworld\$Response"
178180
if ({ isDataStreamsEnabled() }) {
@@ -212,6 +214,7 @@ abstract class GrpcTest extends VersionedNamingTestBase {
212214
"$Tags.SPAN_KIND" Tags.SPAN_KIND_SERVER
213215
"status.code" "OK"
214216
"grpc.status.code" "OK"
217+
"$InstrumentationTags.GRPC_STATUS_CODE" 0
215218
if ({ isDataStreamsEnabled() }) {
216219
"$DDTags.PATHWAY_HASH" { String }
217220
}
@@ -317,6 +320,7 @@ abstract class GrpcTest extends VersionedNamingTestBase {
317320
"$Tags.RPC_SERVICE" "example.Greeter"
318321
"status.code" "${status.code.name()}"
319322
"grpc.status.code" "${status.code.name()}"
323+
"$InstrumentationTags.GRPC_STATUS_CODE" status.code.value()
320324
"status.description" description
321325
"request.type" "example.Helloworld\$Request"
322326
"response.type" "example.Helloworld\$Response"
@@ -341,6 +345,7 @@ abstract class GrpcTest extends VersionedNamingTestBase {
341345
"$Tags.SPAN_KIND" Tags.SPAN_KIND_SERVER
342346
"status.code" "${status.code.name()}"
343347
"grpc.status.code" "${status.code.name()}"
348+
"$InstrumentationTags.GRPC_STATUS_CODE" status.code.value()
344349
"status.description" description
345350
if (status.cause != null) {
346351
errorTags status.cause.class, status.cause.message
@@ -424,6 +429,7 @@ abstract class GrpcTest extends VersionedNamingTestBase {
424429
"$Tags.PEER_PORT" server.port
425430
"status.code" "UNKNOWN"
426431
"grpc.status.code" "UNKNOWN"
432+
"$InstrumentationTags.GRPC_STATUS_CODE" 2
427433
"request.type" "example.Helloworld\$Request"
428434
"response.type" "example.Helloworld\$Response"
429435
"status.description" { it == null || String}
@@ -448,6 +454,7 @@ abstract class GrpcTest extends VersionedNamingTestBase {
448454
"$Tags.SPAN_KIND" Tags.SPAN_KIND_SERVER
449455
"status.code" "${status.code.name()}"
450456
"grpc.status.code" "${status.code.name()}"
457+
"$InstrumentationTags.GRPC_STATUS_CODE" status.code.value()
451458
"status.description" { it == null || String}
452459
errorTags error.class, error.message
453460
if ({ isDataStreamsEnabled() }) {
@@ -554,6 +561,7 @@ abstract class GrpcTest extends VersionedNamingTestBase {
554561
"$Tags.SPAN_KIND" Tags.SPAN_KIND_SERVER
555562
"status.code" "OK"
556563
"grpc.status.code" "OK"
564+
"$InstrumentationTags.GRPC_STATUS_CODE" 0
557565
if ({ isDataStreamsEnabled() }) {
558566
"$DDTags.PATHWAY_HASH" { String }
559567
}
@@ -621,6 +629,7 @@ abstract class GrpcTest extends VersionedNamingTestBase {
621629
"$Tags.RPC_SERVICE" "example.Greeter"
622630
"status.code" "OK"
623631
"grpc.status.code" "OK"
632+
"$InstrumentationTags.GRPC_STATUS_CODE" 0
624633
"request.type" "example.Helloworld\$Request"
625634
"response.type" "example.Helloworld\$Response"
626635
if ({ isDataStreamsEnabled() }) {

dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package datadog.trace.common.metrics;
22

33
import static datadog.communication.ddagent.DDAgentFeaturesDiscovery.V06_METRICS_ENDPOINT;
4+
import static datadog.trace.api.DDSpanTypes.RPC;
45
import static datadog.trace.api.DDTags.BASE_SERVICE;
56
import static datadog.trace.api.Functions.UTF8_ENCODE;
67
import static datadog.trace.bootstrap.instrumentation.api.Tags.HTTP_ENDPOINT;
@@ -29,6 +30,7 @@
2930
import datadog.trace.api.WellKnownTags;
3031
import datadog.trace.api.cache.DDCache;
3132
import datadog.trace.api.cache.DDCaches;
33+
import datadog.trace.bootstrap.instrumentation.api.InstrumentationTags;
3234
import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
3335
import datadog.trace.common.metrics.SignalItem.ReportSignal;
3436
import datadog.trace.common.writer.ddagent.DDAgentApi;
@@ -326,21 +328,28 @@ private boolean publish(CoreSpan<?> span, boolean isTopLevel, CharSequence spanK
326328
httpEndpoint = httpEndpointObj != null ? httpEndpointObj.toString() : null;
327329
}
328330

331+
CharSequence spanType = span.getType();
332+
String grpcStatusCode = null;
333+
if (spanType != null && RPC.contentEquals(spanType)) {
334+
Object grpcStatusObj = span.unsafeGetTag(InstrumentationTags.GRPC_STATUS_CODE);
335+
grpcStatusCode = grpcStatusObj != null ? grpcStatusObj.toString() : null;
336+
}
329337
MetricKey newKey =
330338
new MetricKey(
331339
span.getResourceName(),
332340
SERVICE_NAMES.computeIfAbsent(span.getServiceName(), UTF8_ENCODE),
333341
span.getOperationName(),
334342
span.getServiceNameSource(),
335-
span.getType(),
343+
spanType,
336344
span.getHttpStatusCode(),
337345
isSynthetic(span),
338346
span.getParentId() == 0,
339347
SPAN_KINDS.computeIfAbsent(
340348
spanKind, UTF8BytesString::create), // save repeated utf8 conversions
341349
getPeerTags(span, spanKind.toString()),
342350
httpMethod,
343-
httpEndpoint);
351+
httpEndpoint,
352+
grpcStatusCode);
344353
MetricKey key = keys.putIfAbsent(newKey, newKey);
345354
if (null == key) {
346355
key = newKey;

0 commit comments

Comments
 (0)