forked from open-telemetry/opentelemetry-java-contrib
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathAwsSpanMetricsProcessor.java
More file actions
182 lines (157 loc) · 6.76 KB
/
AwsSpanMetricsProcessor.java
File metadata and controls
182 lines (157 loc) · 6.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/
package io.opentelemetry.contrib.awsxray;
import static io.opentelemetry.api.common.AttributeKey.longKey;
import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.metrics.DoubleHistogram;
import io.opentelemetry.api.metrics.LongCounter;
import io.opentelemetry.context.Context;
import io.opentelemetry.sdk.resources.Resource;
import io.opentelemetry.sdk.trace.ReadWriteSpan;
import io.opentelemetry.sdk.trace.ReadableSpan;
import io.opentelemetry.sdk.trace.SpanProcessor;
import io.opentelemetry.sdk.trace.data.EventData;
import io.opentelemetry.sdk.trace.data.ExceptionEventData;
import io.opentelemetry.sdk.trace.data.SpanData;
import java.lang.reflect.Method;
import javax.annotation.Nullable;
import javax.annotation.concurrent.Immutable;
/**
 * This processor will generate metrics based on span data. It depends on a {@link
 * MetricAttributeGenerator} being provided on instantiation, which will provide a means to
 * determine attributes which should be used to create metrics. A {@link Resource} must also be
 * provided, which is used to generate metrics. Finally, two {@link LongCounter}'s and a {@link
 * DoubleHistogram} must be provided, which will be used to actually create desired metrics (see
 * below)
 *
 * <p>AwsSpanMetricsProcessor produces metrics for errors (e.g. HTTP 4XX status codes), faults (e.g.
 * HTTP 5XX status codes), and latency (in Milliseconds). Errors and faults are counted, while
 * latency is measured with a histogram. Metrics are emitted with attributes derived from span
 * attributes.
 *
 * <p>For highest fidelity metrics, this processor should be coupled with the {@link
 * AlwaysRecordSampler}, which will result in 100% of spans being sent to the processor.
 */
@Immutable
public final class AwsSpanMetricsProcessor implements SpanProcessor {

  // Span attribute that carries the HTTP response status code.
  private static final AttributeKey<Long> HTTP_STATUS_CODE = longKey("http.status_code");

  // Divisor applied to a nanosecond latency to express it in milliseconds.
  private static final double NANOS_TO_MILLIS = 1_000_000.0;

  // Constants for deriving error and fault metrics
  private static final int ERROR_CODE_LOWER_BOUND = 400;
  private static final int ERROR_CODE_UPPER_BOUND = 499;
  private static final int FAULT_CODE_LOWER_BOUND = 500;
  private static final int FAULT_CODE_UPPER_BOUND = 599;

  // Substring of the instrumentation scope name that triggers the reflective status code lookup.
  private static final String AWS_SDK_SCOPE_MARKER = "aws-sdk";

  // Names of the no-arg status code accessors probed on recorded exceptions (AWS SDK V1, then V2).
  private static final String AWS_SDK_V1_STATUS_CODE_METHOD = "getStatusCode";
  private static final String AWS_SDK_V2_STATUS_CODE_METHOD = "statusCode";

  // Metric instruments
  private final LongCounter errorCounter;
  private final LongCounter faultCounter;
  private final DoubleHistogram latencyHistogram;

  private final MetricAttributeGenerator generator;
  private final Resource resource;

  /** Use {@link AwsSpanMetricsProcessorBuilder} to construct this processor. */
  static AwsSpanMetricsProcessor create(
      LongCounter errorCounter,
      LongCounter faultCounter,
      DoubleHistogram latencyHistogram,
      MetricAttributeGenerator generator,
      Resource resource) {
    return new AwsSpanMetricsProcessor(
        errorCounter, faultCounter, latencyHistogram, generator, resource);
  }

  private AwsSpanMetricsProcessor(
      LongCounter errorCounter,
      LongCounter faultCounter,
      DoubleHistogram latencyHistogram,
      MetricAttributeGenerator generator,
      Resource resource) {
    this.errorCounter = errorCounter;
    this.faultCounter = faultCounter;
    this.latencyHistogram = latencyHistogram;
    this.generator = generator;
    this.resource = resource;
  }

  /** No-op: this processor only acts when a span ends. */
  @Override
  public void onStart(Context parentContext, ReadWriteSpan span) {}

  @Override
  public boolean isStartRequired() {
    return false;
  }

  /**
   * Generates metric attributes for the ended span and, if any were produced, records the
   * error/fault count and the latency of the span with those attributes.
   *
   * @param span the span that has just ended
   */
  @Override
  public void onEnd(ReadableSpan span) {
    SpanData spanData = span.toSpanData();
    Attributes attributes = generator.generateMetricAttributesFromSpan(spanData, resource);

    // Only record metrics if non-empty attributes are returned.
    if (attributes.isEmpty()) {
      return;
    }
    recordErrorOrFault(spanData, attributes);
    recordLatency(span, attributes);
  }

  @Override
  public boolean isEndRequired() {
    return true;
  }

  /**
   * Increments the error counter for a 4XX status code or the fault counter for a 5XX status code.
   * The status code is read from the {@code http.status_code} span attribute; when that attribute
   * is absent, {@link #getAwsStatusCode(SpanData)} is consulted instead. Nothing is recorded when
   * no status code can be determined or when it lies outside both ranges.
   *
   * @param spanData the span whose status code is inspected
   * @param attributes the attributes attached to the recorded measurement
   */
  private void recordErrorOrFault(SpanData spanData, Attributes attributes) {
    Long httpStatusCode = spanData.getAttributes().get(HTTP_STATUS_CODE);
    if (httpStatusCode == null) {
      httpStatusCode = getAwsStatusCode(spanData);
    }
    if (httpStatusCode == null) {
      return;
    }

    // Codes outside 400-599 match neither branch, so no separate validity check is needed.
    long statusCode = httpStatusCode;
    if (statusCode >= ERROR_CODE_LOWER_BOUND && statusCode <= ERROR_CODE_UPPER_BOUND) {
      errorCounter.add(1, attributes);
    } else if (statusCode >= FAULT_CODE_LOWER_BOUND && statusCode <= FAULT_CODE_UPPER_BOUND) {
      faultCounter.add(1, attributes);
    }
  }

  /**
   * Attempt to pull status code from spans produced by AWS SDK instrumentation (both v1 and v2).
   * AWS SDK instrumentation does not populate http.status_code when non-200 status codes are
   * returned, as the AWS SDK throws exceptions rather than returning responses with status codes.
   * To work around this, we are attempting to get the exception out of the events, then calling
   * getStatusCode (for AWS SDK V1) and statusCode (for AWS SDK V2) to get the status code from the
   * exception. We rely on reflection here because we cannot cast the throwable to
   * AmazonServiceExceptions (V1) or AwsServiceExceptions (V2) because the throwable comes from a
   * separate class loader and attempts to cast will fail with ClassCastException.
   *
   * <p>TODO: Short term workaround. This can be completely removed once
   * https://github.com/open-telemetry/opentelemetry-java-contrib/issues/919 is resolved.
   *
   * @param spanData the span whose exception events are searched
   * @return the status code of the first exception event that exposes one, or {@code null} if the
   *     instrumentation scope name does not contain "aws-sdk" or no event yields a status code
   */
  @Nullable
  private static Long getAwsStatusCode(SpanData spanData) {
    String scopeName = spanData.getInstrumentationScopeInfo().getName();
    if (!scopeName.contains(AWS_SDK_SCOPE_MARKER)) {
      return null;
    }

    for (EventData event : spanData.getEvents()) {
      if (!(event instanceof ExceptionEventData)) {
        continue;
      }
      Throwable throwable = ((ExceptionEventData) event).getException();

      // Probe the V1 accessor first and fall back to the V2 accessor.
      Long statusCode = invokeStatusCodeAccessor(throwable, AWS_SDK_V1_STATUS_CODE_METHOD);
      if (statusCode == null) {
        statusCode = invokeStatusCodeAccessor(throwable, AWS_SDK_V2_STATUS_CODE_METHOD);
      }
      if (statusCode != null) {
        return statusCode;
      }
    }
    return null;
  }

  /**
   * Reflectively invokes the public no-arg method {@code methodName} on {@code throwable} and
   * returns its result as a {@link Long} when the result is an {@link Integer}.
   *
   * @param throwable the exception to query
   * @param methodName the name of the accessor to invoke
   * @return the status code, or {@code null} if the method is missing, cannot be invoked, throws,
   *     or does not return an {@link Integer}
   */
  @Nullable
  private static Long invokeStatusCodeAccessor(Throwable throwable, String methodName) {
    try {
      Method method = throwable.getClass().getMethod(methodName);
      Object code = method.invoke(throwable);
      return code instanceof Integer ? Long.valueOf((Integer) code) : null;
    } catch (ReflectiveOperationException | RuntimeException ignored) {
      // Best effort: the exception does not expose this accessor (or invoking it failed), which
      // is expected for any throwable that is not an AWS SDK service exception.
      return null;
    }
  }

  /**
   * Records the latency of the span, converted from nanoseconds to milliseconds, in the latency
   * histogram.
   *
   * @param span the span whose latency is read
   * @param attributes the attributes attached to the recorded measurement
   */
  private void recordLatency(ReadableSpan span, Attributes attributes) {
    long nanos = span.getLatencyNanos();
    double millis = nanos / NANOS_TO_MILLIS;
    latencyHistogram.record(millis, attributes);
  }
}