Skip to content

Commit d2edbfb

Browse files
impl(o11y): introduce error attributes (#12189)
This PR implements error type recording on attempt spans for improved observability in `gax-java`, addressing requirements for better failure analysis.

#### Key Changes

* **`ErrorTypeUtil` Class**: This new utility class classifies errors based on a defined priority to populate observability attributes.

### Error Classification Priority

The extraction logic determines the error type based on the following priority:

1. **`google.rpc.ErrorInfo.reason`**: If the error response from the service includes `ErrorInfo` details, the reason field (e.g., `RATE_LIMIT_EXCEEDED`) is used.
2. **Server Status Code**: If no reason is available, it checks for a server status code. For HTTP, this is the numeric status code (e.g., `403`, `503`). For gRPC, this is the status code name (e.g., `PERMISSION_DENIED`, `UNAVAILABLE`).
3. **Client-Side Network/Operational Errors**: If it's a client-side failure, it maps common exceptions to specific enum representations (e.g., `CLIENT_TIMEOUT`, `CLIENT_CONNECTION_ERROR`).
4. **Language-specific error type**: Falls back to the class simple name of the exception (e.g., `NullPointerException`).
5. **Internal Fallback**: Defaults to `INTERNAL` if no other classification applies.

### Exceptions to be Unwrapped

We investigated standard execution wrappers to ensure accurate error classification in `ErrorTypeUtil`. We only found one exception so far that needs unwrapping in this context.

#### `UncheckedExecutionException`

Occurs in [ServerStreamIterator.java](https://github.com/googleapis/sdk-platform-java/blob/main/gax-java/gax/src/main/java/com/google/api/gax/rpc/ServerStreamIterator.java) when wrapping checked exceptions observed during stream iteration.
```java
if (last instanceof Throwable) {
  Throwable throwable = (Throwable) last;
  throw new UncheckedExecutionException(throwable);
}
```

It is also thrown in [ApiExceptions.java](https://github.com/googleapis/sdk-platform-java/blob/main/gax-java/gax/src/main/java/com/google/api/gax/rpc/ApiExceptions.java) during synchronous call translation:

```java
public static <ResponseT> ResponseT callAndTranslateApiException(ApiFuture<ResponseT> future) {
  try {
    return Futures.getUnchecked(future);
  } catch (UncheckedExecutionException exception) {
    if (exception.getCause() instanceof RuntimeException) {
      // ...
    }
    throw exception;
  }
}
```
1 parent b194801 commit d2edbfb

File tree

7 files changed

+939
-18
lines changed

7 files changed

+939
-18
lines changed
Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
/*
2+
* Copyright 2026 Google LLC
3+
*
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions are
6+
* met:
7+
*
8+
* * Redistributions of source code must retain the above copyright
9+
* notice, this list of conditions and the following disclaimer.
10+
* * Redistributions in binary form must reproduce the above
11+
* copyright notice, this list of conditions and the following disclaimer
12+
* in the documentation and/or other materials provided with the
13+
* distribution.
14+
* * Neither the name of Google LLC nor the names of its
15+
* contributors may be used to endorse or promote products derived from
16+
* this software without specific prior written permission.
17+
*
18+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21+
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22+
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24+
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25+
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26+
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+
*/
30+
package com.google.api.gax.tracing;
31+
32+
import com.google.api.gax.rpc.ApiException;
33+
import com.google.api.gax.rpc.DeadlineExceededException;
34+
import com.google.api.gax.rpc.WatchdogTimeoutException;
35+
import com.google.common.base.Strings;
36+
import com.google.common.collect.ImmutableSet;
37+
import com.google.common.util.concurrent.UncheckedExecutionException;
38+
import java.net.BindException;
39+
import java.net.ConnectException;
40+
import java.net.NoRouteToHostException;
41+
import java.net.SocketTimeoutException;
42+
import java.net.UnknownHostException;
43+
import java.nio.channels.UnresolvedAddressException;
44+
import java.security.GeneralSecurityException;
45+
import java.util.Set;
46+
import javax.annotation.Nullable;
47+
import javax.net.ssl.SSLHandshakeException;
48+
49+
public class ErrorTypeUtil {
50+
51+
enum ErrorType {
52+
CLIENT_TIMEOUT,
53+
CLIENT_CONNECTION_ERROR,
54+
CLIENT_REQUEST_ERROR,
55+
/** Placeholder for potential future request body errors. */
56+
CLIENT_REQUEST_BODY_ERROR,
57+
/** Placeholder for potential future response decode errors. */
58+
CLIENT_RESPONSE_DECODE_ERROR,
59+
/** Placeholder for potential future redirect errors. */
60+
CLIENT_REDIRECT_ERROR,
61+
CLIENT_AUTHENTICATION_ERROR,
62+
/** Placeholder for potential future unknown errors. */
63+
CLIENT_UNKNOWN_ERROR,
64+
INTERNAL;
65+
}
66+
67+
private static final Set<Class<? extends Throwable>> AUTHENTICATION_EXCEPTION_CLASSES =
68+
ImmutableSet.of(GeneralSecurityException.class);
69+
70+
private static final Set<Class<? extends Throwable>> CLIENT_TIMEOUT_EXCEPTION_CLASSES =
71+
ImmutableSet.of(
72+
SocketTimeoutException.class,
73+
WatchdogTimeoutException.class,
74+
DeadlineExceededException.class);
75+
76+
private static final Set<Class<? extends Throwable>> CLIENT_CONNECTION_EXCEPTIONS =
77+
ImmutableSet.of(
78+
ConnectException.class,
79+
UnknownHostException.class,
80+
SSLHandshakeException.class,
81+
UnresolvedAddressException.class,
82+
NoRouteToHostException.class,
83+
BindException.class);
84+
85+
/**
86+
* Extracts a low-cardinality string representing the specific classification of the error to be
87+
* used in the {@link ObservabilityAttributes#ERROR_TYPE_ATTRIBUTE} attribute.
88+
*
89+
* <p>This value is determined based on the following priority:
90+
*
91+
* <ol>
92+
* <li><b>{@code google.rpc.ErrorInfo.reason}:</b> If the error response from the service
93+
* includes {@code google.rpc.ErrorInfo} details, the reason field (e.g.,
94+
* "RATE_LIMIT_EXCEEDED", "SERVICE_DISABLED") will be used. This offers the most precise
95+
* error cause.
96+
* <li><b>Specific Server Error Code:</b> If no {@code ErrorInfo.reason} is available and it is
97+
* not a client-side failure, but a server error code was received:
98+
* <ul>
99+
* <li>For HTTP: The HTTP status code (e.g., "403", "503").
100+
* <li>For gRPC: The gRPC status code name (e.g., "PERMISSION_DENIED", "UNAVAILABLE").
101+
* </ul>
102+
* <li><b>Client-Side Network/Operational Errors:</b> For errors occurring within the client
103+
* library or network stack, mapping to specific enum representations from {@link
104+
* ErrorType}. This includes checking the exception for diagnostic markers (e.g., {@code
105+
* ConnectException} or {@code SocketTimeoutException}).
106+
* <li><b>Language-specific error type:</b> The class or struct name of the exception or error
107+
* if available. This must be low-cardinality, meaning it returns the short name of the
108+
* exception class (e.g. {@code "IllegalStateException"}) rather than its message.
109+
* <li><b>Internal Fallback:</b> If the error doesn't fit any of the above categories, {@code
110+
* "INTERNAL"} will be used, indicating an unexpected issue within the client library's own
111+
* logic.
112+
* </ol>
113+
*
114+
* @param error the Throwable from which to extract the error type string.
115+
* @return a low-cardinality string representing the specific error type, or {@code
116+
* ErrorType.INTERNAL.toString()} if the provided error is {@code null} or non-determined.
117+
*/
118+
// Requirement source: go/clo:product-requirements-v1
119+
public static String extractErrorType(@Nullable Throwable error) {
120+
if (error == null) {
121+
// No information about the error; we default to INTERNAL.
122+
return ErrorType.INTERNAL.toString();
123+
}
124+
125+
// 1. Unwrap standard wrapper exceptions if present
126+
Throwable realError = getRealCause(error);
127+
128+
// 2. Attempt to extract specific error type from the main exception
129+
String specificError = extractKnownErrorType(realError);
130+
if (specificError != null) {
131+
return specificError;
132+
}
133+
134+
// 3. Language-specific error type fallback
135+
String exceptionName = realError.getClass().getSimpleName();
136+
if (!Strings.isNullOrEmpty(exceptionName)) {
137+
return exceptionName;
138+
}
139+
140+
// 4. Internal Fallback
141+
return ErrorType.INTERNAL.toString();
142+
}
143+
144+
/**
145+
* Unwraps standard execution wrappers to find the real cause of the failure.
146+
*
147+
* <p>This method specifically unwraps:
148+
*
149+
* <ul>
150+
* <li>{@link com.google.common.util.concurrent.UncheckedExecutionException}: This is an
151+
* unchecked exception often thrown by {@code ApiExceptions.callAndTranslateApiException} or
152+
* {@code ServerStreamIterator} when a checked exception or error occurs.
153+
* </ul>
154+
*
155+
* @param t the Throwable to unwrap.
156+
* @return the cause of the exception if it is an instance of {@link UncheckedExecutionException}
157+
* and has a cause; otherwise, the throwable itself.
158+
*/
159+
private static Throwable getRealCause(Throwable t) {
160+
if (t.getCause() == null || !(t instanceof UncheckedExecutionException)) {
161+
return t;
162+
}
163+
return t.getCause();
164+
}
165+
166+
/**
167+
* Attempts to extract a specific error type (reason, code, or client error) but returns null if
168+
* it cannot be specifically classified.
169+
*/
170+
@Nullable
171+
private static String extractKnownErrorType(Throwable error) {
172+
// 1. Extract error info reason
173+
if (error instanceof ApiException) {
174+
String reason = ((ApiException) error).getReason();
175+
if (!Strings.isNullOrEmpty(reason)) {
176+
return reason;
177+
}
178+
}
179+
180+
// 2. Extract server status code (swapped order)
181+
if (error instanceof ApiException) {
182+
String errorCode = extractServerErrorCode((ApiException) error);
183+
if (errorCode != null) {
184+
return errorCode;
185+
}
186+
}
187+
188+
// 3. Attempt client side error
189+
String clientError = getClientSideError(error);
190+
if (clientError != null) {
191+
return clientError;
192+
}
193+
194+
return null;
195+
}
196+
197+
/**
198+
* Extracts the server error code from an ApiException.
199+
*
200+
* @param apiException The ApiException to extract the error code from.
201+
* @return A string representing the error code, or null if no specific code can be determined.
202+
*/
203+
@Nullable
204+
private static String extractServerErrorCode(ApiException apiException) {
205+
if (apiException.getStatusCode() != null) {
206+
Object transportCode = apiException.getStatusCode().getTransportCode();
207+
if (transportCode != null) {
208+
return String.valueOf(transportCode);
209+
}
210+
}
211+
return null;
212+
}
213+
214+
/**
215+
* Determines the client-side error type based on the provided Throwable. This method checks for
216+
* various network and client-specific exceptions.
217+
*
218+
* @param error The Throwable to analyze.
219+
* @return A string representing the client-side error type, or null if not matched.
220+
*/
221+
@Nullable
222+
private static String getClientSideError(Throwable error) {
223+
if (isClientTimeout(error)) {
224+
return ErrorType.CLIENT_TIMEOUT.toString();
225+
}
226+
if (isClientConnectionError(error)) {
227+
return ErrorType.CLIENT_CONNECTION_ERROR.toString();
228+
}
229+
if (isClientAuthenticationError(error)) {
230+
return ErrorType.CLIENT_AUTHENTICATION_ERROR.toString();
231+
}
232+
// This covers CLIENT_REQUEST_ERROR for general illegal arguments in client requests.
233+
if (error instanceof IllegalArgumentException) {
234+
return ErrorType.CLIENT_REQUEST_ERROR.toString();
235+
}
236+
return null;
237+
}
238+
239+
/**
240+
* Checks if the given Throwable represents a client-side timeout error. This includes socket
241+
* timeouts and GAX-specific watchdog timeouts.
242+
*
243+
* @param e The Throwable to check.
244+
* @return true if the error is a client timeout, false otherwise.
245+
*/
246+
private static boolean isClientTimeout(Throwable e) {
247+
return hasErrorClass(e, CLIENT_TIMEOUT_EXCEPTION_CLASSES);
248+
}
249+
250+
/**
251+
* Checks if the given Throwable represents a client-side connection error. This includes issues
252+
* with establishing connections, unknown hosts, SSL handshakes, and unresolved addresses.
253+
*
254+
* @param e The Throwable to check.
255+
* @return true if the error is a client connection error, false otherwise.
256+
*/
257+
private static boolean isClientConnectionError(Throwable e) {
258+
return hasErrorClass(e, CLIENT_CONNECTION_EXCEPTIONS);
259+
}
260+
261+
private static boolean isClientAuthenticationError(Throwable e) {
262+
return hasErrorClass(e, AUTHENTICATION_EXCEPTION_CLASSES);
263+
}
264+
265+
/**
266+
* Checks if the throwable is an instance of any of the specified error classes.
267+
*
268+
* @param t The Throwable to check.
269+
* @param errorClasses A set of class objects to check against.
270+
* @return true if the error is an instance of a class from the set, false otherwise.
271+
*/
272+
private static boolean hasErrorClass(Throwable t, Set<Class<? extends Throwable>> errorClasses) {
273+
for (Class<? extends Throwable> errorClass : errorClasses) {
274+
if (errorClass.isInstance(t)) {
275+
return true;
276+
}
277+
}
278+
return false;
279+
}
280+
}

sdk-platform-java/gax-java/gax/src/main/java/com/google/api/gax/tracing/ObservabilityAttributes.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,12 @@ public class ObservabilityAttributes {
8585
/** The url template of the request (e.g. /v1/{name}:access). */
8686
public static final String URL_TEMPLATE_ATTRIBUTE = "url.template";
8787

88+
/** A human-readable error message, which may include details from the exception or response. */
89+
public static final String STATUS_MESSAGE_ATTRIBUTE = "status.message";
90+
91+
/** If the error was caused by an exception, the exception class name. */
92+
public static final String EXCEPTION_TYPE_ATTRIBUTE = "exception.type";
93+
8894
/** Size of the response body in bytes. */
8995
public static final String HTTP_RESPONSE_BODY_SIZE = "http.response.body.size";
9096

@@ -103,7 +109,10 @@ public class ObservabilityAttributes {
103109
/** The full URL of the HTTP request, with sensitive query parameters redacted. */
104110
public static final String HTTP_URL_FULL_ATTRIBUTE = "url.full";
105111

106-
/** The type of error that occurred (e.g., from google.rpc.ErrorInfo.reason). */
112+
/**
113+
* The specific error type. Value will be google.rpc.ErrorInfo.reason, a specific Server Error
114+
* Code, Client-Side Network/Operational Error (e.g., CLIENT_TIMEOUT) or internal fallback.
115+
*/
107116
public static final String ERROR_TYPE_ATTRIBUTE = "error.type";
108117

109118
/** The domain of the error (e.g., from google.rpc.ErrorInfo.domain). */

sdk-platform-java/gax-java/gax/src/main/java/com/google/api/gax/tracing/ObservabilityUtils.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@
4343

4444
final class ObservabilityUtils {
4545

46+
/**
47+
* Extracts a low-cardinality string representing the specific classification of the error to be
48+
* used in the {@link ObservabilityAttributes#ERROR_TYPE_ATTRIBUTE} attribute. See {@link
49+
* ErrorTypeUtil#extractErrorType} for extended documentation.
50+
*/
51+
static String extractErrorType(@Nullable Throwable error) {
52+
// Thin delegate: the classification itself is performed by ErrorTypeUtil.extractErrorType.
return ErrorTypeUtil.extractErrorType(error);
53+
}
54+
4655
/** Function to extract the status of the error as a canonical code. */
4756
static StatusCode.Code extractStatus(@Nullable Throwable error) {
4857
if (error == null) {

0 commit comments

Comments
 (0)