Skip to content

Commit 1697208

Browse files
committed
feat: Record transaction metrics for Datastore
1 parent 12e2e94 commit 1697208

File tree

9 files changed

+723
-70
lines changed

9 files changed

+723
-70
lines changed

java-datastore/google-cloud-datastore/src/main/java/com/google/cloud/datastore/DatastoreException.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,4 +160,33 @@ static DatastoreException throwInvalidRequest(String massage, Object... params)
160160
static DatastoreException propagateUserException(Exception ex) {
161161
throw new DatastoreException(BaseServiceException.UNKNOWN_CODE, ex.getMessage(), null, ex);
162162
}
163+
164+
/**
165+
* Extracts the status code name from the given throwable. Walks the exception cause chain looking
166+
* for a {@link DatastoreException} that carries a reason string representing the status code
167+
* (e.g. "ABORTED", "UNAVAILABLE"). The reason is set from {@link
168+
* com.google.api.gax.rpc.StatusCode.Code} which is transport-neutral, supporting both gRPC and
169+
* HttpJson. Falls back to "UNKNOWN" if the status cannot be determined.
170+
*
171+
* <p>Note: Some {@link DatastoreException} instances are constructed without a reason (e.g. via
172+
* {@link DatastoreException#DatastoreException(int, String, Throwable)}). If all {@link
173+
* DatastoreException} instances in the cause chain have a null or empty reason, this method
174+
* returns "UNKNOWN" even if the underlying error carries a meaningful status.
175+
*
176+
* @param throwable the throwable to extract the status code from
177+
* @return the status code name, or "UNKNOWN" if not determinable
178+
*/
179+
public static String extractStatusCode(Throwable throwable) {
180+
Throwable current = throwable;
181+
while (current != null) {
182+
if (current instanceof DatastoreException) {
183+
String reason = ((DatastoreException) current).getReason();
184+
if (!Strings.isNullOrEmpty(reason)) {
185+
return reason;
186+
}
187+
}
188+
current = current.getCause();
189+
}
190+
return StatusCode.Code.UNKNOWN.toString();
191+
}
163192
}

java-datastore/google-cloud-datastore/src/main/java/com/google/cloud/datastore/DatastoreImpl.java

Lines changed: 74 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,21 @@
3838

3939
import com.google.api.core.BetaApi;
4040
import com.google.api.gax.retrying.RetrySettings;
41+
import com.google.api.gax.rpc.StatusCode;
4142
import com.google.cloud.BaseService;
4243
import com.google.cloud.ExceptionHandler;
4344
import com.google.cloud.RetryHelper;
4445
import com.google.cloud.RetryHelper.RetryHelperException;
4546
import com.google.cloud.ServiceOptions;
4647
import com.google.cloud.datastore.execution.AggregationQueryExecutor;
4748
import com.google.cloud.datastore.spi.v1.DatastoreRpc;
49+
import com.google.cloud.datastore.telemetry.MetricsRecorder;
50+
import com.google.cloud.datastore.telemetry.TelemetryConstants;
4851
import com.google.cloud.datastore.telemetry.TraceUtil;
4952
import com.google.cloud.datastore.telemetry.TraceUtil.Scope;
5053
import com.google.common.base.MoreObjects;
5154
import com.google.common.base.Preconditions;
55+
import com.google.common.base.Stopwatch;
5256
import com.google.common.collect.AbstractIterator;
5357
import com.google.common.collect.ImmutableList;
5458
import com.google.common.collect.ImmutableMap;
@@ -61,10 +65,12 @@
6165
import com.google.datastore.v1.RunQueryResponse;
6266
import com.google.datastore.v1.TransactionOptions;
6367
import com.google.protobuf.ByteString;
68+
import io.grpc.Status;
6469
import io.opentelemetry.context.Context;
6570
import java.util.ArrayList;
6671
import java.util.Arrays;
6772
import java.util.Collections;
73+
import java.util.HashMap;
6874
import java.util.Iterator;
6975
import java.util.LinkedHashMap;
7076
import java.util.LinkedHashSet;
@@ -73,6 +79,7 @@
7379
import java.util.Optional;
7480
import java.util.Set;
7581
import java.util.concurrent.Callable;
82+
import java.util.concurrent.TimeUnit;
7683
import java.util.logging.Level;
7784
import java.util.logging.Logger;
7885
import javax.annotation.Nullable;
@@ -89,6 +96,7 @@ final class DatastoreImpl extends BaseService<DatastoreOptions> implements Datas
8996

9097
private final com.google.cloud.datastore.telemetry.TraceUtil otelTraceUtil =
9198
getOptions().getTraceUtil();
99+
private final MetricsRecorder metricsRecorder = getOptions().getMetricsRecorder();
92100

93101
private final ReadOptionProtoPreparer readOptionProtoPreparer;
94102
private final AggregationQueryExecutor aggregationQueryExecutor;
@@ -122,63 +130,31 @@ public Transaction newTransaction() {
122130
return new TransactionImpl(this);
123131
}
124132

133+
/**
134+
* A wrapper around {@link ReadWriteTransactionCallable} that adds OpenTelemetry tracing context
135+
* propagation. All transaction logic (begin, run, commit, rollback, metrics recording) is
136+
* delegated to the underlying {@link ReadWriteTransactionCallable}.
137+
*/
125138
static class TracedReadWriteTransactionCallable<T> implements Callable<T> {
126-
private final Datastore datastore;
127-
private final TransactionCallable<T> callable;
128-
private volatile TransactionOptions options;
129-
private volatile Transaction transaction;
130-
139+
private final ReadWriteTransactionCallable<T> delegate;
131140
private final TraceUtil.Span parentSpan;
132141

133142
TracedReadWriteTransactionCallable(
134-
Datastore datastore,
135-
TransactionCallable<T> callable,
136-
TransactionOptions options,
143+
ReadWriteTransactionCallable<T> delegate,
137144
@Nullable com.google.cloud.datastore.telemetry.TraceUtil.Span parentSpan) {
138-
this.datastore = datastore;
139-
this.callable = callable;
140-
this.options = options;
141-
this.transaction = null;
145+
this.delegate = delegate;
142146
this.parentSpan = parentSpan;
143147
}
144148

145-
Datastore getDatastore() {
146-
return datastore;
147-
}
148-
149-
TransactionOptions getOptions() {
150-
return options;
151-
}
152-
153-
Transaction getTransaction() {
154-
return transaction;
155-
}
156-
157-
void setPrevTransactionId(ByteString transactionId) {
158-
TransactionOptions.ReadWrite readWrite =
159-
TransactionOptions.ReadWrite.newBuilder().setPreviousTransaction(transactionId).build();
160-
options = options.toBuilder().setReadWrite(readWrite).build();
149+
ReadWriteTransactionCallable<T> getDelegate() {
150+
return delegate;
161151
}
162152

163153
@Override
164154
public T call() throws DatastoreException {
165155
try (io.opentelemetry.context.Scope ignored =
166156
Context.current().with(parentSpan.getSpan()).makeCurrent()) {
167-
transaction = datastore.newTransaction(options);
168-
T value = callable.run(transaction);
169-
transaction.commit();
170-
return value;
171-
} catch (Exception ex) {
172-
transaction.rollback();
173-
throw DatastoreException.propagateUserException(ex);
174-
} finally {
175-
if (transaction.isActive()) {
176-
transaction.rollback();
177-
}
178-
if (options != null
179-
&& options.getModeCase().equals(TransactionOptions.ModeCase.READ_WRITE)) {
180-
setPrevTransactionId(transaction.getTransactionId());
181-
}
157+
return delegate.call();
182158
}
183159
}
184160
}
@@ -200,14 +176,19 @@ public boolean isClosed() {
200176
static class ReadWriteTransactionCallable<T> implements Callable<T> {
201177
private final Datastore datastore;
202178
private final TransactionCallable<T> callable;
179+
private final MetricsRecorder metricsRecorder;
203180
private volatile TransactionOptions options;
204181
private volatile Transaction transaction;
205182

206183
ReadWriteTransactionCallable(
207-
Datastore datastore, TransactionCallable<T> callable, TransactionOptions options) {
184+
Datastore datastore,
185+
TransactionCallable<T> callable,
186+
TransactionOptions options,
187+
MetricsRecorder metricsRecorder) {
208188
this.datastore = datastore;
209189
this.callable = callable;
210190
this.options = options;
191+
this.metricsRecorder = metricsRecorder;
211192
this.transaction = null;
212193
}
213194

@@ -231,15 +212,28 @@ void setPrevTransactionId(ByteString transactionId) {
231212

232213
@Override
233214
public T call() throws DatastoreException {
215+
String attemptStatus = StatusCode.Code.UNKNOWN.toString();
234216
try {
235217
transaction = datastore.newTransaction(options);
236218
T value = callable.run(transaction);
237219
transaction.commit();
220+
attemptStatus = Status.Code.OK.toString();
238221
return value;
239222
} catch (Exception ex) {
240-
transaction.rollback();
223+
attemptStatus = DatastoreException.extractStatusCode(ex);
224+
// An exception here can originate from datastore.newTransaction(), from callable.run() (before
225+
// commit was attempted), or from transaction.commit() itself. isActive() returns false if the
226+
// transaction was already committed or rolled back, so no separate committed flag is tracked.
227+
// NOTE(review): if newTransaction() throws on the first attempt, transaction is still null here.
228+
if (transaction.isActive()) {
229+
transaction.rollback();
230+
}
241231
throw DatastoreException.propagateUserException(ex);
242232
} finally {
233+
recordAttempt(attemptStatus);
234+
// transaction.isActive() returns false after either a successful commit or a completed
235+
// rollback, so it already guards against rolling back a committed transaction or
236+
// rolling back a transaction that has already been rolled back.
243237
if (transaction.isActive()) {
244238
transaction.rollback();
245239
}
@@ -249,50 +243,63 @@ public T call() throws DatastoreException {
249243
}
250244
}
251245
}
246+
247+
/**
248+
* Records a single transaction commit attempt with the given status code. This is called once
249+
* per invocation of {@link #call()}, capturing the outcome of each individual commit attempt.
250+
*/
251+
private void recordAttempt(String status) {
252+
Map<String, String> attributes = new HashMap<>();
253+
attributes.put(TelemetryConstants.ATTRIBUTES_KEY_STATUS, status);
254+
attributes.put(
255+
TelemetryConstants.ATTRIBUTES_KEY_METHOD, TelemetryConstants.METHOD_TRANSACTION_COMMIT);
256+
attributes.put(
257+
TelemetryConstants.ATTRIBUTES_KEY_PROJECT_ID, datastore.getOptions().getProjectId());
258+
attributes.put(
259+
TelemetryConstants.ATTRIBUTES_KEY_DATABASE_ID, datastore.getOptions().getDatabaseId());
260+
metricsRecorder.recordTransactionAttemptCount(1, attributes);
261+
}
252262
}
253263

254264
@Override
255265
public <T> T runInTransaction(final TransactionCallable<T> callable) {
256-
TraceUtil.Span span = otelTraceUtil.startSpan(SPAN_NAME_TRANSACTION_RUN);
257-
Callable<T> transactionCallable =
258-
(getOptions().getOpenTelemetryOptions().isTracingEnabled()
259-
? new TracedReadWriteTransactionCallable<T>(
260-
this, callable, /* transactionOptions= */ null, span)
261-
: new ReadWriteTransactionCallable<T>(this, callable, /* transactionOptions= */ null));
262-
try (Scope ignored = span.makeCurrent()) {
263-
return RetryHelper.runWithRetries(
264-
transactionCallable,
265-
retrySettings,
266-
TRANSACTION_EXCEPTION_HANDLER,
267-
getOptions().getClock());
268-
} catch (RetryHelperException e) {
269-
span.end(e);
270-
throw DatastoreException.translateAndThrow(e);
271-
} finally {
272-
span.end();
273-
}
266+
return runInTransaction(callable, /* transactionOptions= */ null);
274267
}
275268

276269
@Override
277270
public <T> T runInTransaction(
278271
final TransactionCallable<T> callable, TransactionOptions transactionOptions) {
279272
TraceUtil.Span span = otelTraceUtil.startSpan(SPAN_NAME_TRANSACTION_RUN);
280273

281-
Callable<T> transactionCallable =
282-
(getOptions().getOpenTelemetryOptions().isTracingEnabled()
283-
? new TracedReadWriteTransactionCallable<T>(this, callable, transactionOptions, span)
284-
: new ReadWriteTransactionCallable<T>(this, callable, transactionOptions));
274+
ReadWriteTransactionCallable<T> baseCallable =
275+
new ReadWriteTransactionCallable<>(this, callable, transactionOptions, metricsRecorder);
276+
277+
Callable<T> transactionCallable = baseCallable;
278+
if (getOptions().getOpenTelemetryOptions().isTracingEnabled()) {
279+
transactionCallable = new TracedReadWriteTransactionCallable<>(baseCallable, span);
280+
}
285281

282+
String status = StatusCode.Code.OK.toString();
283+
Stopwatch stopwatch = Stopwatch.createStarted();
286284
try (Scope ignored = span.makeCurrent()) {
287285
return RetryHelper.runWithRetries(
288286
transactionCallable,
289287
retrySettings,
290288
TRANSACTION_EXCEPTION_HANDLER,
291289
getOptions().getClock());
292290
} catch (RetryHelperException e) {
291+
status = DatastoreException.extractStatusCode(e);
293292
span.end(e);
294293
throw DatastoreException.translateAndThrow(e);
295294
} finally {
295+
long latencyMs = stopwatch.elapsed(TimeUnit.MILLISECONDS);
296+
Map<String, String> attributes = new HashMap<>();
297+
attributes.put(TelemetryConstants.ATTRIBUTES_KEY_STATUS, status);
298+
attributes.put(
299+
TelemetryConstants.ATTRIBUTES_KEY_METHOD, TelemetryConstants.METHOD_TRANSACTION_RUN);
300+
attributes.put(TelemetryConstants.ATTRIBUTES_KEY_PROJECT_ID, getOptions().getProjectId());
301+
attributes.put(TelemetryConstants.ATTRIBUTES_KEY_DATABASE_ID, getOptions().getDatabaseId());
302+
metricsRecorder.recordTransactionLatency(latencyMs, attributes);
296303
span.end();
297304
}
298305
}

java-datastore/google-cloud-datastore/src/main/java/com/google/cloud/datastore/DatastoreOptions.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import com.google.cloud.datastore.spi.v1.DatastoreRpc;
3232
import com.google.cloud.datastore.spi.v1.GrpcDatastoreRpc;
3333
import com.google.cloud.datastore.spi.v1.HttpDatastoreRpc;
34+
import com.google.cloud.datastore.telemetry.MetricsRecorder;
3435
import com.google.cloud.datastore.v1.DatastoreSettings;
3536
import com.google.cloud.grpc.GrpcTransportOptions;
3637
import com.google.cloud.http.HttpTransportOptions;
@@ -64,6 +65,7 @@ public class DatastoreOptions extends ServiceOptions<Datastore, DatastoreOptions
6465

6566
private final transient @Nonnull DatastoreOpenTelemetryOptions openTelemetryOptions;
6667
private final transient @Nonnull com.google.cloud.datastore.telemetry.TraceUtil traceUtil;
68+
private final transient @Nonnull MetricsRecorder metricsRecorder;
6769

6870
public static class DefaultDatastoreFactory implements DatastoreFactory {
6971

@@ -104,6 +106,11 @@ public DatastoreOpenTelemetryOptions getOpenTelemetryOptions() {
104106
return openTelemetryOptions;
105107
}
106108

109+
@Nonnull
110+
MetricsRecorder getMetricsRecorder() {
111+
return metricsRecorder;
112+
}
113+
107114
public static class Builder extends ServiceOptions.Builder<Datastore, DatastoreOptions, Builder> {
108115

109116
private String namespace;
@@ -216,6 +223,7 @@ private DatastoreOptions(Builder builder) {
216223
? builder.openTelemetryOptions
217224
: DatastoreOpenTelemetryOptions.newBuilder().build();
218225
this.traceUtil = com.google.cloud.datastore.telemetry.TraceUtil.getInstance(this);
226+
this.metricsRecorder = MetricsRecorder.getInstance(openTelemetryOptions);
219227

220228
namespace = MoreObjects.firstNonNull(builder.namespace, defaultNamespace());
221229
databaseId = MoreObjects.firstNonNull(builder.databaseId, DEFAULT_DATABASE_ID);

java-datastore/google-cloud-datastore/src/main/java/com/google/cloud/datastore/telemetry/MetricsRecorder.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,21 @@
1616

1717
package com.google.cloud.datastore.telemetry;
1818

19+
import com.google.api.core.InternalExtensionOnly;
1920
import com.google.cloud.datastore.DatastoreOpenTelemetryOptions;
2021
import io.opentelemetry.api.GlobalOpenTelemetry;
2122
import io.opentelemetry.api.OpenTelemetry;
2223
import java.util.Map;
2324
import javax.annotation.Nonnull;
2425

25-
/** Interface to record specific metric operations. */
26-
interface MetricsRecorder {
26+
/**
27+
* Interface to record specific metric operations.
28+
*
29+
* <p><b>Warning:</b> This is an internal API and is not intended for external use. Do not implement
30+
* or extend this interface.
31+
*/
32+
@InternalExtensionOnly
33+
public interface MetricsRecorder {
2734
/** Records the total latency of a transaction in milliseconds. */
2835
void recordTransactionLatency(double latencyMs, Map<String, String> attributes);
2936

java-datastore/google-cloud-datastore/src/main/java/com/google/cloud/datastore/telemetry/OpenTelemetryMetricsRecorder.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class OpenTelemetryMetricsRecorder implements MetricsRecorder {
4242
this.transactionLatency =
4343
meter
4444
.histogramBuilder(TelemetryConstants.SERVICE_NAME + "/transaction_latency")
45-
.setDescription("Total latency for successful transaction operations")
45+
.setDescription("Total latency of transaction operations")
4646
.setUnit("ms")
4747
.build();
4848

java-datastore/google-cloud-datastore/src/main/java/com/google/cloud/datastore/telemetry/TelemetryConstants.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,18 @@ public class TelemetryConstants {
3333
public static final String ATTRIBUTES_KEY_DEFERRED = "Deferred";
3434
public static final String ATTRIBUTES_KEY_MORE_RESULTS = "more_results";
3535

36+
/** Attribute key for the RPC status code name (e.g. "OK", "ABORTED", "UNAVAILABLE"). */
37+
public static final String ATTRIBUTES_KEY_STATUS = "status";
38+
39+
/** Attribute key for the RPC method name (e.g. "Transaction.Run"). */
40+
public static final String ATTRIBUTES_KEY_METHOD = "method";
41+
42+
/** Attribute key for the GCP project ID. */
43+
public static final String ATTRIBUTES_KEY_PROJECT_ID = "project_id";
44+
45+
/** Attribute key for the Datastore database ID. */
46+
public static final String ATTRIBUTES_KEY_DATABASE_ID = "database_id";
47+
3648
/* TODO(lawrenceqiu): For now, these are a duplicate of method names in TraceUtil. Those will use these eventually */
3749
// Format is not SnakeCase to keep backward compatibility with the existing values TraceUtil spans
3850
public static final String METHOD_ALLOCATE_IDS = "AllocateIds";

0 commit comments

Comments
 (0)