Skip to content

Commit e29bd07

Browse files
jack-bergadp2201
authored and committed
Demonstrate e2e crac workflow using io.github.crac:org-crac dependency
1 parent 9e40d37 commit e29bd07

File tree

4 files changed

+159
-64
lines changed

4 files changed

+159
-64
lines changed

dependencyManagement/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ val DEPENDENCIES = listOf(
7878
"com.uber.nullaway:nullaway:0.13.1",
7979
"edu.berkeley.cs.jqf:jqf-fuzz:1.7", // jqf-fuzz version 1.8+ requires Java 11+
8080
"eu.rekawek.toxiproxy:toxiproxy-java:2.1.11",
81+
"io.github.crac:org-crac:0.1.3",
8182
"io.github.netmikey.logunit:logunit-jul:2.0.0",
8283
"io.jaegertracing:jaeger-client:1.8.1",
8384
"io.opentelemetry.contrib:opentelemetry-aws-xray-propagator:1.54.0-alpha",

integration-tests/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ dependencies {
1111
testImplementation(project(":extensions:trace-propagators"))
1212

1313
testImplementation("com.linecorp.armeria:armeria-junit5")
14+
testImplementation("io.github.crac:org-crac")
1415
testImplementation("org.junit.jupiter:junit-jupiter-params")
1516
testImplementation("org.testcontainers:testcontainers-junit-jupiter")
1617
}

integration-tests/src/test/java/io/opentelemetry/CracLifecycleIntegrationTest.java

Lines changed: 101 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -19,93 +19,130 @@
1919
import java.util.Collection;
2020
import java.util.List;
2121
import java.util.concurrent.TimeUnit;
22-
import org.junit.jupiter.api.Disabled;
22+
import org.crac.Context;
23+
import org.crac.Resource;
2324
import org.junit.jupiter.api.Test;
2425

2526
/**
26-
* Integration-style lifecycle tests for CRaC-like checkpoint/restore workflows.
27+
* Integration-style lifecycle tests for CRaC (Coordinated Restore at Checkpoint) support.
2728
*
28-
* <p>In CRaC flows, applications typically need to close resources at checkpoint and resume normal
29-
* behavior after restore.
29+
* <p>These tests use {@link MockCracContext} to simulate the CRaC checkpoint/restore lifecycle
30+
* without a CRaC-enabled JDK. Resources register with the mock context; the test then drives {@code
31+
* beforeCheckpoint} and {@code afterRestore} callbacks directly.
32+
*
33+
* <p>See: <a href="https://github.com/open-telemetry/opentelemetry-java/issues/6756">#6756</a>
3034
*/
3135
class CracLifecycleIntegrationTest {
3236

37+
/**
38+
* Demonstrates the failure mode when the SDK is naively shut down at checkpoint with no
39+
* corresponding restore logic. This is what happens today without proper CRaC support: the SDK is
40+
* a one-shot object, so spans emitted after a restore are silently dropped.
41+
*/
3342
@Test
34-
void exportsDoNotResumeAfterShutdown_currentBehavior() {
35-
LifecycleSpanExporter exporter = new LifecycleSpanExporter();
36-
OpenTelemetrySdk sdk =
37-
OpenTelemetrySdk.builder()
38-
.setTracerProvider(
39-
SdkTracerProvider.builder()
40-
.addSpanProcessor(SimpleSpanProcessor.create(exporter))
41-
.build())
42-
.build();
43-
44-
try {
45-
Tracer tracer = sdk.getTracer("crac-lifecycle-test");
46-
47-
emitSpan(tracer, "before-checkpoint");
48-
sdk.getSdkTracerProvider().forceFlush().join(10, TimeUnit.SECONDS);
49-
assertThat(exporter.exportedSpanCount()).isEqualTo(1);
50-
51-
sdk.getSdkTracerProvider().shutdown().join(10, TimeUnit.SECONDS);
52-
53-
// Simulate post-restore traffic on the same initialized SDK.
54-
emitSpan(tracer, "after-restore");
55-
sdk.getSdkTracerProvider().forceFlush().join(10, TimeUnit.SECONDS);
56-
57-
assertThat(exporter.exportedSpanCount()).isEqualTo(1);
58-
} finally {
59-
sdk.close();
60-
}
43+
void spansDroppedAfterRestore_naiveCracIntegration() throws Exception {
44+
MockCracContext cracContext = new MockCracContext();
45+
InMemorySpanExporter exporter = new InMemorySpanExporter();
46+
OpenTelemetrySdk sdk = buildSdk(exporter);
47+
Tracer tracer = sdk.getTracer("crac-lifecycle-test");
48+
49+
// Naive CRaC resource: shuts the SDK down at checkpoint, does nothing on restore.
50+
cracContext.register(
51+
new Resource() {
52+
@Override
53+
public void beforeCheckpoint(Context<? extends Resource> context) {
54+
sdk.getSdkTracerProvider().shutdown().join(10, TimeUnit.SECONDS);
55+
}
56+
57+
@Override
58+
public void afterRestore(Context<? extends Resource> context) {
59+
// No restore logic — this is the gap that #6756 addresses.
60+
}
61+
});
62+
63+
emitSpan(tracer, "before-checkpoint");
64+
sdk.getSdkTracerProvider().forceFlush().join(10, TimeUnit.SECONDS);
65+
assertThat(exporter.exportedCount()).isEqualTo(1);
66+
67+
cracContext.simulateCheckpoint();
68+
cracContext.simulateRestore();
69+
70+
// Post-restore span is silently dropped: the SDK is shut down and has no way to reinitialize.
71+
emitSpan(tracer, "after-restore");
72+
sdk.getSdkTracerProvider().forceFlush().join(10, TimeUnit.SECONDS);
73+
assertThat(exporter.exportedCount()).isEqualTo(1);
6174
}
6275

76+
/**
77+
* Describes the desired behavior once the SDK properly implements {@link Resource}: spans emitted
78+
* after a CRaC restore should be exported normally.
79+
*
80+
* <p>This test is expected to fail (its {@code @Disabled} is commented out) until <a
81+
* href="https://github.com/open-telemetry/opentelemetry-java/issues/6756">#6756</a> is addressed.
82+
* When that work lands, the SDK (or an adapter it exposes) should register with the CRaC context
83+
* so that {@code beforeCheckpoint} flushes and quiesces, and {@code afterRestore} reinitializes
84+
* exporters and processors. Replace the TODO below with the real SDK API.
85+
*/
6386
@Test
64-
@Disabled("Expected to fail until #6756 is addressed with checkpoint/restore-safe lifecycle")
65-
void exportsShouldResumeAfterRestore_expectedBehavior() {
66-
LifecycleSpanExporter exporter = new LifecycleSpanExporter();
67-
OpenTelemetrySdk sdk =
68-
OpenTelemetrySdk.builder()
69-
.setTracerProvider(
70-
SdkTracerProvider.builder()
71-
.addSpanProcessor(SimpleSpanProcessor.create(exporter))
72-
.build())
73-
.build();
74-
75-
try {
76-
Tracer tracer = sdk.getTracer("crac-lifecycle-test");
77-
78-
emitSpan(tracer, "before-checkpoint");
79-
sdk.getSdkTracerProvider().forceFlush().join(10, TimeUnit.SECONDS);
80-
assertThat(exporter.exportedSpanCount()).isEqualTo(1);
81-
82-
sdk.getSdkTracerProvider().shutdown().join(10, TimeUnit.SECONDS);
83-
84-
// Desired behavior for CRaC-style restore: post-restore spans should export again.
85-
emitSpan(tracer, "after-restore");
86-
sdk.getSdkTracerProvider().forceFlush().join(10, TimeUnit.SECONDS);
87-
88-
assertThat(exporter.exportedSpanCount()).isEqualTo(2);
89-
} finally {
90-
sdk.close();
91-
}
87+
// @Disabled("Expected to fail until #6756 adds checkpoint/restore-safe SDK lifecycle")
88+
void spansExportedAfterRestore_properCracIntegration() throws Exception {
89+
MockCracContext cracContext = new MockCracContext();
90+
InMemorySpanExporter exporter = new InMemorySpanExporter();
91+
OpenTelemetrySdk sdk = buildSdk(exporter);
92+
Tracer tracer = sdk.getTracer("crac-lifecycle-test");
93+
94+
// TODO(#6756): replace this placeholder with the real SDK CRaC API, e.g.:
95+
// cracContext.register(sdk.asCracResource());
96+
cracContext.register(
97+
new Resource() {
98+
@Override
99+
public void beforeCheckpoint(Context<? extends Resource> context) throws Exception {
100+
sdk.getSdkTracerProvider().shutdown().join(10, TimeUnit.SECONDS);
101+
}
102+
103+
@Override
104+
public void afterRestore(Context<? extends Resource> context) throws Exception {
105+
// Reinitialize: reopen connections, restart background threads.
106+
// No SDK API exists for this yet — this is the body of #6756.
107+
}
108+
});
109+
110+
emitSpan(tracer, "before-checkpoint");
111+
sdk.getSdkTracerProvider().forceFlush().join(10, TimeUnit.SECONDS);
112+
assertThat(exporter.exportedCount()).isEqualTo(1);
113+
114+
cracContext.simulateCheckpoint();
115+
cracContext.simulateRestore();
116+
117+
emitSpan(tracer, "after-restore");
118+
sdk.getSdkTracerProvider().forceFlush().join(10, TimeUnit.SECONDS);
119+
assertThat(exporter.exportedCount()).isEqualTo(2);
120+
}
121+
122+
private static OpenTelemetrySdk buildSdk(SpanExporter exporter) {
123+
return OpenTelemetrySdk.builder()
124+
.setTracerProvider(
125+
SdkTracerProvider.builder()
126+
.addSpanProcessor(SimpleSpanProcessor.create(exporter))
127+
.build())
128+
.build();
92129
}
93130

94131
private static void emitSpan(Tracer tracer, String name) {
95132
Span span = tracer.spanBuilder(name).startSpan();
96133
span.end();
97134
}
98135

99-
private static final class LifecycleSpanExporter implements SpanExporter {
100-
private final List<SpanData> exportedSpans = new ArrayList<>();
136+
private static final class InMemorySpanExporter implements SpanExporter {
137+
private final List<SpanData> spans = new ArrayList<>();
101138
private boolean shutdown;
102139

103140
@Override
104141
public CompletableResultCode export(Collection<SpanData> spans) {
105142
if (shutdown) {
106143
return CompletableResultCode.ofFailure();
107144
}
108-
exportedSpans.addAll(spans);
145+
this.spans.addAll(spans);
109146
return CompletableResultCode.ofSuccess();
110147
}
111148

@@ -120,8 +157,8 @@ public CompletableResultCode shutdown() {
120157
return CompletableResultCode.ofSuccess();
121158
}
122159

123-
private int exportedSpanCount() {
124-
return exportedSpans.size();
160+
int exportedCount() {
161+
return spans.size();
125162
}
126163
}
127164
}
integration-tests/src/test/java/io/opentelemetry/MockCracContext.java

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Copyright The OpenTelemetry Authors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package io.opentelemetry;
7+
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
import org.crac.Context;
11+
import org.crac.Resource;
12+
13+
/**
14+
* A test-only {@link Context} that allows simulating CRaC checkpoint and restore lifecycle events
15+
* without requiring a CRaC-enabled JDK. Register resources with {@link #register(Resource)}, then
16+
* call {@link #simulateCheckpoint()} and {@link #simulateRestore()} to drive the lifecycle.
17+
*
18+
* <p>Notification order follows the CRaC specification: checkpoint callbacks fire in reverse
19+
* registration order; restore callbacks fire in forward registration order.
20+
*/
21+
final class MockCracContext extends Context<Resource> {
22+
23+
private final List<Resource> resources = new ArrayList<>();
24+
25+
@Override
26+
public void register(Resource resource) {
27+
resources.add(resource);
28+
}
29+
30+
/**
31+
* Simulates a CRaC checkpoint by invoking {@link Resource#beforeCheckpoint} on all registered
32+
* resources in reverse registration order, as the CRaC spec requires.
33+
*/
34+
void simulateCheckpoint() throws Exception {
35+
for (int i = resources.size() - 1; i >= 0; i--) {
36+
resources.get(i).beforeCheckpoint(this);
37+
}
38+
}
39+
40+
/**
41+
* Simulates a CRaC restore by invoking {@link Resource#afterRestore} on all registered resources
42+
* in forward registration order, as the CRaC spec requires.
43+
*/
44+
void simulateRestore() throws Exception {
45+
for (Resource resource : resources) {
46+
resource.afterRestore(this);
47+
}
48+
}
49+
50+
// Not used: this context is not itself registered with a parent context.
51+
@Override
52+
public void beforeCheckpoint(Context<? extends Resource> context) {}
53+
54+
@Override
55+
public void afterRestore(Context<? extends Resource> context) {}
56+
}

0 commit comments

Comments
 (0)