|
| 1 | +# Retry Marker Plan |
| 2 | + |
| 3 | +## Goal |
| 4 | + |
| 5 | +For any retried test, all attempts **except the last** get `dd_tags[test.final_status] = skip` |
| 6 | +via the existing `addFinalStatusProperty` mechanism in `JUnitReport.java`. The last attempt keeps |
| 7 | +its natural outcome (`pass` or `fail`). |
| 8 | + |
| 9 | +## Approach |
| 10 | + |
| 11 | +`RetryMarkerListener` (JUnit Platform `TestExecutionListener`) runs inside the test JVM. It tracks |
| 12 | +retries by `TestIdentifier.getUniqueId()` — immune to display-name instability — and writes |
| 13 | +`TEST-retried-{classname}.xml` alongside the standard Gradle JUnit XML. `ResultCollector` reads |
| 14 | +these marker files and calls `report.tagRetriedTests(keys)` **before** |
| 15 | +`report.normalizeStableTestNames()` so that names in the marker file match the XML before |
| 16 | +normalization rewrites them. Marker files must be called before normalization because |
| 17 | +`normalizeStableTestNames()` collapses distinct unstable names (e.g. `localhost:12345` and |
| 18 | +`localhost:23456` both become `localhost:PORT`), which would cause `tagRetriedTests` to incorrectly |
| 19 | +skip genuinely distinct tests if matching happened after normalization. The marker-file approach is |
| 20 | +safe because only actually-retried tests (tracked by unique ID) enter the marker file. |
| 21 | + |
| 22 | +Each retry round is a separate `TestPlan`; the listener accumulates counts across all rounds and |
| 23 | +overwrites the marker file after each round, so the final write is correct. Forked tests |
| 24 | +(`forkEvery = 1`) each run in their own JVM; per-class files avoid write races. |
| 25 | + |
| 26 | +## Call order in `ResultCollector.collect()` |
| 27 | + |
| 28 | +``` |
| 29 | +applyRetryMarkers(dir, report) ← BEFORE normalization |
| 30 | +normalizeStableTestNames() |
| 31 | +tagSyntheticFailures() |
| 32 | +tagFinalStatuses() |
| 33 | +``` |
| 34 | + |
| 35 | +## Files |
| 36 | + |
| 37 | +### 1. NEW `utils/junit-utils/src/main/java/datadog/trace/junit/utils/retry/RetryMarkerListener.java` |
| 38 | + |
| 39 | +```java |
| 40 | +package datadog.trace.junit.utils.retry; |
| 41 | + |
| 42 | +import java.io.BufferedWriter; |
| 43 | +import java.nio.charset.StandardCharsets; |
| 44 | +import java.nio.file.Files; |
| 45 | +import java.nio.file.Path; |
| 46 | +import java.nio.file.Paths; |
| 47 | +import java.util.LinkedHashMap; |
| 48 | +import java.util.LinkedHashSet; |
| 49 | +import java.util.Map; |
| 50 | +import java.util.Set; |
| 51 | +import java.util.concurrent.ConcurrentHashMap; |
| 52 | +import javax.xml.stream.XMLOutputFactory; |
| 53 | +import javax.xml.stream.XMLStreamWriter; |
| 54 | +import org.junit.platform.engine.TestExecutionResult; |
| 55 | +import org.junit.platform.engine.TestSource; |
| 56 | +import org.junit.platform.engine.support.descriptor.ClassSource; |
| 57 | +import org.junit.platform.engine.support.descriptor.MethodSource; |
| 58 | +import org.junit.platform.launcher.TestExecutionListener; |
| 59 | +import org.junit.platform.launcher.TestIdentifier; |
| 60 | +import org.junit.platform.launcher.TestPlan; |
| 61 | + |
| 62 | +public class RetryMarkerListener implements TestExecutionListener { |
| 63 | + |
| 64 | + static final String OUTPUT_DIR_PROP = "dd.test.results.dir"; |
| 65 | + |
| 66 | + private final Map<String, Integer> executionCounts = new ConcurrentHashMap<>(); |
| 67 | + private final Map<String, TestIdentifier> identifiers = new ConcurrentHashMap<>(); |
| 68 | + |
| 69 | + @Override |
| 70 | + public void executionFinished(TestIdentifier id, TestExecutionResult result) { |
| 71 | + if (!id.isTest()) return; |
| 72 | + executionCounts.merge(id.getUniqueId(), 1, Integer::sum); |
| 73 | + identifiers.put(id.getUniqueId(), id); |
| 74 | + } |
| 75 | + |
| 76 | + // Called once per retry round; overwrites marker files so the last round wins. |
| 77 | + @Override |
| 78 | + public void testPlanExecutionFinished(TestPlan plan) { |
| 79 | + String outputDirProp = System.getProperty(OUTPUT_DIR_PROP); |
| 80 | + if (outputDirProp == null) return; |
| 81 | + var retriedByClass = retriedTestsByClass(); |
| 82 | + if (retriedByClass.isEmpty()) return; |
| 83 | + var outputDir = Paths.get(outputDirProp); |
| 84 | + try { |
| 85 | + Files.createDirectories(outputDir); |
| 86 | + for (var entry : retriedByClass.entrySet()) { |
| 87 | + writeMarkerFile(outputDir, entry.getKey(), entry.getValue()); |
| 88 | + } |
| 89 | + } catch (Exception ex) { |
| 90 | + System.err.println("[RetryMarkerListener] Failed to write retry markers: " + ex.getMessage()); |
| 91 | + } |
| 92 | + } |
| 93 | + |
| 94 | + private Map<String, Set<String>> retriedTestsByClass() { |
| 95 | + var byClass = new LinkedHashMap<String, Set<String>>(); |
| 96 | + for (var entry : executionCounts.entrySet()) { |
| 97 | + if (entry.getValue() <= 1) continue; |
| 98 | + var id = identifiers.get(entry.getKey()); |
| 99 | + byClass.computeIfAbsent(classNameOf(id), k -> new LinkedHashSet<>()).add(id.getDisplayName()); |
| 100 | + } |
| 101 | + return byClass; |
| 102 | + } |
| 103 | + |
| 104 | + private static void writeMarkerFile(Path outputDir, String className, Set<String> testNames) |
| 105 | + throws Exception { |
| 106 | + var file = outputDir.resolve("TEST-retried-" + className + ".xml"); |
| 107 | + try (BufferedWriter writer = Files.newBufferedWriter(file, StandardCharsets.UTF_8)) { |
| 108 | + XMLStreamWriter xml = XMLOutputFactory.newInstance().createXMLStreamWriter(writer); |
| 109 | + xml.writeStartDocument("UTF-8", "1.0"); |
| 110 | + xml.writeStartElement("testsuite"); |
| 111 | + xml.writeAttribute("name", className); |
| 112 | + for (var testName : testNames) { |
| 113 | + xml.writeEmptyElement("testcase"); |
| 114 | + xml.writeAttribute("name", testName); |
| 115 | + xml.writeAttribute("classname", className); |
| 116 | + } |
| 117 | + xml.writeEndElement(); |
| 118 | + xml.writeEndDocument(); |
| 119 | + xml.flush(); |
| 120 | + } |
| 121 | + } |
| 122 | + |
| 123 | + private static String classNameOf(TestIdentifier id) { |
| 124 | + TestSource src = id.getSource().orElse(null); |
| 125 | + if (src instanceof MethodSource) return ((MethodSource) src).getClassName(); |
| 126 | + if (src instanceof ClassSource) return ((ClassSource) src).getClassName(); |
| 127 | + return id.getDisplayName(); |
| 128 | + } |
| 129 | +} |
| 130 | +``` |
| 131 | + |
| 132 | +### 2. NEW `utils/junit-utils/src/main/resources/META-INF/services/org.junit.platform.launcher.TestExecutionListener` |
| 133 | + |
| 134 | +``` |
| 135 | +datadog.trace.junit.utils.retry.RetryMarkerListener |
| 136 | +``` |
| 137 | + |
| 138 | +### 3. EDIT `utils/junit-utils/build.gradle.kts` |
| 139 | + |
| 140 | +Add one line to `dependencies`: |
| 141 | +```kotlin |
| 142 | +compileOnly(libs.junit-platform-launcher) |
| 143 | +``` |
| 144 | + |
| 145 | +### 4. EDIT `buildSrc/src/main/kotlin/dd-trace-java.configure-tests.gradle.kts` |
| 146 | + |
| 147 | +Inside the first `tasks.withType<Test>().configureEach` block, after the `java.util.prefs.userRoot` line: |
| 148 | +```kotlin |
| 149 | +systemProperty("dd.test.results.dir", reports.junitXml.outputLocation.get().asFile.absolutePath) |
| 150 | +``` |
| 151 | + |
| 152 | +### 5. EDIT `.gitlab/collect-result/JUnitReport.java` |
| 153 | + |
| 154 | +Add imports: |
| 155 | +```java |
| 156 | +import java.util.ArrayList; |
| 157 | +import java.util.List; |
| 158 | +import java.util.Set; |
| 159 | +``` |
| 160 | + |
| 161 | +Add two methods after `tagFinalStatuses()`: |
| 162 | + |
| 163 | +```java |
| 164 | + Set<String> testcaseKeys() { |
| 165 | + var keys = new java.util.LinkedHashSet<String>(); |
| 166 | + for (var testcase : testcases()) { |
| 167 | + keys.add(testcase.getAttribute("classname") + "#" + testcase.getAttribute("name")); |
| 168 | + } |
| 169 | + return keys; |
| 170 | + } |
| 171 | + |
| 172 | + // Tags all <testcase> elements except the last for each retried key as skip. |
| 173 | + // Must be called before tagFinalStatuses() so hasFinalStatusProperty() skips tagged entries. |
| 174 | + void tagRetriedTests(Set<String> retriedTestKeys) { |
| 175 | + if (retriedTestKeys.isEmpty()) return; |
| 176 | + var testcasesByKey = new LinkedHashMap<String, List<Element>>(); |
| 177 | + for (var testcase : testcases()) { |
| 178 | + var key = testcase.getAttribute("classname") + "#" + testcase.getAttribute("name"); |
| 179 | + if (retriedTestKeys.contains(key)) { |
| 180 | + testcasesByKey.computeIfAbsent(key, k -> new ArrayList<>()).add(testcase); |
| 181 | + } |
| 182 | + } |
| 183 | + for (var attempts : testcasesByKey.values()) { |
| 184 | + for (var i = 0; i < attempts.size() - 1; i++) { |
| 185 | + addFinalStatusProperty(attempts.get(i), "skip", MissingPropertiesPlacement.FIRST_CHILD); |
| 186 | + } |
| 187 | + } |
| 188 | + } |
| 189 | +``` |
| 190 | + |
| 191 | +### 6. EDIT `.gitlab/collect-result/ResultCollector.java` |
| 192 | + |
| 193 | +**a) `collect(Path sourceXml)`** — skip marker files; apply markers before normalization: |
| 194 | + |
| 195 | +```java |
| 196 | + private void collect(Path sourceXml) throws Exception { |
| 197 | + if (fileName(sourceXml).startsWith("TEST-retried-")) return; |
| 198 | + |
| 199 | + var aggregatedName = aggregatedFileName(sourceXml); |
| 200 | + var targetXml = resultsDir.resolve(aggregatedName); |
| 201 | + System.out.print("- " + toUnixString(sourceXml) + " as " + aggregatedName); |
| 202 | + |
| 203 | + var sourceFile = sourceFileResolver.resolve(sourceXml); |
| 204 | + var report = JUnitReport.parse(sourceXml); |
| 205 | + var reportChangedBeforeFinalStatus = report.addFileAttribute(sourceFile); |
| 206 | + applyRetryMarkers(sourceXml.getParent(), report); // before normalizeStableTestNames |
| 207 | + reportChangedBeforeFinalStatus |= report.normalizeStableTestNames(); |
| 208 | + report.tagSyntheticFailures(); |
| 209 | + report.tagFinalStatuses(); |
| 210 | + report.write(targetXml); |
| 211 | + |
| 212 | + if (reportChangedBeforeFinalStatus) { |
| 213 | + System.out.print(" (non-stable test names detected)"); |
| 214 | + } |
| 215 | + System.out.println(); |
| 216 | + } |
| 217 | +``` |
| 218 | + |
| 219 | +**b) Add `applyRetryMarkers`** after `collect()`: |
| 220 | + |
| 221 | +```java |
| 222 | + private static void applyRetryMarkers(Path dir, JUnitReport report) { |
| 223 | + if (dir == null) return; |
| 224 | + try (var paths = Files.list(dir)) { |
| 225 | + paths |
| 226 | + .filter(p -> fileName(p).startsWith("TEST-retried-") && fileName(p).endsWith(".xml")) |
| 227 | + .forEach(markerFile -> { |
| 228 | + try { |
| 229 | + report.tagRetriedTests(JUnitReport.parse(markerFile).testcaseKeys()); |
| 230 | + } catch (Exception e) { |
| 231 | + System.err.println( |
| 232 | + "[ResultCollector] Failed to apply retry markers from " |
| 233 | + + markerFile.getFileName() + ": " + e.getMessage()); |
| 234 | + } |
| 235 | + }); |
| 236 | + } catch (IOException e) { |
| 237 | + System.err.println( |
| 238 | + "[ResultCollector] Failed to scan for retry markers in " + dir + ": " + e.getMessage()); |
| 239 | + } |
| 240 | + } |
| 241 | +``` |
| 242 | + |
| 243 | +## Examples |
| 244 | + |
| 245 | +**Flaky (retried → passed):** |
| 246 | +``` |
| 247 | +XML: <testcase name="t()"><failure/></testcase> attempt 1 |
| 248 | + <testcase name="t()"/> attempt 2 |
| 249 | +
|
| 250 | +applyRetryMarkers → tagRetriedTests({"C#t()"}): |
| 251 | + attempt 1 → skip (tagged), attempt 2 → untagged |
| 252 | +
|
| 253 | +tagFinalStatuses: |
| 254 | + attempt 1 → skip ✓ attempt 2 → pass ✓ |
| 255 | +``` |
| 256 | + |
| 257 | +**Always failing (retried → still fails):** |
| 258 | +``` |
| 259 | +XML: <testcase name="t()"><failure/></testcase> attempt 1 |
| 260 | + <testcase name="t()"><failure/></testcase> attempt 2 |
| 261 | + <testcase name="t()"><failure/></testcase> attempt 3 |
| 262 | +
|
| 263 | +applyRetryMarkers → tagRetriedTests({"C#t()"}): |
| 264 | + attempts 1–2 → skip (tagged), attempt 3 → untagged |
| 265 | +
|
| 266 | +tagFinalStatuses: |
| 267 | + attempts 1–2 → skip ✓ attempt 3 → fail ✓ |
| 268 | +``` |
0 commit comments