Skip to content

Commit ac5f8b1

Browse files
authored
Fix TempLocationManagerTest flakiness and tighten assertions (#11239)
Fix TempLocationManagerTest flakiness and tighten assertions Run cleanup on a dedicated thread instead of the shared AgentTaskScheduler so testConcurrentCleanup is not queued behind cleanup tasks scheduled by other tests in the same JVM. Also fix a buggy assertion that masked cleanup failures, drop a dead visitFileFailed override, and minor cleanup. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> Merge branch 'master' into jb/tempman_tests Co-authored-by: jaroslav.bachorik <jaroslav.bachorik@datadoghq.com>
1 parent bfbfa5b commit ac5f8b1

1 file changed

Lines changed: 42 additions & 51 deletions

File tree

internal-api/src/test/java/datadog/trace/util/TempLocationManagerTest.java

Lines changed: 42 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -97,30 +97,22 @@ void testCleanup(String subPath) throws Exception {
9797
"ddprof-test-",
9898
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
9999
myDir.toFile().deleteOnExit();
100-
Properties props = new Properties();
101-
props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString());
102-
props.put(
103-
ProfilingConfig.PROFILING_UPLOAD_PERIOD,
104-
"0"); // to force immediate cleanup; must be a string value!
105-
ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props);
106-
TempLocationManager tempLocationManager = new TempLocationManager(configProvider);
100+
TempLocationManager tempLocationManager =
101+
instance(myDir, false, TempLocationManager.CleanupHook.EMPTY);
107102
Path tempDir = tempLocationManager.getTempDir(Paths.get(subPath));
108103
assertNotNull(tempDir);
109104

110105
// fake temp location
111-
Path fakeTempDir = tempDir.getParent();
112-
while (fakeTempDir != null && !fakeTempDir.getFileName().toString().contains("ddprof")) {
113-
fakeTempDir = fakeTempDir.getParent();
114-
}
115-
fakeTempDir = fakeTempDir.resolve("pid_0000");
106+
Path fakeTempDir = myDir.resolve(TempLocationManager.getBaseTempDirName()).resolve("pid_0000");
116107
Files.createDirectories(fakeTempDir);
117108
Path tmpFile = Files.createFile(fakeTempDir.resolve("test.txt"));
118109
tmpFile.toFile().deleteOnExit(); // make sure this is deleted at exit
119110
fakeTempDir.toFile().deleteOnExit(); // also this one
120111
boolean rslt = tempLocationManager.cleanup();
121-
// fake temp location should be deleted
122-
// real temp location should be kept
123-
assertFalse(rslt && Files.exists(fakeTempDir));
112+
// fake temp location should be deleted, real temp location should be kept
113+
assertTrue(rslt, "cleanup should succeed");
114+
assertFalse(Files.exists(tmpFile), "stale pid file should be gone");
115+
assertFalse(Files.exists(fakeTempDir), "stale pid dir should be gone");
124116
assertTrue(Files.exists(tempDir));
125117
}
126118

@@ -143,6 +135,7 @@ void testConcurrentCleanup(String section) throws Exception {
143135
Files.createTempDirectory(
144136
"ddprof-test-",
145137
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
138+
baseDir.toFile().deleteOnExit();
146139

147140
Path fakeTempDir =
148141
baseDir.resolve(TempLocationManager.getBaseTempDirName() + "/pid_1234/scratch");
@@ -180,7 +173,7 @@ public FileVisitResult preVisitDirectory(
180173
if (section.equals("preVisitDirectory") && dir.equals(fakeTempDir)) {
181174
syncPoint(timeout);
182175
}
183-
return null;
176+
return TempLocationManager.CleanupHook.super.preVisitDirectory(dir, attrs, timeout);
184177
}
185178

186179
@Override
@@ -189,7 +182,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs, boolean t
189182
if (section.equals("visitFile") && file.equals(fakeTempFile)) {
190183
syncPoint(timeout);
191184
}
192-
return null;
185+
return TempLocationManager.CleanupHook.super.visitFile(file, attrs, timeout);
193186
}
194187

195188
@Override
@@ -198,34 +191,41 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc, boolean tim
198191
if (section.equals("postVisitDirectory") && dir.equals(fakeTempDir)) {
199192
syncPoint(timeout);
200193
}
201-
return null;
194+
return TempLocationManager.CleanupHook.super.postVisitDirectory(dir, exc, timeout);
202195
}
203196
};
204197

205-
TempLocationManager mgr = instance(baseDir, true, blocker);
206-
207-
// Wait for cleanup to reach the synchronization point
208-
boolean reached = hookReached.await(30, TimeUnit.SECONDS);
209-
assertTrue(reached, "Cleanup thread should reach hook within 30 seconds");
210-
211-
// Now we know cleanup is paused at the hook - safe to delete
212-
Files.deleteIfExists(fakeTempFile);
213-
214-
// Signal cleanup to proceed
215-
proceedSignal.countDown();
216-
217-
// Wait for cleanup to complete
218-
assertTrue(mgr.waitForCleanup(30, TimeUnit.SECONDS), "Cleanup should complete within 30s");
219-
220-
// Check for errors in cleanup thread
221-
Throwable error = cleanupError.get();
222-
if (error != null) {
223-
throw new AssertionError("Cleanup thread encountered error", error);
198+
TempLocationManager mgr = instance(baseDir, false, blocker);
199+
200+
// Run cleanup on a dedicated thread instead of the shared AgentTaskScheduler
201+
// worker, which can be queued behind cleanup tasks scheduled by other tests.
202+
Thread cleanupThread = new Thread(mgr::cleanup, "test-cleanup");
203+
cleanupThread.setDaemon(true);
204+
cleanupThread.start();
205+
206+
try {
207+
assertTrue(
208+
hookReached.await(30, TimeUnit.SECONDS),
209+
"Cleanup thread should reach hook within 30 seconds");
210+
// The visitor may have already deleted the file by this point; tolerate.
211+
Files.deleteIfExists(fakeTempFile);
212+
proceedSignal.countDown();
213+
cleanupThread.join(TimeUnit.SECONDS.toMillis(30));
214+
assertFalse(cleanupThread.isAlive(), "Cleanup should complete within 30s");
215+
216+
Throwable error = cleanupError.get();
217+
if (error != null) {
218+
throw new AssertionError("Cleanup thread encountered error", error);
219+
}
220+
221+
assertFalse(withTimeout.get(), "Cleanup should not have timed out");
222+
assertFalse(Files.exists(fakeTempFile));
223+
assertFalse(Files.exists(fakeTempDir));
224+
} finally {
225+
// Release the cleanup thread so it can drain on early assertion failure.
226+
proceedSignal.countDown();
227+
cleanupThread.join(TimeUnit.SECONDS.toMillis(5));
224228
}
225-
226-
assertFalse(withTimeout.get(), "Cleanup should not have timed out");
227-
assertFalse(Files.exists(fakeTempFile));
228-
assertFalse(Files.exists(fakeTempDir));
229229
}
230230

231231
@ParameterizedTest
@@ -264,16 +264,6 @@ public FileVisitResult preVisitDirectory(
264264
return TempLocationManager.CleanupHook.super.preVisitDirectory(dir, attrs, timeout);
265265
}
266266

267-
@Override
268-
public FileVisitResult visitFileFailed(Path file, IOException exc, boolean timeout)
269-
throws IOException {
270-
withTimeout.compareAndSet(false, timeout);
271-
if (section.equals("visitFileFailed")) {
272-
maybeAdvanceTime();
273-
}
274-
return TempLocationManager.CleanupHook.super.visitFileFailed(file, exc, timeout);
275-
}
276-
277267
@Override
278268
public FileVisitResult postVisitDirectory(Path dir, IOException exc, boolean timeout)
279269
throws IOException {
@@ -298,6 +288,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs, boolean t
298288
Files.createTempDirectory(
299289
"ddprof-test-",
300290
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
291+
baseDir.toFile().deleteOnExit();
301292
TempLocationManager instance = instance(baseDir, false, delayer, timeSource);
302293
Path mytempdir = instance.getTempDir();
303294
Path otherTempdir = mytempdir.getParent().resolve("pid_0000");

0 commit comments

Comments
 (0)