Skip to content

Commit fd1706e

Browse files
authored
(fix)logger use-after-free in ZeLogger during teardown (#483)
Replace the thread_local std::string line/tid buffers with POD thread_local char buffers, so log calls that arrive during exit teardown no longer write into freed memory. Also add stand alone ASan regression test. Signed-off-by: Russell McGuire <russell.w.mcguire@intel.com>
1 parent 75dc1fd commit fd1706e

4 files changed

Lines changed: 204 additions & 31 deletions

File tree

source/utils/ze_logger.cpp

Lines changed: 80 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <cerrno>
1313
#include <chrono>
1414
#include <cstdio>
15+
#include <cstring>
1516
#include <ctime>
1617
#include <iomanip>
1718
#include <iostream>
@@ -156,10 +157,13 @@ struct LogSink {
156157
is_good(true)
157158
{}
158159

159-
void write(const std::string &line) {
160+
// Raw (data,len) overload — lets callers emit a fixed char buffer without
161+
// constructing an intermediate std::string (keeps the log hot path allocation-free).
162+
void write(const char *data, std::size_t len) {
160163
std::lock_guard<std::mutex> lk(mtx);
161164
if (is_good) {
162-
*stream << line << '\n';
165+
stream->write(data, static_cast<std::streamsize>(len));
166+
stream->put('\n');
163167
// stream->fail() is a single rdstate() bitmask read — negligible cost.
164168
// Once a failure is detected, is_good=false so all future calls return
165169
// immediately at the guard above, paying zero additional cost.
@@ -172,6 +176,8 @@ struct LogSink {
172176
}
173177
}
174178

179+
void write(const std::string &line) { write(line.data(), line.size()); }
180+
175181
void flush() {
176182
std::lock_guard<std::mutex> lk(mtx);
177183
if (stream) {
@@ -235,10 +241,13 @@ void ZeLogger::flush() {
235241
// %^%l%$ — level label (with color when tty)
236242
// %v — message
237243
//
238-
// formatLine writes into an existing string (passed by ref) to avoid
239-
// a return-by-value heap allocation on every log call.
244+
// formatLine writes into a caller-provided fixed char buffer (snprintf-style)
245+
// and returns the total length the line needs. It keeps no owning static state
246+
// (POD thread_local buffers only), so it is allocation-free on the hot path and
247+
// safe to call during exit-time teardown.
240248
// ---------------------------------------------------------------------------
241-
void ZeLogger::formatLine(LogLevel msg_level, const std::string &msg, std::string &out) {
249+
std::size_t ZeLogger::formatLine(LogLevel msg_level, const std::string &msg,
250+
char *out, std::size_t cap) {
242251
// Build timestamp: YYYY-MM-DD HH:MM:SS.mmm
243252
auto now = std::chrono::system_clock::now();
244253
auto now_t = std::chrono::system_clock::to_time_t(now);
@@ -259,22 +268,50 @@ void ZeLogger::formatLine(LogLevel msg_level, const std::string &msg, std::strin
259268
std::snprintf(ts_full, sizeof(ts_full), "%s.%03lld", ts,
260269
static_cast<long long>(ms.count()));
261270

262-
// Thread id — computed once per thread, stored in a thread_local local
263-
// static inside this non-inline, non-template .cpp function.
264-
// This produces STB_LOCAL linkage (not STB_GNU_UNIQUE), so dlclose() is unaffected.
265-
static thread_local const std::string tid_str = [](){
271+
// Thread id — computed once per thread into a POD thread_local char buffer.
272+
// POD storage has a trivial destructor (nothing runs at thread/exit teardown),
273+
// so it cannot become a use-after-free when logging happens during process exit;
274+
// and as a function-local static in this non-inline, non-template .cpp function
275+
// it has STB_LOCAL linkage (not STB_GNU_UNIQUE), so dlclose() is unaffected.
276+
static thread_local char tid_buf[32];
277+
static thread_local std::size_t tid_len = 0;
278+
if (tid_len == 0) {
266279
std::ostringstream ss;
267280
ss << std::this_thread::get_id();
268-
return ss.str();
269-
}();
281+
const std::string s = ss.str();
282+
tid_len = s.size() < sizeof(tid_buf) ? s.size() : sizeof(tid_buf);
283+
std::memcpy(tid_buf, s.data(), tid_len);
284+
}
285+
286+
char pid_buf[24];
287+
int pid_written = std::snprintf(pid_buf, sizeof(pid_buf), "%lld",
288+
static_cast<long long>(GET_PID()));
289+
if (pid_written < 0) pid_written = 0;
290+
const std::size_t pid_len =
291+
static_cast<std::size_t>(pid_written) < sizeof(pid_buf)
292+
? static_cast<std::size_t>(pid_written)
293+
: sizeof(pid_buf) - 1;
270294

271295
const char *label = levelLabel(msg_level);
272296
const char *color = _sink->color_enabled ? levelColor(msg_level) : "";
273297
const char *reset = _sink->color_enabled ? AnsiColor::reset() : "";
274298

275-
// Write directly into the caller-provided string — no extra allocation.
276-
out.clear();
277-
out.reserve(_pattern.size() + msg.size() + 64);
299+
// Bounded append into the caller buffer. `len` always tracks the TOTAL bytes
300+
// the full line needs (so the caller can detect overflow), but we never write
301+
// past `cap`. No heap allocation on this path.
302+
std::size_t len = 0;
303+
auto put = [&](const char *s, std::size_t n) {
304+
if (len < cap) {
305+
std::size_t avail = cap - len;
306+
std::memcpy(out + len, s, n < avail ? n : avail);
307+
}
308+
len += n;
309+
};
310+
auto put_cstr = [&](const char *s) { put(s, std::strlen(s)); };
311+
auto put_ch = [&](char c) {
312+
if (len < cap) out[len] = c;
313+
len += 1;
314+
};
278315

279316
bool color_span_open = false;
280317

@@ -285,64 +322,77 @@ void ZeLogger::formatLine(LogLevel msg_level, const std::string &msg, std::strin
285322
case 'Y': {
286323
const char *seq = "%Y-%m-%d %H:%M:%S.%e";
287324
if (_pattern.compare(i, 20, seq) == 0) {
288-
out += ts_full;
325+
put_cstr(ts_full);
289326
i += 19;
290327
} else {
291-
out += '%';
292-
out += tok;
328+
put_ch('%');
329+
put_ch(tok);
293330
++i;
294331
}
295332
break;
296333
}
297334
case 't':
298-
out += tid_str;
335+
put(tid_buf, tid_len);
299336
++i;
300337
break;
301338
case 'P':
302-
out += std::to_string(GET_PID());
339+
put(pid_buf, pid_len);
303340
++i;
304341
break;
305342
case '^':
306-
out += color;
343+
put_cstr(color);
307344
color_span_open = true;
308345
++i;
309346
break;
310347
case 'l':
311-
out += label;
348+
put_cstr(label);
312349
++i;
313350
break;
314351
case '$':
315-
out += reset;
352+
put_cstr(reset);
316353
color_span_open = false;
317354
++i;
318355
break;
319356
case 'v':
320-
out += msg;
357+
put(msg.data(), msg.size());
321358
++i;
322359
break;
323360
default:
324-
out += '%';
361+
put_ch('%');
325362
break;
326363
}
327364
} else {
328-
out += _pattern[i];
365+
put_ch(_pattern[i]);
329366
}
330367
}
331368

332369
if (color_span_open) {
333-
out += reset;
370+
put_cstr(reset);
334371
}
372+
373+
return len;
335374
}
336375

337376
void ZeLogger::write(LogLevel msg_level, const std::string &msg) {
338377
if (!_sink || msg_level < _level) {
339378
return;
340379
}
341-
// Reuse a thread_local buffer to avoid a heap allocation per log call.
380+
// POD thread_local buffer — trivial destructor, freed with the thread, no heap
381+
// leak. A log call can arrive during exit-time teardown (e.g. SYCL shutdown →
382+
// validation-layer DDI → log) AFTER a non-trivial thread_local (std::string)
383+
// would have been destroyed; a POD char buffer is safe to use at any time.
342384
// STB_LOCAL linkage (non-inline, non-template .cpp function) — safe for dlclose.
343-
static thread_local std::string line_buf;
344-
formatLine(msg_level, msg, line_buf);
345-
_sink->write(line_buf);
385+
static thread_local char line_buf[2048];
386+
const std::size_t n = formatLine(msg_level, msg, line_buf, sizeof(line_buf));
387+
if (n <= sizeof(line_buf)) {
388+
_sink->write(line_buf, n); // hot path: zero allocation
389+
} else {
390+
// Rare oversized line: one allocation, same output (no truncation).
391+
// Line length is deterministic for a given message, so a single retry fits.
392+
std::string big(n, '\0');
393+
const std::size_t n2 = formatLine(msg_level, msg, &big[0], big.size());
394+
_sink->write(big.data(), n2 < big.size() ? n2 : big.size());
395+
}
346396
}
347397

348398
void ZeLogger::trace(const std::string &msg) { write(LogLevel::trace, msg); }

source/utils/ze_logger.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,13 @@ class ZeLogger {
8989

9090
private:
9191
void write(LogLevel msg_level, const std::string &msg);
92-
void formatLine(LogLevel msg_level, const std::string &msg, std::string &out);
92+
// Formats the log line into a caller-provided fixed buffer (snprintf-style).
93+
// Returns the total number of bytes the full line needs; if that exceeds
94+
// `cap` the buffer holds a truncated prefix and the caller must retry with a
95+
// larger buffer. Uses no owning static state, so it is safe to call during
96+
// exit-time teardown (no thread_local std::string to outlive its destructor).
97+
std::size_t formatLine(LogLevel msg_level, const std::string &msg,
98+
char *out, std::size_t cap);
9399

94100
LogLevel _level;
95101
std::string _pattern;

test/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,3 +1159,21 @@ set_property(TEST test_validation_layer_api_tracing_immediate_cmdlist PROPERTY E
11591159

11601160
add_test(NAME test_validation_layer_api_tracing_command_queue COMMAND tests --gtest_filter=ValidationLayerApiTracing.GivenValidationLayerEnabledWithTraceLevelLoggingWhenCallingCommandQueueApisThenTracingDoesNotCrash)
11611161
set_property(TEST test_validation_layer_api_tracing_command_queue PROPERTY ENVIRONMENT "ZE_ENABLE_VALIDATION_LAYER=1;ZEL_ENABLE_LOADER_LOGGING=1;ZEL_LOADER_LOGGING_LEVEL=trace;ZE_ENABLE_NULL_DRIVER=1")
1162+
1163+
# -----------------------------------------------------------------------------
1164+
# Standalone unit test for the ZeLogger exit-time / teardown use-after-free fix.
1165+
# Links ONLY level_zero_utils (no loader, drivers, or layers), so it is a true
1166+
# unit test independent of the static/dynamic build model and of any hardware.
1167+
# Built with -DUSE_ASAN=ON it is a deterministic regression detector for the
1168+
# logger UAF; without ASan it still exercises the teardown-time logging path.
1169+
# -----------------------------------------------------------------------------
1170+
add_executable(ze_logger_teardown_unit_tests ze_logger_teardown_unit_tests.cpp)
1171+
target_include_directories(ze_logger_teardown_unit_tests PRIVATE
1172+
${PROJECT_SOURCE_DIR}/include
1173+
${PROJECT_SOURCE_DIR}/source/inc
1174+
)
1175+
target_link_libraries(ze_logger_teardown_unit_tests PRIVATE GTest::gtest_main level_zero_utils)
1176+
if(UNIX AND NOT APPLE)
1177+
target_link_libraries(ze_logger_teardown_unit_tests PRIVATE pthread)
1178+
endif()
1179+
add_test(NAME ze_logger_teardown_unit_tests COMMAND ze_logger_teardown_unit_tests)
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/*
2+
*
3+
* Copyright (C) 2026 Intel Corporation
4+
*
5+
* SPDX-License-Identifier: MIT
6+
*
7+
*/
8+
9+
// Standalone unit test for the exit-time / teardown use-after-free in
10+
// loader::ZeLogger. It links ONLY level_zero_utils (the static library that
11+
// contains ze_logger.cpp) -- no loader, no drivers, no validation/tracing
12+
// layers, and none of the SYCL / Unified Runtime stack that originally
13+
// surfaced the bug.
14+
//
15+
// Background
16+
// ----------
17+
// The production crash was a heap-use-after-free: during process exit, a late
18+
// destructor (SYCL shutdown -> UR queue dtor -> zeCommandListHostSynchronize ->
19+
// validation layer) called into ZeLogger AFTER the logger's function-local
20+
// `thread_local std::string` line/tid buffers had already been destroyed by the
21+
// thread-local teardown phase. formatLine() then wrote into the freed buffer.
22+
//
23+
// Reproducing it without SYCL/UR
24+
// ------------------------------
25+
// thread_local objects are destroyed in REVERSE order of initialization. If we
26+
// declare a thread_local object whose destructor logs *before* the first real
27+
// log call on a worker thread, then on thread exit:
28+
// 1. ZeLogger's own thread_local buffers (initialized during the first log)
29+
// are destroyed first, and
30+
// 2. our logging destructor runs last -- emitting a log line into buffers
31+
// that teardown has already released.
32+
// With the buggy `thread_local std::string` implementation this is a
33+
// heap-use-after-free, caught deterministically when built with -DUSE_ASAN=ON.
34+
// With the POD `thread_local char[]` fix the buffers have trivial destructors
35+
// (their storage survives until the thread's TLS block is freed, after all
36+
// thread_local destructors have run), so the late log call is safe.
37+
38+
#include "gtest/gtest.h"
39+
40+
#include "ze_logger.h"
41+
42+
#include <thread>
43+
44+
namespace {
45+
46+
// A thread_local object whose destructor logs through the shared logger.
47+
struct LogFromThreadLocalDtor {
48+
loader::ZeLogger *logger;
49+
~LogFromThreadLocalDtor() {
50+
// Runs during the worker thread's thread_local teardown, AFTER the
51+
// logger's own thread_local line/tid buffers have been destroyed.
52+
logger->trace("ZeLogger invoked from a thread_local destructor during teardown");
53+
}
54+
};
55+
56+
// Runs on a dedicated worker thread so that its thread_local destructors fire
57+
// deterministically at thread exit (i.e. at join()), independent of process
58+
// exit ordering.
59+
void workerLogsDuringTeardown(loader::ZeLogger *logger) {
60+
// Initialized FIRST on this thread -> its destructor runs LAST (after the
61+
// logger's own thread_local buffers, which are initialized by the warmup
62+
// log call below).
63+
thread_local LogFromThreadLocalDtor guard{logger};
64+
65+
// First real log call: initializes ZeLogger's thread_local line/tid buffers
66+
// (registered SECOND -> destroyed FIRST). Exercises every formatLine append
67+
// path via the pattern set on the logger.
68+
logger->trace("warmup: initialize the logger's thread_local buffers");
69+
70+
// Returning ends the thread: the line/tid buffers are torn down first, then
71+
// `guard`'s destructor logs into them.
72+
}
73+
74+
} // namespace
75+
76+
// Sanity: the logger formats and writes without crashing on a live thread.
77+
TEST(ZeLoggerTeardown, BasicLogDoesNotCrash) {
78+
loader::ZeLogger logger(/*use_stderr=*/true, loader::LogLevel::trace,
79+
"[%Y-%m-%d %H:%M:%S.%e] [tid:%t pid:%P] [%l] %v");
80+
logger.trace("trace message");
81+
logger.info("info message");
82+
logger.warn("warn message");
83+
SUCCEED();
84+
}
85+
86+
// Regression guard for the exit-time logger UAF. Builds with -DUSE_ASAN=ON turn
87+
// this into a deterministic detector: the buggy thread_local-std::string logger
88+
// aborts here with heap-use-after-free; the POD-buffer fix completes cleanly.
89+
TEST(ZeLoggerTeardown, LogFromThreadLocalDestructorAfterBufferTeardownIsSafe) {
90+
loader::ZeLogger logger(/*use_stderr=*/true, loader::LogLevel::trace,
91+
"[%Y-%m-%d %H:%M:%S.%e] [tid:%t pid:%P] [%l] %v");
92+
93+
std::thread worker(workerLogsDuringTeardown, &logger);
94+
worker.join(); // teardown-time logging happens here
95+
96+
// Reaching this point without an ASan abort or crash means logging during
97+
// the thread_local destruction phase did not touch freed memory.
98+
SUCCEED();
99+
}

0 commit comments

Comments
 (0)