Skip to content

Commit ce81e95

Browse files
committed
Improve GPU runtime trace output
1 parent 004d8d2 commit ce81e95

3 files changed

Lines changed: 129 additions & 13 deletions

File tree

src/common/m_trace_runtime.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ static int trace_file_fd = -1;
7171
static void mfc_trace_initialize_once(void) __attribute__((no_instrument_function));
7272
static void mfc_trace_initialize(void) __attribute__((no_instrument_function));
7373
static int mfc_trace_process_rank(void) __attribute__((no_instrument_function));
74+
static void mfc_trace_executable_name(char *buffer, size_t buffer_size) __attribute__((no_instrument_function));
7475
static void mfc_trace_format_function_name(const char *name, char *buffer, size_t buffer_size) __attribute__((no_instrument_function));
7576
static void mfc_trace_pretty_file(const char *file, char *buffer, size_t buffer_size) __attribute__((no_instrument_function));
7677
static void mfc_trace_shell_quote(const char *text, char *buffer, size_t buffer_size) __attribute__((no_instrument_function));
@@ -150,6 +151,12 @@ static void mfc_trace_initialize_once(void) {
150151
(void)write(STDERR_FILENO, message, sizeof(message) - 1);
151152
}
152153
}
154+
155+
if (trace_enabled) {
156+
char executable[MFC_TRACE_NAME_MAX];
157+
mfc_trace_executable_name(executable, sizeof(executable));
158+
mfc_trace_write_line("TRACE_RUN", executable, NULL, 0);
159+
}
153160
}
154161

155162
static void mfc_trace_initialize(void) {
@@ -181,6 +188,24 @@ static int mfc_trace_process_rank(void) {
181188
return 0;
182189
}
183190

191+
/*
 * Write the base name of the running executable into `buffer`.
 *
 * The path is obtained by resolving the "/proc/self/exe" symlink; everything
 * up to and including the last '/' is dropped. When the link cannot be read
 * (readlink returns 0 or -1) the literal "program" is written instead.
 *
 * buffer      - destination; left untouched when buffer_size is 0.
 * buffer_size - capacity of `buffer` in bytes; output is truncated by
 *               snprintf to fit, including the terminating NUL.
 */
static void mfc_trace_executable_name(char *buffer, size_t buffer_size) {
    char exe_path[1024];
    const char *base_name = "program"; /* fallback when the link is unreadable */
    const char *last_slash;
    ssize_t path_len;

    if (buffer_size == 0) {
        return;
    }

    /* readlink does not NUL-terminate, so one byte is reserved for that. */
    path_len = readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1);
    if (path_len > 0) {
        exe_path[path_len] = '\0';
        last_slash = strrchr(exe_path, '/');
        base_name = (last_slash == NULL) ? exe_path : last_slash + 1;
    }

    snprintf(buffer, buffer_size, "%s", base_name);
}
208+
184209
static void mfc_trace_format_function_name(const char *name, char *buffer, size_t buffer_size) {
185210
const char *mod_marker;
186211
const char *mp_marker;
@@ -407,6 +432,10 @@ static int mfc_trace_skip_symbol(const char *name) {
407432
strstr(name, "mfc_trace_") != NULL ||
408433
strstr(name, "m_trace:") != NULL ||
409434
strstr(name, "m_trace_s_") != NULL ||
435+
strstr(name, "m_nvtx_") != NULL ||
436+
strncmp(name, "..acc_", 6) == 0 ||
437+
strstr(name, "acc_cuda_funcreg_constructor") != NULL ||
438+
strstr(name, "acc_data_constructor") != NULL ||
410439
strcmp(name, "s_trace_point_begin") == 0 ||
411440
strcmp(name, "s_trace_point_end") == 0;
412441
}

toolchain/mfc/run/run.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,15 @@ def __profiler_prepend() -> typing.List[str]:
6161
return []
6262

6363

64+
def __runner_prepend() -> typing.List[str]:
    """Build the command prefix placed in front of each launched target.

    The prefix always contains whatever ``__profiler_prepend()`` returns.
    When runtime tracing is requested and the GPU mode is ACC, the
    ``trace.py`` wrapper script (run through ``python3``) is put in front
    of that profiler prefix.
    """
    profiler_prefix = __profiler_prepend()

    wants_acc_trace = __runtime_trace_requested() and ARG("gpu") == gpuConfigOptions.ACC.value
    if not wants_acc_trace:
        return profiler_prefix

    wrapper_script = os.path.join(MFC_ROOT_DIR, "toolchain", "mfc", "run", "trace.py")
    wrapper_command = ["python3", wrapper_script]
    return wrapper_command + profiler_prefix
71+
72+
6473
def get_baked_templates() -> dict:
6574
return {os.path.splitext(os.path.basename(f))[0]: file_read(f) for f in glob(os.path.join(MFC_TEMPLATE_DIR, "*.mako"))}
6675

@@ -90,6 +99,18 @@ def __get_template() -> Template:
9099

91100

92101
def __generate_job_script(targets, case: input.MFCInputFile):
102+
# Compute GPU mode booleans for templates
103+
gpu_mode = ARG("gpu")
104+
105+
# Validate gpu_mode is one of the expected values
106+
valid_gpu_modes = {e.value for e in gpuConfigOptions}
107+
if gpu_mode not in valid_gpu_modes:
108+
raise MFCException(f"Invalid GPU mode '{gpu_mode}'. Must be one of: {', '.join(sorted(valid_gpu_modes))}")
109+
110+
gpu_enabled = gpu_mode != gpuConfigOptions.NONE.value
111+
gpu_acc = gpu_mode == gpuConfigOptions.ACC.value
112+
gpu_mp = gpu_mode == gpuConfigOptions.MP.value
113+
93114
env = {}
94115
if ARG("gpus") is not None:
95116
gpu_ids = ",".join([str(_) for _ in ARG("gpus")])
@@ -109,21 +130,11 @@ def __generate_job_script(targets, case: input.MFCInputFile):
109130
"MFC_TRACE_STDOUT": "1" if sys.stdout.isatty() else "0",
110131
}
111132
)
133+
if gpu_acc:
134+
env.update({"MFC_TRACE_ACC_NOTIFY": "1", "NV_ACC_NOTIFY": "1"})
112135
if ARG("mpi"):
113136
env.update({"MFC_TRACE_MPI": "1"})
114137

115-
# Compute GPU mode booleans for templates
116-
gpu_mode = ARG("gpu")
117-
118-
# Validate gpu_mode is one of the expected values
119-
valid_gpu_modes = {e.value for e in gpuConfigOptions}
120-
if gpu_mode not in valid_gpu_modes:
121-
raise MFCException(f"Invalid GPU mode '{gpu_mode}'. Must be one of: {', '.join(sorted(valid_gpu_modes))}")
122-
123-
gpu_enabled = gpu_mode != gpuConfigOptions.NONE.value
124-
gpu_acc = gpu_mode == gpuConfigOptions.ACC.value
125-
gpu_mp = gpu_mode == gpuConfigOptions.MP.value
126-
127138
content = __get_template().render(
128139
**{**ARGS(), "targets": targets},
129140
ARG=ARG,
@@ -132,7 +143,7 @@ def __generate_job_script(targets, case: input.MFCInputFile):
132143
MFC_ROOT_DIR=MFC_ROOT_DIR,
133144
SIMULATION=SIMULATION,
134145
qsystem=queues.get_system(),
135-
profiler=shlex.join(__profiler_prepend()),
146+
profiler=shlex.join(__runner_prepend()),
136147
gpu_enabled=gpu_enabled,
137148
gpu_acc=gpu_acc,
138149
gpu_mp=gpu_mp,

toolchain/mfc/run/trace.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#!/usr/bin/env python3
2+
import collections
3+
import os
4+
import re
5+
import subprocess
6+
import sys
7+
8+
9+
def _normalize_acc_line(line: str) -> str:
10+
line = re.sub(r"file=[^ ]*/(src/[^ ]*)", r"file=\1", line)
11+
line = re.sub(r"[^ ]*/(src/[^ ]*)", r"\1", line)
12+
line = re.sub(r"devaddr=0x[0-9A-Fa-f]+", "devaddr=<addr>", line)
13+
return line
14+
15+
16+
def _is_acc_noise(line: str) -> bool:
17+
prefixes = (
18+
"Enter ",
19+
"Leave ",
20+
"Wait ",
21+
"Implicit wait",
22+
"upload CUDA data",
23+
"download CUDA data",
24+
"create CUDA data",
25+
"delete CUDA data",
26+
"alloc",
27+
)
28+
return line.startswith(prefixes) or (".fpp function=" in line and " device=" in line)
29+
30+
31+
def main() -> int:
    """Run the command given on the command line, filtering its output.

    Behaviour, in order of precedence:

    * No command given: print a usage line to stderr and return 2.
    * Any argument ends with ``/syscheck``: run the command with
      ``MFC_TRACE=0``, ``NV_ACC_NOTIFY=0`` and ``MFC_TRACE_ACC_NOTIFY``
      removed from the environment, with no output filtering.
    * ``MFC_TRACE_ACC_NOTIFY`` or ``MFC_TRACE_FILE`` is unset/empty: run the
      command unchanged, with no output filtering.
    * Otherwise: run the command with stderr merged into stdout and read it
      line by line. Each distinct (normalized) ``launch CUDA kernel`` line is
      appended once to the trace file as ``TRACE_ACC ...``; lines recognised
      by ``_is_acc_noise`` are dropped; everything else is echoed to stdout.
      After the child's output ends, one ``TRACE_ACC_REPEAT x<count> ...``
      line is appended per kernel line that occurred more than once, where
      ``<count>`` is the number of occurrences after the first.

    Returns:
        The exit status of the command that was run (or 2 on usage error).
    """
    if len(sys.argv) < 2:
        print("usage: trace.py <command> [args...]", file=sys.stderr)
        return 2

    command = sys.argv[1:]

    if any(arg.endswith("/syscheck") for arg in command):
        env = os.environ.copy()
        env["MFC_TRACE"] = "0"
        env.pop("MFC_TRACE_ACC_NOTIFY", None)
        env["NV_ACC_NOTIFY"] = "0"
        return subprocess.run(command, env=env, check=False).returncode

    trace_file = os.environ.get("MFC_TRACE_FILE")
    if not os.environ.get("MFC_TRACE_ACC_NOTIFY") or not trace_file:
        return subprocess.run(command, check=False).returncode

    # Occurrences of each normalized kernel-launch line.
    launch_counts = collections.Counter()

    # Open the trace file before spawning the child so that a failure to open
    # it cannot leave a running child with nobody draining its pipe.
    with open(trace_file, "a", encoding="utf-8") as trace:
        # The context manager closes the pipe and reaps the child even when
        # the loop below raises. Undecodable bytes in the child's output are
        # escaped rather than raising UnicodeDecodeError and aborting the run.
        with subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors="backslashreplace",
        ) as process:
            assert process.stdout is not None  # guaranteed by stdout=PIPE

            for output_line in process.stdout:
                line = output_line.rstrip("\n")
                if line.startswith("launch CUDA kernel"):
                    short_line = _normalize_acc_line(line)
                    launch_counts[short_line] += 1
                    if launch_counts[short_line] == 1:
                        trace.write(f"TRACE_ACC {short_line}\n")
                        # Flush so the entry is visible while the run is live.
                        trace.flush()
                elif not _is_acc_noise(line):
                    print(line, flush=True)

            for kernel_line, total in sorted(launch_counts.items()):
                if total > 1:
                    trace.write(f"TRACE_ACC_REPEAT x{total - 1} {kernel_line}\n")

    # Popen.__exit__ has already waited for the child.
    return process.returncode
73+
74+
75+
if __name__ == "__main__":
76+
sys.exit(main())

0 commit comments

Comments
 (0)