Skip to content

Commit 68b2c4d

Browse files
authored
chore: Add Java Flight Recorder profiling to TPC benchmarks (#3597)
1 parent 394b014 commit 68b2c4d

4 files changed

Lines changed: 72 additions & 9 deletions

File tree

benchmarks/tpc/README.md

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,17 @@ All benchmarks are run via `run.py`:
3838
python3 run.py --engine <engine> --benchmark <tpch|tpcds> [options]
3939
```
4040

41-
| Option | Description |
42-
| -------------- | ------------------------------------------------ |
43-
| `--engine` | Engine name (matches a TOML file in `engines/`) |
44-
| `--benchmark` | `tpch` or `tpcds` |
45-
| `--iterations` | Number of iterations (default: 1) |
46-
| `--output` | Output directory (default: `.`) |
47-
| `--query` | Run a single query number |
48-
| `--no-restart` | Skip Spark master/worker restart |
49-
| `--dry-run` | Print the spark-submit command without executing |
41+
| Option | Description |
42+
| -------------- | -------------------------------------------------------- |
43+
| `--engine` | Engine name (matches a TOML file in `engines/`) |
44+
| `--benchmark` | `tpch` or `tpcds` |
45+
| `--iterations` | Number of iterations (default: 1) |
46+
| `--output` | Output directory (default: `.`) |
47+
| `--query` | Run a single query number |
48+
| `--no-restart` | Skip Spark master/worker restart |
49+
| `--dry-run` | Print the spark-submit command without executing |
50+
| `--jfr` | Enable Java Flight Recorder profiling |
51+
| `--jfr-dir` | Directory for JFR output files (default: `/results/jfr`) |
5052

5153
Available engines: `spark`, `comet`, `comet-iceberg`, `gluten`
5254

@@ -363,3 +365,30 @@ python3 generate-comparison.py --benchmark tpch \
363365
--title "TPC-H @ 100 GB: Parquet vs Iceberg" \
364366
comet-tpch-*.json comet-iceberg-tpch-*.json
365367
```
368+
369+
## Java Flight Recorder Profiling
370+
371+
Use the `--jfr` flag to capture JFR profiles from the Spark driver and executors.
372+
JFR is built into JDK 11+, so no additional dependencies are needed.
373+
374+
```shell
375+
python3 run.py --engine comet --benchmark tpch --jfr
376+
```
377+
378+
JFR recordings are written to `/results/jfr/` by default (configurable with
379+
`--jfr-dir`). The driver writes `driver.jfr` and each executor writes
380+
`executor.jfr` (JFR appends the PID when multiple executors share a path).
381+
382+
With Docker Compose, the `/results` volume is shared across all containers,
383+
so JFR files from both driver and executors are collected in
384+
`$RESULTS_DIR/jfr/` on the host:
385+
386+
```shell
387+
docker compose -f benchmarks/tpc/infra/docker/docker-compose.yml \
388+
run --rm bench \
389+
python3 /opt/benchmarks/run.py \
390+
--engine comet --benchmark tpch --output /results --no-restart --jfr
391+
```
392+
393+
Open the `.jfr` files with [JDK Mission Control](https://jdk.java.net/jmc/),
394+
IntelliJ IDEA's profiler, or the `jfr` CLI tool (`jfr summary driver.jfr`).

benchmarks/tpc/infra/docker/docker-compose-laptop.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ services:
7272
- SPARK_NO_DAEMONIZE=true
7373
mem_limit: 8g
7474
memswap_limit: 8g
75+
stop_grace_period: 30s
7576

7677
bench:
7778
image: ${BENCH_IMAGE:-comet-bench}

benchmarks/tpc/infra/docker/docker-compose.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ x-worker: &worker
5656
- SPARK_NO_DAEMONIZE=true
5757
mem_limit: ${WORKER_MEM_LIMIT:-32g}
5858
memswap_limit: ${WORKER_MEM_LIMIT:-32g}
59+
stop_grace_period: 30s
5960

6061
services:
6162
spark-master:

benchmarks/tpc/run.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,24 @@ def build_spark_submit_cmd(config, benchmark, args):
261261
val = "true" if val else "false"
262262
conf[resolve_env(key)] = resolve_env(str(val))
263263

264+
# JFR profiling: append to extraJavaOptions (preserving any existing values)
265+
if args.jfr:
266+
jfr_dir = args.jfr_dir
267+
driver_jfr = (
268+
f"-XX:StartFlightRecording=disk=true,dumponexit=true,"
269+
f"filename={jfr_dir}/driver.jfr,settings=profile"
270+
)
271+
executor_jfr = (
272+
f"-XX:StartFlightRecording=disk=true,dumponexit=true,"
273+
f"filename={jfr_dir}/executor.jfr,settings=profile"
274+
)
275+
for spark_key, jfr_opts in [
276+
("spark.driver.extraJavaOptions", driver_jfr),
277+
("spark.executor.extraJavaOptions", executor_jfr),
278+
]:
279+
existing = conf.get(spark_key, "")
280+
conf[spark_key] = f"{existing} {jfr_opts}".strip()
281+
264282
for key, val in sorted(conf.items()):
265283
cmd += ["--conf", f"{key}={val}"]
266284

@@ -357,6 +375,16 @@ def main():
357375
action="store_true",
358376
help="Print the spark-submit command without executing",
359377
)
378+
parser.add_argument(
379+
"--jfr",
380+
action="store_true",
381+
help="Enable Java Flight Recorder profiling for driver and executors",
382+
)
383+
parser.add_argument(
384+
"--jfr-dir",
385+
default="/results/jfr",
386+
help="Directory for JFR output files (default: /results/jfr)",
387+
)
360388
args = parser.parse_args()
361389

362390
config = load_engine_config(args.engine)
@@ -373,6 +401,10 @@ def main():
373401
if not args.no_restart and not args.dry_run:
374402
restart_spark()
375403

404+
# Create JFR output directory if profiling is enabled
405+
if args.jfr:
406+
os.makedirs(args.jfr_dir, exist_ok=True)
407+
376408
cmd = build_spark_submit_cmd(config, args.benchmark, args)
377409

378410
if args.dry_run:

0 commit comments

Comments
 (0)