1313 benchmark_matrix :
1414 required : false
1515 type : string
16- description : " JSON string containing the matrix configuration"
16+ description : " JSON string containing the full matrix configuration"
1717 default : |
1818 [
1919 {
@@ -277,6 +277,193 @@ on:
277277 "iterations": "10"
278278 }
279279 ]
280+ base_benchmark_matrix :
281+ required : false
282+ type : string
283+ description : " JSON string containing the base matrix configuration"
284+ default : |
285+ [
286+ {
287+ "id": "clickbench-nvme",
288+ "subcommand": "clickbench",
289+ "name": "Clickbench on NVME",
290+ "data_formats": ["parquet", "vortex"],
291+ "pr_targets": [
292+ {"engine": "datafusion", "format": "parquet"},
293+ {"engine": "datafusion", "format": "vortex"},
294+ {"engine": "duckdb", "format": "parquet"},
295+ {"engine": "duckdb", "format": "vortex"}
296+ ],
297+ "develop_targets": [
298+ {"engine": "datafusion", "format": "parquet"},
299+ {"engine": "datafusion", "format": "vortex"},
300+ {"engine": "datafusion", "format": "lance"},
301+ {"engine": "duckdb", "format": "parquet"},
302+ {"engine": "duckdb", "format": "vortex"}
303+ ]
304+ },
305+ {
306+ "id": "tpch-nvme",
307+ "subcommand": "tpch",
308+ "name": "TPC-H SF=1 on NVME",
309+ "data_formats": ["parquet", "vortex"],
310+ "pr_targets": [
311+ {"engine": "datafusion", "format": "arrow"},
312+ {"engine": "datafusion", "format": "parquet"},
313+ {"engine": "datafusion", "format": "vortex"},
314+ {"engine": "duckdb", "format": "parquet"},
315+ {"engine": "duckdb", "format": "vortex"}
316+ ],
317+ "develop_targets": [
318+ {"engine": "datafusion", "format": "arrow"},
319+ {"engine": "datafusion", "format": "parquet"},
320+ {"engine": "datafusion", "format": "vortex"},
321+ {"engine": "datafusion", "format": "lance"},
322+ {"engine": "duckdb", "format": "parquet"},
323+ {"engine": "duckdb", "format": "vortex"}
324+ ],
325+ "scale_factor": "1.0",
326+ "iterations": "10"
327+ },
328+ {
329+ "id": "tpch-s3",
330+ "subcommand": "tpch",
331+ "name": "TPC-H SF=1 on S3",
332+ "local_dir": "vortex-bench/data/tpch/1.0",
333+ "remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/tpch/1.0/",
334+ "data_formats": ["parquet", "vortex"],
335+ "pr_targets": [
336+ {"engine": "datafusion", "format": "parquet"},
337+ {"engine": "datafusion", "format": "vortex"},
338+ {"engine": "duckdb", "format": "parquet"},
339+ {"engine": "duckdb", "format": "vortex"}
340+ ],
341+ "develop_targets": [
342+ {"engine": "datafusion", "format": "parquet"},
343+ {"engine": "datafusion", "format": "vortex"},
344+ {"engine": "duckdb", "format": "parquet"},
345+ {"engine": "duckdb", "format": "vortex"}
346+ ],
347+ "scale_factor": "1.0",
348+ "iterations": "10"
349+ },
350+ {
351+ "id": "tpch-nvme-10",
352+ "subcommand": "tpch",
353+ "name": "TPC-H SF=10 on NVME",
354+ "data_formats": ["parquet", "vortex"],
355+ "pr_targets": [
356+ {"engine": "datafusion", "format": "arrow"},
357+ {"engine": "datafusion", "format": "parquet"},
358+ {"engine": "datafusion", "format": "vortex"},
359+ {"engine": "duckdb", "format": "parquet"},
360+ {"engine": "duckdb", "format": "vortex"}
361+ ],
362+ "develop_targets": [
363+ {"engine": "datafusion", "format": "arrow"},
364+ {"engine": "datafusion", "format": "parquet"},
365+ {"engine": "datafusion", "format": "vortex"},
366+ {"engine": "datafusion", "format": "lance"},
367+ {"engine": "duckdb", "format": "parquet"},
368+ {"engine": "duckdb", "format": "vortex"}
369+ ],
370+ "scale_factor": "10.0",
371+ "iterations": "10"
372+ },
373+ {
374+ "id": "tpcds-nvme",
375+ "subcommand": "tpcds",
376+ "name": "TPC-DS SF=1 on NVME",
377+ "data_formats": ["parquet", "vortex"],
378+ "pr_targets": [
379+ {"engine": "datafusion", "format": "parquet"},
380+ {"engine": "datafusion", "format": "vortex"},
381+ {"engine": "duckdb", "format": "parquet"},
382+ {"engine": "duckdb", "format": "vortex"}
383+ ],
384+ "develop_targets": [
385+ {"engine": "datafusion", "format": "parquet"},
386+ {"engine": "datafusion", "format": "vortex"},
387+ {"engine": "duckdb", "format": "parquet"},
388+ {"engine": "duckdb", "format": "vortex"}
389+ ],
390+ "scale_factor": "1.0"
391+ },
392+ {
393+ "id": "statpopgen",
394+ "subcommand": "statpopgen",
395+ "name": "Statistical and Population Genetics",
396+ "local_dir": "vortex-bench/data/statpopgen",
397+ "data_formats": ["parquet", "vortex"],
398+ "pr_targets": [
399+ {"engine": "duckdb", "format": "parquet"},
400+ {"engine": "duckdb", "format": "vortex"}
401+ ],
402+ "develop_targets": [
403+ {"engine": "duckdb", "format": "parquet"},
404+ {"engine": "duckdb", "format": "vortex"}
405+ ],
406+ "scale_factor": "100"
407+ },
408+ {
409+ "id": "fineweb",
410+ "subcommand": "fineweb",
411+ "name": "FineWeb NVMe",
412+ "data_formats": ["parquet", "vortex"],
413+ "pr_targets": [
414+ {"engine": "datafusion", "format": "parquet"},
415+ {"engine": "datafusion", "format": "vortex"},
416+ {"engine": "duckdb", "format": "parquet"},
417+ {"engine": "duckdb", "format": "vortex"}
418+ ],
419+ "develop_targets": [
420+ {"engine": "datafusion", "format": "parquet"},
421+ {"engine": "datafusion", "format": "vortex"},
422+ {"engine": "duckdb", "format": "parquet"},
423+ {"engine": "duckdb", "format": "vortex"}
424+ ],
425+ "scale_factor": "100"
426+ },
427+ {
428+ "id": "fineweb-s3",
429+ "subcommand": "fineweb",
430+ "name": "FineWeb S3",
431+ "local_dir": "vortex-bench/data/fineweb",
432+ "remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/fineweb/",
433+ "data_formats": ["parquet", "vortex"],
434+ "pr_targets": [
435+ {"engine": "datafusion", "format": "parquet"},
436+ {"engine": "datafusion", "format": "vortex"},
437+ {"engine": "duckdb", "format": "parquet"},
438+ {"engine": "duckdb", "format": "vortex"}
439+ ],
440+ "develop_targets": [
441+ {"engine": "datafusion", "format": "parquet"},
442+ {"engine": "datafusion", "format": "vortex"},
443+ {"engine": "duckdb", "format": "parquet"},
444+ {"engine": "duckdb", "format": "vortex"}
445+ ],
446+ "scale_factor": "100"
447+ },
448+ {
449+ "id": "polarsignals",
450+ "subcommand": "polarsignals",
451+ "name": "PolarSignals Profiling",
452+ "data_formats": ["vortex"],
453+ "pr_targets": [
454+ {"engine": "datafusion", "format": "vortex"}
455+ ],
456+ "develop_targets": [
457+ {"engine": "datafusion", "format": "vortex"}
458+ ],
459+ "scale_factor": "1"
460+ }
461+ ]
462+ benchmark_profile :
463+ required : false
464+ type : string
465+ description : " Benchmark profile to run: full or base"
466+ default : " full"
280467
281468jobs :
282469 bench :
@@ -289,7 +476,7 @@ jobs:
289476 strategy :
290477 fail-fast : false
291478 matrix :
292- include : ${{ fromJSON(inputs.benchmark_matrix) }}
479+ include : ${{ fromJSON(inputs.benchmark_profile == 'base' && inputs.base_benchmark_matrix || inputs. benchmark_matrix) }}
293480
294481 runs-on : >-
295482 ${{ github.repository == 'vortex-data/vortex'
@@ -321,7 +508,7 @@ jobs:
321508 run : |
322509 wget -qO- https://github.com/duckdb/duckdb/releases/download/v1.5.3/duckdb_cli-linux-amd64.zip | funzip > duckdb
323510 chmod +x duckdb
324- echo "$PWD" >> $GITHUB_PATH
511+ echo "$PWD" >> " $GITHUB_PATH"
325512
326513 - uses : ./.github/actions/system-info
327514
@@ -345,11 +532,11 @@ jobs:
345532 env :
346533 RUSTFLAGS : " -C target-cpu=native"
347534 run : |
348- packages=" --bin data-gen --bin datafusion-bench --bin duckdb-bench"
535+ packages=( --bin data-gen --bin datafusion-bench --bin duckdb-bench)
349536 if [ "${{ inputs.mode }}" != "pr" ]; then
350- packages="$packages --bin lance-bench"
537+ packages+=( --bin lance-bench)
351538 fi
352- cargo build $ packages --profile release_debug --features unstable_encodings
539+ cargo build "${ packages[@]}" --profile release_debug --features unstable_encodings
353540
354541 - name : Generate data
355542 shell : bash
@@ -446,11 +633,16 @@ jobs:
446633 python3 scripts/s3-download.py s3://vortex-ci-benchmark-results/data.json.gz data.json.gz --no-sign-request
447634 gzip -d -c data.json.gz > base.json
448635
449- echo '# Benchmarks: ${{ matrix.name }}' > comment.md
636+ benchmark_name="${{ matrix.name }}"
637+ if [ "${{ inputs.benchmark_profile }}" != "full" ]; then
638+ benchmark_name="$benchmark_name (${{ inputs.benchmark_profile }})"
639+ fi
640+
641+ echo "# Benchmarks: $benchmark_name" > comment.md
450642 echo '' >> comment.md
451- uv run --no-project scripts/compare-benchmark-jsons.py base.json results.json "${{ matrix.name }} " \
643+ uv run --no-project scripts/compare-benchmark-jsons.py base.json results.json "$benchmark_name " \
452644 >> comment.md
453- cat comment.md >> $GITHUB_STEP_SUMMARY
645+ cat comment.md >> " $GITHUB_STEP_SUMMARY"
454646
455647 - name : Comment PR
456648 if : inputs.mode == 'pr' && github.event.pull_request.head.repo.fork == false
@@ -460,7 +652,7 @@ jobs:
460652 # There is exactly one comment per comment-tag. If a comment with this tag already exists,
461653 # this action will *update* the comment instead of posting a new comment. Therefore, each
462654 # unique benchmark configuration must have a unique comment-tag.
463- comment-tag : bench-pr-comment-${{ matrix.id }}
655+ comment-tag : bench-pr-comment-${{ matrix.id }}${{ inputs.benchmark_profile == 'base' && '-base' || '' }}
464656
465657 - name : Comment PR on failure
466658 if : failure() && inputs.mode == 'pr' && github.event.pull_request.head.repo.fork == false
@@ -469,8 +661,8 @@ jobs:
469661 message : |
470662 # 🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨
471663
472- Benchmark `${{ matrix.name }}` failed! Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.
473- comment-tag : bench-pr-comment-${{ matrix.id }}
664+ Benchmark `${{ matrix.name }}` (${{ inputs.benchmark_profile }}) failed! Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.
665+ comment-tag : bench-pr-comment-${{ matrix.id }}${{ inputs.benchmark_profile == 'base' && '-base' || '' }}
474666
475667 - name : Upload Benchmark Results
476668 if : inputs.mode == 'develop'
0 commit comments