@@ -13,15 +13,12 @@ permissions:
1313env :
1414 NLTK_DATA : ${{ github.workspace }}/nltk_data
1515 PYTHON_VERSION : " 3.12"
16- # Number of timed iterations per file.
17- # Kept at 1 in CI to minimise runner time; raise locally for more stable averages.
18- NUM_ITERATIONS : " 1"
19- # Fraction by which the current total may exceed the stored best before the
20- # job is marked as failed. 0.20 = 20%.
16+ # Number of times to run the full benchmark suite.
17+ NUM_ITERATIONS : " 3"
18+ # Regression threshold of 0.20 (20%) for now; tune later.
2119 REGRESSION_THRESHOLD : " 0.20"
22- # Increment to bust all stored caches (e.g. after a deliberate performance
23- # trade-off is accepted).
24- CACHE_VERSION : " v1"
20+ # Increment to change the cache key when benchmark-affecting dependencies are updated, so that runs start from a clean slate.
21+ CACHE_VERSION : " v2"
2522 # S3 location for metrics – matches core-product convention.
2623 S3_METRICS_BUCKET_KEY : utic-metrics/ci-metrics
2724 S3_BENCHMARK_PATH : open-source/partition-benchmark/benchmark_best.json
4138 needs : [setup]
4239
4340 steps :
44- # ------------------------------------------------------------------ #
45- # 1. Source & Python environment #
46- # ------------------------------------------------------------------ #
41+
4742 - uses : actions/checkout@v4
4843
4944 - uses : ./.github/actions/base-cache
5853 sudo apt-get update
5954 sudo apt-get install -y tesseract-ocr tesseract-ocr-kor
6055
61- # ------------------------------------------------------------------ #
62- # 2. Cache HuggingFace models #
63- # hi_res downloads layout-detection models on first use. #
64- # Caching keeps benchmark timings stable across runs. #
65- # ------------------------------------------------------------------ #
56+
6657 - name : Restore HuggingFace model cache
6758 uses : actions/cache/restore@v4
6859 with :
7263 hf-models-${{ runner.os }}-${{ env.CACHE_VERSION }}-
7364 hf-models-${{ runner.os }}-
7465
75- # ------------------------------------------------------------------ #
76- # 3. Run the benchmark #
77- # Writes per-file timings + __total__ to benchmark_results.json. #
78- # ------------------------------------------------------------------ #
66+
7967 - name : Run partition benchmark
8068 env :
8169 NUM_ITERATIONS : ${{ env.NUM_ITERATIONS }}
8977 path : ~/.cache/huggingface
9078 key : hf-models-${{ runner.os }}-${{ env.CACHE_VERSION }}-${{ github.sha }}
9179
92- # ------------------------------------------------------------------ #
93- # 4. Download the stored best runtime from S3 #
94- # continue-on-error: first run will have nothing stored yet. #
95- # ------------------------------------------------------------------ #
80+
9681 - name : Download previous best from S3
9782 continue-on-error : true
9883 env :
@@ -103,13 +88,7 @@ jobs:
10388 "s3://${{ env.S3_METRICS_BUCKET_KEY }}/${{ env.S3_BENCHMARK_PATH }}" \
10489 benchmark_best.json
10590
106- # ------------------------------------------------------------------ #
107- # 5. Compare results; fail on regression #
108- # compare_benchmark.py: #
109- # - Logs a per-file table with deltas. #
110- # - Exits 1 if current > best * (1 + threshold). #
111- # - Overwrites benchmark_best.json when current is faster. #
112- # ------------------------------------------------------------------ #
91+
11392 - name : Compare results against stored best
11493 id : compare
11594 run : |
@@ -118,12 +97,7 @@ jobs:
11897 benchmark_best.json \
11998 ${{ env.REGRESSION_THRESHOLD }}
12099
121- # ------------------------------------------------------------------ #
122- # 6. Upload the (possibly updated) best result back to S3 #
123- # compare_benchmark.py only overwrites benchmark_best.json when #
124- # the current run is strictly faster, so the true minimum is #
125- # preserved. #
126- # ------------------------------------------------------------------ #
100+
127101 - name : Upload best result to S3
128102 continue-on-error : true
129103 env :
@@ -134,9 +108,7 @@ jobs:
134108 benchmark_best.json \
135109 "s3://${{ env.S3_METRICS_BUCKET_KEY }}/${{ env.S3_BENCHMARK_PATH }}"
136110
137- # ------------------------------------------------------------------ #
138- # 7. Upload artifacts – always, so regressions can be inspected #
139- # ------------------------------------------------------------------ #
111+
140112 - name : Upload benchmark artifacts
141113 if : always()
142114 uses : actions/upload-artifact@v4
0 commit comments