@@ -119,6 +119,8 @@ jobs:
119119 name : cluster-log-single-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
120120 path : integration-test/target/cluster-logs
121121 retention-days : 30
122+ # 12 IT classes split across 3 parallel shards to cut the historical ~42 min
123+ # wall clock to ~14 min. See cluster-it-1c1d.yml for the shard pattern.
122124 dual-tree-auto-basic :
123125 strategy :
124126 fail-fast : false
@@ -128,6 +130,7 @@ jobs:
128130 # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
129131 cluster : [HighPerformanceMode]
130132 os : [ubuntu-latest]
133+ shard : [0, 1, 2]
131134 runs-on : ${{ matrix.os }}
132135 steps :
133136 - uses : actions/checkout@v5
@@ -147,6 +150,21 @@ jobs:
147150 - name : Sleep for a random duration between 0 and 10000 milliseconds
148151 run : |
149152 sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
153+ - name : Build IT shard list
154+ shell : bash
155+ # See cluster-it-1c1d.yml for the shard-list pattern. Write under
156+ # $RUNNER_TEMP (outside the repo) so Apache RAT doesn't flag the file.
# How the list is built: grep lists every *IT.java file under
# integration-test/src/test/java that contains the whole word
# MultiClusterIT2DualTreeAutoBasic; the directory part and the .java
# suffix are stripped, and the sorted names are dealt round-robin.
# awk's NR is 1-based, so with TOTAL=3 shard 0 takes sorted entries
# 3, 6, 9, ..., shard 1 takes 1, 4, 7, ..., and shard 2 takes 2, 5, 8, ...
# Because of `set -euo pipefail`, the step fails if grep matches no file.
# NOTE(review): if fewer than TOTAL classes match, at least one shard gets
# an empty list - confirm how failsafe treats an empty includesFile.
157+ run : |
158+ set -euo pipefail
159+ SHARD=${{ matrix.shard }}
160+ TOTAL=3
161+ grep -rlE --include='*IT.java' '\bMultiClusterIT2DualTreeAutoBasic\b' integration-test/src/test/java \
162+ | awk -F'/' '{print $NF}' | sed 's/\.java$//' \
163+ | sort \
164+ | awk -v s=$SHARD -v t=$TOTAL 'NR%t==s' \
165+ > "$RUNNER_TEMP/it-shard.txt"
166+ echo "Shard $SHARD/$TOTAL contains $(wc -l < "$RUNNER_TEMP/it-shard.txt") test classes"
167+ head -5 "$RUNNER_TEMP/it-shard.txt"
150168 - name : IT Test
151169 shell : bash
152170 # we do not compile client-cpp for saving time, it is tested in client.yml
@@ -164,6 +182,9 @@ jobs:
164182 -DskipUTs \
165183 -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
166184 -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
185+ -Dfailsafe.includesFile="$RUNNER_TEMP/it-shard.txt" \
186+ -DfailIfNoTests=false \
187+ -Dfailsafe.failIfNoSpecifiedTests=false \
167188 -pl integration-test \
168189 -am -PMultiClusterIT2DualTreeAutoBasic \
169190 -ntp >> ~/run-tests-$attempt.log && return 0
@@ -201,9 +222,11 @@ jobs:
201222 if : failure()
202223 uses : actions/upload-artifact@v6
203224 with :
204- name : cluster-log-dual-tree-auto-basic-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
225+ name : cluster-log-dual-tree-auto-basic-shard${{ matrix.shard }}-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
205226 path : integration-test/target/cluster-logs
206227 retention-days : 30
228+ # 9 IT classes split across 3 parallel shards to cut the historical ~51 min
229+ # wall clock to ~17 min. See cluster-it-1c1d.yml for the shard pattern.
207230 dual-tree-auto-enhanced :
208231 strategy :
209232 fail-fast : false
@@ -214,6 +237,7 @@ jobs:
214237 cluster1 : [HighPerformanceMode]
215238 cluster2 : [HighPerformanceMode]
216239 os : [ubuntu-latest]
240+ shard : [0, 1, 2]
217241 runs-on : ${{ matrix.os }}
218242 steps :
219243 - uses : actions/checkout@v5
@@ -233,6 +257,21 @@ jobs:
233257 - name : Sleep for a random duration between 0 and 10000 milliseconds
234258 run : |
235259 sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
260+ - name : Build IT shard list
261+ shell : bash
262+ # See cluster-it-1c1d.yml for the shard-list pattern. Write under
263+ # $RUNNER_TEMP (outside the repo) so Apache RAT doesn't flag the file.
# How the list is built: grep lists every *IT.java file under
# integration-test/src/test/java that contains the whole word
# MultiClusterIT2DualTreeAutoEnhanced; the directory part and the .java
# suffix are stripped, and the sorted names are dealt round-robin.
# awk's NR is 1-based, so with TOTAL=3 shard 0 takes sorted entries
# 3, 6, 9, ..., shard 1 takes 1, 4, 7, ..., and shard 2 takes 2, 5, 8, ...
# Because of `set -euo pipefail`, the step fails if grep matches no file.
# NOTE(review): if fewer than TOTAL classes match, at least one shard gets
# an empty list - confirm how failsafe treats an empty includesFile.
264+ run : |
265+ set -euo pipefail
266+ SHARD=${{ matrix.shard }}
267+ TOTAL=3
268+ grep -rlE --include='*IT.java' '\bMultiClusterIT2DualTreeAutoEnhanced\b' integration-test/src/test/java \
269+ | awk -F'/' '{print $NF}' | sed 's/\.java$//' \
270+ | sort \
271+ | awk -v s=$SHARD -v t=$TOTAL 'NR%t==s' \
272+ > "$RUNNER_TEMP/it-shard.txt"
273+ echo "Shard $SHARD/$TOTAL contains $(wc -l < "$RUNNER_TEMP/it-shard.txt") test classes"
274+ head -5 "$RUNNER_TEMP/it-shard.txt"
236275 - name : IT Test
237276 shell : bash
238277 # we do not compile client-cpp for saving time, it is tested in client.yml
@@ -250,6 +289,9 @@ jobs:
250289 -DskipUTs \
251290 -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
252291 -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
292+ -Dfailsafe.includesFile="$RUNNER_TEMP/it-shard.txt" \
293+ -DfailIfNoTests=false \
294+ -Dfailsafe.failIfNoSpecifiedTests=false \
253295 -pl integration-test \
254296 -am -PMultiClusterIT2DualTreeAutoEnhanced \
255297 -ntp >> ~/run-tests-$attempt.log && return 0
@@ -287,9 +329,11 @@ jobs:
287329 if : failure()
288330 uses : actions/upload-artifact@v6
289331 with :
290- name : cluster-log-dual-tree-auto-enhanced-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
332+ name : cluster-log-dual-tree-auto-enhanced-shard${{ matrix.shard }}-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
291333 path : integration-test/target/cluster-logs
292334 retention-days : 30
335+ # 11 IT classes split across 3 parallel shards to cut the historical ~27 min
336+ # wall clock to ~9 min. See cluster-it-1c1d.yml for the shard pattern.
293337 dual-tree-manual :
294338 strategy :
295339 fail-fast : false
@@ -300,6 +344,7 @@ jobs:
300344 cluster1 : [HighPerformanceMode]
301345 cluster2 : [HighPerformanceMode]
302346 os : [ubuntu-latest]
347+ shard : [0, 1, 2]
303348 runs-on : ${{ matrix.os }}
304349 steps :
305350 - uses : actions/checkout@v5
@@ -319,6 +364,21 @@ jobs:
319364 - name : Sleep for a random duration between 0 and 10000 milliseconds
320365 run : |
321366 sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
367+ - name : Build IT shard list
368+ shell : bash
369+ # See cluster-it-1c1d.yml for the shard-list pattern. Write under
370+ # $RUNNER_TEMP (outside the repo) so Apache RAT doesn't flag the file.
# How the list is built: grep lists every *IT.java file under
# integration-test/src/test/java that contains the whole word
# MultiClusterIT2DualTreeManual; the directory part and the .java
# suffix are stripped, and the sorted names are dealt round-robin.
# awk's NR is 1-based, so with TOTAL=3 shard 0 takes sorted entries
# 3, 6, 9, ..., shard 1 takes 1, 4, 7, ..., and shard 2 takes 2, 5, 8, ...
# Because of `set -euo pipefail`, the step fails if grep matches no file.
# NOTE(review): if fewer than TOTAL classes match, at least one shard gets
# an empty list - confirm how failsafe treats an empty includesFile.
371+ run : |
372+ set -euo pipefail
373+ SHARD=${{ matrix.shard }}
374+ TOTAL=3
375+ grep -rlE --include='*IT.java' '\bMultiClusterIT2DualTreeManual\b' integration-test/src/test/java \
376+ | awk -F'/' '{print $NF}' | sed 's/\.java$//' \
377+ | sort \
378+ | awk -v s=$SHARD -v t=$TOTAL 'NR%t==s' \
379+ > "$RUNNER_TEMP/it-shard.txt"
380+ echo "Shard $SHARD/$TOTAL contains $(wc -l < "$RUNNER_TEMP/it-shard.txt") test classes"
381+ head -5 "$RUNNER_TEMP/it-shard.txt"
322382 - name : IT Test
323383 shell : bash
324384 # we do not compile client-cpp for saving time, it is tested in client.yml
@@ -336,6 +396,9 @@ jobs:
336396 -DskipUTs \
337397 -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
338398 -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
399+ -Dfailsafe.includesFile="$RUNNER_TEMP/it-shard.txt" \
400+ -DfailIfNoTests=false \
401+ -Dfailsafe.failIfNoSpecifiedTests=false \
339402 -pl integration-test \
340403 -am -PMultiClusterIT2DualTreeManual \
341404 -ntp >> ~/run-tests-$attempt.log && return 0
@@ -373,7 +436,7 @@ jobs:
373436 if : failure()
374437 uses : actions/upload-artifact@v6
375438 with :
376- name : cluster-log-dual-tree-manual-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
439+ name : cluster-log-dual-tree-manual-shard${{ matrix.shard }}-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
377440 path : integration-test/target/cluster-logs
378441 retention-days : 30
379442 subscription-tree-arch-verification :
@@ -720,6 +783,8 @@ jobs:
720783 name : cluster-log-subscription-tree-regression-misc-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
721784 path : integration-test/target/cluster-logs
722785 retention-days : 30
786+ # 13 IT classes split across 3 parallel shards to cut the historical ~63 min
787+ # wall clock to ~22 min. See cluster-it-1c1d.yml for the shard pattern.
723788 dual-table-manual-basic :
724789 strategy :
725790 fail-fast : false
@@ -729,6 +794,7 @@ jobs:
729794 # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
730795 cluster : [HighPerformanceMode]
731796 os : [ubuntu-latest]
797+ shard : [0, 1, 2]
732798 runs-on : ${{ matrix.os }}
733799 steps :
734800 - uses : actions/checkout@v5
@@ -748,6 +814,21 @@ jobs:
748814 - name : Sleep for a random duration between 0 and 10000 milliseconds
749815 run : |
750816 sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
817+ - name : Build IT shard list
818+ shell : bash
819+ # See cluster-it-1c1d.yml for the shard-list pattern. Write under
820+ # $RUNNER_TEMP (outside the repo) so Apache RAT doesn't flag the file.
# How the list is built: grep lists every *IT.java file under
# integration-test/src/test/java that contains the whole word
# MultiClusterIT2DualTableManualBasic; the directory part and the .java
# suffix are stripped, and the sorted names are dealt round-robin.
# awk's NR is 1-based, so with TOTAL=3 shard 0 takes sorted entries
# 3, 6, 9, ..., shard 1 takes 1, 4, 7, ..., and shard 2 takes 2, 5, 8, ...
# Because of `set -euo pipefail`, the step fails if grep matches no file.
# NOTE(review): if fewer than TOTAL classes match, at least one shard gets
# an empty list - confirm how failsafe treats an empty includesFile.
821+ run : |
822+ set -euo pipefail
823+ SHARD=${{ matrix.shard }}
824+ TOTAL=3
825+ grep -rlE --include='*IT.java' '\bMultiClusterIT2DualTableManualBasic\b' integration-test/src/test/java \
826+ | awk -F'/' '{print $NF}' | sed 's/\.java$//' \
827+ | sort \
828+ | awk -v s=$SHARD -v t=$TOTAL 'NR%t==s' \
829+ > "$RUNNER_TEMP/it-shard.txt"
830+ echo "Shard $SHARD/$TOTAL contains $(wc -l < "$RUNNER_TEMP/it-shard.txt") test classes"
831+ head -5 "$RUNNER_TEMP/it-shard.txt"
751832 - name : IT Test
752833 shell : bash
753834 # we do not compile client-cpp for saving time, it is tested in client.yml
@@ -765,6 +846,9 @@ jobs:
765846 -DskipUTs \
766847 -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
767848 -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
849+ -Dfailsafe.includesFile="$RUNNER_TEMP/it-shard.txt" \
850+ -DfailIfNoTests=false \
851+ -Dfailsafe.failIfNoSpecifiedTests=false \
768852 -pl integration-test \
769853 -am -PMultiClusterIT2DualTableManualBasic \
770854 -ntp >> ~/run-tests-$attempt.log && return 0
@@ -802,9 +886,11 @@ jobs:
802886 if : failure()
803887 uses : actions/upload-artifact@v6
804888 with :
805- name : cluster-log-dual-table-manual-basic-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
889+ name : cluster-log-dual-table-manual-basic-shard${{ matrix.shard }}-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
806890 path : integration-test/target/cluster-logs
807891 retention-days : 30
892+ # 11 IT classes split across 3 parallel shards to cut the historical ~62 min
893+ # wall clock to ~22 min. See cluster-it-1c1d.yml for the shard pattern.
808894 dual-table-manual-enhanced :
809895 strategy :
810896 fail-fast : false
@@ -814,6 +900,7 @@ jobs:
814900 # StrongConsistencyClusterMode is ignored now because RatisConsensus has not been supported yet.
815901 cluster : [HighPerformanceMode]
816902 os : [ubuntu-latest]
903+ shard : [0, 1, 2]
817904 runs-on : ${{ matrix.os }}
818905 steps :
819906 - uses : actions/checkout@v5
@@ -833,6 +920,21 @@ jobs:
833920 - name : Sleep for a random duration between 0 and 10000 milliseconds
834921 run : |
835922 sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
923+ - name : Build IT shard list
924+ shell : bash
925+ # See cluster-it-1c1d.yml for the shard-list pattern. Write under
926+ # $RUNNER_TEMP (outside the repo) so Apache RAT doesn't flag the file.
# How the list is built: grep lists every *IT.java file under
# integration-test/src/test/java that contains the whole word
# MultiClusterIT2DualTableManualEnhanced; the directory part and the .java
# suffix are stripped, and the sorted names are dealt round-robin.
# awk's NR is 1-based, so with TOTAL=3 shard 0 takes sorted entries
# 3, 6, 9, ..., shard 1 takes 1, 4, 7, ..., and shard 2 takes 2, 5, 8, ...
# Because of `set -euo pipefail`, the step fails if grep matches no file.
# NOTE(review): if fewer than TOTAL classes match, at least one shard gets
# an empty list - confirm how failsafe treats an empty includesFile.
927+ run : |
928+ set -euo pipefail
929+ SHARD=${{ matrix.shard }}
930+ TOTAL=3
931+ grep -rlE --include='*IT.java' '\bMultiClusterIT2DualTableManualEnhanced\b' integration-test/src/test/java \
932+ | awk -F'/' '{print $NF}' | sed 's/\.java$//' \
933+ | sort \
934+ | awk -v s=$SHARD -v t=$TOTAL 'NR%t==s' \
935+ > "$RUNNER_TEMP/it-shard.txt"
936+ echo "Shard $SHARD/$TOTAL contains $(wc -l < "$RUNNER_TEMP/it-shard.txt") test classes"
937+ head -5 "$RUNNER_TEMP/it-shard.txt"
836938 - name : IT Test
837939 shell : bash
838940 # we do not compile client-cpp for saving time, it is tested in client.yml
@@ -850,6 +952,9 @@ jobs:
850952 -DskipUTs \
851953 -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
852954 -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
955+ -Dfailsafe.includesFile="$RUNNER_TEMP/it-shard.txt" \
956+ -DfailIfNoTests=false \
957+ -Dfailsafe.failIfNoSpecifiedTests=false \
853958 -pl integration-test \
854959 -am -PMultiClusterIT2DualTableManualEnhanced \
855960 -ntp >> ~/run-tests-$attempt.log && return 0
@@ -887,7 +992,7 @@ jobs:
887992 if : failure()
888993 uses : actions/upload-artifact@v6
889994 with :
890- name : cluster-log-dual-table-manual-enhanced-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
995+ name : cluster-log-dual-table-manual-enhanced-shard${{ matrix.shard }}-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
891996 path : integration-test/target/cluster-logs
892997 retention-days : 30
893998 triple :
0 commit comments