eolivelli
diff --git a/‎.github/workflows/run-compaction.yml‎
Lines changed: 114 additions & 0 deletions b/‎.github/workflows/run-compaction.yml‎
Lines changed: 114 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 4 additions & 0 deletions b/‎.gitignore‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎benchmarks-jmh/pom.xml‎
Lines changed: 39 additions & 1 deletion b/‎benchmarks-jmh/pom.xml‎
Lines changed: 39 additions & 1 deletion
@@ -0,0 +1,114 @@
+# Runs CompactorBenchmark on one or more branches so results can be compared.
+name: Run Compaction Bench
+
+on:
+  # Manual runs: pick the dataset and the branches to benchmark.
+  workflow_dispatch:
+    inputs:
+      dataset:
+        description: "Dataset name passed to CompactorBenchmark (-p datasetNames)"
+        default: "ada002-100k"
+        required: false
+      branches:
+        description: "Space-separated list of branches to benchmark"
+        default: "main"
+        required: false
+  # Pull requests targeting main that touch main Java sources or any POM.
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - ready_for_review
+    branches:
+      - main
+    paths:
+      - "**/src/main/java/**"
+      - "pom.xml"
+      - "**/pom.xml"
+
+jobs:
+  # Builds the job matrix consumed by test-compaction. The branch list is:
+  #   - pull_request:      main plus the PR head branch
+  #   - workflow_dispatch: the space-separated "branches" input
+  #   - anything else:     main only
+  generate-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - name: Generate matrix
+        id: set-matrix
+        # Branch names are attacker-controllable (PR head ref, dispatch input).
+        # They are handed to the script through the environment rather than
+        # expanded inline, so they are treated as data and never parsed as shell.
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          HEAD_REF: ${{ github.head_ref }}
+          BRANCHES_INPUT: ${{ github.event.inputs.branches }}
+        run: |
+          if [[ "$EVENT_NAME" == "pull_request" ]]; then
+            BRANCH_LIST=("main" "$HEAD_REF")
+          elif [[ "$EVENT_NAME" == "workflow_dispatch" && -n "$BRANCHES_INPUT" ]]; then
+            # Split on whitespace without triggering glob expansion.
+            read -r -a BRANCH_LIST <<< "$BRANCHES_INPUT"
+          else
+            BRANCH_LIST=("main")
+          fi
+
+          # Let jq do the JSON quoting so unusual branch names cannot corrupt the matrix.
+          BRANCHES=$(jq -cn '$ARGS.positional' --args "${BRANCH_LIST[@]}")
+
+          echo "matrix={\"jdk\":[24],\"isa\":[\"isa-avx512f\"],\"branch\":$BRANCHES}" >> "$GITHUB_OUTPUT"
+
+  # For every matrix entry: checks out the branch, builds it, runs
+  # CompactorBenchmark, and uploads the results file as an artifact.
+  test-compaction:
+    needs: generate-matrix
+    strategy:
+      matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix) }}
+    runs-on: ${{ matrix.isa }}
+    steps:
+      - name: Set up GCC
+        # Refresh the package index first so the install does not rely on a stale one.
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y gcc
+
+      # One checkout of the branch under test. It runs before setup-java so the
+      # Maven cache key is derived from that branch's pom.xml files.
+      - name: Checkout branch
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ matrix.branch }}
+          fetch-depth: 0
+
+      - name: Set up JDK ${{ matrix.jdk }}
+        uses: actions/setup-java@v4
+        with:
+          java-version: ${{ matrix.jdk }}
+          distribution: temurin
+          cache: maven
+
+      - name: Build branch
+        run: mvn -B -Punix-amd64-profile package --file pom.xml
+
+      - name: Run CompactorBenchmark
+        id: run-benchmark
+        # User-controlled values reach the script via the environment instead of
+        # inline expression expansion, so they cannot inject shell commands.
+        env:
+          DATASET_INPUT: ${{ github.event.inputs.dataset }}
+          BRANCH_NAME: ${{ matrix.branch }}
+        run: |
+          # Give the JVM half of the detected memory; assume 16 GB if detection fails.
+          TOTAL_MEM_GB=$(free -g | awk '/^Mem:/ {print $2}')
+          if [[ -z "$TOTAL_MEM_GB" ]] || [[ "$TOTAL_MEM_GB" -le 0 ]]; then
+            TOTAL_MEM_GB=16
+          fi
+          HALF_MEM_GB=$((TOTAL_MEM_GB / 2))
+          if [[ "$HALF_MEM_GB" -lt 1 ]]; then
+            HALF_MEM_GB=1
+          fi
+
+          # The dataset input is empty outside workflow_dispatch; use the default then.
+          DATASET="${DATASET_INPUT:-ada002-100k}"
+
+          # Sanitised branch name, exported for the artifact name in the upload step.
+          SAFE_BRANCH=$(printf '%s\n' "$BRANCH_NAME" | sed 's/[^A-Za-z0-9_-]/_/g')
+          echo "safe_branch=$SAFE_BRANCH" >> "$GITHUB_OUTPUT"
+
+          java --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector \
+            -Djvector.experimental.enable_native_vectorization=true \
+            -Xmx${HALF_MEM_GB}g \
+            -cp benchmarks-jmh/target/compactor-benchmark.jar \
+            io.github.jbellis.jvector.bench.CompactorBenchmark \
+            -p workloadMode=PARTITION_AND_COMPACT \
+            -p "datasetNames=$DATASET" \
+            -p numPartitions=4 \
+            -p splitDistribution=FIBONACCI \
+            -p indexPrecision=FUSEDPQ \
+            -jvmArgsPrepend "-Xmx${HALF_MEM_GB}g" \
+            -wi 0 -i 1 -f 1
+
+      - name: Upload compaction results
+        uses: actions/upload-artifact@v4
+        with:
+          name: compaction-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}-${{ steps.run-benchmark.outputs.safe_branch }}
+          path: target/benchmark-results/compactor-*/compactor-results.jsonl
+          if-no-files-found: warn
@@ -6,6 +6,10 @@ local/
 dataset_
 **/local_datasets/**
 
+### Testing Results
+*results*.json
+*results*.jsonl
+
 ### Bench caches
 pq_cache/
 index_cache/
 
@@ -15,6 +15,9 @@
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
         <maven.compiler.release>22</maven.compiler.release>
         <jmh.version>1.37</jmh.version>
+        <awssdk.version>2.21.10</awssdk.version>
+        <!-- Default benchmark arguments (empty) -->
+        <args></args>
     </properties>
 
     <dependencies>
@@ -53,6 +56,11 @@
             <artifactId>log4j-slf4j2-impl</artifactId>
             <version>2.24.3</version>
         </dependency>
+        <!-- AWS SDK for Java EC2 module; the version is taken from the awssdk.version property. -->
+        <!-- NOTE(review): the code that uses this dependency is not visible in this diff; confirm it is needed. -->
+        <dependency>
+            <groupId>software.amazon.awssdk</groupId>
+            <artifactId>ec2</artifactId>
+            <version>${awssdk.version}</version>
+        </dependency>
 
     </dependencies>
 
@@ -85,6 +93,7 @@
                             <goal>shade</goal>
                         </goals>
                         <configuration>
+                            <outputFile>${project.build.directory}/compactor-benchmark.jar</outputFile>
                             <transformers>
                                 <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                     <mainClass>org.openjdk.jmh.Main</mainClass>
@@ -94,6 +103,35 @@
                     </execution>
                 </executions>
             </plugin>
+
+            <!--
+              Launchers for the benchmark tools. Neither execution declares a phase,
+              so each is presumably invoked explicitly by its id (for example
+              exec:exec@compactor). Extra program arguments come from the "args"
+              property and are appended to the command line.
+            -->
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>exec-maven-plugin</artifactId>
+                <executions>
+                    <!-- Runs CompactorBenchmark with the incubator vector module and native vectorization enabled. -->
+                    <execution>
+                        <id>compactor</id>
+                        <goals>
+                            <goal>exec</goal>
+                        </goals>
+                        <configuration>
+                            <skip>false</skip>
+                            <executable>java</executable>
+                            <commandlineArgs>--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector -Djvector.experimental.enable_native_vectorization=true -cp %classpath io.github.jbellis.jvector.bench.CompactorBenchmark ${args}</commandlineArgs>
+                        </configuration>
+                    </execution>
+                    <!-- Runs EventLogAnalyzer on the project classpath. -->
+                    <execution>
+                        <id>analyze</id>
+                        <goals>
+                            <goal>exec</goal>
+                        </goals>
+                        <configuration>
+                            <skip>false</skip>
+                            <executable>java</executable>
+                            <commandlineArgs>-cp %classpath io.github.jbellis.jvector.bench.benchtools.EventLogAnalyzer ${args}</commandlineArgs>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
         </plugins>
     </build>
-</project>
+</project>