apache
diff --git a/gradle/java/javac.gradle b/gradle/java/javac.gradle
Lines changed: 2 additions & 8 deletions
diff --git a/gradle/testing/defaults-tests.gradle b/gradle/testing/defaults-tests.gradle
Lines changed: 0 additions & 2 deletions
diff --git a/gradle/testing/randomization/policies/tests.policy b/gradle/testing/randomization/policies/tests.policy
Lines changed: 7 additions & 1 deletion
diff --git a/lucene/benchmark-jmh/build.gradle b/lucene/benchmark-jmh/build.gradle
Lines changed: 1 addition & 0 deletions
diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorUtilBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorUtilBenchmark.java
Lines changed: 72 additions & 17 deletions
diff --git a/lucene/core/build.gradle b/lucene/core/build.gradle
Lines changed: 4 additions & 45 deletions
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java
Lines changed: 8 additions & 3 deletions
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/OffHeapQuantizedByteVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/OffHeapQuantizedByteVectorValues.java
Lines changed: 1 addition & 0 deletions
diff --git a/lucene/core/src/java/org/apache/lucene/util/Constants.java b/lucene/core/src/java/org/apache/lucene/util/Constants.java
Lines changed: 4 additions & 0 deletions
diff --git a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorer.java b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorer.java
Lines changed: 44 additions & 1 deletion
@@ -24,11 +24,7 @@ allprojects { project ->
 
     // Use 'release' flag instead of 'source' and 'target'
     tasks.withType(JavaCompile) {
-      options.compilerArgs += ["--release", rootProject.minJavaVersion.toString(), "--enable-preview"]
-    }
-
-    tasks.withType(Test) {
-      jvmArgs += "--enable-preview"
+      options.compilerArgs += ["--release", rootProject.minJavaVersion.toString()]
     }
 
     // Configure warnings.
@@ -76,19 +72,17 @@ allprojects { project ->
         "-Xdoclint:-accessibility"
       ]
 
-      if (project.path == ":lucene:benchmark-jmh" ) {
+      if (project.path == ":lucene:benchmark-jmh") {
         // JMH benchmarks use JMH preprocessor and incubating modules.
       } else {
         // proc:none was added because of LOG4J2-1925 / JDK-8186647
         options.compilerArgs += [
             "-proc:none"
         ]
 
-        /**
         if (propertyOrDefault("javac.failOnWarnings", true).toBoolean()) {
           options.compilerArgs += "-Werror"
         }
-        */
       }
     }
   }
 
@@ -139,8 +139,6 @@ allprojects {
               ":lucene:test-framework"
       ] ? 'ALL-UNNAMED' : 'org.apache.lucene.core')
 
-      jvmArgs '-Djava.library.path=' + file("${buildDir}/libs/dotProduct/shared").absolutePath
-
       def loggingConfigFile = layout.projectDirectory.file("${resources}/logging.properties")
       def tempDir = layout.projectDirectory.dir(testsTmpDir.toString())
       jvmArgumentProviders.add(
 
@@ -52,6 +52,9 @@ grant {
   // Needed for DirectIODirectory to retrieve block size
   permission java.lang.RuntimePermission "getFileStoreAttributes";
 
+  // Needed to load native library containing optimized dot product implementation
+  permission java.lang.RuntimePermission "loadLibrary.dotProduct";
+
   // TestLockFactoriesMultiJVM opens a random port on 127.0.0.1 (port 0 = ephemeral port range):
   permission java.net.SocketPermission "127.0.0.1:0", "accept,listen,resolve";
   // Replicator tests connect to ephemeral ports
@@ -104,7 +107,10 @@ grant codeBase "file:${gradle.worker.jar}" {
 };
 
 grant {
-  permission java.security.AllPermission;
+  // Allow reading gradle worker JAR.
+  permission java.io.FilePermission "${gradle.worker.jar}", "read";
+  // Allow reading from classpath JARs (resources).
+  permission java.io.FilePermission "${gradle.user.home}${/}-", "read";
 };
 
 // Grant permissions to certain test-related JARs (https://github.com/apache/lucene/pull/13146)
 
@@ -38,6 +38,7 @@ tasks.matching { it.name == "forbiddenApisMain" }.configureEach {
   ])
 }
 
+
 // Skip certain infrastructure tasks that we can't use or don't care about.
 tasks.matching { it.name in [
     // Turn off JMH dependency checksums and licensing (it's GPL w/ classpath exception
 
@@ -16,9 +16,9 @@
  */
 package org.apache.lucene.benchmark.jmh;
 
-import java.lang.foreign.Arena;
-import java.lang.foreign.MemorySegment;
-import java.lang.foreign.ValueLayout;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.concurrent.TimeUnit;
 import org.apache.lucene.util.VectorUtil;
@@ -52,12 +52,11 @@ static void compressBytes(byte[] raw, byte[] compressed) {
   private float[] floatsB;
   private int expectedhalfByteDotProduct;
 
-  private MemorySegment nativeBytesA;
+  private Object nativeBytesA;
+  private Object nativeBytesB;
 
-  private MemorySegment nativeBytesB;
-
-  // @Param({"1", "128", "207", "256", "300", "512", "702", "1024"})
-  @Param({"768"})
+  /** Length of the benchmarked vectors; the native byte buffers are allocated with this size. */
+  @Param({"1", "128", "207", "256", "300", "512", "702", "1024"})
   int size;
 
   @Setup(Level.Iteration)
@@ -92,20 +91,76 @@ public void init() {
       floatsA[i] = random.nextFloat();
       floatsB[i] = random.nextFloat();
     }
-
-    Arena offHeap = Arena.ofAuto();
-    nativeBytesA = offHeap.allocate(size, ValueLayout.JAVA_BYTE.byteAlignment());
-    nativeBytesB = offHeap.allocate(size, ValueLayout.JAVA_BYTE.byteAlignment());
-    for (int i = 0; i < size; ++i) {
-      nativeBytesA.set(ValueLayout.JAVA_BYTE, i, (byte) random.nextInt(128));
-      nativeBytesA.set(ValueLayout.JAVA_BYTE, i, (byte) random.nextInt(128));
+    // Java 21+ specific initialization
+    final int runtimeVersion = Runtime.version().feature();
+    if (runtimeVersion >= 21) {
+      // Reflection based code to eliminate the use of Preview classes in JMH benchmarks
+      try {
+        final Class<?> vectorUtilSupportClass = VectorUtil.getVectorUtilSupportClass();
+        final var className = "org.apache.lucene.internal.vectorization.PanamaVectorUtilSupport";
+        if (vectorUtilSupportClass.getName().equals(className) == false) {
+          nativeBytesA = null;
+          nativeBytesB = null;
+        } else {
+          MethodHandles.Lookup lookup = MethodHandles.lookup();
+          final var MemorySegment = "java.lang.foreign.MemorySegment";
+          final var methodType =
+              MethodType.methodType(lookup.findClass(MemorySegment), byte[].class);
+          MethodHandle nativeMemorySegment =
+              lookup.findStatic(vectorUtilSupportClass, "nativeMemorySegment", methodType);
+          byte[] a = new byte[size];
+          byte[] b = new byte[size];
+          for (int i = 0; i < size; ++i) {
+            a[i] = (byte) random.nextInt(128);
+            b[i] = (byte) random.nextInt(128);
+          }
+          nativeBytesA = nativeMemorySegment.invoke(a);
+          nativeBytesB = nativeMemorySegment.invoke(b);
+        }
+      } catch (Throwable e) {
+        throw new RuntimeException(e);
+      }
+      /*
+      Arena offHeap = Arena.ofAuto();
+      nativeBytesA = offHeap.allocate(size, ValueLayout.JAVA_BYTE.byteAlignment());
+      nativeBytesB = offHeap.allocate(size, ValueLayout.JAVA_BYTE.byteAlignment());
+      for (int i = 0; i < size; ++i) {
+        nativeBytesA.set(ValueLayout.JAVA_BYTE, i, (byte) random.nextInt(128));
+        nativeBytesB.set(ValueLayout.JAVA_BYTE, i, (byte) random.nextInt(128));
+      }*/
     }
   }
 
+  /**
+   * High overhead (lower score) from using NATIVE_DOT_PRODUCT.invoke(nativeBytesA, nativeBytesB).
+   * Both nativeBytesA and nativeBytesB are offHeap MemorySegments created by invoking the method
+   * PanamaVectorUtilSupport.nativeMemorySegment(byte[]), which allocates these segments and copies
+   * bytes from the supplied byte[] to offHeap memory. The benchmark output below shows
+   * significantly more overhead. <b>NOTE:</b> Return type of dot8s() was set to void for the
+   * benchmark run to avoid boxing/unboxing overhead.
+   *
+   * <pre>
+   * Benchmark                  (size)   Mode  Cnt   Score   Error   Units
+   * VectorUtilBenchmark.dot8s     768  thrpt   15  36.406 ± 0.496  ops/us
+   * </pre>
+   *
+   * Much lower overhead was observed when preview APIs were used directly in JMH benchmarking code
+   * and an exact method invocation was made, as shown below: <b>return (int)
+   * VectorUtil.NATIVE_DOT_PRODUCT.invokeExact(nativeBytesA, nativeBytesB);</b>
+   *
+   * <pre>
+   * Benchmark                  (size)   Mode  Cnt   Score   Error   Units
+   * VectorUtilBenchmark.dot8s     768   thrpt   15   43.662 ± 0.818  ops/us
+   * </pre>
+   */
   @Benchmark
   @Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
-  public int dot8s() {
-    return VectorUtil.dot8s(nativeBytesA, nativeBytesB, size);
+  public void dot8s() {
+    try {
+      VectorUtil.NATIVE_DOT_PRODUCT.invoke(nativeBytesA, nativeBytesB);
+    } catch (Throwable e) {
+      throw new RuntimeException(e);
+    }
   }
 
   @Benchmark
 
@@ -14,63 +14,22 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-plugins {
-  id "c"
-}
 
 apply plugin: 'java-library'
-apply plugin: 'c'
 
 description = 'Lucene core library'
-model {
-  binaries {
-    all {
-      cCompiler.args "--shared", "-O3", "-march=native", "-funroll-loops"
-    }
-  }
-
-  toolChains {
-    gcc(Gcc) {
-        target("linux_aarch64") {
-            cCompiler.executable = System.getenv("CC")
-        }
-    }
-    clang(Clang) {
-        target("osx_aarch64"){
-            cCompiler.executable = System.getenv("CC")
-        }
-    }
-  }
-     
-  components {
-    dotProduct(NativeLibrarySpec) {
-      sources {
-        c {
-          source {
-            srcDir 'src/c' // Path to your C source files
-            include "**/*.c"
-          }
-          exportedHeaders {
-            srcDir "src/c"
-            include "**/*.h"
-          }
-        }
-      }
-    }
-  }
-
-}
-
-test.dependsOn 'dotProductSharedLibrary'
 
 dependencies {
   moduleTestImplementation project(':lucene:codecs')
   moduleTestImplementation project(':lucene:test-framework')
 }
 
 test {
+  build {
+    dependsOn ':lucene:native:build'
+  }
   systemProperty(
           "java.library.path",
-          file("${buildDir}/libs/dotProduct/shared").absolutePath
+          project(":lucene:native").layout.buildDirectory.get().asFile.absolutePath + "/libs/dotProduct/shared"
   )
 }
@@ -22,6 +22,7 @@
 import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
 import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
 import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
+import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
 import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
@@ -70,7 +71,8 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
 
   final byte bits;
   final boolean compress;
-  final Lucene99ScalarQuantizedVectorScorer flatVectorScorer;
+  // Typed as the FlatVectorsScorer interface: the constructor may install a non-default scorer.
+  final FlatVectorsScorer flatVectorScorer;
 
   /** Constructs a format using default graph construction parameters */
   public Lucene99ScalarQuantizedVectorsFormat() {
@@ -117,8 +119,11 @@ public Lucene99ScalarQuantizedVectorsFormat(
     this.bits = (byte) bits;
     this.confidenceInterval = confidenceInterval;
     this.compress = compress;
-    this.flatVectorScorer =
-        new Lucene99ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE);
+    FlatVectorsScorer scorer = FlatVectorScorerUtil.getLucene99FlatVectorsScorer();
+    if (scorer == DefaultFlatVectorScorer.INSTANCE) {
+      scorer = new Lucene99ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE);
+    }
+    this.flatVectorScorer = scorer;
   }
 
   public static float calculateDefaultConfidenceInterval(int vectorDimension) {
 
@@ -146,6 +146,7 @@ public float getScoreCorrectionConstant(int targetOrd) throws IOException {
     }
     slice.seek(((long) targetOrd * byteSize) + numBytes);
     slice.readFloats(scoreCorrectionConstant, 0, 1);
+    lastOrd = targetOrd;
     return scoreCorrectionConstant[0];
   }
 
 
@@ -100,6 +100,10 @@ private static boolean is64Bit() {
   /** true iff we know VFMA has faster throughput than separate vmul/vadd. */
   public static final boolean HAS_FAST_VECTOR_FMA = hasFastVectorFMA();
 
+  // TODO: also gate on a system property, i.e. <below condition> &&
+  // Boolean.parseBoolean(getSysProp("lucene.useNativeDotProduct", "False"))
+  public static final boolean NATIVE_DOT_PRODUCT_ENABLED = OS_ARCH.equalsIgnoreCase("aarch64");
+
   /** true iff we know FMA has faster throughput than separate mul/add. */
   public static final boolean HAS_FAST_SCALAR_FMA = hasFastScalarFMA();
 
 
@@ -17,14 +17,17 @@
 package org.apache.lucene.internal.vectorization;
 
 import java.io.IOException;
+import java.lang.foreign.Arena;
 import java.lang.foreign.MemorySegment;
+import java.lang.foreign.ValueLayout;
 import java.util.Optional;
 import org.apache.lucene.index.ByteVectorValues;
 import org.apache.lucene.index.KnnVectorValues;
 import org.apache.lucene.index.VectorSimilarityFunction;
 import org.apache.lucene.store.FilterIndexInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.MemorySegmentAccessInput;
+import org.apache.lucene.util.Constants;
 import org.apache.lucene.util.hnsw.RandomVectorScorer;
 
 abstract sealed class Lucene99MemorySegmentByteVectorScorer
@@ -34,6 +37,8 @@ abstract sealed class Lucene99MemorySegmentByteVectorScorer
   final MemorySegmentAccessInput input;
   final MemorySegment query;
   byte[] scratch;
+  MemorySegment offHeapScratch;
+  MemorySegment offHeapQuery;
 
   /**
    * Return an optional whose value, if present, is the scorer. Otherwise, an empty optional is
@@ -49,7 +54,10 @@ public static Optional<Lucene99MemorySegmentByteVectorScorer> create(
     checkInvariants(values.size(), values.getVectorByteLength(), input);
     return switch (type) {
       case COSINE -> Optional.of(new CosineScorer(msInput, values, queryVector));
-      case DOT_PRODUCT -> Optional.of(new DotProductScorer(msInput, values, queryVector));
+      case DOT_PRODUCT ->
+          Constants.NATIVE_DOT_PRODUCT_ENABLED == false
+              ? Optional.of(new DotProductScorer(msInput, values, queryVector))
+              : Optional.of(new NativeDotProductScorer(msInput, values, queryVector));
       case EUCLIDEAN -> Optional.of(new EuclideanScorer(msInput, values, queryVector));
       case MAXIMUM_INNER_PRODUCT ->
           Optional.of(new MaxInnerProductScorer(msInput, values, queryVector));
@@ -64,6 +72,20 @@ public static Optional<Lucene99MemorySegmentByteVectorScorer> create(
     this.query = MemorySegment.ofArray(queryVector);
   }
 
+  final MemorySegment getNativeSegment(int ord) throws IOException {
+    long byteOffset = (long) ord * vectorByteSize;
+    MemorySegment seg = input.segmentSliceOrNull(byteOffset, vectorByteSize);
+    if (seg == null) {
+      if (offHeapScratch == null) {
+        offHeapScratch =
+            Arena.ofAuto().allocate(vectorByteSize, ValueLayout.JAVA_BYTE.byteAlignment());
+      }
+      input.readBytes(byteOffset, offHeapScratch, 0, vectorByteSize);
+      seg = offHeapScratch;
+    }
+    return seg;
+  }
+
   final MemorySegment getSegment(int ord) throws IOException {
     checkOrdinal(ord);
     long byteOffset = (long) ord * vectorByteSize;
@@ -103,6 +125,27 @@ public float score(int node) throws IOException {
     }
   }
 
+  static final class NativeDotProductScorer extends Lucene99MemorySegmentByteVectorScorer {
+
+    NativeDotProductScorer(
+        MemorySegmentAccessInput input, KnnVectorValues values, byte[] queryVector) {
+      super(input, values, queryVector);
+      if (offHeapQuery == null) {
+        offHeapQuery =
+            Arena.ofAuto().allocate(vectorByteSize, ValueLayout.JAVA_BYTE.byteAlignment());
+      }
+      offHeapQuery.copyFrom(query);
+    }
+
+    @Override
+    public float score(int node) throws IOException {
+      checkOrdinal(node);
+      // divide by 2 * 2^14 (maximum absolute value of product of 2 signed bytes) * len
+      int raw = PanamaVectorUtilSupport.nativeDotProduct(offHeapQuery, getNativeSegment(node));
+      return 0.5f + raw / (float) (query.byteSize() * (1 << 15));
+    }
+  }
+
   static final class DotProductScorer extends Lucene99MemorySegmentByteVectorScorer {
     DotProductScorer(MemorySegmentAccessInput input, KnnVectorValues values, byte[] query) {
       super(input, values, query);
Original file line number	Diff line number	Diff line change
`@@ -24,11 +24,7 @@ allprojects { project ->`
`24`	`24`
`25`	`25`	`// Use 'release' flag instead of 'source' and 'target'`
`26`	`26`	`tasks.withType(JavaCompile) {`
`27`		`- options.compilerArgs += ["--release", rootProject.minJavaVersion.toString(), "--enable-preview"]`
`28`		`- }`
`29`		`-`
`30`		`- tasks.withType(Test) {`
`31`		`- jvmArgs += "--enable-preview"`
	`27`	`+ options.compilerArgs += ["--release", rootProject.minJavaVersion.toString()]`
`32`	`28`	`}`
`33`	`29`
`34`	`30`	`// Configure warnings.`
`@@ -76,19 +72,17 @@ allprojects { project ->`
`76`	`72`	`"-Xdoclint:-accessibility"`
`77`	`73`	`]`
`78`	`74`
`79`		`- if (project.path == ":lucene:benchmark-jmh" ) {`
	`75`	`+ if (project.path == ":lucene:benchmark-jmh") {`
`80`	`76`	`// JMH benchmarks use JMH preprocessor and incubating modules.`
`81`	`77`	`} else {`
`82`	`78`	`// proc:none was added because of LOG4J2-1925 / JDK-8186647`
`83`	`79`	`options.compilerArgs += [`
`84`	`80`	`"-proc:none"`
`85`	`81`	`]`
`86`	`82`
`87`		`- /**`
`88`	`83`	`if (propertyOrDefault("javac.failOnWarnings", true).toBoolean()) {`
`89`	`84`	`options.compilerArgs += "-Werror"`
`90`	`85`	`}`
`91`		`- */`
`92`	`86`	`}`
`93`	`87`	`}`
`94`	`88`	`}`
Original file line number	Diff line number	Diff line change
`@@ -38,6 +38,7 @@ tasks.matching { it.name == "forbiddenApisMain" }.configureEach {`
`38`	`38`	`])`
`39`	`39`	`}`
`40`	`40`
	`41`	`+`
`41`	`42`	`// Skip certain infrastructure tasks that we can't use or don't care about.`
`42`	`43`	`tasks.matching { it.name in [`
`43`	`44`	`// Turn off JMH dependency checksums and licensing (it's GPL w/ classpath exception`
Original file line number	Diff line number	Diff line change
`@@ -146,6 +146,7 @@ public float getScoreCorrectionConstant(int targetOrd) throws IOException {`
`146`	`146`	`}`
`147`	`147`	`slice.seek(((long) targetOrd * byteSize) + numBytes);`
`148`	`148`	`slice.readFloats(scoreCorrectionConstant, 0, 1);`
	`149`	`+ lastOrd = targetOrd;`
`149`	`150`	`return scoreCorrectionConstant[0];`
`150`	`151`	`}`
`151`	`152`