Skip to content

Commit 279b5aa

Browse files
committed
rebase fix
Signed-off-by: Samuel Herman <sherman8915@gmail.com>
1 parent e043a63 commit 279b5aa

3 files changed

Lines changed: 66 additions & 8 deletions

File tree

jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -957,4 +957,58 @@ private void loadV3(RandomAccessReader in, int size) throws IOException {
957957
graph.updateEntryNode(new NodeAtLevel(0, entryNode));
958958
graph.setDegrees(List.of(maxDegree));
959959
}
960+
961+
/**
962+
* Convenience method to build a new graph from an existing on-disk graph, with the addition of new nodes.
963+
* This is useful when we want to merge a new set of vectors into an existing graph that is already on disk.
964+
* <p>
* A {@code GraphIndexBuilder} is created from {@code onDiskGraphIndex} and {@code perLevelNeighborsScoreCache},
* every ordinal in {@code [startingNodeOffset, newVectors.size())} is added to it in parallel, and the
* builder's {@code cleanup()} is invoked before the resulting graph is returned. The builder is closed
* when this method exits.
*
965+
* @param onDiskGraphIndex the on-disk representation of the graph index to be processed and converted.
966+
* @param perLevelNeighborsScoreCache the cache containing pre-computed neighbor scores; it is handed to the builder together with {@code onDiskGraphIndex}.
967+
* @param newVectors a superset RAVV (RandomAccessVectorValues) containing the new vectors to be added to the graph as well as the old ones that are already in the graph
968+
* @param buildScoreProvider the provider responsible for calculating build scores.
969+
* @param startingNodeOffset the first node ordinal to insert; ordinals from this value (inclusive) up to {@code newVectors.size()} (exclusive) are added as new graph nodes
970+
* @param graphToRavvOrdMap a mapping from graph node ids to the newVectors RAVV node ids. It is indexed by every inserted ordinal, so it must contain at least {@code newVectors.size()} entries.
971+
* @param beamWidth the width of the beam used during the graph building process.
972+
* @param overflowRatio the ratio of extra neighbors to allow temporarily when inserting a node.
973+
* @param alpha the weight factor for balancing score computations.
974+
* @param addHierarchy whether to add hierarchical structures while building the graph.
975+
*
976+
* @return the graph held by the builder after all new nodes were added and {@code cleanup()} was run.
977+
* @throws IOException if an I/O error occurs during the graph loading or conversion process.
978+
*/
979+
public static ImmutableGraphIndex buildAndMergeNewNodes(OnDiskGraphIndex onDiskGraphIndex,
980+
NeighborsScoreCache perLevelNeighborsScoreCache,
981+
RandomAccessVectorValues newVectors,
982+
BuildScoreProvider buildScoreProvider,
983+
int startingNodeOffset,
984+
int[] graphToRavvOrdMap,
985+
int beamWidth,
986+
float overflowRatio,
987+
float alpha,
988+
boolean addHierarchy) throws IOException {
989+
990+
991+
992+
try (GraphIndexBuilder builder = new GraphIndexBuilder(buildScoreProvider,
993+
onDiskGraphIndex,
994+
perLevelNeighborsScoreCache,
995+
beamWidth,
996+
overflowRatio,
997+
alpha,
998+
addHierarchy,
999+
true,
1000+
PhysicalCoreExecutor.pool(),
1001+
ForkJoinPool.commonPool())) {
1002+
1003+
// Per-thread view supplier so that each worker thread reads vectors through its own instance.
var vv = newVectors.threadLocalSupplier();
1004+
1005+
// Parallel graph construction over the ordinals of the nodes being merged in.
// The stream is wrapped in a task submitted to PhysicalCoreExecutor.pool() (presumably so the
// parallel work runs on that pool rather than the common pool -- confirm), and join() blocks
// until every insertion has finished.
1006+
PhysicalCoreExecutor.pool().submit(() -> IntStream.range(startingNodeOffset, newVectors.size()).parallel().forEach(ord -> {
1007+
// The graph node id is ord; its vector is looked up through the ordinal mapping.
builder.addGraphNode(ord, vv.get().getVector(graphToRavvOrdMap[ord]));
1008+
})).join();
1009+
1010+
// Run the builder's cleanup pass only after all insertions have completed.
builder.cleanup();
1011+
return builder.getGraph();
1012+
}
1013+
}
9601014
}

jvector-base/src/main/java/io/github/jbellis/jvector/graph/OnHeapGraphIndex.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,10 @@ public View getView() {
303303
}
304304
}
305305

306+
/** Creates and returns a new {@code FrozenView}; a fresh instance is constructed on every call. */
public FrozenView getFrozenView() {
307+
return new FrozenView();
308+
}
309+
306310
public ThreadSafeGrowableBitSet getDeletedNodes() {
307311
return deletedNodes;
308312
}
@@ -661,7 +665,7 @@ public static OnHeapGraphIndex convertToHeap(OnDiskGraphIndex diskIndex,
661665
final NodeArray neighbors = levelNeighborsScoreCache.get(nodeId).copy();
662666

663667
// Add the node with its neighbors
664-
heapIndex.addNode(level, nodeId, neighbors);
668+
heapIndex.connectNode(level, nodeId, neighbors);
665669
heapIndex.markComplete(new NodeAtLevel(level, nodeId));
666670
}
667671
}

jvector-tests/src/test/java/io/github/jbellis/jvector/graph/OnHeapGraphIndexTest.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,9 @@ public class OnHeapGraphIndexTest extends RandomizedTest {
7777
private BuildScoreProvider baseBuildScoreProvider;
7878
private BuildScoreProvider newBuildScoreProvider;
7979
private BuildScoreProvider allBuildScoreProvider;
80-
private OnHeapGraphIndex baseGraphIndex;
81-
private OnHeapGraphIndex newGraphIndex;
82-
private OnHeapGraphIndex allGraphIndex;
80+
private ImmutableGraphIndex baseGraphIndex;
81+
private ImmutableGraphIndex newGraphIndex;
82+
private ImmutableGraphIndex allGraphIndex;
8383

8484
@Before
8585
public void setup() throws IOException {
@@ -149,7 +149,7 @@ public void testReconstructionOfOnHeapGraphIndex() throws IOException {
149149
TestUtil.writeGraph(baseGraphIndex, baseVectorsRavv, graphOutputPath);
150150

151151
log.info("Writing neighbors score cache to {}", neighborsScoreCacheOutputPath);
152-
final NeighborsScoreCache neighborsScoreCache = new NeighborsScoreCache(baseGraphIndex);
152+
final NeighborsScoreCache neighborsScoreCache = new NeighborsScoreCache((OnHeapGraphIndex) baseGraphIndex);
153153
try (SimpleWriter writer = new SimpleWriter(neighborsScoreCacheOutputPath.toAbsolutePath())) {
154154
neighborsScoreCache.write(writer);
155155
}
@@ -182,14 +182,14 @@ public void testReconstructionOfOnHeapGraphIndex() throws IOException {
182182
public void testIncrementalInsertionFromOnDiskIndex() throws IOException {
183183
var outputPath = testDirectory.resolve("testReconstructionOfOnHeapGraphIndex_" + baseGraphIndex.getClass().getSimpleName());
184184
log.info("Writing graph to {}", outputPath);
185-
final NeighborsScoreCache neighborsScoreCache = new NeighborsScoreCache(baseGraphIndex);
185+
final NeighborsScoreCache neighborsScoreCache = new NeighborsScoreCache((OnHeapGraphIndex) baseGraphIndex);
186186
TestUtil.writeGraph(baseGraphIndex, baseVectorsRavv, outputPath);
187187
try (var readerSupplier = new SimpleMappedReader.Supplier(outputPath.toAbsolutePath());
188188
var onDiskGraph = OnDiskGraphIndex.load(readerSupplier)) {
189189
TestUtil.assertGraphEquals(baseGraphIndex, onDiskGraph);
190190
// We will create a trivial 1:1 mapping between the new graph and the ravv
191191
final int[] graphToRavvOrdMap = IntStream.range(0, allVectorsRavv.size()).toArray();
192-
OnHeapGraphIndex reconstructedAllNodeOnHeapGraphIndex = GraphIndexBuilder.buildAndMergeNewNodes(onDiskGraph, neighborsScoreCache, allVectorsRavv, allBuildScoreProvider, NUM_BASE_VECTORS, graphToRavvOrdMap, BEAM_WIDTH, NEIGHBOR_OVERFLOW, ALPHA, ADD_HIERARCHY);
192+
ImmutableGraphIndex reconstructedAllNodeOnHeapGraphIndex = GraphIndexBuilder.buildAndMergeNewNodes(onDiskGraph, neighborsScoreCache, allVectorsRavv, allBuildScoreProvider, NUM_BASE_VECTORS, graphToRavvOrdMap, BEAM_WIDTH, NEIGHBOR_OVERFLOW, ALPHA, ADD_HIERARCHY);
193193

194194
// Verify that the recall is similar
195195
float recallFromReconstructedAllNodeOnHeapGraphIndex = calculateRecall(reconstructedAllNodeOnHeapGraphIndex, allBuildScoreProvider, queryVector, groundTruthAllVectors, TOP_K);
@@ -231,7 +231,7 @@ private static int[] getGroundTruth(RandomAccessVectorValues ravv, VectorFloat<?
231231
return exactResults.stream().limit(topK).mapToInt(nodeScore -> nodeScore.node).toArray();
232232
}
233233

234-
private static float calculateRecall(OnHeapGraphIndex graphIndex, BuildScoreProvider buildScoreProvider, VectorFloat<?> queryVector, int[] groundTruth, int k) throws IOException {
234+
private static float calculateRecall(ImmutableGraphIndex graphIndex, BuildScoreProvider buildScoreProvider, VectorFloat<?> queryVector, int[] groundTruth, int k) throws IOException {
235235
try (GraphSearcher graphSearcher = new GraphSearcher(graphIndex)){
236236
SearchScoreProvider ssp = buildScoreProvider.searchProviderFor(queryVector);
237237
var searchResults = graphSearcher.search(ssp, k, Bits.ALL);

0 commit comments

Comments
 (0)