Use hppc IntIntHashMap to avoid Integer box/unbox when remapping vector ordinals during merge (#12950)

This commit is contained in:
Michael Sokolov 2023-12-15 14:24:05 -04:00 committed by GitHub
parent 423f8279f0
commit 49d521145d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 5 additions and 5 deletions

View File

@@ -19,7 +19,6 @@ package org.apache.lucene.util.hnsw;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
-import java.util.Map;
import org.apache.lucene.codecs.HnswGraphProvider;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
@@ -30,9 +29,9 @@ import org.apache.lucene.index.MergeState;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.InfoStream;
+import org.apache.lucene.util.hppc.IntIntHashMap;
/**
* This selects the biggest Hnsw graph from the provided merge state and initializes a new
@@ -162,7 +161,7 @@ public class IncrementalHnswGraphMerger implements HnswGraphMerger {
case FLOAT32 -> initializerIterator = initReader.getFloatVectorValues(fieldInfo.name);
}
-Map<Integer, Integer> newIdToOldOrdinal = CollectionUtil.newHashMap(initGraphSize);
+IntIntHashMap newIdToOldOrdinal = new IntIntHashMap(initGraphSize);
int oldOrd = 0;
int maxNewDocID = -1;
for (int oldId = initializerIterator.nextDoc();
@@ -182,9 +181,10 @@ public class IncrementalHnswGraphMerger implements HnswGraphMerger {
for (int newDocId = mergedVectorIterator.nextDoc();
newDocId <= maxNewDocID;
newDocId = mergedVectorIterator.nextDoc()) {
-if (newIdToOldOrdinal.containsKey(newDocId)) {
+int hashDocIndex = newIdToOldOrdinal.indexOf(newDocId);
+if (newIdToOldOrdinal.indexExists(hashDocIndex)) {
initializedNodes.set(newOrd);
-oldToNewOrdinalMap[newIdToOldOrdinal.get(newDocId)] = newOrd;
+oldToNewOrdinalMap[newIdToOldOrdinal.indexGet(hashDocIndex)] = newOrd;
}
newOrd++;
}