mirror of
https://github.com/apache/lucene.git
synced 2025-02-10 03:55:46 +00:00
Fix HNSW graph reading with excessive connections (#12571)
When the HNSW graph is re-used during segment merges, a vector can end up with more than the configured M*2 connections. In those cases, the graph should still be readable from the codec and remain searchable.
This commit is contained in:
parent
278d0fc1f9
commit
485b8729b8
@ -120,6 +120,8 @@ Bug Fixes
|
||||
* GITHUB#12555: Fix bug in TermsEnum#seekCeil on doc values terms enums
|
||||
that causes IndexOutOfBoundsException. (Egor Potemkin)
|
||||
|
||||
* GITHUB#12571: Fix HNSW graph read bug when built with excessive connections. (Ben Trent)
|
||||
|
||||
Other
|
||||
---------------------
|
||||
|
||||
|
@ -41,6 +41,7 @@ import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.RandomAccessInput;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
@ -465,7 +466,7 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader {
|
||||
private final DirectMonotonicReader graphLevelNodeOffsets;
|
||||
private final long[] graphLevelNodeIndexOffsets;
|
||||
// Initially allocated to be M*2 to track the current neighbors being explored; grown on demand when a node has more connections
|
||||
private final int[] currentNeighborsBuffer;
|
||||
private int[] currentNeighborsBuffer;
|
||||
|
||||
OffHeapHnswGraph(FieldEntry entry, IndexInput vectorIndex) throws IOException {
|
||||
this.dataIn =
|
||||
@ -499,6 +500,9 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader {
|
||||
dataIn.seek(graphLevelNodeOffsets.get(targetIndex + graphLevelNodeIndexOffsets[level]));
|
||||
arcCount = dataIn.readVInt();
|
||||
if (arcCount > 0) {
|
||||
if (arcCount > currentNeighborsBuffer.length) {
|
||||
currentNeighborsBuffer = ArrayUtil.grow(currentNeighborsBuffer, arcCount);
|
||||
}
|
||||
currentNeighborsBuffer[0] = dataIn.readVInt();
|
||||
for (int i = 1; i < arcCount; i++) {
|
||||
currentNeighborsBuffer[i] = currentNeighborsBuffer[i - 1] + dataIn.readVInt();
|
||||
|
Loading…
x
Reference in New Issue
Block a user