mirror of https://github.com/apache/lucene.git
Fix HNSW graph reading with excessive connections (#12571)
When re-using the HNSW graph during segment merges, it is possible for a vector to end up with more than the configured M*2 connections. In those instances, the graph should still be readable from the codec and remain searchable.
This commit is contained in:
parent
1d0edd76a5
commit
fe348de619
|
@ -236,6 +236,8 @@ Bug Fixes
|
|||
* GITHUB#12555: Fix bug in TermsEnum#seekCeil on doc values terms enums
|
||||
that causes IndexOutOfBoundsException. (Egor Potemkin)
|
||||
|
||||
* GITHUB#12571: Fix HNSW graph read bug when built with excessive connections. (Ben Trent)
|
||||
|
||||
Other
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.lucene.store.DataInput;
|
|||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.RandomAccessInput;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
@ -457,7 +458,7 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader {
|
|||
private final DirectMonotonicReader graphLevelNodeOffsets;
|
||||
private final long[] graphLevelNodeIndexOffsets;
|
||||
// Initially sized to M*2 to track the current neighbors being explored; grown on demand when a node has more connections
|
||||
private final int[] currentNeighborsBuffer;
|
||||
private int[] currentNeighborsBuffer;
|
||||
|
||||
OffHeapHnswGraph(FieldEntry entry, IndexInput vectorIndex) throws IOException {
|
||||
this.dataIn =
|
||||
|
@ -491,6 +492,9 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader {
|
|||
dataIn.seek(graphLevelNodeOffsets.get(targetIndex + graphLevelNodeIndexOffsets[level]));
|
||||
arcCount = dataIn.readVInt();
|
||||
if (arcCount > 0) {
|
||||
if (arcCount > currentNeighborsBuffer.length) {
|
||||
currentNeighborsBuffer = ArrayUtil.grow(currentNeighborsBuffer, arcCount);
|
||||
}
|
||||
currentNeighborsBuffer[0] = dataIn.readVInt();
|
||||
for (int i = 1; i < arcCount; i++) {
|
||||
currentNeighborsBuffer[i] = currentNeighborsBuffer[i - 1] + dataIn.readVInt();
|
||||
|
|
Loading…
Reference in New Issue