mirror of https://github.com/apache/lucene.git
Fix HNSW graph reading with excessive connections (#12571)
When the HNSW graph is re-used during segment merges, a vector can end up with more than the configured M*2 connections. In those cases, the graph should still be readable from the codec and remain searchable.
This commit is contained in:
parent
1d0edd76a5
commit
fe348de619
|
@ -236,6 +236,8 @@ Bug Fixes
|
||||||
* GITHUB#12555: Fix bug in TermsEnum#seekCeil on doc values terms enums
|
* GITHUB#12555: Fix bug in TermsEnum#seekCeil on doc values terms enums
|
||||||
that causes IndexOutOfBoundsException. (Egor Potemkin)
|
that causes IndexOutOfBoundsException. (Egor Potemkin)
|
||||||
|
|
||||||
|
* GITHUB#12571: Fix HNSW graph read bug when built with excessive connections. (Ben Trent)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.lucene.store.DataInput;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.store.RandomAccessInput;
|
import org.apache.lucene.store.RandomAccessInput;
|
||||||
import org.apache.lucene.util.Accountable;
|
import org.apache.lucene.util.Accountable;
|
||||||
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
@ -457,7 +458,7 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader {
|
||||||
private final DirectMonotonicReader graphLevelNodeOffsets;
|
private final DirectMonotonicReader graphLevelNodeOffsets;
|
||||||
private final long[] graphLevelNodeIndexOffsets;
|
private final long[] graphLevelNodeIndexOffsets;
|
||||||
// Allocated to be M*2 to track the current neighbors being explored
|
// Initially allocated to be M*2 to track the current neighbors being explored; grown on demand
// when a node is read with more connections than the buffer can hold
|
||||||
private final int[] currentNeighborsBuffer;
|
private int[] currentNeighborsBuffer;
|
||||||
|
|
||||||
OffHeapHnswGraph(FieldEntry entry, IndexInput vectorIndex) throws IOException {
|
OffHeapHnswGraph(FieldEntry entry, IndexInput vectorIndex) throws IOException {
|
||||||
this.dataIn =
|
this.dataIn =
|
||||||
|
@ -491,6 +492,9 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader {
|
||||||
dataIn.seek(graphLevelNodeOffsets.get(targetIndex + graphLevelNodeIndexOffsets[level]));
|
dataIn.seek(graphLevelNodeOffsets.get(targetIndex + graphLevelNodeIndexOffsets[level]));
|
||||||
arcCount = dataIn.readVInt();
|
arcCount = dataIn.readVInt();
|
||||||
if (arcCount > 0) {
|
if (arcCount > 0) {
|
||||||
|
if (arcCount > currentNeighborsBuffer.length) {
|
||||||
|
currentNeighborsBuffer = ArrayUtil.grow(currentNeighborsBuffer, arcCount);
|
||||||
|
}
|
||||||
currentNeighborsBuffer[0] = dataIn.readVInt();
|
currentNeighborsBuffer[0] = dataIn.readVInt();
|
||||||
for (int i = 1; i < arcCount; i++) {
|
for (int i = 1; i < arcCount; i++) {
|
||||||
currentNeighborsBuffer[i] = currentNeighborsBuffer[i - 1] + dataIn.readVInt();
|
currentNeighborsBuffer[i] = currentNeighborsBuffer[i - 1] + dataIn.readVInt();
|
||||||
|
|
Loading…
Reference in New Issue