mirror of https://github.com/apache/lucene.git
Convert FieldEntry to record (#13296)
Co-authored-by: iamsanjay <sanjaydutt.india@yahoo.com>
This commit is contained in:
parent
dca87235be
commit
dcb512289f
|
@ -222,6 +222,8 @@ Other
|
|||
* GITHUB#12753: Bump minimum required Java version to 21
|
||||
(Chris Hegarty, Robert Muir, Uwe Schindler)
|
||||
|
||||
* GITHUB#13296: Convert the FieldEntry, a static nested class, into a record. (Sanjay Dutt)
|
||||
|
||||
======================== Lucene 9.11.0 =======================
|
||||
|
||||
API Changes
|
||||
|
|
|
@ -210,7 +210,7 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
|
|||
+ " != "
|
||||
+ info.getVectorSimilarityFunction());
|
||||
}
|
||||
return new FieldEntry(input, info.getVectorSimilarityFunction());
|
||||
return FieldEntry.create(input, info.getVectorSimilarityFunction());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -303,37 +303,44 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
|
|||
IOUtils.close(vectorData, vectorIndex);
|
||||
}
|
||||
|
||||
private static class FieldEntry {
|
||||
private record FieldEntry(
|
||||
VectorSimilarityFunction similarityFunction,
|
||||
long vectorDataOffset,
|
||||
long vectorDataLength,
|
||||
long indexDataOffset,
|
||||
long indexDataLength,
|
||||
int dimension,
|
||||
int[] ordToDoc,
|
||||
long[] ordOffsets) {
|
||||
|
||||
final int dimension;
|
||||
final VectorSimilarityFunction similarityFunction;
|
||||
|
||||
final long vectorDataOffset;
|
||||
final long vectorDataLength;
|
||||
final long indexDataOffset;
|
||||
final long indexDataLength;
|
||||
final int[] ordToDoc;
|
||||
final long[] ordOffsets;
|
||||
|
||||
FieldEntry(DataInput input, VectorSimilarityFunction similarityFunction) throws IOException {
|
||||
this.similarityFunction = similarityFunction;
|
||||
vectorDataOffset = input.readVLong();
|
||||
vectorDataLength = input.readVLong();
|
||||
indexDataOffset = input.readVLong();
|
||||
indexDataLength = input.readVLong();
|
||||
dimension = input.readInt();
|
||||
int size = input.readInt();
|
||||
ordToDoc = new int[size];
|
||||
static FieldEntry create(DataInput input, VectorSimilarityFunction similarityFunction)
|
||||
throws IOException {
|
||||
final var vectorDataOffset = input.readVLong();
|
||||
final var vectorDataLength = input.readVLong();
|
||||
final var indexDataOffset = input.readVLong();
|
||||
final var indexDataLength = input.readVLong();
|
||||
final var dimension = input.readInt();
|
||||
final var size = input.readInt();
|
||||
final var ordToDoc = new int[size];
|
||||
for (int i = 0; i < size; i++) {
|
||||
int doc = input.readVInt();
|
||||
ordToDoc[i] = doc;
|
||||
}
|
||||
ordOffsets = new long[size()];
|
||||
final var ordOffsets = new long[size];
|
||||
long offset = 0;
|
||||
for (int i = 0; i < ordOffsets.length; i++) {
|
||||
offset += input.readVLong();
|
||||
ordOffsets[i] = offset;
|
||||
}
|
||||
return new FieldEntry(
|
||||
similarityFunction,
|
||||
vectorDataOffset,
|
||||
vectorDataLength,
|
||||
indexDataOffset,
|
||||
indexDataLength,
|
||||
dimension,
|
||||
ordToDoc,
|
||||
ordOffsets);
|
||||
}
|
||||
|
||||
int size() {
|
||||
|
|
|
@ -202,7 +202,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
|
|||
+ " != "
|
||||
+ info.getVectorSimilarityFunction());
|
||||
}
|
||||
return new FieldEntry(input, info.getVectorSimilarityFunction());
|
||||
return FieldEntry.create(input, info.getVectorSimilarityFunction());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -286,32 +286,30 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
|
|||
IOUtils.close(vectorData, vectorIndex);
|
||||
}
|
||||
|
||||
private static class FieldEntry {
|
||||
|
||||
final VectorSimilarityFunction similarityFunction;
|
||||
final long vectorDataOffset;
|
||||
final long vectorDataLength;
|
||||
final long vectorIndexOffset;
|
||||
final long vectorIndexLength;
|
||||
final int maxConn;
|
||||
final int numLevels;
|
||||
final int dimension;
|
||||
private final int size;
|
||||
final int[] ordToDoc;
|
||||
private final IntUnaryOperator ordToDocOperator;
|
||||
final int[][] nodesByLevel;
|
||||
// for each level the start offsets in vectorIndex file from where to read neighbours
|
||||
final long[] graphOffsetsByLevel;
|
||||
|
||||
FieldEntry(DataInput input, VectorSimilarityFunction similarityFunction) throws IOException {
|
||||
this.similarityFunction = similarityFunction;
|
||||
vectorDataOffset = input.readVLong();
|
||||
vectorDataLength = input.readVLong();
|
||||
vectorIndexOffset = input.readVLong();
|
||||
vectorIndexLength = input.readVLong();
|
||||
dimension = input.readInt();
|
||||
size = input.readInt();
|
||||
|
||||
private record FieldEntry(
|
||||
VectorSimilarityFunction similarityFunction,
|
||||
long vectorDataOffset,
|
||||
long vectorDataLength,
|
||||
long vectorIndexOffset,
|
||||
long vectorIndexLength,
|
||||
int maxConn,
|
||||
int numLevels,
|
||||
int dimension,
|
||||
int size,
|
||||
int[] ordToDoc,
|
||||
IntUnaryOperator ordToDocOperator,
|
||||
int[][] nodesByLevel,
|
||||
// for each level the start offsets in vectorIndex file from where to read neighbours
|
||||
long[] graphOffsetsByLevel) {
|
||||
static FieldEntry create(DataInput input, VectorSimilarityFunction similarityFunction)
|
||||
throws IOException {
|
||||
final var vectorDataOffset = input.readVLong();
|
||||
final var vectorDataLength = input.readVLong();
|
||||
final var vectorIndexOffset = input.readVLong();
|
||||
final var vectorIndexLength = input.readVLong();
|
||||
final var dimension = input.readInt();
|
||||
final var size = input.readInt();
|
||||
final int[] ordToDoc;
|
||||
int denseSparseMarker = input.readByte();
|
||||
if (denseSparseMarker == -1) {
|
||||
ordToDoc = null; // each document has a vector value
|
||||
|
@ -328,12 +326,13 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
|
|||
ordToDoc[i] = doc;
|
||||
}
|
||||
}
|
||||
ordToDocOperator = ordToDoc == null ? IntUnaryOperator.identity() : (ord) -> ordToDoc[ord];
|
||||
final IntUnaryOperator ordToDocOperator =
|
||||
ordToDoc == null ? IntUnaryOperator.identity() : (ord) -> ordToDoc[ord];
|
||||
|
||||
// read nodes by level
|
||||
maxConn = input.readInt();
|
||||
numLevels = input.readInt();
|
||||
nodesByLevel = new int[numLevels][];
|
||||
final var maxConn = input.readInt();
|
||||
final var numLevels = input.readInt();
|
||||
final var nodesByLevel = new int[numLevels][];
|
||||
for (int level = 0; level < numLevels; level++) {
|
||||
int numNodesOnLevel = input.readInt();
|
||||
if (level == 0) {
|
||||
|
@ -350,7 +349,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
|
|||
|
||||
// calculate for each level the start offsets in vectorIndex file from where to read
|
||||
// neighbours
|
||||
graphOffsetsByLevel = new long[numLevels];
|
||||
final var graphOffsetsByLevel = new long[numLevels];
|
||||
final long connectionsAndSizeBytes =
|
||||
Math.multiplyExact(Math.addExact(1L, maxConn), Integer.BYTES);
|
||||
for (int level = 0; level < numLevels; level++) {
|
||||
|
@ -364,10 +363,21 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
|
|||
Math.multiplyExact(connectionsAndSizeBytes, numNodesOnPrevLevel));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int size() {
|
||||
return size;
|
||||
return new FieldEntry(
|
||||
similarityFunction,
|
||||
vectorDataOffset,
|
||||
vectorDataLength,
|
||||
vectorIndexOffset,
|
||||
vectorIndexLength,
|
||||
maxConn,
|
||||
numLevels,
|
||||
dimension,
|
||||
size,
|
||||
ordToDoc,
|
||||
ordToDocOperator,
|
||||
nodesByLevel,
|
||||
graphOffsetsByLevel);
|
||||
}
|
||||
|
||||
int ordToDoc(int ord) {
|
||||
|
|
|
@ -201,7 +201,7 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
|
|||
+ " != "
|
||||
+ info.getVectorSimilarityFunction());
|
||||
}
|
||||
return new FieldEntry(input, info.getVectorSimilarityFunction());
|
||||
return FieldEntry.create(input, info.getVectorSimilarityFunction());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -257,52 +257,54 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
|
|||
IOUtils.close(vectorData, vectorIndex);
|
||||
}
|
||||
|
||||
static class FieldEntry {
|
||||
static record FieldEntry(
|
||||
VectorSimilarityFunction similarityFunction,
|
||||
long vectorDataOffset,
|
||||
long vectorDataLength,
|
||||
long vectorIndexOffset,
|
||||
long vectorIndexLength,
|
||||
int M,
|
||||
int numLevels,
|
||||
int dimension,
|
||||
int size,
|
||||
int[][] nodesByLevel,
|
||||
// for each level the start offsets in vectorIndex file from where to read neighbours
|
||||
long[] graphOffsetsByLevel,
|
||||
|
||||
final VectorSimilarityFunction similarityFunction;
|
||||
final long vectorDataOffset;
|
||||
final long vectorDataLength;
|
||||
final long vectorIndexOffset;
|
||||
final long vectorIndexLength;
|
||||
final int M;
|
||||
final int numLevels;
|
||||
final int dimension;
|
||||
final int size;
|
||||
final int[][] nodesByLevel;
|
||||
// for each level the start offsets in vectorIndex file from where to read neighbours
|
||||
final long[] graphOffsetsByLevel;
|
||||
// the following four variables used to read docIds encoded by IndexDISI
|
||||
// special values of docsWithFieldOffset are -1 and -2
|
||||
// -1 : dense
|
||||
// -2 : empty
|
||||
// other: sparse
|
||||
long docsWithFieldOffset,
|
||||
long docsWithFieldLength,
|
||||
short jumpTableEntryCount,
|
||||
byte denseRankPower,
|
||||
|
||||
// the following four variables used to read docIds encoded by IndexDISI
|
||||
// special values of docsWithFieldOffset are -1 and -2
|
||||
// -1 : dense
|
||||
// -2 : empty
|
||||
// other: sparse
|
||||
final long docsWithFieldOffset;
|
||||
final long docsWithFieldLength;
|
||||
final short jumpTableEntryCount;
|
||||
final byte denseRankPower;
|
||||
// the following four variables used to read ordToDoc encoded by DirectMonotonicWriter
|
||||
// note that only the sparse case needs to store ordToDoc
|
||||
long addressesOffset,
|
||||
int blockShift,
|
||||
DirectMonotonicReader.Meta meta,
|
||||
long addressesLength) {
|
||||
static FieldEntry create(IndexInput input, VectorSimilarityFunction similarityFunction)
|
||||
throws IOException {
|
||||
final var vectorDataOffset = input.readVLong();
|
||||
final var vectorDataLength = input.readVLong();
|
||||
final var vectorIndexOffset = input.readVLong();
|
||||
final var vectorIndexLength = input.readVLong();
|
||||
final var dimension = input.readInt();
|
||||
final var size = input.readInt();
|
||||
|
||||
// the following four variables used to read ordToDoc encoded by DirectMonotonicWriter
|
||||
// note that only the sparse case needs to store ordToDoc
|
||||
final long addressesOffset;
|
||||
final int blockShift;
|
||||
final DirectMonotonicReader.Meta meta;
|
||||
final long addressesLength;
|
||||
|
||||
FieldEntry(IndexInput input, VectorSimilarityFunction similarityFunction) throws IOException {
|
||||
this.similarityFunction = similarityFunction;
|
||||
vectorDataOffset = input.readVLong();
|
||||
vectorDataLength = input.readVLong();
|
||||
vectorIndexOffset = input.readVLong();
|
||||
vectorIndexLength = input.readVLong();
|
||||
dimension = input.readInt();
|
||||
size = input.readInt();
|
||||
|
||||
docsWithFieldOffset = input.readLong();
|
||||
docsWithFieldLength = input.readLong();
|
||||
jumpTableEntryCount = input.readShort();
|
||||
denseRankPower = input.readByte();
|
||||
final var docsWithFieldOffset = input.readLong();
|
||||
final var docsWithFieldLength = input.readLong();
|
||||
final var jumpTableEntryCount = input.readShort();
|
||||
final var denseRankPower = input.readByte();
|
||||
|
||||
final long addressesOffset;
|
||||
final int blockShift;
|
||||
final DirectMonotonicReader.Meta meta;
|
||||
final long addressesLength;
|
||||
// dense or empty
|
||||
if (docsWithFieldOffset == -1 || docsWithFieldOffset == -2) {
|
||||
addressesOffset = 0;
|
||||
|
@ -318,9 +320,9 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
|
|||
}
|
||||
|
||||
// read nodes by level
|
||||
M = input.readInt();
|
||||
numLevels = input.readInt();
|
||||
nodesByLevel = new int[numLevels][];
|
||||
final var M = input.readInt();
|
||||
final var numLevels = input.readInt();
|
||||
final var nodesByLevel = new int[numLevels][];
|
||||
for (int level = 0; level < numLevels; level++) {
|
||||
int numNodesOnLevel = input.readInt();
|
||||
if (level == 0) {
|
||||
|
@ -337,7 +339,7 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
|
|||
|
||||
// calculate for each level the start offsets in vectorIndex file from where to read
|
||||
// neighbours
|
||||
graphOffsetsByLevel = new long[numLevels];
|
||||
final var graphOffsetsByLevel = new long[numLevels];
|
||||
final long connectionsAndSizeLevel0Bytes =
|
||||
Math.multiplyExact(Math.addExact(1, Math.multiplyExact(M, 2L)), Integer.BYTES);
|
||||
final long connectionsAndSizeBytes = Math.multiplyExact(Math.addExact(1L, M), Integer.BYTES);
|
||||
|
@ -354,10 +356,26 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
|
|||
Math.multiplyExact(connectionsAndSizeBytes, numNodesOnPrevLevel));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int size() {
|
||||
return size;
|
||||
return new FieldEntry(
|
||||
similarityFunction,
|
||||
vectorDataOffset,
|
||||
vectorDataLength,
|
||||
vectorIndexOffset,
|
||||
vectorIndexLength,
|
||||
M,
|
||||
numLevels,
|
||||
dimension,
|
||||
size,
|
||||
nodesByLevel,
|
||||
graphOffsetsByLevel,
|
||||
docsWithFieldOffset,
|
||||
docsWithFieldLength,
|
||||
jumpTableEntryCount,
|
||||
denseRankPower,
|
||||
addressesOffset,
|
||||
blockShift,
|
||||
meta,
|
||||
addressesLength);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -68,13 +68,14 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
|
||||
static OffHeapFloatVectorValues load(
|
||||
Lucene92HnswVectorsReader.FieldEntry fieldEntry, IndexInput vectorData) throws IOException {
|
||||
if (fieldEntry.docsWithFieldOffset == -2) {
|
||||
return new EmptyOffHeapVectorValues(fieldEntry.dimension);
|
||||
if (fieldEntry.docsWithFieldOffset() == -2) {
|
||||
return new EmptyOffHeapVectorValues(fieldEntry.dimension());
|
||||
}
|
||||
IndexInput bytesSlice =
|
||||
vectorData.slice("vector-data", fieldEntry.vectorDataOffset, fieldEntry.vectorDataLength);
|
||||
if (fieldEntry.docsWithFieldOffset == -1) {
|
||||
return new DenseOffHeapVectorValues(fieldEntry.dimension, fieldEntry.size, bytesSlice);
|
||||
vectorData.slice(
|
||||
"vector-data", fieldEntry.vectorDataOffset(), fieldEntry.vectorDataLength());
|
||||
if (fieldEntry.docsWithFieldOffset() == -1) {
|
||||
return new DenseOffHeapVectorValues(fieldEntry.dimension(), fieldEntry.size(), bytesSlice);
|
||||
} else {
|
||||
return new SparseOffHeapVectorValues(fieldEntry, vectorData, bytesSlice);
|
||||
}
|
||||
|
@ -134,20 +135,20 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
Lucene92HnswVectorsReader.FieldEntry fieldEntry, IndexInput dataIn, IndexInput slice)
|
||||
throws IOException {
|
||||
|
||||
super(fieldEntry.dimension, fieldEntry.size, slice);
|
||||
super(fieldEntry.dimension(), fieldEntry.size(), slice);
|
||||
this.fieldEntry = fieldEntry;
|
||||
final RandomAccessInput addressesData =
|
||||
dataIn.randomAccessSlice(fieldEntry.addressesOffset, fieldEntry.addressesLength);
|
||||
dataIn.randomAccessSlice(fieldEntry.addressesOffset(), fieldEntry.addressesLength());
|
||||
this.dataIn = dataIn;
|
||||
this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta, addressesData);
|
||||
this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta(), addressesData);
|
||||
this.disi =
|
||||
new IndexedDISI(
|
||||
dataIn,
|
||||
fieldEntry.docsWithFieldOffset,
|
||||
fieldEntry.docsWithFieldLength,
|
||||
fieldEntry.jumpTableEntryCount,
|
||||
fieldEntry.denseRankPower,
|
||||
fieldEntry.size);
|
||||
fieldEntry.docsWithFieldOffset(),
|
||||
fieldEntry.docsWithFieldLength(),
|
||||
fieldEntry.jumpTableEntryCount(),
|
||||
fieldEntry.denseRankPower(),
|
||||
fieldEntry.size());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -219,7 +219,7 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
|
|||
+ " != "
|
||||
+ info.getVectorSimilarityFunction());
|
||||
}
|
||||
return new FieldEntry(input, vectorEncoding, info.getVectorSimilarityFunction());
|
||||
return FieldEntry.create(input, vectorEncoding, info.getVectorSimilarityFunction());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -307,58 +307,58 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
|
|||
IOUtils.close(vectorData, vectorIndex);
|
||||
}
|
||||
|
||||
static class FieldEntry {
|
||||
static record FieldEntry(
|
||||
VectorSimilarityFunction similarityFunction,
|
||||
VectorEncoding vectorEncoding,
|
||||
long vectorDataOffset,
|
||||
long vectorDataLength,
|
||||
long vectorIndexOffset,
|
||||
long vectorIndexLength,
|
||||
int M,
|
||||
int numLevels,
|
||||
int dimension,
|
||||
int size,
|
||||
int[][] nodesByLevel,
|
||||
// for each level the start offsets in vectorIndex file from where to read neighbours
|
||||
long[] graphOffsetsByLevel,
|
||||
|
||||
final VectorSimilarityFunction similarityFunction;
|
||||
final VectorEncoding vectorEncoding;
|
||||
final long vectorDataOffset;
|
||||
final long vectorDataLength;
|
||||
final long vectorIndexOffset;
|
||||
final long vectorIndexLength;
|
||||
final int M;
|
||||
final int numLevels;
|
||||
final int dimension;
|
||||
final int size;
|
||||
final int[][] nodesByLevel;
|
||||
// for each level the start offsets in vectorIndex file from where to read neighbours
|
||||
final long[] graphOffsetsByLevel;
|
||||
// the following four variables used to read docIds encoded by IndexDISI
|
||||
// special values of docsWithFieldOffset are -1 and -2
|
||||
// -1 : dense
|
||||
// -2 : empty
|
||||
// other: sparse
|
||||
long docsWithFieldOffset,
|
||||
long docsWithFieldLength,
|
||||
short jumpTableEntryCount,
|
||||
byte denseRankPower,
|
||||
|
||||
// the following four variables used to read docIds encoded by IndexDISI
|
||||
// special values of docsWithFieldOffset are -1 and -2
|
||||
// -1 : dense
|
||||
// -2 : empty
|
||||
// other: sparse
|
||||
final long docsWithFieldOffset;
|
||||
final long docsWithFieldLength;
|
||||
final short jumpTableEntryCount;
|
||||
final byte denseRankPower;
|
||||
|
||||
// the following four variables used to read ordToDoc encoded by DirectMonotonicWriter
|
||||
// note that only the sparse case needs to store ordToDoc
|
||||
final long addressesOffset;
|
||||
final int blockShift;
|
||||
final DirectMonotonicReader.Meta meta;
|
||||
final long addressesLength;
|
||||
|
||||
FieldEntry(
|
||||
// the following four variables used to read ordToDoc encoded by DirectMonotonicWriter
|
||||
// note that only the sparse case needs to store ordToDoc
|
||||
long addressesOffset,
|
||||
int blockShift,
|
||||
DirectMonotonicReader.Meta meta,
|
||||
long addressesLength) {
|
||||
static FieldEntry create(
|
||||
IndexInput input,
|
||||
VectorEncoding vectorEncoding,
|
||||
VectorSimilarityFunction similarityFunction)
|
||||
throws IOException {
|
||||
this.similarityFunction = similarityFunction;
|
||||
this.vectorEncoding = vectorEncoding;
|
||||
vectorDataOffset = input.readVLong();
|
||||
vectorDataLength = input.readVLong();
|
||||
vectorIndexOffset = input.readVLong();
|
||||
vectorIndexLength = input.readVLong();
|
||||
dimension = input.readInt();
|
||||
size = input.readInt();
|
||||
final var vectorDataOffset = input.readVLong();
|
||||
final var vectorDataLength = input.readVLong();
|
||||
final var vectorIndexOffset = input.readVLong();
|
||||
final var vectorIndexLength = input.readVLong();
|
||||
final var dimension = input.readInt();
|
||||
final var size = input.readInt();
|
||||
|
||||
docsWithFieldOffset = input.readLong();
|
||||
docsWithFieldLength = input.readLong();
|
||||
jumpTableEntryCount = input.readShort();
|
||||
denseRankPower = input.readByte();
|
||||
final var docsWithFieldOffset = input.readLong();
|
||||
final var docsWithFieldLength = input.readLong();
|
||||
final var jumpTableEntryCount = input.readShort();
|
||||
final var denseRankPower = input.readByte();
|
||||
|
||||
final long addressesOffset;
|
||||
final int blockShift;
|
||||
final DirectMonotonicReader.Meta meta;
|
||||
final long addressesLength;
|
||||
// dense or empty
|
||||
if (docsWithFieldOffset == -1 || docsWithFieldOffset == -2) {
|
||||
addressesOffset = 0;
|
||||
|
@ -374,9 +374,9 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
|
|||
}
|
||||
|
||||
// read nodes by level
|
||||
M = input.readInt();
|
||||
numLevels = input.readInt();
|
||||
nodesByLevel = new int[numLevels][];
|
||||
final var M = input.readInt();
|
||||
final var numLevels = input.readInt();
|
||||
final var nodesByLevel = new int[numLevels][];
|
||||
for (int level = 0; level < numLevels; level++) {
|
||||
int numNodesOnLevel = input.readInt();
|
||||
if (level == 0) {
|
||||
|
@ -393,7 +393,7 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
|
|||
|
||||
// calculate for each level the start offsets in vectorIndex file from where to read
|
||||
// neighbours
|
||||
graphOffsetsByLevel = new long[numLevels];
|
||||
final var graphOffsetsByLevel = new long[numLevels];
|
||||
final long connectionsAndSizeLevel0Bytes =
|
||||
Math.multiplyExact(Math.addExact(1, Math.multiplyExact(M, 2L)), Integer.BYTES);
|
||||
final long connectionsAndSizeBytes = Math.multiplyExact(Math.addExact(1L, M), Integer.BYTES);
|
||||
|
@ -410,10 +410,27 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
|
|||
Math.multiplyExact(connectionsAndSizeBytes, numNodesOnPrevLevel));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int size() {
|
||||
return size;
|
||||
return new FieldEntry(
|
||||
similarityFunction,
|
||||
vectorEncoding,
|
||||
vectorDataOffset,
|
||||
vectorDataLength,
|
||||
vectorIndexOffset,
|
||||
vectorIndexLength,
|
||||
M,
|
||||
numLevels,
|
||||
dimension,
|
||||
size,
|
||||
nodesByLevel,
|
||||
graphOffsetsByLevel,
|
||||
docsWithFieldOffset,
|
||||
docsWithFieldLength,
|
||||
jumpTableEntryCount,
|
||||
denseRankPower,
|
||||
addressesOffset,
|
||||
blockShift,
|
||||
meta,
|
||||
addressesLength);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -75,15 +75,17 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
|
|||
|
||||
static OffHeapByteVectorValues load(
|
||||
Lucene94HnswVectorsReader.FieldEntry fieldEntry, IndexInput vectorData) throws IOException {
|
||||
if (fieldEntry.docsWithFieldOffset == -2 || fieldEntry.vectorEncoding != VectorEncoding.BYTE) {
|
||||
return new EmptyOffHeapVectorValues(fieldEntry.dimension);
|
||||
if (fieldEntry.docsWithFieldOffset() == -2
|
||||
|| fieldEntry.vectorEncoding() != VectorEncoding.BYTE) {
|
||||
return new EmptyOffHeapVectorValues(fieldEntry.dimension());
|
||||
}
|
||||
IndexInput bytesSlice =
|
||||
vectorData.slice("vector-data", fieldEntry.vectorDataOffset, fieldEntry.vectorDataLength);
|
||||
int byteSize = fieldEntry.dimension;
|
||||
if (fieldEntry.docsWithFieldOffset == -1) {
|
||||
vectorData.slice(
|
||||
"vector-data", fieldEntry.vectorDataOffset(), fieldEntry.vectorDataLength());
|
||||
int byteSize = fieldEntry.dimension();
|
||||
if (fieldEntry.docsWithFieldOffset() == -1) {
|
||||
return new DenseOffHeapVectorValues(
|
||||
fieldEntry.dimension, fieldEntry.size, bytesSlice, byteSize);
|
||||
fieldEntry.dimension(), fieldEntry.size(), bytesSlice, byteSize);
|
||||
} else {
|
||||
return new SparseOffHeapVectorValues(fieldEntry, vectorData, bytesSlice, byteSize);
|
||||
}
|
||||
|
@ -146,20 +148,20 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
|
|||
int byteSize)
|
||||
throws IOException {
|
||||
|
||||
super(fieldEntry.dimension, fieldEntry.size, slice, byteSize);
|
||||
super(fieldEntry.dimension(), fieldEntry.size(), slice, byteSize);
|
||||
this.fieldEntry = fieldEntry;
|
||||
final RandomAccessInput addressesData =
|
||||
dataIn.randomAccessSlice(fieldEntry.addressesOffset, fieldEntry.addressesLength);
|
||||
dataIn.randomAccessSlice(fieldEntry.addressesOffset(), fieldEntry.addressesLength());
|
||||
this.dataIn = dataIn;
|
||||
this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta, addressesData);
|
||||
this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta(), addressesData);
|
||||
this.disi =
|
||||
new IndexedDISI(
|
||||
dataIn,
|
||||
fieldEntry.docsWithFieldOffset,
|
||||
fieldEntry.docsWithFieldLength,
|
||||
fieldEntry.jumpTableEntryCount,
|
||||
fieldEntry.denseRankPower,
|
||||
fieldEntry.size);
|
||||
fieldEntry.docsWithFieldOffset(),
|
||||
fieldEntry.docsWithFieldLength(),
|
||||
fieldEntry.jumpTableEntryCount(),
|
||||
fieldEntry.denseRankPower(),
|
||||
fieldEntry.size());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -68,19 +68,20 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
|
||||
static OffHeapFloatVectorValues load(
|
||||
Lucene94HnswVectorsReader.FieldEntry fieldEntry, IndexInput vectorData) throws IOException {
|
||||
if (fieldEntry.docsWithFieldOffset == -2) {
|
||||
return new EmptyOffHeapVectorValues(fieldEntry.dimension);
|
||||
if (fieldEntry.docsWithFieldOffset() == -2) {
|
||||
return new EmptyOffHeapVectorValues(fieldEntry.dimension());
|
||||
}
|
||||
IndexInput bytesSlice =
|
||||
vectorData.slice("vector-data", fieldEntry.vectorDataOffset, fieldEntry.vectorDataLength);
|
||||
vectorData.slice(
|
||||
"vector-data", fieldEntry.vectorDataOffset(), fieldEntry.vectorDataLength());
|
||||
int byteSize =
|
||||
switch (fieldEntry.vectorEncoding) {
|
||||
case BYTE -> fieldEntry.dimension;
|
||||
case FLOAT32 -> fieldEntry.dimension * Float.BYTES;
|
||||
switch (fieldEntry.vectorEncoding()) {
|
||||
case BYTE -> fieldEntry.dimension();
|
||||
case FLOAT32 -> fieldEntry.dimension() * Float.BYTES;
|
||||
};
|
||||
if (fieldEntry.docsWithFieldOffset == -1) {
|
||||
if (fieldEntry.docsWithFieldOffset() == -1) {
|
||||
return new DenseOffHeapVectorValues(
|
||||
fieldEntry.dimension, fieldEntry.size, bytesSlice, byteSize);
|
||||
fieldEntry.dimension(), fieldEntry.size(), bytesSlice, byteSize);
|
||||
} else {
|
||||
return new SparseOffHeapVectorValues(fieldEntry, vectorData, bytesSlice, byteSize);
|
||||
}
|
||||
|
@ -143,20 +144,20 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
int byteSize)
|
||||
throws IOException {
|
||||
|
||||
super(fieldEntry.dimension, fieldEntry.size, slice, byteSize);
|
||||
super(fieldEntry.dimension(), fieldEntry.size(), slice, byteSize);
|
||||
this.fieldEntry = fieldEntry;
|
||||
final RandomAccessInput addressesData =
|
||||
dataIn.randomAccessSlice(fieldEntry.addressesOffset, fieldEntry.addressesLength);
|
||||
dataIn.randomAccessSlice(fieldEntry.addressesOffset(), fieldEntry.addressesLength());
|
||||
this.dataIn = dataIn;
|
||||
this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta, addressesData);
|
||||
this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta(), addressesData);
|
||||
this.disi =
|
||||
new IndexedDISI(
|
||||
dataIn,
|
||||
fieldEntry.docsWithFieldOffset,
|
||||
fieldEntry.docsWithFieldLength,
|
||||
fieldEntry.jumpTableEntryCount,
|
||||
fieldEntry.denseRankPower,
|
||||
fieldEntry.size);
|
||||
fieldEntry.docsWithFieldOffset(),
|
||||
fieldEntry.docsWithFieldLength(),
|
||||
fieldEntry.jumpTableEntryCount(),
|
||||
fieldEntry.denseRankPower(),
|
||||
fieldEntry.size());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -229,7 +229,7 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
|
|||
+ " != "
|
||||
+ info.getVectorSimilarityFunction());
|
||||
}
|
||||
return new FieldEntry(input, vectorEncoding, info.getVectorSimilarityFunction());
|
||||
return FieldEntry.create(input, vectorEncoding, info.getVectorSimilarityFunction());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -360,50 +360,49 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
|
|||
IOUtils.close(vectorData, vectorIndex);
|
||||
}
|
||||
|
||||
static class FieldEntry implements Accountable {
|
||||
static record FieldEntry(
|
||||
VectorSimilarityFunction similarityFunction,
|
||||
VectorEncoding vectorEncoding,
|
||||
long vectorDataOffset,
|
||||
long vectorDataLength,
|
||||
long vectorIndexOffset,
|
||||
long vectorIndexLength,
|
||||
int M,
|
||||
int numLevels,
|
||||
int dimension,
|
||||
int size,
|
||||
int[][] nodesByLevel,
|
||||
// for each level the start offsets in vectorIndex file from where to read neighbours
|
||||
DirectMonotonicReader.Meta offsetsMeta,
|
||||
long offsetsOffset,
|
||||
int offsetsBlockShift,
|
||||
long offsetsLength,
|
||||
|
||||
// Contains the configuration for reading sparse vectors and translating vector ordinals to
|
||||
// docId
|
||||
OrdToDocDISIReaderConfiguration ordToDocVectorValues)
|
||||
implements Accountable {
|
||||
private static final long SHALLOW_SIZE =
|
||||
RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class);
|
||||
final VectorSimilarityFunction similarityFunction;
|
||||
final VectorEncoding vectorEncoding;
|
||||
final long vectorDataOffset;
|
||||
final long vectorDataLength;
|
||||
final long vectorIndexOffset;
|
||||
final long vectorIndexLength;
|
||||
final int M;
|
||||
final int numLevels;
|
||||
final int dimension;
|
||||
final int size;
|
||||
final int[][] nodesByLevel;
|
||||
// for each level the start offsets in vectorIndex file from where to read neighbours
|
||||
final DirectMonotonicReader.Meta offsetsMeta;
|
||||
final long offsetsOffset;
|
||||
final int offsetsBlockShift;
|
||||
final long offsetsLength;
|
||||
|
||||
// Contains the configuration for reading sparse vectors and translating vector ordinals to
|
||||
// docId
|
||||
OrdToDocDISIReaderConfiguration ordToDocVectorValues;
|
||||
|
||||
FieldEntry(
|
||||
static FieldEntry create(
|
||||
IndexInput input,
|
||||
VectorEncoding vectorEncoding,
|
||||
VectorSimilarityFunction similarityFunction)
|
||||
throws IOException {
|
||||
this.similarityFunction = similarityFunction;
|
||||
this.vectorEncoding = vectorEncoding;
|
||||
vectorDataOffset = input.readVLong();
|
||||
vectorDataLength = input.readVLong();
|
||||
vectorIndexOffset = input.readVLong();
|
||||
vectorIndexLength = input.readVLong();
|
||||
dimension = input.readVInt();
|
||||
size = input.readInt();
|
||||
final var vectorDataOffset = input.readVLong();
|
||||
final var vectorDataLength = input.readVLong();
|
||||
final var vectorIndexOffset = input.readVLong();
|
||||
final var vectorIndexLength = input.readVLong();
|
||||
final var dimension = input.readVInt();
|
||||
final var size = input.readInt();
|
||||
|
||||
ordToDocVectorValues = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size);
|
||||
final var ordToDocVectorValues = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size);
|
||||
|
||||
// read nodes by level
|
||||
M = input.readVInt();
|
||||
numLevels = input.readVInt();
|
||||
nodesByLevel = new int[numLevels][];
|
||||
final var M = input.readVInt();
|
||||
final var numLevels = input.readVInt();
|
||||
final var nodesByLevel = new int[numLevels][];
|
||||
long numberOfOffsets = 0;
|
||||
for (int level = 0; level < numLevels; level++) {
|
||||
if (level > 0) {
|
||||
|
@ -418,6 +417,10 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
|
|||
numberOfOffsets += size;
|
||||
}
|
||||
}
|
||||
final long offsetsOffset;
|
||||
final int offsetsBlockShift;
|
||||
final DirectMonotonicReader.Meta offsetsMeta;
|
||||
final long offsetsLength;
|
||||
if (numberOfOffsets > 0) {
|
||||
offsetsOffset = input.readLong();
|
||||
offsetsBlockShift = input.readVInt();
|
||||
|
@ -429,10 +432,23 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
|
|||
offsetsMeta = null;
|
||||
offsetsLength = 0;
|
||||
}
|
||||
}
|
||||
|
||||
int size() {
|
||||
return size;
|
||||
return new FieldEntry(
|
||||
similarityFunction,
|
||||
vectorEncoding,
|
||||
vectorDataOffset,
|
||||
vectorDataLength,
|
||||
vectorIndexOffset,
|
||||
vectorIndexLength,
|
||||
M,
|
||||
numLevels,
|
||||
dimension,
|
||||
size,
|
||||
nodesByLevel,
|
||||
offsetsMeta,
|
||||
offsetsOffset,
|
||||
offsetsBlockShift,
|
||||
offsetsLength,
|
||||
ordToDocVectorValues);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -275,21 +275,8 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
|
|||
dataIn.close();
|
||||
}
|
||||
|
||||
private static class FieldEntry {
|
||||
|
||||
final int dimension;
|
||||
|
||||
final long vectorDataOffset;
|
||||
final long vectorDataLength;
|
||||
final int[] ordToDoc;
|
||||
|
||||
FieldEntry(int dimension, long vectorDataOffset, long vectorDataLength, int[] ordToDoc) {
|
||||
this.dimension = dimension;
|
||||
this.vectorDataOffset = vectorDataOffset;
|
||||
this.vectorDataLength = vectorDataLength;
|
||||
this.ordToDoc = ordToDoc;
|
||||
}
|
||||
|
||||
private record FieldEntry(
|
||||
int dimension, long vectorDataOffset, long vectorDataLength, int[] ordToDoc) {
|
||||
int size() {
|
||||
return ordToDoc.length;
|
||||
}
|
||||
|
|
|
@ -152,61 +152,11 @@ public final class Lucene99FlatVectorsReader extends FlatVectorsReader {
|
|||
if (info == null) {
|
||||
throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
|
||||
}
|
||||
FieldEntry fieldEntry = readField(meta, info);
|
||||
validateFieldEntry(info, fieldEntry);
|
||||
FieldEntry fieldEntry = FieldEntry.create(meta, info);
|
||||
fields.put(info.name, fieldEntry);
|
||||
}
|
||||
}
|
||||
|
||||
private void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
|
||||
int dimension = info.getVectorDimension();
|
||||
if (dimension != fieldEntry.dimension) {
|
||||
throw new IllegalStateException(
|
||||
"Inconsistent vector dimension for field=\""
|
||||
+ info.name
|
||||
+ "\"; "
|
||||
+ dimension
|
||||
+ " != "
|
||||
+ fieldEntry.dimension);
|
||||
}
|
||||
|
||||
int byteSize =
|
||||
switch (info.getVectorEncoding()) {
|
||||
case BYTE -> Byte.BYTES;
|
||||
case FLOAT32 -> Float.BYTES;
|
||||
};
|
||||
long vectorBytes = Math.multiplyExact((long) dimension, byteSize);
|
||||
long numBytes = Math.multiplyExact(vectorBytes, fieldEntry.size);
|
||||
if (numBytes != fieldEntry.vectorDataLength) {
|
||||
throw new IllegalStateException(
|
||||
"Vector data length "
|
||||
+ fieldEntry.vectorDataLength
|
||||
+ " not matching size="
|
||||
+ fieldEntry.size
|
||||
+ " * dim="
|
||||
+ dimension
|
||||
+ " * byteSize="
|
||||
+ byteSize
|
||||
+ " = "
|
||||
+ numBytes);
|
||||
}
|
||||
}
|
||||
|
||||
private FieldEntry readField(IndexInput input, FieldInfo info) throws IOException {
|
||||
VectorEncoding vectorEncoding = readVectorEncoding(input);
|
||||
VectorSimilarityFunction similarityFunction = readSimilarityFunction(input);
|
||||
if (similarityFunction != info.getVectorSimilarityFunction()) {
|
||||
throw new IllegalStateException(
|
||||
"Inconsistent vector similarity function for field=\""
|
||||
+ info.name
|
||||
+ "\"; "
|
||||
+ similarityFunction
|
||||
+ " != "
|
||||
+ info.getVectorSimilarityFunction());
|
||||
}
|
||||
return new FieldEntry(input, vectorEncoding, info.getVectorSimilarityFunction());
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return Lucene99FlatVectorsReader.SHALLOW_SIZE
|
||||
|
@ -302,29 +252,78 @@ public final class Lucene99FlatVectorsReader extends FlatVectorsReader {
|
|||
IOUtils.close(vectorData);
|
||||
}
|
||||
|
||||
private static class FieldEntry implements Accountable {
|
||||
private static final long SHALLOW_SIZE =
|
||||
RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class);
|
||||
final VectorSimilarityFunction similarityFunction;
|
||||
final VectorEncoding vectorEncoding;
|
||||
final int dimension;
|
||||
final long vectorDataOffset;
|
||||
final long vectorDataLength;
|
||||
final int size;
|
||||
final OrdToDocDISIReaderConfiguration ordToDoc;
|
||||
private record FieldEntry(
|
||||
VectorSimilarityFunction similarityFunction,
|
||||
VectorEncoding vectorEncoding,
|
||||
long vectorDataOffset,
|
||||
long vectorDataLength,
|
||||
int dimension,
|
||||
int size,
|
||||
OrdToDocDISIReaderConfiguration ordToDoc,
|
||||
FieldInfo info)
|
||||
implements Accountable {
|
||||
static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class);
|
||||
|
||||
FieldEntry(
|
||||
IndexInput input,
|
||||
VectorEncoding vectorEncoding,
|
||||
VectorSimilarityFunction similarityFunction)
|
||||
throws IOException {
|
||||
this.similarityFunction = similarityFunction;
|
||||
this.vectorEncoding = vectorEncoding;
|
||||
vectorDataOffset = input.readVLong();
|
||||
vectorDataLength = input.readVLong();
|
||||
dimension = input.readVInt();
|
||||
size = input.readInt();
|
||||
ordToDoc = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size);
|
||||
FieldEntry {
|
||||
if (similarityFunction != info.getVectorSimilarityFunction()) {
|
||||
throw new IllegalStateException(
|
||||
"Inconsistent vector similarity function for field=\""
|
||||
+ info.name
|
||||
+ "\"; "
|
||||
+ similarityFunction
|
||||
+ " != "
|
||||
+ info.getVectorSimilarityFunction());
|
||||
}
|
||||
int infoVectorDimension = info.getVectorDimension();
|
||||
if (infoVectorDimension != dimension) {
|
||||
throw new IllegalStateException(
|
||||
"Inconsistent vector dimension for field=\""
|
||||
+ info.name
|
||||
+ "\"; "
|
||||
+ infoVectorDimension
|
||||
+ " != "
|
||||
+ dimension);
|
||||
}
|
||||
|
||||
int byteSize =
|
||||
switch (info.getVectorEncoding()) {
|
||||
case BYTE -> Byte.BYTES;
|
||||
case FLOAT32 -> Float.BYTES;
|
||||
};
|
||||
long vectorBytes = Math.multiplyExact((long) infoVectorDimension, byteSize);
|
||||
long numBytes = Math.multiplyExact(vectorBytes, size);
|
||||
if (numBytes != vectorDataLength) {
|
||||
throw new IllegalStateException(
|
||||
"Vector data length "
|
||||
+ vectorDataLength
|
||||
+ " not matching size="
|
||||
+ size
|
||||
+ " * dim="
|
||||
+ dimension
|
||||
+ " * byteSize="
|
||||
+ byteSize
|
||||
+ " = "
|
||||
+ numBytes);
|
||||
}
|
||||
}
|
||||
|
||||
static FieldEntry create(IndexInput input, FieldInfo info) throws IOException {
|
||||
final VectorEncoding vectorEncoding = readVectorEncoding(input);
|
||||
final VectorSimilarityFunction similarityFunction = readSimilarityFunction(input);
|
||||
final var vectorDataOffset = input.readVLong();
|
||||
final var vectorDataLength = input.readVLong();
|
||||
final var dimension = input.readVInt();
|
||||
final var size = input.readInt();
|
||||
final var ordToDoc = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size);
|
||||
return new FieldEntry(
|
||||
similarityFunction,
|
||||
vectorEncoding,
|
||||
vectorDataOffset,
|
||||
vectorDataLength,
|
||||
dimension,
|
||||
size,
|
||||
ordToDoc,
|
||||
info);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -219,7 +219,7 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
|
|||
+ " != "
|
||||
+ info.getVectorSimilarityFunction());
|
||||
}
|
||||
return new FieldEntry(input, vectorEncoding, info.getVectorSimilarityFunction());
|
||||
return FieldEntry.create(input, vectorEncoding, info.getVectorSimilarityFunction());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -347,40 +347,43 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
|
|||
return null;
|
||||
}
|
||||
|
||||
static class FieldEntry implements Accountable {
|
||||
private record FieldEntry(
|
||||
VectorSimilarityFunction similarityFunction,
|
||||
VectorEncoding vectorEncoding,
|
||||
long vectorIndexOffset,
|
||||
long vectorIndexLength,
|
||||
int M,
|
||||
int numLevels,
|
||||
int dimension,
|
||||
int size,
|
||||
int[][] nodesByLevel,
|
||||
// for each level the start offsets in vectorIndex file from where to read neighbours
|
||||
DirectMonotonicReader.Meta offsetsMeta,
|
||||
long offsetsOffset,
|
||||
int offsetsBlockShift,
|
||||
long offsetsLength)
|
||||
implements Accountable {
|
||||
private static final long SHALLOW_SIZE =
|
||||
RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class);
|
||||
final VectorSimilarityFunction similarityFunction;
|
||||
final VectorEncoding vectorEncoding;
|
||||
final long vectorIndexOffset;
|
||||
final long vectorIndexLength;
|
||||
final int M;
|
||||
final int numLevels;
|
||||
final int dimension;
|
||||
final int size;
|
||||
final int[][] nodesByLevel;
|
||||
// for each level the start offsets in vectorIndex file from where to read neighbours
|
||||
final DirectMonotonicReader.Meta offsetsMeta;
|
||||
final long offsetsOffset;
|
||||
final int offsetsBlockShift;
|
||||
final long offsetsLength;
|
||||
|
||||
FieldEntry(
|
||||
static FieldEntry create(
|
||||
IndexInput input,
|
||||
VectorEncoding vectorEncoding,
|
||||
VectorSimilarityFunction similarityFunction)
|
||||
throws IOException {
|
||||
this.similarityFunction = similarityFunction;
|
||||
this.vectorEncoding = vectorEncoding;
|
||||
vectorIndexOffset = input.readVLong();
|
||||
vectorIndexLength = input.readVLong();
|
||||
dimension = input.readVInt();
|
||||
size = input.readInt();
|
||||
final var vectorIndexOffset = input.readVLong();
|
||||
final var vectorIndexLength = input.readVLong();
|
||||
final var dimension = input.readVInt();
|
||||
final var size = input.readInt();
|
||||
// read nodes by level
|
||||
M = input.readVInt();
|
||||
numLevels = input.readVInt();
|
||||
nodesByLevel = new int[numLevels][];
|
||||
final var M = input.readVInt();
|
||||
final var numLevels = input.readVInt();
|
||||
final var nodesByLevel = new int[numLevels][];
|
||||
long numberOfOffsets = 0;
|
||||
final long offsetsOffset;
|
||||
final int offsetsBlockShift;
|
||||
final DirectMonotonicReader.Meta offsetsMeta;
|
||||
final long offsetsLength;
|
||||
for (int level = 0; level < numLevels; level++) {
|
||||
if (level > 0) {
|
||||
int numNodesOnLevel = input.readVInt();
|
||||
|
@ -405,10 +408,20 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
|
|||
offsetsMeta = null;
|
||||
offsetsLength = 0;
|
||||
}
|
||||
}
|
||||
|
||||
int size() {
|
||||
return size;
|
||||
return new FieldEntry(
|
||||
similarityFunction,
|
||||
vectorEncoding,
|
||||
vectorIndexOffset,
|
||||
vectorIndexLength,
|
||||
M,
|
||||
numLevels,
|
||||
dimension,
|
||||
size,
|
||||
nodesByLevel,
|
||||
offsetsMeta,
|
||||
offsetsOffset,
|
||||
offsetsBlockShift,
|
||||
offsetsLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -266,7 +266,8 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade
|
|||
+ " != "
|
||||
+ info.getVectorSimilarityFunction());
|
||||
}
|
||||
return new FieldEntry(input, versionMeta, vectorEncoding, info.getVectorSimilarityFunction());
|
||||
return FieldEntry.create(
|
||||
input, versionMeta, vectorEncoding, info.getVectorSimilarityFunction());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -295,32 +296,34 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade
|
|||
return fieldEntry.scalarQuantizer;
|
||||
}
|
||||
|
||||
private static class FieldEntry implements Accountable {
|
||||
private record FieldEntry(
|
||||
VectorSimilarityFunction similarityFunction,
|
||||
VectorEncoding vectorEncoding,
|
||||
int dimension,
|
||||
long vectorDataOffset,
|
||||
long vectorDataLength,
|
||||
ScalarQuantizer scalarQuantizer,
|
||||
int size,
|
||||
byte bits,
|
||||
boolean compress,
|
||||
OrdToDocDISIReaderConfiguration ordToDoc)
|
||||
implements Accountable {
|
||||
private static final long SHALLOW_SIZE =
|
||||
RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class);
|
||||
final VectorSimilarityFunction similarityFunction;
|
||||
final VectorEncoding vectorEncoding;
|
||||
final int dimension;
|
||||
final long vectorDataOffset;
|
||||
final long vectorDataLength;
|
||||
final ScalarQuantizer scalarQuantizer;
|
||||
final int size;
|
||||
final byte bits;
|
||||
final boolean compress;
|
||||
final OrdToDocDISIReaderConfiguration ordToDoc;
|
||||
|
||||
FieldEntry(
|
||||
static FieldEntry create(
|
||||
IndexInput input,
|
||||
int versionMeta,
|
||||
VectorEncoding vectorEncoding,
|
||||
VectorSimilarityFunction similarityFunction)
|
||||
throws IOException {
|
||||
this.similarityFunction = similarityFunction;
|
||||
this.vectorEncoding = vectorEncoding;
|
||||
vectorDataOffset = input.readVLong();
|
||||
vectorDataLength = input.readVLong();
|
||||
dimension = input.readVInt();
|
||||
size = input.readInt();
|
||||
final var vectorDataOffset = input.readVLong();
|
||||
final var vectorDataLength = input.readVLong();
|
||||
final var dimension = input.readVInt();
|
||||
final var size = input.readInt();
|
||||
final ScalarQuantizer scalarQuantizer;
|
||||
final byte bits;
|
||||
final boolean compress;
|
||||
if (size > 0) {
|
||||
if (versionMeta < Lucene99ScalarQuantizedVectorsFormat.VERSION_ADD_BITS) {
|
||||
int floatBits = input.readInt(); // confidenceInterval, unused
|
||||
|
@ -328,25 +331,36 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade
|
|||
throw new CorruptIndexException(
|
||||
"Missing confidence interval for scalar quantizer", input);
|
||||
}
|
||||
this.bits = (byte) 7;
|
||||
this.compress = false;
|
||||
bits = (byte) 7;
|
||||
compress = false;
|
||||
float minQuantile = Float.intBitsToFloat(input.readInt());
|
||||
float maxQuantile = Float.intBitsToFloat(input.readInt());
|
||||
scalarQuantizer = new ScalarQuantizer(minQuantile, maxQuantile, (byte) 7);
|
||||
} else {
|
||||
input.readInt(); // confidenceInterval, unused
|
||||
this.bits = input.readByte();
|
||||
this.compress = input.readByte() == 1;
|
||||
bits = input.readByte();
|
||||
compress = input.readByte() == 1;
|
||||
float minQuantile = Float.intBitsToFloat(input.readInt());
|
||||
float maxQuantile = Float.intBitsToFloat(input.readInt());
|
||||
scalarQuantizer = new ScalarQuantizer(minQuantile, maxQuantile, bits);
|
||||
}
|
||||
} else {
|
||||
scalarQuantizer = null;
|
||||
this.bits = (byte) 7;
|
||||
this.compress = false;
|
||||
bits = (byte) 7;
|
||||
compress = false;
|
||||
}
|
||||
ordToDoc = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size);
|
||||
final var ordToDoc = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size);
|
||||
return new FieldEntry(
|
||||
similarityFunction,
|
||||
vectorEncoding,
|
||||
dimension,
|
||||
vectorDataOffset,
|
||||
vectorDataLength,
|
||||
scalarQuantizer,
|
||||
size,
|
||||
bits,
|
||||
compress,
|
||||
ordToDoc);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue