Convert FieldEntry to record (#13296)

Co-authored-by: iamsanjay <sanjaydutt.india@yahoo.com>
Authored by Sanjay Dutt on 2024-04-16 15:04:44 +05:30; committed by GitHub
parent dca87235be
commit dcb512289f
13 changed files with 474 additions and 387 deletions


@@ -222,6 +222,8 @@ Other
* GITHUB#12753: Bump minimum required Java version to 21
(Chris Hegarty, Robert Muir, Uwe Schindler)
* GITHUB#13296: Convert the FieldEntry, a static nested class, into a record. (Sanjay Dutt)
======================== Lucene 9.11.0 =======================
API Changes
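
A note on the recurring pattern: every reader below swaps a reading constructor for a record plus a static create factory. The sketch here is hypothetical and simplified (java.io.DataInput in place of Lucene's DataInput, readLong in place of readVLong), but it shows the constraint driving the shape of these diffs: a record's canonical constructor may not declare a throws clause, so the IOException-throwing reads move into a factory that computes the components and then calls the generated constructor.

import java.io.DataInput;
import java.io.IOException;

final class RecordConversionSketch {
  // Before: a static nested class whose constructor performs the I/O.
  static final class OldFieldEntry {
    final long vectorDataOffset;
    final int dimension;

    OldFieldEntry(DataInput input) throws IOException {
      vectorDataOffset = input.readLong();
      dimension = input.readInt();
    }
  }

  // After: the record holds the immutable components; the I/O lives in a
  // static factory because the canonical constructor cannot throw a
  // checked IOException.
  record FieldEntry(long vectorDataOffset, int dimension) {
    static FieldEntry create(DataInput input) throws IOException {
      final var vectorDataOffset = input.readLong();
      final var dimension = input.readInt();
      return new FieldEntry(vectorDataOffset, dimension);
    }
  }
}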


@@ -210,7 +210,7 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
+ " != "
+ info.getVectorSimilarityFunction());
}
return new FieldEntry(input, info.getVectorSimilarityFunction());
return FieldEntry.create(input, info.getVectorSimilarityFunction());
}
@Override
@@ -303,37 +303,44 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
IOUtils.close(vectorData, vectorIndex);
}
private static class FieldEntry {
private record FieldEntry(
VectorSimilarityFunction similarityFunction,
long vectorDataOffset,
long vectorDataLength,
long indexDataOffset,
long indexDataLength,
int dimension,
int[] ordToDoc,
long[] ordOffsets) {
final int dimension;
final VectorSimilarityFunction similarityFunction;
final long vectorDataOffset;
final long vectorDataLength;
final long indexDataOffset;
final long indexDataLength;
final int[] ordToDoc;
final long[] ordOffsets;
FieldEntry(DataInput input, VectorSimilarityFunction similarityFunction) throws IOException {
this.similarityFunction = similarityFunction;
vectorDataOffset = input.readVLong();
vectorDataLength = input.readVLong();
indexDataOffset = input.readVLong();
indexDataLength = input.readVLong();
dimension = input.readInt();
int size = input.readInt();
ordToDoc = new int[size];
static FieldEntry create(DataInput input, VectorSimilarityFunction similarityFunction)
throws IOException {
final var vectorDataOffset = input.readVLong();
final var vectorDataLength = input.readVLong();
final var indexDataOffset = input.readVLong();
final var indexDataLength = input.readVLong();
final var dimension = input.readInt();
final var size = input.readInt();
final var ordToDoc = new int[size];
for (int i = 0; i < size; i++) {
int doc = input.readVInt();
ordToDoc[i] = doc;
}
ordOffsets = new long[size()];
final var ordOffsets = new long[size];
long offset = 0;
for (int i = 0; i < ordOffsets.length; i++) {
offset += input.readVLong();
ordOffsets[i] = offset;
}
return new FieldEntry(
similarityFunction,
vectorDataOffset,
vectorDataLength,
indexDataOffset,
indexDataLength,
dimension,
ordToDoc,
ordOffsets);
}
int size() {


@@ -202,7 +202,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
+ " != "
+ info.getVectorSimilarityFunction());
}
return new FieldEntry(input, info.getVectorSimilarityFunction());
return FieldEntry.create(input, info.getVectorSimilarityFunction());
}
@Override
@@ -286,32 +286,30 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
IOUtils.close(vectorData, vectorIndex);
}
private static class FieldEntry {
final VectorSimilarityFunction similarityFunction;
final long vectorDataOffset;
final long vectorDataLength;
final long vectorIndexOffset;
final long vectorIndexLength;
final int maxConn;
final int numLevels;
final int dimension;
private final int size;
final int[] ordToDoc;
private final IntUnaryOperator ordToDocOperator;
final int[][] nodesByLevel;
// for each level the start offsets in vectorIndex file from where to read neighbours
final long[] graphOffsetsByLevel;
FieldEntry(DataInput input, VectorSimilarityFunction similarityFunction) throws IOException {
this.similarityFunction = similarityFunction;
vectorDataOffset = input.readVLong();
vectorDataLength = input.readVLong();
vectorIndexOffset = input.readVLong();
vectorIndexLength = input.readVLong();
dimension = input.readInt();
size = input.readInt();
private record FieldEntry(
VectorSimilarityFunction similarityFunction,
long vectorDataOffset,
long vectorDataLength,
long vectorIndexOffset,
long vectorIndexLength,
int maxConn,
int numLevels,
int dimension,
int size,
int[] ordToDoc,
IntUnaryOperator ordToDocOperator,
int[][] nodesByLevel,
// for each level the start offsets in vectorIndex file from where to read neighbours
long[] graphOffsetsByLevel) {
static FieldEntry create(DataInput input, VectorSimilarityFunction similarityFunction)
throws IOException {
final var vectorDataOffset = input.readVLong();
final var vectorDataLength = input.readVLong();
final var vectorIndexOffset = input.readVLong();
final var vectorIndexLength = input.readVLong();
final var dimension = input.readInt();
final var size = input.readInt();
final int[] ordToDoc;
int denseSparseMarker = input.readByte();
if (denseSparseMarker == -1) {
ordToDoc = null; // each document has a vector value
@@ -328,12 +326,13 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
ordToDoc[i] = doc;
}
}
ordToDocOperator = ordToDoc == null ? IntUnaryOperator.identity() : (ord) -> ordToDoc[ord];
final IntUnaryOperator ordToDocOperator =
ordToDoc == null ? IntUnaryOperator.identity() : (ord) -> ordToDoc[ord];
// read nodes by level
maxConn = input.readInt();
numLevels = input.readInt();
nodesByLevel = new int[numLevels][];
final var maxConn = input.readInt();
final var numLevels = input.readInt();
final var nodesByLevel = new int[numLevels][];
for (int level = 0; level < numLevels; level++) {
int numNodesOnLevel = input.readInt();
if (level == 0) {
@@ -350,7 +349,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
// calculate for each level the start offsets in vectorIndex file from where to read
// neighbours
graphOffsetsByLevel = new long[numLevels];
final var graphOffsetsByLevel = new long[numLevels];
final long connectionsAndSizeBytes =
Math.multiplyExact(Math.addExact(1L, maxConn), Integer.BYTES);
for (int level = 0; level < numLevels; level++) {
@@ -364,10 +363,21 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
Math.multiplyExact(connectionsAndSizeBytes, numNodesOnPrevLevel));
}
}
}
int size() {
return size;
return new FieldEntry(
similarityFunction,
vectorDataOffset,
vectorDataLength,
vectorIndexOffset,
vectorIndexLength,
maxConn,
numLevels,
dimension,
size,
ordToDoc,
ordToDocOperator,
nodesByLevel,
graphOffsetsByLevel);
}
int ordToDoc(int ord) {


@@ -201,7 +201,7 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
+ " != "
+ info.getVectorSimilarityFunction());
}
return new FieldEntry(input, info.getVectorSimilarityFunction());
return FieldEntry.create(input, info.getVectorSimilarityFunction());
}
@Override
@@ -257,52 +257,54 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
IOUtils.close(vectorData, vectorIndex);
}
static class FieldEntry {
static record FieldEntry(
VectorSimilarityFunction similarityFunction,
long vectorDataOffset,
long vectorDataLength,
long vectorIndexOffset,
long vectorIndexLength,
int M,
int numLevels,
int dimension,
int size,
int[][] nodesByLevel,
// for each level the start offsets in vectorIndex file from where to read neighbours
long[] graphOffsetsByLevel,
final VectorSimilarityFunction similarityFunction;
final long vectorDataOffset;
final long vectorDataLength;
final long vectorIndexOffset;
final long vectorIndexLength;
final int M;
final int numLevels;
final int dimension;
final int size;
final int[][] nodesByLevel;
// for each level the start offsets in vectorIndex file from where to read neighbours
final long[] graphOffsetsByLevel;
// the following four variables used to read docIds encoded by IndexDISI
// special values of docsWithFieldOffset are -1 and -2
// -1 : dense
// -2 : empty
// other: sparse
long docsWithFieldOffset,
long docsWithFieldLength,
short jumpTableEntryCount,
byte denseRankPower,
// the following four variables used to read docIds encoded by IndexDISI
// special values of docsWithFieldOffset are -1 and -2
// -1 : dense
// -2 : empty
// other: sparse
final long docsWithFieldOffset;
final long docsWithFieldLength;
final short jumpTableEntryCount;
final byte denseRankPower;
// the following four variables used to read ordToDoc encoded by DirectMonotonicWriter
// note that only the sparse case needs to store ordToDoc
long addressesOffset,
int blockShift,
DirectMonotonicReader.Meta meta,
long addressesLength) {
static FieldEntry create(IndexInput input, VectorSimilarityFunction similarityFunction)
throws IOException {
final var vectorDataOffset = input.readVLong();
final var vectorDataLength = input.readVLong();
final var vectorIndexOffset = input.readVLong();
final var vectorIndexLength = input.readVLong();
final var dimension = input.readInt();
final var size = input.readInt();
// the following four variables used to read ordToDoc encoded by DirectMonotonicWriter
// note that only the sparse case needs to store ordToDoc
final long addressesOffset;
final int blockShift;
final DirectMonotonicReader.Meta meta;
final long addressesLength;
FieldEntry(IndexInput input, VectorSimilarityFunction similarityFunction) throws IOException {
this.similarityFunction = similarityFunction;
vectorDataOffset = input.readVLong();
vectorDataLength = input.readVLong();
vectorIndexOffset = input.readVLong();
vectorIndexLength = input.readVLong();
dimension = input.readInt();
size = input.readInt();
docsWithFieldOffset = input.readLong();
docsWithFieldLength = input.readLong();
jumpTableEntryCount = input.readShort();
denseRankPower = input.readByte();
final var docsWithFieldOffset = input.readLong();
final var docsWithFieldLength = input.readLong();
final var jumpTableEntryCount = input.readShort();
final var denseRankPower = input.readByte();
final long addressesOffset;
final int blockShift;
final DirectMonotonicReader.Meta meta;
final long addressesLength;
// dense or empty
if (docsWithFieldOffset == -1 || docsWithFieldOffset == -2) {
addressesOffset = 0;
@@ -318,9 +320,9 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
}
// read nodes by level
M = input.readInt();
numLevels = input.readInt();
nodesByLevel = new int[numLevels][];
final var M = input.readInt();
final var numLevels = input.readInt();
final var nodesByLevel = new int[numLevels][];
for (int level = 0; level < numLevels; level++) {
int numNodesOnLevel = input.readInt();
if (level == 0) {
@@ -337,7 +339,7 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
// calculate for each level the start offsets in vectorIndex file from where to read
// neighbours
graphOffsetsByLevel = new long[numLevels];
final var graphOffsetsByLevel = new long[numLevels];
final long connectionsAndSizeLevel0Bytes =
Math.multiplyExact(Math.addExact(1, Math.multiplyExact(M, 2L)), Integer.BYTES);
final long connectionsAndSizeBytes = Math.multiplyExact(Math.addExact(1L, M), Integer.BYTES);
@@ -354,10 +356,26 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
Math.multiplyExact(connectionsAndSizeBytes, numNodesOnPrevLevel));
}
}
}
int size() {
return size;
return new FieldEntry(
similarityFunction,
vectorDataOffset,
vectorDataLength,
vectorIndexOffset,
vectorIndexLength,
M,
numLevels,
dimension,
size,
nodesByLevel,
graphOffsetsByLevel,
docsWithFieldOffset,
docsWithFieldLength,
jumpTableEntryCount,
denseRankPower,
addressesOffset,
blockShift,
meta,
addressesLength);
}
}


@@ -68,13 +68,14 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
static OffHeapFloatVectorValues load(
Lucene92HnswVectorsReader.FieldEntry fieldEntry, IndexInput vectorData) throws IOException {
if (fieldEntry.docsWithFieldOffset == -2) {
return new EmptyOffHeapVectorValues(fieldEntry.dimension);
if (fieldEntry.docsWithFieldOffset() == -2) {
return new EmptyOffHeapVectorValues(fieldEntry.dimension());
}
IndexInput bytesSlice =
vectorData.slice("vector-data", fieldEntry.vectorDataOffset, fieldEntry.vectorDataLength);
if (fieldEntry.docsWithFieldOffset == -1) {
return new DenseOffHeapVectorValues(fieldEntry.dimension, fieldEntry.size, bytesSlice);
vectorData.slice(
"vector-data", fieldEntry.vectorDataOffset(), fieldEntry.vectorDataLength());
if (fieldEntry.docsWithFieldOffset() == -1) {
return new DenseOffHeapVectorValues(fieldEntry.dimension(), fieldEntry.size(), bytesSlice);
} else {
return new SparseOffHeapVectorValues(fieldEntry, vectorData, bytesSlice);
}
@@ -134,20 +135,20 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
Lucene92HnswVectorsReader.FieldEntry fieldEntry, IndexInput dataIn, IndexInput slice)
throws IOException {
super(fieldEntry.dimension, fieldEntry.size, slice);
super(fieldEntry.dimension(), fieldEntry.size(), slice);
this.fieldEntry = fieldEntry;
final RandomAccessInput addressesData =
dataIn.randomAccessSlice(fieldEntry.addressesOffset, fieldEntry.addressesLength);
dataIn.randomAccessSlice(fieldEntry.addressesOffset(), fieldEntry.addressesLength());
this.dataIn = dataIn;
this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta, addressesData);
this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta(), addressesData);
this.disi =
new IndexedDISI(
dataIn,
fieldEntry.docsWithFieldOffset,
fieldEntry.docsWithFieldLength,
fieldEntry.jumpTableEntryCount,
fieldEntry.denseRankPower,
fieldEntry.size);
fieldEntry.docsWithFieldOffset(),
fieldEntry.docsWithFieldLength(),
fieldEntry.jumpTableEntryCount(),
fieldEntry.denseRankPower(),
fieldEntry.size());
}
@Override
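
Call sites change in step with the declarations: direct final-field reads such as fieldEntry.dimension become calls to the generated accessors, fieldEntry.dimension(), as the hunks above show. A tiny hypothetical illustration:

final class AccessorSketch {
  record Point(int x, int y) {}

  static int demo() {
    Point p = new Point(1, 2);
    // Record components are exposed through generated accessor methods,
    // not public fields, hence dimension -> dimension() at call sites.
    return p.x() + p.y(); // 3
  }
}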


@@ -219,7 +219,7 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
+ " != "
+ info.getVectorSimilarityFunction());
}
return new FieldEntry(input, vectorEncoding, info.getVectorSimilarityFunction());
return FieldEntry.create(input, vectorEncoding, info.getVectorSimilarityFunction());
}
@Override
@@ -307,58 +307,58 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
IOUtils.close(vectorData, vectorIndex);
}
static class FieldEntry {
static record FieldEntry(
VectorSimilarityFunction similarityFunction,
VectorEncoding vectorEncoding,
long vectorDataOffset,
long vectorDataLength,
long vectorIndexOffset,
long vectorIndexLength,
int M,
int numLevels,
int dimension,
int size,
int[][] nodesByLevel,
// for each level the start offsets in vectorIndex file from where to read neighbours
long[] graphOffsetsByLevel,
final VectorSimilarityFunction similarityFunction;
final VectorEncoding vectorEncoding;
final long vectorDataOffset;
final long vectorDataLength;
final long vectorIndexOffset;
final long vectorIndexLength;
final int M;
final int numLevels;
final int dimension;
final int size;
final int[][] nodesByLevel;
// for each level the start offsets in vectorIndex file from where to read neighbours
final long[] graphOffsetsByLevel;
// the following four variables used to read docIds encoded by IndexDISI
// special values of docsWithFieldOffset are -1 and -2
// -1 : dense
// -2 : empty
// other: sparse
long docsWithFieldOffset,
long docsWithFieldLength,
short jumpTableEntryCount,
byte denseRankPower,
// the following four variables used to read docIds encoded by IndexDISI
// special values of docsWithFieldOffset are -1 and -2
// -1 : dense
// -2 : empty
// other: sparse
final long docsWithFieldOffset;
final long docsWithFieldLength;
final short jumpTableEntryCount;
final byte denseRankPower;
// the following four variables used to read ordToDoc encoded by DirectMonotonicWriter
// note that only the sparse case needs to store ordToDoc
final long addressesOffset;
final int blockShift;
final DirectMonotonicReader.Meta meta;
final long addressesLength;
FieldEntry(
// the following four variables used to read ordToDoc encoded by DirectMonotonicWriter
// note that only the sparse case needs to store ordToDoc
long addressesOffset,
int blockShift,
DirectMonotonicReader.Meta meta,
long addressesLength) {
static FieldEntry create(
IndexInput input,
VectorEncoding vectorEncoding,
VectorSimilarityFunction similarityFunction)
throws IOException {
this.similarityFunction = similarityFunction;
this.vectorEncoding = vectorEncoding;
vectorDataOffset = input.readVLong();
vectorDataLength = input.readVLong();
vectorIndexOffset = input.readVLong();
vectorIndexLength = input.readVLong();
dimension = input.readInt();
size = input.readInt();
final var vectorDataOffset = input.readVLong();
final var vectorDataLength = input.readVLong();
final var vectorIndexOffset = input.readVLong();
final var vectorIndexLength = input.readVLong();
final var dimension = input.readInt();
final var size = input.readInt();
docsWithFieldOffset = input.readLong();
docsWithFieldLength = input.readLong();
jumpTableEntryCount = input.readShort();
denseRankPower = input.readByte();
final var docsWithFieldOffset = input.readLong();
final var docsWithFieldLength = input.readLong();
final var jumpTableEntryCount = input.readShort();
final var denseRankPower = input.readByte();
final long addressesOffset;
final int blockShift;
final DirectMonotonicReader.Meta meta;
final long addressesLength;
// dense or empty
if (docsWithFieldOffset == -1 || docsWithFieldOffset == -2) {
addressesOffset = 0;
@@ -374,9 +374,9 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
}
// read nodes by level
M = input.readInt();
numLevels = input.readInt();
nodesByLevel = new int[numLevels][];
final var M = input.readInt();
final var numLevels = input.readInt();
final var nodesByLevel = new int[numLevels][];
for (int level = 0; level < numLevels; level++) {
int numNodesOnLevel = input.readInt();
if (level == 0) {
@@ -393,7 +393,7 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
// calculate for each level the start offsets in vectorIndex file from where to read
// neighbours
graphOffsetsByLevel = new long[numLevels];
final var graphOffsetsByLevel = new long[numLevels];
final long connectionsAndSizeLevel0Bytes =
Math.multiplyExact(Math.addExact(1, Math.multiplyExact(M, 2L)), Integer.BYTES);
final long connectionsAndSizeBytes = Math.multiplyExact(Math.addExact(1L, M), Integer.BYTES);
@@ -410,10 +410,27 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
Math.multiplyExact(connectionsAndSizeBytes, numNodesOnPrevLevel));
}
}
}
int size() {
return size;
return new FieldEntry(
similarityFunction,
vectorEncoding,
vectorDataOffset,
vectorDataLength,
vectorIndexOffset,
vectorIndexLength,
M,
numLevels,
dimension,
size,
nodesByLevel,
graphOffsetsByLevel,
docsWithFieldOffset,
docsWithFieldLength,
jumpTableEntryCount,
denseRankPower,
addressesOffset,
blockShift,
meta,
addressesLength);
}
}


@@ -75,15 +75,17 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
static OffHeapByteVectorValues load(
Lucene94HnswVectorsReader.FieldEntry fieldEntry, IndexInput vectorData) throws IOException {
if (fieldEntry.docsWithFieldOffset == -2 || fieldEntry.vectorEncoding != VectorEncoding.BYTE) {
return new EmptyOffHeapVectorValues(fieldEntry.dimension);
if (fieldEntry.docsWithFieldOffset() == -2
|| fieldEntry.vectorEncoding() != VectorEncoding.BYTE) {
return new EmptyOffHeapVectorValues(fieldEntry.dimension());
}
IndexInput bytesSlice =
vectorData.slice("vector-data", fieldEntry.vectorDataOffset, fieldEntry.vectorDataLength);
int byteSize = fieldEntry.dimension;
if (fieldEntry.docsWithFieldOffset == -1) {
vectorData.slice(
"vector-data", fieldEntry.vectorDataOffset(), fieldEntry.vectorDataLength());
int byteSize = fieldEntry.dimension();
if (fieldEntry.docsWithFieldOffset() == -1) {
return new DenseOffHeapVectorValues(
fieldEntry.dimension, fieldEntry.size, bytesSlice, byteSize);
fieldEntry.dimension(), fieldEntry.size(), bytesSlice, byteSize);
} else {
return new SparseOffHeapVectorValues(fieldEntry, vectorData, bytesSlice, byteSize);
}
@@ -146,20 +148,20 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
int byteSize)
throws IOException {
super(fieldEntry.dimension, fieldEntry.size, slice, byteSize);
super(fieldEntry.dimension(), fieldEntry.size(), slice, byteSize);
this.fieldEntry = fieldEntry;
final RandomAccessInput addressesData =
dataIn.randomAccessSlice(fieldEntry.addressesOffset, fieldEntry.addressesLength);
dataIn.randomAccessSlice(fieldEntry.addressesOffset(), fieldEntry.addressesLength());
this.dataIn = dataIn;
this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta, addressesData);
this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta(), addressesData);
this.disi =
new IndexedDISI(
dataIn,
fieldEntry.docsWithFieldOffset,
fieldEntry.docsWithFieldLength,
fieldEntry.jumpTableEntryCount,
fieldEntry.denseRankPower,
fieldEntry.size);
fieldEntry.docsWithFieldOffset(),
fieldEntry.docsWithFieldLength(),
fieldEntry.jumpTableEntryCount(),
fieldEntry.denseRankPower(),
fieldEntry.size());
}
@Override


@@ -68,19 +68,20 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
static OffHeapFloatVectorValues load(
Lucene94HnswVectorsReader.FieldEntry fieldEntry, IndexInput vectorData) throws IOException {
if (fieldEntry.docsWithFieldOffset == -2) {
return new EmptyOffHeapVectorValues(fieldEntry.dimension);
if (fieldEntry.docsWithFieldOffset() == -2) {
return new EmptyOffHeapVectorValues(fieldEntry.dimension());
}
IndexInput bytesSlice =
vectorData.slice("vector-data", fieldEntry.vectorDataOffset, fieldEntry.vectorDataLength);
vectorData.slice(
"vector-data", fieldEntry.vectorDataOffset(), fieldEntry.vectorDataLength());
int byteSize =
switch (fieldEntry.vectorEncoding) {
case BYTE -> fieldEntry.dimension;
case FLOAT32 -> fieldEntry.dimension * Float.BYTES;
switch (fieldEntry.vectorEncoding()) {
case BYTE -> fieldEntry.dimension();
case FLOAT32 -> fieldEntry.dimension() * Float.BYTES;
};
if (fieldEntry.docsWithFieldOffset == -1) {
if (fieldEntry.docsWithFieldOffset() == -1) {
return new DenseOffHeapVectorValues(
fieldEntry.dimension, fieldEntry.size, bytesSlice, byteSize);
fieldEntry.dimension(), fieldEntry.size(), bytesSlice, byteSize);
} else {
return new SparseOffHeapVectorValues(fieldEntry, vectorData, bytesSlice, byteSize);
}
@@ -143,20 +144,20 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
int byteSize)
throws IOException {
super(fieldEntry.dimension, fieldEntry.size, slice, byteSize);
super(fieldEntry.dimension(), fieldEntry.size(), slice, byteSize);
this.fieldEntry = fieldEntry;
final RandomAccessInput addressesData =
dataIn.randomAccessSlice(fieldEntry.addressesOffset, fieldEntry.addressesLength);
dataIn.randomAccessSlice(fieldEntry.addressesOffset(), fieldEntry.addressesLength());
this.dataIn = dataIn;
this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta, addressesData);
this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta(), addressesData);
this.disi =
new IndexedDISI(
dataIn,
fieldEntry.docsWithFieldOffset,
fieldEntry.docsWithFieldLength,
fieldEntry.jumpTableEntryCount,
fieldEntry.denseRankPower,
fieldEntry.size);
fieldEntry.docsWithFieldOffset(),
fieldEntry.docsWithFieldLength(),
fieldEntry.jumpTableEntryCount(),
fieldEntry.denseRankPower(),
fieldEntry.size());
}
@Override


@@ -229,7 +229,7 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
+ " != "
+ info.getVectorSimilarityFunction());
}
return new FieldEntry(input, vectorEncoding, info.getVectorSimilarityFunction());
return FieldEntry.create(input, vectorEncoding, info.getVectorSimilarityFunction());
}
@Override
@@ -360,50 +360,49 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
IOUtils.close(vectorData, vectorIndex);
}
static class FieldEntry implements Accountable {
static record FieldEntry(
VectorSimilarityFunction similarityFunction,
VectorEncoding vectorEncoding,
long vectorDataOffset,
long vectorDataLength,
long vectorIndexOffset,
long vectorIndexLength,
int M,
int numLevels,
int dimension,
int size,
int[][] nodesByLevel,
// for each level the start offsets in vectorIndex file from where to read neighbours
DirectMonotonicReader.Meta offsetsMeta,
long offsetsOffset,
int offsetsBlockShift,
long offsetsLength,
// Contains the configuration for reading sparse vectors and translating vector ordinals to
// docId
OrdToDocDISIReaderConfiguration ordToDocVectorValues)
implements Accountable {
private static final long SHALLOW_SIZE =
RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class);
final VectorSimilarityFunction similarityFunction;
final VectorEncoding vectorEncoding;
final long vectorDataOffset;
final long vectorDataLength;
final long vectorIndexOffset;
final long vectorIndexLength;
final int M;
final int numLevels;
final int dimension;
final int size;
final int[][] nodesByLevel;
// for each level the start offsets in vectorIndex file from where to read neighbours
final DirectMonotonicReader.Meta offsetsMeta;
final long offsetsOffset;
final int offsetsBlockShift;
final long offsetsLength;
// Contains the configuration for reading sparse vectors and translating vector ordinals to
// docId
OrdToDocDISIReaderConfiguration ordToDocVectorValues;
FieldEntry(
static FieldEntry create(
IndexInput input,
VectorEncoding vectorEncoding,
VectorSimilarityFunction similarityFunction)
throws IOException {
this.similarityFunction = similarityFunction;
this.vectorEncoding = vectorEncoding;
vectorDataOffset = input.readVLong();
vectorDataLength = input.readVLong();
vectorIndexOffset = input.readVLong();
vectorIndexLength = input.readVLong();
dimension = input.readVInt();
size = input.readInt();
final var vectorDataOffset = input.readVLong();
final var vectorDataLength = input.readVLong();
final var vectorIndexOffset = input.readVLong();
final var vectorIndexLength = input.readVLong();
final var dimension = input.readVInt();
final var size = input.readInt();
ordToDocVectorValues = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size);
final var ordToDocVectorValues = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size);
// read nodes by level
M = input.readVInt();
numLevels = input.readVInt();
nodesByLevel = new int[numLevels][];
final var M = input.readVInt();
final var numLevels = input.readVInt();
final var nodesByLevel = new int[numLevels][];
long numberOfOffsets = 0;
for (int level = 0; level < numLevels; level++) {
if (level > 0) {
@@ -418,6 +417,10 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
numberOfOffsets += size;
}
}
final long offsetsOffset;
final int offsetsBlockShift;
final DirectMonotonicReader.Meta offsetsMeta;
final long offsetsLength;
if (numberOfOffsets > 0) {
offsetsOffset = input.readLong();
offsetsBlockShift = input.readVInt();
@@ -429,10 +432,23 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
offsetsMeta = null;
offsetsLength = 0;
}
}
int size() {
return size;
return new FieldEntry(
similarityFunction,
vectorEncoding,
vectorDataOffset,
vectorDataLength,
vectorIndexOffset,
vectorIndexLength,
M,
numLevels,
dimension,
size,
nodesByLevel,
offsetsMeta,
offsetsOffset,
offsetsBlockShift,
offsetsLength,
ordToDocVectorValues);
}
@Override
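
Worth noting in the Lucene95 hunks above: converting to a record keeps the interface contract, since records may implement interfaces (they only cannot extend classes). FieldEntry still implements Accountable and still computes SHALLOW_SIZE via RamUsageEstimator. A minimal hypothetical sketch, with a local stand-in for Lucene's Accountable interface:

final class RecordInterfaceSketch {
  // Local stand-in for org.apache.lucene.util.Accountable.
  interface Accountable {
    long ramBytesUsed();
  }

  record Entry(long offset, long length) implements Accountable {
    @Override
    public long ramBytesUsed() {
      return 2 * Long.BYTES; // simplified accounting for the two components
    }
  }
}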


@@ -275,21 +275,8 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
dataIn.close();
}
private static class FieldEntry {
final int dimension;
final long vectorDataOffset;
final long vectorDataLength;
final int[] ordToDoc;
FieldEntry(int dimension, long vectorDataOffset, long vectorDataLength, int[] ordToDoc) {
this.dimension = dimension;
this.vectorDataOffset = vectorDataOffset;
this.vectorDataLength = vectorDataLength;
this.ordToDoc = ordToDoc;
}
private record FieldEntry(
int dimension, long vectorDataOffset, long vectorDataLength, int[] ordToDoc) {
int size() {
return ordToDoc.length;
}


@@ -152,61 +152,11 @@ public final class Lucene99FlatVectorsReader extends FlatVectorsReader {
if (info == null) {
throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
}
FieldEntry fieldEntry = readField(meta, info);
validateFieldEntry(info, fieldEntry);
FieldEntry fieldEntry = FieldEntry.create(meta, info);
fields.put(info.name, fieldEntry);
}
}
private void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
int dimension = info.getVectorDimension();
if (dimension != fieldEntry.dimension) {
throw new IllegalStateException(
"Inconsistent vector dimension for field=\""
+ info.name
+ "\"; "
+ dimension
+ " != "
+ fieldEntry.dimension);
}
int byteSize =
switch (info.getVectorEncoding()) {
case BYTE -> Byte.BYTES;
case FLOAT32 -> Float.BYTES;
};
long vectorBytes = Math.multiplyExact((long) dimension, byteSize);
long numBytes = Math.multiplyExact(vectorBytes, fieldEntry.size);
if (numBytes != fieldEntry.vectorDataLength) {
throw new IllegalStateException(
"Vector data length "
+ fieldEntry.vectorDataLength
+ " not matching size="
+ fieldEntry.size
+ " * dim="
+ dimension
+ " * byteSize="
+ byteSize
+ " = "
+ numBytes);
}
}
private FieldEntry readField(IndexInput input, FieldInfo info) throws IOException {
VectorEncoding vectorEncoding = readVectorEncoding(input);
VectorSimilarityFunction similarityFunction = readSimilarityFunction(input);
if (similarityFunction != info.getVectorSimilarityFunction()) {
throw new IllegalStateException(
"Inconsistent vector similarity function for field=\""
+ info.name
+ "\"; "
+ similarityFunction
+ " != "
+ info.getVectorSimilarityFunction());
}
return new FieldEntry(input, vectorEncoding, info.getVectorSimilarityFunction());
}
@Override
public long ramBytesUsed() {
return Lucene99FlatVectorsReader.SHALLOW_SIZE
@@ -302,29 +252,78 @@ public final class Lucene99FlatVectorsReader extends FlatVectorsReader {
IOUtils.close(vectorData);
}
private static class FieldEntry implements Accountable {
private static final long SHALLOW_SIZE =
RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class);
final VectorSimilarityFunction similarityFunction;
final VectorEncoding vectorEncoding;
final int dimension;
final long vectorDataOffset;
final long vectorDataLength;
final int size;
final OrdToDocDISIReaderConfiguration ordToDoc;
private record FieldEntry(
VectorSimilarityFunction similarityFunction,
VectorEncoding vectorEncoding,
long vectorDataOffset,
long vectorDataLength,
int dimension,
int size,
OrdToDocDISIReaderConfiguration ordToDoc,
FieldInfo info)
implements Accountable {
static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class);
FieldEntry(
IndexInput input,
VectorEncoding vectorEncoding,
VectorSimilarityFunction similarityFunction)
throws IOException {
this.similarityFunction = similarityFunction;
this.vectorEncoding = vectorEncoding;
vectorDataOffset = input.readVLong();
vectorDataLength = input.readVLong();
dimension = input.readVInt();
size = input.readInt();
ordToDoc = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size);
FieldEntry {
if (similarityFunction != info.getVectorSimilarityFunction()) {
throw new IllegalStateException(
"Inconsistent vector similarity function for field=\""
+ info.name
+ "\"; "
+ similarityFunction
+ " != "
+ info.getVectorSimilarityFunction());
}
int infoVectorDimension = info.getVectorDimension();
if (infoVectorDimension != dimension) {
throw new IllegalStateException(
"Inconsistent vector dimension for field=\""
+ info.name
+ "\"; "
+ infoVectorDimension
+ " != "
+ dimension);
}
int byteSize =
switch (info.getVectorEncoding()) {
case BYTE -> Byte.BYTES;
case FLOAT32 -> Float.BYTES;
};
long vectorBytes = Math.multiplyExact((long) infoVectorDimension, byteSize);
long numBytes = Math.multiplyExact(vectorBytes, size);
if (numBytes != vectorDataLength) {
throw new IllegalStateException(
"Vector data length "
+ vectorDataLength
+ " not matching size="
+ size
+ " * dim="
+ dimension
+ " * byteSize="
+ byteSize
+ " = "
+ numBytes);
}
}
static FieldEntry create(IndexInput input, FieldInfo info) throws IOException {
final VectorEncoding vectorEncoding = readVectorEncoding(input);
final VectorSimilarityFunction similarityFunction = readSimilarityFunction(input);
final var vectorDataOffset = input.readVLong();
final var vectorDataLength = input.readVLong();
final var dimension = input.readVInt();
final var size = input.readInt();
final var ordToDoc = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size);
return new FieldEntry(
similarityFunction,
vectorEncoding,
vectorDataOffset,
vectorDataLength,
dimension,
size,
ordToDoc,
info);
}
@Override
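
One non-mechanical detail in the Lucene99FlatVectorsReader hunks above: the old readField/validateFieldEntry pair collapses into the record itself, with the validation moving into a compact canonical constructor, so no FieldEntry can be constructed with inconsistent components. A stripped-down hypothetical sketch of that idiom:

final class CompactConstructorSketch {
  record Entry(int expectedDimension, int dimension) {
    // Compact canonical constructor: runs before the components are
    // assigned, so every Entry that exists has passed this check.
    Entry {
      if (expectedDimension != dimension) {
        throw new IllegalStateException(
            "Inconsistent vector dimension; " + expectedDimension + " != " + dimension);
      }
    }
  }
}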


@@ -219,7 +219,7 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
+ " != "
+ info.getVectorSimilarityFunction());
}
return new FieldEntry(input, vectorEncoding, info.getVectorSimilarityFunction());
return FieldEntry.create(input, vectorEncoding, info.getVectorSimilarityFunction());
}
@Override
@@ -347,40 +347,43 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
return null;
}
static class FieldEntry implements Accountable {
private record FieldEntry(
VectorSimilarityFunction similarityFunction,
VectorEncoding vectorEncoding,
long vectorIndexOffset,
long vectorIndexLength,
int M,
int numLevels,
int dimension,
int size,
int[][] nodesByLevel,
// for each level the start offsets in vectorIndex file from where to read neighbours
DirectMonotonicReader.Meta offsetsMeta,
long offsetsOffset,
int offsetsBlockShift,
long offsetsLength)
implements Accountable {
private static final long SHALLOW_SIZE =
RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class);
final VectorSimilarityFunction similarityFunction;
final VectorEncoding vectorEncoding;
final long vectorIndexOffset;
final long vectorIndexLength;
final int M;
final int numLevels;
final int dimension;
final int size;
final int[][] nodesByLevel;
// for each level the start offsets in vectorIndex file from where to read neighbours
final DirectMonotonicReader.Meta offsetsMeta;
final long offsetsOffset;
final int offsetsBlockShift;
final long offsetsLength;
FieldEntry(
static FieldEntry create(
IndexInput input,
VectorEncoding vectorEncoding,
VectorSimilarityFunction similarityFunction)
throws IOException {
this.similarityFunction = similarityFunction;
this.vectorEncoding = vectorEncoding;
vectorIndexOffset = input.readVLong();
vectorIndexLength = input.readVLong();
dimension = input.readVInt();
size = input.readInt();
final var vectorIndexOffset = input.readVLong();
final var vectorIndexLength = input.readVLong();
final var dimension = input.readVInt();
final var size = input.readInt();
// read nodes by level
M = input.readVInt();
numLevels = input.readVInt();
nodesByLevel = new int[numLevels][];
final var M = input.readVInt();
final var numLevels = input.readVInt();
final var nodesByLevel = new int[numLevels][];
long numberOfOffsets = 0;
final long offsetsOffset;
final int offsetsBlockShift;
final DirectMonotonicReader.Meta offsetsMeta;
final long offsetsLength;
for (int level = 0; level < numLevels; level++) {
if (level > 0) {
int numNodesOnLevel = input.readVInt();
@@ -405,10 +408,20 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
offsetsMeta = null;
offsetsLength = 0;
}
}
int size() {
return size;
return new FieldEntry(
similarityFunction,
vectorEncoding,
vectorIndexOffset,
vectorIndexLength,
M,
numLevels,
dimension,
size,
nodesByLevel,
offsetsMeta,
offsetsOffset,
offsetsBlockShift,
offsetsLength);
}
@Override


@@ -266,7 +266,8 @@ public final class Lucene99ScalarQuantizedVectorsReader
+ " != "
+ info.getVectorSimilarityFunction());
}
return new FieldEntry(input, versionMeta, vectorEncoding, info.getVectorSimilarityFunction());
return FieldEntry.create(
input, versionMeta, vectorEncoding, info.getVectorSimilarityFunction());
}
@Override
@@ -295,32 +296,34 @@ public final class Lucene99ScalarQuantizedVectorsReader
return fieldEntry.scalarQuantizer;
}
private static class FieldEntry implements Accountable {
private record FieldEntry(
VectorSimilarityFunction similarityFunction,
VectorEncoding vectorEncoding,
int dimension,
long vectorDataOffset,
long vectorDataLength,
ScalarQuantizer scalarQuantizer,
int size,
byte bits,
boolean compress,
OrdToDocDISIReaderConfiguration ordToDoc)
implements Accountable {
private static final long SHALLOW_SIZE =
RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class);
final VectorSimilarityFunction similarityFunction;
final VectorEncoding vectorEncoding;
final int dimension;
final long vectorDataOffset;
final long vectorDataLength;
final ScalarQuantizer scalarQuantizer;
final int size;
final byte bits;
final boolean compress;
final OrdToDocDISIReaderConfiguration ordToDoc;
FieldEntry(
static FieldEntry create(
IndexInput input,
int versionMeta,
VectorEncoding vectorEncoding,
VectorSimilarityFunction similarityFunction)
throws IOException {
this.similarityFunction = similarityFunction;
this.vectorEncoding = vectorEncoding;
vectorDataOffset = input.readVLong();
vectorDataLength = input.readVLong();
dimension = input.readVInt();
size = input.readInt();
final var vectorDataOffset = input.readVLong();
final var vectorDataLength = input.readVLong();
final var dimension = input.readVInt();
final var size = input.readInt();
final ScalarQuantizer scalarQuantizer;
final byte bits;
final boolean compress;
if (size > 0) {
if (versionMeta < Lucene99ScalarQuantizedVectorsFormat.VERSION_ADD_BITS) {
int floatBits = input.readInt(); // confidenceInterval, unused
@@ -328,25 +331,36 @@ public final class Lucene99ScalarQuantizedVectorsReader
throw new CorruptIndexException(
"Missing confidence interval for scalar quantizer", input);
}
this.bits = (byte) 7;
this.compress = false;
bits = (byte) 7;
compress = false;
float minQuantile = Float.intBitsToFloat(input.readInt());
float maxQuantile = Float.intBitsToFloat(input.readInt());
scalarQuantizer = new ScalarQuantizer(minQuantile, maxQuantile, (byte) 7);
} else {
input.readInt(); // confidenceInterval, unused
this.bits = input.readByte();
this.compress = input.readByte() == 1;
bits = input.readByte();
compress = input.readByte() == 1;
float minQuantile = Float.intBitsToFloat(input.readInt());
float maxQuantile = Float.intBitsToFloat(input.readInt());
scalarQuantizer = new ScalarQuantizer(minQuantile, maxQuantile, bits);
}
} else {
scalarQuantizer = null;
this.bits = (byte) 7;
this.compress = false;
bits = (byte) 7;
compress = false;
}
ordToDoc = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size);
final var ordToDoc = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size);
return new FieldEntry(
similarityFunction,
vectorEncoding,
dimension,
vectorDataOffset,
vectorDataLength,
scalarQuantizer,
size,
bits,
compress,
ordToDoc);
}
@Override