mirror of https://github.com/apache/lucene.git
LUCENE-10408: Write doc IDs of KNN vectors as ints rather than vints. (#708)
Since doc IDs with a vector are loaded as an int[] in memory, this changes the on-disk format of vectors to align with the in-memory representation by using ints instead of vints to represent doc IDs. This might make vectors a bit larger on disk, but also a bit faster to open. I made the same change to how we encode nodes on levels for the same reason.
This commit is contained in:
parent
550d1305db
commit
44d7d962ae
|
@ -69,13 +69,13 @@ import org.apache.lucene.util.hnsw.HnswGraph;
|
|||
* <li><b>[int]</b> the number of documents having values for this field
|
||||
* <li><b>[int8]</b> if equal to -1, dense – all documents have values for a field. If equal to
|
||||
* 0, sparse – some documents are missing values.
|
||||
* <li><b>array[vint]</b> for sparse case, the docids of documents having vectors, in order
|
||||
* <li><b>array[int]</b> for sparse case, the docids of documents having vectors, in order
|
||||
* <li><b>[int]</b> the maximum number of connections (neighbours) that each node can have
|
||||
* <li><b>[int]</b> number of levels in the graph
|
||||
* <li>Graph nodes by level. For each level
|
||||
* <ul>
|
||||
* <li><b>[int]</b> the number of nodes on this level
|
||||
* <li><b>array[vint]</b> for levels greater than 0 list of nodes on this level, stored as
|
||||
* <li><b>array[int]</b> for levels greater than 0 list of nodes on this level, stored as
|
||||
* the level 0th nodes ordinals.
|
||||
* </ul>
|
||||
* </ul>
|
||||
|
|
|
@ -347,7 +347,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
|
|||
// as not all docs have vector values, fill a mapping from dense vector ordinals to docIds
|
||||
ordToDoc = new int[size];
|
||||
for (int i = 0; i < size; i++) {
|
||||
int doc = input.readVInt();
|
||||
int doc = input.readInt();
|
||||
ordToDoc[i] = doc;
|
||||
}
|
||||
}
|
||||
|
@ -366,7 +366,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
|
|||
} else {
|
||||
nodesByLevel[level] = new int[numNodesOnLevel];
|
||||
for (int i = 0; i < numNodesOnLevel; i++) {
|
||||
nodesByLevel[level][i] = input.readVInt();
|
||||
nodesByLevel[level][i] = input.readInt();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -213,7 +213,7 @@ public final class Lucene91HnswVectorsWriter extends KnnVectorsWriter {
|
|||
meta.writeByte((byte) 0); // sparse marker, some documents don't have vector values
|
||||
DocIdSetIterator iter = docsWithField.iterator();
|
||||
for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
|
||||
meta.writeVInt(doc);
|
||||
meta.writeInt(doc);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -229,7 +229,7 @@ public final class Lucene91HnswVectorsWriter extends KnnVectorsWriter {
|
|||
if (level > 0) {
|
||||
while (nodesOnLevel.hasNext()) {
|
||||
int node = nodesOnLevel.nextInt();
|
||||
meta.writeVInt(node); // list of nodes on a level
|
||||
meta.writeInt(node); // list of nodes on a level
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue