mirror of https://github.com/apache/lucene.git
LUCENE-10408: Write doc IDs of KNN vectors as ints rather than vints. (#708)
Since doc IDs with a vector are loaded as an int[] in memory, this changes the on-disk format of vectors to align with the in-memory representation by using ints instead of vints to represent doc IDs. This might make vectors a bit larger on disk, but also a bit faster to open. I made the same change to how we encode nodes on levels for the same reason.
This commit is contained in:
parent
550d1305db
commit
44d7d962ae
|
@ -69,13 +69,13 @@ import org.apache.lucene.util.hnsw.HnswGraph;
|
||||||
* <li><b>[int]</b> the number of documents having values for this field
|
* <li><b>[int]</b> the number of documents having values for this field
|
||||||
* <li><b>[int8]</b> if equals to -1, dense – all documents have values for a field. If equals to
|
* <li><b>[int8]</b> if equals to -1, dense – all documents have values for a field. If equals to
|
||||||
* 0, sparse – some documents missing values.
|
* 0, sparse – some documents missing values.
|
||||||
* <li><b>array[vint]</b> for sparse case, the docids of documents having vectors, in order
|
* <li><b>array[int]</b> for sparse case, the docids of documents having vectors, in order
|
||||||
* <li><b>[int]</b> the maximum number of connections (neighbours) that each node can have
|
* <li><b>[int]</b> the maximum number of connections (neighbours) that each node can have
|
||||||
* <li><b>[int]</b> number of levels in the graph
|
* <li><b>[int]</b> number of levels in the graph
|
||||||
* <li>Graph nodes by level. For each level
|
* <li>Graph nodes by level. For each level
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li><b>[int]</b> the number of nodes on this level
|
* <li><b>[int]</b> the number of nodes on this level
|
||||||
* <li><b>array[vint]</b> for levels greater than 0 list of nodes on this level, stored as
|
* <li><b>array[int]</b> for levels greater than 0 list of nodes on this level, stored as
|
||||||
* the level 0th nodes' ordinals.
|
* the level 0th nodes' ordinals.
|
||||||
* </ul>
|
* </ul>
|
||||||
* </ul>
|
* </ul>
|
||||||
|
|
|
@ -347,7 +347,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
|
||||||
// as not all docs have vector values, fill a mapping from dense vector ordinals to docIds
|
// as not all docs have vector values, fill a mapping from dense vector ordinals to docIds
|
||||||
ordToDoc = new int[size];
|
ordToDoc = new int[size];
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
int doc = input.readVInt();
|
int doc = input.readInt();
|
||||||
ordToDoc[i] = doc;
|
ordToDoc[i] = doc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -366,7 +366,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
|
||||||
} else {
|
} else {
|
||||||
nodesByLevel[level] = new int[numNodesOnLevel];
|
nodesByLevel[level] = new int[numNodesOnLevel];
|
||||||
for (int i = 0; i < numNodesOnLevel; i++) {
|
for (int i = 0; i < numNodesOnLevel; i++) {
|
||||||
nodesByLevel[level][i] = input.readVInt();
|
nodesByLevel[level][i] = input.readInt();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -213,7 +213,7 @@ public final class Lucene91HnswVectorsWriter extends KnnVectorsWriter {
|
||||||
meta.writeByte((byte) 0); // sparse marker, some documents don't have vector values
|
meta.writeByte((byte) 0); // sparse marker, some documents don't have vector values
|
||||||
DocIdSetIterator iter = docsWithField.iterator();
|
DocIdSetIterator iter = docsWithField.iterator();
|
||||||
for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
|
for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
|
||||||
meta.writeVInt(doc);
|
meta.writeInt(doc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -229,7 +229,7 @@ public final class Lucene91HnswVectorsWriter extends KnnVectorsWriter {
|
||||||
if (level > 0) {
|
if (level > 0) {
|
||||||
while (nodesOnLevel.hasNext()) {
|
while (nodesOnLevel.hasNext()) {
|
||||||
int node = nodesOnLevel.nextInt();
|
int node = nodesOnLevel.nextInt();
|
||||||
meta.writeVInt(node); // list of nodes on a level
|
meta.writeInt(node); // list of nodes on a level
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue