diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswGraphBuilder.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswGraphBuilder.java
index 81327fbfcd5..d17b4019364 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswGraphBuilder.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswGraphBuilder.java
@@ -22,10 +22,10 @@ import java.util.Locale;
import java.util.Objects;
import java.util.SplittableRandom;
import java.util.concurrent.TimeUnit;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.hnsw.NeighborQueue;
+import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
/**
* Builder for HNSW graph. See {@link Lucene90OnHeapHnswGraph} for a gloss on the algorithm and the
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
index 83a3b090bdb..8377072a07f 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
@@ -31,7 +31,6 @@ import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.index.VectorValues;
@@ -47,6 +46,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.hnsw.HnswGraph;
import org.apache.lucene.util.hnsw.NeighborQueue;
+import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
/**
* Reads vectors from the index segments along with index data structures supporting KNN search.
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90OnHeapHnswGraph.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90OnHeapHnswGraph.java
index aeffedcc287..e3b15bf22b1 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90OnHeapHnswGraph.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90OnHeapHnswGraph.java
@@ -23,12 +23,12 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.SplittableRandom;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.SparseFixedBitSet;
import org.apache.lucene.util.hnsw.HnswGraph;
import org.apache.lucene.util.hnsw.NeighborQueue;
+import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
/**
* An {@link HnswGraph} where all nodes and connections are held in memory. This class is used to
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
index d889ff4fbda..752b87d0179 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
@@ -31,7 +31,6 @@ import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
@@ -49,6 +48,7 @@ import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.hnsw.HnswGraph;
import org.apache.lucene.util.hnsw.HnswGraphSearcher;
import org.apache.lucene.util.hnsw.NeighborQueue;
+import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
/**
* Reads vectors from the index segments along with index data structures supporting KNN search.
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene92/OffHeapVectorValues.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene92/OffHeapVectorValues.java
index 88968e42238..b1a0eb5b5ea 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene92/OffHeapVectorValues.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene92/OffHeapVectorValues.java
@@ -20,12 +20,12 @@ package org.apache.lucene.backward_codecs.lucene92;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.lucene.codecs.lucene90.IndexedDISI;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.packed.DirectMonotonicReader;
/** Read the vector values from the index input. This supports both iterated and random access. */
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene94/Lucene94HnswVectorsWriter.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene94/Lucene94HnswVectorsWriter.java
index 6da2df8ca81..965ae0ea48d 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene94/Lucene94HnswVectorsWriter.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene94/Lucene94HnswVectorsWriter.java
@@ -34,7 +34,6 @@ import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Sorter;
import org.apache.lucene.index.VectorValues;
@@ -51,6 +50,7 @@ import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator;
import org.apache.lucene.util.hnsw.HnswGraphBuilder;
import org.apache.lucene.util.hnsw.NeighborArray;
import org.apache.lucene.util.hnsw.OnHeapHnswGraph;
+import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
/**
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene94/OffHeapVectorValues.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene94/OffHeapVectorValues.java
index 042ab6e4904..66f2bf012c4 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene94/OffHeapVectorValues.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene94/OffHeapVectorValues.java
@@ -20,12 +20,12 @@ package org.apache.lucene.backward_codecs.lucene94;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.lucene.codecs.lucene90.IndexedDISI;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.packed.DirectMonotonicReader;
/** Read the vector values from the index input. This supports both iterated and random access. */
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsWriter.java
index 489db7f310e..6478152e104 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsWriter.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsWriter.java
@@ -21,12 +21,11 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
import java.util.Arrays;
+import org.apache.lucene.codecs.BufferingKnnVectorsWriter;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.KnnVectorsReader;
-import org.apache.lucene.index.BufferingKnnVectorsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.index.VectorValues;
@@ -35,6 +34,7 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
/**
* Writes vector values and knn graphs to index segments.
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswGraphBuilder.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswGraphBuilder.java
index 344c01ab085..5f37e905e2f 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswGraphBuilder.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswGraphBuilder.java
@@ -24,7 +24,6 @@ import java.util.Locale;
import java.util.Objects;
import java.util.SplittableRandom;
import java.util.concurrent.TimeUnit;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.FixedBitSet;
@@ -32,6 +31,7 @@ import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.hnsw.HnswGraph;
import org.apache.lucene.util.hnsw.HnswGraphSearcher;
import org.apache.lucene.util.hnsw.NeighborQueue;
+import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
/**
* Builder for HNSW graph. See {@link HnswGraph} for a gloss on the algorithm and the meaning of the
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsWriter.java
index 855f469f174..5e9b62c8dd7 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsWriter.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsWriter.java
@@ -21,13 +21,12 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
import java.util.Arrays;
+import org.apache.lucene.codecs.BufferingKnnVectorsWriter;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.KnnVectorsReader;
-import org.apache.lucene.index.BufferingKnnVectorsWriter;
import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.index.VectorValues;
@@ -37,6 +36,7 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator;
+import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
/**
* Writes vector values and knn graphs to index segments.
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/Lucene92HnswVectorsWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/Lucene92HnswVectorsWriter.java
index 0cf6aa817ca..0e6c72186c7 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/Lucene92HnswVectorsWriter.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/Lucene92HnswVectorsWriter.java
@@ -22,14 +22,13 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
import java.util.Arrays;
+import org.apache.lucene.codecs.BufferingKnnVectorsWriter;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.lucene90.IndexedDISI;
-import org.apache.lucene.index.BufferingKnnVectorsWriter;
import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
@@ -43,6 +42,7 @@ import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator;
import org.apache.lucene.util.hnsw.HnswGraphBuilder;
import org.apache.lucene.util.hnsw.NeighborArray;
import org.apache.lucene.util.hnsw.OnHeapHnswGraph;
+import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
/**
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java
index 4b29ce942d3..a3d7753871b 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java
@@ -28,7 +28,6 @@ import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.index.VectorValues;
@@ -47,6 +46,7 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
/**
* Reads vector values from a simple text format. All vectors are read up front and cached in RAM in
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsWriter.java
index 4992e524938..18a310d46ed 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsWriter.java
@@ -23,8 +23,8 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import org.apache.lucene.codecs.BufferingKnnVectorsWriter;
import org.apache.lucene.codecs.KnnVectorsReader;
-import org.apache.lucene.index.BufferingKnnVectorsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
diff --git a/lucene/core/src/java/org/apache/lucene/index/BufferingKnnVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/BufferingKnnVectorsWriter.java
similarity index 80%
rename from lucene/core/src/java/org/apache/lucene/index/BufferingKnnVectorsWriter.java
rename to lucene/core/src/java/org/apache/lucene/codecs/BufferingKnnVectorsWriter.java
index 3ff02da9810..d2a54a81609 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BufferingKnnVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/BufferingKnnVectorsWriter.java
@@ -15,16 +15,18 @@
* limitations under the License.
*/
-package org.apache.lucene.index;
+package org.apache.lucene.codecs;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.List;
-import org.apache.lucene.codecs.KnnFieldVectorsWriter;
-import org.apache.lucene.codecs.KnnVectorsReader;
-import org.apache.lucene.codecs.KnnVectorsWriter;
+import org.apache.lucene.index.DocsWithFieldSet;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.Sorter;
+import org.apache.lucene.index.VectorValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.ArrayUtil;
@@ -73,13 +75,13 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
@Override
public VectorValues getVectorValues(String field) throws IOException {
- VectorValues vectorValues =
+ BufferedVectorValues vectorValues =
new BufferedVectorValues(
fieldData.docsWithField,
fieldData.vectors,
fieldData.fieldInfo.getVectorDimension());
return sortMap != null
- ? new VectorValues.SortingVectorValues(vectorValues, sortMap)
+ ? new SortingVectorValues(vectorValues, sortMap)
: vectorValues;
}
@@ -94,6 +96,67 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
}
}
+  /**
+   * View over the buffered vectors that visits documents in the order of the doc IDs assigned by
+   * the provided sortMap.
+   */
+  private static class SortingVectorValues extends VectorValues {
+    private final BufferedVectorValues randomAccess;
+    // docIdOffsets[newDocID] holds the 1-based position of that document in the delegate's
+    // iteration order; 0 marks a document without a vector.
+    private final int[] docIdOffsets;
+    private int docId = -1;
+
+    SortingVectorValues(BufferedVectorValues delegate, Sorter.DocMap sortMap) throws IOException {
+      this.randomAccess = delegate.copy();
+      this.docIdOffsets = new int[sortMap.size()];
+
+      int nextOffset = 1; // 0 means no vector for this (field, document)
+      for (int oldDoc = delegate.nextDoc(); oldDoc != NO_MORE_DOCS; oldDoc = delegate.nextDoc()) {
+        docIdOffsets[sortMap.oldToNew(oldDoc)] = nextOffset;
+        nextOffset++;
+      }
+    }
+
+    /** Zero-based ordinal, within the random-access copy, of the current document's vector. */
+    private int currentOrd() {
+      return docIdOffsets[docId] - 1;
+    }
+
+    @Override
+    public int docID() {
+      return docId;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      // Scan forward to the next slot that carries a vector.
+      final int lastSlot = docIdOffsets.length - 1;
+      while (docId < lastSlot) {
+        docId++;
+        if (docIdOffsets[docId] != 0) {
+          return docId;
+        }
+      }
+      return docId = NO_MORE_DOCS;
+    }
+
+    @Override
+    public BytesRef binaryValue() throws IOException {
+      return randomAccess.binaryValue(currentOrd());
+    }
+
+    @Override
+    public float[] vectorValue() throws IOException {
+      return randomAccess.vectorValue(currentOrd());
+    }
+
+    @Override
+    public int dimension() {
+      return randomAccess.dimension();
+    }
+
+    @Override
+    public int size() {
+      return randomAccess.size();
+    }
+
+    /** Not supported; only sequential iteration through {@link #nextDoc()} is available. */
+    @Override
+    public int advance(int target) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+  }
+
@Override
public long ramBytesUsed() {
long total = 0;
@@ -197,8 +260,7 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
}
}
- private static class BufferedVectorValues extends VectorValues
- implements RandomAccessVectorValues {
+ private static class BufferedVectorValues extends VectorValues {
final DocsWithFieldSet docsWithField;
@@ -225,8 +287,7 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
docsWithFieldIter = docsWithField.iterator();
}
- @Override
- public RandomAccessVectorValues copy() {
+ public BufferedVectorValues copy() {
return new BufferedVectorValues(docsWithField, vectors, dimension);
}
@@ -246,7 +307,6 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
return binaryValue;
}
- @Override
public BytesRef binaryValue(int targetOrd) {
raBuffer.asFloatBuffer().put(vectors.get(targetOrd));
return raBinaryValue;
@@ -257,7 +317,6 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
return vectors.get(ord);
}
- @Override
public float[] vectorValue(int targetOrd) {
return vectors.get(targetOrd);
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java
index 60ecf6f6472..869dd03b42d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java
@@ -41,6 +41,7 @@ import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator;
import org.apache.lucene.util.hnsw.HnswGraphBuilder;
import org.apache.lucene.util.hnsw.NeighborArray;
import org.apache.lucene.util.hnsw.OnHeapHnswGraph;
+import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
/**
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapVectorValues.java
index 92e052329c9..b4ccfa2825e 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapVectorValues.java
@@ -20,12 +20,12 @@ package org.apache.lucene.codecs.lucene95;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.lucene.codecs.lucene90.IndexedDISI;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.packed.DirectMonotonicReader;
/** Read the vector values from the index input. This supports both iterated and random access. */
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java b/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java
index 7fb91bf6621..320c89d9690 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java
@@ -20,6 +20,8 @@ package org.apache.lucene.index;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
@@ -35,6 +37,7 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOSupplier;
import org.apache.lucene.util.packed.PackedInts;
@@ -212,6 +215,88 @@ public final class SortingCodecReader extends FilterCodecReader {
}
}
+  /**
+   * Sorting VectorValues that iterate over documents in the order of the provided sortMap.
+   *
+   * <p>The constructor drains the delegate and keeps a copy of every vector, indexed by the
+   * document's new (sorted) doc ID; iteration afterwards only reads those copies.
+   */
+  private static class SortingVectorValues extends VectorValues {
+    final int size;
+    final int dimension;
+    // Bit i is set when the document whose sorted doc ID is i has a vector.
+    final FixedBitSet docsWithField;
+    // Float vectors indexed by sorted doc ID; null when the encoding is BYTE.
+    final float[][] vectors;
+    // Scratch buffer for serializing a float vector in binaryValue(); null when encoding is BYTE.
+    final ByteBuffer vectorAsBytes;
+    // Binary vectors indexed by sorted doc ID; null unless the encoding is BYTE.
+    final BytesRef[] binaryVectors;
+
+    private int docId = -1;
+
+    /**
+     * Copies all vectors of {@code delegate}, re-keyed by {@code sortMap.oldToNew(doc)}.
+     *
+     * @param delegate values to copy; it is fully consumed by this constructor
+     * @param sortMap mapping from the delegate's doc IDs to the sorted doc IDs
+     * @param encoding selects whether binary ({@code BYTE}) or float copies are kept
+     * @throws IOException if reading from the delegate fails
+     */
+    SortingVectorValues(VectorValues delegate, Sorter.DocMap sortMap, VectorEncoding encoding)
+        throws IOException {
+      this.size = delegate.size();
+      this.dimension = delegate.dimension();
+      docsWithField = new FixedBitSet(sortMap.size());
+      if (encoding == VectorEncoding.BYTE) {
+        vectors = null;
+        binaryVectors = new BytesRef[sortMap.size()];
+        vectorAsBytes = null;
+      } else {
+        vectors = new float[sortMap.size()][];
+        binaryVectors = null;
+        vectorAsBytes =
+            ByteBuffer.allocate(delegate.dimension() * encoding.byteSize)
+                .order(ByteOrder.LITTLE_ENDIAN);
+      }
+      for (int doc = delegate.nextDoc(); doc != NO_MORE_DOCS; doc = delegate.nextDoc()) {
+        int newDocID = sortMap.oldToNew(doc);
+        docsWithField.set(newDocID);
+        // Copy rather than alias: presumably the delegate may reuse its returned buffers.
+        if (encoding == VectorEncoding.BYTE) {
+          binaryVectors[newDocID] = BytesRef.deepCopyOf(delegate.binaryValue());
+        } else {
+          vectors[newDocID] = delegate.vectorValue().clone();
+        }
+      }
+    }
+
+    @Override
+    public int docID() {
+      return docId;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      // Once exhausted, docId + 1 would overflow to a negative target; stay exhausted instead.
+      if (docId == NO_MORE_DOCS) {
+        return NO_MORE_DOCS;
+      }
+      return advance(docId + 1);
+    }
+
+    @Override
+    public BytesRef binaryValue() throws IOException {
+      if (binaryVectors != null) {
+        return binaryVectors[docId];
+      } else {
+        // Serialize the float vector into the shared scratch buffer and wrap its backing array.
+        vectorAsBytes.asFloatBuffer().put(vectors[docId]);
+        return new BytesRef(vectorAsBytes.array());
+      }
+    }
+
+    @Override
+    public float[] vectorValue() throws IOException {
+      // NOTE(review): vectors is null for BYTE-encoded fields, so this throws a
+      // NullPointerException there -- confirm callers only use binaryValue() for BYTE.
+      return vectors[docId];
+    }
+
+    @Override
+    public int dimension() {
+      return dimension;
+    }
+
+    @Override
+    public int size() {
+      return size;
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      if (target >= docsWithField.length()) {
+        // Record exhaustion so docID() reports NO_MORE_DOCS instead of the last matched doc.
+        return docId = NO_MORE_DOCS;
+      }
+      return docId = docsWithField.nextSetBit(target);
+    }
+  }
+
/**
* Return a sorted view of reader
according to the order defined by sort
* . If the reader is already sorted, this method might return the reader as-is.
@@ -380,7 +465,9 @@ public final class SortingCodecReader extends FilterCodecReader {
@Override
public VectorValues getVectorValues(String field) throws IOException {
- return new VectorValues.SortingVectorValues(delegate.getVectorValues(field), docMap);
+ FieldInfo fi = in.getFieldInfos().fieldInfo(field);
+ return new SortingVectorValues(
+ delegate.getVectorValues(field), docMap, fi.getVectorEncoding());
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/index/VectorValues.java b/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
index 21945e888e9..79b1033b4aa 100644
--- a/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
@@ -70,65 +70,4 @@ public abstract class VectorValues extends DocIdSetIterator {
public BytesRef binaryValue() throws IOException {
throw new UnsupportedOperationException();
}
-
- /** Sorting VectorValues that iterate over documents in the order of the provided sortMap */
- public static class SortingVectorValues extends VectorValues {
- private final RandomAccessVectorValues randomAccess;
- private final int[] docIdOffsets;
- private int docId = -1;
-
- SortingVectorValues(VectorValues delegate, Sorter.DocMap sortMap) throws IOException {
- this.randomAccess = ((RandomAccessVectorValues) delegate).copy();
- this.docIdOffsets = new int[sortMap.size()];
-
- int offset = 1; // 0 means no vector for this (field, document)
- int docID;
- while ((docID = delegate.nextDoc()) != NO_MORE_DOCS) {
- int newDocID = sortMap.oldToNew(docID);
- docIdOffsets[newDocID] = offset++;
- }
- }
-
- @Override
- public int docID() {
- return docId;
- }
-
- @Override
- public int nextDoc() throws IOException {
- while (docId < docIdOffsets.length - 1) {
- ++docId;
- if (docIdOffsets[docId] != 0) {
- return docId;
- }
- }
- docId = NO_MORE_DOCS;
- return docId;
- }
-
- @Override
- public BytesRef binaryValue() throws IOException {
- return randomAccess.binaryValue(docIdOffsets[docId] - 1);
- }
-
- @Override
- public float[] vectorValue() throws IOException {
- return randomAccess.vectorValue(docIdOffsets[docId] - 1);
- }
-
- @Override
- public int dimension() {
- return randomAccess.dimension();
- }
-
- @Override
- public int size() {
- return randomAccess.size();
- }
-
- @Override
- public int advance(int target) throws IOException {
- throw new UnsupportedOperationException();
- }
- }
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
index d9c772f093b..f1b9c8151f7 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
@@ -24,7 +24,6 @@ import java.util.Locale;
import java.util.Objects;
import java.util.SplittableRandom;
import java.util.concurrent.TimeUnit;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.BytesRef;
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
index ce0022d5196..db71d19f458 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
@@ -21,7 +21,6 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.apache.lucene.util.VectorUtil.toBytesRef;
import java.io.IOException;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.BitSet;
diff --git a/lucene/core/src/java/org/apache/lucene/index/RandomAccessVectorValues.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/RandomAccessVectorValues.java
similarity index 93%
rename from lucene/core/src/java/org/apache/lucene/index/RandomAccessVectorValues.java
rename to lucene/core/src/java/org/apache/lucene/util/hnsw/RandomAccessVectorValues.java
index a18535ee567..4c220518a2e 100644
--- a/lucene/core/src/java/org/apache/lucene/index/RandomAccessVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/RandomAccessVectorValues.java
@@ -15,13 +15,14 @@
* limitations under the License.
*/
-package org.apache.lucene.index;
+package org.apache.lucene.util.hnsw;
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
/**
- * Provides random access to vectors by dense ordinal.
+ * Provides random access to vectors by dense ordinal. This interface is used by HNSW-based
+ * implementations of KNN search.
*
* @lucene.experimental
*/
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
index 2ae264fe8ce..299d27857da 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
@@ -17,7 +17,6 @@
package org.apache.lucene.util.hnsw;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
index 03f13195aa2..3e8f019b7f4 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
@@ -45,7 +45,6 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.index.VectorValues;