mirror of https://github.com/apache/lucene.git
Moving quantization logic to make future quantizer work simpler (#13091)
This commit is contained in:
parent
f339e24e8e
commit
860a097ab3
|
@ -63,6 +63,8 @@ module org.apache.lucene.core {
|
|||
opens org.apache.lucene.document to
|
||||
org.apache.lucene.test_framework;
|
||||
|
||||
exports org.apache.lucene.util.quantization;
|
||||
|
||||
provides org.apache.lucene.analysis.TokenizerFactory with
|
||||
org.apache.lucene.analysis.standard.StandardTokenizerFactory;
|
||||
provides org.apache.lucene.codecs.Codec with
|
||||
|
|
|
@ -17,6 +17,9 @@
|
|||
|
||||
package org.apache.lucene.codecs.lucene99;
|
||||
|
||||
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readSimilarityFunction;
|
||||
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
@ -35,7 +38,6 @@ import org.apache.lucene.index.SegmentReadState;
|
|||
import org.apache.lucene.index.VectorEncoding;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
@ -55,7 +57,7 @@ public final class Lucene99FlatVectorsReader extends FlatVectorsReader {
|
|||
private final Map<String, FieldEntry> fields = new HashMap<>();
|
||||
private final IndexInput vectorData;
|
||||
|
||||
Lucene99FlatVectorsReader(SegmentReadState state) throws IOException {
|
||||
public Lucene99FlatVectorsReader(SegmentReadState state) throws IOException {
|
||||
int versionMeta = readMetadata(state);
|
||||
boolean success = false;
|
||||
try {
|
||||
|
@ -181,24 +183,6 @@ public final class Lucene99FlatVectorsReader extends FlatVectorsReader {
|
|||
}
|
||||
}
|
||||
|
||||
private VectorSimilarityFunction readSimilarityFunction(DataInput input) throws IOException {
|
||||
int similarityFunctionId = input.readInt();
|
||||
if (similarityFunctionId < 0
|
||||
|| similarityFunctionId >= VectorSimilarityFunction.values().length) {
|
||||
throw new CorruptIndexException(
|
||||
"Invalid similarity function id: " + similarityFunctionId, input);
|
||||
}
|
||||
return VectorSimilarityFunction.values()[similarityFunctionId];
|
||||
}
|
||||
|
||||
private VectorEncoding readVectorEncoding(DataInput input) throws IOException {
|
||||
int encodingId = input.readInt();
|
||||
if (encodingId < 0 || encodingId >= VectorEncoding.values().length) {
|
||||
throw new CorruptIndexException("Invalid vector encoding id: " + encodingId, input);
|
||||
}
|
||||
return VectorEncoding.values()[encodingId];
|
||||
}
|
||||
|
||||
private FieldEntry readField(IndexInput input) throws IOException {
|
||||
VectorEncoding vectorEncoding = readVectorEncoding(input);
|
||||
VectorSimilarityFunction similarityFunction = readSimilarityFunction(input);
|
||||
|
|
|
@ -69,7 +69,7 @@ public final class Lucene99FlatVectorsWriter extends FlatVectorsWriter {
|
|||
private final List<FieldWriter<?>> fields = new ArrayList<>();
|
||||
private boolean finished;
|
||||
|
||||
Lucene99FlatVectorsWriter(SegmentWriteState state) throws IOException {
|
||||
public Lucene99FlatVectorsWriter(SegmentWriteState state) throws IOException {
|
||||
segmentWriteState = state;
|
||||
String metaFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
|
|
|
@ -45,12 +45,14 @@ import org.apache.lucene.util.Accountable;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.ScalarQuantizer;
|
||||
import org.apache.lucene.util.hnsw.HnswGraph;
|
||||
import org.apache.lucene.util.hnsw.HnswGraphSearcher;
|
||||
import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector;
|
||||
import org.apache.lucene.util.hnsw.RandomVectorScorer;
|
||||
import org.apache.lucene.util.packed.DirectMonotonicReader;
|
||||
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
|
||||
import org.apache.lucene.util.quantization.QuantizedVectorsReader;
|
||||
import org.apache.lucene.util.quantization.ScalarQuantizer;
|
||||
|
||||
/**
|
||||
* Reads vectors from the index segments along with index data structures supporting KNN search.
|
||||
|
@ -68,7 +70,7 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
|
|||
private final IndexInput vectorIndex;
|
||||
private final FlatVectorsReader flatVectorsReader;
|
||||
|
||||
Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatVectorsReader)
|
||||
public Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatVectorsReader)
|
||||
throws IOException {
|
||||
this.flatVectorsReader = flatVectorsReader;
|
||||
boolean success = false;
|
||||
|
@ -169,7 +171,8 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
|
|||
}
|
||||
}
|
||||
|
||||
private VectorSimilarityFunction readSimilarityFunction(DataInput input) throws IOException {
|
||||
public static VectorSimilarityFunction readSimilarityFunction(DataInput input)
|
||||
throws IOException {
|
||||
int similarityFunctionId = input.readInt();
|
||||
if (similarityFunctionId < 0
|
||||
|| similarityFunctionId >= VectorSimilarityFunction.values().length) {
|
||||
|
@ -179,7 +182,7 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
|
|||
return VectorSimilarityFunction.values()[similarityFunctionId];
|
||||
}
|
||||
|
||||
private VectorEncoding readVectorEncoding(DataInput input) throws IOException {
|
||||
public static VectorEncoding readVectorEncoding(DataInput input) throws IOException {
|
||||
int encodingId = input.readInt();
|
||||
if (encodingId < 0 || encodingId >= VectorEncoding.values().length) {
|
||||
throw new CorruptIndexException("Invalid vector encoding id: " + encodingId, input);
|
||||
|
|
|
@ -72,7 +72,7 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
|
|||
private final List<FieldWriter<?>> fields = new ArrayList<>();
|
||||
private boolean finished;
|
||||
|
||||
Lucene99HnswVectorsWriter(
|
||||
public Lucene99HnswVectorsWriter(
|
||||
SegmentWriteState state,
|
||||
int M,
|
||||
int beamWidth,
|
||||
|
|
|
@ -81,7 +81,7 @@ public final class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsForma
|
|||
this.confidenceInterval = confidenceInterval;
|
||||
}
|
||||
|
||||
static float calculateDefaultConfidenceInterval(int vectorDimension) {
|
||||
public static float calculateDefaultConfidenceInterval(int vectorDimension) {
|
||||
return Math.max(MINIMUM_CONFIDENCE_INTERVAL, 1f - (1f / (vectorDimension + 1)));
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,9 @@
|
|||
|
||||
package org.apache.lucene.codecs.lucene99;
|
||||
|
||||
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readSimilarityFunction;
|
||||
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
@ -33,13 +36,15 @@ import org.apache.lucene.index.SegmentReadState;
|
|||
import org.apache.lucene.index.VectorEncoding;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.ScalarQuantizer;
|
||||
import org.apache.lucene.util.hnsw.RandomVectorScorer;
|
||||
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
|
||||
import org.apache.lucene.util.quantization.QuantizedVectorsReader;
|
||||
import org.apache.lucene.util.quantization.ScalarQuantizedRandomVectorScorer;
|
||||
import org.apache.lucene.util.quantization.ScalarQuantizer;
|
||||
|
||||
/**
|
||||
* Reads Scalar Quantized vectors from the index segments along with index data structures.
|
||||
|
@ -56,8 +61,8 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade
|
|||
private final IndexInput quantizedVectorData;
|
||||
private final FlatVectorsReader rawVectorsReader;
|
||||
|
||||
Lucene99ScalarQuantizedVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader)
|
||||
throws IOException {
|
||||
public Lucene99ScalarQuantizedVectorsReader(
|
||||
SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException {
|
||||
this.rawVectorsReader = rawVectorsReader;
|
||||
int versionMeta = -1;
|
||||
String metaFileName =
|
||||
|
@ -237,24 +242,6 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade
|
|||
return new FieldEntry(input, vectorEncoding, similarityFunction);
|
||||
}
|
||||
|
||||
private VectorSimilarityFunction readSimilarityFunction(DataInput input) throws IOException {
|
||||
int similarityFunctionId = input.readInt();
|
||||
if (similarityFunctionId < 0
|
||||
|| similarityFunctionId >= VectorSimilarityFunction.values().length) {
|
||||
throw new CorruptIndexException(
|
||||
"Invalid similarity function id: " + similarityFunctionId, input);
|
||||
}
|
||||
return VectorSimilarityFunction.values()[similarityFunctionId];
|
||||
}
|
||||
|
||||
private VectorEncoding readVectorEncoding(DataInput input) throws IOException {
|
||||
int encodingId = input.readInt();
|
||||
if (encodingId < 0 || encodingId >= VectorEncoding.values().length) {
|
||||
throw new CorruptIndexException("Invalid vector encoding id: " + encodingId, input);
|
||||
}
|
||||
return VectorEncoding.values()[encodingId];
|
||||
}
|
||||
|
||||
@Override
|
||||
public QuantizedByteVectorValues getQuantizedVectorValues(String fieldName) throws IOException {
|
||||
FieldEntry fieldEntry = fields.get(fieldName);
|
||||
|
|
|
@ -53,11 +53,14 @@ import org.apache.lucene.store.IndexOutput;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.InfoStream;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.ScalarQuantizer;
|
||||
import org.apache.lucene.util.VectorUtil;
|
||||
import org.apache.lucene.util.hnsw.CloseableRandomVectorScorerSupplier;
|
||||
import org.apache.lucene.util.hnsw.RandomVectorScorer;
|
||||
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
|
||||
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
|
||||
import org.apache.lucene.util.quantization.QuantizedVectorsReader;
|
||||
import org.apache.lucene.util.quantization.ScalarQuantizedRandomVectorScorerSupplier;
|
||||
import org.apache.lucene.util.quantization.ScalarQuantizer;
|
||||
|
||||
/**
|
||||
* Writes quantized vector values and metadata to index segments.
|
||||
|
@ -95,7 +98,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
private final FlatVectorsWriter rawVectorDelegate;
|
||||
private boolean finished;
|
||||
|
||||
Lucene99ScalarQuantizedVectorsWriter(
|
||||
public Lucene99ScalarQuantizedVectorsWriter(
|
||||
SegmentWriteState state, Float confidenceInterval, FlatVectorsWriter rawVectorDelegate)
|
||||
throws IOException {
|
||||
this.confidenceInterval = confidenceInterval;
|
||||
|
@ -522,7 +525,16 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
return null;
|
||||
}
|
||||
|
||||
static ScalarQuantizer mergeAndRecalculateQuantiles(
|
||||
/**
|
||||
* Merges the quantiles of the segments and recalculates the quantiles if necessary.
|
||||
*
|
||||
* @param mergeState The merge state
|
||||
* @param fieldInfo The field info
|
||||
* @param confidenceInterval The confidence interval
|
||||
* @return The merged quantiles
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
*/
|
||||
public static ScalarQuantizer mergeAndRecalculateQuantiles(
|
||||
MergeState mergeState, FieldInfo fieldInfo, float confidenceInterval) throws IOException {
|
||||
List<ScalarQuantizer> quantizationStates = new ArrayList<>(mergeState.liveDocs.length);
|
||||
List<Integer> segmentSizes = new ArrayList<>(mergeState.liveDocs.length);
|
||||
|
@ -587,7 +599,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
/**
|
||||
* Writes the vector values to the output and returns a set of documents that contains vectors.
|
||||
*/
|
||||
private static DocsWithFieldSet writeQuantizedVectorData(
|
||||
public static DocsWithFieldSet writeQuantizedVectorData(
|
||||
IndexOutput output, QuantizedByteVectorValues quantizedByteVectorValues) throws IOException {
|
||||
DocsWithFieldSet docsWithField = new DocsWithFieldSet();
|
||||
for (int docV = quantizedByteVectorValues.nextDoc();
|
||||
|
@ -867,7 +879,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
}
|
||||
|
||||
@Override
|
||||
float getScoreCorrectionConstant() throws IOException {
|
||||
public float getScoreCorrectionConstant() throws IOException {
|
||||
return current.values.getScoreCorrectionConstant();
|
||||
}
|
||||
}
|
||||
|
@ -897,7 +909,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
}
|
||||
|
||||
@Override
|
||||
float getScoreCorrectionConstant() {
|
||||
public float getScoreCorrectionConstant() {
|
||||
return offsetValue;
|
||||
}
|
||||
|
||||
|
@ -1006,7 +1018,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
}
|
||||
|
||||
@Override
|
||||
float getScoreCorrectionConstant() throws IOException {
|
||||
public float getScoreCorrectionConstant() throws IOException {
|
||||
return scalarQuantizer.recalculateCorrectiveOffset(
|
||||
in.vectorValue(), oldScalarQuantizer, vectorSimilarityFunction);
|
||||
}
|
||||
|
|
|
@ -24,12 +24,14 @@ import org.apache.lucene.codecs.lucene95.OrdToDocDISIReaderConfiguration;
|
|||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.packed.DirectMonotonicReader;
|
||||
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
|
||||
import org.apache.lucene.util.quantization.RandomAccessQuantizedByteVectorValues;
|
||||
|
||||
/**
|
||||
* Read the quantized vector values and their score correction values from the index input. This
|
||||
* supports both iterated and random access.
|
||||
*/
|
||||
abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVectorValues
|
||||
public abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVectorValues
|
||||
implements RandomAccessQuantizedByteVectorValues {
|
||||
|
||||
protected final int dimension;
|
||||
|
@ -77,7 +79,7 @@ abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVectorValue
|
|||
return scoreCorrectionConstant[0];
|
||||
}
|
||||
|
||||
static OffHeapQuantizedByteVectorValues load(
|
||||
public static OffHeapQuantizedByteVectorValues load(
|
||||
OrdToDocDISIReaderConfiguration configuration,
|
||||
int dimension,
|
||||
int size,
|
||||
|
@ -98,7 +100,11 @@ abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVectorValue
|
|||
}
|
||||
}
|
||||
|
||||
static class DenseOffHeapVectorValues extends OffHeapQuantizedByteVectorValues {
|
||||
/**
|
||||
* Dense vector values that are stored off-heap. This is the most common case when every doc has a
|
||||
* vector.
|
||||
*/
|
||||
public static class DenseOffHeapVectorValues extends OffHeapQuantizedByteVectorValues {
|
||||
|
||||
private int doc = -1;
|
||||
|
||||
|
@ -231,7 +237,7 @@ abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVectorValue
|
|||
}
|
||||
|
||||
@Override
|
||||
public byte[] vectorValue() throws IOException {
|
||||
public byte[] vectorValue() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
|
@ -246,17 +252,17 @@ abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVectorValue
|
|||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
public int advance(int target) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public EmptyOffHeapVectorValues copy() throws IOException {
|
||||
public EmptyOffHeapVectorValues copy() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] vectorValue(int targetOrd) throws IOException {
|
||||
public byte[] vectorValue(int targetOrd) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene99;
|
||||
package org.apache.lucene.util.quantization;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.ByteVectorValues;
|
||||
|
@ -22,7 +22,9 @@ import org.apache.lucene.index.ByteVectorValues;
|
|||
/**
|
||||
* A version of {@link ByteVectorValues}, but additionally retrieving score correction offset for
|
||||
* Scalar quantization scores.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
abstract class QuantizedByteVectorValues extends ByteVectorValues {
|
||||
abstract float getScoreCorrectionConstant() throws IOException;
|
||||
public abstract class QuantizedByteVectorValues extends ByteVectorValues {
|
||||
public abstract float getScoreCorrectionConstant() throws IOException;
|
||||
}
|
|
@ -14,15 +14,18 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene99;
|
||||
package org.apache.lucene.util.quantization;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.ScalarQuantizer;
|
||||
|
||||
/** Quantized vector reader */
|
||||
interface QuantizedVectorsReader extends Closeable, Accountable {
|
||||
/**
|
||||
* Quantized vector reader
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface QuantizedVectorsReader extends Closeable, Accountable {
|
||||
|
||||
QuantizedByteVectorValues getQuantizedVectorValues(String fieldName) throws IOException;
|
||||
|
|
@ -14,7 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene99;
|
||||
package org.apache.lucene.util.quantization;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
|
||||
|
@ -22,8 +22,10 @@ import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
|
|||
/**
|
||||
* Random access values for <code>byte[]</code>, but also includes accessing the score correction
|
||||
* constant for the current vector in the buffer.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
interface RandomAccessQuantizedByteVectorValues extends RandomAccessVectorValues<byte[]> {
|
||||
public interface RandomAccessQuantizedByteVectorValues extends RandomAccessVectorValues<byte[]> {
|
||||
float getScoreCorrectionConstant();
|
||||
|
||||
@Override
|
|
@ -14,21 +14,23 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene99;
|
||||
package org.apache.lucene.util.quantization;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.ScalarQuantizedVectorSimilarity;
|
||||
import org.apache.lucene.util.ScalarQuantizer;
|
||||
import org.apache.lucene.util.VectorUtil;
|
||||
import org.apache.lucene.util.hnsw.RandomVectorScorer;
|
||||
|
||||
/** Quantized vector scorer */
|
||||
final class ScalarQuantizedRandomVectorScorer
|
||||
/**
|
||||
* Quantized vector scorer
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class ScalarQuantizedRandomVectorScorer
|
||||
extends RandomVectorScorer.AbstractRandomVectorScorer<byte[]> {
|
||||
|
||||
private static float quantizeQuery(
|
||||
public static float quantizeQuery(
|
||||
float[] query,
|
||||
byte[] quantizedQuery,
|
||||
VectorSimilarityFunction similarityFunction,
|
||||
|
@ -50,7 +52,7 @@ final class ScalarQuantizedRandomVectorScorer
|
|||
private final RandomAccessQuantizedByteVectorValues values;
|
||||
private final ScalarQuantizedVectorSimilarity similarity;
|
||||
|
||||
ScalarQuantizedRandomVectorScorer(
|
||||
public ScalarQuantizedRandomVectorScorer(
|
||||
ScalarQuantizedVectorSimilarity similarityFunction,
|
||||
RandomAccessQuantizedByteVectorValues values,
|
||||
byte[] query,
|
||||
|
@ -62,7 +64,7 @@ final class ScalarQuantizedRandomVectorScorer
|
|||
this.values = values;
|
||||
}
|
||||
|
||||
ScalarQuantizedRandomVectorScorer(
|
||||
public ScalarQuantizedRandomVectorScorer(
|
||||
VectorSimilarityFunction similarityFunction,
|
||||
ScalarQuantizer scalarQuantizer,
|
||||
RandomAccessQuantizedByteVectorValues values,
|
|
@ -14,22 +14,24 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene99;
|
||||
package org.apache.lucene.util.quantization;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.util.ScalarQuantizedVectorSimilarity;
|
||||
import org.apache.lucene.util.ScalarQuantizer;
|
||||
import org.apache.lucene.util.hnsw.RandomVectorScorer;
|
||||
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
|
||||
|
||||
/** Quantized vector scorer supplier */
|
||||
final class ScalarQuantizedRandomVectorScorerSupplier implements RandomVectorScorerSupplier {
|
||||
/**
|
||||
* Quantized vector scorer supplier
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class ScalarQuantizedRandomVectorScorerSupplier implements RandomVectorScorerSupplier {
|
||||
|
||||
private final RandomAccessQuantizedByteVectorValues values;
|
||||
private final ScalarQuantizedVectorSimilarity similarity;
|
||||
|
||||
ScalarQuantizedRandomVectorScorerSupplier(
|
||||
public ScalarQuantizedRandomVectorScorerSupplier(
|
||||
VectorSimilarityFunction similarityFunction,
|
||||
ScalarQuantizer scalarQuantizer,
|
||||
RandomAccessQuantizedByteVectorValues values) {
|
|
@ -14,11 +14,12 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
package org.apache.lucene.util.quantization;
|
||||
|
||||
import static org.apache.lucene.util.VectorUtil.scaleMaxInnerProductScore;
|
||||
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.util.VectorUtil;
|
||||
|
||||
/**
|
||||
* Calculates and adjusts the scores correctly for quantized vectors given the scalar quantization
|
|
@ -14,7 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
package org.apache.lucene.util.quantization;
|
||||
|
||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||
|
||||
|
@ -24,6 +24,8 @@ import java.util.Random;
|
|||
import java.util.stream.IntStream;
|
||||
import org.apache.lucene.index.FloatVectorValues;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.util.IntroSelector;
|
||||
import org.apache.lucene.util.Selector;
|
||||
|
||||
/**
|
||||
* Will scalar quantize float vectors into `int8` byte values. This is a lossy transformation.
|
|
@ -0,0 +1,22 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Provides quantization methods for scaling vector values to smaller data types and possibly fewer
|
||||
* dimensions.
|
||||
*/
|
||||
package org.apache.lucene.util.quantization;
|
|
@ -38,8 +38,9 @@ import org.apache.lucene.index.VectorSimilarityFunction;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
|
||||
import org.apache.lucene.util.SameThreadExecutorService;
|
||||
import org.apache.lucene.util.ScalarQuantizer;
|
||||
import org.apache.lucene.util.VectorUtil;
|
||||
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
|
||||
import org.apache.lucene.util.quantization.ScalarQuantizer;
|
||||
|
||||
public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormatTestCase {
|
||||
|
||||
|
|
|
@ -14,17 +14,19 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
package org.apache.lucene.util.quantization;
|
||||
|
||||
import static org.apache.lucene.util.TestScalarQuantizer.fromFloats;
|
||||
import static org.apache.lucene.util.TestScalarQuantizer.randomFloatArray;
|
||||
import static org.apache.lucene.util.TestScalarQuantizer.randomFloats;
|
||||
import static org.apache.lucene.util.quantization.TestScalarQuantizer.fromFloats;
|
||||
import static org.apache.lucene.util.quantization.TestScalarQuantizer.randomFloatArray;
|
||||
import static org.apache.lucene.util.quantization.TestScalarQuantizer.randomFloats;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
import org.apache.lucene.index.FloatVectorValues;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.VectorUtil;
|
||||
|
||||
public class TestScalarQuantizedVectorSimilarity extends LuceneTestCase {
|
||||
|
|
@ -14,7 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util;
|
||||
package org.apache.lucene.util.quantization;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
Loading…
Reference in New Issue