Add new dynamic confidence interval configuration to scalar quantized format (#13445)

When int4 scalar quantization was merged, it added a new way to dynamically calculate quantiles.

However, when that was merged, I inadvertently changed the default behavior, where a null confidenceInterval would actually calculate the dynamic quantiles instead of doing the previous auto-setting to 1 - 1/(dim + 1).

This commit formalizes the dynamic quantile calculation by setting the confidenceInterval to 0, and preserves the previous behavior for null confidenceIntervals so that users upgrading will not see different quantiles than they would expect.
This commit is contained in:
Benjamin Trent 2024-06-01 13:25:38 -04:00 committed by GitHub
parent f3c2b91630
commit a540027bde
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 170 additions and 27 deletions

View File

@ -302,6 +302,10 @@ New Features
* GITHUB#13414: Counts are always available in the result when using taxonomy facets. (Stefan Vodita)
* GITHUB#13445: Add new option when calculating scalar quantiles. The new option of setting `confidenceInterval` to
`0` will now dynamically determine the quantiles through a grid search over multiple quantiles calculated
by multiple intervals. (Ben Trent)
Improvements
---------------------

View File

@ -93,7 +93,9 @@ public class Lucene99HnswScalarQuantizedVectorsFormat extends KnnVectorsFormat {
* lte 4 bits will be compressed into a single byte. If false, the vectors will be stored as
* is. This provides a trade-off of memory usage and speed.
* @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null`
* it is calculated based on the vector field dimensions.
* it is calculated based on the vector field dimensions. When `0`, the quantiles are
* dynamically determined by sampling many confidence intervals and determining the most
* accurate pair.
* @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
* generated by this format to do the merge
*/

View File

@ -57,6 +57,9 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
/** The maximum confidence interval */
private static final float MAXIMUM_CONFIDENCE_INTERVAL = 1f;
/** Dynamic confidence interval */
public static final float DYNAMIC_CONFIDENCE_INTERVAL = 0f;
/**
* Controls the confidence interval used to scalar quantize the vectors. The default value is
* calculated as `1-1/(vector_dimensions + 1)`
@ -76,7 +79,8 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
* Constructs a format using the given graph construction parameters.
*
* @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null`
* it is calculated dynamically.
* it is calculated based on the vector dimension. When `0`, the quantiles are dynamically
* determined by sampling many confidence intervals and determining the most accurate pair.
* @param bits the number of bits to use for scalar quantization (must be between 1 and 8,
* inclusive)
* @param compress whether to compress the vectors, if true, the vectors that are quantized with
@ -86,6 +90,7 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
public Lucene99ScalarQuantizedVectorsFormat(
Float confidenceInterval, int bits, boolean compress) {
if (confidenceInterval != null
&& confidenceInterval != DYNAMIC_CONFIDENCE_INTERVAL
&& (confidenceInterval < MINIMUM_CONFIDENCE_INTERVAL
|| confidenceInterval > MAXIMUM_CONFIDENCE_INTERVAL)) {
throw new IllegalArgumentException(
@ -93,6 +98,7 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
+ MINIMUM_CONFIDENCE_INTERVAL
+ " and "
+ MAXIMUM_CONFIDENCE_INTERVAL
+ " or 0"
+ "; confidenceInterval="
+ confidenceInterval);
}

View File

@ -346,10 +346,17 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade
if (size > 0) {
if (versionMeta < Lucene99ScalarQuantizedVectorsFormat.VERSION_ADD_BITS) {
int floatBits = input.readInt(); // confidenceInterval, unused
if (floatBits == -1) {
if (floatBits == -1) { // indicates a null confidence interval
throw new CorruptIndexException(
"Missing confidence interval for scalar quantizer", input);
}
float confidenceInterval = Float.intBitsToFloat(floatBits);
// indicates a dynamic interval, which shouldn't be provided in this version
if (confidenceInterval
== Lucene99ScalarQuantizedVectorsFormat.DYNAMIC_CONFIDENCE_INTERVAL) {
throw new CorruptIndexException(
"Invalid confidence interval for scalar quantizer: " + confidenceInterval, input);
}
bits = (byte) 7;
compress = false;
float minQuantile = Float.intBitsToFloat(input.readInt());

View File

@ -18,6 +18,7 @@
package org.apache.lucene.codecs.lucene99;
import static org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.DYNAMIC_CONFIDENCE_INTERVAL;
import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.QUANTIZED_VECTOR_COMPONENT;
import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.calculateDefaultConfidenceInterval;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
@ -117,6 +118,9 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
false,
rawVectorDelegate,
scorer);
if (confidenceInterval != null && confidenceInterval == 0) {
throw new IllegalArgumentException("confidenceInterval cannot be set to zero");
}
}
public Lucene99ScalarQuantizedVectorsWriter(
@ -347,6 +351,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
meta.writeByte(bits);
meta.writeByte(compress ? (byte) 1 : (byte) 0);
} else {
assert confidenceInterval == null || confidenceInterval != DYNAMIC_CONFIDENCE_INTERVAL;
meta.writeInt(
Float.floatToIntBits(
confidenceInterval == null
@ -666,22 +671,36 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
doc = vectorValues.nextDoc()) {
numVectors++;
}
mergedQuantiles =
confidenceInterval == null
? ScalarQuantizer.fromVectorsAutoInterval(
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState),
fieldInfo.getVectorSimilarityFunction(),
numVectors,
bits)
: ScalarQuantizer.fromVectors(
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState),
confidenceInterval,
numVectors,
bits);
return buildScalarQuantizer(
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState),
numVectors,
fieldInfo.getVectorSimilarityFunction(),
confidenceInterval,
bits);
}
return mergedQuantiles;
}
/**
 * Builds a {@link ScalarQuantizer} over the provided vectors.
 *
 * @param floatVectorValues the vectors to sample when computing the quantiles
 * @param numVectors the number of vectors that will be quantized
 * @param vectorSimilarityFunction the similarity function, used only by the dynamic
 *     (auto-interval) path
 * @param confidenceInterval the requested confidence interval; {@code null} falls back to the
 *     dimension-based default, {@code 0} triggers the dynamic interval search
 * @param bits the number of bits per quantized value
 * @throws IOException if reading the vectors fails
 */
static ScalarQuantizer buildScalarQuantizer(
    FloatVectorValues floatVectorValues,
    int numVectors,
    VectorSimilarityFunction vectorSimilarityFunction,
    Float confidenceInterval,
    byte bits)
    throws IOException {
  // An explicit 0 requests the dynamic interval: sample several candidate
  // confidence intervals and keep the most accurate quantile pair.
  boolean dynamic =
      confidenceInterval != null && confidenceInterval == DYNAMIC_CONFIDENCE_INTERVAL;
  if (dynamic) {
    return ScalarQuantizer.fromVectorsAutoInterval(
        floatVectorValues, vectorSimilarityFunction, numVectors, bits);
  }
  // Fixed-interval path; null preserves the pre-existing default behavior of
  // deriving the interval from the vector dimension (1 - 1/(dim + 1)).
  float interval =
      confidenceInterval != null
          ? confidenceInterval
          : calculateDefaultConfidenceInterval(floatVectorValues.dimension());
  return ScalarQuantizer.fromVectors(floatVectorValues, interval, numVectors, bits);
}
/**
* Returns true if the quantiles of the new quantization state are too far from the quantiles of
* the existing quantization state. This would imply that floating point values would slightly
@ -784,14 +803,12 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
}
FloatVectorValues floatVectorValues = new FloatVectorWrapper(floatVectors, normalize);
ScalarQuantizer quantizer =
confidenceInterval == null
? ScalarQuantizer.fromVectorsAutoInterval(
floatVectorValues,
fieldInfo.getVectorSimilarityFunction(),
floatVectors.size(),
bits)
: ScalarQuantizer.fromVectors(
floatVectorValues, confidenceInterval, floatVectors.size(), bits);
buildScalarQuantizer(
floatVectorValues,
floatVectors.size(),
fieldInfo.getVectorSimilarityFunction(),
confidenceInterval,
bits);
minQuantile = quantizer.getLowerQuantile();
maxQuantile = quantizer.getUpperQuantile();
if (infoStream.isEnabled(QUANTIZED_VECTOR_COMPONENT)) {

View File

@ -61,7 +61,10 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat
@Override
public void setUp() throws Exception {
bits = random().nextBoolean() ? 4 : 7;
confidenceInterval = random().nextBoolean() ? 0.99f : null;
confidenceInterval = random().nextBoolean() ? random().nextFloat(0.90f, 1.0f) : null;
if (random().nextBoolean()) {
confidenceInterval = 0f;
}
format =
new Lucene99HnswScalarQuantizedVectorsFormat(
Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,
@ -133,7 +136,7 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat
vectors.add(randomVector(dim));
}
ScalarQuantizer scalarQuantizer =
confidenceInterval == null
confidenceInterval != null && confidenceInterval == 0f
? ScalarQuantizer.fromVectorsAutoInterval(
new Lucene99ScalarQuantizedVectorsWriter.FloatVectorWrapper(vectors, normalize),
similarityFunction,
@ -141,7 +144,9 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat
(byte) bits)
: ScalarQuantizer.fromVectors(
new Lucene99ScalarQuantizedVectorsWriter.FloatVectorWrapper(vectors, normalize),
confidenceInterval,
confidenceInterval == null
? Lucene99ScalarQuantizedVectorsFormat.calculateDefaultConfidenceInterval(dim)
: confidenceInterval,
numVectors,
(byte) bits);
float[] expectedCorrections = new float[numVectors];

View File

@ -60,7 +60,7 @@ public class TestLucene99ScalarQuantizedVectorScorer extends LuceneTestCase {
1,
bits,
compress,
null,
0f,
null);
}
};

View File

@ -0,0 +1,102 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene99;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.quantization.ScalarQuantizer;
/** Unit tests for {@code Lucene99ScalarQuantizedVectorsWriter#buildScalarQuantizer}. */
public class TestLucene99ScalarQuantizedVectorsWriter extends LuceneTestCase {

  /** Number of sample vectors fed to the quantizer in every case. */
  private static final int SAMPLE_COUNT = 30;

  public void testBuildScalarQuantizerCosine() throws IOException {
    assertScalarQuantizer(
        new float[] {0.3234983f, 0.6236096f}, 0.9f, (byte) 7, VectorSimilarityFunction.COSINE);
    assertScalarQuantizer(
        new float[] {0.28759837f, 0.62449116f}, 0f, (byte) 7, VectorSimilarityFunction.COSINE);
    assertScalarQuantizer(
        new float[] {0.3234983f, 0.6236096f}, 0.9f, (byte) 4, VectorSimilarityFunction.COSINE);
    assertScalarQuantizer(
        new float[] {0.37247902f, 0.58848244f}, 0f, (byte) 4, VectorSimilarityFunction.COSINE);
  }

  public void testBuildScalarQuantizerDotProduct() throws IOException {
    assertScalarQuantizer(
        new float[] {0.3234983f, 0.6236096f}, 0.9f, (byte) 7, VectorSimilarityFunction.DOT_PRODUCT);
    assertScalarQuantizer(
        new float[] {0.28759837f, 0.62449116f}, 0f, (byte) 7, VectorSimilarityFunction.DOT_PRODUCT);
    assertScalarQuantizer(
        new float[] {0.3234983f, 0.6236096f}, 0.9f, (byte) 4, VectorSimilarityFunction.DOT_PRODUCT);
    assertScalarQuantizer(
        new float[] {0.37247902f, 0.58848244f}, 0f, (byte) 4, VectorSimilarityFunction.DOT_PRODUCT);
  }

  public void testBuildScalarQuantizerMIP() throws IOException {
    assertScalarQuantizer(
        new float[] {2.0f, 20.0f}, 0.9f, (byte) 7, VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
    assertScalarQuantizer(
        new float[] {2.4375f, 19.0625f},
        0f,
        (byte) 7,
        VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
    assertScalarQuantizer(
        new float[] {2.0f, 20.0f}, 0.9f, (byte) 4, VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
    assertScalarQuantizer(
        new float[] {2.6875f, 19.0625f},
        0f,
        (byte) 4,
        VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
  }

  public void testBuildScalarQuantizerEuclidean() throws IOException {
    assertScalarQuantizer(
        new float[] {2.0f, 20.0f}, 0.9f, (byte) 7, VectorSimilarityFunction.EUCLIDEAN);
    assertScalarQuantizer(
        new float[] {2.125f, 19.375f}, 0f, (byte) 7, VectorSimilarityFunction.EUCLIDEAN);
    assertScalarQuantizer(
        new float[] {2.0f, 20.0f}, 0.9f, (byte) 4, VectorSimilarityFunction.EUCLIDEAN);
    assertScalarQuantizer(
        new float[] {2.1875f, 19.0625f}, 0f, (byte) 4, VectorSimilarityFunction.EUCLIDEAN);
  }

  /**
   * Builds a quantizer over a fixed ramp of 4-dimensional vectors and checks the resulting
   * lower/upper quantiles.
   *
   * @param expectedQuantiles expected {lower, upper} quantile pair
   * @param confidenceInterval interval passed through to the writer ({@code 0f} = dynamic)
   * @param bits quantization bit width
   * @param vectorSimilarityFunction similarity function under test
   */
  private void assertScalarQuantizer(
      float[] expectedQuantiles,
      Float confidenceInterval,
      byte bits,
      VectorSimilarityFunction vectorSimilarityFunction)
      throws IOException {
    // Deterministic sample data: vector i is {i, i+1, i+2, i+3}.
    List<float[]> samples = new ArrayList<>(SAMPLE_COUNT);
    for (int i = 0; i < SAMPLE_COUNT; i++) {
      samples.add(new float[] {i, i + 1, i + 2, i + 3});
    }
    // The wrapper normalizes the vectors for cosine and dot-product similarities.
    boolean normalize =
        vectorSimilarityFunction == VectorSimilarityFunction.COSINE
            || vectorSimilarityFunction == VectorSimilarityFunction.DOT_PRODUCT;
    FloatVectorValues vectorValues =
        new Lucene99ScalarQuantizedVectorsWriter.FloatVectorWrapper(samples, normalize);
    ScalarQuantizer quantizer =
        Lucene99ScalarQuantizedVectorsWriter.buildScalarQuantizer(
            vectorValues, SAMPLE_COUNT, vectorSimilarityFunction, confidenceInterval, bits);
    assertEquals(expectedQuantiles[0], quantizer.getLowerQuantile(), 0.0001f);
    assertEquals(expectedQuantiles[1], quantizer.getUpperQuantile(), 0.0001f);
  }
}