diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d41f70801ca..c193524d3dd 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -214,6 +214,19 @@ Bug Fixes * GITHUB#12878: Fix the declared Exceptions of Expression#evaluate() to match those of DoubleValues#doubleValue(). (Uwe Schindler) +* GITHUB#13519: 8 bit scalar vector quantization is no longer + supported: it was buggy starting in 9.11 (GITHUB#13197). 4 and 7 + bit quantization are still supported. Existing (9.x) Lucene indices + that previously used 8 bit quantization can still be read/searched + but the results from `KNN*VectorQuery` are silently buggy. Further + 8 bit quantized vector indexing into such (9.11) indices is not + permitted, so your path forward if you wish to continue using the + same 9.11 index is to index additional vectors into the same field + with either 4 or 7 bit quantization (or no quantization), and ensure + all older (9.11 written) segments are rewritten either via + `IndexWriter.forceMerge` or + `IndexWriter.addIndexes(CodecReader...)`, or reindex entirely. + Changes in Runtime Behavior --------------------- diff --git a/lucene/MIGRATE.md b/lucene/MIGRATE.md index fd3ea7e5be5..3052c7319ac 100644 --- a/lucene/MIGRATE.md +++ b/lucene/MIGRATE.md @@ -873,4 +873,18 @@ optimize the type of collectors it creates and exposes via `newCollector`. The protected `IndexSearcher#search(List leaves, Weight weight, Collector collector)` method has been removed in favour of the newly introduced `search(LeafReaderContextPartition[] partitions, Weight weight, Collector collector)`. -`IndexSearcher` subclasses that override this method need to instead override the new method. \ No newline at end of file +`IndexSearcher` subclasses that override this method need to instead override the new method. + +### Indexing vectors with 8 bit scalar quantization is no longer supported but 7 and 4 bit quantization still work (GITHUB#13519) + +8 bit scalar vector quantization is no longer supported: it was buggy +starting in 9.11 (GITHUB#13197). 4 and 7 bit quantization are still +supported. Existing (9.11) Lucene indices that previously used 8 bit +quantization can still be read/searched but the results from +`KNN*VectorQuery` are silently buggy. Further 8 bit quantized vector +indexing into such (9.11) indices is not permitted, so your path +forward if you wish to continue using the same 9.11 index is to index +additional vectors into the same field with either 4 or 7 bit +quantization (or no quantization), and ensure all older (9.x written) +segments are rewritten either via `IndexWriter.forceMerge` or +`IndexWriter.addIndexes(CodecReader...)`, or reindex entirely.
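As a companion to the migration entry above, here is a minimal sketch (not part of the patch) of the `IndexWriter.forceMerge` path: open the existing index with a codec that selects 4 or 7 bit quantization for vector fields, then force-merge so every older segment is rewritten and its vectors requantized. The standalone wrapper class, the index path argument, and the choice of the default (7 bit) `Lucene99HnswScalarQuantizedVectorsFormat` are illustrative assumptions.

```java
import java.nio.file.Paths;

import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class RequantizeExistingIndex {
  public static void main(String[] args) throws Exception {
    // args[0] is the path of an existing index that previously used 8 bit quantization
    try (Directory dir = FSDirectory.open(Paths.get(args[0]))) {
      IndexWriterConfig iwc =
          new IndexWriterConfig()
              .setCodec(
                  new Lucene912Codec() {
                    @Override
                    public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
                      // default constructor now means 7 bit quantization, compress=false
                      return new Lucene99HnswScalarQuantizedVectorsFormat();
                    }
                  });
      try (IndexWriter w = new IndexWriter(dir, iwc)) {
        // optionally index additional vectors here, then rewrite all older segments;
        // merging requantizes previously quantized vectors with the new format
        w.forceMerge(1);
      }
    }
  }
}
```

The same approach works with `IndexWriter.addIndexes(CodecReader...)` into a fresh directory if you prefer not to merge in place.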
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java index 5083b05c82d..1966ed21d65 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java @@ -65,19 +65,19 @@ public class Lucene99HnswScalarQuantizedVectorsFormat extends KnnVectorsFormat { private final int numMergeWorkers; private final TaskExecutor mergeExec; - /** Constructs a format using default graph construction parameters */ + /** Constructs a format using default graph construction parameters with 7 bit quantization */ public Lucene99HnswScalarQuantizedVectorsFormat() { - this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, 7, true, null, null); + this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, 7, false, null, null); } /** - * Constructs a format using the given graph construction parameters. + * Constructs a format using the given graph construction parameters with 7 bit quantization * * @param maxConn the maximum number of connections to a node in the HNSW graph * @param beamWidth the size of the queue maintained during graph construction. */ public Lucene99HnswScalarQuantizedVectorsFormat(int maxConn, int beamWidth) { - this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, 7, true, null, null); + this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, 7, false, null, null); } /** @@ -87,11 +87,11 @@ public class Lucene99HnswScalarQuantizedVectorsFormat extends KnnVectorsFormat { * @param beamWidth the size of the queue maintained during graph construction. * @param numMergeWorkers number of workers (threads) that will be used when doing merge. If * larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec - * @param bits the number of bits to use for scalar quantization (must be between 1 and 8, - * inclusive) - * @param compress whether to compress the vectors, if true, the vectors that are quantized with - * lte 4 bits will be compressed into a single byte. If false, the vectors will be stored as - * is. This provides a trade-off of memory usage and speed. + * @param bits the number of bits to use for scalar quantization (must be 4 or 7) + * @param compress whether to compress the quantized vectors by another 50% when bits=4. If + * `true`, pairs of (4 bit quantized) dimensions are packed into a single byte. This must be + * `false` when bits=7. This provides a trade-off of 50% reduction in hot vector memory usage + * during searching, at some decode speed penalty. * @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null` * it is calculated based on the vector field dimensions. 
When `0`, the quantiles are * dynamically determined by sampling many confidence intervals and determining the most diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java index 552260894a8..0b3c6d19af8 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java @@ -34,8 +34,10 @@ import org.apache.lucene.index.SegmentWriteState; public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat { // The bits that are allowed for scalar quantization - // We only allow unsigned byte (8), signed byte (7), and half-byte (4) - private static final int ALLOWED_BITS = (1 << 8) | (1 << 7) | (1 << 4); + // We only allow signed byte (7) and half-byte (4) + // NOTE: we used to allow 8 bits as well, but it was broken so we removed it + // (https://github.com/apache/lucene/issues/13519) + private static final int ALLOWED_BITS = (1 << 7) | (1 << 4); public static final String QUANTIZED_VECTOR_COMPONENT = "QVEC"; public static final String NAME = "Lucene99ScalarQuantizedVectorsFormat"; @@ -72,7 +74,7 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat { /** Constructs a format using default graph construction parameters */ public Lucene99ScalarQuantizedVectorsFormat() { - this(null, 7, true); + this(null, 7, false); } /** @@ -83,9 +85,10 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat { * determined by sampling many confidence intervals and determining the most accurate pair. * @param bits the number of bits to use for scalar quantization (must be between 1 and 8, * inclusive) - * @param compress whether to compress the vectors, if true, the vectors that are quantized with - * lte 4 bits will be compressed into a single byte. If false, the vectors will be stored as - * is. This provides a trade-off of memory usage and speed. + * @param compress whether to compress the quantized vectors by another 50% when bits=4. If + * `true`, pairs of (4 bit quantized) dimensions are packed into a single byte. This must be + * `false` when bits=7. This provides a trade-off of 50% reduction in hot vector memory usage + * during searching, at some decode speed penalty. */ public Lucene99ScalarQuantizedVectorsFormat( Float confidenceInterval, int bits, boolean compress) { @@ -104,7 +107,12 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat { + confidenceInterval); } if (bits < 1 || bits > 8 || (ALLOWED_BITS & (1 << bits)) == 0) { - throw new IllegalArgumentException("bits must be one of: 4, 7; bits=" + bits); + throw new IllegalArgumentException("bits must be one of: 4, 7; bits=" + bits); + } + + if (bits > 4 && compress) { + // compress=true would otherwise silently do nothing when bits=7
+ throw new IllegalArgumentException("compress=true only applies when bits=4"); } this.bits = (byte) bits; this.confidenceInterval = confidenceInterval; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java index b8188a43bae..40002fe06a6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java @@ -135,10 +135,11 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade } final long quantizedVectorBytes; - if (fieldEntry.bits <= 4 && fieldEntry.compress) { + if (fieldEntry.compress) { + // two dimensions -> one byte quantizedVectorBytes = ((dimension + 1) >> 1) + Float.BYTES; } else { - // int8 quantized and calculated stored offset. + // one dimension -> one byte quantizedVectorBytes = dimension + Float.BYTES; } long numQuantizedVectorBytes = Math.multiplyExact(quantizedVectorBytes, fieldEntry.size); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java index 642b20b73dc..825de3ab725 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java @@ -42,6 +42,8 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.KnnFloatVectorQuery; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopKnnCollector; @@ -67,15 +69,7 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat if (random().nextBoolean()) { confidenceInterval = 0f; } - format = - new Lucene99HnswScalarQuantizedVectorsFormat( - Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN, - Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH, - 1, - bits, - random().nextBoolean(), - confidenceInterval, - null); + format = getKnnFormat(bits); super.setUp(); } @@ -89,6 +83,132 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat }; } + private final KnnVectorsFormat getKnnFormat(int bits) { + return new Lucene99HnswScalarQuantizedVectorsFormat( + Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN, + Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH, + 1, + bits, + bits == 4 ? random().nextBoolean() : false, + confidenceInterval, + null); + } + + // verifies it's fine to change your mind on the number of bits quantization you want for the same + // field in the same index by changing up the Codec. This is allowed because at merge time we + // requantize the vectors. 
+ public void testMixedQuantizedBits() throws Exception { + + try (Directory dir = newDirectory()) { + + // add first vector using 4 bit quantization, then close index: + try (IndexWriter w = + new IndexWriter( + dir, + newIndexWriterConfig() + .setCodec( + new Lucene912Codec() { + @Override + public KnnVectorsFormat getKnnVectorsFormatForField(String field) { + return getKnnFormat(4); + } + }))) { + + Document doc = new Document(); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0.6f, 0.8f}, VectorSimilarityFunction.DOT_PRODUCT)); + w.addDocument(doc); + } + + // create another writer using 7 bit quantization and add 2nd vector + try (IndexWriter w = + new IndexWriter( + dir, + newIndexWriterConfig() + .setCodec( + new Lucene912Codec() { + @Override + public KnnVectorsFormat getKnnVectorsFormatForField(String field) { + return getKnnFormat(7); + } + }))) { + + Document doc = new Document(); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0.8f, 0.6f}, VectorSimilarityFunction.DOT_PRODUCT)); + w.addDocument(doc); + w.forceMerge(1); + } + + // confirm searching works: we find both vectors + try (IndexReader reader = DirectoryReader.open(dir)) { + IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery q = new KnnFloatVectorQuery("f", new float[] {0.7f, 0.7f}, 10); + TopDocs topDocs = searcher.search(q, 100); + assertEquals(2, topDocs.totalHits.value()); + } + } + } + + // verifies you can change your mind and enable quantization on a previously indexed vector field + // without quantization + public void testMixedQuantizedUnQuantized() throws Exception { + + try (Directory dir = newDirectory()) { + + // add first vector using no quantization + try (IndexWriter w = + new IndexWriter( + dir, + newIndexWriterConfig() + .setCodec( + new Lucene912Codec() { + @Override + public KnnVectorsFormat getKnnVectorsFormatForField(String field) { + return new Lucene99HnswVectorsFormat(); + } + }))) { + + Document doc = new Document(); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0.6f, 0.8f}, VectorSimilarityFunction.DOT_PRODUCT)); + w.addDocument(doc); + } + + // create another writer using (7 bit) quantization and add 2nd vector + try (IndexWriter w = + new IndexWriter( + dir, + newIndexWriterConfig() + .setCodec( + new Lucene912Codec() { + @Override + public KnnVectorsFormat getKnnVectorsFormatForField(String field) { + return getKnnFormat(7); + } + }))) { + + Document doc = new Document(); + doc.add( + new KnnFloatVectorField( + "f", new float[] {0.8f, 0.6f}, VectorSimilarityFunction.DOT_PRODUCT)); + w.addDocument(doc); + w.forceMerge(1); + } + + // confirm searching works: we find both vectors + try (IndexReader reader = DirectoryReader.open(dir)) { + IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery q = new KnnFloatVectorQuery("f", new float[] {0.7f, 0.7f}, 10); + TopDocs topDocs = searcher.search(q, 100); + assertEquals(2, topDocs.totalHits.value()); + } + } + } + public void testQuantizationScoringEdgeCase() throws Exception { float[][] vectors = new float[][] {{0.6f, 0.8f}, {0.8f, 0.6f}, {-0.6f, -0.8f}}; try (Directory dir = newDirectory(); @@ -125,7 +245,6 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat } public void testQuantizedVectorsWriteAndRead() throws Exception { - // create lucene directory with codec int numVectors = 1 + random().nextInt(50); VectorSimilarityFunction similarityFunction = randomSimilarity(); int dim = random().nextInt(64) + 1; @@ -158,6 +277,7 @@ public class 
TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat } float[] randomlyReusedVector = new float[dim]; + // create lucene directory with codec try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter( diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java index 094d90ba5a2..64df927c765 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java @@ -62,7 +62,8 @@ public class TestLucene99ScalarQuantizedVectorsFormat extends BaseKnnVectorsForm confidenceInterval = 0f; } format = - new Lucene99ScalarQuantizedVectorsFormat(confidenceInterval, bits, random().nextBoolean()); + new Lucene99ScalarQuantizedVectorsFormat( + confidenceInterval, bits, bits == 4 ? random().nextBoolean() : false); super.setUp(); }
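For reference, a small sketch (not part of the patch; a standalone snippet under assumed imports and an illustrative wrapper class) of which `Lucene99ScalarQuantizedVectorsFormat` configurations the updated validation accepts and rejects. The three-argument constructor and its semantics come from the changes above.

```java
import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat;

public class ScalarQuantizationConfigs {
  public static void main(String[] args) {
    // 7 bit (signed byte) quantization; compress must be false
    new Lucene99ScalarQuantizedVectorsFormat(null, 7, false);

    // 4 bit (half byte) quantization, packing two dimensions into one byte
    new Lucene99ScalarQuantizedVectorsFormat(null, 4, true);

    // 4 bit quantization without packing
    new Lucene99ScalarQuantizedVectorsFormat(null, 4, false);

    // rejected by the new checks (IllegalArgumentException):
    //   new Lucene99ScalarQuantizedVectorsFormat(null, 8, false); // 8 bit no longer supported
    //   new Lucene99ScalarQuantizedVectorsFormat(null, 7, true);  // compress only applies when bits=4
  }
}
```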