mirror of https://github.com/apache/lucene.git
Fix 9.12.0 backcompat break (Lucene 9.12.0 cannot read 9.11.x indices written with quantized HNSW, `Lucene99HnswScalarQuantizedVectorsFormat`) (#13874)
* carefully regenerate the int8_hnsw bwc indices so that they do in fact use Lucene99ScalarQuantizedVectorsFormat ... when running TestInt8HnswBackwardsCompatibility it now fails (as expected) on 9.11.0 and 9.11.1 bwc indices, but not on 9.10.0 * rename int8 -> int7 bwc tests since we are actually testing 7 bit quantization * actually fix the bwc bug: only allow compress=true when bits is 7 or 8 in HNSW scalar quantization * tidy * Revert "rename int8 -> int7 bwc tests since we are actually testing 7 bit quantization" This reverts commit eeb3f8a668.
* Reapply "rename int8 -> int7 bwc tests since we are actually testing 7 bit quantization" This reverts commit 3487c4210b.
* #13880: add test to verify the int7 quantized indices are in fact using quantized vectors not float32 * bump 9.12.x version to 9.12.1 and add bwc indices for 9.12.0 * remove duplicate 9.12.0 Version constant * revert changes to index.9.12.0-cfs.zip, index.9.12.0-nocfs.zip, sorted.9.12.0.zip * remove unused bwc index Closes #13867 Closes #13880
This commit is contained in:
parent
e6bb5e2c54
commit
eadc07cc6a
|
@ -106,8 +106,8 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
|
|||
* This is a base constructor for parameterized BWC tests. The constructor arguments are provided
|
||||
* by {@link com.carrotsearch.randomizedtesting.RandomizedRunner} during test execution. A {@link
|
||||
* com.carrotsearch.randomizedtesting.annotations.ParametersFactory} specified in a subclass
|
||||
* provides a list lists of arguments for the tests and RandomizedRunner will execute the test for
|
||||
* each of the argument list.
|
||||
* provides a list of arguments for the tests and RandomizedRunner will execute the test for each
|
||||
* of the argument list.
|
||||
*
|
||||
* @param version the version this test should run for
|
||||
* @param indexPattern an index pattern in order to open an index of see {@link
|
||||
|
|
|
@ -39,7 +39,7 @@ public class TestGenerateBwcIndices extends LuceneTestCase {
|
|||
// To generate backcompat indexes with the current default codec, run the following gradle
|
||||
// command:
|
||||
// gradlew test -Ptests.bwcdir=/path/to/store/indexes -Ptests.codec=default
|
||||
// -Ptests.useSecurityManager=false --tests TestGenerateBwcIndices
|
||||
// -Ptests.useSecurityManager=false --tests TestGenerateBwcIndices --max-workers=1
|
||||
//
|
||||
// Also add testmethod with one of the index creation methods below, for example:
|
||||
// -Ptestmethod=testCreateCFS
|
||||
|
@ -82,14 +82,14 @@ public class TestGenerateBwcIndices extends LuceneTestCase {
|
|||
sortedTest.createBWCIndex();
|
||||
}
|
||||
|
||||
public void testCreateInt8HNSWIndices() throws IOException {
|
||||
TestInt8HnswBackwardsCompatibility int8HnswBackwardsCompatibility =
|
||||
new TestInt8HnswBackwardsCompatibility(
|
||||
public void testCreateInt7HNSWIndices() throws IOException {
|
||||
TestInt7HnswBackwardsCompatibility int7HnswBackwardsCompatibility =
|
||||
new TestInt7HnswBackwardsCompatibility(
|
||||
Version.LATEST,
|
||||
createPattern(
|
||||
TestInt8HnswBackwardsCompatibility.INDEX_NAME,
|
||||
TestInt8HnswBackwardsCompatibility.SUFFIX));
|
||||
int8HnswBackwardsCompatibility.createBWCIndex();
|
||||
TestInt7HnswBackwardsCompatibility.INDEX_NAME,
|
||||
TestInt7HnswBackwardsCompatibility.SUFFIX));
|
||||
int7HnswBackwardsCompatibility.createBWCIndex();
|
||||
}
|
||||
|
||||
private boolean isInitialMajorVersionRelease() {
|
||||
|
|
|
@ -23,17 +23,22 @@ import java.io.IOException;
|
|||
import org.apache.lucene.backward_codecs.lucene99.Lucene99Codec;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.KnnVectorsFormat;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.KnnFloatVectorField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.CodecReader;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NoMergePolicy;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
@ -41,23 +46,23 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.tests.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.tests.util.TestUtil;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
|
||||
|
||||
public class TestInt8HnswBackwardsCompatibility extends BackwardsCompatibilityTestBase {
|
||||
public class TestInt7HnswBackwardsCompatibility extends BackwardsCompatibilityTestBase {
|
||||
|
||||
static final String INDEX_NAME = "int8_hnsw";
|
||||
static final String INDEX_NAME = "int7_hnsw";
|
||||
static final String SUFFIX = "";
|
||||
private static final Version FIRST_INT8_HNSW_VERSION = Version.LUCENE_9_10_0;
|
||||
private static final Version FIRST_INT7_HNSW_VERSION = Version.LUCENE_9_10_0;
|
||||
private static final String KNN_VECTOR_FIELD = "knn_field";
|
||||
private static final int DOC_COUNT = 30;
|
||||
private static final FieldType KNN_VECTOR_FIELD_TYPE =
|
||||
KnnFloatVectorField.createFieldType(3, VectorSimilarityFunction.COSINE);
|
||||
private static final float[] KNN_VECTOR = {0.2f, -0.1f, 0.1f};
|
||||
|
||||
public TestInt8HnswBackwardsCompatibility(Version version, String pattern) {
|
||||
public TestInt7HnswBackwardsCompatibility(Version version, String pattern) {
|
||||
super(version, pattern);
|
||||
}
|
||||
|
||||
/** Provides all sorted versions to the test-framework */
|
||||
@ParametersFactory(argumentFormatting = "Lucene-Version:%1$s; Pattern: %2$s")
|
||||
public static Iterable<Object[]> testVersionsFactory() throws IllegalAccessException {
|
||||
return allVersion(INDEX_NAME, SUFFIX);
|
||||
|
@ -76,7 +81,7 @@ public class TestInt8HnswBackwardsCompatibility extends BackwardsCompatibilityTe
|
|||
|
||||
@Override
|
||||
protected boolean supportsVersion(Version version) {
|
||||
return version.onOrAfter(FIRST_INT8_HNSW_VERSION);
|
||||
return version.onOrAfter(FIRST_INT7_HNSW_VERSION);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -84,7 +89,7 @@ public class TestInt8HnswBackwardsCompatibility extends BackwardsCompatibilityTe
|
|||
// We don't use the default codec
|
||||
}
|
||||
|
||||
public void testInt8HnswIndexAndSearch() throws Exception {
|
||||
public void testInt7HnswIndexAndSearch() throws Exception {
|
||||
IndexWriterConfig indexWriterConfig =
|
||||
newIndexWriterConfig(new MockAnalyzer(random()))
|
||||
.setOpenMode(IndexWriterConfig.OpenMode.APPEND)
|
||||
|
@ -108,7 +113,6 @@ public class TestInt8HnswBackwardsCompatibility extends BackwardsCompatibilityTe
|
|||
assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0");
|
||||
}
|
||||
}
|
||||
// This will confirm the docs are really sorted
|
||||
TestUtil.checkIndex(directory);
|
||||
}
|
||||
|
||||
|
@ -117,7 +121,7 @@ public class TestInt8HnswBackwardsCompatibility extends BackwardsCompatibilityTe
|
|||
IndexWriterConfig conf =
|
||||
new IndexWriterConfig(new MockAnalyzer(random()))
|
||||
.setMaxBufferedDocs(10)
|
||||
.setCodec(TestUtil.getDefaultCodec())
|
||||
.setCodec(getCodec())
|
||||
.setMergePolicy(NoMergePolicy.INSTANCE);
|
||||
try (IndexWriter writer = new IndexWriter(dir, conf)) {
|
||||
for (int i = 0; i < DOC_COUNT; i++) {
|
||||
|
@ -147,4 +151,29 @@ public class TestInt8HnswBackwardsCompatibility extends BackwardsCompatibilityTe
|
|||
assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0");
|
||||
}
|
||||
}
|
||||
|
||||
// #13880: make sure the BWC index really contains quantized HNSW not float32
|
||||
public void testIndexIsReallyQuantized() throws Exception {
|
||||
try (DirectoryReader reader = DirectoryReader.open(directory)) {
|
||||
for (LeafReaderContext leafContext : reader.leaves()) {
|
||||
KnnVectorsReader knnVectorsReader = ((CodecReader) leafContext.reader()).getVectorReader();
|
||||
assertTrue(
|
||||
"expected PerFieldKnnVectorsFormat.FieldsReader but got: " + knnVectorsReader,
|
||||
knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader);
|
||||
|
||||
KnnVectorsReader forField =
|
||||
((PerFieldKnnVectorsFormat.FieldsReader) knnVectorsReader)
|
||||
.getFieldReader(KNN_VECTOR_FIELD);
|
||||
|
||||
assertTrue(forField instanceof Lucene99HnswVectorsReader);
|
||||
|
||||
QuantizedByteVectorValues quantized =
|
||||
((Lucene99HnswVectorsReader) forField).getQuantizedVectorValues(KNN_VECTOR_FIELD);
|
||||
|
||||
assertNotNull(
|
||||
"KnnVectorsReader should have quantized interface for field " + KNN_VECTOR_FIELD,
|
||||
quantized);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -135,7 +135,7 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade
|
|||
}
|
||||
|
||||
final long quantizedVectorBytes;
|
||||
if (fieldEntry.compress) {
|
||||
if (fieldEntry.bits <= 4 && fieldEntry.compress) {
|
||||
// two dimensions -> one byte
|
||||
quantizedVectorBytes = ((dimension + 1) >> 1) + Float.BYTES;
|
||||
} else {
|
||||
|
|
Loading…
Reference in New Issue