mirror of https://github.com/apache/lucene.git
Fix bug in SQ when just a single vector is present in a segment (#13374)
This commit fixes a corner case in the ScalarQuantizer when just a single vector is present. I ran into this when updating a test that previously passed with Lucene 9.10 but fails in 9.x. With only a single vector there are no score docs and therefore no score variance, so the score error correction is calculated as NaN. The fix returns 0.0 instead of NaN in that case.
This commit is contained in:
parent
731cecf730
commit
3d671a0fbe
|
@ -372,6 +372,8 @@ Bug Fixes
|
||||||
|
|
||||||
* GITHUB#13378: Fix points writing with no values (Chris Hegarty)
|
* GITHUB#13378: Fix points writing with no values (Chris Hegarty)
|
||||||
|
|
||||||
|
* GITHUB#13374: Fix bug in SQ when just a single vector present in a segment (Chris Hegarty)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -698,7 +698,7 @@ public class ScalarQuantizer {
|
||||||
}
|
}
|
||||||
corr.add(1 - errors.var() / scoreVariance);
|
corr.add(1 - errors.var() / scoreVariance);
|
||||||
}
|
}
|
||||||
return corr.mean;
|
return Double.isNaN(corr.mean) ? 0.0 : corr.mean;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,12 +29,14 @@ import org.apache.lucene.codecs.KnnVectorsReader;
|
||||||
import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer;
|
import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer;
|
||||||
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
|
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.KnnFloatVectorField;
|
import org.apache.lucene.document.KnnFloatVectorField;
|
||||||
import org.apache.lucene.index.CodecReader;
|
import org.apache.lucene.index.CodecReader;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
|
import org.apache.lucene.index.StoredFields;
|
||||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
|
@ -252,6 +254,59 @@ public class TestLucene99ScalarQuantizedVectorScorer extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSingleVectorPerSegmentCosine() throws IOException {
|
||||||
|
testSingleVectorPerSegment(VectorSimilarityFunction.COSINE);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSingleVectorPerSegmentDot() throws IOException {
|
||||||
|
testSingleVectorPerSegment(VectorSimilarityFunction.DOT_PRODUCT);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSingleVectorPerSegmentEuclidean() throws IOException {
|
||||||
|
testSingleVectorPerSegment(VectorSimilarityFunction.EUCLIDEAN);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSingleVectorPerSegmentMIP() throws IOException {
|
||||||
|
testSingleVectorPerSegment(VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testSingleVectorPerSegment(VectorSimilarityFunction sim) throws IOException {
|
||||||
|
var codec = getCodec(7, false);
|
||||||
|
try (Directory dir = newDirectory()) {
|
||||||
|
try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig().setCodec(codec))) {
|
||||||
|
Document doc2 = new Document();
|
||||||
|
doc2.add(new KnnFloatVectorField("field", new float[] {0.8f, 0.6f}, sim));
|
||||||
|
doc2.add(newTextField("id", "A", Field.Store.YES));
|
||||||
|
writer.addDocument(doc2);
|
||||||
|
writer.commit();
|
||||||
|
|
||||||
|
Document doc1 = new Document();
|
||||||
|
doc1.add(new KnnFloatVectorField("field", new float[] {0.6f, 0.8f}, sim));
|
||||||
|
doc1.add(newTextField("id", "B", Field.Store.YES));
|
||||||
|
writer.addDocument(doc1);
|
||||||
|
writer.commit();
|
||||||
|
|
||||||
|
Document doc3 = new Document();
|
||||||
|
doc3.add(new KnnFloatVectorField("field", new float[] {-0.6f, -0.8f}, sim));
|
||||||
|
doc3.add(newTextField("id", "C", Field.Store.YES));
|
||||||
|
writer.addDocument(doc3);
|
||||||
|
writer.commit();
|
||||||
|
|
||||||
|
writer.forceMerge(1);
|
||||||
|
}
|
||||||
|
try (DirectoryReader reader = DirectoryReader.open(dir)) {
|
||||||
|
LeafReader leafReader = getOnlyLeafReader(reader);
|
||||||
|
StoredFields storedFields = reader.storedFields();
|
||||||
|
float[] queryVector = new float[] {0.6f, 0.8f};
|
||||||
|
var hits = leafReader.searchNearestVectors("field", queryVector, 3, null, 100);
|
||||||
|
assertEquals(hits.scoreDocs.length, 3);
|
||||||
|
assertEquals("B", storedFields.document(hits.scoreDocs[0].doc).get("id"));
|
||||||
|
assertEquals("A", storedFields.document(hits.scoreDocs[1].doc).get("id"));
|
||||||
|
assertEquals("C", storedFields.document(hits.scoreDocs[2].doc).get("id"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static byte[] floatToByteArray(float value) {
|
private static byte[] floatToByteArray(float value) {
|
||||||
return ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putFloat(value).array();
|
return ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putFloat(value).array();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue