SimpleText[Float|Byte]VectorValues::scorer should return null when the vector values is empty (#13444)

This commit ensures that SimpleText[Float|Byte]VectorValues::scorer returns null when the vector values are empty, as required by the scorer javadoc. Other KnnVectorsReader implementations have specialised empty implementations that behave similarly, e.g. OffHeapFloatVectorValues.EmptyOffHeapVectorValues. The VectorScorer interface is new in Lucene 9.11; see #13181.

An existing test randomly hits this, but a new test has been added that exercises this code path consistently. It's also useful to verify other KnnVectorsReader implementations.
This commit is contained in:
Chris Hegarty 2024-05-31 14:27:25 +01:00 committed by GitHub
parent 750a7c4d3b
commit f3c2b91630
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 80 additions and 0 deletions

View File

@ -445,6 +445,9 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
@Override
public VectorScorer scorer(float[] target) {
if (size() == 0) {
return null;
}
OffHeapFloatVectorValues values = this.copy();
return new VectorScorer() {
@Override

View File

@ -494,6 +494,9 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
@Override
public VectorScorer scorer(float[] target) {
if (size == 0) {
return null;
}
OffHeapFloatVectorValues values = this.copy();
return new VectorScorer() {
@Override

View File

@ -73,4 +73,9 @@ public class TestLucene90HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
public void testByteVectorScorerIteration() {
// unimplemented: the body is deliberately empty, so this test performs no checks for this format
// NOTE(review): presumably this legacy format has no byte-vector support -- confirm
}
/**
 * No-op override of the {@code testEmptyByteVectorData} test inherited from {@code
 * BaseKnnVectorsFormatTestCase}; with an empty body the inherited check does not run here.
 */
@Override
public void testEmptyByteVectorData() {
// unimplemented
// NOTE(review): presumably this legacy format has no byte-vector support -- confirm
}
}

View File

@ -72,4 +72,9 @@ public class TestLucene91HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
public void testByteVectorScorerIteration() {
// unimplemented: the body is deliberately empty, so this test performs no checks for this format
// NOTE(review): presumably this legacy format has no byte-vector support -- confirm
}
/**
 * No-op override of the {@code testEmptyByteVectorData} test inherited from {@code
 * BaseKnnVectorsFormatTestCase}; with an empty body the inherited check does not run here.
 */
@Override
public void testEmptyByteVectorData() {
// unimplemented
// NOTE(review): presumably this legacy format has no byte-vector support -- confirm
}
}

View File

@ -62,4 +62,9 @@ public class TestLucene92HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
public void testByteVectorScorerIteration() {
// unimplemented: the body is deliberately empty, so this test performs no checks for this format
// NOTE(review): presumably this legacy format has no byte-vector support -- confirm
}
/**
 * No-op override of the {@code testEmptyByteVectorData} test inherited from {@code
 * BaseKnnVectorsFormatTestCase}; with an empty body the inherited check does not run here.
 */
@Override
public void testEmptyByteVectorData() {
// unimplemented
// NOTE(review): presumably this legacy format has no byte-vector support -- confirm
}
}

View File

@ -360,6 +360,9 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
@Override
public VectorScorer scorer(float[] target) {
if (size() == 0) {
return null;
}
SimpleTextFloatVectorValues simpleTextFloatVectorValues =
new SimpleTextFloatVectorValues(this);
return new VectorScorer() {
@ -470,6 +473,9 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
@Override
public VectorScorer scorer(byte[] target) {
if (size() == 0) {
return null;
}
SimpleTextByteVectorValues simpleTextByteVectorValues = new SimpleTextByteVectorValues(this);
return new VectorScorer() {
@Override

View File

@ -17,6 +17,7 @@
package org.apache.lucene.tests.index;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.ByteArrayOutputStream;
@ -838,6 +839,58 @@ public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTe
}
}
/**
 * Checks that a float vector field whose only vector-bearing document has been deleted and merged
 * away is still exposed as non-null vector values of size zero, and that asking those empty values
 * for a scorer yields {@code null}.
 *
 * @throws Exception if indexing or reading the index fails
 */
public void testEmptyFloatVectorData() throws Exception {
  try (Directory directory = newDirectory();
      IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig())) {
    final String deletedId = "0";

    // The only document that carries a vector; it is deleted again below.
    Document vectorDoc = new Document();
    vectorDoc.add(new StringField("id", deletedId, Field.Store.NO));
    vectorDoc.add(new KnnFloatVectorField("v", new float[] {2, 3, 5, 6}, DOT_PRODUCT));
    writer.addDocument(vectorDoc);

    // A second document without any vector, so one live document remains after the delete.
    Document plainDoc = new Document();
    plainDoc.add(new StringField("id", "1", Field.Store.NO));
    writer.addDocument(plainDoc);

    // Remove the vector-bearing document, then merge down to a single segment.
    writer.deleteDocuments(new Term("id", deletedId));
    writer.commit();
    writer.forceMerge(1);

    try (DirectoryReader directoryReader = DirectoryReader.open(writer)) {
      LeafReader leaf = getOnlyLeafReader(directoryReader);
      FloatVectorValues vectorValues = leaf.getFloatVectorValues("v");
      assertNotNull(vectorValues);
      assertEquals(0, vectorValues.size());
      assertNull(vectorValues.scorer(new float[] {2, 3, 5, 6}));
    }
  }
}
/**
 * Checks that a byte vector field whose only vector-bearing document has been deleted and merged
 * away is still exposed as non-null vector values of size zero, and that asking those empty values
 * for a scorer yields {@code null}.
 *
 * @throws Exception if indexing or reading the index fails
 */
public void testEmptyByteVectorData() throws Exception {
  try (Directory directory = newDirectory();
      IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig())) {
    final String deletedId = "0";

    // The only document that carries a vector; it is deleted again below.
    Document vectorDoc = new Document();
    vectorDoc.add(new StringField("id", deletedId, Field.Store.NO));
    vectorDoc.add(new KnnByteVectorField("v", new byte[] {2, 3, 5, 6}, DOT_PRODUCT));
    writer.addDocument(vectorDoc);

    // A second document without any vector, so one live document remains after the delete.
    Document plainDoc = new Document();
    plainDoc.add(new StringField("id", "1", Field.Store.NO));
    writer.addDocument(plainDoc);

    // Remove the vector-bearing document, then merge down to a single segment.
    writer.deleteDocuments(new Term("id", deletedId));
    writer.commit();
    writer.forceMerge(1);

    try (DirectoryReader directoryReader = DirectoryReader.open(writer)) {
      LeafReader leaf = getOnlyLeafReader(directoryReader);
      ByteVectorValues vectorValues = leaf.getByteVectorValues("v");
      assertNotNull(vectorValues);
      assertEquals(0, vectorValues.size());
      assertNull(vectorValues.scorer(new byte[] {2, 3, 5, 6}));
    }
  }
}
protected VectorSimilarityFunction randomSimilarity() {
return VectorSimilarityFunction.values()[
random().nextInt(VectorSimilarityFunction.values().length)];