mirror of https://github.com/apache/lucene.git
SimpleText[Float|Byte]VectorValues::scorer should return null when the vector values are empty (#13444)
This commit ensures that SimpleText[Float|Byte]VectorValues::scorer returns null when the vector values are empty, as per the scorer javadoc. Other KnnVectorsReader implementations have specialised empty implementations that do the same, e.g. OffHeapFloatVectorValues.EmptyOffHeapVectorValues. The VectorScorer interface is new in Lucene 9.11; see #13181. An existing test randomly hits this, but a new test has been added that exercises this code path consistently. It's also useful to verify other KnnVectorsReader implementations.
This commit is contained in:
parent
750a7c4d3b
commit
f3c2b91630
|
@ -445,6 +445,9 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
|
|||
|
||||
@Override
|
||||
public VectorScorer scorer(float[] target) {
|
||||
if (size() == 0) {
|
||||
return null;
|
||||
}
|
||||
OffHeapFloatVectorValues values = this.copy();
|
||||
return new VectorScorer() {
|
||||
@Override
|
||||
|
|
|
@ -494,6 +494,9 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
|
|||
|
||||
@Override
|
||||
public VectorScorer scorer(float[] target) {
|
||||
if (size == 0) {
|
||||
return null;
|
||||
}
|
||||
OffHeapFloatVectorValues values = this.copy();
|
||||
return new VectorScorer() {
|
||||
@Override
|
||||
|
|
|
@ -73,4 +73,9 @@ public class TestLucene90HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
|
|||
public void testByteVectorScorerIteration() {
|
||||
// unimplemented
|
||||
}
|
||||
|
||||
// Overrides the inherited testEmptyByteVectorData with an empty body, so the
// byte-vector variant of the empty-vector-data test is a no-op for this format's test class.
@Override
|
||||
public void testEmptyByteVectorData() {
|
||||
// unimplemented (NOTE(review): presumably this legacy format has no byte vector support — confirm)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -72,4 +72,9 @@ public class TestLucene91HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
|
|||
public void testByteVectorScorerIteration() {
|
||||
// unimplemented
|
||||
}
|
||||
|
||||
// Overrides the inherited testEmptyByteVectorData with an empty body, so the
// byte-vector variant of the empty-vector-data test is a no-op for this format's test class.
@Override
|
||||
public void testEmptyByteVectorData() {
|
||||
// unimplemented (NOTE(review): presumably this legacy format has no byte vector support — confirm)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -62,4 +62,9 @@ public class TestLucene92HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
|
|||
public void testByteVectorScorerIteration() {
|
||||
// unimplemented
|
||||
}
|
||||
|
||||
// Overrides the inherited testEmptyByteVectorData with an empty body, so the
// byte-vector variant of the empty-vector-data test is a no-op for this format's test class.
@Override
|
||||
public void testEmptyByteVectorData() {
|
||||
// unimplemented (NOTE(review): presumably this legacy format has no byte vector support — confirm)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -360,6 +360,9 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
|
|||
|
||||
@Override
|
||||
public VectorScorer scorer(float[] target) {
|
||||
if (size() == 0) {
|
||||
return null;
|
||||
}
|
||||
SimpleTextFloatVectorValues simpleTextFloatVectorValues =
|
||||
new SimpleTextFloatVectorValues(this);
|
||||
return new VectorScorer() {
|
||||
|
@ -470,6 +473,9 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
|
|||
|
||||
@Override
|
||||
public VectorScorer scorer(byte[] target) {
|
||||
if (size() == 0) {
|
||||
return null;
|
||||
}
|
||||
SimpleTextByteVectorValues simpleTextByteVectorValues = new SimpleTextByteVectorValues(this);
|
||||
return new VectorScorer() {
|
||||
@Override
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.lucene.tests.index;
|
||||
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
|
||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
|
@ -838,6 +839,58 @@ public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTe
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Checks the behaviour of float vector values for a field whose only vector-bearing document
 * has been deleted and merged away.
 *
 * <p>Indexes one document with a float vector in field "v" and one document without a vector,
 * deletes the former, commits and force-merges. The test then expects the field's
 * FloatVectorValues to be non-null, to report a size of 0, and to return null from scorer().
 *
 * @throws Exception if indexing or reading fails
 */
public void testEmptyFloatVectorData() throws Exception {
|
||||
try (Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
// doc "0": the only document that carries a vector in field "v"
var doc1 = new Document();
|
||||
doc1.add(new StringField("id", "0", Field.Store.NO));
|
||||
doc1.add(new KnnFloatVectorField("v", new float[] {2, 3, 5, 6}, DOT_PRODUCT));
|
||||
w.addDocument(doc1);
|
||||
|
||||
// doc "1": has an id but no vector, so it stays behind after the delete below
var doc2 = new Document();
|
||||
doc2.add(new StringField("id", "1", Field.Store.NO));
|
||||
w.addDocument(doc2);
|
||||
|
||||
// delete the vector-bearing document by its id term, then merge so that
// getOnlyLeafReader below sees a single leaf
w.deleteDocuments(new Term("id", Integer.toString(0)));
|
||||
w.commit();
|
||||
w.forceMerge(1);
|
||||
|
||||
try (DirectoryReader reader = DirectoryReader.open(w)) {
|
||||
LeafReader r = getOnlyLeafReader(reader);
|
||||
FloatVectorValues values = r.getFloatVectorValues("v");
|
||||
// the field is still expected to be present, but with no vectors left
assertNotNull(values);
|
||||
assertEquals(0, values.size());
|
||||
// an empty set of vector values must yield a null scorer
assertNull(values.scorer(new float[] {2, 3, 5, 6}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks the behaviour of byte vector values for a field whose only vector-bearing document
 * has been deleted and merged away.
 *
 * <p>Indexes one document with a byte vector in field "v" and one document without a vector,
 * deletes the former, commits and force-merges. The test then expects the field's
 * ByteVectorValues to be non-null, to report a size of 0, and to return null from scorer().
 *
 * @throws Exception if indexing or reading fails
 */
public void testEmptyByteVectorData() throws Exception {
|
||||
try (Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
// doc "0": the only document that carries a vector in field "v"
var doc1 = new Document();
|
||||
doc1.add(new StringField("id", "0", Field.Store.NO));
|
||||
doc1.add(new KnnByteVectorField("v", new byte[] {2, 3, 5, 6}, DOT_PRODUCT));
|
||||
w.addDocument(doc1);
|
||||
|
||||
// doc "1": has an id but no vector, so it stays behind after the delete below
var doc2 = new Document();
|
||||
doc2.add(new StringField("id", "1", Field.Store.NO));
|
||||
w.addDocument(doc2);
|
||||
|
||||
// delete the vector-bearing document by its id term, then merge so that
// getOnlyLeafReader below sees a single leaf
w.deleteDocuments(new Term("id", Integer.toString(0)));
|
||||
w.commit();
|
||||
w.forceMerge(1);
|
||||
|
||||
try (DirectoryReader reader = DirectoryReader.open(w)) {
|
||||
LeafReader r = getOnlyLeafReader(reader);
|
||||
ByteVectorValues values = r.getByteVectorValues("v");
|
||||
// the field is still expected to be present, but with no vectors left
assertNotNull(values);
|
||||
assertEquals(0, values.size());
|
||||
// an empty set of vector values must yield a null scorer
assertNull(values.scorer(new byte[] {2, 3, 5, 6}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected VectorSimilarityFunction randomSimilarity() {
|
||||
return VectorSimilarityFunction.values()[
|
||||
random().nextInt(VectorSimilarityFunction.values().length)];
|
||||
|
|
Loading…
Reference in New Issue