mirror of https://github.com/apache/lucene.git
LUCENE-9614: Fix KnnVectorQuery failure when numDocs is 0 (#413)
When the reader has no live docs, `KnnVectorQuery` can error out: `IndexReader#numDocs` is 0, so the query ends up passing an illegal value of `k = 0` to the search method. This commit removes the problematic optimization in `KnnVectorQuery` and replaces it with a lower-level bound based on the total number of vectors in the segment.
parent 941df98c3f
commit abd5ec4ff0
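For context, here is a minimal, hypothetical sketch of the situation the new per-segment bound covers (the demo class name, the toy two-dimensional vectors, and the use of `ByteBuffersDirectory` are illustrative assumptions, not part of this patch): a `KnnVectorQuery` whose `k` exceeds the number of indexed vectors. With this change, `k` is clamped inside the segment's vectors reader from the number of stored vectors, rather than shrunk query-side via `IndexReader#numDocs`, which is what produced the illegal `k = 0` on a reader with no live docs.

// Hypothetical demo, not from the Lucene repository.
import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.KnnVectorQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class KnnOversizedKDemo {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory();
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) {
      // Index three documents, each with a 2-dimensional vector.
      for (int i = 0; i < 3; i++) {
        Document d = new Document();
        d.add(new KnnVectorField("vector", new float[] {i, i + 1f}));
        w.addDocument(d);
      }
      w.commit();

      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        // Ask for far more neighbors than the index contains. The per-segment
        // vectors reader now clamps k to the number of stored vectors instead
        // of the query shrinking k via IndexReader#numDocs (which can be 0).
        KnnVectorQuery query = new KnnVectorQuery("vector", new float[] {0f, 1f}, 100);
        TopDocs hits = searcher.search(query, 10);
        System.out.println("hits: " + hits.scoreDocs.length); // at most 3
      }
    }
  }
}

The test added below exercises the zero-live-docs case directly by wrapping the reader in a `FilterDirectoryReader` whose leaves report `numDocs() == 0` and match no live docs.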
@@ -240,8 +240,10 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
       return null;
     }
 
+    // bound k by total number of vectors to prevent oversizing data structures
+    k = Math.min(k, fieldEntry.size());
     OffHeapVectorValues vectorValues = getOffHeapVectorValues(fieldEntry);
     // use a seed that is fixed for the index so we get reproducible results for the same query
     final SplittableRandom random = new SplittableRandom(checksumSeed);
     NeighborQueue results =
@@ -60,7 +60,7 @@ public class KnnVectorQuery extends Query {
   public Query rewrite(IndexReader reader) throws IOException {
     TopDocs[] perLeafResults = new TopDocs[reader.leaves().size()];
     for (LeafReaderContext ctx : reader.leaves()) {
-      perLeafResults[ctx.ord] = searchLeaf(ctx, Math.min(k, reader.numDocs()));
+      perLeafResults[ctx.ord] = searchLeaf(ctx, k);
     }
     // Merge sort the results
     TopDocs topK = TopDocs.merge(k, perLeafResults);
@@ -30,13 +30,17 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.VectorUtil;
@@ -518,6 +522,34 @@ public class TestKnnVectorQuery extends LuceneTestCase {
     }
   }
 
+  /**
+   * Check that the query behaves reasonably when using a custom filter reader where there are no
+   * live docs.
+   */
+  public void testNoLiveDocsReader() throws IOException {
+    IndexWriterConfig iwc = newIndexWriterConfig();
+    try (Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, iwc)) {
+      final int numDocs = 10;
+      final int dim = 30;
+      for (int i = 0; i < numDocs; ++i) {
+        Document d = new Document();
+        d.add(new StringField("index", String.valueOf(i), Field.Store.NO));
+        d.add(new KnnVectorField("vector", randomVector(dim)));
+        w.addDocument(d);
+      }
+      w.commit();
+
+      try (DirectoryReader reader = DirectoryReader.open(dir)) {
+        DirectoryReader wrappedReader = new NoLiveDocsDirectoryReader(reader);
+        IndexSearcher searcher = new IndexSearcher(wrappedReader);
+        KnnVectorQuery query = new KnnVectorQuery("vector", randomVector(dim), numDocs);
+        TopDocs topDocs = searcher.search(query, numDocs);
+        assertEquals(0, topDocs.scoreDocs.length);
+      }
+    }
+  }
+
   private Directory getIndexStore(String field, float[]... contents) throws IOException {
     Directory indexStore = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
@@ -536,4 +568,54 @@ public class TestKnnVectorQuery extends LuceneTestCase {
     ScoreDoc[] result = searcher.search(q, 1000).scoreDocs;
     assertEquals(expectedMatches, result.length);
   }
+
+  private static class NoLiveDocsDirectoryReader extends FilterDirectoryReader {
+
+    private NoLiveDocsDirectoryReader(DirectoryReader in) throws IOException {
+      super(
+          in,
+          new SubReaderWrapper() {
+            @Override
+            public LeafReader wrap(LeafReader reader) {
+              return new NoLiveDocsLeafReader(reader);
+            }
+          });
+    }
+
+    @Override
+    protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
+      return new NoLiveDocsDirectoryReader(in);
+    }
+
+    @Override
+    public CacheHelper getReaderCacheHelper() {
+      return in.getReaderCacheHelper();
+    }
+  }
+
+  private static class NoLiveDocsLeafReader extends FilterLeafReader {
+    private NoLiveDocsLeafReader(LeafReader in) {
+      super(in);
+    }
+
+    @Override
+    public int numDocs() {
+      return 0;
+    }
+
+    @Override
+    public Bits getLiveDocs() {
+      return new Bits.MatchNoBits(in.maxDoc());
+    }
+
+    @Override
+    public CacheHelper getReaderCacheHelper() {
+      return in.getReaderCacheHelper();
+    }
+
+    @Override
+    public CacheHelper getCoreCacheHelper() {
+      return in.getCoreCacheHelper();
+    }
+  }
 }