LUCENE-9614: Fix KnnVectorQuery failure when numDocs is 0 (#413)

When the reader has no live docs, `KnnVectorQuery` can error out. This happens
because `IndexReader#numDocs` is 0, and we end up passing an illegal value of
`k = 0` to the search method.

This commit removes the problematic optimization in `KnnVectorQuery` and
replaces it with a lower-level one based on the total number of vectors in the segment.
This commit is contained in:
Julie Tibshirani 2021-10-27 11:08:47 -07:00 committed by GitHub
parent 941df98c3f
commit abd5ec4ff0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 86 additions and 2 deletions

View File

@ -240,8 +240,10 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
return null;
}
OffHeapVectorValues vectorValues = getOffHeapVectorValues(fieldEntry);
// bound k by total number of vectors to prevent oversizing data structures
k = Math.min(k, fieldEntry.size());
OffHeapVectorValues vectorValues = getOffHeapVectorValues(fieldEntry);
// use a seed that is fixed for the index so we get reproducible results for the same query
final SplittableRandom random = new SplittableRandom(checksumSeed);
NeighborQueue results =

View File

@ -60,7 +60,7 @@ public class KnnVectorQuery extends Query {
public Query rewrite(IndexReader reader) throws IOException {
TopDocs[] perLeafResults = new TopDocs[reader.leaves().size()];
for (LeafReaderContext ctx : reader.leaves()) {
perLeafResults[ctx.ord] = searchLeaf(ctx, Math.min(k, reader.numDocs()));
perLeafResults[ctx.ord] = searchLeaf(ctx, k);
}
// Merge sort the results
TopDocs topK = TopDocs.merge(k, perLeafResults);

View File

@ -30,13 +30,17 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.VectorUtil;
@ -518,6 +522,34 @@ public class TestKnnVectorQuery extends LuceneTestCase {
}
}
/**
 * Indexes a handful of vector documents, then searches them through a filter reader that reports
 * zero live docs, and asserts that the kNN query comes back with no hits.
 */
public void testNoLiveDocsReader() throws IOException {
  final int docCount = 10;
  final int dimension = 30;
  IndexWriterConfig config = newIndexWriterConfig();
  try (Directory directory = newDirectory();
      IndexWriter writer = new IndexWriter(directory, config)) {
    // Every document carries an id field and a random vector of the same dimension.
    for (int docId = 0; docId < docCount; docId++) {
      Document doc = new Document();
      doc.add(new StringField("index", String.valueOf(docId), Field.Store.NO));
      doc.add(new KnnVectorField("vector", randomVector(dimension)));
      writer.addDocument(doc);
    }
    writer.commit();

    try (DirectoryReader baseReader = DirectoryReader.open(directory)) {
      // Hide all documents behind a wrapper whose leaves expose no live docs.
      DirectoryReader noLiveDocsReader = new NoLiveDocsDirectoryReader(baseReader);
      IndexSearcher indexSearcher = new IndexSearcher(noLiveDocsReader);
      KnnVectorQuery knnQuery = new KnnVectorQuery("vector", randomVector(dimension), docCount);
      TopDocs hits = indexSearcher.search(knnQuery, docCount);
      assertEquals(0, hits.scoreDocs.length);
    }
  }
}
private Directory getIndexStore(String field, float[]... contents) throws IOException {
Directory indexStore = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
@ -536,4 +568,54 @@ public class TestKnnVectorQuery extends LuceneTestCase {
ScoreDoc[] result = searcher.search(q, 1000).scoreDocs;
assertEquals(expectedMatches, result.length);
}
/**
 * Test-only {@link FilterDirectoryReader} that wraps each leaf of the underlying reader in a
 * {@link NoLiveDocsLeafReader}, so that every segment is seen with no live documents.
 */
private static class NoLiveDocsDirectoryReader extends FilterDirectoryReader {
  /**
   * Wraps {@code in}, replacing each of its leaves with a {@link NoLiveDocsLeafReader}.
   *
   * @param in the directory reader to wrap
   * @throws IOException if the superclass constructor fails while wrapping the sub-readers
   */
  private NoLiveDocsDirectoryReader(DirectoryReader in) throws IOException {
    super(
        in,
        new SubReaderWrapper() {
          @Override
          public LeafReader wrap(LeafReader reader) {
            return new NoLiveDocsLeafReader(reader);
          }
        });
  }

  /** Re-applies this wrapper to a new underlying directory reader. */
  @Override
  protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
    return new NoLiveDocsDirectoryReader(in);
  }

  /**
   * Returns {@code null}: the leaves of this reader report different live docs than the wrapped
   * reader, so the wrapped reader's reader-level cache key must not be reused for this view.
   */
  @Override
  public CacheHelper getReaderCacheHelper() {
    return null;
  }
}
/**
 * Test-only {@link FilterLeafReader} that presents the wrapped leaf as having no live documents:
 * {@link #numDocs()} is 0 and {@link #getLiveDocs()} matches nothing.
 */
private static class NoLiveDocsLeafReader extends FilterLeafReader {
  private NoLiveDocsLeafReader(LeafReader in) {
    super(in);
  }

  /** Always reports zero live documents. */
  @Override
  public int numDocs() {
    return 0;
  }

  /** Returns a bit set sized to the wrapped reader's {@code maxDoc()} with no bits set. */
  @Override
  public Bits getLiveDocs() {
    return new Bits.MatchNoBits(in.maxDoc());
  }

  /**
   * Returns {@code null}: this wrapper changes the set of live docs, so anything cached against
   * the wrapped reader's reader-level key (which accounts for deletions) would not be valid for
   * this view.
   */
  @Override
  public CacheHelper getReaderCacheHelper() {
    return null;
  }

  /**
   * Delegates to the wrapped reader. Only live docs and the live-doc count are overridden here,
   * and the core cache key is meant to be independent of deletions.
   */
  @Override
  public CacheHelper getCoreCacheHelper() {
    return in.getCoreCacheHelper();
  }
}
}