LUCENE-10663: Fix KnnVectorQuery explain with multiple segments (#1050)

If there are multiple segments, KnnVectorQuery explain has a bug in locating
the doc ID. This is because the doc ID passed to explain is relative to the
segment, i.e. it does not include the segment's docBase, whereas the doc IDs
stored in KnnVectorQuery.DocAndScoreQuery's docs array are offset by each
segment's docBase. So, in 'DocAndScoreQuery.explain', the segment's docBase
needs to be added to the doc ID before looking it up.

Co-authored-by: Julie Tibshirani <julietibs@apache.org>
This commit is contained in:
Shiming Li 2022-07-29 01:31:49 +08:00 committed by GitHub
parent 0ff987562a
commit bb752c774c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 30 additions and 2 deletions

View File

@ -102,7 +102,7 @@ Optimizations
Bug Fixes
---------------------
(No changes)
* LUCENE-10663: Fix KnnVectorQuery explain with multiple segments. (Shiming Li)
Other
---------------------

View File

@ -318,7 +318,7 @@ public class KnnVectorQuery extends Query {
return new Weight(this) {
@Override
public Explanation explain(LeafReaderContext context, int doc) {
int found = Arrays.binarySearch(docs, doc);
int found = Arrays.binarySearch(docs, doc + context.docBase);
if (found < 0) {
return Explanation.noMatch("not in top " + k);
}

View File

@ -446,6 +446,34 @@ public class TestKnnVectorQuery extends LuceneTestCase {
}
}
/**
 * Test that explain reports the right match / no-match result when the index is committed after
 * every document, which is intended to spread the documents over multiple segments.
 */
public void testExplainMultipleSegments() throws IOException {
  try (Directory d = newDirectory()) {
    try (IndexWriter w = new IndexWriter(d, new IndexWriterConfig())) {
      for (int j = 0; j < 5; j++) {
        Document doc = new Document();
        doc.add(new KnnVectorField("field", new float[] {j, j}));
        w.addDocument(doc);
        // Commit after each document so that the documents are not all written in one flush.
        w.commit();
      }
    }
    try (IndexReader reader = DirectoryReader.open(d)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      KnnVectorQuery query = new KnnVectorQuery("field", new float[] {2, 3}, 3);

      // Doc 2 is expected to be among the top 3 hits.
      Explanation matched = searcher.explain(query, 2);
      assertTrue(matched.isMatch());
      assertEquals(1 / 2f, matched.getValue());
      assertEquals(0, matched.getDetails().length);
      assertEquals("within top 3", matched.getDescription());

      // Doc 4 is expected to fall outside the top 3 hits.
      Explanation nomatch = searcher.explain(query, 4);
      assertFalse(nomatch.isMatch());
      assertEquals(0f, nomatch.getValue());
      // Assert on the no-match explanation itself, not on "matched" again.
      assertEquals(0, nomatch.getDetails().length);
      assertEquals("not in top 3", nomatch.getDescription());
    }
  }
}
/** Test that when vectors are abnormally distributed among segments, we still find the top K */
public void testSkewedIndex() throws IOException {
/* We have to choose the numbers carefully here so that some segment has more than the expected