LUCENE-10663: Fix KnnVectorQuery explain with multiple segments (#1050)

If there are multiple segments. KnnVectorQuery explain has a bug in locating the doc ID. This is because the doc ID in explain is the docBase without the segment. In KnnVectorQuery.DocAndScoreQuery docs docid is increased in each segment of the docBase. So, in the 'DocAndScoreQuery.explain', needs to be added with the segment's docBase. Co-authored-by: Julie Tibshirani <julietibs@apache.org>
2022-07-29 01:31:49 +08:00 · 2022-07-29 01:31:49 +08:00 · bb752c774c
parent 0ff987562a
commit bb752c774c
3 changed files with 30 additions and 2 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -102,7 +102,7 @@ Optimizations

 Bug Fixes
 ---------------------
-(No changes)
+* LUCENE-10663: Fix KnnVectorQuery explain with multiple segments. (Shiming Li)

 Other
 ---------------------
--- a/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
@ -318,7 +318,7 @@ public class KnnVectorQuery extends Query {
      return new Weight(this) {
        @Override
        public Explanation explain(LeafReaderContext context, int doc) {
-          int found = Arrays.binarySearch(docs, doc);
+          int found = Arrays.binarySearch(docs, doc + context.docBase);
          if (found < 0) {
            return Explanation.noMatch("not in top " + k);
          }
--- a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java
@ -446,6 +446,34 @@ public class TestKnnVectorQuery extends LuceneTestCase {
    }
  }

+  public void testExplainMultipleSegments() throws IOException {
+    try (Directory d = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(d, new IndexWriterConfig())) {
+        for (int j = 0; j < 5; j++) {
+          Document doc = new Document();
+          doc.add(new KnnVectorField("field", new float[] {j, j}));
+          w.addDocument(doc);
+          w.commit();
+        }
+      }
+      try (IndexReader reader = DirectoryReader.open(d)) {
+        IndexSearcher searcher = new IndexSearcher(reader);
+        KnnVectorQuery query = new KnnVectorQuery("field", new float[] {2, 3}, 3);
+        Explanation matched = searcher.explain(query, 2);
+        assertTrue(matched.isMatch());
+        assertEquals(1 / 2f, matched.getValue());
+        assertEquals(0, matched.getDetails().length);
+        assertEquals("within top 3", matched.getDescription());
+
+        Explanation nomatch = searcher.explain(query, 4);
+        assertFalse(nomatch.isMatch());
+        assertEquals(0f, nomatch.getValue());
+        assertEquals(0, matched.getDetails().length);
+        assertEquals("not in top 3", nomatch.getDescription());
+      }
+    }
+  }
+
  /** Test that when vectors are abnormally distributed among segments, we still find the top K */
  public void testSkewedIndex() throws IOException {
    /* We have to choose the numbers carefully here so that some segment has more than the expected