From bb752c774ca0264a02a60e9b8568addb7b6722d3 Mon Sep 17 00:00:00 2001
From: Shiming Li <limingnihao@live.com>
Date: Fri, 29 Jul 2022 01:31:49 +0800
Subject: [PATCH] LUCENE-10663: Fix KnnVectorQuery explain with multiple
 segments (#1050)

When the index has multiple segments, KnnVectorQuery's explain fails to
locate the doc ID. The doc ID passed to explain is relative to the segment
(it does not include the segment's docBase), whereas the doc IDs stored in
KnnVectorQuery.DocAndScoreQuery are offset by each segment's docBase. So
'DocAndScoreQuery.explain' needs to add the segment's docBase to the doc ID
before looking it up.

Co-authored-by: Julie Tibshirani <julietibs@apache.org>
---
 lucene/CHANGES.txt                            |  2 +-
 .../apache/lucene/search/KnnVectorQuery.java  |  2 +-
 .../lucene/search/TestKnnVectorQuery.java     | 28 +++++++++++++++++++
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index ed6f9792c89..c5613ba6cf6 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -102,7 +102,7 @@ Optimizations
 
 Bug Fixes
 ---------------------
-(No changes)
+* LUCENE-10663: Fix KnnVectorQuery explain with multiple segments. (Shiming Li)
 
 Other
 ---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java b/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
index 1127521dc83..9d581bd073e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
@@ -318,7 +318,7 @@ public class KnnVectorQuery extends Query {
       return new Weight(this) {
         @Override
         public Explanation explain(LeafReaderContext context, int doc) {
-          int found = Arrays.binarySearch(docs, doc);
+          int found = Arrays.binarySearch(docs, doc + context.docBase);
           if (found < 0) {
             return Explanation.noMatch("not in top " + k);
           }
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java
index 74ecf23c292..4d826126ac0 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java
@@ -446,6 +446,34 @@ public class TestKnnVectorQuery extends LuceneTestCase {
     }
   }
 
+  public void testExplainMultipleSegments() throws IOException {
+    try (Directory d = newDirectory()) {
+      try (IndexWriter w = new IndexWriter(d, new IndexWriterConfig())) {
+        for (int j = 0; j < 5; j++) {
+          Document doc = new Document();
+          doc.add(new KnnVectorField("field", new float[] {j, j}));
+          w.addDocument(doc);
+          w.commit(); // commit per doc, intended to yield multiple segments
+        }
+      }
+      try (IndexReader reader = DirectoryReader.open(d)) {
+        IndexSearcher searcher = new IndexSearcher(reader);
+        KnnVectorQuery query = new KnnVectorQuery("field", new float[] {2, 3}, 3);
+        Explanation matched = searcher.explain(query, 2);
+        assertTrue(matched.isMatch());
+        assertEquals(1 / 2f, matched.getValue());
+        assertEquals(0, matched.getDetails().length);
+        assertEquals("within top 3", matched.getDescription());
+
+        Explanation nomatch = searcher.explain(query, 4); // expected outside the top 3
+        assertFalse(nomatch.isMatch());
+        assertEquals(0f, nomatch.getValue());
+        assertEquals(0, nomatch.getDetails().length); // check nomatch, not matched, here
+        assertEquals("not in top 3", nomatch.getDescription());
+      }
+    }
+  }
+
   /** Test that when vectors are abnormally distributed among segments, we still find the top K */
   public void testSkewedIndex() throws IOException {
     /* We have to choose the numbers carefully here so that some segment has more than the expected