From bb752c774ca0264a02a60e9b8568addb7b6722d3 Mon Sep 17 00:00:00 2001 From: Shiming Li Date: Fri, 29 Jul 2022 01:31:49 +0800 Subject: [PATCH] LUCENE-10663: Fix KnnVectorQuery explain with multiple segments (#1050) When an index has multiple segments, KnnVectorQuery explain fails to locate the doc ID. This is because the doc ID passed to explain is relative to the segment, i.e. it does not include the segment's docBase. The doc IDs stored in KnnVectorQuery.DocAndScoreQuery's docs array, however, have each segment's docBase added to them. So 'DocAndScoreQuery.explain' needs to add the segment's docBase to the doc ID before looking it up. Co-authored-by: Julie Tibshirani --- lucene/CHANGES.txt | 2 +- .../apache/lucene/search/KnnVectorQuery.java | 2 +- .../lucene/search/TestKnnVectorQuery.java | 28 +++++++++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index ed6f9792c89..c5613ba6cf6 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -102,7 +102,7 @@ Optimizations Bug Fixes --------------------- -(No changes) +* LUCENE-10663: Fix KnnVectorQuery explain with multiple segments. 
(Shiming Li) Other --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java b/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java index 1127521dc83..9d581bd073e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java @@ -318,7 +318,7 @@ public class KnnVectorQuery extends Query { return new Weight(this) { @Override public Explanation explain(LeafReaderContext context, int doc) { - int found = Arrays.binarySearch(docs, doc); + int found = Arrays.binarySearch(docs, doc + context.docBase); if (found < 0) { return Explanation.noMatch("not in top " + k); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java index 74ecf23c292..4d826126ac0 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java @@ -446,6 +446,34 @@ public class TestKnnVectorQuery extends LuceneTestCase { } } + public void testExplainMultipleSegments() throws IOException { + try (Directory d = newDirectory()) { + try (IndexWriter w = new IndexWriter(d, new IndexWriterConfig())) { + for (int j = 0; j < 5; j++) { + Document doc = new Document(); + doc.add(new KnnVectorField("field", new float[] {j, j})); + w.addDocument(doc); + w.commit(); + } + } + try (IndexReader reader = DirectoryReader.open(d)) { + IndexSearcher searcher = new IndexSearcher(reader); + KnnVectorQuery query = new KnnVectorQuery("field", new float[] {2, 3}, 3); + Explanation matched = searcher.explain(query, 2); + assertTrue(matched.isMatch()); + assertEquals(1 / 2f, matched.getValue()); + assertEquals(0, matched.getDetails().length); + assertEquals("within top 3", matched.getDescription()); + + Explanation nomatch = searcher.explain(query, 4); + assertFalse(nomatch.isMatch()); + 
assertEquals(0f, nomatch.getValue()); + assertEquals(0, nomatch.getDetails().length); + assertEquals("not in top 3", nomatch.getDescription()); + } + } + } + /** Test that when vectors are abnormally distributed among segments, we still find the top K */ public void testSkewedIndex() throws IOException { /* We have to choose the numbers carefully here so that some segment has more than the expected