From 0a1cf3108468e31f06cabfd4154b69189dcf6e79 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 4 Jan 2022 15:59:30 -0500 Subject: [PATCH] LUCENE-10351 Correct knn search failure with deleted docs (#580) Currently, when doing a knn search on a segment where all documents with the knn field were deleted, we get the following error: maxSize must be > 0 and < 2147483630; got: 0 java.lang.IllegalArgumentException: maxSize must be > 0 and < 2147483630; got: 0 at __randomizedtesting.SeedInfo.seed([43F1F124D7076A4E:1B860BFCCB9B0BB5]:0) at org.apache.lucene.util.LongHeap.<init>(LongHeap.java:57) at org.apache.lucene.util.LongHeap$1.<init>(LongHeap.java:69) at org.apache.lucene.util.LongHeap.create(LongHeap.java:69) at org.apache.lucene.util.hnsw.NeighborQueue.<init>(NeighborQueue.java:41) at org.apache.lucene.util.hnsw.HnswGraph.search(HnswGraph.java:105) This patch fixes this error and ensures that empty TopDocs are returned when the knn field doesn't have any documents left. --- .../lucene/codecs/lucene90/Lucene90HnswVectorsReader.java | 3 +++ .../lucene/tests/index/BaseKnnVectorsFormatTestCase.java | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java index c3e5e0a9eda..b0ac8a975d5 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java @@ -239,6 +239,9 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader { if (fieldEntry == null || fieldEntry.dimension == 0) { return null; } + if (fieldEntry.size() == 0) { + return new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]); + } // bound k by total number of vectors to prevent oversizing data structures k = Math.min(k, fieldEntry.size()); diff --git 
a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java index 8d90fdc87f9..84d83f09168 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java @@ -556,9 +556,15 @@ public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTe w.deleteDocuments(new Term("id", "0")); w.forceMerge(1); try (DirectoryReader r = DirectoryReader.open(w)) { - VectorValues values = getOnlyLeafReader(r).getVectorValues("v"); + LeafReader leafReader = getOnlyLeafReader(r); + VectorValues values = leafReader.getVectorValues("v"); assertNotNull(values); assertEquals(0, values.size()); + + // assert that knn search doesn't fail on a field with all deleted docs + TopDocs results = + leafReader.searchNearestVectors("v", randomVector(3), 1, leafReader.getLiveDocs()); + assertEquals(0, results.scoreDocs.length); } } }