ToParentBlockJoin[Byte|Float]KnnVectorQuery needs to handle the case when parents are missing (#12504)

This is a follow up to: https://github.com/apache/lucene/pull/12434

Adds a test for the case where parents are missing from the index and verifies that we return no hits. Previously this would have thrown a NullPointerException (NPE).
This commit is contained in:
Benjamin Trent 2023-08-14 09:24:25 -04:00 committed by GitHub
parent 47258cc9e9
commit 18b56bd002
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 84 additions and 0 deletions

View File

@ -75,6 +75,9 @@ public class ToParentBlockJoinByteKnnVectorQuery extends KnnByteVectorQuery {
return null;
}
BitSet parentBitSet = parentsFilter.getBitSet(context);
if (parentBitSet == null) {
return NO_RESULTS;
}
ParentBlockJoinByteVectorScorer vectorScorer =
new ParentBlockJoinByteVectorScorer(
context.reader().getByteVectorValues(field),
@ -112,6 +115,9 @@ public class ToParentBlockJoinByteKnnVectorQuery extends KnnByteVectorQuery {
protected TopDocs approximateSearch(LeafReaderContext context, Bits acceptDocs, int visitedLimit)
throws IOException {
BitSet parentBitSet = parentsFilter.getBitSet(context);
if (parentBitSet == null) {
return NO_RESULTS;
}
KnnCollector collector = new ToParentJoinKnnCollector(k, visitedLimit, parentBitSet);
context.reader().searchNearestVectors(field, query, collector, acceptDocs);
return collector.topDocs();

View File

@ -77,6 +77,9 @@ public class ToParentBlockJoinFloatKnnVectorQuery extends KnnFloatVectorQuery {
return null;
}
BitSet parentBitSet = parentsFilter.getBitSet(context);
if (parentBitSet == null) {
return NO_RESULTS;
}
ParentBlockJoinFloatVectorScorer vectorScorer =
new ParentBlockJoinFloatVectorScorer(
context.reader().getFloatVectorValues(field),
@ -114,6 +117,9 @@ public class ToParentBlockJoinFloatKnnVectorQuery extends KnnFloatVectorQuery {
protected TopDocs approximateSearch(LeafReaderContext context, Bits acceptDocs, int visitedLimit)
throws IOException {
BitSet parentBitSet = parentsFilter.getBitSet(context);
if (parentBitSet == null) {
return NO_RESULTS;
}
KnnCollector collector = new ToParentJoinKnnCollector(k, visitedLimit, parentBitSet);
context.reader().searchNearestVectors(field, query, collector, acceptDocs);
return collector.topDocs();

View File

@ -88,6 +88,78 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
}
}
/**
 * Searches an index whose documents have neither a vector field nor a parent marker and checks
 * that the parent-join kNN query yields no hits, both without a filter and with a match-all
 * filter.
 */
public void testIndexWithNoVectorsNorParents() throws IOException {
  try (Directory dir = newDirectory()) {
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      // Index only plain documents: no vector field and no parent marker.
      for (int docNum = 0; docNum < 5; ++docNum) {
        Document plainDoc = new Document();
        plainDoc.add(new StringField("other", "value", Field.Store.NO));
        writer.addDocument(plainDoc);
      }
    }
    try (IndexReader indexReader = DirectoryReader.open(dir)) {
      IndexSearcher indexSearcher = new IndexSearcher(indexReader);
      // Build the parent filter directly. Tests normally use a "check" filter that verifies
      // parent ids exist, but production code may not, so make sure the query handles missing
      // parents gracefully.
      TermQuery parentMarker = new TermQuery(new Term("docType", "_parent"));
      BitSetProducer parentsBitSet = new QueryBitSetProducer(parentMarker);

      // No filter.
      Query unfiltered = getParentJoinKnnQuery("field", new float[] {2, 2}, null, 3, parentsBitSet);
      TopDocs unfilteredHits = indexSearcher.search(unfiltered, 3);
      assertEquals(0, unfilteredHits.totalHits.value);
      assertEquals(0, unfilteredHits.scoreDocs.length);

      // Match-all filter combined with a large k, to test exact search.
      Query filtered =
          getParentJoinKnnQuery(
              "field", new float[] {2, 2}, new MatchAllDocsQuery(), 10, parentsBitSet);
      TopDocs filteredHits = indexSearcher.search(filtered, 3);
      assertEquals(0, filteredHits.totalHits.value);
      assertEquals(0, filteredHits.scoreDocs.length);
    }
  }
}
/**
 * Searches an index that contains vector documents but no document carrying the parent marker,
 * and checks that the parent-join kNN query yields no hits, both without a filter and with a
 * match-all filter.
 */
public void testIndexWithNoParents() throws IOException {
  try (Directory dir = newDirectory()) {
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      // Documents that carry a vector and an id, but no parent marker.
      for (int vectorDocNum = 0; vectorDocNum < 3; vectorDocNum++) {
        Document vectorDoc = new Document();
        vectorDoc.add(getKnnVectorField("field", new float[] {2, 2}));
        vectorDoc.add(newStringField("id", Integer.toString(vectorDocNum), Field.Store.YES));
        writer.addDocument(vectorDoc);
      }
      // Documents without a vector.
      for (int plainDocNum = 0; plainDocNum < 5; ++plainDocNum) {
        Document plainDoc = new Document();
        plainDoc.add(new StringField("other", "value", Field.Store.NO));
        writer.addDocument(plainDoc);
      }
    }
    try (IndexReader indexReader = DirectoryReader.open(dir)) {
      IndexSearcher indexSearcher = new IndexSearcher(indexReader);
      // Build the parent filter directly. Tests normally use a "check" filter that verifies
      // parent ids exist, but production code may not, so make sure the query handles missing
      // parents gracefully.
      TermQuery parentMarker = new TermQuery(new Term("docType", "_parent"));
      BitSetProducer parentsBitSet = new QueryBitSetProducer(parentMarker);

      // No filter.
      Query unfiltered = getParentJoinKnnQuery("field", new float[] {2, 2}, null, 3, parentsBitSet);
      TopDocs unfilteredHits = indexSearcher.search(unfiltered, 3);
      assertEquals(0, unfilteredHits.totalHits.value);
      assertEquals(0, unfilteredHits.scoreDocs.length);

      // Match-all filter combined with a large k, to test exact search.
      Query filtered =
          getParentJoinKnnQuery(
              "field", new float[] {2, 2}, new MatchAllDocsQuery(), 10, parentsBitSet);
      TopDocs filteredHits = indexSearcher.search(filtered, 3);
      assertEquals(0, filteredHits.totalHits.value);
      assertEquals(0, filteredHits.scoreDocs.length);
    }
  }
}
public void testFilterWithNoVectorMatches() throws IOException {
try (Directory indexStore =
getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0});