mirror of https://github.com/apache/lucene.git
ToParentBlockJoin[Byte|Float]KnnVectorQuery needs to handle the case when parents are missing (#12504)
This is a follow-up to https://github.com/apache/lucene/pull/12434. It adds a test for the case where parents are missing in the index and verifies that we return no hits. Previously this would have thrown an NPE.
This commit is contained in:
parent
47258cc9e9
commit
18b56bd002
|
@ -75,6 +75,9 @@ public class ToParentBlockJoinByteKnnVectorQuery extends KnnByteVectorQuery {
|
|||
return null;
|
||||
}
|
||||
BitSet parentBitSet = parentsFilter.getBitSet(context);
|
||||
if (parentBitSet == null) {
|
||||
return NO_RESULTS;
|
||||
}
|
||||
ParentBlockJoinByteVectorScorer vectorScorer =
|
||||
new ParentBlockJoinByteVectorScorer(
|
||||
context.reader().getByteVectorValues(field),
|
||||
|
@ -112,6 +115,9 @@ public class ToParentBlockJoinByteKnnVectorQuery extends KnnByteVectorQuery {
|
|||
protected TopDocs approximateSearch(LeafReaderContext context, Bits acceptDocs, int visitedLimit)
|
||||
throws IOException {
|
||||
BitSet parentBitSet = parentsFilter.getBitSet(context);
|
||||
if (parentBitSet == null) {
|
||||
return NO_RESULTS;
|
||||
}
|
||||
KnnCollector collector = new ToParentJoinKnnCollector(k, visitedLimit, parentBitSet);
|
||||
context.reader().searchNearestVectors(field, query, collector, acceptDocs);
|
||||
return collector.topDocs();
|
||||
|
|
|
@ -77,6 +77,9 @@ public class ToParentBlockJoinFloatKnnVectorQuery extends KnnFloatVectorQuery {
|
|||
return null;
|
||||
}
|
||||
BitSet parentBitSet = parentsFilter.getBitSet(context);
|
||||
if (parentBitSet == null) {
|
||||
return NO_RESULTS;
|
||||
}
|
||||
ParentBlockJoinFloatVectorScorer vectorScorer =
|
||||
new ParentBlockJoinFloatVectorScorer(
|
||||
context.reader().getFloatVectorValues(field),
|
||||
|
@ -114,6 +117,9 @@ public class ToParentBlockJoinFloatKnnVectorQuery extends KnnFloatVectorQuery {
|
|||
protected TopDocs approximateSearch(LeafReaderContext context, Bits acceptDocs, int visitedLimit)
|
||||
throws IOException {
|
||||
BitSet parentBitSet = parentsFilter.getBitSet(context);
|
||||
if (parentBitSet == null) {
|
||||
return NO_RESULTS;
|
||||
}
|
||||
KnnCollector collector = new ToParentJoinKnnCollector(k, visitedLimit, parentBitSet);
|
||||
context.reader().searchNearestVectors(field, query, collector, acceptDocs);
|
||||
return collector.topDocs();
|
||||
|
|
|
@ -88,6 +88,78 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testIndexWithNoVectorsNorParents() throws IOException {
|
||||
try (Directory d = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(d, new IndexWriterConfig())) {
|
||||
// Add some documents without a vector
|
||||
for (int i = 0; i < 5; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("other", "value", Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
}
|
||||
try (IndexReader reader = DirectoryReader.open(d)) {
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
// Create parent filter directly, tests use "check" to verify parentIds exist. Production
|
||||
// may not
|
||||
// verify we handle it gracefully
|
||||
BitSetProducer parentFilter =
|
||||
new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
|
||||
Query query = getParentJoinKnnQuery("field", new float[] {2, 2}, null, 3, parentFilter);
|
||||
TopDocs topDocs = searcher.search(query, 3);
|
||||
assertEquals(0, topDocs.totalHits.value);
|
||||
assertEquals(0, topDocs.scoreDocs.length);
|
||||
|
||||
// Test with match_all filter and large k to test exact search
|
||||
query =
|
||||
getParentJoinKnnQuery(
|
||||
"field", new float[] {2, 2}, new MatchAllDocsQuery(), 10, parentFilter);
|
||||
topDocs = searcher.search(query, 3);
|
||||
assertEquals(0, topDocs.totalHits.value);
|
||||
assertEquals(0, topDocs.scoreDocs.length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testIndexWithNoParents() throws IOException {
|
||||
try (Directory d = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(d, new IndexWriterConfig())) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
Document doc = new Document();
|
||||
doc.add(getKnnVectorField("field", new float[] {2, 2}));
|
||||
doc.add(newStringField("id", Integer.toString(i), Field.Store.YES));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
// Add some documents without a vector
|
||||
for (int i = 0; i < 5; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("other", "value", Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
}
|
||||
try (IndexReader reader = DirectoryReader.open(d)) {
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
// Create parent filter directly, tests use "check" to verify parentIds exist. Production
|
||||
// may not
|
||||
// verify we handle it gracefully
|
||||
BitSetProducer parentFilter =
|
||||
new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
|
||||
Query query = getParentJoinKnnQuery("field", new float[] {2, 2}, null, 3, parentFilter);
|
||||
TopDocs topDocs = searcher.search(query, 3);
|
||||
assertEquals(0, topDocs.totalHits.value);
|
||||
assertEquals(0, topDocs.scoreDocs.length);
|
||||
|
||||
// Test with match_all filter and large k to test exact search
|
||||
query =
|
||||
getParentJoinKnnQuery(
|
||||
"field", new float[] {2, 2}, new MatchAllDocsQuery(), 10, parentFilter);
|
||||
topDocs = searcher.search(query, 3);
|
||||
assertEquals(0, topDocs.totalHits.value);
|
||||
assertEquals(0, topDocs.scoreDocs.length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testFilterWithNoVectorMatches() throws IOException {
|
||||
try (Directory indexStore =
|
||||
getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0});
|
||||
|
|
Loading…
Reference in New Issue