Fix FieldExistsQuery rewrite when all docs have vectors (#986)

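Previously, we compared the number of vectors in each segment against the total
number of documents in the whole IndexReader (reader.maxDoc()) instead of the
segment's own maxDoc (leaf.maxDoc()). As a result, FieldExistsQuery would not
rewrite to MatchAllDocsQuery when the index had multiple segments, even if every
document had a vector.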
This commit is contained in:
Julie Tibshirani 2022-06-27 09:59:53 +02:00 committed by GitHub
parent 03846b468e
commit 7b58088bd5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 12 additions and 5 deletions

View File

@ -131,6 +131,8 @@ Bug Fixes
* LUCENE-10607: Fix potential integer overflow in maxArcs computations (Tang Donghai)
* GITHUB#986: Fix FieldExistsQuery rewrite when all docs have vectors. (Julie Tibshirani)
Other
---------------------

View File

@ -126,7 +126,7 @@ public class FieldExistsQuery extends Query {
break;
}
} else if (fieldInfo.getVectorDimension() != 0) { // the field indexes vectors
if (leaf.getVectorValues(field).size() != reader.maxDoc()) {
if (leaf.getVectorValues(field).size() != leaf.maxDoc()) {
allReadersRewritable = false;
break;
}

View File

@ -626,13 +626,18 @@ public class TestFieldExistsQuery extends LuceneTestCase {
public void testKnnVectorAllDocsHaveField() throws IOException {
try (Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) {
Document doc = new Document();
doc.add(new KnnVectorField("vector", randomVector(3)));
iw.addDocument(doc);
for (int i = 0; i < 100; ++i) {
Document doc = new Document();
doc.add(new KnnVectorField("vector", randomVector(5)));
iw.addDocument(doc);
}
iw.commit();
try (IndexReader reader = iw.getReader()) {
IndexSearcher searcher = newSearcher(reader);
assertEquals(1, searcher.count(new FieldExistsQuery("vector")));
Query query = new FieldExistsQuery("vector");
assertTrue(searcher.rewrite(query) instanceof MatchAllDocsQuery);
assertEquals(100, searcher.count(query));
}
}
}