mirror of https://github.com/apache/lucene.git
LUCENE-10263: Implement Weight.count() on NormsFieldExistsQuery (#477)
If all documents in the segment have a value, then `Reader.getDocCount()` will equal `maxDoc` and we can return `numDocs` as a shortcut.
This commit is contained in:
parent 4f5b41a71c
commit 749b744c0c
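For orientation, the sketch below is not part of the commit; the class name, field name, in-memory directory, and document count are all illustrative. It sets up the situation the shortcut targets: TextField indexes norms by default, so every document gets a norm for "text", getDocCount("text") equals maxDoc on each segment, and the new Weight#count can be answered from index statistics instead of visiting matches.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NormsFieldExistsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class NormsCountShortcutDemo {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory()) {
      try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
        for (int i = 0; i < 100; i++) {
          Document doc = new Document();
          // TextField indexes norms by default, so every document has a norm for "text".
          doc.add(new TextField("text", "value " + i, Store.NO));
          writer.addDocument(doc);
        }
      }
      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Query query = new NormsFieldExistsQuery("text");
        Weight weight =
            searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
        for (LeafReaderContext leaf : reader.leaves()) {
          // Every doc in this segment has the field, so the count comes from numDocs().
          System.out.println("segment " + leaf.ord + " count: " + weight.count(leaf));
        }
        System.out.println("total matches: " + searcher.count(query)); // 100
      }
    }
  }
}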
@@ -51,6 +51,9 @@ New Features
   points are indexed.
   (Quentin Pradet, Adrien Grand)
 
+* LUCENE-10263: Added Weight#count to NormsFieldExistsQuery to speed up the query if all
+  documents have the field. (Alan Woodward)
+
 Improvements
 ---------------------
 
@@ -79,6 +79,20 @@ public final class NormsFieldExistsQuery extends Query {
         return new ConstantScoreScorer(this, score(), scoreMode, iterator);
       }
 
+      @Override
+      public int count(LeafReaderContext context) throws IOException {
+        final LeafReader reader = context.reader();
+        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
+        if (fieldInfo == null || fieldInfo.hasNorms() == false) {
+          return 0;
+        }
+        // If every document has a value then we can shortcut
+        if (reader.getDocCount(field) == reader.maxDoc()) {
+          return reader.numDocs();
+        }
+        return super.count(context);
+      }
+
       @Override
       public boolean isCacheable(LeafReaderContext ctx) {
         return true;
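Two details of the return value are worth spelling out. The shortcut returns numDocs() rather than maxDoc(): getDocCount(field) and maxDoc() are segment statistics that ignore deletions, so the equality still holds on a segment with deleted documents, while numDocs() counts only live documents, which is what the query actually matches (the test below exercises this with deleted docs). When the shortcut does not apply, the method falls back to super.count, which may report -1, meaning the count cannot be computed cheaply and callers have to count matches themselves; the test's assertCountWithoutShortcut checks exactly this. The helper below is hypothetical (the class and method names are made up for illustration) and shows how a caller can combine per-leaf counts with that fallback:

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Weight;

final class CountShortcutExample {
  /** Sums per-leaf counts, falling back to IndexSearcher#count if any leaf reports -1. */
  static int countOrIterate(IndexSearcher searcher, Query query) throws IOException {
    Weight weight =
        searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
    int total = 0;
    for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
      int leafCount = weight.count(leaf);
      if (leafCount == -1) {
        // This segment cannot be counted from statistics; count by iterating matches instead.
        return searcher.count(query);
      }
      total += leafCount;
    }
    return total;
  }
}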
@@ -18,15 +18,22 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
 
 public class TestNormsFieldExistsQuery extends LuceneTestCase {
 
@@ -199,4 +206,68 @@ public class TestNormsFieldExistsQuery extends LuceneTestCase {
       }
     }
   }
+
+  public void testQueryMatchesCount() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+    int randomNumDocs = TestUtil.nextInt(random(), 10, 100);
+
+    FieldType noNormsFieldType = new FieldType();
+    noNormsFieldType.setOmitNorms(true);
+    noNormsFieldType.setIndexOptions(IndexOptions.DOCS);
+
+    Document doc = new Document();
+    doc.add(new TextField("text", "always here", Store.NO));
+    doc.add(new TextField("text_s", "", Store.NO));
+    doc.add(new Field("text_n", "always here", noNormsFieldType));
+    w.addDocument(doc);
+
+    for (int i = 1; i < randomNumDocs; i++) {
+      doc.clear();
+      doc.add(new TextField("text", "some text", Store.NO));
+      doc.add(new TextField("text_s", "some text", Store.NO));
+      doc.add(new Field("text_n", "some here", noNormsFieldType));
+      w.addDocument(doc);
+    }
+    w.forceMerge(1);
+
+    DirectoryReader reader = w.getReader();
+    final IndexSearcher searcher = new IndexSearcher(reader);
+
+    assertCountWithShortcut(searcher, "text", randomNumDocs);
+    assertCountWithShortcut(searcher, "doesNotExist", 0);
+    assertCountWithShortcut(searcher, "text_n", 0);
+
+    // docs that have a text field that analyzes to an empty token
+    // stream still have a recorded norm value but don't show up in
+    // Reader.getDocCount(field), so we can't use the shortcut for
+    // these fields
+    assertCountWithoutShortcut(searcher, "text_s", randomNumDocs);
+
+    // We can still shortcut with deleted docs
+    w.w.getConfig().setMergePolicy(NoMergePolicy.INSTANCE);
+    w.deleteDocuments(new Term("text", "text")); // deletes all but the first doc
+    DirectoryReader reader2 = w.getReader();
+    final IndexSearcher searcher2 = new IndexSearcher(reader2);
+    assertCountWithShortcut(searcher2, "text", 1);
+
+    IOUtils.close(reader, reader2, w, dir);
+  }
+
+  private void assertCountWithoutShortcut(IndexSearcher searcher, String field, int expectedCount)
+      throws IOException {
+    final Query q = new NormsFieldExistsQuery(field);
+    final Weight weight = searcher.createWeight(q, ScoreMode.COMPLETE, 1);
+    assertEquals(-1, weight.count(searcher.reader.leaves().get(0)));
+    assertEquals(expectedCount, searcher.count(q));
+  }
+
+  private void assertCountWithShortcut(IndexSearcher searcher, String field, int numMatchingDocs)
+      throws IOException {
+    final Query testQuery = new NormsFieldExistsQuery(field);
+    assertEquals(numMatchingDocs, searcher.count(testQuery));
+    final Weight weight = searcher.createWeight(testQuery, ScoreMode.COMPLETE, 1);
+    assertEquals(numMatchingDocs, weight.count(searcher.reader.leaves().get(0)));
+  }
 }