LUCENE-6754: Optimized IndexSearcher.count for simple queries.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1700791 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-09-02 12:36:37 +00:00
parent 0baae2f832
commit 5330bd6d2b
4 changed files with 64 additions and 15 deletions

View File

@ -83,6 +83,9 @@ Optimizations
* LUCENE-6746: DisjunctionMaxQuery, BoostingQuery and BoostedQuery now create * LUCENE-6746: DisjunctionMaxQuery, BoostingQuery and BoostedQuery now create
sub weights through IndexSearcher so that they can be cached. (Adrien Grand) sub weights through IndexSearcher so that they can be cached. (Adrien Grand)
* LUCENE-6754: Optimized IndexSearcher.count for the cases when it can use
index statistics instead of collecting all matches. (Adrien Grand)
Bug Fixes Bug Fixes
* LUCENE-6730: Hyper-parameter c is ignored in term frequency NormalizationH1. * LUCENE-6730: Hyper-parameter c is ignored in term frequency NormalizationH1.

View File

@ -339,6 +339,29 @@ public class IndexSearcher {
* Count how many documents match the given query. * Count how many documents match the given query.
*/ */
public int count(Query query) throws IOException { public int count(Query query) throws IOException {
query = rewrite(query);
while (true) {
// remove wrappers that don't matter for counts
if (query instanceof ConstantScoreQuery) {
query = ((ConstantScoreQuery) query).getQuery();
} else {
break;
}
}
// some counts can be computed in constant time
if (query instanceof MatchAllDocsQuery) {
return reader.numDocs();
} else if (query instanceof TermQuery && reader.hasDeletions() == false) {
Term term = ((TermQuery) query).getTerm();
int count = 0;
for (LeafReaderContext leaf : reader.leaves()) {
count += leaf.reader().docFreq(term);
}
return count;
}
// general case: create a collector and count matches
final CollectorManager<TotalHitCountCollector, Integer> collectorManager = new CollectorManager<TotalHitCountCollector, Integer>() { final CollectorManager<TotalHitCountCollector, Integer> collectorManager = new CollectorManager<TotalHitCountCollector, Integer>() {
@Override @Override

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.Arrays;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.ThreadPoolExecutor;
@ -31,6 +32,7 @@ import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
@ -42,7 +44,7 @@ import org.junit.Test;
public class TestIndexSearcher extends LuceneTestCase { public class TestIndexSearcher extends LuceneTestCase {
Directory dir; Directory dir;
IndexReader reader; IndexReader reader;
@Override @Override
public void setUp() throws Exception { public void setUp() throws Exception {
super.setUp(); super.setUp();
@ -58,20 +60,20 @@ public class TestIndexSearcher extends LuceneTestCase {
reader = iw.getReader(); reader = iw.getReader();
iw.close(); iw.close();
} }
@Override @Override
public void tearDown() throws Exception { public void tearDown() throws Exception {
super.tearDown(); super.tearDown();
reader.close(); reader.close();
dir.close(); dir.close();
} }
// should not throw exception // should not throw exception
public void testHugeN() throws Exception { public void testHugeN() throws Exception {
ExecutorService service = new ThreadPoolExecutor(4, 4, 0L, TimeUnit.MILLISECONDS, ExecutorService service = new ThreadPoolExecutor(4, 4, 0L, TimeUnit.MILLISECONDS,
new LinkedBlockingQueue<Runnable>(), new LinkedBlockingQueue<Runnable>(),
new NamedThreadFactory("TestIndexSearcher")); new NamedThreadFactory("TestIndexSearcher"));
IndexSearcher searchers[] = new IndexSearcher[] { IndexSearcher searchers[] = new IndexSearcher[] {
new IndexSearcher(reader), new IndexSearcher(reader),
new IndexSearcher(reader, service) new IndexSearcher(reader, service)
@ -88,7 +90,7 @@ public class TestIndexSearcher extends LuceneTestCase {
null, null,
new FieldDoc(0, 0f, new Object[] { new BytesRef("boo!") }) new FieldDoc(0, 0f, new Object[] { new BytesRef("boo!") })
}; };
for (IndexSearcher searcher : searchers) { for (IndexSearcher searcher : searchers) {
for (ScoreDoc after : afters) { for (ScoreDoc after : afters) {
for (Query query : queries) { for (Query query : queries) {
@ -111,10 +113,10 @@ public class TestIndexSearcher extends LuceneTestCase {
} }
} }
} }
TestUtil.shutdownExecutorService(service); TestUtil.shutdownExecutorService(service);
} }
@Test @Test
public void testSearchAfterPassedMaxDoc() throws Exception { public void testSearchAfterPassedMaxDoc() throws Exception {
// LUCENE-5128: ensure we get a meaningful message if searchAfter exceeds maxDoc // LUCENE-5128: ensure we get a meaningful message if searchAfter exceeds maxDoc
@ -123,7 +125,7 @@ public class TestIndexSearcher extends LuceneTestCase {
w.addDocument(new Document()); w.addDocument(new Document());
IndexReader r = w.getReader(); IndexReader r = w.getReader();
w.close(); w.close();
IndexSearcher s = new IndexSearcher(r); IndexSearcher s = new IndexSearcher(r);
try { try {
s.searchAfter(new ScoreDoc(r.maxDoc(), 0.54f), new MatchAllDocsQuery(), 10); s.searchAfter(new ScoreDoc(r.maxDoc(), 0.54f), new MatchAllDocsQuery(), 10);
@ -144,15 +146,36 @@ public class TestIndexSearcher extends LuceneTestCase {
if (random().nextBoolean()) { if (random().nextBoolean()) {
doc.add(new StringField("foo", "bar", Store.NO)); doc.add(new StringField("foo", "bar", Store.NO));
} }
if (random().nextBoolean()) {
doc.add(new StringField("foo", "baz", Store.NO));
}
if (rarely()) {
doc.add(new StringField("delete", "yes", Store.NO));
}
w.addDocument(doc); w.addDocument(doc);
} }
w.commit(); for (boolean delete : new boolean[] {false, true}) {
final IndexReader reader = w.getReader(); if (delete) {
w.deleteDocuments(new Term("delete", "yes"));
}
final IndexReader reader = w.getReader();
final IndexSearcher searcher = newSearcher(reader);
// Test multiple queries, some of which are optimized by IndexSearcher.count()
for (Query query : Arrays.asList(
new MatchAllDocsQuery(),
new MatchNoDocsQuery(),
new TermQuery(new Term("foo", "bar")),
new ConstantScoreQuery(new TermQuery(new Term("foo", "baz"))),
new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
.build()
)) {
assertEquals(searcher.count(query), searcher.search(query, 1).totalHits);
}
reader.close();
}
w.close(); w.close();
final IndexSearcher searcher = newSearcher(reader);
final Query query = new TermQuery(new Term("foo", "bar"));
assertEquals(searcher.count(query), searcher.search(query, 1).totalHits);
reader.close();
dir.close(); dir.close();
} }

View File

@ -971,7 +971,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
try { try {
// trigger an eviction // trigger an eviction
searcher.count(new MatchAllDocsQuery()); searcher.search(new MatchAllDocsQuery(), new TotalHitCountCollector());
fail(); fail();
} catch (ConcurrentModificationException e) { } catch (ConcurrentModificationException e) {
// expected // expected