From b8289abeebb23b10ea02b8a27d6b6c07deaa9e50 Mon Sep 17 00:00:00 2001 From: jimczi Date: Mon, 29 Jul 2019 10:04:49 +0200 Subject: [PATCH] LUCENE-8935: BooleanQuery with no scoring clause can now early terminate the query when the total hits is not requested. --- lucene/CHANGES.txt | 3 + .../lucene/search/Boolean2ScorerSupplier.java | 13 +++ .../lucene/search/TestBooleanScorer.java | 84 +++++++++++++++++++ 3 files changed, 100 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 79b1b5ce728..0dcee02af7a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -80,6 +80,9 @@ Optimizations * LUCENE-8922: DisjunctionMaxQuery more efficiently leverages impacts to skip non-competitive hits. (Adrien Grand) +* LUCENE-8935: BooleanQuery with no scoring clause can now early terminate the query when +the total hits is not requested. + Other * LUCENE-8778 LUCENE-8911: Define analyzer SPI names as static final fields and document the names in Javadocs. diff --git a/lucene/core/src/java/org/apache/lucene/search/Boolean2ScorerSupplier.java b/lucene/core/src/java/org/apache/lucene/search/Boolean2ScorerSupplier.java index e50eec49956..78eac0307d6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Boolean2ScorerSupplier.java +++ b/lucene/core/src/java/org/apache/lucene/search/Boolean2ScorerSupplier.java @@ -85,6 +85,19 @@ final class Boolean2ScorerSupplier extends ScorerSupplier { @Override public Scorer get(long leadCost) throws IOException { + Scorer scorer = getInternal(leadCost); + if (scoreMode == ScoreMode.TOP_SCORES && + subs.get(Occur.SHOULD).isEmpty() && subs.get(Occur.MUST).isEmpty()) { + // no scoring clauses (no SHOULD or MUST scorers) but TOP_SCORES is requested, so we wrap the scorer in + // a constant score scorer with a score of 0 in order to allow early termination + return scorer.twoPhaseIterator() != null ? 
+ new ConstantScoreScorer(weight, 0f, scoreMode, scorer.twoPhaseIterator()) : + new ConstantScoreScorer(weight, 0f, scoreMode, scorer.iterator()); + } + return scorer; + } + + private Scorer getInternal(long leadCost) throws IOException { // three cases: conjunction, disjunction, or mix leadCost = Math.min(leadCost, cost()); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java index 9e4351b5346..884d877e839 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java @@ -283,4 +283,88 @@ public class TestBooleanScorer extends LuceneTestCase { w.close(); dir.close(); } + + public void testFilterConstantScore() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new StringField("foo", "bar", Store.NO)); + doc.add(new StringField("foo", "bat", Store.NO)); + doc.add(new StringField("foo", "baz", Store.NO)); + w.addDocument(doc); + IndexReader reader = w.getReader(); + IndexSearcher searcher = new IndexSearcher(reader); + searcher.setQueryCache(null); + + { + // a single FILTER clause rewrites to a BoostQuery wrapping a ConstantScoreQuery + Query query = new BooleanQuery.Builder().add(new TermQuery(new Term("foo", "bar")), Occur.FILTER).build(); + Query rewrite = searcher.rewrite(query); + assertTrue(rewrite instanceof BoostQuery); + assertTrue(((BoostQuery) rewrite).getQuery() instanceof ConstantScoreQuery); + } + + Query[] queries = new Query[] { + new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER) + .add(new TermQuery(new Term("foo", "baz")), Occur.FILTER) + .build(), + new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "baz")), Occur.FILTER) + // non-existing term + .add(new TermQuery(new Term("foo", "arf")), Occur.SHOULD) + .build(), + new 
BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "baz")), Occur.FILTER) + .add(new TermQuery(new Term("foo", "baz")), Occur.FILTER) + // non-existing term + .add(new TermQuery(new Term("foo", "arf")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "arw")), Occur.SHOULD) + .build() + }; + for (Query query : queries) { + Query rewrite = searcher.rewrite(query); + for (ScoreMode scoreMode : ScoreMode.values()) { + Weight weight = searcher.createWeight(rewrite, scoreMode, 1f); + Scorer scorer = weight.scorer(reader.leaves().get(0)); + if (scoreMode == ScoreMode.TOP_SCORES) { + assertTrue(scorer instanceof ConstantScoreScorer); + } else { + assertFalse(scorer instanceof ConstantScoreScorer); + } + } + } + + queries = new Query[]{ + new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER) + .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD) + .build(), + new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER) + .add(new TermQuery(new Term("foo", "baz")), Occur.MUST) + // non-existing term + .add(new TermQuery(new Term("foo", "arf")), Occur.SHOULD) + .build(), + new BooleanQuery.Builder() + // existing term + .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER) + .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD) + // non-existing term + .add(new TermQuery(new Term("foo", "arf")), Occur.MUST) + .build() + }; + for (Query query : queries) { + Query rewrite = searcher.rewrite(query); + for (ScoreMode scoreMode : ScoreMode.values()) { + Weight weight = searcher.createWeight(rewrite, scoreMode, 1f); + Scorer scorer = weight.scorer(reader.leaves().get(0)); + assertFalse(scorer instanceof ConstantScoreScorer); + } + } + + reader.close(); + w.close(); + dir.close(); + } }