From 627b1ea6d127b5a52edf19ce56b0a8801bcb1469 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Thu, 10 Aug 2017 11:47:26 +0200 Subject: [PATCH 1/2] Update CHANGES for 7.0 --- lucene/CHANGES.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 8edd0bdba5e..ebbbc7698ed 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -26,10 +26,6 @@ Optimizations Bug Fixes -* LUCENE-7914: Add a maximum recursion level in automaton recursive - functions (Operations.isFinite and Operations.topsortState) to prevent - large automaton to overflow the stack (Robert Muir, Adrien Grand, Jim Ferenczi) - * LUCENE-7916: Prevent ArrayIndexOutOfBoundsException if ICUTokenizer is used with a different ICU JAR version than it is compiled against. Note, this is not recommended, lucene-analyzers-icu contains binary data structures @@ -151,6 +147,10 @@ Bug Fixes * LUCENE-7871: fix false positive match in BlockJoinSelector when children have no value, introducing wrap methods accepting children as DISI. Extracting ToParentDocValues (Mikhail Khludnev) +* LUCENE-7914: Add a maximum recursion level in automaton recursive + functions (Operations.isFinite and Operations.topsortState) to prevent + large automaton to overflow the stack (Robert Muir, Adrien Grand, Jim Ferenczi) + Improvements * LUCENE-7489: Better storage of sparse doc-values fields with the default From 9c83d025e401bb0d454e9de9b40572e47d5da317 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 10 Aug 2017 11:51:30 +0200 Subject: [PATCH 2/2] LUCENE-7897: IndexOrDocValuesQuery now requires the range cost to be more than 8x greater than the cost of the lead iterator in order to use doc values. --- lucene/CHANGES.txt | 4 + .../lucene/document/RangeFieldQuery.java | 6 +- .../lucene/search/Boolean2ScorerSupplier.java | 76 ++----- .../apache/lucene/search/BooleanWeight.java | 2 +- .../lucene/search/ConstantScoreQuery.java | 6 +- .../lucene/search/IndexOrDocValuesQuery.java | 17 +- .../apache/lucene/search/LRUQueryCache.java | 4 +- .../apache/lucene/search/PointRangeQuery.java | 6 +- .../apache/lucene/search/ScorerSupplier.java | 15 +- .../java/org/apache/lucene/search/Weight.java | 2 +- .../search/TestBoolean2ScorerSupplier.java | 193 ++++++++++-------- .../TestBooleanQueryVisitSubscorers.java | 2 +- .../lucene/search/TestLRUQueryCache.java | 6 +- .../search/join/ToParentBlockJoinQuery.java | 6 +- .../document/LatLonPointDistanceQuery.java | 4 +- .../apache/lucene/search/AssertingWeight.java | 7 +- 16 files changed, 179 insertions(+), 177 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index ebbbc7698ed..bb3e19d24b2 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -24,6 +24,10 @@ Optimizations * LUCENE-7655: Speed up geo-distance queries in case of dense single-valued fields when most documents match. (Maciej Zasada via Adrien Grand) +* LUCENE-7897: IndexOrDocValuesQuery now requires the range cost to be more + than 8x greater than the cost of the lead iterator in order to use doc values. + (Murali Krishna P via Adrien Grand) + Bug Fixes * LUCENE-7916: Prevent ArrayIndexOutOfBoundsException if ICUTokenizer is used diff --git a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java index fd3da1e6efd..c43b708bcd4 100644 --- a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java @@ -312,7 +312,7 @@ abstract class RangeFieldQuery extends Query { if (allDocsMatch) { return new ScorerSupplier() { @Override - public Scorer get(boolean randomAccess) { + public Scorer get(long leadCost) { return new ConstantScoreScorer(weight, score(), DocIdSetIterator.all(reader.maxDoc())); } @@ -329,7 +329,7 @@ abstract class RangeFieldQuery extends Query { long cost = -1; @Override - public Scorer get(boolean randomAccess) throws IOException { + public Scorer get(long leadCost) throws IOException { values.intersect(visitor); DocIdSetIterator iterator = result.build().iterator(); return new ConstantScoreScorer(weight, score(), iterator); @@ -354,7 +354,7 @@ abstract class RangeFieldQuery extends Query { if (scorerSupplier == null) { return null; } - return scorerSupplier.get(false); + return scorerSupplier.get(Long.MAX_VALUE); } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/Boolean2ScorerSupplier.java b/lucene/core/src/java/org/apache/lucene/search/Boolean2ScorerSupplier.java index 4540c852fc6..b2a243efa9b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Boolean2ScorerSupplier.java +++ b/lucene/core/src/java/org/apache/lucene/search/Boolean2ScorerSupplier.java @@ -26,7 +26,6 @@ import java.util.OptionalLong; import java.util.stream.Stream; import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.util.PriorityQueue; final class Boolean2ScorerSupplier extends ScorerSupplier { @@ -84,17 +83,18 @@ final class Boolean2ScorerSupplier extends ScorerSupplier { } @Override - public Scorer get(boolean randomAccess) throws IOException { + public Scorer get(long leadCost) throws IOException { // three cases: conjunction, disjunction, or mix + leadCost = Math.min(leadCost, cost()); // pure conjunction if (subs.get(Occur.SHOULD).isEmpty()) { - return excl(req(subs.get(Occur.FILTER), subs.get(Occur.MUST), randomAccess), subs.get(Occur.MUST_NOT)); + return excl(req(subs.get(Occur.FILTER), subs.get(Occur.MUST), leadCost), subs.get(Occur.MUST_NOT), leadCost); } // pure disjunction if (subs.get(Occur.FILTER).isEmpty() && subs.get(Occur.MUST).isEmpty()) { - return excl(opt(subs.get(Occur.SHOULD), minShouldMatch, needsScores, randomAccess), subs.get(Occur.MUST_NOT)); + return excl(opt(subs.get(Occur.SHOULD), minShouldMatch, needsScores, leadCost), subs.get(Occur.MUST_NOT), leadCost); } // conjunction-disjunction mix: @@ -103,38 +103,23 @@ final class Boolean2ScorerSupplier extends ScorerSupplier { // optional side must match. otherwise it's required + optional if (minShouldMatch > 0) { - boolean reqRandomAccess = true; - boolean msmRandomAccess = true; - if (randomAccess == false) { - // We need to figure out whether the MUST/FILTER or the SHOULD clauses would lead the iteration - final long reqCost = Stream.concat( - subs.get(Occur.MUST).stream(), - subs.get(Occur.FILTER).stream()) - .mapToLong(ScorerSupplier::cost) - .min().getAsLong(); - final long msmCost = MinShouldMatchSumScorer.cost( - subs.get(Occur.SHOULD).stream().mapToLong(ScorerSupplier::cost), - subs.get(Occur.SHOULD).size(), minShouldMatch); - reqRandomAccess = reqCost > msmCost; - msmRandomAccess = msmCost > reqCost; - } - Scorer req = excl(req(subs.get(Occur.FILTER), subs.get(Occur.MUST), reqRandomAccess), subs.get(Occur.MUST_NOT)); - Scorer opt = opt(subs.get(Occur.SHOULD), minShouldMatch, needsScores, msmRandomAccess); + Scorer req = excl(req(subs.get(Occur.FILTER), subs.get(Occur.MUST), leadCost), subs.get(Occur.MUST_NOT), leadCost); + Scorer opt = opt(subs.get(Occur.SHOULD), minShouldMatch, needsScores, leadCost); return new ConjunctionScorer(weight, Arrays.asList(req, opt), Arrays.asList(req, opt)); } else { assert needsScores; return new ReqOptSumScorer( - excl(req(subs.get(Occur.FILTER), subs.get(Occur.MUST), randomAccess), subs.get(Occur.MUST_NOT)), - opt(subs.get(Occur.SHOULD), minShouldMatch, needsScores, true)); + excl(req(subs.get(Occur.FILTER), subs.get(Occur.MUST), leadCost), subs.get(Occur.MUST_NOT), leadCost), + opt(subs.get(Occur.SHOULD), minShouldMatch, needsScores, leadCost)); } } /** Create a new scorer for the given required clauses. Note that * {@code requiredScoring} is a subset of {@code required} containing * required clauses that should participate in scoring. */ - private Scorer req(Collection requiredNoScoring, Collection requiredScoring, boolean randomAccess) throws IOException { + private Scorer req(Collection requiredNoScoring, Collection requiredScoring, long leadCost) throws IOException { if (requiredNoScoring.size() + requiredScoring.size() == 1) { - Scorer req = (requiredNoScoring.isEmpty() ? requiredScoring : requiredNoScoring).iterator().next().get(randomAccess); + Scorer req = (requiredNoScoring.isEmpty() ? requiredScoring : requiredNoScoring).iterator().next().get(leadCost); if (needsScores == false) { return req; @@ -158,16 +143,13 @@ final class Boolean2ScorerSupplier extends ScorerSupplier { return req; } else { - long minCost = Math.min( - requiredNoScoring.stream().mapToLong(ScorerSupplier::cost).min().orElse(Long.MAX_VALUE), - requiredScoring.stream().mapToLong(ScorerSupplier::cost).min().orElse(Long.MAX_VALUE)); List requiredScorers = new ArrayList<>(); List scoringScorers = new ArrayList<>(); for (ScorerSupplier s : requiredNoScoring) { - requiredScorers.add(s.get(randomAccess || s.cost() > minCost)); + requiredScorers.add(s.get(leadCost)); } for (ScorerSupplier s : requiredScoring) { - Scorer scorer = s.get(randomAccess || s.cost() > minCost); + Scorer scorer = s.get(leadCost); requiredScorers.add(scorer); scoringScorers.add(scorer); } @@ -175,42 +157,28 @@ final class Boolean2ScorerSupplier extends ScorerSupplier { } } - private Scorer excl(Scorer main, Collection prohibited) throws IOException { + private Scorer excl(Scorer main, Collection prohibited, long leadCost) throws IOException { if (prohibited.isEmpty()) { return main; } else { - return new ReqExclScorer(main, opt(prohibited, 1, false, true)); + return new ReqExclScorer(main, opt(prohibited, 1, false, leadCost)); } } private Scorer opt(Collection optional, int minShouldMatch, - boolean needsScores, boolean randomAccess) throws IOException { + boolean needsScores, long leadCost) throws IOException { if (optional.size() == 1) { - return optional.iterator().next().get(randomAccess); - } else if (minShouldMatch > 1) { - final List optionalScorers = new ArrayList<>(); - final PriorityQueue pq = new PriorityQueue(subs.get(Occur.SHOULD).size() - minShouldMatch + 1) { - @Override - protected boolean lessThan(ScorerSupplier a, ScorerSupplier b) { - return a.cost() > b.cost(); - } - }; - for (ScorerSupplier scorer : subs.get(Occur.SHOULD)) { - ScorerSupplier overflow = pq.insertWithOverflow(scorer); - if (overflow != null) { - optionalScorers.add(overflow.get(true)); - } - } - for (ScorerSupplier scorer : pq) { - optionalScorers.add(scorer.get(randomAccess)); - } - return new MinShouldMatchSumScorer(weight, optionalScorers, minShouldMatch); + return optional.iterator().next().get(leadCost); } else { final List optionalScorers = new ArrayList<>(); for (ScorerSupplier scorer : optional) { - optionalScorers.add(scorer.get(randomAccess)); + optionalScorers.add(scorer.get(leadCost)); + } + if (minShouldMatch > 1) { + return new MinShouldMatchSumScorer(weight, optionalScorers, minShouldMatch); + } else { + return new DisjunctionSumScorer(weight, optionalScorers, needsScores); } - return new DisjunctionSumScorer(weight, optionalScorers, needsScores); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java index dc44d53cd8a..778cb639f2c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java @@ -296,7 +296,7 @@ final class BooleanWeight extends Weight { if (scorerSupplier == null) { return null; } - return scorerSupplier.get(false); + return scorerSupplier.get(Long.MAX_VALUE); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java index 8827a9f9385..72dc442d7cc 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -132,8 +132,8 @@ public final class ConstantScoreQuery extends Query { } return new ScorerSupplier() { @Override - public Scorer get(boolean randomAccess) throws IOException { - final Scorer innerScorer = innerScorerSupplier.get(randomAccess); + public Scorer get(long leadCost) throws IOException { + final Scorer innerScorer = innerScorerSupplier.get(leadCost); final float score = score(); return new FilterScorer(innerScorer) { @Override @@ -164,7 +164,7 @@ public final class ConstantScoreQuery extends Query { if (scorerSupplier == null) { return null; } - return scorerSupplier.get(false); + return scorerSupplier.get(Long.MAX_VALUE); } }; diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java index 35067d2105d..be14815ab3a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java @@ -141,13 +141,22 @@ public final class IndexOrDocValuesQuery extends Query { } return new ScorerSupplier() { @Override - public Scorer get(boolean randomAccess) throws IOException { - return (randomAccess ? dvScorerSupplier : indexScorerSupplier).get(randomAccess); + public Scorer get(long leadCost) throws IOException { + // At equal costs, doc values tend to be worse than points since they + // still need to perform one comparison per document while points can + // do much better than that given how values are organized. So we give + // an arbitrary 8x penalty to doc values. + final long threshold = cost() >>> 3; + if (threshold <= leadCost) { + return indexScorerSupplier.get(leadCost); + } else { + return dvScorerSupplier.get(leadCost); + } } @Override public long cost() { - return Math.min(indexScorerSupplier.cost(), dvScorerSupplier.cost()); + return indexScorerSupplier.cost(); } }; } @@ -158,7 +167,7 @@ public final class IndexOrDocValuesQuery extends Query { if (scorerSupplier == null) { return null; } - return scorerSupplier.get(false); + return scorerSupplier.get(Long.MAX_VALUE); } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java index e30de031569..a682852fda6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java @@ -767,7 +767,7 @@ public class LRUQueryCache implements QueryCache, Accountable { return new ScorerSupplier() { @Override - public Scorer get(boolean randomAccess) throws IOException { + public Scorer get(long LeadCost) throws IOException { return new ConstantScoreScorer(CachingWrapperWeight.this, 0f, disi); } @@ -785,7 +785,7 @@ public class LRUQueryCache implements QueryCache, Accountable { if (scorerSupplier == null) { return null; } - return scorerSupplier.get(false); + return scorerSupplier.get(Long.MAX_VALUE); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 7c997caf08a..4f1076d4e9e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -262,7 +262,7 @@ public abstract class PointRangeQuery extends Query { // all docs have a value and all points are within bounds, so everything matches return new ScorerSupplier() { @Override - public Scorer get(boolean randomAccess) { + public Scorer get(long leadCost) { return new ConstantScoreScorer(weight, score(), DocIdSetIterator.all(reader.maxDoc())); } @@ -280,7 +280,7 @@ public abstract class PointRangeQuery extends Query { long cost = -1; @Override - public Scorer get(boolean randomAccess) throws IOException { + public Scorer get(long leadCost) throws IOException { if (values.getDocCount() == reader.maxDoc() && values.getDocCount() == values.size() && cost() > reader.maxDoc() / 2) { @@ -319,7 +319,7 @@ public abstract class PointRangeQuery extends Query { if (scorerSupplier == null) { return null; } - return scorerSupplier.get(false); + return scorerSupplier.get(Long.MAX_VALUE); } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ScorerSupplier.java b/lucene/core/src/java/org/apache/lucene/search/ScorerSupplier.java index 3f6906a0aa0..21bf760c700 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScorerSupplier.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScorerSupplier.java @@ -27,15 +27,14 @@ public abstract class ScorerSupplier { /** * Get the {@link Scorer}. This may not return {@code null} and must be called * at most once. - * @param randomAccess A hint about the expected usage of the {@link Scorer}. - * If {@link DocIdSetIterator#advance} or {@link TwoPhaseIterator} will be - * used to check whether given doc ids match, then pass {@code true}. - * Otherwise if the {@link Scorer} will be mostly used to lead the iteration - * using {@link DocIdSetIterator#nextDoc()}, then {@code false} should be - * passed. Under doubt, pass {@code false} which usually has a better - * worst-case. + * @param leadCost Cost of the scorer that will be used in order to lead + * iteration. This can be interpreted as an upper bound of the number of times + * that {@link DocIdSetIterator#nextDoc}, {@link DocIdSetIterator#advance} + * and {@link TwoPhaseIterator#matches} will be called. Under doubt, pass + * {@link Long#MAX_VALUE}, which will produce a {@link Scorer} that has good + * iteration capabilities. */ - public abstract Scorer get(boolean randomAccess) throws IOException; + public abstract Scorer get(long leadCost) throws IOException; /** * Get an estimate of the {@link Scorer} that would be returned by {@link #get}. diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index eef052d603a..af329ec7ba1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -116,7 +116,7 @@ public abstract class Weight { } return new ScorerSupplier() { @Override - public Scorer get(boolean randomAccess) { + public Scorer get(long leadCost) { return scorer; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java index 7f46a22087b..60c9551a00b 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java @@ -70,22 +70,22 @@ public class TestBoolean2ScorerSupplier extends LuceneTestCase { private static class FakeScorerSupplier extends ScorerSupplier { private final long cost; - private final Boolean randomAccess; + private final Long leadCost; FakeScorerSupplier(long cost) { this.cost = cost; - this.randomAccess = null; + this.leadCost = null; } - FakeScorerSupplier(long cost, boolean randomAccess) { + FakeScorerSupplier(long cost, long leadCost) { this.cost = cost; - this.randomAccess = randomAccess; + this.leadCost = leadCost; } @Override - public Scorer get(boolean randomAccess) throws IOException { - if (this.randomAccess != null) { - assertEquals(this.toString(), this.randomAccess, randomAccess); + public Scorer get(long leadCost) throws IOException { + if (this.leadCost != null) { + assertEquals(this.toString(), this.leadCost.longValue(), leadCost); } return new FakeScorer(cost); } @@ -97,7 +97,7 @@ public class TestBoolean2ScorerSupplier extends LuceneTestCase { @Override public String toString() { - return "FakeLazyScorer(cost=" + cost + ",randomAccess=" + randomAccess + ")"; + return "FakeLazyScorer(cost=" + cost + ",leadCost=" + leadCost + ")"; } } @@ -127,17 +127,17 @@ public class TestBoolean2ScorerSupplier extends LuceneTestCase { subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42)); ScorerSupplier s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0); assertEquals(42, s.cost()); - assertEquals(42, s.get(random().nextBoolean()).iterator().cost()); + assertEquals(42, s.get(random().nextInt(100)).iterator().cost()); subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12)); s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0); assertEquals(42 + 12, s.cost()); - assertEquals(42 + 12, s.get(random().nextBoolean()).iterator().cost()); + assertEquals(42 + 12, s.get(random().nextInt(100)).iterator().cost()); subs.get(Occur.SHOULD).add(new FakeScorerSupplier(20)); s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0); assertEquals(42 + 12 + 20, s.cost()); - assertEquals(42 + 12 + 20, s.get(random().nextBoolean()).iterator().cost()); + assertEquals(42 + 12 + 20, s.get(random().nextInt(100)).iterator().cost()); } public void testDisjunctionWithMinShouldMatchCost() throws IOException { @@ -150,26 +150,26 @@ public class TestBoolean2ScorerSupplier extends LuceneTestCase { subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12)); ScorerSupplier s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 1); assertEquals(42 + 12, s.cost()); - assertEquals(42 + 12, s.get(random().nextBoolean()).iterator().cost()); + assertEquals(42 + 12, s.get(random().nextInt(100)).iterator().cost()); subs.get(Occur.SHOULD).add(new FakeScorerSupplier(20)); s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 1); assertEquals(42 + 12 + 20, s.cost()); - assertEquals(42 + 12 + 20, s.get(random().nextBoolean()).iterator().cost()); + assertEquals(42 + 12 + 20, s.get(random().nextInt(100)).iterator().cost()); s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 2); assertEquals(12 + 20, s.cost()); - assertEquals(12 + 20, s.get(random().nextBoolean()).iterator().cost()); + assertEquals(12 + 20, s.get(random().nextInt(100)).iterator().cost()); subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30)); s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 1); assertEquals(42 + 12 + 20 + 30, s.cost()); - assertEquals(42 + 12 + 20 + 30, s.get(random().nextBoolean()).iterator().cost()); + assertEquals(42 + 12 + 20 + 30, s.get(random().nextInt(100)).iterator().cost()); s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 2); assertEquals(12 + 20 + 30, s.cost()); - assertEquals(12 + 20 + 30, s.get(random().nextBoolean()).iterator().cost()); + assertEquals(12 + 20 + 30, s.get(random().nextInt(100)).iterator().cost()); s = new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 3); assertEquals(12 + 20, s.cost()); - assertEquals(12 + 20, s.get(random().nextBoolean()).iterator().cost()); + assertEquals(12 + 20, s.get(random().nextInt(100)).iterator().cost()); } public void testDuelCost() throws Exception { @@ -205,128 +205,149 @@ public class TestBoolean2ScorerSupplier extends LuceneTestCase { Boolean2ScorerSupplier supplier = new Boolean2ScorerSupplier(null, subs, needsScores, minShouldMatch); long cost1 = supplier.cost(); - long cost2 = supplier.get(false).iterator().cost(); + long cost2 = supplier.get(Long.MAX_VALUE).iterator().cost(); assertEquals("clauses=" + subs + ", minShouldMatch=" + minShouldMatch, cost1, cost2); } } // test the tester... public void testFakeScorerSupplier() { - FakeScorerSupplier randomAccessSupplier = new FakeScorerSupplier(random().nextInt(100), true); - expectThrows(AssertionError.class, () -> randomAccessSupplier.get(false)); - FakeScorerSupplier sequentialSupplier = new FakeScorerSupplier(random().nextInt(100), false); - expectThrows(AssertionError.class, () -> sequentialSupplier.get(true)); + FakeScorerSupplier randomAccessSupplier = new FakeScorerSupplier(random().nextInt(100), 30); + expectThrows(AssertionError.class, () -> randomAccessSupplier.get(70)); + FakeScorerSupplier sequentialSupplier = new FakeScorerSupplier(random().nextInt(100), 70); + expectThrows(AssertionError.class, () -> sequentialSupplier.get(30)); } - public void testConjunctionRandomAccess() throws IOException { + public void testConjunctionLeadCost() throws IOException { Map> subs = new EnumMap<>(Occur.class); for (Occur occur : Occur.values()) { subs.put(occur, new ArrayList<>()); } - // If sequential access is required, only the least costly clause does not use random-access - subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(42, true)); - subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(12, false)); - new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(false); // triggers assertions as a side-effect + // If the clauses are less costly than the lead cost, the min cost is the new lead cost + subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(42, 12)); + subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(12, 12)); + new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(Long.MAX_VALUE); // triggers assertions as a side-effect subs = new EnumMap<>(Occur.class); for (Occur occur : Occur.values()) { subs.put(occur, new ArrayList<>()); } - // If random access is required, then we propagate to sub clauses - subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(42, true)); - subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(12, true)); - new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(true); // triggers assertions as a side-effect + // If the lead cost is less that the clauses' cost, then we don't modify it + subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(42, 7)); + subs.get(RandomPicks.randomFrom(random(), Arrays.asList(Occur.FILTER, Occur.MUST))).add(new FakeScorerSupplier(12, 7)); + new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(7); // triggers assertions as a side-effect } - public void testDisjunctionRandomAccess() throws IOException { - // disjunctions propagate - for (boolean randomAccess : new boolean[] {false, true}) { - Map> subs = new EnumMap<>(Occur.class); - for (Occur occur : Occur.values()) { - subs.put(occur, new ArrayList<>()); - } - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, randomAccess)); - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, randomAccess)); - new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(randomAccess); // triggers assertions as a side-effect + public void testDisjunctionLeadCost() throws IOException { + Map> subs = new EnumMap<>(Occur.class); + for (Occur occur : Occur.values()) { + subs.put(occur, new ArrayList<>()); } + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, 54)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, 54)); + new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(100); // triggers assertions as a side-effect + + subs.get(Occur.SHOULD).clear(); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, 20)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, 20)); + new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(20); // triggers assertions as a side-effect } - public void testDisjunctionWithMinShouldMatchRandomAccess() throws IOException { + public void testDisjunctionWithMinShouldMatchLeadCost() throws IOException { Map> subs = new EnumMap<>(Occur.class); for (Occur occur : Occur.values()) { subs.put(occur, new ArrayList<>()); } - // Only the most costly clause uses random-access in that case: - // most of time, we will find agreement between the 2 least costly - // clauses and only then check whether the 3rd one matches too - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, true)); - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, false)); - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30, false)); - new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 2).get(false); // triggers assertions as a side-effect + // minShouldMatch is 2 so the 2 least costly clauses will lead iteration + // and their cost will be 30+12=42 + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(50, 42)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, 42)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30, 42)); + new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 2).get(100); // triggers assertions as a side-effect subs = new EnumMap<>(Occur.class); for (Occur occur : Occur.values()) { subs.put(occur, new ArrayList<>()); } - // When random-access is true, just propagate - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, true)); - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, true)); - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30, true)); - new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 2).get(true); // triggers assertions as a side-effect + // If the leadCost is less than the msm cost, then it wins + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, 20)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, 20)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30, 20)); + new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 2).get(20); // triggers assertions as a side-effect subs = new EnumMap<>(Occur.class); for (Occur occur : Occur.values()) { subs.put(occur, new ArrayList<>()); } - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, true)); - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, false)); - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30, false)); - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(20, false)); - new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 2).get(false); // triggers assertions as a side-effect + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, 62)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, 62)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30, 62)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(20, 62)); + new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 2).get(100); // triggers assertions as a side-effect subs = new EnumMap<>(Occur.class); for (Occur occur : Occur.values()) { subs.put(occur, new ArrayList<>()); } - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, true)); - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, false)); - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30, true)); - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(20, false)); - new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 3).get(false); // triggers assertions as a side-effect + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, 32)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(12, 32)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30, 32)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(20, 32)); + new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 3).get(100); // triggers assertions as a side-effect } - public void testProhibitedRandomAccess() throws IOException { - for (boolean randomAccess : new boolean[] {false, true}) { - Map> subs = new EnumMap<>(Occur.class); - for (Occur occur : Occur.values()) { - subs.put(occur, new ArrayList<>()); - } - - // The MUST_NOT clause always uses random-access - subs.get(Occur.MUST).add(new FakeScorerSupplier(42, randomAccess)); - subs.get(Occur.MUST_NOT).add(new FakeScorerSupplier(TestUtil.nextInt(random(), 1, 100), true)); - new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(randomAccess); // triggers assertions as a side-effect + public void testProhibitedLeadCost() throws IOException { + Map> subs = new EnumMap<>(Occur.class); + for (Occur occur : Occur.values()) { + subs.put(occur, new ArrayList<>()); } + + // The MUST_NOT clause is called with the same lead cost as the MUST clause + subs.get(Occur.MUST).add(new FakeScorerSupplier(42, 42)); + subs.get(Occur.MUST_NOT).add(new FakeScorerSupplier(30, 42)); + new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(100); // triggers assertions as a side-effect + + subs.get(Occur.MUST).clear(); + subs.get(Occur.MUST_NOT).clear(); + subs.get(Occur.MUST).add(new FakeScorerSupplier(42, 42)); + subs.get(Occur.MUST_NOT).add(new FakeScorerSupplier(80, 42)); + new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(100); // triggers assertions as a side-effect + + subs.get(Occur.MUST).clear(); + subs.get(Occur.MUST_NOT).clear(); + subs.get(Occur.MUST).add(new FakeScorerSupplier(42, 20)); + subs.get(Occur.MUST_NOT).add(new FakeScorerSupplier(30, 20)); + new Boolean2ScorerSupplier(null, subs, random().nextBoolean(), 0).get(20); // triggers assertions as a side-effect } - public void testMixedRandomAccess() throws IOException { - for (boolean randomAccess : new boolean[] {false, true}) { - Map> subs = new EnumMap<>(Occur.class); - for (Occur occur : Occur.values()) { - subs.put(occur, new ArrayList<>()); - } - - // The SHOULD clause always uses random-access if there is a MUST clause - subs.get(Occur.MUST).add(new FakeScorerSupplier(42, randomAccess)); - subs.get(Occur.SHOULD).add(new FakeScorerSupplier(TestUtil.nextInt(random(), 1, 100), true)); - new Boolean2ScorerSupplier(null, subs, true, 0).get(randomAccess); // triggers assertions as a side-effect + public void testMixedLeadCost() throws IOException { + Map> subs = new EnumMap<>(Occur.class); + for (Occur occur : Occur.values()) { + subs.put(occur, new ArrayList<>()); } + + // The SHOULD clause is always called with the same lead cost as the MUST clause + subs.get(Occur.MUST).add(new FakeScorerSupplier(42, 42)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30, 42)); + new Boolean2ScorerSupplier(null, subs, true, 0).get(100); // triggers assertions as a side-effect + + subs.get(Occur.MUST).clear(); + subs.get(Occur.SHOULD).clear(); + subs.get(Occur.MUST).add(new FakeScorerSupplier(42, 42)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(80, 42)); + new Boolean2ScorerSupplier(null, subs, true, 0).get(100); // triggers assertions as a side-effect + + subs.get(Occur.MUST).clear(); + subs.get(Occur.SHOULD).clear(); + subs.get(Occur.MUST).add(new FakeScorerSupplier(42, 20)); + subs.get(Occur.SHOULD).add(new FakeScorerSupplier(80, 20)); + new Boolean2ScorerSupplier(null, subs, true, 0).get(20); // triggers assertions as a side-effect } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java index 54368739963..bde3407ddab 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java @@ -238,8 +238,8 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase { "ConjunctionScorer\n" + " MUST ConstantScoreScorer\n" + " MUST MinShouldMatchSumScorer\n" + - " SHOULD TermScorer body:web\n" + " SHOULD TermScorer body:crawler\n" + + " SHOULD TermScorer body:web\n" + " SHOULD TermScorer body:nutch", summary); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java index e13f9e077f2..85d2bf91d6c 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java @@ -1289,14 +1289,14 @@ public class TestLRUQueryCache extends LuceneTestCase { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { - return scorerSupplier(context).get(false); + return scorerSupplier(context).get(Long.MAX_VALUE); } @Override public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { final Weight weight = this; return new ScorerSupplier() { @Override - public Scorer get(boolean randomAccess) throws IOException { + public Scorer get(long leadCost) throws IOException { scorerCreated.set(true); return new ConstantScoreScorer(weight, boost, DocIdSetIterator.all(1)); } @@ -1344,7 +1344,7 @@ public class TestLRUQueryCache extends LuceneTestCase { Weight weight = searcher.createNormalizedWeight(query, false); ScorerSupplier supplier = weight.scorerSupplier(searcher.getIndexReader().leaves().get(0)); assertFalse(scorerCreated.get()); - supplier.get(random().nextBoolean()); + supplier.get(random().nextLong() & 0x7FFFFFFFFFFFFFFFL); assertTrue(scorerCreated.get()); reader.close(); diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java index 1b6f3864253..05a1186e4d1 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java @@ -109,7 +109,7 @@ public class ToParentBlockJoinQuery extends Query { if (scorerSupplier == null) { return null; } - return scorerSupplier.get(false); + return scorerSupplier.get(Long.MAX_VALUE); } // NOTE: acceptDocs applies (and is checked) only in the @@ -132,8 +132,8 @@ public class ToParentBlockJoinQuery extends Query { return new ScorerSupplier() { @Override - public Scorer get(boolean randomAccess) throws IOException { - return new BlockJoinScorer(BlockJoinWeight.this, childScorerSupplier.get(randomAccess), parents, scoreMode); + public Scorer get(long leadCost) throws IOException { + return new BlockJoinScorer(BlockJoinWeight.this, childScorerSupplier.get(leadCost), parents, scoreMode); } @Override diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java index f48c816b101..b16efe3d34f 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java @@ -114,7 +114,7 @@ final class LatLonPointDistanceQuery extends Query { if (scorerSupplier == null) { return null; } - return scorerSupplier.get(false); + return scorerSupplier.get(Long.MAX_VALUE); } @Override @@ -142,7 +142,7 @@ final class LatLonPointDistanceQuery extends Query { long cost = -1; @Override - public Scorer get(boolean randomAccess) throws IOException { + public Scorer get(long leadCost) throws IOException { if (values.getDocCount() == reader.maxDoc() && values.getDocCount() == values.size() && cost() > reader.maxDoc() / 2) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java index 7b6727d6f37..b98e6a1b395 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java @@ -46,7 +46,7 @@ class AssertingWeight extends FilterWeight { // Evil: make sure computing the cost has no side effects scorerSupplier.cost(); } - return scorerSupplier.get(false); + return scorerSupplier.get(Long.MAX_VALUE); } } @@ -59,10 +59,11 @@ class AssertingWeight extends FilterWeight { return new ScorerSupplier() { private boolean getCalled = false; @Override - public Scorer get(boolean randomAccess) throws IOException { + public Scorer get(long leadCost) throws IOException { assert getCalled == false; getCalled = true; - return AssertingScorer.wrap(new Random(random.nextLong()), inScorerSupplier.get(randomAccess), needsScores); + assert leadCost >= 0 : leadCost; + return AssertingScorer.wrap(new Random(random.nextLong()), inScorerSupplier.get(leadCost), needsScores); } @Override