From 20a280c33cd9765ca9186593771fc628b92cf6b3 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 6 Feb 2014 18:36:48 +0000 Subject: [PATCH] LUCENE-5418: faster drill-down/sideways on costly filters git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1565387 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 7 + .../org/apache/lucene/search/DocIdSet.java | 7 + .../apache/lucene/search/FilteredQuery.java | 27 +- .../lucene/search/TestFilteredQuery.java | 1 - .../demo/facet/DistanceFacetsExample.java | 111 ++++- .../apache/lucene/facet/DrillDownQuery.java | 107 ++++- .../apache/lucene/facet/DrillSideways.java | 205 +-------- .../lucene/facet/DrillSidewaysCollector.java | 188 --------- .../lucene/facet/DrillSidewaysQuery.java | 125 ++++-- .../lucene/facet/DrillSidewaysScorer.java | 399 +++++++++--------- .../lucene/facet/range/DoubleRange.java | 81 ++-- .../facet/range/DoubleRangeFacetCounts.java | 35 +- .../apache/lucene/facet/range/LongRange.java | 81 ++-- .../facet/range/LongRangeFacetCounts.java | 35 +- .../org/apache/lucene/facet/range/Range.java | 33 ++ .../lucene/facet/range/RangeFacetCounts.java | 14 +- .../SortedSetDocValuesReaderState.java | 6 - .../lucene/facet/TestDrillSideways.java | 4 +- .../facet/range/TestRangeFacetCounts.java | 178 +++++++- 19 files changed, 882 insertions(+), 762 deletions(-) delete mode 100644 lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysCollector.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index c55f40d0522..b5f6dd638ae 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -143,6 +143,13 @@ New Features close or cannot delete while referenced semantics. (Mark Miller, Mike McCandless) +* LUCENE-5418: Drilling down or sideways on a Lucene facet range + (using Range.getFilter()) is now faster for costly filters (uses + random access, not iteration); range facet counts now accept a + fast-match filter to avoid computing the value for documents that + are out of bounds, e.g. using a bounding box filter with distance + range faceting. (Mike McCandless) + Build * LUCENE-5217,LUCENE-5420: Maven config: get dependencies from Ant+Ivy config; diff --git a/lucene/core/src/java/org/apache/lucene/search/DocIdSet.java b/lucene/core/src/java/org/apache/lucene/search/DocIdSet.java index f6a4dd90b90..773a4c058e7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocIdSet.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocIdSet.java @@ -31,6 +31,13 @@ public abstract class DocIdSet { * are no docs that match. */ public abstract DocIdSetIterator iterator() throws IOException; + // TODO: somehow this class should express the cost of + // iteration vs the cost of random access Bits; for + // expensive Filters (e.g. distance < 1 km) we should use + // bits() after all other Query/Filters have matched, but + // this is the opposite of what bits() is for now + // (down-low filtering using e.g. FixedBitSet) + /** Optionally provides a {@link Bits} interface for random access * to matching documents. * @return {@code null}, if this {@code DocIdSet} does not support random access. diff --git a/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java b/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java index bf2fdde1b55..e65561a7c42 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java @@ -50,7 +50,7 @@ public class FilteredQuery extends Query { * @param query Query to be filtered, cannot be null. * @param filter Filter to apply to query results, cannot be null. */ - public FilteredQuery (Query query, Filter filter) { + public FilteredQuery(Query query, Filter filter) { this(query, filter, RANDOM_ACCESS_FILTER_STRATEGY); } @@ -63,7 +63,7 @@ public class FilteredQuery extends Query { * * @see FilterStrategy */ - public FilteredQuery (Query query, Filter filter, FilterStrategy strategy) { + public FilteredQuery(Query query, Filter filter, FilterStrategy strategy) { if (query == null || filter == null) throw new IllegalArgumentException("Query and filter cannot be null."); if (strategy == null) @@ -118,7 +118,9 @@ public class FilteredQuery extends Query { // return this query @Override - public Query getQuery() { return FilteredQuery.this; } + public Query getQuery() { + return FilteredQuery.this; + } // return a filtering scorer @Override @@ -130,8 +132,8 @@ public class FilteredQuery extends Query { // this means the filter does not accept any documents. return null; } + return strategy.filteredScorer(context, scoreDocsInOrder, topScorer, weight, filterDocIdSet); - } }; } @@ -183,14 +185,12 @@ public class FilteredQuery extends Query { @Override public int advance(int target) throws IOException { - int doc = scorer.advance(target); if (doc != Scorer.NO_MORE_DOCS && !filterbits.get(doc)) { return scorerDoc = nextDoc(); } else { return scorerDoc = doc; } - } @Override @@ -303,7 +303,9 @@ public class FilteredQuery extends Query { } @Override - public final int freq() throws IOException { return scorer.freq(); } + public final int freq() throws IOException { + return scorer.freq(); + } @Override public final Collection getChildren() { @@ -343,15 +345,6 @@ public class FilteredQuery extends Query { public Query rewrite(IndexReader reader) throws IOException { final Query queryRewritten = query.rewrite(reader); - if (queryRewritten instanceof MatchAllDocsQuery) { - // Special case: If the query is a MatchAllDocsQuery, we only - // return a CSQ(filter). - final Query rewritten = new ConstantScoreQuery(filter); - // Combine boost of MatchAllDocsQuery and the wrapped rewritten query: - rewritten.setBoost(this.getBoost() * queryRewritten.getBoost()); - return rewritten; - } - if (queryRewritten != query) { // rewrite to a new FilteredQuery wrapping the rewritten query final Query rewritten = new FilteredQuery(queryRewritten, filter, strategy); @@ -527,7 +520,7 @@ public class FilteredQuery extends Query { final Bits filterAcceptDocs = docIdSet.bits(); // force if RA is requested - final boolean useRandomAccess = (filterAcceptDocs != null && (useRandomAccess(filterAcceptDocs, firstFilterDoc))); + final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, firstFilterDoc); if (useRandomAccess) { // if we are using random access, we return the inner scorer, just with other acceptDocs return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java index a114536effb..0889052cca5 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java @@ -375,7 +375,6 @@ public class TestFilteredQuery extends LuceneTestCase { public void testRewrite() throws Exception { assertRewrite(new FilteredQuery(new TermQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), FilteredQuery.class); assertRewrite(new FilteredQuery(new PrefixQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), FilteredQuery.class); - assertRewrite(new FilteredQuery(new MatchAllDocsQuery(), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), ConstantScoreQuery.class); } public void testGetFilterStrategy() { diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java index 061fe821ec9..87b8399638b 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java @@ -29,18 +29,24 @@ import org.apache.lucene.expressions.Expression; import org.apache.lucene.expressions.SimpleBindings; import org.apache.lucene.expressions.js.JavascriptCompiler; import org.apache.lucene.facet.DrillDownQuery; +import org.apache.lucene.facet.DrillSideways; import org.apache.lucene.facet.FacetResult; import org.apache.lucene.facet.Facets; import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.range.DoubleRange; import org.apache.lucene.facet.range.DoubleRangeFacetCounts; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.queries.BooleanFilter; import org.apache.lucene.queries.function.ValueSource; -import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; @@ -59,6 +65,20 @@ public class DistanceFacetsExample implements Closeable { private final Directory indexDir = new RAMDirectory(); private IndexSearcher searcher; + private final FacetsConfig config = new FacetsConfig(); + + /** The "home" latitude. */ + public final static double ORIGIN_LATITUDE = 40.7143528; + + /** The "home" longitude. */ + public final static double ORIGIN_LONGITUDE = -74.0059731; + + /** Radius of the Earth in KM + * + * NOTE: this is approximate, because the earth is a bit + * wider at the equator than the poles. See + * http://en.wikipedia.org/wiki/Earth_radius */ + public final static double EARTH_RADIUS_KM = 6371.01; /** Empty constructor */ public DistanceFacetsExample() {} @@ -68,6 +88,8 @@ public class DistanceFacetsExample implements Closeable { IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER))); + // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter + // Add documents with latitude/longitude location: Document doc = new Document(); doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO)); @@ -92,7 +114,8 @@ public class DistanceFacetsExample implements Closeable { private ValueSource getDistanceValueSource() { Expression distance; try { - distance = JavascriptCompiler.compile("haversin(40.7143528,-74.0059731,latitude,longitude)"); + distance = JavascriptCompiler.compile( + "haversin(" + ORIGIN_LATITUDE + "," + ORIGIN_LONGITUDE + ",latitude,longitude)"); } catch (ParseException pe) { // Should not happen throw new RuntimeException(pe); @@ -104,15 +127,83 @@ public class DistanceFacetsExample implements Closeable { return distance.getValueSource(bindings); } + /** Given a latitude and longitude (in degrees) and the + * maximum great circle (surface of the earth) distance, + * returns a simple Filter bounding box to "fast match" + * candidates. */ + public static Filter getBoundingBoxFilter(double originLat, double originLng, double maxDistanceKM) { + + // Basic bounding box geo math from + // http://JanMatuschek.de/LatitudeLongitudeBoundingCoordinates, + // licensed under creative commons 3.0: + // http://creativecommons.org/licenses/by/3.0 + + // TODO: maybe switch to recursive prefix tree instead + // (in lucene/spatial)? It should be more efficient + // since it's a 2D trie... + + // Degrees -> Radians: + double originLatRadians = Math.toRadians(originLat); + double originLngRadians = Math.toRadians(originLng); + + double angle = maxDistanceKM / EARTH_RADIUS_KM; + + double minLat = originLatRadians - angle; + double maxLat = originLatRadians + angle; + + double minLng; + double maxLng; + if (minLat > Math.toRadians(-90) && maxLat < Math.toRadians(90)) { + double delta = Math.asin(Math.sin(angle)/Math.cos(originLatRadians)); + minLng = originLngRadians - delta; + if (minLng < Math.toRadians(-180)) { + minLng += 2 * Math.PI; + } + maxLng = originLngRadians + delta; + if (maxLng > Math.toRadians(180)) { + maxLng -= 2 * Math.PI; + } + } else { + // The query includes a pole! + minLat = Math.max(minLat, Math.toRadians(-90)); + maxLat = Math.min(maxLat, Math.toRadians(90)); + minLng = Math.toRadians(-180); + maxLng = Math.toRadians(180); + } + + BooleanFilter f = new BooleanFilter(); + + // Add latitude range filter: + f.add(NumericRangeFilter.newDoubleRange("latitude", Math.toDegrees(minLat), Math.toDegrees(maxLat), true, true), + BooleanClause.Occur.MUST); + + // Add longitude range filter: + if (minLng > maxLng) { + // The bounding box crosses the international date + // line: + BooleanFilter lonF = new BooleanFilter(); + lonF.add(NumericRangeFilter.newDoubleRange("longitude", Math.toDegrees(minLng), null, true, true), + BooleanClause.Occur.SHOULD); + lonF.add(NumericRangeFilter.newDoubleRange("longitude", null, Math.toDegrees(maxLng), true, true), + BooleanClause.Occur.SHOULD); + f.add(lonF, BooleanClause.Occur.MUST); + } else { + f.add(NumericRangeFilter.newDoubleRange("longitude", Math.toDegrees(minLng), Math.toDegrees(maxLng), true, true), + BooleanClause.Occur.MUST); + } + + return f; + } + /** User runs a query and counts facets. */ public FacetResult search() throws IOException { - FacetsCollector fc = new FacetsCollector(); searcher.search(new MatchAllDocsQuery(), fc); Facets facets = new DoubleRangeFacetCounts("field", getDistanceValueSource(), fc, + getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, 10.0), ONE_KM, TWO_KM, FIVE_KM, @@ -127,10 +218,16 @@ public class DistanceFacetsExample implements Closeable { // Passing no baseQuery means we drill down on all // documents ("browse only"): DrillDownQuery q = new DrillDownQuery(null); - - q.add("field", new ConstantScoreQuery(range.getFilter(getDistanceValueSource()))); - - return searcher.search(q, 10); + final ValueSource vs = getDistanceValueSource(); + q.add("field", range.getFilter(getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), vs)); + DrillSideways ds = new DrillSideways(searcher, config, (TaxonomyReader) null) { + @Override + protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException { + assert drillSideways.length == 1; + return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM); + } + }; + return ds.search(q, 10).hits; } @Override diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java index 9556f18c34b..8a390347bc1 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java @@ -18,22 +18,20 @@ package org.apache.lucene.facet; */ import java.io.IOException; +import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import org.apache.lucene.facet.range.DoubleRangeFacetCounts; -import org.apache.lucene.facet.range.LongRangeFacetCounts; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.Filter; import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; @@ -86,7 +84,7 @@ public final class DrillDownQuery extends Query { /** Used by DrillSideways */ DrillDownQuery(FacetsConfig config, Query baseQuery, List clauses, Map drillDownDims) { - this.query = new BooleanQuery(true); + query = new BooleanQuery(true); if (baseQuery != null) { query.add(baseQuery, Occur.MUST); } @@ -155,11 +153,12 @@ public final class DrillDownQuery extends Query { /** Expert: add a custom drill-down subQuery. Use this * when you have a separate way to drill-down on the - * dimension than the indexed facet ordinals (for - * example, use a {@link NumericRangeQuery} to drill down - * after {@link LongRangeFacetCounts} or {@link DoubleRangeFacetCounts}. */ + * dimension than the indexed facet ordinals. */ public void add(String dim, Query subQuery) { + if (drillDownDims.containsKey(dim)) { + throw new IllegalArgumentException("dimension \"" + dim + "\" already has a drill-down"); + } // TODO: we should use FilteredQuery? // So scores of the drill-down query don't have an @@ -172,6 +171,40 @@ public final class DrillDownQuery extends Query { drillDownDims.put(dim, drillDownDims.size()); } + /** Expert: add a custom drill-down Filter, e.g. when + * drilling down after range faceting. */ + public void add(String dim, Filter subFilter) { + + if (drillDownDims.containsKey(dim)) { + throw new IllegalArgumentException("dimension \"" + dim + "\" already has a drill-down"); + } + + // TODO: we should use FilteredQuery? + + // So scores of the drill-down query don't have an + // effect: + final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(subFilter); + drillDownQuery.setBoost(0.0f); + + query.add(drillDownQuery, Occur.MUST); + + drillDownDims.put(dim, drillDownDims.size()); + } + + static Filter getFilter(Query query) { + if (query instanceof ConstantScoreQuery) { + ConstantScoreQuery csq = (ConstantScoreQuery) query; + Filter filter = csq.getFilter(); + if (filter != null) { + return filter; + } else { + return getFilter(csq.getQuery()); + } + } else { + return null; + } + } + @Override public DrillDownQuery clone() { return new DrillDownQuery(config, query, drillDownDims); @@ -199,7 +232,63 @@ public final class DrillDownQuery extends Query { if (query.clauses().size() == 0) { return new MatchAllDocsQuery(); } - return query; + + List filters = new ArrayList(); + List queries = new ArrayList(); + List clauses = query.clauses(); + Query baseQuery; + int startIndex; + if (drillDownDims.size() == query.clauses().size()) { + baseQuery = new MatchAllDocsQuery(); + startIndex = 0; + } else { + baseQuery = clauses.get(0).getQuery(); + startIndex = 1; + } + + for(int i=startIndex;i drillDownDims = ddq.getDims(); - - BooleanQuery topQuery = new BooleanQuery(true); - final DrillSidewaysCollector collector = new DrillSidewaysCollector(hitCollector, drillDownCollector, drillSidewaysCollectors, - drillDownDims); - - // TODO: if query is already a BQ we could copy that and - // add clauses to it, instead of doing BQ inside BQ - // (should be more efficient)? Problem is this can - // affect scoring (coord) ... too bad we can't disable - // coord on a clause by clause basis: - topQuery.add(baseQuery, BooleanClause.Occur.MUST); - - // NOTE: in theory we could just make a single BQ, with - // +query a b c minShouldMatch=2, but in this case, - // annoyingly, BS2 wraps a sub-scorer that always - // returns 2 as the .freq(), not how many of the - // SHOULD clauses matched: - BooleanQuery subQuery = new BooleanQuery(true); - - Query wrappedSubQuery = new QueryWrapper(subQuery, - new SetWeight() { - @Override - public void set(Weight w) { - collector.setWeight(w, -1); - } - }); - Query constantScoreSubQuery = new ConstantScoreQuery(wrappedSubQuery); - - // Don't impact score of original query: - constantScoreSubQuery.setBoost(0.0f); - - topQuery.add(constantScoreSubQuery, BooleanClause.Occur.MUST); - - // Unfortunately this sub-BooleanQuery - // will never get BS1 because today BS1 only works - // if topScorer=true... and actually we cannot use BS1 - // anyways because we need subDocsScoredAtOnce: - int dimIndex = 0; - for(int i=startClause;i weightToIndex = new IdentityHashMap(); - - private Scorer mainScorer; - - public DrillSidewaysCollector(Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors, - Map dims) { - this.hitCollector = hitCollector; - this.drillDownCollector = drillDownCollector; - this.drillSidewaysCollectors = drillSidewaysCollectors; - subScorers = new Scorer[dims.size()]; - - if (dims.size() == 1) { - // When we have only one dim, we insert the - // MatchAllDocsQuery, bringing the clause count to - // 2: - exactCount = 2; - } else { - exactCount = dims.size(); - } - } - - @Override - public void collect(int doc) throws IOException { - //System.out.println("collect doc=" + doc + " main.freq=" + mainScorer.freq() + " main.doc=" + mainScorer.docID() + " exactCount=" + exactCount); - - if (mainScorer == null) { - // This segment did not have any docs with any - // drill-down field & value: - return; - } - - if (mainScorer.freq() == exactCount) { - // All sub-clauses from the drill-down filters - // matched, so this is a "real" hit, so we first - // collect in both the hitCollector and the - // drillDown collector: - //System.out.println(" hit " + drillDownCollector); - hitCollector.collect(doc); - if (drillDownCollector != null) { - drillDownCollector.collect(doc); - } - - // Also collect across all drill-sideways counts so - // we "merge in" drill-down counts for this - // dimension. - for(int i=0;i doc: "subDoc=" + subDoc + " doc=" + doc; - drillSidewaysCollectors[i].collect(doc); - assert allMatchesFrom(i+1, doc); - found = true; - break; - } - } - assert found; - } - } - - // Only used by assert: - private boolean allMatchesFrom(int startFrom, int doc) { - for(int i=startFrom;i 1 || (nullCount == 1 && dims.length == 1)) { + // If more than one dim has no matches, then there + // are no hits nor drill-sideways counts. Or, if we + // have only one dim and that dim has no matches, + // same thing. + //if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) { + if (nullCount > 1) { return null; } // Sort drill-downs by most restrictive first: Arrays.sort(dims); - // TODO: it could be better if we take acceptDocs - // into account instead of baseScorer? - Scorer baseScorer = baseWeight.scorer(context, scoreDocsInOrder, false, acceptDocs); - if (baseScorer == null) { return null; } return new DrillSidewaysScorer(this, context, - baseScorer, - drillDownCollector, dims); + baseScorer, + drillDownCollector, dims, + scoreSubDocsAtOnce); } }; } @@ -174,7 +209,7 @@ class DrillSidewaysQuery extends Query { result = prime * result + ((baseQuery == null) ? 0 : baseQuery.hashCode()); result = prime * result + ((drillDownCollector == null) ? 0 : drillDownCollector.hashCode()); - result = prime * result + Arrays.hashCode(drillDownTerms); + result = prime * result + Arrays.hashCode(drillDownQueries); result = prime * result + Arrays.hashCode(drillSidewaysCollectors); return result; } @@ -191,7 +226,7 @@ class DrillSidewaysQuery extends Query { if (drillDownCollector == null) { if (other.drillDownCollector != null) return false; } else if (!drillDownCollector.equals(other.drillDownCollector)) return false; - if (!Arrays.equals(drillDownTerms, other.drillDownTerms)) return false; + if (!Arrays.equals(drillDownQueries, other.drillDownQueries)) return false; if (!Arrays.equals(drillSidewaysCollectors, other.drillSidewaysCollectors)) return false; return true; } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java index 263f9aa7055..220e64923ad 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java @@ -22,10 +22,11 @@ import java.util.Collection; import java.util.Collections; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DocsEnum; import org.apache.lucene.search.Collector; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; class DrillSidewaysScorer extends Scorer { @@ -34,13 +35,15 @@ class DrillSidewaysScorer extends Scorer { private final Collector drillDownCollector; - private final DocsEnumsAndFreq[] dims; + private final DocsAndCost[] dims; // DrillDown DocsEnums: private final Scorer baseScorer; private final AtomicReaderContext context; + final boolean scoreSubDocsAtOnce; + private static final int CHUNK = 2048; private static final int MASK = CHUNK-1; @@ -48,12 +51,13 @@ class DrillSidewaysScorer extends Scorer { private float collectScore; DrillSidewaysScorer(Weight w, AtomicReaderContext context, Scorer baseScorer, Collector drillDownCollector, - DocsEnumsAndFreq[] dims) { + DocsAndCost[] dims, boolean scoreSubDocsAtOnce) { super(w); this.dims = dims; this.context = context; this.baseScorer = baseScorer; this.drillDownCollector = drillDownCollector; + this.scoreSubDocsAtOnce = scoreSubDocsAtOnce; } @Override @@ -67,7 +71,7 @@ class DrillSidewaysScorer extends Scorer { drillDownCollector.setScorer(this); drillDownCollector.setNextReader(context); } - for(DocsEnumsAndFreq dim : dims) { + for (DocsAndCost dim : dims) { dim.sidewaysCollector.setScorer(this); dim.sidewaysCollector.setNextReader(context); } @@ -79,26 +83,38 @@ class DrillSidewaysScorer extends Scorer { // Position all scorers to their first matching doc: baseScorer.nextDoc(); - for(DocsEnumsAndFreq dim : dims) { - for (DocsEnum docsEnum : dim.docsEnums) { - if (docsEnum != null) { - docsEnum.nextDoc(); - } + int numBits = 0; + for (DocsAndCost dim : dims) { + if (dim.disi != null) { + dim.disi.nextDoc(); + } else if (dim.bits != null) { + numBits++; } } final int numDims = dims.length; - DocsEnum[][] docsEnums = new DocsEnum[numDims][]; - Collector[] sidewaysCollectors = new Collector[numDims]; + Bits[] bits = new Bits[numBits]; + Collector[] bitsSidewaysCollectors = new Collector[numBits]; + + DocIdSetIterator[] disis = new DocIdSetIterator[numDims-numBits]; + Collector[] sidewaysCollectors = new Collector[numDims-numBits]; long drillDownCost = 0; - for(int dim=0;dim 1 && (dims[1].maxCost < baseQueryCost/10)) { + if (bitsUpto > 0 || scoreSubDocsAtOnce || baseQueryCost < drillDownCost/10) { + //System.out.println("queryFirst: baseScorer=" + baseScorer + " disis.length=" + disis.length + " bits.length=" + bits.length); + doQueryFirstScoring(collector, disis, sidewaysCollectors, bits, bitsSidewaysCollectors); + } else if (numDims > 1 && (dims[1].disi == null || dims[1].disi.cost() < baseQueryCost/10)) { //System.out.println("drillDownAdvance"); - doDrillDownAdvanceScoring(collector, docsEnums, sidewaysCollectors); + doDrillDownAdvanceScoring(collector, disis, sidewaysCollectors); } else { //System.out.println("union"); - doUnionScoring(collector, docsEnums, sidewaysCollectors); + doUnionScoring(collector, disis, sidewaysCollectors); + } + } + + /** Used when base query is highly constraining vs the + * drilldowns, or when the docs must be scored at once + * (i.e., like BooleanScorer2, not BooleanScorer). In + * this case we just .next() on base and .advance() on + * the dim filters. */ + private void doQueryFirstScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors, + Bits[] bits, Collector[] bitsSidewaysCollectors) throws IOException { + //if (DEBUG) { + // System.out.println(" doQueryFirstScoring"); + //} + int docID = baseScorer.docID(); + + nextDoc: while (docID != NO_MORE_DOCS) { + Collector failedCollector = null; + for (int i=0;i docID) { + if (failedCollector != null) { + // More than one dim fails on this document, so + // it's neither a hit nor a near-miss; move to + // next doc: + docID = baseScorer.nextDoc(); + continue nextDoc; + } else { + failedCollector = sidewaysCollectors[i]; + } + } + } + + // TODO: for the "non-costly Bits" we really should + // have passed them down as acceptDocs, but + // unfortunately we cannot distinguish today betwen + // "bits() is so costly that you should apply it last" + // from "bits() is so cheap that you should apply it + // everywhere down low" + + // Fold in Filter Bits last, since they may be costly: + for(int i=0;i= dim) { @@ -299,8 +383,9 @@ class DrillSidewaysScorer extends Scorer { counts[slot] = dim+1; } } + // TODO: sometimes use advance? - docID = docsEnum.nextDoc(); + docID = disi.nextDoc(); } } } @@ -309,7 +394,7 @@ class DrillSidewaysScorer extends Scorer { //if (DEBUG) { // System.out.println(" now collect: " + filledCount + " hits"); //} - for(int i=0;i { - DocsEnum[] docsEnums; - // Max cost for all docsEnums for this dim: - long maxCost; + static class DocsAndCost implements Comparable { + // Iterator for docs matching this dim's filter, or ... + DocIdSetIterator disi; + // Random access bits: + Bits bits; Collector sidewaysCollector; String dim; @Override - public int compareTo(DocsEnumsAndFreq other) { - if (maxCost < other.maxCost) { + public int compareTo(DocsAndCost other) { + if (disi == null) { + if (other.disi == null) { + return 0; + } else { + return 1; + } + } else if (other.disi == null) { return -1; - } else if (maxCost > other.maxCost) { + } else if (disi.cost() < other.disi.cost()) { + return -1; + } else if (disi.cost() > other.disi.cost()) { return 1; } else { return 0; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java index 21423cf1ea8..1247d13f5ae 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java @@ -26,11 +26,12 @@ import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.NumericRangeFilter; // javadocs import org.apache.lucene.util.Bits; import org.apache.lucene.util.NumericUtils; -/** Represents a range over double values. */ +/** Represents a range over double values. + * + * @lucene.experimental */ public final class DoubleRange extends Range { final double minIncl; final double maxIncl; @@ -99,14 +100,15 @@ public final class DoubleRange extends Range { return "DoubleRange(" + minIncl + " to " + maxIncl + ")"; } - /** Returns a new {@link Filter} accepting only documents - * in this range. Note that this filter is not - * efficient: it's a linear scan of all docs, testing - * each value. If the {@link ValueSource} is static, - * e.g. an indexed numeric field, then it's more - * efficient to use {@link NumericRangeFilter}. */ - public Filter getFilter(final ValueSource valueSource) { + @Override + public Filter getFilter(final Filter fastMatchFilter, final ValueSource valueSource) { return new Filter() { + + @Override + public String toString() { + return "Filter(" + DoubleRange.this.toString() + ")"; + } + @Override public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException { @@ -119,49 +121,48 @@ public final class DoubleRange extends Range { final int maxDoc = context.reader().maxDoc(); + final Bits fastMatchBits; + if (fastMatchFilter != null) { + DocIdSet dis = fastMatchFilter.getDocIdSet(context, null); + if (dis == null) { + // No documents match + return null; + } + fastMatchBits = dis.bits(); + if (fastMatchBits == null) { + throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits"); + } + } else { + fastMatchBits = null; + } + return new DocIdSet() { @Override - public DocIdSetIterator iterator() { - return new DocIdSetIterator() { - int doc = -1; - + public Bits bits() { + return new Bits() { @Override - public int nextDoc() throws IOException { - while (true) { - doc++; - if (doc == maxDoc) { - return doc = NO_MORE_DOCS; - } - if (acceptDocs != null && acceptDocs.get(doc) == false) { - continue; - } - double v = values.doubleVal(doc); - if (accept(v)) { - return doc; - } + public boolean get(int docID) { + if (acceptDocs != null && acceptDocs.get(docID) == false) { + return false; } + if (fastMatchBits != null && fastMatchBits.get(docID) == false) { + return false; + } + return accept(values.doubleVal(docID)); } @Override - public int advance(int target) throws IOException { - doc = target-1; - return nextDoc(); - } - - @Override - public int docID() { - return doc; - } - - @Override - public long cost() { - // Since we do a linear scan over all - // documents, our cost is O(maxDoc): + public int length() { return maxDoc; } }; } + + @Override + public DocIdSetIterator iterator() { + throw new UnsupportedOperationException("this filter can only be accessed via bits()"); + } }; } }; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java index a804187b83c..587c68cc988 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java @@ -24,12 +24,15 @@ import java.util.List; import org.apache.lucene.document.DoubleDocValuesField; // javadocs import org.apache.lucene.document.FloatDocValuesField; // javadocs import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.facet.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.DoubleFieldSource; import org.apache.lucene.queries.function.valuesource.FloatFieldSource; // javadocs +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.NumericUtils; @@ -61,7 +64,16 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts { /** Create {@code RangeFacetCounts}, using the provided * {@link ValueSource}. */ public DoubleRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, DoubleRange... ranges) throws IOException { - super(field, ranges); + this(field, valueSource, hits, null, ranges); + } + + /** Create {@code RangeFacetCounts}, using the provided + * {@link ValueSource}, and using the provided Filter as + * a fastmatch: only documents passing the filter are + * checked for the matching ranges. The filter must be + * random access (implement {@link DocIdSet#bits}). */ + public DoubleRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, Filter fastMatchFilter, DoubleRange... ranges) throws IOException { + super(field, ranges, fastMatchFilter); count(valueSource, hits.getMatchingDocs()); } @@ -84,10 +96,29 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts { FunctionValues fv = valueSource.getValues(Collections.emptyMap(), hits.context); totCount += hits.totalHits; + Bits bits; + if (fastMatchFilter != null) { + DocIdSet dis = fastMatchFilter.getDocIdSet(hits.context, null); + if (dis == null) { + // No documents match + continue; + } + bits = dis.bits(); + if (bits == null) { + throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits"); + } + } else { + bits = null; + } + DocIdSetIterator docs = hits.bits.iterator(); int doc; while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (bits != null && bits.get(doc) == false) { + doc++; + continue; + } // Skip missing docs: if (fv.exists(doc)) { counter.add(NumericUtils.doubleToSortableLong(fv.doubleVal(doc))); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java index e2ee9151b98..8b88f403fd9 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java @@ -26,10 +26,11 @@ import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.NumericRangeFilter; // javadocs import org.apache.lucene.util.Bits; -/** Represents a range over long values. */ +/** Represents a range over long values. + * + * @lucene.experimental */ public final class LongRange extends Range { final long minIncl; final long maxIncl; @@ -91,14 +92,15 @@ public final class LongRange extends Range { return "LongRange(" + minIncl + " to " + maxIncl + ")"; } - /** Returns a new {@link Filter} accepting only documents - * in this range. Note that this filter is not - * efficient: it's a linear scan of all docs, testing - * each value. If the {@link ValueSource} is static, - * e.g. an indexed numeric field, then it's more - * efficient to use {@link NumericRangeFilter}. */ - public Filter getFilter(final ValueSource valueSource) { + @Override + public Filter getFilter(final Filter fastMatchFilter, final ValueSource valueSource) { return new Filter() { + + @Override + public String toString() { + return "Filter(" + LongRange.this.toString() + ")"; + } + @Override public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException { @@ -111,49 +113,48 @@ public final class LongRange extends Range { final int maxDoc = context.reader().maxDoc(); + final Bits fastMatchBits; + if (fastMatchFilter != null) { + DocIdSet dis = fastMatchFilter.getDocIdSet(context, null); + if (dis == null) { + // No documents match + return null; + } + fastMatchBits = dis.bits(); + if (fastMatchBits == null) { + throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits"); + } + } else { + fastMatchBits = null; + } + return new DocIdSet() { @Override - public DocIdSetIterator iterator() { - return new DocIdSetIterator() { - int doc = -1; - + public Bits bits() { + return new Bits() { @Override - public int nextDoc() throws IOException { - while (true) { - doc++; - if (doc == maxDoc) { - return doc = NO_MORE_DOCS; - } - if (acceptDocs != null && acceptDocs.get(doc) == false) { - continue; - } - long v = values.longVal(doc); - if (accept(v)) { - return doc; - } + public boolean get(int docID) { + if (acceptDocs != null && acceptDocs.get(docID) == false) { + return false; } + if (fastMatchBits != null && fastMatchBits.get(docID) == false) { + return false; + } + return accept(values.longVal(docID)); } @Override - public int advance(int target) throws IOException { - doc = target-1; - return nextDoc(); - } - - @Override - public int docID() { - return doc; - } - - @Override - public long cost() { - // Since we do a linear scan over all - // documents, our cost is O(maxDoc): + public int length() { return maxDoc; } }; } + + @Override + public DocIdSetIterator iterator() { + throw new UnsupportedOperationException("this filter can only be accessed via bits()"); + } }; } }; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java index 22441378a72..78cc7195b1a 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java @@ -22,11 +22,14 @@ import java.util.Collections; import java.util.List; import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.facet.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.LongFieldSource; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.apache.lucene.search.DocIdSetIterator; /** {@link Facets} implementation that computes counts for @@ -50,7 +53,16 @@ public class LongRangeFacetCounts extends RangeFacetCounts { /** Create {@code RangeFacetCounts}, using the provided * {@link ValueSource}. */ public LongRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, LongRange... ranges) throws IOException { - super(field, ranges); + this(field, valueSource, hits, null, ranges); + } + + /** Create {@code RangeFacetCounts}, using the provided + * {@link ValueSource}, and using the provided Filter as + * a fastmatch: only documents passing the filter are + * checked for the matching ranges. The filter must be + * random access (implement {@link DocIdSet#bits}). */ + public LongRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, Filter fastMatchFilter, LongRange... ranges) throws IOException { + super(field, ranges, fastMatchFilter); count(valueSource, hits.getMatchingDocs()); } @@ -65,9 +77,28 @@ public class LongRangeFacetCounts extends RangeFacetCounts { FunctionValues fv = valueSource.getValues(Collections.emptyMap(), hits.context); totCount += hits.totalHits; + Bits bits; + if (fastMatchFilter != null) { + DocIdSet dis = fastMatchFilter.getDocIdSet(hits.context, null); + if (dis == null) { + // No documents match + continue; + } + bits = dis.bits(); + if (bits == null) { + throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits"); + } + } else { + bits = null; + } + DocIdSetIterator docs = hits.bits.iterator(); int doc; while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (bits != null && bits.get(doc) == false) { + doc++; + continue; + } // Skip missing docs: if (fv.exists(doc)) { counter.add(fv.longVal(doc)); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/Range.java b/lucene/facet/src/java/org/apache/lucene/facet/range/Range.java index b003b7fd7ab..eb5111220c5 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/Range.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/Range.java @@ -17,6 +17,13 @@ package org.apache.lucene.facet.range; * limitations under the License. */ +import org.apache.lucene.facet.DrillDownQuery; // javadocs +import org.apache.lucene.facet.DrillSideways; // javadocs +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.FilteredQuery; // javadocs +import org.apache.lucene.search.NumericRangeFilter; // javadocs + /** Base class for a single labeled range. * * @lucene.experimental */ @@ -33,6 +40,32 @@ public abstract class Range { this.label = label; } + /** Returns a new {@link Filter} accepting only documents + * in this range. This filter is not general-purpose; + * you should either use it with {@link DrillSideways} by + * adding it to {@link DrillDownQuery#add}, or pass it to + * {@link FilteredQuery} using its {@link + * FilteredQuery#QUERY_FIRST_FILTER_STRATEGY}. If the + * {@link ValueSource} is static, e.g. an indexed numeric + * field, then it may be more efficient to use {@link + * NumericRangeFilter}. The provided fastMatchFilter, + * if non-null, will first be consulted, and only if + * that is set for each document will the range then be + * checked. */ + public abstract Filter getFilter(Filter fastMatchFilter, ValueSource valueSource); + + /** Returns a new {@link Filter} accepting only documents + * in this range. This filter is not general-purpose; + * you should either use it with {@link DrillSideways} by + * adding it to {@link DrillDownQuery#add}, or pass it to + * {@link FilteredQuery} using its {@link + * FilteredQuery#QUERY_FIRST_FILTER_STRATEGY}. If the + * {@link ValueSource} is static, e.g. an indexed numeric + * field, then it may be more efficient to use {@link NumericRangeFilter}. */ + public Filter getFilter(ValueSource valueSource) { + return getFilter(null, valueSource); + } + /** Invoke this for a useless range. */ protected void failNoMatch() { throw new IllegalArgumentException("range \"" + label + "\" matches nothing"); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java index cd5c541d9ac..9dc8f16088a 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java @@ -24,7 +24,7 @@ import java.util.List; import org.apache.lucene.facet.FacetResult; import org.apache.lucene.facet.Facets; import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.queries.function.valuesource.LongFieldSource; +import org.apache.lucene.search.Filter; /** Base class for range faceting. * @@ -36,17 +36,23 @@ abstract class RangeFacetCounts extends Facets { /** Counts, initialized in by subclass. */ protected final int[] counts; + /** Optional: if specified, we first test this Filter to + * see whether the document should be checked for + * matching ranges. If this is null, all documents are + * checked. */ + protected final Filter fastMatchFilter; + /** Our field name. */ protected final String field; /** Total number of hits. */ protected int totCount; - /** Create {@code RangeFacetCounts}, using {@link - * LongFieldSource} from the specified field. */ - protected RangeFacetCounts(String field, Range[] ranges) throws IOException { + /** Create {@code RangeFacetCounts} */ + protected RangeFacetCounts(String field, Range[] ranges, Filter fastMatchFilter) throws IOException { this.field = field; this.ranges = ranges; + this.fastMatchFilter = fastMatchFilter; counts = new int[ranges.length]; } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java index e3e50c9cdce..177173a42c0 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java @@ -18,16 +18,10 @@ package org.apache.lucene.facet.sortedset; */ import java.io.IOException; -import java.util.Arrays; -import java.util.HashMap; import java.util.Map; -import org.apache.lucene.facet.FacetsConfig; -import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.util.BytesRef; /** Wraps a {@link IndexReader} and resolves ords * using existing {@link SortedSetDocValues} APIs without a diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java index 8bbfd3a8207..8d7d7ecfbdf 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java @@ -645,7 +645,7 @@ public class TestDrillSideways extends FacetTestCase { final FixedBitSet bits = new FixedBitSet(maxDoc); for(int docID=0;docID < maxDoc;docID++) { // Keeps only the even ids: - if ((acceptDocs == null || acceptDocs.get(docID)) && ((Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0)) { + if ((acceptDocs == null || acceptDocs.get(docID)) && (Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0) { bits.set(docID); } } @@ -689,7 +689,7 @@ public class TestDrillSideways extends FacetTestCase { // subScorers are on the same docID: if (!anyMultiValuedDrillDowns) { // Can only do this test when there are no OR'd - // drill-down values, beacuse in that case it's + // drill-down values, because in that case it's // easily possible for one of the DD terms to be on // a future docID: new DrillSideways(s, config, tr) { diff --git a/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java index 6d938fd23d0..8fcaec8ac97 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java @@ -20,6 +20,7 @@ package org.apache.lucene.facet.range; import java.io.IOException; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleDocValuesField; @@ -30,6 +31,7 @@ import org.apache.lucene.document.FloatField; import org.apache.lucene.document.LongField; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.facet.DrillDownQuery; +import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult; import org.apache.lucene.facet.DrillSideways; import org.apache.lucene.facet.FacetField; import org.apache.lucene.facet.FacetResult; @@ -39,10 +41,10 @@ import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.LabelAndValue; import org.apache.lucene.facet.MultiFacets; -import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; @@ -50,12 +52,20 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.DoubleDocValues; +import org.apache.lucene.queries.function.valuesource.DoubleFieldSource; import org.apache.lucene.queries.function.valuesource.FloatFieldSource; -import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.queries.function.valuesource.LongFieldSource; +import org.apache.lucene.search.CachingWrapperFilter; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; +import org.apache.lucene.search.QueryWrapperFilter; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util._TestUtil; @@ -229,6 +239,10 @@ public class TestRangeFacetCounts extends FacetTestCase { IndexSearcher s = newSearcher(r); + if (VERBOSE) { + System.out.println("TEST: searcher=" + s); + } + DrillSideways ds = new DrillSideways(s, config, tr) { @Override @@ -365,6 +379,8 @@ public class TestRangeFacetCounts extends FacetTestCase { System.out.println("TEST: numDocs=" + numDocs); } long[] values = new long[numDocs]; + long minValue = Long.MAX_VALUE; + long maxValue = Long.MIN_VALUE; for(int i=0;i 0 && random().nextInt(10) == 7) { @@ -447,13 +467,26 @@ public class TestRangeFacetCounts extends FacetTestCase { } if (accept) { expectedCounts[rangeID]++; + minAcceptedValue = Math.min(minAcceptedValue, values[i]); + maxAcceptedValue = Math.max(maxAcceptedValue, values[i]); } } } FacetsCollector sfc = new FacetsCollector(); s.search(new MatchAllDocsQuery(), sfc); - Facets facets = new LongRangeFacetCounts("field", sfc, ranges); + Filter fastMatchFilter; + if (random().nextBoolean()) { + if (random().nextBoolean()) { + fastMatchFilter = NumericRangeFilter.newLongRange("field", minValue, maxValue, true, true); + } else { + fastMatchFilter = NumericRangeFilter.newLongRange("field", minAcceptedValue, maxAcceptedValue, true, true); + } + } else { + fastMatchFilter = null; + } + ValueSource vs = new LongFieldSource("field"); + Facets facets = new LongRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges); FacetResult result = facets.getTopChildren(10, "field"); assertEquals(numRange, result.labelValues.length); for(int rangeID=0;rangeID 0 && random().nextInt(10) == 7) { @@ -693,13 +767,26 @@ public class TestRangeFacetCounts extends FacetTestCase { } if (accept) { expectedCounts[rangeID]++; + minAcceptedValue = Math.min(minAcceptedValue, values[i]); + maxAcceptedValue = Math.max(maxAcceptedValue, values[i]); } } } FacetsCollector sfc = new FacetsCollector(); s.search(new MatchAllDocsQuery(), sfc); - Facets facets = new DoubleRangeFacetCounts("field", sfc, ranges); + Filter fastMatchFilter; + if (random().nextBoolean()) { + if (random().nextBoolean()) { + fastMatchFilter = NumericRangeFilter.newDoubleRange("field", minValue, maxValue, true, true); + } else { + fastMatchFilter = NumericRangeFilter.newDoubleRange("field", minAcceptedValue, maxAcceptedValue, true, true); + } + } else { + fastMatchFilter = null; + } + ValueSource vs = new DoubleFieldSource("field"); + Facets facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges); FacetResult result = facets.getTopChildren(10, "field"); assertEquals(numRange, result.labelValues.length); for(int rangeID=0;rangeID