LUCENE-5418: faster drill-down/sideways on costly filters

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1565387 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-02-06 18:36:48 +00:00
parent 3d24b825a5
commit 20a280c33c
19 changed files with 882 additions and 762 deletions

View File

@ -143,6 +143,13 @@ New Features
close or cannot delete while referenced semantics.
(Mark Miller, Mike McCandless)
* LUCENE-5418: Drilling down or sideways on a Lucene facet range
(using Range.getFilter()) is now faster for costly filters (uses
random access, not iteration); range facet counts now accept a
fast-match filter to avoid computing the value for documents that
are out of bounds, e.g. using a bounding box filter with distance
range faceting. (Mike McCandless)
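For concreteness, a minimal sketch of the new fast-match counting API; it mirrors the DistanceFacetsExample changes later in this commit, and the helper names (getDistanceValueSource, getBoundingBoxFilter, the *_KM ranges) come from that example:
FacetsCollector fc = new FacetsCollector();
searcher.search(new MatchAllDocsQuery(), fc);
// Only docs passing the cheap bounding-box filter are tested
// against the costly per-document distance expression:
Facets facets = new DoubleRangeFacetCounts("field", getDistanceValueSource(), fc,
    getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, 10.0),
    ONE_KM, TWO_KM, FIVE_KM, TEN_KM);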
Build
* LUCENE-5217,LUCENE-5420: Maven config: get dependencies from Ant+Ivy config;

View File

@ -31,6 +31,13 @@ public abstract class DocIdSet {
* are no docs that match. */
public abstract DocIdSetIterator iterator() throws IOException;
// TODO: somehow this class should express the cost of
// iteration vs the cost of random access Bits; for
// expensive Filters (e.g. distance < 1 km) we should use
// bits() after all other Query/Filters have matched, but
// this is the opposite of what bits() is for now
// (down-low filtering using e.g. FixedBitSet)
/** Optionally provides a {@link Bits} interface for random access
* to matching documents.
* @return {@code null}, if this {@code DocIdSet} does not support random access.
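As a hedged illustration (not part of this commit) of how a caller can prefer random access when a DocIdSet supports it, falling back to iteration otherwise; the matches helper is hypothetical:
// Sketch: test whether one doc is in a DocIdSet.
static boolean matches(DocIdSet set, int docID) throws IOException {
  Bits bits = set.bits();
  if (bits != null) {
    return bits.get(docID);            // random access path
  }
  DocIdSetIterator it = set.iterator();
  if (it == null) {
    return false;                      // no docs match
  }
  return it.advance(docID) == docID;   // iteration path
}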

View File

@ -50,7 +50,7 @@ public class FilteredQuery extends Query {
* @param query Query to be filtered, cannot be <code>null</code>.
* @param filter Filter to apply to query results, cannot be <code>null</code>.
*/
public FilteredQuery (Query query, Filter filter) {
public FilteredQuery(Query query, Filter filter) {
this(query, filter, RANDOM_ACCESS_FILTER_STRATEGY);
}
@ -63,7 +63,7 @@ public class FilteredQuery extends Query {
*
* @see FilterStrategy
*/
public FilteredQuery (Query query, Filter filter, FilterStrategy strategy) {
public FilteredQuery(Query query, Filter filter, FilterStrategy strategy) {
if (query == null || filter == null)
throw new IllegalArgumentException("Query and filter cannot be null.");
if (strategy == null)
@ -118,7 +118,9 @@ public class FilteredQuery extends Query {
// return this query
@Override
public Query getQuery() { return FilteredQuery.this; }
public Query getQuery() {
return FilteredQuery.this;
}
// return a filtering scorer
@Override
@ -130,8 +132,8 @@ public class FilteredQuery extends Query {
// this means the filter does not accept any documents.
return null;
}
return strategy.filteredScorer(context, scoreDocsInOrder, topScorer, weight, filterDocIdSet);
}
};
}
@ -183,14 +185,12 @@ public class FilteredQuery extends Query {
@Override
public int advance(int target) throws IOException {
int doc = scorer.advance(target);
if (doc != Scorer.NO_MORE_DOCS && !filterbits.get(doc)) {
return scorerDoc = nextDoc();
} else {
return scorerDoc = doc;
}
}
@Override
@ -303,7 +303,9 @@ public class FilteredQuery extends Query {
}
@Override
public final int freq() throws IOException { return scorer.freq(); }
public final int freq() throws IOException {
return scorer.freq();
}
@Override
public final Collection<ChildScorer> getChildren() {
@ -343,15 +345,6 @@ public class FilteredQuery extends Query {
public Query rewrite(IndexReader reader) throws IOException {
final Query queryRewritten = query.rewrite(reader);
if (queryRewritten instanceof MatchAllDocsQuery) {
// Special case: If the query is a MatchAllDocsQuery, we only
// return a CSQ(filter).
final Query rewritten = new ConstantScoreQuery(filter);
// Combine boost of MatchAllDocsQuery and the wrapped rewritten query:
rewritten.setBoost(this.getBoost() * queryRewritten.getBoost());
return rewritten;
}
if (queryRewritten != query) {
// rewrite to a new FilteredQuery wrapping the rewritten query
final Query rewritten = new FilteredQuery(queryRewritten, filter, strategy);
@ -527,7 +520,7 @@ public class FilteredQuery extends Query {
final Bits filterAcceptDocs = docIdSet.bits();
// force if RA is requested
final boolean useRandomAccess = (filterAcceptDocs != null && (useRandomAccess(filterAcceptDocs, firstFilterDoc)));
final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, firstFilterDoc);
if (useRandomAccess) {
// if we are using random access, we return the inner scorer, just with other acceptDocs
return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs);
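Elsewhere in this commit (DrillDownQuery.rewrite) the opposite choice is forced for costly filters; a sketch of that usage, with the base query and filter as assumed placeholders:
// Sketch: make an expensive filter run last, only for docs the
// query (and any other clauses) already matched:
Query base = new TermQuery(new Term("field", "one"));  // placeholder
Filter costly = getCostlyDistanceFilter();             // assumed helper
Query q = new FilteredQuery(base, costly,
    FilteredQuery.QUERY_FIRST_FILTER_STRATEGY);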

View File

@ -375,7 +375,6 @@ public class TestFilteredQuery extends LuceneTestCase {
public void testRewrite() throws Exception {
assertRewrite(new FilteredQuery(new TermQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), FilteredQuery.class);
assertRewrite(new FilteredQuery(new PrefixQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), FilteredQuery.class);
assertRewrite(new FilteredQuery(new MatchAllDocsQuery(), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), ConstantScoreQuery.class);
}
public void testGetFilterStrategy() {

View File

@ -29,18 +29,24 @@ import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.range.DoubleRange;
import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queries.BooleanFilter;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
@ -59,6 +65,20 @@ public class DistanceFacetsExample implements Closeable {
private final Directory indexDir = new RAMDirectory();
private IndexSearcher searcher;
private final FacetsConfig config = new FacetsConfig();
/** The "home" latitude. */
public final static double ORIGIN_LATITUDE = 40.7143528;
/** The "home" longitude. */
public final static double ORIGIN_LONGITUDE = -74.0059731;
/** Radius of the Earth in KM
*
* NOTE: this is approximate, because the earth is a bit
* wider at the equator than at the poles. See
* http://en.wikipedia.org/wiki/Earth_radius */
public final static double EARTH_RADIUS_KM = 6371.01;
/** Empty constructor */
public DistanceFacetsExample() {}
@ -68,6 +88,8 @@ public class DistanceFacetsExample implements Closeable {
IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER,
new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));
// TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter
// Add documents with latitude/longitude location:
Document doc = new Document();
doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO));
@ -92,7 +114,8 @@ public class DistanceFacetsExample implements Closeable {
private ValueSource getDistanceValueSource() {
Expression distance;
try {
distance = JavascriptCompiler.compile("haversin(40.7143528,-74.0059731,latitude,longitude)");
distance = JavascriptCompiler.compile(
"haversin(" + ORIGIN_LATITUDE + "," + ORIGIN_LONGITUDE + ",latitude,longitude)");
} catch (ParseException pe) {
// Should not happen
throw new RuntimeException(pe);
@ -104,15 +127,83 @@ public class DistanceFacetsExample implements Closeable {
return distance.getValueSource(bindings);
}
/** Given a latitude and longitude (in degrees) and the
* maximum great circle (surface of the earth) distance,
* returns a simple Filter bounding box to "fast match"
* candidates. */
public static Filter getBoundingBoxFilter(double originLat, double originLng, double maxDistanceKM) {
// Basic bounding box geo math from
// http://JanMatuschek.de/LatitudeLongitudeBoundingCoordinates,
// licensed under creative commons 3.0:
// http://creativecommons.org/licenses/by/3.0
// TODO: maybe switch to recursive prefix tree instead
// (in lucene/spatial)? It should be more efficient
// since it's a 2D trie...
// Degrees -> Radians:
double originLatRadians = Math.toRadians(originLat);
double originLngRadians = Math.toRadians(originLng);
double angle = maxDistanceKM / EARTH_RADIUS_KM;
double minLat = originLatRadians - angle;
double maxLat = originLatRadians + angle;
double minLng;
double maxLng;
if (minLat > Math.toRadians(-90) && maxLat < Math.toRadians(90)) {
double delta = Math.asin(Math.sin(angle)/Math.cos(originLatRadians));
minLng = originLngRadians - delta;
if (minLng < Math.toRadians(-180)) {
minLng += 2 * Math.PI;
}
maxLng = originLngRadians + delta;
if (maxLng > Math.toRadians(180)) {
maxLng -= 2 * Math.PI;
}
} else {
// The query includes a pole!
minLat = Math.max(minLat, Math.toRadians(-90));
maxLat = Math.min(maxLat, Math.toRadians(90));
minLng = Math.toRadians(-180);
maxLng = Math.toRadians(180);
}
BooleanFilter f = new BooleanFilter();
// Add latitude range filter:
f.add(NumericRangeFilter.newDoubleRange("latitude", Math.toDegrees(minLat), Math.toDegrees(maxLat), true, true),
BooleanClause.Occur.MUST);
// Add longitude range filter:
if (minLng > maxLng) {
// The bounding box crosses the international date
// line:
BooleanFilter lonF = new BooleanFilter();
lonF.add(NumericRangeFilter.newDoubleRange("longitude", Math.toDegrees(minLng), null, true, true),
BooleanClause.Occur.SHOULD);
lonF.add(NumericRangeFilter.newDoubleRange("longitude", null, Math.toDegrees(maxLng), true, true),
BooleanClause.Occur.SHOULD);
f.add(lonF, BooleanClause.Occur.MUST);
} else {
f.add(NumericRangeFilter.newDoubleRange("longitude", Math.toDegrees(minLng), Math.toDegrees(maxLng), true, true),
BooleanClause.Occur.MUST);
}
return f;
}
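As a rough worked check of the math above (approximate values, not from the source): for maxDistanceKM = 10 at ORIGIN_LATITUDE, angle = 10 / 6371.01 ≈ 0.00157 radians, so minLat/maxLat sit about ±0.090 degrees around the origin, and delta = asin(sin(0.00157) / cos(0.7106)) ≈ 0.00207 radians, i.e. longitude bounds of about ±0.119 degrees.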
/** User runs a query and counts facets. */
public FacetResult search() throws IOException {
FacetsCollector fc = new FacetsCollector();
searcher.search(new MatchAllDocsQuery(), fc);
Facets facets = new DoubleRangeFacetCounts("field", getDistanceValueSource(), fc,
getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, 10.0),
ONE_KM,
TWO_KM,
FIVE_KM,
@ -127,10 +218,16 @@ public class DistanceFacetsExample implements Closeable {
// Passing no baseQuery means we drill down on all
// documents ("browse only"):
DrillDownQuery q = new DrillDownQuery(null);
q.add("field", new ConstantScoreQuery(range.getFilter(getDistanceValueSource())));
return searcher.search(q, 10);
final ValueSource vs = getDistanceValueSource();
q.add("field", range.getFilter(getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), vs));
DrillSideways ds = new DrillSideways(searcher, config, (TaxonomyReader) null) {
@Override
protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {
assert drillSideways.length == 1;
return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM);
}
};
return ds.search(q, 10).hits;
}
@Override

View File

@ -18,22 +18,20 @@ package org.apache.lucene.facet;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
import org.apache.lucene.facet.range.LongRangeFacetCounts;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
@ -86,7 +84,7 @@ public final class DrillDownQuery extends Query {
/** Used by DrillSideways */
DrillDownQuery(FacetsConfig config, Query baseQuery, List<Query> clauses, Map<String,Integer> drillDownDims) {
this.query = new BooleanQuery(true);
query = new BooleanQuery(true);
if (baseQuery != null) {
query.add(baseQuery, Occur.MUST);
}
@ -155,11 +153,12 @@ public final class DrillDownQuery extends Query {
/** Expert: add a custom drill-down subQuery. Use this
* when you have a separate way to drill-down on the
* dimension than the indexed facet ordinals (for
* example, use a {@link NumericRangeQuery} to drill down
* after {@link LongRangeFacetCounts} or {@link DoubleRangeFacetCounts}. */
* dimension than the indexed facet ordinals. */
public void add(String dim, Query subQuery) {
if (drillDownDims.containsKey(dim)) {
throw new IllegalArgumentException("dimension \"" + dim + "\" already has a drill-down");
}
// TODO: we should use FilteredQuery?
// So scores of the drill-down query don't have an
@ -172,6 +171,40 @@ public final class DrillDownQuery extends Query {
drillDownDims.put(dim, drillDownDims.size());
}
/** Expert: add a custom drill-down Filter, e.g. when
* drilling down after range faceting. */
public void add(String dim, Filter subFilter) {
if (drillDownDims.containsKey(dim)) {
throw new IllegalArgumentException("dimension \"" + dim + "\" already has a drill-down");
}
// TODO: we should use FilteredQuery?
// So scores of the drill-down query don't have an
// effect:
final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(subFilter);
drillDownQuery.setBoost(0.0f);
query.add(drillDownQuery, Occur.MUST);
drillDownDims.put(dim, drillDownDims.size());
}
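Usage mirrors the drill-down change in DistanceFacetsExample earlier in this commit (the range, fast-match filter and value source are assumed in scope):
DrillDownQuery q = new DrillDownQuery(null);  // browse-only: no base query
q.add("field", range.getFilter(fastMatchFilter, valueSource));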
static Filter getFilter(Query query) {
if (query instanceof ConstantScoreQuery) {
ConstantScoreQuery csq = (ConstantScoreQuery) query;
Filter filter = csq.getFilter();
if (filter != null) {
return filter;
} else {
return getFilter(csq.getQuery());
}
} else {
return null;
}
}
@Override
public DrillDownQuery clone() {
return new DrillDownQuery(config, query, drillDownDims);
@ -199,7 +232,63 @@ public final class DrillDownQuery extends Query {
if (query.clauses().size() == 0) {
return new MatchAllDocsQuery();
}
return query;
List<Filter> filters = new ArrayList<Filter>();
List<Query> queries = new ArrayList<Query>();
List<BooleanClause> clauses = query.clauses();
Query baseQuery;
int startIndex;
if (drillDownDims.size() == query.clauses().size()) {
baseQuery = new MatchAllDocsQuery();
startIndex = 0;
} else {
baseQuery = clauses.get(0).getQuery();
startIndex = 1;
}
for(int i=startIndex;i<clauses.size();i++) {
BooleanClause clause = clauses.get(i);
Query queryClause = clause.getQuery();
Filter filter = getFilter(queryClause);
if (filter != null) {
filters.add(filter);
} else {
queries.add(queryClause);
}
}
if (filters.isEmpty()) {
return query;
} else {
// Wrap all filters using FilteredQuery
// TODO: this is hackish; we need to do it because
// BooleanQuery can't be trusted to handle the
// "expensive filter" case. Really, each Filter should
// know its cost and we should take that more
// carefully into account when picking the right
// strategy/optimization:
Query wrapped;
if (queries.isEmpty()) {
wrapped = baseQuery;
} else {
// disable coord
BooleanQuery wrappedBQ = new BooleanQuery(true);
if ((baseQuery instanceof MatchAllDocsQuery) == false) {
wrappedBQ.add(baseQuery, BooleanClause.Occur.MUST);
}
for(Query q : queries) {
wrappedBQ.add(q, BooleanClause.Occur.MUST);
}
wrapped = wrappedBQ;
}
for(Filter filter : filters) {
wrapped = new FilteredQuery(wrapped, filter, FilteredQuery.QUERY_FIRST_FILTER_STRATEGY);
}
return wrapped;
}
}
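To make the wrapping concrete, the shape rewrite() produces for a base query plus two Filter drill-downs would be (illustrative, placeholder names):
Query rewritten = new FilteredQuery(
    new FilteredQuery(baseQuery, filter1, FilteredQuery.QUERY_FIRST_FILTER_STRATEGY),
    filter2, FilteredQuery.QUERY_FIRST_FILTER_STRATEGY);
// Each costly filter is consulted only for docs that already
// matched everything it wraps.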
@Override

View File

@ -26,12 +26,9 @@ import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
@ -40,11 +37,9 @@ import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.Weight;
/**
* Computes drill down and sideways counts for the provided
@ -172,153 +167,23 @@ public class DrillSideways {
drillSidewaysCollectors[i] = new FacetsCollector();
}
boolean useCollectorMethod = scoreSubDocsAtOnce();
Term[][] drillDownTerms = null;
if (!useCollectorMethod) {
// Optimistic: assume subQueries of the DDQ are either
// TermQuery or BQ OR of TermQuery; if this is wrong
// then we detect it and fall back to the more general
// but slower DrillSidewaysCollector:
drillDownTerms = new Term[clauses.length-startClause][];
for(int i=startClause;i<clauses.length;i++) {
Query q = clauses[i].getQuery();
// DrillDownQuery always wraps each subQuery in
// ConstantScoreQuery:
assert q instanceof ConstantScoreQuery;
q = ((ConstantScoreQuery) q).getQuery();
if (q instanceof TermQuery) {
drillDownTerms[i-startClause] = new Term[] {((TermQuery) q).getTerm()};
} else if (q instanceof BooleanQuery) {
BooleanQuery q2 = (BooleanQuery) q;
BooleanClause[] clauses2 = q2.getClauses();
drillDownTerms[i-startClause] = new Term[clauses2.length];
for(int j=0;j<clauses2.length;j++) {
if (clauses2[j].getQuery() instanceof TermQuery) {
drillDownTerms[i-startClause][j] = ((TermQuery) clauses2[j].getQuery()).getTerm();
} else {
useCollectorMethod = true;
break;
}
}
} else {
useCollectorMethod = true;
}
}
}
if (useCollectorMethod) {
// TODO: maybe we could push the "collector method"
// down into the optimized scorer to have a tighter
// integration ... and so TermQuery clauses could
// continue to run "optimized"
collectorMethod(query, baseQuery, startClause, hitCollector, drillDownCollector, drillSidewaysCollectors);
} else {
DrillSidewaysQuery dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownTerms);
searcher.search(dsq, hitCollector);
Query[] drillDownQueries = new Query[clauses.length-startClause];
for(int i=startClause;i<clauses.length;i++) {
drillDownQueries[i-startClause] = clauses[i].getQuery();
}
DrillSidewaysQuery dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce());
searcher.search(dsq, hitCollector);
return new DrillSidewaysResult(buildFacetsResult(drillDownCollector, drillSidewaysCollectors, drillDownDims.keySet().toArray(new String[drillDownDims.size()])), null);
}
/** Uses the more general but slower method of sideways
* counting. This method allows an arbitrary subQuery to
* implement the drill down for a given dimension. */
private void collectorMethod(DrillDownQuery ddq, Query baseQuery, int startClause, Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors) throws IOException {
BooleanClause[] clauses = ddq.getBooleanQuery().getClauses();
Map<String,Integer> drillDownDims = ddq.getDims();
BooleanQuery topQuery = new BooleanQuery(true);
final DrillSidewaysCollector collector = new DrillSidewaysCollector(hitCollector, drillDownCollector, drillSidewaysCollectors,
drillDownDims);
// TODO: if query is already a BQ we could copy that and
// add clauses to it, instead of doing BQ inside BQ
// (should be more efficient)? Problem is this can
// affect scoring (coord) ... too bad we can't disable
// coord on a clause by clause basis:
topQuery.add(baseQuery, BooleanClause.Occur.MUST);
// NOTE: in theory we could just make a single BQ, with
// +query a b c minShouldMatch=2, but in this case,
// annoyingly, BS2 wraps a sub-scorer that always
// returns 2 as the .freq(), not how many of the
// SHOULD clauses matched:
BooleanQuery subQuery = new BooleanQuery(true);
Query wrappedSubQuery = new QueryWrapper(subQuery,
new SetWeight() {
@Override
public void set(Weight w) {
collector.setWeight(w, -1);
}
});
Query constantScoreSubQuery = new ConstantScoreQuery(wrappedSubQuery);
// Don't impact score of original query:
constantScoreSubQuery.setBoost(0.0f);
topQuery.add(constantScoreSubQuery, BooleanClause.Occur.MUST);
// Unfortunately this sub-BooleanQuery
// will never get BS1 because today BS1 only works
// if topScorer=true... and actually we cannot use BS1
// anyways because we need subDocsScoredAtOnce:
int dimIndex = 0;
for(int i=startClause;i<clauses.length;i++) {
Query q = clauses[i].getQuery();
// DrillDownQuery always wraps each subQuery in
// ConstantScoreQuery:
assert q instanceof ConstantScoreQuery;
q = ((ConstantScoreQuery) q).getQuery();
final int finalDimIndex = dimIndex;
subQuery.add(new QueryWrapper(q,
new SetWeight() {
@Override
public void set(Weight w) {
collector.setWeight(w, finalDimIndex);
}
}),
BooleanClause.Occur.SHOULD);
dimIndex++;
}
// TODO: we could better optimize the "just one drill
// down" case w/ a separate [specialized]
// collector...
int minShouldMatch = drillDownDims.size()-1;
if (minShouldMatch == 0) {
// Must add another "fake" clause so BQ doesn't erase
// itself by rewriting to the single clause:
Query end = new MatchAllDocsQuery();
end.setBoost(0.0f);
subQuery.add(end, BooleanClause.Occur.SHOULD);
minShouldMatch++;
}
subQuery.setMinimumNumberShouldMatch(minShouldMatch);
// System.out.println("EXE " + topQuery);
// Collects against the passed-in
// drillDown/SidewaysCollectors as a side effect:
searcher.search(topQuery, collector);
}
/**
* Search, sorting by {@link Sort}, and computing
* drill down and sideways counts.
*/
public DrillSidewaysResult search(DrillDownQuery query,
Filter filter, FieldDoc after, int topN, Sort sort, boolean doDocScores,
boolean doMaxScore) throws IOException {
Filter filter, FieldDoc after, int topN, Sort sort, boolean doDocScores,
boolean doMaxScore) throws IOException {
if (filter != null) {
query = new DrillDownQuery(config, filter, query);
}
@ -355,7 +220,7 @@ public class DrillSideways {
* drill down and sideways counts.
*/
public DrillSidewaysResult search(ScoreDoc after,
DrillDownQuery query, int topN) throws IOException {
DrillDownQuery query, int topN) throws IOException {
int limit = searcher.getIndexReader().maxDoc();
if (limit == 0) {
limit = 1; // the collector does not allow numHits = 0
@ -367,14 +232,15 @@ public class DrillSideways {
}
/** Override this and return true if your collector
* (e.g., ToParentBlockJoinCollector) expects all
* (e.g., {@code ToParentBlockJoinCollector}) expects all
* sub-scorers to be positioned on the document being
* collected. This will cause some performance loss;
* default is false. Note that if you return true from
* this method (in a subclass) be sure your collector
* also returns false from {@link
* Collector#acceptsDocsOutOfOrder}: this will trick
* BooleanQuery into also scoring all subDocs at once. */
* {@code BooleanQuery} into also scoring all subDocs at
* once. */
protected boolean scoreSubDocsAtOnce() {
return false;
}
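For example, a subclass that needs docs-at-once scoring (sketch; searcher, config and taxoReader assumed in scope):
DrillSideways ds = new DrillSideways(searcher, config, taxoReader) {
  @Override
  protected boolean scoreSubDocsAtOnce() {
    return true;  // e.g. when collecting into a ToParentBlockJoinCollector
  }
};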
@ -394,54 +260,5 @@ public class DrillSideways {
this.hits = hits;
}
}
private interface SetWeight {
public void set(Weight w);
}
/** Just records which Weight was given out for the
* (possibly rewritten) Query. */
private static class QueryWrapper extends Query {
private final Query originalQuery;
private final SetWeight setter;
public QueryWrapper(Query originalQuery, SetWeight setter) {
this.originalQuery = originalQuery;
this.setter = setter;
}
@Override
public Weight createWeight(final IndexSearcher searcher) throws IOException {
Weight w = originalQuery.createWeight(searcher);
setter.set(w);
return w;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query rewritten = originalQuery.rewrite(reader);
if (rewritten != originalQuery) {
return new QueryWrapper(rewritten, setter);
} else {
return this;
}
}
@Override
public String toString(String s) {
return originalQuery.toString(s);
}
@Override
public boolean equals(Object o) {
if (!(o instanceof QueryWrapper)) return false;
final QueryWrapper other = (QueryWrapper) o;
return super.equals(o) && originalQuery.equals(other.originalQuery);
}
@Override
public int hashCode() {
return super.hashCode() * 31 + originalQuery.hashCode();
}
}
}

View File

@ -1,188 +0,0 @@
package org.apache.lucene.facet;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.IdentityHashMap;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer.ChildScorer;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
/** Collector that scrutinizes each hit to determine if it
* passed all constraints (a true hit) or if it missed
* exactly one dimension (a near-miss, to count for
* drill-sideways counts on that dimension). */
class DrillSidewaysCollector extends Collector {
private final Collector hitCollector;
private final Collector drillDownCollector;
private final Collector[] drillSidewaysCollectors;
private final Scorer[] subScorers;
private final int exactCount;
// Maps Weight to either -1 (mainQuery) or to integer
// index of the dims drillDown. We need this when
// visiting the child scorers to correlate back to the
// right scorers:
private final Map<Weight,Integer> weightToIndex = new IdentityHashMap<Weight,Integer>();
private Scorer mainScorer;
public DrillSidewaysCollector(Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors,
Map<String,Integer> dims) {
this.hitCollector = hitCollector;
this.drillDownCollector = drillDownCollector;
this.drillSidewaysCollectors = drillSidewaysCollectors;
subScorers = new Scorer[dims.size()];
if (dims.size() == 1) {
// When we have only one dim, we insert the
// MatchAllDocsQuery, bringing the clause count to
// 2:
exactCount = 2;
} else {
exactCount = dims.size();
}
}
@Override
public void collect(int doc) throws IOException {
//System.out.println("collect doc=" + doc + " main.freq=" + mainScorer.freq() + " main.doc=" + mainScorer.docID() + " exactCount=" + exactCount);
if (mainScorer == null) {
// This segment did not have any docs with any
// drill-down field & value:
return;
}
if (mainScorer.freq() == exactCount) {
// All sub-clauses from the drill-down filters
// matched, so this is a "real" hit, so we first
// collect in both the hitCollector and the
// drillDown collector:
//System.out.println(" hit " + drillDownCollector);
hitCollector.collect(doc);
if (drillDownCollector != null) {
drillDownCollector.collect(doc);
}
// Also collect across all drill-sideways counts so
// we "merge in" drill-down counts for this
// dimension.
for(int i=0;i<subScorers.length;i++) {
// This cannot be null, because it was a hit,
// meaning all drill-down dims matched, so all
// dims must have non-null scorers:
assert subScorers[i] != null;
int subDoc = subScorers[i].docID();
assert subDoc == doc;
drillSidewaysCollectors[i].collect(doc);
}
} else {
boolean found = false;
for(int i=0;i<subScorers.length;i++) {
if (subScorers[i] == null) {
// This segment did not have any docs with this
// drill-down field & value:
drillSidewaysCollectors[i].collect(doc);
assert allMatchesFrom(i+1, doc);
found = true;
break;
}
int subDoc = subScorers[i].docID();
//System.out.println(" i=" + i + " sub: " + subDoc);
if (subDoc != doc) {
//System.out.println(" +ds[" + i + "]");
assert subDoc > doc: "subDoc=" + subDoc + " doc=" + doc;
drillSidewaysCollectors[i].collect(doc);
assert allMatchesFrom(i+1, doc);
found = true;
break;
}
}
assert found;
}
}
// Only used by assert:
private boolean allMatchesFrom(int startFrom, int doc) {
for(int i=startFrom;i<subScorers.length;i++) {
assert subScorers[i].docID() == doc;
}
return true;
}
@Override
public boolean acceptsDocsOutOfOrder() {
// We actually could accept docs out of order, but, we
// need to force BooleanScorer2 so that the
// sub-scorers are "on" each docID we are collecting:
return false;
}
@Override
public void setNextReader(AtomicReaderContext leaf) throws IOException {
//System.out.println("DS.setNextReader reader=" + leaf.reader());
hitCollector.setNextReader(leaf);
if (drillDownCollector != null) {
drillDownCollector.setNextReader(leaf);
}
for(Collector dsc : drillSidewaysCollectors) {
dsc.setNextReader(leaf);
}
}
void setWeight(Weight weight, int index) {
assert !weightToIndex.containsKey(weight);
weightToIndex.put(weight, index);
}
private void findScorers(Scorer scorer) {
Integer index = weightToIndex.get(scorer.getWeight());
if (index != null) {
if (index.intValue() == -1) {
mainScorer = scorer;
} else {
subScorers[index] = scorer;
}
}
for(ChildScorer child : scorer.getChildren()) {
findScorers(child.child);
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
mainScorer = null;
Arrays.fill(subScorers, null);
findScorers(scorer);
hitCollector.setScorer(scorer);
if (drillDownCollector != null) {
drillDownCollector.setScorer(scorer);
}
for(Collector dsc : drillSidewaysCollectors) {
dsc.setScorer(scorer);
}
}
}

View File

@ -19,15 +19,13 @@ package org.apache.lucene.facet;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
@ -35,19 +33,21 @@ import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
/** Only purpose is to punch through and return a
* SimpleDrillSidewaysScorer */
* DrillSidewaysScorer */
class DrillSidewaysQuery extends Query {
final Query baseQuery;
final Collector drillDownCollector;
final Collector[] drillSidewaysCollectors;
final Term[][] drillDownTerms;
final Query[] drillDownQueries;
final boolean scoreSubDocsAtOnce;
DrillSidewaysQuery(Query baseQuery, Collector drillDownCollector, Collector[] drillSidewaysCollectors, Term[][] drillDownTerms) {
DrillSidewaysQuery(Query baseQuery, Collector drillDownCollector, Collector[] drillSidewaysCollectors, Query[] drillDownQueries, boolean scoreSubDocsAtOnce) {
this.baseQuery = baseQuery;
this.drillDownCollector = drillDownCollector;
this.drillSidewaysCollectors = drillSidewaysCollectors;
this.drillDownTerms = drillDownTerms;
this.drillDownQueries = drillDownQueries;
this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
}
@Override
@ -68,13 +68,25 @@ class DrillSidewaysQuery extends Query {
if (newQuery == baseQuery) {
return this;
} else {
return new DrillSidewaysQuery(newQuery, drillDownCollector, drillSidewaysCollectors, drillDownTerms);
return new DrillSidewaysQuery(newQuery, drillDownCollector, drillSidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce);
}
}
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
final Weight baseWeight = baseQuery.createWeight(searcher);
final Object[] drillDowns = new Object[drillDownQueries.length];
for(int dim=0;dim<drillDownQueries.length;dim++) {
Query query = drillDownQueries[dim];
Filter filter = DrillDownQuery.getFilter(query);
if (filter != null) {
drillDowns[dim] = filter;
} else {
// TODO: would be nice if we could say "we will do no
// scoring" here....
drillDowns[dim] = searcher.rewrite(query).createWeight(searcher);
}
}
return new Weight() {
@Override
@ -108,59 +120,82 @@ class DrillSidewaysQuery extends Query {
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
DrillSidewaysScorer.DocsEnumsAndFreq[] dims = new DrillSidewaysScorer.DocsEnumsAndFreq[drillDownTerms.length];
TermsEnum termsEnum = null;
String lastField = null;
// TODO: it could be better if we take acceptDocs
// into account instead of baseScorer?
Scorer baseScorer = baseWeight.scorer(context, scoreDocsInOrder, false, acceptDocs);
DrillSidewaysScorer.DocsAndCost[] dims = new DrillSidewaysScorer.DocsAndCost[drillDowns.length];
int nullCount = 0;
for(int dim=0;dim<dims.length;dim++) {
dims[dim] = new DrillSidewaysScorer.DocsEnumsAndFreq();
dims[dim] = new DrillSidewaysScorer.DocsAndCost();
dims[dim].sidewaysCollector = drillSidewaysCollectors[dim];
String field = drillDownTerms[dim][0].field();
dims[dim].dim = drillDownTerms[dim][0].text();
if (lastField == null || !lastField.equals(field)) {
AtomicReader reader = context.reader();
Terms terms = reader.terms(field);
if (terms != null) {
termsEnum = terms.iterator(null);
if (drillDowns[dim] instanceof Filter) {
// Pass null for acceptDocs because we already
// passed it to baseScorer and baseScorer is
// MUST'd here
DocIdSet dis = ((Filter) drillDowns[dim]).getDocIdSet(context, null);
if (dis == null) {
continue;
}
Bits bits = dis.bits();
if (bits != null) {
// TODO: this logic is too naive: the
// existence of bits() in DIS today means
// either "I'm a cheap FixedBitSet so apply me down
// low as you decode the postings" or "I'm so
// horribly expensive so apply me after all
// other Query/Filter clauses pass"
// Filter supports random access; use that to
// prevent .advance() on costly filters:
dims[dim].bits = bits;
// TODO: Filter needs to express its expected
// cost somehow, before pulling the iterator;
// we should use that here to set the order to
// check the filters:
} else {
termsEnum = null;
}
lastField = field;
}
dims[dim].docsEnums = new DocsEnum[drillDownTerms[dim].length];
if (termsEnum == null) {
nullCount++;
continue;
}
for(int i=0;i<drillDownTerms[dim].length;i++) {
if (termsEnum.seekExact(drillDownTerms[dim][i].bytes())) {
DocsEnum docsEnum = termsEnum.docs(null, null, 0);
if (docsEnum != null) {
dims[dim].docsEnums[i] = docsEnum;
dims[dim].maxCost = Math.max(dims[dim].maxCost, docsEnum.cost());
DocIdSetIterator disi = dis.iterator();
if (disi == null) {
nullCount++;
continue;
}
dims[dim].disi = disi;
}
} else {
DocIdSetIterator disi = ((Weight) drillDowns[dim]).scorer(context, true, false, null);
if (disi == null) {
nullCount++;
continue;
}
dims[dim].disi = disi;
}
}
if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) {
// If more than one dim has no matches, then there
// are no hits nor drill-sideways counts. Or, if we
// have only one dim and that dim has no matches,
// same thing.
//if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) {
if (nullCount > 1) {
return null;
}
// Sort drill-downs by most restrictive first:
Arrays.sort(dims);
// TODO: it could be better if we take acceptDocs
// into account instead of baseScorer?
Scorer baseScorer = baseWeight.scorer(context, scoreDocsInOrder, false, acceptDocs);
if (baseScorer == null) {
return null;
}
return new DrillSidewaysScorer(this, context,
baseScorer,
drillDownCollector, dims);
baseScorer,
drillDownCollector, dims,
scoreSubDocsAtOnce);
}
};
}
@ -174,7 +209,7 @@ class DrillSidewaysQuery extends Query {
result = prime * result + ((baseQuery == null) ? 0 : baseQuery.hashCode());
result = prime * result
+ ((drillDownCollector == null) ? 0 : drillDownCollector.hashCode());
result = prime * result + Arrays.hashCode(drillDownTerms);
result = prime * result + Arrays.hashCode(drillDownQueries);
result = prime * result + Arrays.hashCode(drillSidewaysCollectors);
return result;
}
@ -191,7 +226,7 @@ class DrillSidewaysQuery extends Query {
if (drillDownCollector == null) {
if (other.drillDownCollector != null) return false;
} else if (!drillDownCollector.equals(other.drillDownCollector)) return false;
if (!Arrays.equals(drillDownTerms, other.drillDownTerms)) return false;
if (!Arrays.equals(drillDownQueries, other.drillDownQueries)) return false;
if (!Arrays.equals(drillSidewaysCollectors, other.drillSidewaysCollectors)) return false;
return true;
}

View File

@ -22,10 +22,11 @@ import java.util.Collection;
import java.util.Collections;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
class DrillSidewaysScorer extends Scorer {
@ -34,13 +35,15 @@ class DrillSidewaysScorer extends Scorer {
private final Collector drillDownCollector;
private final DocsEnumsAndFreq[] dims;
private final DocsAndCost[] dims;
// DrillDown DocsEnums:
private final Scorer baseScorer;
private final AtomicReaderContext context;
final boolean scoreSubDocsAtOnce;
private static final int CHUNK = 2048;
private static final int MASK = CHUNK-1;
@ -48,12 +51,13 @@ class DrillSidewaysScorer extends Scorer {
private float collectScore;
DrillSidewaysScorer(Weight w, AtomicReaderContext context, Scorer baseScorer, Collector drillDownCollector,
DocsEnumsAndFreq[] dims) {
DocsAndCost[] dims, boolean scoreSubDocsAtOnce) {
super(w);
this.dims = dims;
this.context = context;
this.baseScorer = baseScorer;
this.drillDownCollector = drillDownCollector;
this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
}
@Override
@ -67,7 +71,7 @@ class DrillSidewaysScorer extends Scorer {
drillDownCollector.setScorer(this);
drillDownCollector.setNextReader(context);
}
for(DocsEnumsAndFreq dim : dims) {
for (DocsAndCost dim : dims) {
dim.sidewaysCollector.setScorer(this);
dim.sidewaysCollector.setNextReader(context);
}
@ -79,26 +83,38 @@ class DrillSidewaysScorer extends Scorer {
// Position all scorers to their first matching doc:
baseScorer.nextDoc();
for(DocsEnumsAndFreq dim : dims) {
for (DocsEnum docsEnum : dim.docsEnums) {
if (docsEnum != null) {
docsEnum.nextDoc();
}
int numBits = 0;
for (DocsAndCost dim : dims) {
if (dim.disi != null) {
dim.disi.nextDoc();
} else if (dim.bits != null) {
numBits++;
}
}
final int numDims = dims.length;
DocsEnum[][] docsEnums = new DocsEnum[numDims][];
Collector[] sidewaysCollectors = new Collector[numDims];
Bits[] bits = new Bits[numBits];
Collector[] bitsSidewaysCollectors = new Collector[numBits];
DocIdSetIterator[] disis = new DocIdSetIterator[numDims-numBits];
Collector[] sidewaysCollectors = new Collector[numDims-numBits];
long drillDownCost = 0;
for(int dim=0;dim<numDims;dim++) {
docsEnums[dim] = dims[dim].docsEnums;
sidewaysCollectors[dim] = dims[dim].sidewaysCollector;
for (DocsEnum de : dims[dim].docsEnums) {
if (de != null) {
drillDownCost += de.cost();
int disiUpto = 0;
int bitsUpto = 0;
for (int dim=0;dim<numDims;dim++) {
DocIdSetIterator disi = dims[dim].disi;
if (dims[dim].bits == null) {
disis[disiUpto] = disi;
sidewaysCollectors[disiUpto] = dims[dim].sidewaysCollector;
disiUpto++;
if (disi != null) {
drillDownCost += disi.cost();
}
} else {
bits[bitsUpto] = dims[dim].bits;
bitsSidewaysCollectors[bitsUpto] = dims[dim].sidewaysCollector;
bitsUpto++;
}
}
@ -114,21 +130,95 @@ class DrillSidewaysScorer extends Scorer {
}
*/
if (baseQueryCost < drillDownCost/10) {
//System.out.println("baseAdvance");
doBaseAdvanceScoring(collector, docsEnums, sidewaysCollectors);
} else if (numDims > 1 && (dims[1].maxCost < baseQueryCost/10)) {
if (bitsUpto > 0 || scoreSubDocsAtOnce || baseQueryCost < drillDownCost/10) {
//System.out.println("queryFirst: baseScorer=" + baseScorer + " disis.length=" + disis.length + " bits.length=" + bits.length);
doQueryFirstScoring(collector, disis, sidewaysCollectors, bits, bitsSidewaysCollectors);
} else if (numDims > 1 && (dims[1].disi == null || dims[1].disi.cost() < baseQueryCost/10)) {
//System.out.println("drillDownAdvance");
doDrillDownAdvanceScoring(collector, docsEnums, sidewaysCollectors);
doDrillDownAdvanceScoring(collector, disis, sidewaysCollectors);
} else {
//System.out.println("union");
doUnionScoring(collector, docsEnums, sidewaysCollectors);
doUnionScoring(collector, disis, sidewaysCollectors);
}
}
/** Used when base query is highly constraining vs the
* drilldowns, or when the docs must be scored at once
* (i.e., like BooleanScorer2, not BooleanScorer). In
* this case we just .next() on base and .advance() on
* the dim filters. */
private void doQueryFirstScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors,
Bits[] bits, Collector[] bitsSidewaysCollectors) throws IOException {
//if (DEBUG) {
// System.out.println(" doQueryFirstScoring");
//}
int docID = baseScorer.docID();
nextDoc: while (docID != NO_MORE_DOCS) {
Collector failedCollector = null;
for (int i=0;i<disis.length;i++) {
// TODO: should we sort this 2nd dimension of
// docsEnums from most frequent to least?
DocIdSetIterator disi = disis[i];
if (disi != null && disi.docID() < docID) {
disi.advance(docID);
}
if (disi == null || disi.docID() > docID) {
if (failedCollector != null) {
// More than one dim fails on this document, so
// it's neither a hit nor a near-miss; move to
// next doc:
docID = baseScorer.nextDoc();
continue nextDoc;
} else {
failedCollector = sidewaysCollectors[i];
}
}
}
// TODO: for the "non-costly Bits" we really should
// have passed them down as acceptDocs, but
// unfortunately we cannot distinguish today between
// "bits() is so costly that you should apply it last"
// from "bits() is so cheap that you should apply it
// everywhere down low"
// Fold in Filter Bits last, since they may be costly:
for(int i=0;i<bits.length;i++) {
if (bits[i].get(docID) == false) {
if (failedCollector != null) {
// More than one dim fails on this document, so
// it's neither a hit nor a near-miss; move to
// next doc:
docID = baseScorer.nextDoc();
continue nextDoc;
} else {
failedCollector = bitsSidewaysCollectors[i];
}
}
}
collectDocID = docID;
// TODO: we could score on demand instead since we are
// docs-at-a-time (daat) here:
collectScore = baseScorer.score();
if (failedCollector == null) {
// Hit passed all filters, so it's "real":
collectHit(collector, sidewaysCollectors, bitsSidewaysCollectors);
} else {
// Hit missed exactly one filter:
collectNearMiss(failedCollector);
}
docID = baseScorer.nextDoc();
}
}
/** Used when drill downs are highly constraining vs
* baseQuery. */
private void doDrillDownAdvanceScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
private void doDrillDownAdvanceScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors) throws IOException {
final int maxDoc = context.reader().maxDoc();
final int numDims = dims.length;
@ -157,11 +247,9 @@ class DrillSidewaysScorer extends Scorer {
//if (DEBUG) {
// System.out.println(" dim0");
//}
for(DocsEnum docsEnum : docsEnums[0]) {
if (docsEnum == null) {
continue;
}
int docID = docsEnum.docID();
DocIdSetIterator disi = disis[0];
if (disi != null) {
int docID = disi.docID();
while (docID < nextChunkStart) {
int slot = docID & MASK;
@ -176,19 +264,17 @@ class DrillSidewaysScorer extends Scorer {
counts[slot] = 1;
}
docID = docsEnum.nextDoc();
docID = disi.nextDoc();
}
}
// Second dim:
//if (DEBUG) {
// System.out.println(" dim1");
//}
for(DocsEnum docsEnum : docsEnums[1]) {
if (docsEnum == null) {
continue;
}
int docID = docsEnum.docID();
disi = disis[1];
if (disi != null) {
int docID = disi.docID();
while (docID < nextChunkStart) {
int slot = docID & MASK;
@ -218,7 +304,7 @@ class DrillSidewaysScorer extends Scorer {
}
}
docID = docsEnum.nextDoc();
docID = disi.nextDoc();
}
}
@ -272,15 +358,13 @@ class DrillSidewaysScorer extends Scorer {
// TODO: factor this out & share w/ union scorer,
// except we start from dim=2 instead:
for(int dim=2;dim<numDims;dim++) {
for (int dim=2;dim<numDims;dim++) {
//if (DEBUG) {
// System.out.println(" dim=" + dim + " [" + dims[dim].dim + "]");
//}
for(DocsEnum docsEnum : docsEnums[dim]) {
if (docsEnum == null) {
continue;
}
int docID = docsEnum.docID();
disi = disis[dim];
if (disi != null) {
int docID = disi.docID();
while (docID < nextChunkStart) {
int slot = docID & MASK;
if (docIDs[slot] == docID && counts[slot] >= dim) {
@ -299,8 +383,9 @@ class DrillSidewaysScorer extends Scorer {
counts[slot] = dim+1;
}
}
// TODO: sometimes use advance?
docID = docsEnum.nextDoc();
docID = disi.nextDoc();
}
}
}
@ -309,7 +394,7 @@ class DrillSidewaysScorer extends Scorer {
//if (DEBUG) {
// System.out.println(" now collect: " + filledCount + " hits");
//}
for(int i=0;i<filledCount;i++) {
for (int i=0;i<filledCount;i++) {
int slot = filledSlots[i];
collectDocID = docIDs[slot];
collectScore = scores[slot];
@ -319,7 +404,7 @@ class DrillSidewaysScorer extends Scorer {
if (counts[slot] == 1+numDims) {
collectHit(collector, sidewaysCollectors);
} else if (counts[slot] == numDims) {
collectNearMiss(sidewaysCollectors, missingDims[slot]);
collectNearMiss(sidewaysCollectors[missingDims[slot]]);
}
}
@ -331,92 +416,7 @@ class DrillSidewaysScorer extends Scorer {
}
}
/** Used when base query is highly constraining vs the
* drilldowns; in this case we just .next() on base and
* .advance() on the dims. */
private void doBaseAdvanceScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
//if (DEBUG) {
// System.out.println(" doBaseAdvanceScoring");
//}
int docID = baseScorer.docID();
final int numDims = dims.length;
nextDoc: while (docID != NO_MORE_DOCS) {
int failedDim = -1;
for(int dim=0;dim<numDims;dim++) {
// TODO: should we sort this 2nd dimension of
// docsEnums from most frequent to least?
boolean found = false;
for(DocsEnum docsEnum : docsEnums[dim]) {
if (docsEnum == null) {
continue;
}
if (docsEnum.docID() < docID) {
docsEnum.advance(docID);
}
if (docsEnum.docID() == docID) {
found = true;
break;
}
}
if (!found) {
if (failedDim != -1) {
// More than one dim fails on this document, so
// it's neither a hit nor a near-miss; move to
// next doc:
docID = baseScorer.nextDoc();
continue nextDoc;
} else {
failedDim = dim;
}
}
}
collectDocID = docID;
// TODO: we could score on demand instead since we are
// daat here:
collectScore = baseScorer.score();
if (failedDim == -1) {
collectHit(collector, sidewaysCollectors);
} else {
collectNearMiss(sidewaysCollectors, failedDim);
}
docID = baseScorer.nextDoc();
}
}
private void collectHit(Collector collector, Collector[] sidewaysCollectors) throws IOException {
//if (DEBUG) {
// System.out.println(" hit");
//}
collector.collect(collectDocID);
if (drillDownCollector != null) {
drillDownCollector.collect(collectDocID);
}
// TODO: we could "fix" faceting of the sideways counts
// to do this "union" (of the drill down hits) in the
// end instead:
// Tally sideways counts:
for(int dim=0;dim<sidewaysCollectors.length;dim++) {
sidewaysCollectors[dim].collect(collectDocID);
}
}
private void collectNearMiss(Collector[] sidewaysCollectors, int dim) throws IOException {
//if (DEBUG) {
// System.out.println(" missingDim=" + dim);
//}
sidewaysCollectors[dim].collect(collectDocID);
}
private void doUnionScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
private void doUnionScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors) throws IOException {
//if (DEBUG) {
// System.out.println(" doUnionScoring");
//}
@ -478,11 +478,9 @@ class DrillSidewaysScorer extends Scorer {
//if (DEBUG) {
// System.out.println(" dim=0 [" + dims[0].dim + "]");
//}
for(DocsEnum docsEnum : docsEnums[0]) {
if (docsEnum == null) {
continue;
}
docID = docsEnum.docID();
DocIdSetIterator disi = disis[0];
if (disi != null) {
docID = disi.docID();
//if (DEBUG) {
// System.out.println(" start docID=" + docID);
//}
@ -495,19 +493,18 @@ class DrillSidewaysScorer extends Scorer {
missingDims[slot] = 1;
counts[slot] = 2;
}
docID = docsEnum.nextDoc();
docID = disi.nextDoc();
}
}
for(int dim=1;dim<numDims;dim++) {
for (int dim=1;dim<numDims;dim++) {
//if (DEBUG) {
// System.out.println(" dim=" + dim + " [" + dims[dim].dim + "]");
//}
for(DocsEnum docsEnum : docsEnums[dim]) {
if (docsEnum == null) {
continue;
}
docID = docsEnum.docID();
disi = disis[dim];
if (disi != null) {
docID = disi.docID();
//if (DEBUG) {
// System.out.println(" start docID=" + docID);
//}
@ -530,47 +527,14 @@ class DrillSidewaysScorer extends Scorer {
counts[slot] = dim+1;
}
}
docID = docsEnum.nextDoc();
docID = disi.nextDoc();
}
// TODO: sometimes use advance?
/*
int docBase = nextChunkStart - CHUNK;
for(int i=0;i<filledCount;i++) {
int slot = filledSlots[i];
docID = docBase + filledSlots[i];
if (docIDs[slot] == docID && counts[slot] >= dim) {
// This doc is still in the running...
int ddDocID = docsEnum.docID();
if (ddDocID < docID) {
ddDocID = docsEnum.advance(docID);
}
if (ddDocID == docID) {
if (missingDims[slot] >= dim && counts[slot] == allMatchCount) {
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " count=" + (dim+2));
// }
missingDims[slot] = dim+1;
counts[slot] = dim+2;
} else {
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " missing count=" + (dim+1));
// }
counts[slot] = dim+1;
}
}
}
}
*/
}
}
// Collect:
//if (DEBUG) {
// System.out.println(" now collect: " + filledCount + " hits");
//}
for(int i=0;i<filledCount;i++) {
//System.out.println(" now collect: " + filledCount + " hits");
for (int i=0;i<filledCount;i++) {
// NOTE: This is actually in-order collection,
// because we only accept docs originally returned by
// the baseScorer (ie that Scorer is AND'd)
@ -586,7 +550,7 @@ class DrillSidewaysScorer extends Scorer {
collectHit(collector, sidewaysCollectors);
} else if (counts[slot] == numDims) {
//System.out.println(" sw");
collectNearMiss(sidewaysCollectors, missingDims[slot]);
collectNearMiss(sidewaysCollectors[missingDims[slot]]);
}
}
@ -598,6 +562,56 @@ class DrillSidewaysScorer extends Scorer {
}
}
private void collectHit(Collector collector, Collector[] sidewaysCollectors) throws IOException {
//if (DEBUG) {
// System.out.println(" hit");
//}
collector.collect(collectDocID);
if (drillDownCollector != null) {
drillDownCollector.collect(collectDocID);
}
// TODO: we could "fix" faceting of the sideways counts
// to do this "union" (of the drill down hits) in the
// end instead:
// Tally sideways counts:
for (int dim=0;dim<sidewaysCollectors.length;dim++) {
sidewaysCollectors[dim].collect(collectDocID);
}
}
private void collectHit(Collector collector, Collector[] sidewaysCollectors, Collector[] sidewaysCollectors2) throws IOException {
//if (DEBUG) {
// System.out.println(" hit");
//}
collector.collect(collectDocID);
if (drillDownCollector != null) {
drillDownCollector.collect(collectDocID);
}
// TODO: we could "fix" faceting of the sideways counts
// to do this "union" (of the drill down hits) in the
// end instead:
// Tally sideways counts:
for (int i=0;i<sidewaysCollectors.length;i++) {
sidewaysCollectors[i].collect(collectDocID);
}
for (int i=0;i<sidewaysCollectors2.length;i++) {
sidewaysCollectors2[i].collect(collectDocID);
}
}
private void collectNearMiss(Collector sidewaysCollector) throws IOException {
//if (DEBUG) {
// System.out.println(" missingDim=" + dim);
//}
sidewaysCollector.collect(collectDocID);
}
@Override
public int docID() {
return collectDocID;
@ -633,18 +647,27 @@ class DrillSidewaysScorer extends Scorer {
return Collections.singletonList(new ChildScorer(baseScorer, "MUST"));
}
static class DocsEnumsAndFreq implements Comparable<DocsEnumsAndFreq> {
DocsEnum[] docsEnums;
// Max cost for all docsEnums for this dim:
long maxCost;
static class DocsAndCost implements Comparable<DocsAndCost> {
// Iterator for docs matching this dim's filter, or ...
DocIdSetIterator disi;
// Random access bits:
Bits bits;
Collector sidewaysCollector;
String dim;
@Override
public int compareTo(DocsEnumsAndFreq other) {
if (maxCost < other.maxCost) {
public int compareTo(DocsAndCost other) {
if (disi == null) {
if (other.disi == null) {
return 0;
} else {
return 1;
}
} else if (other.disi == null) {
return -1;
} else if (maxCost > other.maxCost) {
} else if (disi.cost() < other.disi.cost()) {
return -1;
} else if (disi.cost() > other.disi.cost()) {
return 1;
} else {
return 0;

View File

@ -26,11 +26,12 @@ import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.NumericUtils;
/** Represents a range over double values. */
/** Represents a range over double values.
*
* @lucene.experimental */
public final class DoubleRange extends Range {
final double minIncl;
final double maxIncl;
@ -99,14 +100,15 @@ public final class DoubleRange extends Range {
return "DoubleRange(" + minIncl + " to " + maxIncl + ")";
}
/** Returns a new {@link Filter} accepting only documents
* in this range. Note that this filter is not
* efficient: it's a linear scan of all docs, testing
* each value. If the {@link ValueSource} is static,
* e.g. an indexed numeric field, then it's more
* efficient to use {@link NumericRangeFilter}. */
public Filter getFilter(final ValueSource valueSource) {
@Override
public Filter getFilter(final Filter fastMatchFilter, final ValueSource valueSource) {
return new Filter() {
@Override
public String toString() {
return "Filter(" + DoubleRange.this.toString() + ")";
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
@ -119,49 +121,48 @@ public final class DoubleRange extends Range {
final int maxDoc = context.reader().maxDoc();
final Bits fastMatchBits;
if (fastMatchFilter != null) {
DocIdSet dis = fastMatchFilter.getDocIdSet(context, null);
if (dis == null) {
// No documents match
return null;
}
fastMatchBits = dis.bits();
if (fastMatchBits == null) {
throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits");
}
} else {
fastMatchBits = null;
}
return new DocIdSet() {

  @Override
  public Bits bits() {
    return new Bits() {
      @Override
      public boolean get(int docID) {
        if (acceptDocs != null && acceptDocs.get(docID) == false) {
          return false;
        }
        if (fastMatchBits != null && fastMatchBits.get(docID) == false) {
          return false;
        }
        return accept(values.doubleVal(docID));
      }

      @Override
      public int length() {
        return maxDoc;
      }
    };
  }

  @Override
  public DocIdSetIterator iterator() {
    throw new UnsupportedOperationException("this filter can only be accessed via bits()");
  }
};
}
};
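To make the new signature concrete, here is a hedged sketch of the distance-faceting use case from the CHANGES entry. The field names and the idea of reading a precomputed per-document distance are assumptions for illustration; a real application would derive the distance from indexed coordinates. NumericRangeFilter works as the fast match because its DocIdSet is FixedBitSet-backed and therefore supports bits().

import org.apache.lucene.facet.range.DoubleRange;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.NumericRangeFilter;

public class DistanceFilterSketch {
  public static Filter underOneKm() {
    // Hypothetical: per-document distance stored in a DocValues field.
    ValueSource distance = new DoubleFieldSource("distance");

    // Cheap random-access pre-filter, e.g. a crude bounding box:
    Filter fastMatch = NumericRangeFilter.newDoubleRange(
        "latitude", 40.0, 41.0, true, true);

    // The costly accept(...) check now runs only for documents the
    // fast-match filter sets:
    DoubleRange oneKm = new DoubleRange("< 1 km", 0.0, true, 1.0, false);
    return oneKm.getFilter(fastMatch, distance);
  }
}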

View File

@ -24,12 +24,15 @@ import java.util.List;
import org.apache.lucene.document.DoubleDocValuesField; // javadocs
import org.apache.lucene.document.FloatDocValuesField; // javadocs
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource; // javadocs
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.NumericUtils;
@ -61,7 +64,16 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts {
/** Create {@code RangeFacetCounts}, using the provided
* {@link ValueSource}. */
public DoubleRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, DoubleRange... ranges) throws IOException {
this(field, valueSource, hits, null, ranges);
}
/** Create {@code RangeFacetCounts}, using the provided
* {@link ValueSource}, and using the provided Filter as
* a fast-match: only documents passing the filter are
* checked for the matching ranges. The filter must be
* random access (implement {@link DocIdSet#bits}). */
public DoubleRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, Filter fastMatchFilter, DoubleRange... ranges) throws IOException {
super(field, ranges, fastMatchFilter);
count(valueSource, hits.getMatchingDocs());
}
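A hedged usage sketch of the new constructor (the field name, bounds and ranges are placeholders): collect hits once, then count only documents that pass the random-access fast-match filter.

import java.io.IOException;

import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.range.DoubleRange;
import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeFilter;

public class FastMatchCountsSketch {
  public static Facets count(IndexSearcher searcher) throws IOException {
    FacetsCollector fc = new FacetsCollector();
    searcher.search(new MatchAllDocsQuery(), fc);

    // Must be random access, i.e. implement DocIdSet.bits():
    Filter fastMatch = NumericRangeFilter.newDoubleRange(
        "field", 0.0, 100.0, true, true);

    return new DoubleRangeFacetCounts(
        "field", new DoubleFieldSource("field"), fc, fastMatch,
        new DoubleRange("small", 0.0, true, 10.0, false),
        new DoubleRange("large", 10.0, true, 100.0, true));
  }
}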
@ -84,10 +96,29 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts {
FunctionValues fv = valueSource.getValues(Collections.emptyMap(), hits.context);
totCount += hits.totalHits;
Bits bits;
if (fastMatchFilter != null) {
DocIdSet dis = fastMatchFilter.getDocIdSet(hits.context, null);
if (dis == null) {
// No documents match
continue;
}
bits = dis.bits();
if (bits == null) {
throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits");
}
} else {
bits = null;
}
DocIdSetIterator docs = hits.bits.iterator();
int doc;
while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (bits != null && bits.get(doc) == false) {
continue;
}
// Skip missing docs:
if (fv.exists(doc)) {
counter.add(NumericUtils.doubleToSortableLong(fv.doubleVal(doc)));

View File

@ -26,10 +26,11 @@ import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
/** Represents a range over long values.
 *
 * @lucene.experimental */
public final class LongRange extends Range {
final long minIncl;
final long maxIncl;
@ -91,14 +92,15 @@ public final class LongRange extends Range {
return "LongRange(" + minIncl + " to " + maxIncl + ")";
}
@Override
public Filter getFilter(final Filter fastMatchFilter, final ValueSource valueSource) {
return new Filter() {
@Override
public String toString() {
return "Filter(" + LongRange.this.toString() + ")";
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
@ -111,49 +113,48 @@ public final class LongRange extends Range {
final int maxDoc = context.reader().maxDoc();
final Bits fastMatchBits;
if (fastMatchFilter != null) {
DocIdSet dis = fastMatchFilter.getDocIdSet(context, null);
if (dis == null) {
// No documents match
return null;
}
fastMatchBits = dis.bits();
if (fastMatchBits == null) {
throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits");
}
} else {
fastMatchBits = null;
}
return new DocIdSet() {

  @Override
  public Bits bits() {
    return new Bits() {
      @Override
      public boolean get(int docID) {
        if (acceptDocs != null && acceptDocs.get(docID) == false) {
          return false;
        }
        if (fastMatchBits != null && fastMatchBits.get(docID) == false) {
          return false;
        }
        return accept(values.longVal(docID));
      }

      @Override
      public int length() {
        return maxDoc;
      }
    };
  }

  @Override
  public DocIdSetIterator iterator() {
    throw new UnsupportedOperationException("this filter can only be accessed via bits()");
  }
};
}
};
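Since this filter deliberately supports only bits(), it must not be asked to drive iteration. A hedged sketch (the field name and wrapped query are placeholders) of the FilteredQuery usage that the Range javadoc below recommends:

import java.io.IOException;

import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

public class QueryFirstSketch {
  public static TopDocs search(IndexSearcher searcher) throws IOException {
    LongRange range = new LongRange("small", 0L, true, 1000L, false);

    // QUERY_FIRST_FILTER_STRATEGY consults the filter's bits() only
    // for documents the query already matched, so iterator() is
    // never called:
    Query q = new FilteredQuery(
        new MatchAllDocsQuery(),
        range.getFilter(null, new LongFieldSource("field")),
        FilteredQuery.QUERY_FIRST_FILTER_STRATEGY);
    return searcher.search(q, 10);
  }
}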

View File

@ -22,11 +22,14 @@ import java.util.Collections;
import java.util.List;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.search.DocIdSetIterator;
/** {@link Facets} implementation that computes counts for
@ -50,7 +53,16 @@ public class LongRangeFacetCounts extends RangeFacetCounts {
/** Create {@code RangeFacetCounts}, using the provided
* {@link ValueSource}. */
public LongRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, LongRange... ranges) throws IOException {
this(field, valueSource, hits, null, ranges);
}
/** Create {@code RangeFacetCounts}, using the provided
* {@link ValueSource}, and using the provided Filter as
* a fast-match: only documents passing the filter are
* checked for the matching ranges. The filter must be
* random access (implement {@link DocIdSet#bits}). */
public LongRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, Filter fastMatchFilter, LongRange... ranges) throws IOException {
super(field, ranges, fastMatchFilter);
count(valueSource, hits.getMatchingDocs());
}
@ -65,9 +77,28 @@ public class LongRangeFacetCounts extends RangeFacetCounts {
FunctionValues fv = valueSource.getValues(Collections.emptyMap(), hits.context);
totCount += hits.totalHits;
Bits bits;
if (fastMatchFilter != null) {
DocIdSet dis = fastMatchFilter.getDocIdSet(hits.context, null);
if (dis == null) {
// No documents match
continue;
}
bits = dis.bits();
if (bits == null) {
throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits");
}
} else {
bits = null;
}
DocIdSetIterator docs = hits.bits.iterator();
int doc;
while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (bits != null && bits.get(doc) == false) {
continue;
}
// Skip missing docs:
if (fv.exists(doc)) {
counter.add(fv.longVal(doc));

View File

@ -17,6 +17,13 @@ package org.apache.lucene.facet.range;
* limitations under the License.
*/
import org.apache.lucene.facet.DrillDownQuery; // javadocs
import org.apache.lucene.facet.DrillSideways; // javadocs
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery; // javadocs
import org.apache.lucene.search.NumericRangeFilter; // javadocs
/** Base class for a single labeled range.
*
* @lucene.experimental */
@ -33,6 +40,32 @@ public abstract class Range {
this.label = label;
}
/** Returns a new {@link Filter} accepting only documents
* in this range. This filter is not general-purpose;
* you should either use it with {@link DrillSideways} by
* adding it to {@link DrillDownQuery#add}, or pass it to
* {@link FilteredQuery} using its {@link
* FilteredQuery#QUERY_FIRST_FILTER_STRATEGY}. If the
* {@link ValueSource} is static, e.g. an indexed numeric
* field, then it may be more efficient to use {@link
* NumericRangeFilter}. The provided fastMatchFilter,
* if non-null, is consulted first, and the range is
* only checked for documents that the filter accepts. */
public abstract Filter getFilter(Filter fastMatchFilter, ValueSource valueSource);
/** Returns a new {@link Filter} accepting only documents
* in this range. This filter is not general-purpose;
* you should either use it with {@link DrillSideways} by
* adding it to {@link DrillDownQuery#add}, or pass it to
* {@link FilteredQuery} using its {@link
* FilteredQuery#QUERY_FIRST_FILTER_STRATEGY}. If the
* {@link ValueSource} is static, e.g. an indexed numeric
* field, then it may be more efficient to use {@link NumericRangeFilter}. */
public Filter getFilter(ValueSource valueSource) {
return getFilter(null, valueSource);
}
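For illustration, a hedged sketch of the drill-down path described above (the searcher, config, value source and fast-match filter are assumed to exist; the dim name and range are placeholders). Passed to DrillSideways.search, the same query also yields sideways counts for this dim's other ranges, as the test changes below exercise.

import java.io.IOException;

import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.range.DoubleRange;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;

public class RangeDrillDownSketch {
  public static TopDocs drillDown(IndexSearcher searcher, FacetsConfig config,
                                  ValueSource vs, Filter fastMatch) throws IOException {
    DoubleRange range = new DoubleRange("< 2", 0.0, true, 2.0, false);

    // The range's filter becomes a drill-down constraint on "field":
    DrillDownQuery ddq = new DrillDownQuery(config);
    ddq.add("field", range.getFilter(fastMatch, vs));
    return searcher.search(ddq, 10);
  }
}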
/** Invoked by subclasses for a degenerate range that can match no documents. */
protected void failNoMatch() {
throw new IllegalArgumentException("range \"" + label + "\" matches nothing");

View File

@ -24,7 +24,7 @@ import java.util.List;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.search.Filter;
/** Base class for range faceting.
*
@ -36,17 +36,23 @@ abstract class RangeFacetCounts extends Facets {
/** Counts, initialized in by subclass. */
protected final int[] counts;
/** Optional: if specified, we first test this Filter to
* see whether the document should be checked for
* matching ranges. If this is null, all documents are
* checked. */
protected final Filter fastMatchFilter;
/** Our field name. */
protected final String field;
/** Total number of hits. */
protected int totCount;
/** Create {@code RangeFacetCounts}. */
protected RangeFacetCounts(String field, Range[] ranges, Filter fastMatchFilter) throws IOException {
this.field = field;
this.ranges = ranges;
this.fastMatchFilter = fastMatchFilter;
counts = new int[ranges.length];
}
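A hedged sketch of what "random access" requires of the fastMatchFilter: any Filter whose getDocIdSet returns a FixedBitSet qualifies, because FixedBitSet is a DocIdSet whose bits() returns itself. The keep-even-docIDs rule is an arbitrary example; CachingWrapperFilter, as the test changes below show, is another way to obtain a bits()-capable DocIdSet from an arbitrary filter.

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

// Example filter satisfying the fastMatchFilter contract.
public class EvenDocsFilter extends Filter {
  @Override
  public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
      throws IOException {
    FixedBitSet bits = new FixedBitSet(context.reader().maxDoc());
    for (int docID = 0; docID < bits.length(); docID += 2) {
      if (acceptDocs == null || acceptDocs.get(docID)) {
        bits.set(docID);
      }
    }
    // FixedBitSet's bits() returns itself, enabling random access:
    return bits;
  }
}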

View File

@ -18,16 +18,10 @@ package org.apache.lucene.facet.sortedset;
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
/** Wraps a {@link IndexReader} and resolves ords
* using existing {@link SortedSetDocValues} APIs without a

View File

@ -645,7 +645,7 @@ public class TestDrillSideways extends FacetTestCase {
final FixedBitSet bits = new FixedBitSet(maxDoc);
for(int docID=0;docID < maxDoc;docID++) {
// Keeps only the even ids:
if ((acceptDocs == null || acceptDocs.get(docID)) && (Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0) {
bits.set(docID);
}
}
@ -689,7 +689,7 @@ public class TestDrillSideways extends FacetTestCase {
// subScorers are on the same docID:
if (!anyMultiValuedDrillDowns) {
// Can only do this test when there are no OR'd
// drill-down values, because in that case it's
// easily possible for one of the DD terms to be on
// a future docID:
new DrillSideways(s, config, tr) {

View File

@ -20,6 +20,7 @@ package org.apache.lucene.facet.range;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
@ -30,6 +31,7 @@ import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetResult;
@ -39,10 +41,10 @@ import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.MultiFacets;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
@ -50,12 +52,20 @@ import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util._TestUtil;
@ -229,6 +239,10 @@ public class TestRangeFacetCounts extends FacetTestCase {
IndexSearcher s = newSearcher(r);
if (VERBOSE) {
System.out.println("TEST: searcher=" + s);
}
DrillSideways ds = new DrillSideways(s, config, tr) {
@Override
@ -365,6 +379,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
System.out.println("TEST: numDocs=" + numDocs);
}
long[] values = new long[numDocs];
long minValue = Long.MAX_VALUE;
long maxValue = Long.MIN_VALUE;
for(int i=0;i<numDocs;i++) {
Document doc = new Document();
long v = random().nextLong();
@ -372,6 +388,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
doc.add(new NumericDocValuesField("field", v));
doc.add(new LongField("field", v, Field.Store.NO));
w.addDocument(doc);
minValue = Math.min(minValue, v);
maxValue = Math.max(maxValue, v);
}
IndexReader r = w.getReader();
@ -386,6 +404,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
int numRange = _TestUtil.nextInt(random(), 1, 100);
LongRange[] ranges = new LongRange[numRange];
int[] expectedCounts = new int[numRange];
long minAcceptedValue = Long.MAX_VALUE;
long maxAcceptedValue = Long.MIN_VALUE;
for(int rangeID=0;rangeID<numRange;rangeID++) {
long min;
if (rangeID > 0 && random().nextInt(10) == 7) {
@ -447,13 +467,26 @@ public class TestRangeFacetCounts extends FacetTestCase {
}
if (accept) {
expectedCounts[rangeID]++;
minAcceptedValue = Math.min(minAcceptedValue, values[i]);
maxAcceptedValue = Math.max(maxAcceptedValue, values[i]);
}
}
}
FacetsCollector sfc = new FacetsCollector();
s.search(new MatchAllDocsQuery(), sfc);
Filter fastMatchFilter;
if (random().nextBoolean()) {
if (random().nextBoolean()) {
fastMatchFilter = NumericRangeFilter.newLongRange("field", minValue, maxValue, true, true);
} else {
fastMatchFilter = NumericRangeFilter.newLongRange("field", minAcceptedValue, maxAcceptedValue, true, true);
}
} else {
fastMatchFilter = null;
}
ValueSource vs = new LongFieldSource("field");
Facets facets = new LongRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges);
FacetResult result = facets.getTopChildren(10, "field");
assertEquals(numRange, result.labelValues.length);
for(int rangeID=0;rangeID<numRange;rangeID++) {
@ -468,7 +501,15 @@ public class TestRangeFacetCounts extends FacetTestCase {
// Test drill-down:
DrillDownQuery ddq = new DrillDownQuery(config);
ddq.add("field", NumericRangeQuery.newLongRange("field", range.min, range.max, range.minInclusive, range.maxInclusive));
if (random().nextBoolean()) {
if (random().nextBoolean()) {
ddq.add("field", NumericRangeFilter.newLongRange("field", range.min, range.max, range.minInclusive, range.maxInclusive));
} else {
ddq.add("field", NumericRangeQuery.newLongRange("field", range.min, range.max, range.minInclusive, range.maxInclusive));
}
} else {
ddq.add("field", range.getFilter(fastMatchFilter, vs));
}
assertEquals(expectedCounts[rangeID], s.search(ddq, 10).totalHits);
}
}
@ -482,6 +523,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
int numDocs = atLeast(1000);
float[] values = new float[numDocs];
float minValue = Float.POSITIVE_INFINITY;
float maxValue = Float.NEGATIVE_INFINITY;
for(int i=0;i<numDocs;i++) {
Document doc = new Document();
float v = random().nextFloat();
@ -489,6 +532,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
doc.add(new FloatDocValuesField("field", v));
doc.add(new FloatField("field", v, Field.Store.NO));
w.addDocument(doc);
minValue = Math.min(minValue, v);
maxValue = Math.max(maxValue, v);
}
IndexReader r = w.getReader();
@ -503,6 +548,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
int numRange = _TestUtil.nextInt(random(), 1, 5);
DoubleRange[] ranges = new DoubleRange[numRange];
int[] expectedCounts = new int[numRange];
float minAcceptedValue = Float.POSITIVE_INFINITY;
float maxAcceptedValue = Float.NEGATIVE_INFINITY;
if (VERBOSE) {
System.out.println("TEST: " + numRange + " ranges");
}
@ -578,13 +625,26 @@ public class TestRangeFacetCounts extends FacetTestCase {
}
if (accept) {
expectedCounts[rangeID]++;
minAcceptedValue = Math.min(minAcceptedValue, values[i]);
maxAcceptedValue = Math.max(maxAcceptedValue, values[i]);
}
}
}
FacetsCollector sfc = new FacetsCollector();
s.search(new MatchAllDocsQuery(), sfc);
Facets facets = new DoubleRangeFacetCounts("field", new FloatFieldSource("field"), sfc, ranges);
Filter fastMatchFilter;
if (random().nextBoolean()) {
if (random().nextBoolean()) {
fastMatchFilter = NumericRangeFilter.newFloatRange("field", minValue, maxValue, true, true);
} else {
fastMatchFilter = NumericRangeFilter.newFloatRange("field", minAcceptedValue, maxAcceptedValue, true, true);
}
} else {
fastMatchFilter = null;
}
ValueSource vs = new FloatFieldSource("field");
Facets facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges);
FacetResult result = facets.getTopChildren(10, "field");
assertEquals(numRange, result.labelValues.length);
for(int rangeID=0;rangeID<numRange;rangeID++) {
@ -599,7 +659,15 @@ public class TestRangeFacetCounts extends FacetTestCase {
// Test drill-down:
DrillDownQuery ddq = new DrillDownQuery(config);
ddq.add("field", NumericRangeQuery.newFloatRange("field", (float) range.min, (float) range.max, range.minInclusive, range.maxInclusive));
if (random().nextBoolean()) {
if (random().nextBoolean()) {
ddq.add("field", NumericRangeFilter.newFloatRange("field", (float) range.min, (float) range.max, range.minInclusive, range.maxInclusive));
} else {
ddq.add("field", NumericRangeQuery.newFloatRange("field", (float) range.min, (float) range.max, range.minInclusive, range.maxInclusive));
}
} else {
ddq.add("field", range.getFilter(fastMatchFilter, vs));
}
assertEquals(expectedCounts[rangeID], s.search(ddq, 10).totalHits);
}
}
@ -613,6 +681,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
int numDocs = atLeast(1000);
double[] values = new double[numDocs];
double minValue = Double.POSITIVE_INFINITY;
double maxValue = Double.NEGATIVE_INFINITY;
for(int i=0;i<numDocs;i++) {
Document doc = new Document();
double v = random().nextDouble();
@ -620,6 +690,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
doc.add(new DoubleDocValuesField("field", v));
doc.add(new DoubleField("field", v, Field.Store.NO));
w.addDocument(doc);
minValue = Math.min(minValue, v);
maxValue = Math.max(maxValue, v);
}
IndexReader r = w.getReader();
@ -634,6 +706,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
int numRange = _TestUtil.nextInt(random(), 1, 5);
DoubleRange[] ranges = new DoubleRange[numRange];
int[] expectedCounts = new int[numRange];
double minAcceptedValue = Double.POSITIVE_INFINITY;
double maxAcceptedValue = Double.NEGATIVE_INFINITY;
for(int rangeID=0;rangeID<numRange;rangeID++) {
double min;
if (rangeID > 0 && random().nextInt(10) == 7) {
@ -693,13 +767,26 @@ public class TestRangeFacetCounts extends FacetTestCase {
}
if (accept) {
expectedCounts[rangeID]++;
minAcceptedValue = Math.min(minAcceptedValue, values[i]);
maxAcceptedValue = Math.max(maxAcceptedValue, values[i]);
}
}
}
FacetsCollector sfc = new FacetsCollector();
s.search(new MatchAllDocsQuery(), sfc);
Facets facets = new DoubleRangeFacetCounts("field", sfc, ranges);
Filter fastMatchFilter;
if (random().nextBoolean()) {
if (random().nextBoolean()) {
fastMatchFilter = NumericRangeFilter.newDoubleRange("field", minValue, maxValue, true, true);
} else {
fastMatchFilter = NumericRangeFilter.newDoubleRange("field", minAcceptedValue, maxAcceptedValue, true, true);
}
} else {
fastMatchFilter = null;
}
ValueSource vs = new DoubleFieldSource("field");
Facets facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges);
FacetResult result = facets.getTopChildren(10, "field");
assertEquals(numRange, result.labelValues.length);
for(int rangeID=0;rangeID<numRange;rangeID++) {
@ -714,7 +801,16 @@ public class TestRangeFacetCounts extends FacetTestCase {
// Test drill-down:
DrillDownQuery ddq = new DrillDownQuery(config);
ddq.add("field", NumericRangeQuery.newDoubleRange("field", range.min, range.max, range.minInclusive, range.maxInclusive));
if (random().nextBoolean()) {
if (random().nextBoolean()) {
ddq.add("field", NumericRangeFilter.newDoubleRange("field", range.min, range.max, range.minInclusive, range.maxInclusive));
} else {
ddq.add("field", NumericRangeQuery.newDoubleRange("field", range.min, range.max, range.minInclusive, range.maxInclusive));
}
} else {
ddq.add("field", range.getFilter(fastMatchFilter, vs));
}
assertEquals(expectedCounts[rangeID], s.search(ddq, 10).totalHits);
}
}
@ -765,16 +861,13 @@ public class TestRangeFacetCounts extends FacetTestCase {
Document doc = new Document();
writer.addDocument(doc);
doc = new Document();
writer.addDocument(doc);
doc = new Document();
writer.addDocument(doc);
// Test wants 3 docs in one segment:
writer.forceMerge(1);
final ValueSource vs = new ValueSource() {
@SuppressWarnings("rawtypes")
@Override
public FunctionValues getValues(Map ignored, AtomicReaderContext ignored2) {
@ -801,6 +894,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
throw new UnsupportedOperationException();
}
};
FacetsConfig config = new FacetsConfig();
FacetsCollector fc = new FacetsCollector();
@ -808,18 +903,67 @@ public class TestRangeFacetCounts extends FacetTestCase {
IndexSearcher s = newSearcher(r);
s.search(new MatchAllDocsQuery(), fc);
Facets facets = new DoubleRangeFacetCounts("field", vs, fc,
final DoubleRange[] ranges = new DoubleRange[] {
new DoubleRange("< 1", 0.0, true, 1.0, false),
new DoubleRange("< 2", 0.0, true, 2.0, false),
new DoubleRange("< 5", 0.0, true, 5.0, false),
new DoubleRange("< 10", 0.0, true, 10.0, false),
new DoubleRange("< 20", 0.0, true, 20.0, false),
new DoubleRange("< 50", 0.0, true, 50.0, false));
new DoubleRange("< 50", 0.0, true, 50.0, false)};
final Filter fastMatchFilter;
final AtomicBoolean filterWasUsed = new AtomicBoolean();
if (random().nextBoolean()) {
// Sort of silly:
fastMatchFilter = new CachingWrapperFilter(new QueryWrapperFilter(new MatchAllDocsQuery())) {
@Override
protected DocIdSet cacheImpl(DocIdSetIterator iterator, AtomicReader reader)
throws IOException {
final FixedBitSet cached = new FixedBitSet(reader.maxDoc());
filterWasUsed.set(true);
cached.or(iterator);
return cached;
}
};
} else {
fastMatchFilter = null;
}
if (VERBOSE) {
System.out.println("TEST: fastMatchFilter=" + fastMatchFilter);
}
Facets facets = new DoubleRangeFacetCounts("field", vs, fc, fastMatchFilter, ranges);
assertEquals("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", facets.getTopChildren(10, "field").toString());
assertTrue(fastMatchFilter == null || filterWasUsed.get());
// Test drill-down:
DrillDownQuery ddq = new DrillDownQuery(config);
ddq.add("field", ranges[1].getFilter(fastMatchFilter, vs));
// Test simple drill-down:
assertEquals(1, s.search(ddq, 10).totalHits);
// Test drill-sideways after drill-down
DrillSideways ds = new DrillSideways(s, config, (TaxonomyReader) null) {
@Override
protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {
assert drillSideways.length == 1;
return new DoubleRangeFacetCounts("field", vs, drillSideways[0], fastMatchFilter, ranges);
}
@Override
protected boolean scoreSubDocsAtOnce() {
return random().nextBoolean();
}
};
DrillSidewaysResult dsr = ds.search(ddq, 10);
assertEquals(1, dsr.hits.totalHits);
assertEquals("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n",
dsr.facets.getTopChildren(10, "field").toString());
IOUtils.close(r, writer, dir);
}