mirror of https://github.com/apache/lucene.git
LUCENE-5418: faster drill-down/sideways on costly filters
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1565387 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 3d24b825a5
commit 20a280c33c
@@ -143,6 +143,13 @@ New Features
   close or cannot delete while referenced semantics.
   (Mark Miller, Mike McCandless)
 
+* LUCENE-5418: Drilling down or sideways on a Lucene facet range
+  (using Range.getFilter()) is now faster for costly filters (uses
+  random access, not iteration); range facet counts now accept a
+  fast-match filter to avoid computing the value for documents that
+  are out of bounds, e.g. using a bounding box filter with distance
+  range faceting. (Mike McCandless)
+
 Build
 
 * LUCENE-5217,LUCENE-5420: Maven config: get dependencies from Ant+Ivy config;
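The fast-match filter described in that entry is the extra constructor argument exercised in the demo change further below. A minimal sketch of the call shape, assuming an IndexSearcher `searcher`, a distance ValueSource `valueSource`, and a bounding-box Filter `fastMatchFilter` (all placeholders, not names from this commit):

    FacetsCollector fc = new FacetsCollector();
    searcher.search(new MatchAllDocsQuery(), fc);
    // Only documents accepted by fastMatchFilter get their value computed:
    Facets facets = new DoubleRangeFacetCounts("field", valueSource, fc,
        fastMatchFilter,
        new DoubleRange("< 1 km", 0.0, true, 1.0, false));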
@@ -31,6 +31,13 @@ public abstract class DocIdSet {
    * are no docs that match. */
   public abstract DocIdSetIterator iterator() throws IOException;
 
+  // TODO: somehow this class should express the cost of
+  // iteration vs the cost of random access Bits; for
+  // expensive Filters (e.g. distance < 1 km) we should use
+  // bits() after all other Query/Filters have matched, but
+  // this is the opposite of what bits() is for now
+  // (down-low filtering using e.g. FixedBitSet)
+
   /** Optionally provides a {@link Bits} interface for random access
    * to matching documents.
    * @return {@code null}, if this {@code DocIdSet} does not support random access.
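From a consumer's point of view, the iteration-vs-random-access trade-off named in the TODO above looks like this (a minimal sketch; `docIdSet` and the candidate `docID` are assumed inputs):

    Bits bits = docIdSet.bits();
    if (bits != null) {
      // Random access: probe only candidate docs we already have in hand.
      boolean matches = bits.get(docID);
    } else {
      // No random access offered: walk all matching docs in order.
      DocIdSetIterator it = docIdSet.iterator();
      for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
        // ... consume doc ...
      }
    }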
@@ -50,7 +50,7 @@ public class FilteredQuery extends Query {
    * @param query  Query to be filtered, cannot be <code>null</code>.
    * @param filter Filter to apply to query results, cannot be <code>null</code>.
    */
-  public FilteredQuery (Query query, Filter filter) {
+  public FilteredQuery(Query query, Filter filter) {
     this(query, filter, RANDOM_ACCESS_FILTER_STRATEGY);
   }
 
@@ -63,7 +63,7 @@ public class FilteredQuery extends Query {
    *
    * @see FilterStrategy
    */
-  public FilteredQuery (Query query, Filter filter, FilterStrategy strategy) {
+  public FilteredQuery(Query query, Filter filter, FilterStrategy strategy) {
     if (query == null || filter == null)
       throw new IllegalArgumentException("Query and filter cannot be null.");
     if (strategy == null)
@@ -118,7 +118,9 @@ public class FilteredQuery extends Query {
 
       // return this query
       @Override
-      public Query getQuery() { return FilteredQuery.this; }
+      public Query getQuery() {
+        return FilteredQuery.this;
+      }
 
       // return a filtering scorer
       @Override
@@ -130,8 +132,8 @@ public class FilteredQuery extends Query {
           // this means the filter does not accept any documents.
           return null;
         }
 
         return strategy.filteredScorer(context, scoreDocsInOrder, topScorer, weight, filterDocIdSet);
       }
     };
   }
@@ -183,14 +185,12 @@ public class FilteredQuery extends Query {
 
     @Override
     public int advance(int target) throws IOException {
-
       int doc = scorer.advance(target);
       if (doc != Scorer.NO_MORE_DOCS && !filterbits.get(doc)) {
         return scorerDoc = nextDoc();
       } else {
         return scorerDoc = doc;
       }
-
     }
 
     @Override
@@ -303,7 +303,9 @@ public class FilteredQuery extends Query {
     }
 
     @Override
-    public final int freq() throws IOException { return scorer.freq(); }
+    public final int freq() throws IOException {
+      return scorer.freq();
+    }
 
     @Override
     public final Collection<ChildScorer> getChildren() {
@@ -343,15 +345,6 @@ public class FilteredQuery extends Query {
   public Query rewrite(IndexReader reader) throws IOException {
     final Query queryRewritten = query.rewrite(reader);
 
-    if (queryRewritten instanceof MatchAllDocsQuery) {
-      // Special case: If the query is a MatchAllDocsQuery, we only
-      // return a CSQ(filter).
-      final Query rewritten = new ConstantScoreQuery(filter);
-      // Combine boost of MatchAllDocsQuery and the wrapped rewritten query:
-      rewritten.setBoost(this.getBoost() * queryRewritten.getBoost());
-      return rewritten;
-    }
-
     if (queryRewritten != query) {
       // rewrite to a new FilteredQuery wrapping the rewritten query
       final Query rewritten = new FilteredQuery(queryRewritten, filter, strategy);
@@ -527,7 +520,7 @@ public class FilteredQuery extends Query {
 
       final Bits filterAcceptDocs = docIdSet.bits();
       // force if RA is requested
-      final boolean useRandomAccess = (filterAcceptDocs != null && (useRandomAccess(filterAcceptDocs, firstFilterDoc)));
+      final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, firstFilterDoc);
       if (useRandomAccess) {
         // if we are using random access, we return the inner scorer, just with other acceptDocs
         return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs);
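A caller can also pick the strategy explicitly. A hedged sketch using the query-first strategy this commit leans on for costly filters (`costlyFilter` is an assumed placeholder, not a name from this diff):

    Query filtered = new FilteredQuery(
        new TermQuery(new Term("body", "lucene")),
        costlyFilter,                                // e.g. a distance filter
        FilteredQuery.QUERY_FIRST_FILTER_STRATEGY);  // consult the filter only for docs the query matched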
@@ -375,7 +375,6 @@ public class TestFilteredQuery extends LuceneTestCase {
   public void testRewrite() throws Exception {
     assertRewrite(new FilteredQuery(new TermQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), FilteredQuery.class);
     assertRewrite(new FilteredQuery(new PrefixQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), FilteredQuery.class);
-    assertRewrite(new FilteredQuery(new MatchAllDocsQuery(), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), ConstantScoreQuery.class);
   }
 
   public void testGetFilterStrategy() {
@@ -29,18 +29,24 @@ import org.apache.lucene.expressions.Expression;
 import org.apache.lucene.expressions.SimpleBindings;
 import org.apache.lucene.expressions.js.JavascriptCompiler;
 import org.apache.lucene.facet.DrillDownQuery;
+import org.apache.lucene.facet.DrillSideways;
 import org.apache.lucene.facet.FacetResult;
 import org.apache.lucene.facet.Facets;
 import org.apache.lucene.facet.FacetsCollector;
 import org.apache.lucene.facet.FacetsConfig;
 import org.apache.lucene.facet.range.DoubleRange;
 import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.queries.BooleanFilter;
 import org.apache.lucene.queries.function.ValueSource;
-import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.NumericRangeFilter;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
@@ -59,6 +65,20 @@ public class DistanceFacetsExample implements Closeable {
 
   private final Directory indexDir = new RAMDirectory();
   private IndexSearcher searcher;
   private final FacetsConfig config = new FacetsConfig();
 
+  /** The "home" latitude. */
+  public final static double ORIGIN_LATITUDE = 40.7143528;
+
+  /** The "home" longitude. */
+  public final static double ORIGIN_LONGITUDE = -74.0059731;
+
+  /** Radius of the Earth in KM
+   *
+   * NOTE: this is approximate, because the earth is a bit
+   * wider at the equator than the poles.  See
+   * http://en.wikipedia.org/wiki/Earth_radius */
+  public final static double EARTH_RADIUS_KM = 6371.01;
+
   /** Empty constructor */
   public DistanceFacetsExample() {}
@@ -68,6 +88,8 @@ public class DistanceFacetsExample implements Closeable {
     IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER,
         new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));
 
+    // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter
+
     // Add documents with latitude/longitude location:
     Document doc = new Document();
     doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO));
@@ -92,7 +114,8 @@ public class DistanceFacetsExample implements Closeable {
   private ValueSource getDistanceValueSource() {
     Expression distance;
     try {
-      distance = JavascriptCompiler.compile("haversin(40.7143528,-74.0059731,latitude,longitude)");
+      distance = JavascriptCompiler.compile(
+          "haversin(" + ORIGIN_LATITUDE + "," + ORIGIN_LONGITUDE + ",latitude,longitude)");
     } catch (ParseException pe) {
       // Should not happen
       throw new RuntimeException(pe);
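For context, the compiled expression becomes a ValueSource via bindings to the two double fields. A sketch, under the assumption that the surrounding (unshown) code binds them as SortFields; only the compile call and getValueSource line appear in this diff:

    Expression distance = JavascriptCompiler.compile(
        "haversin(" + ORIGIN_LATITUDE + "," + ORIGIN_LONGITUDE + ",latitude,longitude)");
    SimpleBindings bindings = new SimpleBindings();
    bindings.add(new SortField("latitude", SortField.Type.DOUBLE));   // assumed binding
    bindings.add(new SortField("longitude", SortField.Type.DOUBLE));  // assumed binding
    ValueSource vs = distance.getValueSource(bindings);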
@@ -104,15 +127,83 @@ public class DistanceFacetsExample implements Closeable {
     return distance.getValueSource(bindings);
   }
 
+  /** Given a latitude and longitude (in degrees) and the
+   *  maximum great circle (surface of the earth) distance,
+   *  returns a simple Filter bounding box to "fast match"
+   *  candidates. */
+  public static Filter getBoundingBoxFilter(double originLat, double originLng, double maxDistanceKM) {
+
+    // Basic bounding box geo math from
+    // http://JanMatuschek.de/LatitudeLongitudeBoundingCoordinates,
+    // licensed under creative commons 3.0:
+    // http://creativecommons.org/licenses/by/3.0
+
+    // TODO: maybe switch to recursive prefix tree instead
+    // (in lucene/spatial)?  It should be more efficient
+    // since it's a 2D trie...
+
+    // Degrees -> Radians:
+    double originLatRadians = Math.toRadians(originLat);
+    double originLngRadians = Math.toRadians(originLng);
+
+    double angle = maxDistanceKM / EARTH_RADIUS_KM;
+
+    double minLat = originLatRadians - angle;
+    double maxLat = originLatRadians + angle;
+
+    double minLng;
+    double maxLng;
+    if (minLat > Math.toRadians(-90) && maxLat < Math.toRadians(90)) {
+      double delta = Math.asin(Math.sin(angle)/Math.cos(originLatRadians));
+      minLng = originLngRadians - delta;
+      if (minLng < Math.toRadians(-180)) {
+        minLng += 2 * Math.PI;
+      }
+      maxLng = originLngRadians + delta;
+      if (maxLng > Math.toRadians(180)) {
+        maxLng -= 2 * Math.PI;
+      }
+    } else {
+      // The query includes a pole!
+      minLat = Math.max(minLat, Math.toRadians(-90));
+      maxLat = Math.min(maxLat, Math.toRadians(90));
+      minLng = Math.toRadians(-180);
+      maxLng = Math.toRadians(180);
+    }
+
+    BooleanFilter f = new BooleanFilter();
+
+    // Add latitude range filter:
+    f.add(NumericRangeFilter.newDoubleRange("latitude", Math.toDegrees(minLat), Math.toDegrees(maxLat), true, true),
+          BooleanClause.Occur.MUST);
+
+    // Add longitude range filter:
+    if (minLng > maxLng) {
+      // The bounding box crosses the international date
+      // line:
+      BooleanFilter lonF = new BooleanFilter();
+      lonF.add(NumericRangeFilter.newDoubleRange("longitude", Math.toDegrees(minLng), null, true, true),
+               BooleanClause.Occur.SHOULD);
+      lonF.add(NumericRangeFilter.newDoubleRange("longitude", null, Math.toDegrees(maxLng), true, true),
+               BooleanClause.Occur.SHOULD);
+      f.add(lonF, BooleanClause.Occur.MUST);
+    } else {
+      f.add(NumericRangeFilter.newDoubleRange("longitude", Math.toDegrees(minLng), Math.toDegrees(maxLng), true, true),
+            BooleanClause.Occur.MUST);
+    }
+
+    return f;
+  }
+
   /** User runs a query and counts facets. */
   public FacetResult search() throws IOException {
 
     FacetsCollector fc = new FacetsCollector();
 
     searcher.search(new MatchAllDocsQuery(), fc);
 
     Facets facets = new DoubleRangeFacetCounts("field", getDistanceValueSource(), fc,
+                                               getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, 10.0),
                                                ONE_KM,
                                                TWO_KM,
                                                FIVE_KM,
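Plugging the demo's origin into the math above gives a feel for the box size (rounded values, computed here rather than taken from the commit):

    // origin = (40.7143528, -74.0059731), maxDistanceKM = 10
    // angle  = 10 / 6371.01                        ~= 0.0015696 rad (~0.0899 deg)
    // minLat .. maxLat                             ~= 40.6244 .. 40.8043
    // delta  = asin(sin(0.0015696) / cos(0.7106))  ~= 0.0020712 rad (~0.1187 deg)
    // minLng .. maxLng                             ~= -74.1246 .. -73.8873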
@@ -127,10 +218,16 @@ public class DistanceFacetsExample implements Closeable {
     // Passing no baseQuery means we drill down on all
     // documents ("browse only"):
     DrillDownQuery q = new DrillDownQuery(null);
-
-    q.add("field", new ConstantScoreQuery(range.getFilter(getDistanceValueSource())));
-
-    return searcher.search(q, 10);
+    final ValueSource vs = getDistanceValueSource();
+    q.add("field", range.getFilter(getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), vs));
+    DrillSideways ds = new DrillSideways(searcher, config, (TaxonomyReader) null) {
+        @Override
+        protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {
+          assert drillSideways.length == 1;
+          return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM);
+        }
+      };
+    return ds.search(q, 10).hits;
   }
 
   @Override
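End-to-end, the reworked demo is driven roughly like this (a sketch: index() and the DoubleRange constants live in the full file outside this diff, and drillDown's exact signature is inferred from the body above):

    DistanceFacetsExample example = new DistanceFacetsExample();
    example.index();                               // assumed indexing entry point
    FacetResult result = example.search();         // counts per distance range
    TopDocs hits = example.drillDown(twoKmRange);  // hypothetical DoubleRange argument
    example.close();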
@@ -18,22 +18,20 @@ package org.apache.lucene.facet;
  */
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
-import org.apache.lucene.facet.range.LongRangeFacetCounts;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.FilteredQuery;
 import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.NumericRangeQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 
@@ -86,7 +84,7 @@ public final class DrillDownQuery extends Query {
 
   /** Used by DrillSideways */
   DrillDownQuery(FacetsConfig config, Query baseQuery, List<Query> clauses, Map<String,Integer> drillDownDims) {
-    this.query = new BooleanQuery(true);
+    query = new BooleanQuery(true);
     if (baseQuery != null) {
       query.add(baseQuery, Occur.MUST);
     }
@@ -155,11 +153,12 @@ public final class DrillDownQuery extends Query {
 
   /** Expert: add a custom drill-down subQuery.  Use this
    *  when you have a separate way to drill-down on the
-   *  dimension than the indexed facet ordinals (for
-   *  example, use a {@link NumericRangeQuery} to drill down
-   *  after {@link LongRangeFacetCounts} or {@link DoubleRangeFacetCounts}. */
+   *  dimension than the indexed facet ordinals. */
   public void add(String dim, Query subQuery) {
+
     if (drillDownDims.containsKey(dim)) {
       throw new IllegalArgumentException("dimension \"" + dim + "\" already has a drill-down");
     }
+    // TODO: we should use FilteredQuery?
 
     // So scores of the drill-down query don't have an
@@ -172,6 +171,40 @@ public final class DrillDownQuery extends Query {
     drillDownDims.put(dim, drillDownDims.size());
   }
 
+  /** Expert: add a custom drill-down Filter, e.g. when
+   *  drilling down after range faceting. */
+  public void add(String dim, Filter subFilter) {
+
+    if (drillDownDims.containsKey(dim)) {
+      throw new IllegalArgumentException("dimension \"" + dim + "\" already has a drill-down");
+    }
+
+    // TODO: we should use FilteredQuery?
+
+    // So scores of the drill-down query don't have an
+    // effect:
+    final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(subFilter);
+    drillDownQuery.setBoost(0.0f);
+
+    query.add(drillDownQuery, Occur.MUST);
+
+    drillDownDims.put(dim, drillDownDims.size());
+  }
+
+  static Filter getFilter(Query query) {
+    if (query instanceof ConstantScoreQuery) {
+      ConstantScoreQuery csq = (ConstantScoreQuery) query;
+      Filter filter = csq.getFilter();
+      if (filter != null) {
+        return filter;
+      } else {
+        return getFilter(csq.getQuery());
+      }
+    } else {
+      return null;
+    }
+  }
+
   @Override
   public DrillDownQuery clone() {
     return new DrillDownQuery(config, query, drillDownDims);
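Usage sketch for the new Filter-based drill-down (hypothetical "timestamp" dimension; `config` and `baseQuery` are assumed placeholders):

    DrillDownQuery ddq = new DrillDownQuery(config, baseQuery);
    // Matches are constrained by the filter but contribute no score (boost 0):
    ddq.add("timestamp",
            NumericRangeFilter.newLongRange("timestamp", 0L, 1000L, true, true));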
@@ -199,7 +232,63 @@ public final class DrillDownQuery extends Query {
     if (query.clauses().size() == 0) {
       return new MatchAllDocsQuery();
     }
-    return query;
+
+    List<Filter> filters = new ArrayList<Filter>();
+    List<Query> queries = new ArrayList<Query>();
+    List<BooleanClause> clauses = query.clauses();
+    Query baseQuery;
+    int startIndex;
+    if (drillDownDims.size() == query.clauses().size()) {
+      baseQuery = new MatchAllDocsQuery();
+      startIndex = 0;
+    } else {
+      baseQuery = clauses.get(0).getQuery();
+      startIndex = 1;
+    }
+
+    for(int i=startIndex;i<clauses.size();i++) {
+      BooleanClause clause = clauses.get(i);
+      Query queryClause = clause.getQuery();
+      Filter filter = getFilter(queryClause);
+      if (filter != null) {
+        filters.add(filter);
+      } else {
+        queries.add(queryClause);
+      }
+    }
+
+    if (filters.isEmpty()) {
+      return query;
+    } else {
+      // Wrap all filters using FilteredQuery
+
+      // TODO: this is hackish; we need to do it because
+      // BooleanQuery can't be trusted to handle the
+      // "expensive filter" case.  Really, each Filter should
+      // know its cost and we should take that more
+      // carefully into account when picking the right
+      // strategy/optimization:
+      Query wrapped;
+      if (queries.isEmpty()) {
+        wrapped = baseQuery;
+      } else {
+        // disable coord
+        BooleanQuery wrappedBQ = new BooleanQuery(true);
+        if ((baseQuery instanceof MatchAllDocsQuery) == false) {
+          wrappedBQ.add(baseQuery, BooleanClause.Occur.MUST);
+        }
+        for(Query q : queries) {
+          wrappedBQ.add(q, BooleanClause.Occur.MUST);
+        }
+        wrapped = wrappedBQ;
+      }
+
+      for(Filter filter : filters) {
+        wrapped = new FilteredQuery(wrapped, filter, FilteredQuery.QUERY_FIRST_FILTER_STRATEGY);
+      }
+
+      return wrapped;
+    }
   }
 
   @Override
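For a single drill-down Filter over a base query, the rewrite above therefore produces, in effect:

    new FilteredQuery(baseQuery, filter, FilteredQuery.QUERY_FIRST_FILTER_STRATEGY)

With several Filters the wrapping nests one FilteredQuery per filter, while ordinary (non-Filter) drill-down clauses stay inside a coord-disabled BooleanQuery at the core.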
@@ -26,12 +26,9 @@ import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
 import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
 import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.IndexSearcher;
@@ -40,11 +37,9 @@ import org.apache.lucene.search.MultiCollector;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TopFieldCollector;
 import org.apache.lucene.search.TopScoreDocCollector;
-import org.apache.lucene.search.Weight;
 
 /**
  * Computes drill down and sideways counts for the provided
@@ -172,153 +167,23 @@ public class DrillSideways {
       drillSidewaysCollectors[i] = new FacetsCollector();
     }
 
-    boolean useCollectorMethod = scoreSubDocsAtOnce();
-
-    Term[][] drillDownTerms = null;
-
-    if (!useCollectorMethod) {
-      // Optimistic: assume subQueries of the DDQ are either
-      // TermQuery or BQ OR of TermQuery; if this is wrong
-      // then we detect it and fallback to the more general
-      // but slower DrillSidewaysCollector:
-      drillDownTerms = new Term[clauses.length-startClause][];
-      for(int i=startClause;i<clauses.length;i++) {
-        Query q = clauses[i].getQuery();
-
-        // DrillDownQuery always wraps each subQuery in
-        // ConstantScoreQuery:
-        assert q instanceof ConstantScoreQuery;
-
-        q = ((ConstantScoreQuery) q).getQuery();
-
-        if (q instanceof TermQuery) {
-          drillDownTerms[i-startClause] = new Term[] {((TermQuery) q).getTerm()};
-        } else if (q instanceof BooleanQuery) {
-          BooleanQuery q2 = (BooleanQuery) q;
-          BooleanClause[] clauses2 = q2.getClauses();
-          drillDownTerms[i-startClause] = new Term[clauses2.length];
-          for(int j=0;j<clauses2.length;j++) {
-            if (clauses2[j].getQuery() instanceof TermQuery) {
-              drillDownTerms[i-startClause][j] = ((TermQuery) clauses2[j].getQuery()).getTerm();
-            } else {
-              useCollectorMethod = true;
-              break;
-            }
-          }
-        } else {
-          useCollectorMethod = true;
-        }
-      }
-    }
-
-    if (useCollectorMethod) {
-      // TODO: maybe we could push the "collector method"
-      // down into the optimized scorer to have a tighter
-      // integration ... and so TermQuery clauses could
-      // continue to run "optimized"
-      collectorMethod(query, baseQuery, startClause, hitCollector, drillDownCollector, drillSidewaysCollectors);
-    } else {
-      DrillSidewaysQuery dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownTerms);
-      searcher.search(dsq, hitCollector);
-    }
+    Query[] drillDownQueries = new Query[clauses.length-startClause];
+    for(int i=startClause;i<clauses.length;i++) {
+      drillDownQueries[i-startClause] = clauses[i].getQuery();
+    }
+    DrillSidewaysQuery dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce());
+    searcher.search(dsq, hitCollector);
 
     return new DrillSidewaysResult(buildFacetsResult(drillDownCollector, drillSidewaysCollectors, drillDownDims.keySet().toArray(new String[drillDownDims.size()])), null);
   }
 
-  /** Uses the more general but slower method of sideways
-   *  counting. This method allows an arbitrary subQuery to
-   *  implement the drill down for a given dimension. */
-  private void collectorMethod(DrillDownQuery ddq, Query baseQuery, int startClause, Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors) throws IOException {
-
-    BooleanClause[] clauses = ddq.getBooleanQuery().getClauses();
-
-    Map<String,Integer> drillDownDims = ddq.getDims();
-
-    BooleanQuery topQuery = new BooleanQuery(true);
-    final DrillSidewaysCollector collector = new DrillSidewaysCollector(hitCollector, drillDownCollector, drillSidewaysCollectors,
-                                                                        drillDownDims);
-
-    // TODO: if query is already a BQ we could copy that and
-    // add clauses to it, instead of doing BQ inside BQ
-    // (should be more efficient)?  Problem is this can
-    // affect scoring (coord) ... too bad we can't disable
-    // coord on a clause by clause basis:
-    topQuery.add(baseQuery, BooleanClause.Occur.MUST);
-
-    // NOTE: in theory we could just make a single BQ, with
-    // +query a b c minShouldMatch=2, but in this case,
-    // annoyingly, BS2 wraps a sub-scorer that always
-    // returns 2 as the .freq(), not how many of the
-    // SHOULD clauses matched:
-    BooleanQuery subQuery = new BooleanQuery(true);
-
-    Query wrappedSubQuery = new QueryWrapper(subQuery,
-                                             new SetWeight() {
-                                               @Override
-                                               public void set(Weight w) {
-                                                 collector.setWeight(w, -1);
-                                               }
-                                             });
-    Query constantScoreSubQuery = new ConstantScoreQuery(wrappedSubQuery);
-
-    // Don't impact score of original query:
-    constantScoreSubQuery.setBoost(0.0f);
-
-    topQuery.add(constantScoreSubQuery, BooleanClause.Occur.MUST);
-
-    // Unfortunately this sub-BooleanQuery
-    // will never get BS1 because today BS1 only works
-    // if topScorer=true... and actually we cannot use BS1
-    // anyways because we need subDocsScoredAtOnce:
-    int dimIndex = 0;
-    for(int i=startClause;i<clauses.length;i++) {
-      Query q = clauses[i].getQuery();
-      // DrillDownQuery always wraps each subQuery in
-      // ConstantScoreQuery:
-      assert q instanceof ConstantScoreQuery;
-      q = ((ConstantScoreQuery) q).getQuery();
-
-      final int finalDimIndex = dimIndex;
-      subQuery.add(new QueryWrapper(q,
-                                    new SetWeight() {
-                                      @Override
-                                      public void set(Weight w) {
-                                        collector.setWeight(w, finalDimIndex);
-                                      }
-                                    }),
-                   BooleanClause.Occur.SHOULD);
-      dimIndex++;
-    }
-
-    // TODO: we could better optimize the "just one drill
-    // down" case w/ a separate [specialized]
-    // collector...
-    int minShouldMatch = drillDownDims.size()-1;
-    if (minShouldMatch == 0) {
-      // Must add another "fake" clause so BQ doesn't erase
-      // itself by rewriting to the single clause:
-      Query end = new MatchAllDocsQuery();
-      end.setBoost(0.0f);
-      subQuery.add(end, BooleanClause.Occur.SHOULD);
-      minShouldMatch++;
-    }
-
-    subQuery.setMinimumNumberShouldMatch(minShouldMatch);
-
-    // System.out.println("EXE " + topQuery);
-
-    // Collects against the passed-in
-    // drillDown/SidewaysCollectors as a side effect:
-    searcher.search(topQuery, collector);
-  }
-
   /**
    * Search, sorting by {@link Sort}, and computing
    * drill down and sideways counts.
   */
   public DrillSidewaysResult search(DrillDownQuery query,
-      Filter filter, FieldDoc after, int topN, Sort sort, boolean doDocScores,
-      boolean doMaxScore) throws IOException {
+                                    Filter filter, FieldDoc after, int topN, Sort sort, boolean doDocScores,
+                                    boolean doMaxScore) throws IOException {
     if (filter != null) {
       query = new DrillDownQuery(config, filter, query);
     }
@@ -355,7 +220,7 @@ public class DrillSideways {
    * drill down and sideways counts.
    */
   public DrillSidewaysResult search(ScoreDoc after,
-      DrillDownQuery query, int topN) throws IOException {
+                                    DrillDownQuery query, int topN) throws IOException {
     int limit = searcher.getIndexReader().maxDoc();
     if (limit == 0) {
       limit = 1; // the collector does not allow numHits = 0
@@ -367,14 +232,15 @@ public class DrillSideways {
   }
 
   /** Override this and return true if your collector
-   *  (e.g., ToParentBlockJoinCollector) expects all
+   *  (e.g., {@code ToParentBlockJoinCollector}) expects all
    *  sub-scorers to be positioned on the document being
    *  collected.  This will cause some performance loss;
    *  default is false.  Note that if you return true from
    *  this method (in a subclass) be sure your collector
    *  also returns false from {@link
    *  Collector#acceptsDocsOutOfOrder}: this will trick
-   *  BooleanQuery into also scoring all subDocs at once. */
+   *  {@code BooleanQuery} into also scoring all subDocs at
+   *  once. */
   protected boolean scoreSubDocsAtOnce() {
     return false;
   }
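A subclass that needs every sub-scorer positioned on the collected doc opts in like this (a sketch; `searcher`, `config`, and `taxoReader` are assumed to exist):

    DrillSideways ds = new DrillSideways(searcher, config, taxoReader) {
      @Override
      protected boolean scoreSubDocsAtOnce() {
        return true;  // forces the query-first scoring path in DrillSidewaysScorer
      }
    };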
@@ -394,54 +260,5 @@ public class DrillSideways {
       this.hits = hits;
     }
   }
-  private interface SetWeight {
-    public void set(Weight w);
-  }
-
-  /** Just records which Weight was given out for the
-   *  (possibly rewritten) Query. */
-  private static class QueryWrapper extends Query {
-    private final Query originalQuery;
-    private final SetWeight setter;
-
-    public QueryWrapper(Query originalQuery, SetWeight setter) {
-      this.originalQuery = originalQuery;
-      this.setter = setter;
-    }
-
-    @Override
-    public Weight createWeight(final IndexSearcher searcher) throws IOException {
-      Weight w = originalQuery.createWeight(searcher);
-      setter.set(w);
-      return w;
-    }
-
-    @Override
-    public Query rewrite(IndexReader reader) throws IOException {
-      Query rewritten = originalQuery.rewrite(reader);
-      if (rewritten != originalQuery) {
-        return new QueryWrapper(rewritten, setter);
-      } else {
-        return this;
-      }
-    }
-
-    @Override
-    public String toString(String s) {
-      return originalQuery.toString(s);
-    }
-
-    @Override
-    public boolean equals(Object o) {
-      if (!(o instanceof QueryWrapper)) return false;
-      final QueryWrapper other = (QueryWrapper) o;
-      return super.equals(o) && originalQuery.equals(other.originalQuery);
-    }
-
-    @Override
-    public int hashCode() {
-      return super.hashCode() * 31 + originalQuery.hashCode();
-    }
-  }
 }
@@ -1,188 +0,0 @@
-package org.apache.lucene.facet;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.IdentityHashMap;
-import java.util.Map;
-
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.Scorer.ChildScorer;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Weight;
-
-/** Collector that scrutinizes each hit to determine if it
- *  passed all constraints (a true hit) or if it missed
- *  exactly one dimension (a near-miss, to count for
- *  drill-sideways counts on that dimension). */
-class DrillSidewaysCollector extends Collector {
-
-  private final Collector hitCollector;
-  private final Collector drillDownCollector;
-  private final Collector[] drillSidewaysCollectors;
-  private final Scorer[] subScorers;
-  private final int exactCount;
-
-  // Maps Weight to either -1 (mainQuery) or to integer
-  // index of the dims drillDown.  We need this when
-  // visiting the child scorers to correlate back to the
-  // right scorers:
-  private final Map<Weight,Integer> weightToIndex = new IdentityHashMap<Weight,Integer>();
-
-  private Scorer mainScorer;
-
-  public DrillSidewaysCollector(Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors,
-                                Map<String,Integer> dims) {
-    this.hitCollector = hitCollector;
-    this.drillDownCollector = drillDownCollector;
-    this.drillSidewaysCollectors = drillSidewaysCollectors;
-    subScorers = new Scorer[dims.size()];
-
-    if (dims.size() == 1) {
-      // When we have only one dim, we insert the
-      // MatchAllDocsQuery, bringing the clause count to
-      // 2:
-      exactCount = 2;
-    } else {
-      exactCount = dims.size();
-    }
-  }
-
-  @Override
-  public void collect(int doc) throws IOException {
-    //System.out.println("collect doc=" + doc + " main.freq=" + mainScorer.freq() + " main.doc=" + mainScorer.docID() + " exactCount=" + exactCount);
-
-    if (mainScorer == null) {
-      // This segment did not have any docs with any
-      // drill-down field & value:
-      return;
-    }
-
-    if (mainScorer.freq() == exactCount) {
-      // All sub-clauses from the drill-down filters
-      // matched, so this is a "real" hit, so we first
-      // collect in both the hitCollector and the
-      // drillDown collector:
-      //System.out.println("  hit " + drillDownCollector);
-      hitCollector.collect(doc);
-      if (drillDownCollector != null) {
-        drillDownCollector.collect(doc);
-      }
-
-      // Also collect across all drill-sideways counts so
-      // we "merge in" drill-down counts for this
-      // dimension.
-      for(int i=0;i<subScorers.length;i++) {
-        // This cannot be null, because it was a hit,
-        // meaning all drill-down dims matched, so all
-        // dims must have non-null scorers:
-        assert subScorers[i] != null;
-        int subDoc = subScorers[i].docID();
-        assert subDoc == doc;
-        drillSidewaysCollectors[i].collect(doc);
-      }
-
-    } else {
-      boolean found = false;
-      for(int i=0;i<subScorers.length;i++) {
-        if (subScorers[i] == null) {
-          // This segment did not have any docs with this
-          // drill-down field & value:
-          drillSidewaysCollectors[i].collect(doc);
-          assert allMatchesFrom(i+1, doc);
-          found = true;
-          break;
-        }
-        int subDoc = subScorers[i].docID();
-        //System.out.println("  i=" + i + " sub: " + subDoc);
-        if (subDoc != doc) {
-          //System.out.println("  +ds[" + i + "]");
-          assert subDoc > doc: "subDoc=" + subDoc + " doc=" + doc;
-          drillSidewaysCollectors[i].collect(doc);
-          assert allMatchesFrom(i+1, doc);
-          found = true;
-          break;
-        }
-      }
-      assert found;
-    }
-  }
-
-  // Only used by assert:
-  private boolean allMatchesFrom(int startFrom, int doc) {
-    for(int i=startFrom;i<subScorers.length;i++) {
-      assert subScorers[i].docID() == doc;
-    }
-    return true;
-  }
-
-  @Override
-  public boolean acceptsDocsOutOfOrder() {
-    // We actually could accept docs out of order, but, we
-    // need to force BooleanScorer2 so that the
-    // sub-scorers are "on" each docID we are collecting:
-    return false;
-  }
-
-  @Override
-  public void setNextReader(AtomicReaderContext leaf) throws IOException {
-    //System.out.println("DS.setNextReader reader=" + leaf.reader());
-    hitCollector.setNextReader(leaf);
-    if (drillDownCollector != null) {
-      drillDownCollector.setNextReader(leaf);
-    }
-    for(Collector dsc : drillSidewaysCollectors) {
-      dsc.setNextReader(leaf);
-    }
-  }
-
-  void setWeight(Weight weight, int index) {
-    assert !weightToIndex.containsKey(weight);
-    weightToIndex.put(weight, index);
-  }
-
-  private void findScorers(Scorer scorer) {
-    Integer index = weightToIndex.get(scorer.getWeight());
-    if (index != null) {
-      if (index.intValue() == -1) {
-        mainScorer = scorer;
-      } else {
-        subScorers[index] = scorer;
-      }
-    }
-    for(ChildScorer child : scorer.getChildren()) {
-      findScorers(child.child);
-    }
-  }
-
-  @Override
-  public void setScorer(Scorer scorer) throws IOException {
-    mainScorer = null;
-    Arrays.fill(subScorers, null);
-    findScorers(scorer);
-    hitCollector.setScorer(scorer);
-    if (drillDownCollector != null) {
-      drillDownCollector.setScorer(scorer);
-    }
-    for(Collector dsc : drillSidewaysCollectors) {
-      dsc.setScorer(scorer);
-    }
-  }
-}
@@ -19,15 +19,13 @@ package org.apache.lucene.facet;
 import java.io.IOException;
 import java.util.Arrays;
 
-import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
@@ -35,19 +33,21 @@ import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.Bits;
 
 /** Only purpose is to punch through and return a
- *  SimpleDrillSidewaysScorer */
+ *  DrillSidewaysScorer */
 
 class DrillSidewaysQuery extends Query {
   final Query baseQuery;
   final Collector drillDownCollector;
   final Collector[] drillSidewaysCollectors;
-  final Term[][] drillDownTerms;
+  final Query[] drillDownQueries;
+  final boolean scoreSubDocsAtOnce;
 
-  DrillSidewaysQuery(Query baseQuery, Collector drillDownCollector, Collector[] drillSidewaysCollectors, Term[][] drillDownTerms) {
+  DrillSidewaysQuery(Query baseQuery, Collector drillDownCollector, Collector[] drillSidewaysCollectors, Query[] drillDownQueries, boolean scoreSubDocsAtOnce) {
     this.baseQuery = baseQuery;
     this.drillDownCollector = drillDownCollector;
     this.drillSidewaysCollectors = drillSidewaysCollectors;
-    this.drillDownTerms = drillDownTerms;
+    this.drillDownQueries = drillDownQueries;
+    this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
   }
 
   @Override
@@ -68,13 +68,25 @@ class DrillSidewaysQuery extends Query {
     if (newQuery == baseQuery) {
       return this;
     } else {
-      return new DrillSidewaysQuery(newQuery, drillDownCollector, drillSidewaysCollectors, drillDownTerms);
+      return new DrillSidewaysQuery(newQuery, drillDownCollector, drillSidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce);
     }
   }
 
   @Override
   public Weight createWeight(IndexSearcher searcher) throws IOException {
     final Weight baseWeight = baseQuery.createWeight(searcher);
+    final Object[] drillDowns = new Object[drillDownQueries.length];
+    for(int dim=0;dim<drillDownQueries.length;dim++) {
+      Query query = drillDownQueries[dim];
+      Filter filter = DrillDownQuery.getFilter(query);
+      if (filter != null) {
+        drillDowns[dim] = filter;
+      } else {
+        // TODO: would be nice if we could say "we will do no
+        // scoring" here....
+        drillDowns[dim] = searcher.rewrite(query).createWeight(searcher);
+      }
+    }
 
     return new Weight() {
       @Override
@@ -108,59 +120,82 @@ class DrillSidewaysQuery extends Query {
       public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
                            boolean topScorer, Bits acceptDocs) throws IOException {
 
-        DrillSidewaysScorer.DocsEnumsAndFreq[] dims = new DrillSidewaysScorer.DocsEnumsAndFreq[drillDownTerms.length];
-        TermsEnum termsEnum = null;
-        String lastField = null;
+        // TODO: it could be better if we take acceptDocs
+        // into account instead of baseScorer?
+        Scorer baseScorer = baseWeight.scorer(context, scoreDocsInOrder, false, acceptDocs);
+
+        DrillSidewaysScorer.DocsAndCost[] dims = new DrillSidewaysScorer.DocsAndCost[drillDowns.length];
         int nullCount = 0;
         for(int dim=0;dim<dims.length;dim++) {
-          dims[dim] = new DrillSidewaysScorer.DocsEnumsAndFreq();
+          dims[dim] = new DrillSidewaysScorer.DocsAndCost();
           dims[dim].sidewaysCollector = drillSidewaysCollectors[dim];
-          String field = drillDownTerms[dim][0].field();
-          dims[dim].dim = drillDownTerms[dim][0].text();
-          if (lastField == null || !lastField.equals(field)) {
-            AtomicReader reader = context.reader();
-            Terms terms = reader.terms(field);
-            if (terms != null) {
-              termsEnum = terms.iterator(null);
+          if (drillDowns[dim] instanceof Filter) {
+            // Pass null for acceptDocs because we already
+            // passed it to baseScorer and baseScorer is
+            // MUST'd here
+            DocIdSet dis = ((Filter) drillDowns[dim]).getDocIdSet(context, null);
+
+            if (dis == null) {
+              continue;
+            }
+
+            Bits bits = dis.bits();
+
+            if (bits != null) {
+              // TODO: this logic is too naive: the
+              // existence of bits() in DIS today means
+              // either "I'm a cheap FixedBitSet so apply me down
+              // low as you decode the postings" or "I'm so
+              // horribly expensive so apply me after all
+              // other Query/Filter clauses pass"
+
+              // Filter supports random access; use that to
+              // prevent .advance() on costly filters:
+              dims[dim].bits = bits;
+
+              // TODO: Filter needs to express its expected
+              // cost somehow, before pulling the iterator;
+              // we should use that here to set the order to
+              // check the filters:
             } else {
-              termsEnum = null;
-            }
-            lastField = field;
-          }
-          dims[dim].docsEnums = new DocsEnum[drillDownTerms[dim].length];
-          if (termsEnum == null) {
-            nullCount++;
-            continue;
-          }
-          for(int i=0;i<drillDownTerms[dim].length;i++) {
-            if (termsEnum.seekExact(drillDownTerms[dim][i].bytes())) {
-              DocsEnum docsEnum = termsEnum.docs(null, null, 0);
-              if (docsEnum != null) {
-                dims[dim].docsEnums[i] = docsEnum;
-                dims[dim].maxCost = Math.max(dims[dim].maxCost, docsEnum.cost());
-              }
+              DocIdSetIterator disi = dis.iterator();
+              if (disi == null) {
+                nullCount++;
+                continue;
+              }
+              dims[dim].disi = disi;
             }
+          } else {
+            DocIdSetIterator disi = ((Weight) drillDowns[dim]).scorer(context, true, false, null);
+            if (disi == null) {
+              nullCount++;
+              continue;
+            }
+            dims[dim].disi = disi;
           }
         }
 
-        if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) {
+        // If more than one dim has no matches, then there
+        // are no hits nor drill-sideways counts.  Or, if we
+        // have only one dim and that dim has no matches,
+        // same thing.
+        //if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) {
+        if (nullCount > 1) {
           return null;
         }
 
+        // Sort drill-downs by most restrictive first:
+        Arrays.sort(dims);
+
-        // TODO: it could be better if we take acceptDocs
-        // into account instead of baseScorer?
-        Scorer baseScorer = baseWeight.scorer(context, scoreDocsInOrder, false, acceptDocs);
-
         if (baseScorer == null) {
           return null;
         }
 
         return new DrillSidewaysScorer(this, context,
-                                       baseScorer,
-                                       drillDownCollector, dims);
+                                       baseScorer,
+                                       drillDownCollector, dims,
+                                       scoreSubDocsAtOnce);
       }
     };
   }
@@ -174,7 +209,7 @@ class DrillSidewaysQuery extends Query {
     result = prime * result + ((baseQuery == null) ? 0 : baseQuery.hashCode());
     result = prime * result
         + ((drillDownCollector == null) ? 0 : drillDownCollector.hashCode());
-    result = prime * result + Arrays.hashCode(drillDownTerms);
+    result = prime * result + Arrays.hashCode(drillDownQueries);
     result = prime * result + Arrays.hashCode(drillSidewaysCollectors);
     return result;
   }
@@ -191,7 +226,7 @@ class DrillSidewaysQuery extends Query {
     if (drillDownCollector == null) {
       if (other.drillDownCollector != null) return false;
     } else if (!drillDownCollector.equals(other.drillDownCollector)) return false;
-    if (!Arrays.equals(drillDownTerms, other.drillDownTerms)) return false;
+    if (!Arrays.equals(drillDownQueries, other.drillDownQueries)) return false;
     if (!Arrays.equals(drillSidewaysCollectors, other.drillSidewaysCollectors)) return false;
     return true;
   }
@@ -22,10 +22,11 @@ import java.util.Collection;
 import java.util.Collections;
 
 import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
 
 class DrillSidewaysScorer extends Scorer {
@@ -34,13 +35,15 @@ class DrillSidewaysScorer extends Scorer {
 
   private final Collector drillDownCollector;
 
-  private final DocsEnumsAndFreq[] dims;
+  private final DocsAndCost[] dims;
 
   // DrillDown DocsEnums:
   private final Scorer baseScorer;
 
   private final AtomicReaderContext context;
 
+  final boolean scoreSubDocsAtOnce;
+
   private static final int CHUNK = 2048;
   private static final int MASK = CHUNK-1;
 
|
@ -48,12 +51,13 @@ class DrillSidewaysScorer extends Scorer {
|
|||
private float collectScore;
|
||||
|
||||
DrillSidewaysScorer(Weight w, AtomicReaderContext context, Scorer baseScorer, Collector drillDownCollector,
|
||||
DocsEnumsAndFreq[] dims) {
|
||||
DocsAndCost[] dims, boolean scoreSubDocsAtOnce) {
|
||||
super(w);
|
||||
this.dims = dims;
|
||||
this.context = context;
|
||||
this.baseScorer = baseScorer;
|
||||
this.drillDownCollector = drillDownCollector;
|
||||
this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@@ -67,7 +71,7 @@ class DrillSidewaysScorer extends Scorer {
       drillDownCollector.setScorer(this);
       drillDownCollector.setNextReader(context);
     }
-    for(DocsEnumsAndFreq dim : dims) {
+    for (DocsAndCost dim : dims) {
       dim.sidewaysCollector.setScorer(this);
       dim.sidewaysCollector.setNextReader(context);
     }
@@ -79,26 +83,38 @@ class DrillSidewaysScorer extends Scorer {
 
     // Position all scorers to their first matching doc:
     baseScorer.nextDoc();
-    for(DocsEnumsAndFreq dim : dims) {
-      for (DocsEnum docsEnum : dim.docsEnums) {
-        if (docsEnum != null) {
-          docsEnum.nextDoc();
-        }
+    int numBits = 0;
+    for (DocsAndCost dim : dims) {
+      if (dim.disi != null) {
+        dim.disi.nextDoc();
+      } else if (dim.bits != null) {
+        numBits++;
       }
     }
 
     final int numDims = dims.length;
 
-    DocsEnum[][] docsEnums = new DocsEnum[numDims][];
-    Collector[] sidewaysCollectors = new Collector[numDims];
+    Bits[] bits = new Bits[numBits];
+    Collector[] bitsSidewaysCollectors = new Collector[numBits];
+
+    DocIdSetIterator[] disis = new DocIdSetIterator[numDims-numBits];
+    Collector[] sidewaysCollectors = new Collector[numDims-numBits];
     long drillDownCost = 0;
-    for(int dim=0;dim<numDims;dim++) {
-      docsEnums[dim] = dims[dim].docsEnums;
-      sidewaysCollectors[dim] = dims[dim].sidewaysCollector;
-      for (DocsEnum de : dims[dim].docsEnums) {
-        if (de != null) {
-          drillDownCost += de.cost();
+    int disiUpto = 0;
+    int bitsUpto = 0;
+    for (int dim=0;dim<numDims;dim++) {
+      DocIdSetIterator disi = dims[dim].disi;
+      if (dims[dim].bits == null) {
+        disis[disiUpto] = disi;
+        sidewaysCollectors[disiUpto] = dims[dim].sidewaysCollector;
+        disiUpto++;
+        if (disi != null) {
+          drillDownCost += disi.cost();
         }
+      } else {
+        bits[bitsUpto] = dims[dim].bits;
+        bitsSidewaysCollectors[bitsUpto] = dims[dim].sidewaysCollector;
+        bitsUpto++;
       }
     }
 
@@ -114,21 +130,95 @@ class DrillSidewaysScorer extends Scorer {
     }
     */
 
-    if (baseQueryCost < drillDownCost/10) {
-      //System.out.println("baseAdvance");
-      doBaseAdvanceScoring(collector, docsEnums, sidewaysCollectors);
-    } else if (numDims > 1 && (dims[1].maxCost < baseQueryCost/10)) {
+    if (bitsUpto > 0 || scoreSubDocsAtOnce || baseQueryCost < drillDownCost/10) {
+      //System.out.println("queryFirst: baseScorer=" + baseScorer + " disis.length=" + disis.length + " bits.length=" + bits.length);
+      doQueryFirstScoring(collector, disis, sidewaysCollectors, bits, bitsSidewaysCollectors);
+    } else if (numDims > 1 && (dims[1].disi == null || dims[1].disi.cost() < baseQueryCost/10)) {
       //System.out.println("drillDownAdvance");
-      doDrillDownAdvanceScoring(collector, docsEnums, sidewaysCollectors);
+      doDrillDownAdvanceScoring(collector, disis, sidewaysCollectors);
     } else {
       //System.out.println("union");
-      doUnionScoring(collector, docsEnums, sidewaysCollectors);
+      doUnionScoring(collector, disis, sidewaysCollectors);
     }
   }
 
+  /** Used when base query is highly constraining vs the
+   *  drilldowns, or when the docs must be scored at once
+   *  (i.e., like BooleanScorer2, not BooleanScorer).  In
+   *  this case we just .next() on base and .advance() on
+   *  the dim filters. */
+  private void doQueryFirstScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors,
+                                   Bits[] bits, Collector[] bitsSidewaysCollectors) throws IOException {
+    //if (DEBUG) {
+    //  System.out.println("  doQueryFirstScoring");
+    //}
+    int docID = baseScorer.docID();
+
+    nextDoc: while (docID != NO_MORE_DOCS) {
+      Collector failedCollector = null;
+      for (int i=0;i<disis.length;i++) {
+        // TODO: should we sort this 2nd dimension of
+        // docsEnums from most frequent to least?
+        DocIdSetIterator disi = disis[i];
+        if (disi != null && disi.docID() < docID) {
+          disi.advance(docID);
+        }
+        if (disi == null || disi.docID() > docID) {
+          if (failedCollector != null) {
+            // More than one dim fails on this document, so
+            // it's neither a hit nor a near-miss; move to
+            // next doc:
+            docID = baseScorer.nextDoc();
+            continue nextDoc;
+          } else {
+            failedCollector = sidewaysCollectors[i];
+          }
+        }
+      }
+
+      // TODO: for the "non-costly Bits" we really should
+      // have passed them down as acceptDocs, but
+      // unfortunately we cannot distinguish today between
+      // "bits() is so costly that you should apply it last"
+      // from "bits() is so cheap that you should apply it
+      // everywhere down low"
+
+      // Fold in Filter Bits last, since they may be costly:
+      for(int i=0;i<bits.length;i++) {
+        if (bits[i].get(docID) == false) {
+          if (failedCollector != null) {
+            // More than one dim fails on this document, so
+            // it's neither a hit nor a near-miss; move to
+            // next doc:
+            docID = baseScorer.nextDoc();
+            continue nextDoc;
+          } else {
+            failedCollector = bitsSidewaysCollectors[i];
+          }
+        }
+      }
+
+      collectDocID = docID;
+
+      // TODO: we could score on demand instead since we are
+      // daat here:
+      collectScore = baseScorer.score();
+
+      if (failedCollector == null) {
+        // Hit passed all filters, so it's "real":
+        collectHit(collector, sidewaysCollectors, bitsSidewaysCollectors);
+      } else {
+        // Hit missed exactly one filter:
+        collectNearMiss(failedCollector);
+      }
+
+      docID = baseScorer.nextDoc();
+    }
+  }
+
   /** Used when drill downs are highly constraining vs
    *  baseQuery. */
-  private void doDrillDownAdvanceScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
+  private void doDrillDownAdvanceScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors) throws IOException {
     final int maxDoc = context.reader().maxDoc();
     final int numDims = dims.length;
 
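The near-miss bookkeeping in doQueryFirstScoring reduces to a small invariant; a self-contained restatement (not the commit's code):

    // Returns -1 for a real hit, the failing dim for a near-miss,
    // or -2 when two or more dims fail (the doc is discarded).
    static int classify(boolean[] matches) {
      int failedDim = -1;
      for (int i = 0; i < matches.length; i++) {
        if (!matches[i]) {
          if (failedDim != -1) {
            return -2;      // second failure: neither hit nor near-miss
          }
          failedDim = i;    // remember the single failing dim
        }
      }
      return failedDim;
    }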
@@ -157,11 +247,9 @@ class DrillSidewaysScorer extends Scorer {
       //if (DEBUG) {
       //  System.out.println("  dim0");
       //}
-      for(DocsEnum docsEnum : docsEnums[0]) {
-        if (docsEnum == null) {
-          continue;
-        }
-        int docID = docsEnum.docID();
+      DocIdSetIterator disi = disis[0];
+      if (disi != null) {
+        int docID = disi.docID();
         while (docID < nextChunkStart) {
           int slot = docID & MASK;
 
@@ -218,7 +304,7 @@ class DrillSidewaysScorer extends Scorer {
             }
           }
 
-          docID = docsEnum.nextDoc();
+          docID = disi.nextDoc();
         }
       }
 
@@ -272,15 +358,13 @@ class DrillSidewaysScorer extends Scorer {
 
       // TODO: factor this out & share w/ union scorer,
       // except we start from dim=2 instead:
-      for(int dim=2;dim<numDims;dim++) {
+      for (int dim=2;dim<numDims;dim++) {
         //if (DEBUG) {
         //  System.out.println("  dim=" + dim + " [" + dims[dim].dim + "]");
         //}
-        for(DocsEnum docsEnum : docsEnums[dim]) {
-          if (docsEnum == null) {
-            continue;
-          }
-          int docID = docsEnum.docID();
+        disi = disis[dim];
+        if (disi != null) {
+          int docID = disi.docID();
           while (docID < nextChunkStart) {
             int slot = docID & MASK;
             if (docIDs[slot] == docID && counts[slot] >= dim) {
@@ -299,8 +383,9 @@ class DrillSidewaysScorer extends Scorer {
               counts[slot] = dim+1;
             }
           }
 
-          docID = docsEnum.nextDoc();
+          // TODO: sometimes use advance?
+          docID = disi.nextDoc();
         }
       }
     }
@@ -309,7 +394,7 @@ class DrillSidewaysScorer extends Scorer {
       //if (DEBUG) {
       //  System.out.println("  now collect: " + filledCount + " hits");
       //}
-      for(int i=0;i<filledCount;i++) {
+      for (int i=0;i<filledCount;i++) {
         int slot = filledSlots[i];
         collectDocID = docIDs[slot];
         collectScore = scores[slot];
@@ -319,7 +404,7 @@ class DrillSidewaysScorer extends Scorer {
         if (counts[slot] == 1+numDims) {
           collectHit(collector, sidewaysCollectors);
         } else if (counts[slot] == numDims) {
-          collectNearMiss(sidewaysCollectors, missingDims[slot]);
+          collectNearMiss(sidewaysCollectors[missingDims[slot]]);
         }
       }
 
@ -331,92 +416,7 @@ class DrillSidewaysScorer extends Scorer {
|
|||
}
|
||||
}
|
||||
|
||||
/** Used when base query is highly constraining vs the
* drilldowns; in this case we just .next() on base and
* .advance() on the dims. */
private void doBaseAdvanceScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
//if (DEBUG) {
// System.out.println(" doBaseAdvanceScoring");
//}
int docID = baseScorer.docID();

final int numDims = dims.length;

nextDoc: while (docID != NO_MORE_DOCS) {
int failedDim = -1;
for(int dim=0;dim<numDims;dim++) {
// TODO: should we sort this 2nd dimension of
// docsEnums from most frequent to least?
boolean found = false;
for(DocsEnum docsEnum : docsEnums[dim]) {
if (docsEnum == null) {
continue;
}
if (docsEnum.docID() < docID) {
docsEnum.advance(docID);
}
if (docsEnum.docID() == docID) {
found = true;
break;
}
}
if (!found) {
if (failedDim != -1) {
// More than one dim fails on this document, so
// it's neither a hit nor a near-miss; move to
// next doc:
docID = baseScorer.nextDoc();
continue nextDoc;
} else {
failedDim = dim;
}
}
}

collectDocID = docID;

// TODO: we could score on demand instead since we are
// daat here:
collectScore = baseScorer.score();

if (failedDim == -1) {
collectHit(collector, sidewaysCollectors);
} else {
collectNearMiss(sidewaysCollectors, failedDim);
}

docID = baseScorer.nextDoc();
}
}
private void collectHit(Collector collector, Collector[] sidewaysCollectors) throws IOException {
//if (DEBUG) {
// System.out.println(" hit");
//}

collector.collect(collectDocID);
if (drillDownCollector != null) {
drillDownCollector.collect(collectDocID);
}

// TODO: we could "fix" faceting of the sideways counts
// to do this "union" (of the drill down hits) in the
// end instead:

// Tally sideways counts:
for(int dim=0;dim<sidewaysCollectors.length;dim++) {
sidewaysCollectors[dim].collect(collectDocID);
}
}

private void collectNearMiss(Collector[] sidewaysCollectors, int dim) throws IOException {
//if (DEBUG) {
// System.out.println(" missingDim=" + dim);
//}
sidewaysCollectors[dim].collect(collectDocID);
}

private void doUnionScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
private void doUnionScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors) throws IOException {
//if (DEBUG) {
// System.out.println(" doUnionScoring");
//}
@ -478,11 +478,9 @@ class DrillSidewaysScorer extends Scorer {
//if (DEBUG) {
// System.out.println(" dim=0 [" + dims[0].dim + "]");
//}
for(DocsEnum docsEnum : docsEnums[0]) {
if (docsEnum == null) {
continue;
}
docID = docsEnum.docID();
DocIdSetIterator disi = disis[0];
if (disi != null) {
docID = disi.docID();
//if (DEBUG) {
// System.out.println(" start docID=" + docID);
//}

@ -495,19 +493,18 @@ class DrillSidewaysScorer extends Scorer {
missingDims[slot] = 1;
counts[slot] = 2;
}
docID = docsEnum.nextDoc();
docID = disi.nextDoc();
}
}

for(int dim=1;dim<numDims;dim++) {
for (int dim=1;dim<numDims;dim++) {
//if (DEBUG) {
// System.out.println(" dim=" + dim + " [" + dims[dim].dim + "]");
//}
for(DocsEnum docsEnum : docsEnums[dim]) {
if (docsEnum == null) {
continue;
}
docID = docsEnum.docID();
disi = disis[dim];
if (disi != null) {
docID = disi.docID();
//if (DEBUG) {
// System.out.println(" start docID=" + docID);
//}

@ -530,47 +527,14 @@ class DrillSidewaysScorer extends Scorer {
counts[slot] = dim+1;
}
}
docID = docsEnum.nextDoc();
docID = disi.nextDoc();
}

// TODO: sometimes use advance?

/*
int docBase = nextChunkStart - CHUNK;
for(int i=0;i<filledCount;i++) {
int slot = filledSlots[i];
docID = docBase + filledSlots[i];
if (docIDs[slot] == docID && counts[slot] >= dim) {
// This doc is still in the running...
int ddDocID = docsEnum.docID();
if (ddDocID < docID) {
ddDocID = docsEnum.advance(docID);
}
if (ddDocID == docID) {
if (missingDims[slot] >= dim && counts[slot] == allMatchCount) {
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " count=" + (dim+2));
// }
missingDims[slot] = dim+1;
counts[slot] = dim+2;
} else {
//if (DEBUG) {
// System.out.println(" set docID=" + docID + " missing count=" + (dim+1));
// }
counts[slot] = dim+1;
}
}
}
}
*/
}
}

// Collect:
//if (DEBUG) {
// System.out.println(" now collect: " + filledCount + " hits");
//}
for(int i=0;i<filledCount;i++) {
//System.out.println(" now collect: " + filledCount + " hits");
for (int i=0;i<filledCount;i++) {
// NOTE: This is actually in-order collection,
// because we only accept docs originally returned by
// the baseScorer (ie that Scorer is AND'd)

@ -586,7 +550,7 @@ class DrillSidewaysScorer extends Scorer {
collectHit(collector, sidewaysCollectors);
} else if (counts[slot] == numDims) {
//System.out.println(" sw");
collectNearMiss(sidewaysCollectors, missingDims[slot]);
collectNearMiss(sidewaysCollectors[missingDims[slot]]);
}
}

@ -598,6 +562,56 @@ class DrillSidewaysScorer extends Scorer {
}
}
private void collectHit(Collector collector, Collector[] sidewaysCollectors) throws IOException {
//if (DEBUG) {
// System.out.println(" hit");
//}

collector.collect(collectDocID);
if (drillDownCollector != null) {
drillDownCollector.collect(collectDocID);
}

// TODO: we could "fix" faceting of the sideways counts
// to do this "union" (of the drill down hits) in the
// end instead:

// Tally sideways counts:
for (int dim=0;dim<sidewaysCollectors.length;dim++) {
sidewaysCollectors[dim].collect(collectDocID);
}
}

private void collectHit(Collector collector, Collector[] sidewaysCollectors, Collector[] sidewaysCollectors2) throws IOException {
//if (DEBUG) {
// System.out.println(" hit");
//}

collector.collect(collectDocID);
if (drillDownCollector != null) {
drillDownCollector.collect(collectDocID);
}

// TODO: we could "fix" faceting of the sideways counts
// to do this "union" (of the drill down hits) in the
// end instead:

// Tally sideways counts:
for (int i=0;i<sidewaysCollectors.length;i++) {
sidewaysCollectors[i].collect(collectDocID);
}
for (int i=0;i<sidewaysCollectors2.length;i++) {
sidewaysCollectors2[i].collect(collectDocID);
}
}

private void collectNearMiss(Collector sidewaysCollector) throws IOException {
//if (DEBUG) {
// System.out.println(" missingDim=" + dim);
//}
sidewaysCollector.collect(collectDocID);
}

@Override
public int docID() {
return collectDocID;
@ -633,18 +647,27 @@ class DrillSidewaysScorer extends Scorer {
return Collections.singletonList(new ChildScorer(baseScorer, "MUST"));
}

static class DocsEnumsAndFreq implements Comparable<DocsEnumsAndFreq> {
DocsEnum[] docsEnums;
// Max cost for all docsEnums for this dim:
long maxCost;
static class DocsAndCost implements Comparable<DocsAndCost> {
// Iterator for docs matching this dim's filter, or ...
DocIdSetIterator disi;
// Random access bits:
Bits bits;
Collector sidewaysCollector;
String dim;

@Override
public int compareTo(DocsEnumsAndFreq other) {
if (maxCost < other.maxCost) {
public int compareTo(DocsAndCost other) {
if (disi == null) {
if (other.disi == null) {
return 0;
} else {
return 1;
}
} else if (other.disi == null) {
return -1;
} else if (maxCost > other.maxCost) {
} else if (disi.cost() < other.disi.cost()) {
return -1;
} else if (disi.cost() > other.disi.cost()) {
return 1;
} else {
return 0;
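
The new DocsAndCost comparator above sorts drill-down dimensions so that the cheapest DocIdSetIterator is consumed first, with bits-only dimensions (null disi) pushed to the end where they are checked via random access. A minimal standalone sketch of that ordering; the DimCost holder and its sortByCost helper are illustrative names, not part of this patch:

    import java.util.Arrays;
    import java.util.Comparator;

    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.Bits;

    // Hypothetical holder mirroring DocsAndCost: each dim has either
    // an iterator (disi) or random-access bits.
    class DimCost {
      DocIdSetIterator disi;
      Bits bits;

      // Sort cheap iterators first; bits-only dims (disi == null) last:
      static void sortByCost(DimCost[] dims) {
        Arrays.sort(dims, new Comparator<DimCost>() {
          @Override
          public int compare(DimCost a, DimCost b) {
            if (a.disi == null) {
              return b.disi == null ? 0 : 1;
            } else if (b.disi == null) {
              return -1;
            } else if (a.disi.cost() < b.disi.cost()) {
              return -1;
            } else if (a.disi.cost() > b.disi.cost()) {
              return 1;
            } else {
              return 0;
            }
          }
        });
      }
    }
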
@ -26,11 +26,12 @@ import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.NumericUtils;

/** Represents a range over double values. */
/** Represents a range over double values.
*
* @lucene.experimental */
public final class DoubleRange extends Range {
final double minIncl;
final double maxIncl;

@ -99,14 +100,15 @@ public final class DoubleRange extends Range {
return "DoubleRange(" + minIncl + " to " + maxIncl + ")";
}

/** Returns a new {@link Filter} accepting only documents
* in this range. Note that this filter is not
* efficient: it's a linear scan of all docs, testing
* each value. If the {@link ValueSource} is static,
* e.g. an indexed numeric field, then it's more
* efficient to use {@link NumericRangeFilter}. */
public Filter getFilter(final ValueSource valueSource) {
@Override
public Filter getFilter(final Filter fastMatchFilter, final ValueSource valueSource) {
return new Filter() {

@Override
public String toString() {
return "Filter(" + DoubleRange.this.toString() + ")";
}

@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
@ -119,49 +121,48 @@ public final class DoubleRange extends Range {

final int maxDoc = context.reader().maxDoc();

final Bits fastMatchBits;
if (fastMatchFilter != null) {
DocIdSet dis = fastMatchFilter.getDocIdSet(context, null);
if (dis == null) {
// No documents match
return null;
}
fastMatchBits = dis.bits();
if (fastMatchBits == null) {
throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits");
}
} else {
fastMatchBits = null;
}

return new DocIdSet() {

@Override
public DocIdSetIterator iterator() {
return new DocIdSetIterator() {
int doc = -1;

public Bits bits() {
return new Bits() {
@Override
public int nextDoc() throws IOException {
while (true) {
doc++;
if (doc == maxDoc) {
return doc = NO_MORE_DOCS;
}
if (acceptDocs != null && acceptDocs.get(doc) == false) {
continue;
}
double v = values.doubleVal(doc);
if (accept(v)) {
return doc;
}
public boolean get(int docID) {
if (acceptDocs != null && acceptDocs.get(docID) == false) {
return false;
}
if (fastMatchBits != null && fastMatchBits.get(docID) == false) {
return false;
}
return accept(values.doubleVal(docID));
}

@Override
public int advance(int target) throws IOException {
doc = target-1;
return nextDoc();
}

@Override
public int docID() {
return doc;
}

@Override
public long cost() {
// Since we do a linear scan over all
// documents, our cost is O(maxDoc):
public int length() {
return maxDoc;
}
};
}

@Override
public DocIdSetIterator iterator() {
throw new UnsupportedOperationException("this filter can only be accessed via bits()");
}
};
}
};
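
Note that the Filter returned by the new getFilter exposes matches only through DocIdSet.bits(); its iterator() throws UnsupportedOperationException, so callers must probe it document by document. A rough sketch of that access pattern, assuming a filter, an AtomicReaderContext, and a candidateDocs array (standing in for docs the rest of the query already matched) are in scope:

    import java.io.IOException;

    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.util.Bits;

    // Sketch: probe the random-access-only range filter per candidate
    // doc; the costly ValueSource is only evaluated inside bits.get().
    static boolean[] probeRange(Filter rangeFilter, AtomicReaderContext context, int[] candidateDocs) throws IOException {
      boolean[] inRange = new boolean[candidateDocs.length];
      DocIdSet set = rangeFilter.getDocIdSet(context, context.reader().getLiveDocs());
      if (set != null) {
        Bits bits = set.bits();  // non-null for this filter
        for (int i = 0; i < candidateDocs.length; i++) {
          inRange[i] = bits.get(candidateDocs[i]);
        }
      }
      return inRange;
    }
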
@ -24,12 +24,15 @@ import java.util.List;
import org.apache.lucene.document.DoubleDocValuesField; // javadocs
import org.apache.lucene.document.FloatDocValuesField; // javadocs
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource; // javadocs
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.NumericUtils;

@ -61,7 +64,16 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts {
/** Create {@code RangeFacetCounts}, using the provided
* {@link ValueSource}. */
public DoubleRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, DoubleRange... ranges) throws IOException {
super(field, ranges);
this(field, valueSource, hits, null, ranges);
}

/** Create {@code RangeFacetCounts}, using the provided
* {@link ValueSource}, and using the provided Filter as
* a fastmatch: only documents passing the filter are
* checked for the matching ranges. The filter must be
* random access (implement {@link DocIdSet#bits}). */
public DoubleRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, Filter fastMatchFilter, DoubleRange... ranges) throws IOException {
super(field, ranges, fastMatchFilter);
count(valueSource, hits.getMatchingDocs());
}

@ -84,10 +96,29 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts {
FunctionValues fv = valueSource.getValues(Collections.emptyMap(), hits.context);

totCount += hits.totalHits;
Bits bits;
if (fastMatchFilter != null) {
DocIdSet dis = fastMatchFilter.getDocIdSet(hits.context, null);
if (dis == null) {
// No documents match
continue;
}
bits = dis.bits();
if (bits == null) {
throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits");
}
} else {
bits = null;
}

DocIdSetIterator docs = hits.bits.iterator();

int doc;
while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (bits != null && bits.get(doc) == false) {
doc++;
continue;
}
// Skip missing docs:
if (fv.exists(doc)) {
counter.add(NumericUtils.doubleToSortableLong(fv.doubleVal(doc)));
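
Putting the new constructor to work: the sketch below counts double ranges while a cheap fast-match filter short-circuits the per-document doubleVal computation. The "distance" field, ranges, and bounds are invented for illustration; any Filter whose DocIdSet implements bits(), such as a NumericRangeFilter, qualifies:

    import java.io.IOException;

    import org.apache.lucene.facet.Facets;
    import org.apache.lucene.facet.FacetsCollector;
    import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.NumericRangeFilter;

    // Sketch: count distance ranges under a bounding fast-match filter.
    static void countDistances(IndexSearcher searcher) throws IOException {
      FacetsCollector fc = new FacetsCollector();
      searcher.search(new MatchAllDocsQuery(), fc);

      // Only docs accepted here ever have their value computed:
      Filter fastMatch = NumericRangeFilter.newDoubleRange("distance", 0.0, 50.0, true, true);

      Facets facets = new DoubleRangeFacetCounts("distance",
          new DoubleFieldSource("distance"), fc, fastMatch,
          new DoubleRange("near", 0.0, true, 10.0, false),
          new DoubleRange("far", 10.0, true, 50.0, true));
      System.out.println(facets.getTopChildren(10, "distance"));
    }
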
@ -26,10 +26,11 @@ import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.util.Bits;

/** Represents a range over long values. */
/** Represents a range over long values.
*
* @lucene.experimental */
public final class LongRange extends Range {
final long minIncl;
final long maxIncl;

@ -91,14 +92,15 @@ public final class LongRange extends Range {
return "LongRange(" + minIncl + " to " + maxIncl + ")";
}

/** Returns a new {@link Filter} accepting only documents
* in this range. Note that this filter is not
* efficient: it's a linear scan of all docs, testing
* each value. If the {@link ValueSource} is static,
* e.g. an indexed numeric field, then it's more
* efficient to use {@link NumericRangeFilter}. */
public Filter getFilter(final ValueSource valueSource) {
@Override
public Filter getFilter(final Filter fastMatchFilter, final ValueSource valueSource) {
return new Filter() {

@Override
public String toString() {
return "Filter(" + LongRange.this.toString() + ")";
}

@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
@ -111,49 +113,48 @@ public final class LongRange extends Range {

final int maxDoc = context.reader().maxDoc();

final Bits fastMatchBits;
if (fastMatchFilter != null) {
DocIdSet dis = fastMatchFilter.getDocIdSet(context, null);
if (dis == null) {
// No documents match
return null;
}
fastMatchBits = dis.bits();
if (fastMatchBits == null) {
throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits");
}
} else {
fastMatchBits = null;
}

return new DocIdSet() {

@Override
public DocIdSetIterator iterator() {
return new DocIdSetIterator() {
int doc = -1;

public Bits bits() {
return new Bits() {
@Override
public int nextDoc() throws IOException {
while (true) {
doc++;
if (doc == maxDoc) {
return doc = NO_MORE_DOCS;
}
if (acceptDocs != null && acceptDocs.get(doc) == false) {
continue;
}
long v = values.longVal(doc);
if (accept(v)) {
return doc;
}
public boolean get(int docID) {
if (acceptDocs != null && acceptDocs.get(docID) == false) {
return false;
}
if (fastMatchBits != null && fastMatchBits.get(docID) == false) {
return false;
}
return accept(values.longVal(docID));
}

@Override
public int advance(int target) throws IOException {
doc = target-1;
return nextDoc();
}

@Override
public int docID() {
return doc;
}

@Override
public long cost() {
// Since we do a linear scan over all
// documents, our cost is O(maxDoc):
public int length() {
return maxDoc;
}
};
}

@Override
public DocIdSetIterator iterator() {
throw new UnsupportedOperationException("this filter can only be accessed via bits()");
}
};
}
};
@ -22,11 +22,14 @@ import java.util.Collections;
import java.util.List;

import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.search.DocIdSetIterator;

/** {@link Facets} implementation that computes counts for

@ -50,7 +53,16 @@ public class LongRangeFacetCounts extends RangeFacetCounts {
/** Create {@code RangeFacetCounts}, using the provided
* {@link ValueSource}. */
public LongRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, LongRange... ranges) throws IOException {
super(field, ranges);
this(field, valueSource, hits, null, ranges);
}

/** Create {@code RangeFacetCounts}, using the provided
* {@link ValueSource}, and using the provided Filter as
* a fastmatch: only documents passing the filter are
* checked for the matching ranges. The filter must be
* random access (implement {@link DocIdSet#bits}). */
public LongRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, Filter fastMatchFilter, LongRange... ranges) throws IOException {
super(field, ranges, fastMatchFilter);
count(valueSource, hits.getMatchingDocs());
}

@ -65,9 +77,28 @@ public class LongRangeFacetCounts extends RangeFacetCounts {
FunctionValues fv = valueSource.getValues(Collections.emptyMap(), hits.context);

totCount += hits.totalHits;
Bits bits;
if (fastMatchFilter != null) {
DocIdSet dis = fastMatchFilter.getDocIdSet(hits.context, null);
if (dis == null) {
// No documents match
continue;
}
bits = dis.bits();
if (bits == null) {
throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits");
}
} else {
bits = null;
}

DocIdSetIterator docs = hits.bits.iterator();
int doc;
while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (bits != null && bits.get(doc) == false) {
doc++;
continue;
}
// Skip missing docs:
if (fv.exists(doc)) {
counter.add(fv.longVal(doc));
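
The same fast-match hook works for drill-down: a LongRange's filter can be added to a DrillDownQuery, optionally guarded by a fast-match filter that is consulted before the ValueSource. A sketch modeled on the tests further below; the "timestamp" field, the bounds, and the FacetsConfig are assumed:

    import java.io.IOException;

    import org.apache.lucene.facet.DrillDownQuery;
    import org.apache.lucene.facet.FacetsConfig;
    import org.apache.lucene.queries.function.ValueSource;
    import org.apache.lucene.queries.function.valuesource.LongFieldSource;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.TopDocs;

    // Sketch: drill down on a long range computed from a ValueSource.
    static TopDocs drillDownRecent(IndexSearcher searcher, FacetsConfig config) throws IOException {
      ValueSource vs = new LongFieldSource("timestamp");
      LongRange recent = new LongRange("recent", 1000L, true, 2000L, false);
      DrillDownQuery ddq = new DrillDownQuery(config);
      // Passing null means no fast-match filter: every live doc is checked.
      ddq.add("timestamp", recent.getFilter(null, vs));
      return searcher.search(ddq, 10);
    }
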
@ -17,6 +17,13 @@ package org.apache.lucene.facet.range;
* limitations under the License.
*/

import org.apache.lucene.facet.DrillDownQuery; // javadocs
import org.apache.lucene.facet.DrillSideways; // javadocs
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery; // javadocs
import org.apache.lucene.search.NumericRangeFilter; // javadocs

/** Base class for a single labeled range.
*
* @lucene.experimental */

@ -33,6 +40,32 @@ public abstract class Range {
this.label = label;
}

/** Returns a new {@link Filter} accepting only documents
* in this range. This filter is not general-purpose;
* you should either use it with {@link DrillSideways} by
* adding it to {@link DrillDownQuery#add}, or pass it to
* {@link FilteredQuery} using its {@link
* FilteredQuery#QUERY_FIRST_FILTER_STRATEGY}. If the
* {@link ValueSource} is static, e.g. an indexed numeric
* field, then it may be more efficient to use {@link
* NumericRangeFilter}. The provided fastMatchFilter,
* if non-null, will first be consulted, and only if
* that is set for each document will the range then be
* checked. */
public abstract Filter getFilter(Filter fastMatchFilter, ValueSource valueSource);

/** Returns a new {@link Filter} accepting only documents
* in this range. This filter is not general-purpose;
* you should either use it with {@link DrillSideways} by
* adding it to {@link DrillDownQuery#add}, or pass it to
* {@link FilteredQuery} using its {@link
* FilteredQuery#QUERY_FIRST_FILTER_STRATEGY}. If the
* {@link ValueSource} is static, e.g. an indexed numeric
* field, then it may be more efficient to use {@link NumericRangeFilter}. */
public Filter getFilter(ValueSource valueSource) {
return getFilter(null, valueSource);
}

/** Invoke this for a useless range. */
protected void failNoMatch() {
throw new IllegalArgumentException("range \"" + label + "\" matches nothing");
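
The javadoc above is explicit that these filters are linear-scan and should run last. A sketch of the FilteredQuery usage it prescribes, with the base query, range, and ValueSource assumed to be in scope:

    import org.apache.lucene.queries.function.ValueSource;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.search.FilteredQuery;
    import org.apache.lucene.search.Query;

    // Sketch: consult the costly range filter only for docs the base
    // query already matched, via QUERY_FIRST_FILTER_STRATEGY.
    static Query restrictToRange(Query baseQuery, Range range, ValueSource valueSource) {
      Filter rangeFilter = range.getFilter(null, valueSource);  // no fast-match filter
      return new FilteredQuery(baseQuery, rangeFilter, FilteredQuery.QUERY_FIRST_FILTER_STRATEGY);
    }
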
@ -24,7 +24,7 @@ import java.util.List;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.search.Filter;

/** Base class for range faceting.
*

@ -36,17 +36,23 @@ abstract class RangeFacetCounts extends Facets {
/** Counts, initialized in by subclass. */
protected final int[] counts;

/** Optional: if specified, we first test this Filter to
* see whether the document should be checked for
* matching ranges. If this is null, all documents are
* checked. */
protected final Filter fastMatchFilter;

/** Our field name. */
protected final String field;

/** Total number of hits. */
protected int totCount;

/** Create {@code RangeFacetCounts}, using {@link
* LongFieldSource} from the specified field. */
protected RangeFacetCounts(String field, Range[] ranges) throws IOException {
/** Create {@code RangeFacetCounts} */
protected RangeFacetCounts(String field, Range[] ranges, Filter fastMatchFilter) throws IOException {
this.field = field;
this.ranges = ranges;
this.fastMatchFilter = fastMatchFilter;
counts = new int[ranges.length];
}
@ -18,16 +18,10 @@ package org.apache.lucene.facet.sortedset;
*/

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;

/** Wraps a {@link IndexReader} and resolves ords
* using existing {@link SortedSetDocValues} APIs without a
@ -645,7 +645,7 @@ public class TestDrillSideways extends FacetTestCase {
final FixedBitSet bits = new FixedBitSet(maxDoc);
for(int docID=0;docID < maxDoc;docID++) {
// Keeps only the even ids:
if ((acceptDocs == null || acceptDocs.get(docID)) && ((Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0)) {
if ((acceptDocs == null || acceptDocs.get(docID)) && (Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0) {
bits.set(docID);
}
}

@ -689,7 +689,7 @@ public class TestDrillSideways extends FacetTestCase {
// subScorers are on the same docID:
if (!anyMultiValuedDrillDowns) {
// Can only do this test when there are no OR'd
// drill-down values, beacuse in that case it's
// drill-down values, because in that case it's
// easily possible for one of the DD terms to be on
// a future docID:
new DrillSideways(s, config, tr) {
@ -20,6 +20,7 @@ package org.apache.lucene.facet.range;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;

@ -30,6 +31,7 @@ import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetResult;

@ -39,10 +41,10 @@ import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.MultiFacets;
import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;

@ -50,12 +52,20 @@ import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util._TestUtil;
@ -229,6 +239,10 @@ public class TestRangeFacetCounts extends FacetTestCase {

IndexSearcher s = newSearcher(r);

if (VERBOSE) {
System.out.println("TEST: searcher=" + s);
}

DrillSideways ds = new DrillSideways(s, config, tr) {

@Override

@ -365,6 +379,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
System.out.println("TEST: numDocs=" + numDocs);
}
long[] values = new long[numDocs];
long minValue = Long.MAX_VALUE;
long maxValue = Long.MIN_VALUE;
for(int i=0;i<numDocs;i++) {
Document doc = new Document();
long v = random().nextLong();

@ -372,6 +388,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
doc.add(new NumericDocValuesField("field", v));
doc.add(new LongField("field", v, Field.Store.NO));
w.addDocument(doc);
minValue = Math.min(minValue, v);
maxValue = Math.max(maxValue, v);
}
IndexReader r = w.getReader();

@ -386,6 +404,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
int numRange = _TestUtil.nextInt(random(), 1, 100);
LongRange[] ranges = new LongRange[numRange];
int[] expectedCounts = new int[numRange];
long minAcceptedValue = Long.MAX_VALUE;
long maxAcceptedValue = Long.MIN_VALUE;
for(int rangeID=0;rangeID<numRange;rangeID++) {
long min;
if (rangeID > 0 && random().nextInt(10) == 7) {

@ -447,13 +467,26 @@ public class TestRangeFacetCounts extends FacetTestCase {
}
if (accept) {
expectedCounts[rangeID]++;
minAcceptedValue = Math.min(minAcceptedValue, values[i]);
maxAcceptedValue = Math.max(maxAcceptedValue, values[i]);
}
}
}

FacetsCollector sfc = new FacetsCollector();
s.search(new MatchAllDocsQuery(), sfc);
Facets facets = new LongRangeFacetCounts("field", sfc, ranges);
Filter fastMatchFilter;
if (random().nextBoolean()) {
if (random().nextBoolean()) {
fastMatchFilter = NumericRangeFilter.newLongRange("field", minValue, maxValue, true, true);
} else {
fastMatchFilter = NumericRangeFilter.newLongRange("field", minAcceptedValue, maxAcceptedValue, true, true);
}
} else {
fastMatchFilter = null;
}
ValueSource vs = new LongFieldSource("field");
Facets facets = new LongRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges);
FacetResult result = facets.getTopChildren(10, "field");
assertEquals(numRange, result.labelValues.length);
for(int rangeID=0;rangeID<numRange;rangeID++) {

@ -468,7 +501,15 @@ public class TestRangeFacetCounts extends FacetTestCase {

// Test drill-down:
DrillDownQuery ddq = new DrillDownQuery(config);
ddq.add("field", NumericRangeQuery.newLongRange("field", range.min, range.max, range.minInclusive, range.maxInclusive));
if (random().nextBoolean()) {
if (random().nextBoolean()) {
ddq.add("field", NumericRangeFilter.newLongRange("field", range.min, range.max, range.minInclusive, range.maxInclusive));
} else {
ddq.add("field", NumericRangeQuery.newLongRange("field", range.min, range.max, range.minInclusive, range.maxInclusive));
}
} else {
ddq.add("field", range.getFilter(fastMatchFilter, vs));
}
assertEquals(expectedCounts[rangeID], s.search(ddq, 10).totalHits);
}
}
@ -482,6 +523,8 @@ public class TestRangeFacetCounts extends FacetTestCase {

int numDocs = atLeast(1000);
float[] values = new float[numDocs];
float minValue = Float.POSITIVE_INFINITY;
float maxValue = Float.NEGATIVE_INFINITY;
for(int i=0;i<numDocs;i++) {
Document doc = new Document();
float v = random().nextFloat();

@ -489,6 +532,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
doc.add(new FloatDocValuesField("field", v));
doc.add(new FloatField("field", v, Field.Store.NO));
w.addDocument(doc);
minValue = Math.min(minValue, v);
maxValue = Math.max(maxValue, v);
}
IndexReader r = w.getReader();

@ -503,6 +548,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
int numRange = _TestUtil.nextInt(random(), 1, 5);
DoubleRange[] ranges = new DoubleRange[numRange];
int[] expectedCounts = new int[numRange];
float minAcceptedValue = Float.POSITIVE_INFINITY;
float maxAcceptedValue = Float.NEGATIVE_INFINITY;
if (VERBOSE) {
System.out.println("TEST: " + numRange + " ranges");
}

@ -578,13 +625,26 @@ public class TestRangeFacetCounts extends FacetTestCase {
}
if (accept) {
expectedCounts[rangeID]++;
minAcceptedValue = Math.min(minAcceptedValue, values[i]);
maxAcceptedValue = Math.max(maxAcceptedValue, values[i]);
}
}
}

FacetsCollector sfc = new FacetsCollector();
s.search(new MatchAllDocsQuery(), sfc);
Facets facets = new DoubleRangeFacetCounts("field", new FloatFieldSource("field"), sfc, ranges);
Filter fastMatchFilter;
if (random().nextBoolean()) {
if (random().nextBoolean()) {
fastMatchFilter = NumericRangeFilter.newFloatRange("field", minValue, maxValue, true, true);
} else {
fastMatchFilter = NumericRangeFilter.newFloatRange("field", minAcceptedValue, maxAcceptedValue, true, true);
}
} else {
fastMatchFilter = null;
}
ValueSource vs = new FloatFieldSource("field");
Facets facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges);
FacetResult result = facets.getTopChildren(10, "field");
assertEquals(numRange, result.labelValues.length);
for(int rangeID=0;rangeID<numRange;rangeID++) {

@ -599,7 +659,15 @@ public class TestRangeFacetCounts extends FacetTestCase {

// Test drill-down:
DrillDownQuery ddq = new DrillDownQuery(config);
ddq.add("field", NumericRangeQuery.newFloatRange("field", (float) range.min, (float) range.max, range.minInclusive, range.maxInclusive));
if (random().nextBoolean()) {
if (random().nextBoolean()) {
ddq.add("field", NumericRangeFilter.newFloatRange("field", (float) range.min, (float) range.max, range.minInclusive, range.maxInclusive));
} else {
ddq.add("field", NumericRangeQuery.newFloatRange("field", (float) range.min, (float) range.max, range.minInclusive, range.maxInclusive));
}
} else {
ddq.add("field", range.getFilter(fastMatchFilter, vs));
}
assertEquals(expectedCounts[rangeID], s.search(ddq, 10).totalHits);
}
}
@ -613,6 +681,8 @@ public class TestRangeFacetCounts extends FacetTestCase {

int numDocs = atLeast(1000);
double[] values = new double[numDocs];
double minValue = Double.POSITIVE_INFINITY;
double maxValue = Double.NEGATIVE_INFINITY;
for(int i=0;i<numDocs;i++) {
Document doc = new Document();
double v = random().nextDouble();

@ -620,6 +690,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
doc.add(new DoubleDocValuesField("field", v));
doc.add(new DoubleField("field", v, Field.Store.NO));
w.addDocument(doc);
minValue = Math.min(minValue, v);
maxValue = Math.max(maxValue, v);
}
IndexReader r = w.getReader();

@ -634,6 +706,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
int numRange = _TestUtil.nextInt(random(), 1, 5);
DoubleRange[] ranges = new DoubleRange[numRange];
int[] expectedCounts = new int[numRange];
double minAcceptedValue = Double.POSITIVE_INFINITY;
double maxAcceptedValue = Double.NEGATIVE_INFINITY;
for(int rangeID=0;rangeID<numRange;rangeID++) {
double min;
if (rangeID > 0 && random().nextInt(10) == 7) {

@ -693,13 +767,26 @@ public class TestRangeFacetCounts extends FacetTestCase {
}
if (accept) {
expectedCounts[rangeID]++;
minAcceptedValue = Math.min(minAcceptedValue, values[i]);
maxAcceptedValue = Math.max(maxAcceptedValue, values[i]);
}
}
}

FacetsCollector sfc = new FacetsCollector();
s.search(new MatchAllDocsQuery(), sfc);
Facets facets = new DoubleRangeFacetCounts("field", sfc, ranges);
Filter fastMatchFilter;
if (random().nextBoolean()) {
if (random().nextBoolean()) {
fastMatchFilter = NumericRangeFilter.newDoubleRange("field", minValue, maxValue, true, true);
} else {
fastMatchFilter = NumericRangeFilter.newDoubleRange("field", minAcceptedValue, maxAcceptedValue, true, true);
}
} else {
fastMatchFilter = null;
}
ValueSource vs = new DoubleFieldSource("field");
Facets facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges);
FacetResult result = facets.getTopChildren(10, "field");
assertEquals(numRange, result.labelValues.length);
for(int rangeID=0;rangeID<numRange;rangeID++) {

@ -714,7 +801,16 @@ public class TestRangeFacetCounts extends FacetTestCase {

// Test drill-down:
DrillDownQuery ddq = new DrillDownQuery(config);
ddq.add("field", NumericRangeQuery.newDoubleRange("field", range.min, range.max, range.minInclusive, range.maxInclusive));
if (random().nextBoolean()) {
if (random().nextBoolean()) {
ddq.add("field", NumericRangeFilter.newDoubleRange("field", range.min, range.max, range.minInclusive, range.maxInclusive));
} else {
ddq.add("field", NumericRangeQuery.newDoubleRange("field", range.min, range.max, range.minInclusive, range.maxInclusive));
}
} else {
ddq.add("field", range.getFilter(fastMatchFilter, vs));
}

assertEquals(expectedCounts[rangeID], s.search(ddq, 10).totalHits);
}
}
@ -765,16 +861,13 @@ public class TestRangeFacetCounts extends FacetTestCase {

Document doc = new Document();
writer.addDocument(doc);

doc = new Document();
writer.addDocument(doc);

doc = new Document();
writer.addDocument(doc);

// Test wants 3 docs in one segment:
writer.forceMerge(1);

ValueSource vs = new ValueSource() {
final ValueSource vs = new ValueSource() {
@SuppressWarnings("rawtypes")
@Override
public FunctionValues getValues(Map ignored, AtomicReaderContext ignored2) {

@ -801,6 +894,8 @@ public class TestRangeFacetCounts extends FacetTestCase {
throw new UnsupportedOperationException();
}
};

FacetsConfig config = new FacetsConfig();

FacetsCollector fc = new FacetsCollector();

@ -808,18 +903,67 @@ public class TestRangeFacetCounts extends FacetTestCase {
IndexSearcher s = newSearcher(r);
s.search(new MatchAllDocsQuery(), fc);

Facets facets = new DoubleRangeFacetCounts("field", vs, fc,
final DoubleRange[] ranges = new DoubleRange[] {
new DoubleRange("< 1", 0.0, true, 1.0, false),
new DoubleRange("< 2", 0.0, true, 2.0, false),
new DoubleRange("< 5", 0.0, true, 5.0, false),
new DoubleRange("< 10", 0.0, true, 10.0, false),
new DoubleRange("< 20", 0.0, true, 20.0, false),
new DoubleRange("< 50", 0.0, true, 50.0, false));
new DoubleRange("< 50", 0.0, true, 50.0, false)};

final Filter fastMatchFilter;
final AtomicBoolean filterWasUsed = new AtomicBoolean();
if (random().nextBoolean()) {
// Sort of silly:
fastMatchFilter = new CachingWrapperFilter(new QueryWrapperFilter(new MatchAllDocsQuery())) {
@Override
protected DocIdSet cacheImpl(DocIdSetIterator iterator, AtomicReader reader)
throws IOException {
final FixedBitSet cached = new FixedBitSet(reader.maxDoc());
filterWasUsed.set(true);
cached.or(iterator);
return cached;
}
};
} else {
fastMatchFilter = null;
}

if (VERBOSE) {
System.out.println("TEST: fastMatchFilter=" + fastMatchFilter);
}

Facets facets = new DoubleRangeFacetCounts("field", vs, fc, fastMatchFilter, ranges);

assertEquals("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", facets.getTopChildren(10, "field").toString());
assertTrue(fastMatchFilter == null || filterWasUsed.get());

// Test drill-down:
assertEquals(1, s.search(new ConstantScoreQuery(new DoubleRange("< 2", 0.0, true, 2.0, false).getFilter(vs)), 10).totalHits);
DrillDownQuery ddq = new DrillDownQuery(config);
ddq.add("field", ranges[1].getFilter(fastMatchFilter, vs));

// Test simple drill-down:
assertEquals(1, s.search(ddq, 10).totalHits);

// Test drill-sideways after drill-down
DrillSideways ds = new DrillSideways(s, config, (TaxonomyReader) null) {

@Override
protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {
assert drillSideways.length == 1;
return new DoubleRangeFacetCounts("field", vs, drillSideways[0], fastMatchFilter, ranges);
}

@Override
protected boolean scoreSubDocsAtOnce() {
return random().nextBoolean();
}
};

DrillSidewaysResult dsr = ds.search(ddq, 10);
assertEquals(1, dsr.hits.totalHits);
assertEquals("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n",
dsr.facets.getTopChildren(10, "field").toString());

IOUtils.close(r, writer, dir);
}