LUCENE-5297: allow range faceting on any ValueSource

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1538863 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2013-11-05 04:28:41 +00:00
parent 5b372bd1cd
commit 55c8752b20
3 changed files with 87 additions and 54 deletions

View File

@ -145,6 +145,9 @@ New Features
* LUCENE-5312: Add BlockJoinSorter, a new Sorter implementation that makes sure * LUCENE-5312: Add BlockJoinSorter, a new Sorter implementation that makes sure
to never split up blocks of documents indexed with IndexWriter.addDocuments. to never split up blocks of documents indexed with IndexWriter.addDocuments.
(Adrien Grand) (Adrien Grand)
* LUCENE-5297: Allow range faceting on any ValueSource, not just
NumericDocValues fields. (Shai Erera)
Bug Fixes Bug Fixes

View File

@ -28,29 +28,15 @@ import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode; import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.FacetsAccumulator; import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.util.Bits;
/** Uses a {@link NumericDocValues} and accumulates
* counts for provided ranges. This is dynamic (does not
* use the taxonomy index or anything from the index
* except the NumericDocValuesField). */
/**
* Uses {@link RangeFacetRequest#getValues(AtomicReaderContext)} and accumulates
* counts for provided ranges.
*/
public class RangeAccumulator extends FacetsAccumulator { public class RangeAccumulator extends FacetsAccumulator {
static class RangeSet {
final Range[] ranges;
final String field;
public RangeSet(Range[] ranges, String field) {
this.ranges = ranges;
this.field = field;
}
}
final List<RangeSet> requests = new ArrayList<RangeSet>();
public RangeAccumulator(FacetRequest... facetRequests) { public RangeAccumulator(FacetRequest... facetRequests) {
this(Arrays.asList(facetRequests)); this(Arrays.asList(facetRequests));
} }
@ -65,9 +51,6 @@ public class RangeAccumulator extends FacetsAccumulator {
if (fr.categoryPath.length != 1) { if (fr.categoryPath.length != 1) {
throw new IllegalArgumentException("only flat (dimension only) CategoryPath is allowed"); throw new IllegalArgumentException("only flat (dimension only) CategoryPath is allowed");
} }
RangeFacetRequest<?> rfr = (RangeFacetRequest<?>) fr;
requests.add(new RangeSet(rfr.ranges, fr.categoryPath.components[0]));
} }
} }
@ -78,35 +61,29 @@ public class RangeAccumulator extends FacetsAccumulator {
// faster to do MatchingDocs on the inside) ... see // faster to do MatchingDocs on the inside) ... see
// patches on LUCENE-4965): // patches on LUCENE-4965):
List<FacetResult> results = new ArrayList<FacetResult>(); List<FacetResult> results = new ArrayList<FacetResult>();
for (int i = 0; i < requests.size(); i++) { for (FacetRequest req : searchParams.facetRequests) {
RangeSet ranges = requests.get(i); RangeFacetRequest<?> rangeFR = (RangeFacetRequest<?>) req;
int[] counts = new int[rangeFR.ranges.length];
int[] counts = new int[ranges.ranges.length];
for (MatchingDocs hits : matchingDocs) { for (MatchingDocs hits : matchingDocs) {
NumericDocValues ndv = hits.context.reader().getNumericDocValues(ranges.field); FunctionValues fv = rangeFR.getValues(hits.context);
if (ndv == null) {
continue; // no numeric values for this field in this reader
}
Bits docsWithField = hits.context.reader().getDocsWithField(ranges.field);
final int length = hits.bits.length(); final int length = hits.bits.length();
int doc = 0; int doc = 0;
while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) { while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) {
long v = ndv.get(doc);
// Skip missing docs: // Skip missing docs:
if (v == 0 && docsWithField.get(doc) == false) { if (!fv.exists(doc)) {
doc++; ++doc;
continue; continue;
} }
long v = fv.longVal(doc);
// TODO: if all ranges are non-overlapping, we // TODO: if all ranges are non-overlapping, we
// should instead do a bin-search up front // should instead do a bin-search up front
// (really, a specialized case of the interval // (really, a specialized case of the interval
// tree) // tree)
// TODO: use interval tree instead of linear search: // TODO: use interval tree instead of linear search:
for (int j = 0; j < ranges.ranges.length; j++) { for (int j = 0; j < rangeFR.ranges.length; j++) {
if (ranges.ranges[j].accept(v)) { if (rangeFR.ranges[j].accept(v)) {
counts[j]++; counts[j]++;
} }
} }
@ -114,19 +91,19 @@ public class RangeAccumulator extends FacetsAccumulator {
doc++; doc++;
} }
} }
List<FacetResultNode> nodes = new ArrayList<FacetResultNode>(ranges.ranges.length); List<FacetResultNode> nodes = new ArrayList<FacetResultNode>(rangeFR.ranges.length);
for(int j=0;j<ranges.ranges.length;j++) { for (int j = 0; j < rangeFR.ranges.length; j++) {
nodes.add(new RangeFacetResultNode(ranges.field, ranges.ranges[j], counts[j])); nodes.add(new RangeFacetResultNode(rangeFR.label, rangeFR.ranges[j], counts[j]));
} }
FacetResultNode rootNode = new FacetResultNode(-1, 0); FacetResultNode rootNode = new FacetResultNode(-1, 0);
rootNode.label = new CategoryPath(ranges.field); rootNode.label = rangeFR.categoryPath;
rootNode.subResults = nodes; rootNode.subResults = nodes;
results.add(new FacetResult(searchParams.facetRequests.get(i), rootNode, nodes.size())); results.add(new FacetResult(req, rootNode, nodes.size()));
} }
return results; return results;
} }

View File

@ -17,38 +17,91 @@ package org.apache.lucene.facet.range;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import java.util.Collections;
import java.util.List; import java.util.List;
import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.search.FacetRequest; import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.FacetsAggregator; import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
/** /**
* Facet request for dynamic ranges based on a * A {@link FacetRequest} for dynamic ranges based on a {@link NumericDocValues}
* NumericDocValues field. This does not use the taxonomy * field or {@link ValueSource}. This does not use the taxonomy index nor any
* index nor any indexed facet values. * indexed facet values.
* *
* @lucene.experimental * @lucene.experimental
*/ */
public class RangeFacetRequest<T extends Range> extends FacetRequest { public class RangeFacetRequest<T extends Range> extends FacetRequest {
public final Range[] ranges; public final Range[] ranges;
public final String label;
private final ValueSource valueSource;
/**
* Create a request for the given ranges over the specified
* {@link NumericDocValues} field. The field name is also used as the label of
* the root {@link FacetResultNode}.
*/
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public RangeFacetRequest(String field, T...ranges) { public RangeFacetRequest(String field, T...ranges) {
super(new CategoryPath(field), 1); this(field, new LongFieldSource(field), ranges);
this.ranges = ranges;
} }
/**
* Create a request for the given ranges over the specified
* {@link NumericDocValues} field. The field name is also used as the label of
* the root {@link FacetResultNode}.
*/
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public RangeFacetRequest(String field, List<T> ranges) { public RangeFacetRequest(String field, List<T> ranges) {
this(field, (T[]) ranges.toArray(new Range[ranges.size()])); this(field, (T[]) ranges.toArray(new Range[ranges.size()]));
} }
/**
 * Creates a request for the given ranges over the values produced by the
 * specified {@link ValueSource}. The label is used as the label of the root
 * {@link FacetResultNode}.
 *
 * @param label name of this request; it becomes the single component of the
 *        request's {@link CategoryPath} and is also kept in {@link #label}
 * @param valueSource source of the per-document values, later read through
 *        {@link #getValues(AtomicReaderContext)}
 * @param ranges the ranges to count; the array is stored as passed in, it is
 *        not copied
 */
@SuppressWarnings("unchecked")
public RangeFacetRequest(String label, ValueSource valueSource, T...ranges) {
super(new CategoryPath(label), 1);
this.ranges = ranges;
this.valueSource = valueSource;
this.label = label;
}
/**
 * Creates a request for the given ranges over the values produced by the
 * specified {@link ValueSource}. The label is used as the label of the root
 * {@link FacetResultNode}.
 *
 * @param label name of this request, forwarded unchanged to the varargs
 *        constructor
 * @param valueSource source of the per-document values, forwarded unchanged
 * @param ranges the ranges to count; their elements are copied into a new
 *        array before being forwarded
 */
@SuppressWarnings("unchecked")
public RangeFacetRequest(String label, ValueSource valueSource, List<T> ranges) {
// The list is copied into a Range[] and cast to T[]; that cast is unchecked,
// hence the @SuppressWarnings above.
this(label, valueSource, (T[]) ranges.toArray(new Range[ranges.size()]));
}
/**
 * Returns the {@link FunctionValues} that this request's {@link ValueSource}
 * produces for the given {@link AtomicReaderContext}. The value source is
 * invoked with an empty context map.
 *
 * @param context the reader context (segment) to obtain values for
 * @return the values returned by the underlying {@link ValueSource}
 * @throws IOException propagated from {@link ValueSource#getValues}
 */
public FunctionValues getValues(AtomicReaderContext context) throws IOException {
return valueSource.getValues(Collections.emptyMap(), context);
}
@Override @Override
public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) { public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) {
return null; throw new UnsupportedOperationException("this FacetRequest does not support categories aggregation and only works with RangeAccumulator");
} }
} }