LUCENE-5297: allow range faceting on any ValueSource

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1538863 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2013-11-05 04:28:41 +00:00
parent 5b372bd1cd
commit 55c8752b20
3 changed files with 87 additions and 54 deletions

View File

@ -145,6 +145,9 @@ New Features
* LUCENE-5312: Add BlockJoinSorter, a new Sorter implementation that makes sure
to never split up blocks of documents indexed with IndexWriter.addDocuments.
(Adrien Grand)
* LUCENE-5297: Allow to range-facet on any ValueSource, not just
NumericDocValues fields. (Shai Erera)
Bug Fixes

View File

@ -28,29 +28,15 @@ import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.util.Bits;
/** Uses a {@link NumericDocValues} and accumulates
* counts for provided ranges. This is dynamic (does not
* use the taxonomy index or anything from the index
* except the NumericDocValuesField). */
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
/**
* Uses {@link RangeFacetRequest#getValues(AtomicReaderContext)} and accumulates
* counts for provided ranges.
*/
public class RangeAccumulator extends FacetsAccumulator {
static class RangeSet {
final Range[] ranges;
final String field;
public RangeSet(Range[] ranges, String field) {
this.ranges = ranges;
this.field = field;
}
}
final List<RangeSet> requests = new ArrayList<RangeSet>();
public RangeAccumulator(FacetRequest... facetRequests) {
this(Arrays.asList(facetRequests));
}
@ -65,9 +51,6 @@ public class RangeAccumulator extends FacetsAccumulator {
if (fr.categoryPath.length != 1) {
throw new IllegalArgumentException("only flat (dimension only) CategoryPath is allowed");
}
RangeFacetRequest<?> rfr = (RangeFacetRequest<?>) fr;
requests.add(new RangeSet(rfr.ranges, fr.categoryPath.components[0]));
}
}
@ -78,35 +61,29 @@ public class RangeAccumulator extends FacetsAccumulator {
// faster to do MachingDocs on the inside) ... see
// patches on LUCENE-4965):
List<FacetResult> results = new ArrayList<FacetResult>();
for (int i = 0; i < requests.size(); i++) {
RangeSet ranges = requests.get(i);
int[] counts = new int[ranges.ranges.length];
for (FacetRequest req : searchParams.facetRequests) {
RangeFacetRequest<?> rangeFR = (RangeFacetRequest<?>) req;
int[] counts = new int[rangeFR.ranges.length];
for (MatchingDocs hits : matchingDocs) {
NumericDocValues ndv = hits.context.reader().getNumericDocValues(ranges.field);
if (ndv == null) {
continue; // no numeric values for this field in this reader
}
Bits docsWithField = hits.context.reader().getDocsWithField(ranges.field);
FunctionValues fv = rangeFR.getValues(hits.context);
final int length = hits.bits.length();
int doc = 0;
while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) {
long v = ndv.get(doc);
// Skip missing docs:
if (v == 0 && docsWithField.get(doc) == false) {
doc++;
if (!fv.exists(doc)) {
++doc;
continue;
}
long v = fv.longVal(doc);
// TODO: if all ranges are non-overlapping, we
// should instead do a bin-search up front
// (really, a specialized case of the interval
// tree)
// TODO: use interval tree instead of linear search:
for (int j = 0; j < ranges.ranges.length; j++) {
if (ranges.ranges[j].accept(v)) {
for (int j = 0; j < rangeFR.ranges.length; j++) {
if (rangeFR.ranges[j].accept(v)) {
counts[j]++;
}
}
@ -114,19 +91,19 @@ public class RangeAccumulator extends FacetsAccumulator {
doc++;
}
}
List<FacetResultNode> nodes = new ArrayList<FacetResultNode>(ranges.ranges.length);
for(int j=0;j<ranges.ranges.length;j++) {
nodes.add(new RangeFacetResultNode(ranges.field, ranges.ranges[j], counts[j]));
List<FacetResultNode> nodes = new ArrayList<FacetResultNode>(rangeFR.ranges.length);
for (int j = 0; j < rangeFR.ranges.length; j++) {
nodes.add(new RangeFacetResultNode(rangeFR.label, rangeFR.ranges[j], counts[j]));
}
FacetResultNode rootNode = new FacetResultNode(-1, 0);
rootNode.label = new CategoryPath(ranges.field);
rootNode.label = rangeFR.categoryPath;
rootNode.subResults = nodes;
results.add(new FacetResult(searchParams.facetRequests.get(i), rootNode, nodes.size()));
results.add(new FacetResult(req, rootNode, nodes.size()));
}
return results;
}

View File

@ -17,38 +17,91 @@ package org.apache.lucene.facet.range;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
/**
* Facet request for dynamic ranges based on a
* NumericDocValues field. This does not use the taxonomy
* index nor any indexed facet values.
* A {@link FacetRequest} for dynamic ranges based on a {@link NumericDocValues}
* field or {@link ValueSource}. This does not use the taxonomy index nor any
* indexed facet values.
*
* @lucene.experimental
*/
public class RangeFacetRequest<T extends Range> extends FacetRequest {
public final Range[] ranges;
public final String label;
private final ValueSource valueSource;
/**
* Create a request for the given ranges over the specified
* {@link NumericDocValues} field. The field will be used to as the root's
* {@link FacetResultNode} label.
*/
@SuppressWarnings("unchecked")
public RangeFacetRequest(String field, T...ranges) {
super(new CategoryPath(field), 1);
this.ranges = ranges;
this(field, new LongFieldSource(field), ranges);
}
/**
* Create a request for the given ranges over the specified
* {@link NumericDocValues} field. The field will be used to as the root's
* {@link FacetResultNode} label.
*/
@SuppressWarnings("unchecked")
public RangeFacetRequest(String field, List<T> ranges) {
this(field, (T[]) ranges.toArray(new Range[ranges.size()]));
}
/**
* Create a request for the given ranges over the specified
* {@link ValueSource}. The label will be used to as the root's
* {@link FacetResultNode} label.
*/
@SuppressWarnings("unchecked")
public RangeFacetRequest(String label, ValueSource valueSource, T...ranges) {
super(new CategoryPath(label), 1);
this.ranges = ranges;
this.valueSource = valueSource;
this.label = label;
}
/**
* Create a request for the given ranges over the specified
* {@link ValueSource}. The label will be used to as the root's
* {@link FacetResultNode} label.
*/
@SuppressWarnings("unchecked")
public RangeFacetRequest(String label, ValueSource valueSource, List<T> ranges) {
this(label, valueSource, (T[]) ranges.toArray(new Range[ranges.size()]));
}
/**
* Returns the {@link FunctionValues} for the given
* {@link AtomicReaderContext}. If the request was created over a
* {@link NumericDocValues} field, the respective {@link NumericDocValues} is
* returned.
*/
public FunctionValues getValues(AtomicReaderContext context) throws IOException {
return valueSource.getValues(Collections.emptyMap(), context);
}
@Override
public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) {
return null;
throw new UnsupportedOperationException("this FacetRequest does not support categories aggregation and only works with RangeAccumulator");
}
}