diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 63a613e3a73..b9a13c48007 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -145,6 +145,9 @@ New Features * LUCENE-5312: Add BlockJoinSorter, a new Sorter implementation that makes sure to never split up blocks of documents indexed with IndexWriter.addDocuments. (Adrien Grand) + +* LUCENE-5297: Allow to range-facet on any ValueSource, not just + NumericDocValues fields. (Shai Erera) Bug Fixes diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java b/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java index 6adacec3426..7a7c54b7078 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java @@ -28,29 +28,15 @@ import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetResultNode; import org.apache.lucene.facet.search.FacetsAccumulator; import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.util.Bits; - -/** Uses a {@link NumericDocValues} and accumulates - * counts for provided ranges. This is dynamic (does not - * use the taxonomy index or anything from the index - * except the NumericDocValuesField). */ +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +/** + * Uses {@link RangeFacetRequest#getValues(AtomicReaderContext)} and accumulates + * counts for provided ranges. + */ public class RangeAccumulator extends FacetsAccumulator { - static class RangeSet { - final Range[] ranges; - final String field; - - public RangeSet(Range[] ranges, String field) { - this.ranges = ranges; - this.field = field; - } - } - - final List requests = new ArrayList(); - public RangeAccumulator(FacetRequest... facetRequests) { this(Arrays.asList(facetRequests)); } @@ -65,9 +51,6 @@ public class RangeAccumulator extends FacetsAccumulator { if (fr.categoryPath.length != 1) { throw new IllegalArgumentException("only flat (dimension only) CategoryPath is allowed"); } - - RangeFacetRequest rfr = (RangeFacetRequest) fr; - requests.add(new RangeSet(rfr.ranges, fr.categoryPath.components[0])); } } @@ -78,35 +61,29 @@ public class RangeAccumulator extends FacetsAccumulator { // faster to do MachingDocs on the inside) ... see // patches on LUCENE-4965): List results = new ArrayList(); - for (int i = 0; i < requests.size(); i++) { - RangeSet ranges = requests.get(i); - - int[] counts = new int[ranges.ranges.length]; + for (FacetRequest req : searchParams.facetRequests) { + RangeFacetRequest rangeFR = (RangeFacetRequest) req; + int[] counts = new int[rangeFR.ranges.length]; for (MatchingDocs hits : matchingDocs) { - NumericDocValues ndv = hits.context.reader().getNumericDocValues(ranges.field); - if (ndv == null) { - continue; // no numeric values for this field in this reader - } - Bits docsWithField = hits.context.reader().getDocsWithField(ranges.field); - + FunctionValues fv = rangeFR.getValues(hits.context); final int length = hits.bits.length(); int doc = 0; while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) { - long v = ndv.get(doc); - // Skip missing docs: - if (v == 0 && docsWithField.get(doc) == false) { - doc++; + if (!fv.exists(doc)) { + ++doc; continue; } + + long v = fv.longVal(doc); // TODO: if all ranges are non-overlapping, we // should instead do a bin-search up front // (really, a specialized case of the interval // tree) // TODO: use interval tree instead of linear search: - for (int j = 0; j < ranges.ranges.length; j++) { - if (ranges.ranges[j].accept(v)) { + for (int j = 0; j < rangeFR.ranges.length; j++) { + if (rangeFR.ranges[j].accept(v)) { counts[j]++; } } @@ -114,19 +91,19 @@ public class RangeAccumulator extends FacetsAccumulator { doc++; } } - - List nodes = new ArrayList(ranges.ranges.length); - for(int j=0;j nodes = new ArrayList(rangeFR.ranges.length); + for (int j = 0; j < rangeFR.ranges.length; j++) { + nodes.add(new RangeFacetResultNode(rangeFR.label, rangeFR.ranges[j], counts[j])); } - + FacetResultNode rootNode = new FacetResultNode(-1, 0); - rootNode.label = new CategoryPath(ranges.field); + rootNode.label = rangeFR.categoryPath; rootNode.subResults = nodes; - results.add(new FacetResult(searchParams.facetRequests.get(i), rootNode, nodes.size())); + results.add(new FacetResult(req, rootNode, nodes.size())); } - + return results; } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetRequest.java b/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetRequest.java index a7376f75aed..8786fe9bd7b 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetRequest.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetRequest.java @@ -17,38 +17,91 @@ package org.apache.lucene.facet.range; * limitations under the License. */ +import java.io.IOException; +import java.util.Collections; import java.util.List; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.search.FacetRequest; +import org.apache.lucene.facet.search.FacetResultNode; import org.apache.lucene.facet.search.FacetsAggregator; import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.valuesource.LongFieldSource; /** - * Facet request for dynamic ranges based on a - * NumericDocValues field. This does not use the taxonomy - * index nor any indexed facet values. + * A {@link FacetRequest} for dynamic ranges based on a {@link NumericDocValues} + * field or {@link ValueSource}. This does not use the taxonomy index nor any + * indexed facet values. * * @lucene.experimental */ public class RangeFacetRequest extends FacetRequest { public final Range[] ranges; - + public final String label; + + private final ValueSource valueSource; + + /** + * Create a request for the given ranges over the specified + * {@link NumericDocValues} field. The field will be used to as the root's + * {@link FacetResultNode} label. + */ @SuppressWarnings("unchecked") public RangeFacetRequest(String field, T...ranges) { - super(new CategoryPath(field), 1); - this.ranges = ranges; + this(field, new LongFieldSource(field), ranges); } + /** + * Create a request for the given ranges over the specified + * {@link NumericDocValues} field. The field will be used to as the root's + * {@link FacetResultNode} label. + */ @SuppressWarnings("unchecked") public RangeFacetRequest(String field, List ranges) { this(field, (T[]) ranges.toArray(new Range[ranges.size()])); } + + /** + * Create a request for the given ranges over the specified + * {@link ValueSource}. The label will be used to as the root's + * {@link FacetResultNode} label. + */ + @SuppressWarnings("unchecked") + public RangeFacetRequest(String label, ValueSource valueSource, T...ranges) { + super(new CategoryPath(label), 1); + this.ranges = ranges; + this.valueSource = valueSource; + this.label = label; + } + + /** + * Create a request for the given ranges over the specified + * {@link ValueSource}. The label will be used to as the root's + * {@link FacetResultNode} label. + */ + @SuppressWarnings("unchecked") + public RangeFacetRequest(String label, ValueSource valueSource, List ranges) { + this(label, valueSource, (T[]) ranges.toArray(new Range[ranges.size()])); + } + /** + * Returns the {@link FunctionValues} for the given + * {@link AtomicReaderContext}. If the request was created over a + * {@link NumericDocValues} field, the respective {@link NumericDocValues} is + * returned. + */ + public FunctionValues getValues(AtomicReaderContext context) throws IOException { + return valueSource.getValues(Collections.emptyMap(), context); + } + @Override public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) { - return null; + throw new UnsupportedOperationException("this FacetRequest does not support categories aggregation and only works with RangeAccumulator"); } }