mirror of https://github.com/apache/lucene.git
LUCENE-7889: Allow grouping on Double/LongValuesSource (#1484)
The grouping module currently allows grouping on a SortedDocValues field, or on a ValueSource. The latter groups only on exact values, and so will not perform well on numeric-valued fields. This commit adds the ability to group by defined ranges from a Long or DoubleValuesSource.
This commit is contained in:
parent
bd004d2c57
commit
7c350d22c7
|
@ -159,7 +159,9 @@ API Changes
|
|||
|
||||
New Features
|
||||
---------------------
|
||||
(No changes)
|
||||
|
||||
* LUCENE-7889: Grouping by range based on values from DoubleValuesSource and LongValuesSource
|
||||
(Alan Woodward)
|
||||
|
||||
Improvements
|
||||
---------------------
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.grouping;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents a contiguous range of double values, with an inclusive minimum and
|
||||
* exclusive maximum
|
||||
*/
|
||||
public class DoubleRange {
|
||||
|
||||
/** The inclusive minimum value of this range */
|
||||
public double min;
|
||||
/** The exclusive maximum value of this range */
|
||||
public double max;
|
||||
|
||||
/**
|
||||
* Creates a new double range, running from {@code min} inclusive to {@code max} exclusive
|
||||
*/
|
||||
public DoubleRange(double min, double max) {
|
||||
this.min = min;
|
||||
this.max = max;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "DoubleRange(" + min + ", " + max + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
DoubleRange that = (DoubleRange) o;
|
||||
return Double.compare(that.min, min) == 0 &&
|
||||
Double.compare(that.max, max) == 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(min, max);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.grouping;
|
||||
|
||||
/**
|
||||
* Groups double values into ranges
|
||||
*/
|
||||
public class DoubleRangeFactory {
|
||||
|
||||
private final double min;
|
||||
private final double width;
|
||||
private final double max;
|
||||
|
||||
/**
|
||||
* Creates a new DoubleRangeFactory
|
||||
* @param min a minimum value; all doubles below this value are grouped into a single range
|
||||
* @param width a standard width; all ranges between {@code min} and {@code max} are this wide,
|
||||
* with the exception of the final range which may be up to this width. Ranges
|
||||
* are inclusive at the lower end, and exclusive at the upper end.
|
||||
* @param max a maximum value; all doubles above this value are grouped into a single range
|
||||
*/
|
||||
public DoubleRangeFactory(double min, double width, double max) {
|
||||
this.min = min;
|
||||
this.width = width;
|
||||
this.max = max;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the DoubleRange that a value should be grouped into
|
||||
* @param value the value to group
|
||||
* @param reuse an existing DoubleRange object to reuse
|
||||
*/
|
||||
public DoubleRange getRange(double value, DoubleRange reuse) {
|
||||
if (reuse == null)
|
||||
reuse = new DoubleRange(Double.MIN_VALUE, Double.MAX_VALUE);
|
||||
if (value < min) {
|
||||
reuse.max = min;
|
||||
reuse.min = Double.MIN_VALUE;
|
||||
return reuse;
|
||||
}
|
||||
if (value >= max) {
|
||||
reuse.min = max;
|
||||
reuse.max = Double.MAX_VALUE;
|
||||
return reuse;
|
||||
}
|
||||
double bucket = Math.floor((value - min) / width);
|
||||
reuse.min = min + (bucket * width);
|
||||
reuse.max = reuse.min + width;
|
||||
return reuse;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,100 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.grouping;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.DoubleValues;
|
||||
import org.apache.lucene.search.DoubleValuesSource;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
|
||||
/**
|
||||
* A GroupSelector implementation that groups documents by double values
|
||||
*/
|
||||
public class DoubleRangeGroupSelector extends GroupSelector<DoubleRange> {
|
||||
|
||||
private final DoubleValuesSource source;
|
||||
private final DoubleRangeFactory rangeFactory;
|
||||
|
||||
private Set<DoubleRange> inSecondPass;
|
||||
private boolean includeEmpty = true;
|
||||
private boolean positioned;
|
||||
private DoubleRange current;
|
||||
|
||||
private LeafReaderContext context;
|
||||
private DoubleValues values;
|
||||
|
||||
/**
|
||||
* Creates a new DoubleRangeGroupSelector
|
||||
* @param source a DoubleValuesSource to retrieve double values per document
|
||||
* @param rangeFactory a DoubleRangeFactory that defines how to group the double values into range buckets
|
||||
*/
|
||||
public DoubleRangeGroupSelector(DoubleValuesSource source, DoubleRangeFactory rangeFactory) {
|
||||
this.source = source;
|
||||
this.rangeFactory = rangeFactory;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||
this.context = readerContext;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
this.values = source.getValues(context, DoubleValuesSource.fromScorer(scorer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public State advanceTo(int doc) throws IOException {
|
||||
positioned = values.advanceExact(doc);
|
||||
if (positioned == false) {
|
||||
return includeEmpty ? State.ACCEPT : State.SKIP;
|
||||
}
|
||||
this.current = rangeFactory.getRange(values.doubleValue(), this.current);
|
||||
if (inSecondPass == null) {
|
||||
return State.ACCEPT;
|
||||
}
|
||||
return inSecondPass.contains(this.current) ? State.ACCEPT : State.SKIP;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DoubleRange currentValue() throws IOException {
|
||||
return positioned ? this.current : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DoubleRange copyValue() throws IOException {
|
||||
return positioned ? new DoubleRange(this.current.min, this.current.max) : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setGroups(Collection<SearchGroup<DoubleRange>> searchGroups) {
|
||||
inSecondPass = new HashSet<>();
|
||||
includeEmpty = false;
|
||||
for (SearchGroup<DoubleRange> group : searchGroups) {
|
||||
if (group.groupValue == null)
|
||||
includeEmpty = true;
|
||||
else
|
||||
inSecondPass.add(group.groupValue);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -151,6 +151,7 @@ public class FirstPassGroupingCollector<T> extends SimpleCollector {
|
|||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
groupSelector.setScorer(scorer);
|
||||
for (LeafFieldComparator comparator : leafComparators) {
|
||||
comparator.setScorer(scorer);
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
|
||||
/**
|
||||
* Defines a group, for use by grouping collectors
|
||||
|
@ -43,6 +44,11 @@ public abstract class GroupSelector<T> {
|
|||
*/
|
||||
public abstract void setNextReader(LeafReaderContext readerContext) throws IOException;
|
||||
|
||||
/**
|
||||
* Set the current Scorer
|
||||
*/
|
||||
public abstract void setScorer(Scorable scorer) throws IOException;
|
||||
|
||||
/**
|
||||
* Advance the GroupSelector's iterator to the given document
|
||||
*/
|
||||
|
@ -53,12 +59,12 @@ public abstract class GroupSelector<T> {
|
|||
*
|
||||
* N.B. this object may be reused, for a persistent version use {@link #copyValue()}
|
||||
*/
|
||||
public abstract T currentValue();
|
||||
public abstract T currentValue() throws IOException;
|
||||
|
||||
/**
|
||||
* @return a copy of the group value of the current document
|
||||
*/
|
||||
public abstract T copyValue();
|
||||
public abstract T copyValue() throws IOException;
|
||||
|
||||
/**
|
||||
* Set a restriction on the group values returned by this selector
|
||||
|
|
|
@ -71,6 +71,10 @@ public class GroupingSearch {
|
|||
this(new TermGroupSelector(groupField), null);
|
||||
}
|
||||
|
||||
/**
 * Constructs a <code>GroupingSearch</code> instance that groups documents using a {@link GroupSelector}
 *
 * <p>Delegates to the two-argument constructor, passing {@code null} as its second argument.
 *
 * @param groupSelector a {@link GroupSelector} that defines groups for this GroupingSearch
 */
public GroupingSearch(GroupSelector<?> groupSelector) {
  this(groupSelector, null);
}
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.grouping;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents a contiguous range of long values, with an inclusive minimum and
|
||||
* exclusive maximum
|
||||
*/
|
||||
public class LongRange {
|
||||
|
||||
/** The inclusive minimum value of this range */
|
||||
public long min;
|
||||
/** The exclusive maximum value of this range */
|
||||
public long max;
|
||||
|
||||
/**
|
||||
* Creates a new double range, running from {@code min} inclusive to {@code max} exclusive
|
||||
*/
|
||||
public LongRange(long min, long max) {
|
||||
this.min = min;
|
||||
this.max = max;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "LongRange(" + min + ", " + max + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
LongRange that = (LongRange) o;
|
||||
return that.min == min && that.max == max;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(min, max);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.grouping;
|
||||
|
||||
/**
|
||||
* Groups double values into ranges
|
||||
*/
|
||||
public class LongRangeFactory {
|
||||
|
||||
private final long min;
|
||||
private final long width;
|
||||
private final long max;
|
||||
|
||||
/**
|
||||
* Creates a new LongRangeFactory
|
||||
* @param min a minimum value; all longs below this value are grouped into a single range
|
||||
* @param width a standard width; all ranges between {@code min} and {@code max} are this wide,
|
||||
* with the exception of the final range which may be up to this width. Ranges
|
||||
* are inclusive at the lower end, and exclusive at the upper end.
|
||||
* @param max a maximum value; all longs above this value are grouped into a single range
|
||||
*/
|
||||
public LongRangeFactory(long min, long width, long max) {
|
||||
this.min = min;
|
||||
this.width = width;
|
||||
this.max = max;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the LongRange that a value should be grouped into
|
||||
* @param value the value to group
|
||||
* @param reuse an existing LongRange object to reuse
|
||||
*/
|
||||
public LongRange getRange(long value, LongRange reuse) {
|
||||
if (reuse == null)
|
||||
reuse = new LongRange(Long.MIN_VALUE, Long.MAX_VALUE);
|
||||
if (value < min) {
|
||||
reuse.max = min;
|
||||
reuse.min = Long.MIN_VALUE;
|
||||
return reuse;
|
||||
}
|
||||
if (value >= max) {
|
||||
reuse.min = max;
|
||||
reuse.max = Long.MAX_VALUE;
|
||||
return reuse;
|
||||
}
|
||||
long bucket = (value - min) / width;
|
||||
reuse.min = min + (bucket * width);
|
||||
reuse.max = reuse.min + width;
|
||||
return reuse;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.grouping;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.DoubleValuesSource;
|
||||
import org.apache.lucene.search.LongValues;
|
||||
import org.apache.lucene.search.LongValuesSource;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
|
||||
/**
|
||||
* A GroupSelector implementation that groups documents by long values
|
||||
*/
|
||||
public class LongRangeGroupSelector extends GroupSelector<LongRange> {
|
||||
|
||||
private final LongValuesSource source;
|
||||
private final LongRangeFactory rangeFactory;
|
||||
|
||||
private Set<LongRange> inSecondPass;
|
||||
private boolean includeEmpty = true;
|
||||
private boolean positioned;
|
||||
private LongRange current;
|
||||
|
||||
private LeafReaderContext context;
|
||||
private LongValues values;
|
||||
|
||||
/**
|
||||
* Creates a new LongRangeGroupSelector
|
||||
* @param source a LongValuesSource to retrieve long values per document
|
||||
* @param rangeFactory a LongRangeFactory that defines how to group the long values into range buckets
|
||||
*/
|
||||
public LongRangeGroupSelector(LongValuesSource source, LongRangeFactory rangeFactory) {
|
||||
this.source = source;
|
||||
this.rangeFactory = rangeFactory;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||
this.context = readerContext;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
this.values = source.getValues(context, DoubleValuesSource.fromScorer(scorer));
|
||||
}
|
||||
|
||||
@Override
|
||||
public State advanceTo(int doc) throws IOException {
|
||||
positioned = values.advanceExact(doc);
|
||||
if (positioned == false) {
|
||||
return includeEmpty ? State.ACCEPT : State.SKIP;
|
||||
}
|
||||
this.current = rangeFactory.getRange(values.longValue(), this.current);
|
||||
if (inSecondPass == null) {
|
||||
return State.ACCEPT;
|
||||
}
|
||||
return inSecondPass.contains(this.current) ? State.ACCEPT : State.SKIP;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LongRange currentValue() throws IOException {
|
||||
return positioned ? this.current : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LongRange copyValue() throws IOException {
|
||||
return positioned ? new LongRange(this.current.min, this.current.max) : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setGroups(Collection<SearchGroup<LongRange>> searchGroups) {
|
||||
inSecondPass = new HashSet<>();
|
||||
includeEmpty = false;
|
||||
for (SearchGroup<LongRange> group : searchGroups) {
|
||||
if (group.groupValue == null)
|
||||
includeEmpty = true;
|
||||
else
|
||||
inSecondPass.add(group.groupValue);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -78,6 +78,7 @@ public class SecondPassGroupingCollector<T> extends SimpleCollector {
|
|||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
groupSelector.setScorer(scorer);
|
||||
groupReducer.setScorer(scorer);
|
||||
}
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.util.Map;
|
|||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
|
||||
|
@ -64,6 +65,9 @@ public class TermGroupSelector extends GroupSelector<BytesRef> {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException { }
|
||||
|
||||
@Override
|
||||
public State advanceTo(int doc) throws IOException {
|
||||
if (this.docValues.advanceExact(doc) == false) {
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.util.Set;
|
|||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
import org.apache.lucene.util.mutable.MutableValue;
|
||||
|
||||
/**
|
||||
|
@ -56,6 +57,9 @@ public class ValueSourceGroupSelector extends GroupSelector<MutableValue> {
|
|||
this.filler = values.getValueFiller();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException { }
|
||||
|
||||
@Override
|
||||
public State advanceTo(int doc) throws IOException {
|
||||
this.filler.fillValue(doc);
|
||||
|
@ -67,7 +71,7 @@ public class ValueSourceGroupSelector extends GroupSelector<MutableValue> {
|
|||
}
|
||||
|
||||
@Override
|
||||
public MutableValue currentValue() {
|
||||
public MutableValue currentValue() throws IOException {
|
||||
return filler.getValue();
|
||||
}
|
||||
|
||||
|
|
|
@ -28,11 +28,9 @@
|
|||
* <p>Grouping requires a number of inputs:</p>
|
||||
*
|
||||
* <ul>
|
||||
* <li><code>groupField</code>: this is the field used for grouping.
|
||||
* For example, if you use the <code>author</code> field then each
|
||||
* group has all books by the same author. Documents that don't
|
||||
* have this field are grouped under a single group with
|
||||
* a <code>null</code> group value.
|
||||
* <li><code>groupSelector</code>: this defines how groups are created
|
||||
* from values per-document. The grouping module ships with
|
||||
* selectors for grouping by term, and by long and double ranges.
|
||||
*
|
||||
* <li><code>groupSort</code>: how the groups are sorted. For sorting
|
||||
* purposes, each group is "represented" by the highest-sorted
|
||||
|
@ -80,6 +78,10 @@
|
|||
* the value of a {@link org.apache.lucene.index.SortedDocValues} field</li>
|
||||
* <li>{@link org.apache.lucene.search.grouping.ValueSourceGroupSelector} groups based on
|
||||
* the value of a {@link org.apache.lucene.queries.function.ValueSource}</li>
|
||||
* <li>{@link org.apache.lucene.search.grouping.DoubleRangeGroupSelector} groups based on
|
||||
* the value of a {@link org.apache.lucene.search.DoubleValuesSource}</li>
|
||||
* <li>{@link org.apache.lucene.search.grouping.LongRangeGroupSelector} groups based on
|
||||
* the value of a {@link org.apache.lucene.search.LongValuesSource}</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>Known limitations:</p>
|
||||
|
@ -137,17 +139,10 @@
|
|||
* writer.addDocuments(oneGroup);
|
||||
* </pre>
|
||||
*
|
||||
* Then, at search time, do this up front:
|
||||
* Then, at search time:
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* // Set this once in your app & save away for reusing across all queries:
|
||||
* Filter groupEndDocs = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupEnd", "x"))));
|
||||
* </pre>
|
||||
*
|
||||
* Finally, do this per search:
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* // Per search:
|
||||
* Query groupEndDocs = new TermQuery(new Term("groupEnd", "x"));
|
||||
* BlockGroupingCollector c = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, groupEndDocs);
|
||||
* s.search(new TermQuery(new Term("content", searchTerm)), c);
|
||||
* TopGroups groupsResult = c.getTopGroups(withinGroupSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.grouping;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.DoublePoint;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.DocValuesFieldExistsQuery;
|
||||
import org.apache.lucene.search.DoubleValuesSource;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
public class DoubleRangeGroupSelectorTest extends BaseGroupSelectorTestCase<DoubleRange> {
|
||||
|
||||
@Override
|
||||
protected void addGroupField(Document document, int id) {
|
||||
if (rarely()) {
|
||||
return; // missing value
|
||||
}
|
||||
// numbers between 0 and 1000, groups are 100 wide from 100 to 900
|
||||
double value = random().nextDouble() * 1000;
|
||||
document.add(new DoublePoint("double", value));
|
||||
document.add(new NumericDocValuesField("double", Double.doubleToLongBits(value)));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected GroupSelector<DoubleRange> getGroupSelector() {
|
||||
return new DoubleRangeGroupSelector(DoubleValuesSource.fromDoubleField("double"),
|
||||
new DoubleRangeFactory(100, 100, 900));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query filterQuery(DoubleRange groupValue) {
|
||||
if (groupValue == null) {
|
||||
return new BooleanQuery.Builder()
|
||||
.add(new MatchAllDocsQuery(), BooleanClause.Occur.FILTER)
|
||||
.add(new DocValuesFieldExistsQuery("double"), BooleanClause.Occur.MUST_NOT)
|
||||
.build();
|
||||
}
|
||||
return DoublePoint.newRangeQuery("double", groupValue.min, Math.nextDown(groupValue.max));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.grouping;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.DocValuesFieldExistsQuery;
|
||||
import org.apache.lucene.search.LongValuesSource;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
public class LongRangeGroupSelectorTest extends BaseGroupSelectorTestCase<LongRange> {
|
||||
|
||||
@Override
|
||||
protected void addGroupField(Document document, int id) {
|
||||
if (rarely()) {
|
||||
return; // missing value
|
||||
}
|
||||
// numbers between 0 and 1000, groups are 100 wide from 100 to 900
|
||||
long value = random().nextInt(1000);
|
||||
document.add(new LongPoint("long", value));
|
||||
document.add(new NumericDocValuesField("long", value));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected GroupSelector<LongRange> getGroupSelector() {
|
||||
return new LongRangeGroupSelector(LongValuesSource.fromLongField("long"),
|
||||
new LongRangeFactory(100, 100, 900));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query filterQuery(LongRange groupValue) {
|
||||
if (groupValue == null) {
|
||||
return new BooleanQuery.Builder()
|
||||
.add(new MatchAllDocsQuery(), BooleanClause.Occur.FILTER)
|
||||
.add(new DocValuesFieldExistsQuery("long"), BooleanClause.Occur.MUST_NOT)
|
||||
.build();
|
||||
}
|
||||
return LongPoint.newRangeQuery("long", groupValue.min, groupValue.max - 1);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.grouping;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestDoubleRangeFactory extends LuceneTestCase {
|
||||
|
||||
public void test() {
|
||||
|
||||
DoubleRangeFactory factory = new DoubleRangeFactory(10, 10, 50);
|
||||
DoubleRange scratch = new DoubleRange(0, 0);
|
||||
|
||||
assertEquals(new DoubleRange(Double.MIN_VALUE, 10), factory.getRange(4, scratch));
|
||||
assertEquals(new DoubleRange(10, 20), factory.getRange(10, scratch));
|
||||
assertEquals(new DoubleRange(20, 30), factory.getRange(20, scratch));
|
||||
assertEquals(new DoubleRange(10, 20), factory.getRange(15, scratch));
|
||||
assertEquals(new DoubleRange(30, 40), factory.getRange(35, scratch));
|
||||
assertEquals(new DoubleRange(50, Double.MAX_VALUE), factory.getRange(50, scratch));
|
||||
assertEquals(new DoubleRange(50, Double.MAX_VALUE), factory.getRange(500, scratch));
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.grouping;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestLongRangeFactory extends LuceneTestCase {
|
||||
|
||||
public void test() {
|
||||
|
||||
LongRangeFactory factory = new LongRangeFactory(10, 10, 50);
|
||||
LongRange scratch = new LongRange(0, 0);
|
||||
|
||||
assertEquals(new LongRange(Long.MIN_VALUE, 10), factory.getRange(4, scratch));
|
||||
assertEquals(new LongRange(10, 20), factory.getRange(10, scratch));
|
||||
assertEquals(new LongRange(20, 30), factory.getRange(20, scratch));
|
||||
assertEquals(new LongRange(10, 20), factory.getRange(15, scratch));
|
||||
assertEquals(new LongRange(30, 40), factory.getRange(35, scratch));
|
||||
assertEquals(new LongRange(50, Long.MAX_VALUE), factory.getRange(50, scratch));
|
||||
assertEquals(new LongRange(50, Long.MAX_VALUE), factory.getRange(500, scratch));
|
||||
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue