mirror of https://github.com/apache/lucene.git
LUCENE-7889: Allow grouping on Double/LongValuesSource (#1484)
The grouping module currently allows grouping on a SortedDocValues field, or on a ValueSource. The latter groups only on exact values, and so will not perform well on numeric-valued fields. This commit adds the ability to group by defined ranges from a Long or DoubleValuesSource.
This commit is contained in:
parent
bd004d2c57
commit
7c350d22c7
|
@ -159,7 +159,9 @@ API Changes
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
---------------------
|
---------------------
|
||||||
(No changes)
|
|
||||||
|
* LUCENE-7889: Grouping by range based on values from DoubleValuesSource and LongValuesSource
|
||||||
|
(Alan Woodward)
|
||||||
|
|
||||||
Improvements
|
Improvements
|
||||||
---------------------
|
---------------------
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search.grouping;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
/**
 * A half-open interval of double values: {@link #min} belongs to the range,
 * {@link #max} does not.
 */
public class DoubleRange {

  /** Lower bound of this range (inclusive) */
  public double min;

  /** Upper bound of this range (exclusive) */
  public double max;

  /**
   * Builds a range that starts at {@code min} (inclusive) and ends at {@code max} (exclusive)
   */
  public DoubleRange(double min, double max) {
    this.min = min;
    this.max = max;
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder("DoubleRange(");
    sb.append(min).append(", ").append(max).append(")");
    return sb.toString();
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o == null || o.getClass() != getClass()) {
      return false;
    }
    DoubleRange other = (DoubleRange) o;
    // Double.compare (rather than ==) makes NaN equal to itself and keeps 0.0 and -0.0 distinct
    if (Double.compare(other.min, min) != 0) {
      return false;
    }
    return Double.compare(other.max, max) == 0;
  }

  @Override
  public int hashCode() {
    // Produces exactly the value of Objects.hash(min, max)
    int h = 31 + Double.hashCode(min);
    return 31 * h + Double.hashCode(max);
  }
}
|
|
@ -0,0 +1,67 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search.grouping;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Groups double values into ranges
|
||||||
|
*/
|
||||||
|
public class DoubleRangeFactory {
|
||||||
|
|
||||||
|
private final double min;
|
||||||
|
private final double width;
|
||||||
|
private final double max;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new DoubleRangeFactory
|
||||||
|
* @param min a minimum value; all doubles below this value are grouped into a single range
|
||||||
|
* @param width a standard width; all ranges between {@code min} and {@code max} are this wide,
|
||||||
|
* with the exception of the final range which may be up to this width. Ranges
|
||||||
|
* are inclusive at the lower end, and exclusive at the upper end.
|
||||||
|
* @param max a maximum value; all doubles above this value are grouped into a single range
|
||||||
|
*/
|
||||||
|
public DoubleRangeFactory(double min, double width, double max) {
|
||||||
|
this.min = min;
|
||||||
|
this.width = width;
|
||||||
|
this.max = max;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds the DoubleRange that a value should be grouped into
|
||||||
|
* @param value the value to group
|
||||||
|
* @param reuse an existing DoubleRange object to reuse
|
||||||
|
*/
|
||||||
|
public DoubleRange getRange(double value, DoubleRange reuse) {
|
||||||
|
if (reuse == null)
|
||||||
|
reuse = new DoubleRange(Double.MIN_VALUE, Double.MAX_VALUE);
|
||||||
|
if (value < min) {
|
||||||
|
reuse.max = min;
|
||||||
|
reuse.min = Double.MIN_VALUE;
|
||||||
|
return reuse;
|
||||||
|
}
|
||||||
|
if (value >= max) {
|
||||||
|
reuse.min = max;
|
||||||
|
reuse.max = Double.MAX_VALUE;
|
||||||
|
return reuse;
|
||||||
|
}
|
||||||
|
double bucket = Math.floor((value - min) / width);
|
||||||
|
reuse.min = min + (bucket * width);
|
||||||
|
reuse.max = reuse.min + width;
|
||||||
|
return reuse;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,100 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search.grouping;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.search.DoubleValues;
|
||||||
|
import org.apache.lucene.search.DoubleValuesSource;
|
||||||
|
import org.apache.lucene.search.Scorable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A GroupSelector implementation that groups documents by double values
|
||||||
|
*/
|
||||||
|
public class DoubleRangeGroupSelector extends GroupSelector<DoubleRange> {
|
||||||
|
|
||||||
|
private final DoubleValuesSource source;
|
||||||
|
private final DoubleRangeFactory rangeFactory;
|
||||||
|
|
||||||
|
private Set<DoubleRange> inSecondPass;
|
||||||
|
private boolean includeEmpty = true;
|
||||||
|
private boolean positioned;
|
||||||
|
private DoubleRange current;
|
||||||
|
|
||||||
|
private LeafReaderContext context;
|
||||||
|
private DoubleValues values;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new DoubleRangeGroupSelector
|
||||||
|
* @param source a DoubleValuesSource to retrieve double values per document
|
||||||
|
* @param rangeFactory a DoubleRangeFactory that defines how to group the double values into range buckets
|
||||||
|
*/
|
||||||
|
public DoubleRangeGroupSelector(DoubleValuesSource source, DoubleRangeFactory rangeFactory) {
|
||||||
|
this.source = source;
|
||||||
|
this.rangeFactory = rangeFactory;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||||
|
this.context = readerContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorable scorer) throws IOException {
|
||||||
|
this.values = source.getValues(context, DoubleValuesSource.fromScorer(scorer));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public State advanceTo(int doc) throws IOException {
|
||||||
|
positioned = values.advanceExact(doc);
|
||||||
|
if (positioned == false) {
|
||||||
|
return includeEmpty ? State.ACCEPT : State.SKIP;
|
||||||
|
}
|
||||||
|
this.current = rangeFactory.getRange(values.doubleValue(), this.current);
|
||||||
|
if (inSecondPass == null) {
|
||||||
|
return State.ACCEPT;
|
||||||
|
}
|
||||||
|
return inSecondPass.contains(this.current) ? State.ACCEPT : State.SKIP;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DoubleRange currentValue() throws IOException {
|
||||||
|
return positioned ? this.current : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DoubleRange copyValue() throws IOException {
|
||||||
|
return positioned ? new DoubleRange(this.current.min, this.current.max) : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setGroups(Collection<SearchGroup<DoubleRange>> searchGroups) {
|
||||||
|
inSecondPass = new HashSet<>();
|
||||||
|
includeEmpty = false;
|
||||||
|
for (SearchGroup<DoubleRange> group : searchGroups) {
|
||||||
|
if (group.groupValue == null)
|
||||||
|
includeEmpty = true;
|
||||||
|
else
|
||||||
|
inSecondPass.add(group.groupValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -151,6 +151,7 @@ public class FirstPassGroupingCollector<T> extends SimpleCollector {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setScorer(Scorable scorer) throws IOException {
|
public void setScorer(Scorable scorer) throws IOException {
|
||||||
|
groupSelector.setScorer(scorer);
|
||||||
for (LeafFieldComparator comparator : leafComparators) {
|
for (LeafFieldComparator comparator : leafComparators) {
|
||||||
comparator.setScorer(scorer);
|
comparator.setScorer(scorer);
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.search.Scorable;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defines a group, for use by grouping collectors
|
* Defines a group, for use by grouping collectors
|
||||||
|
@ -43,6 +44,11 @@ public abstract class GroupSelector<T> {
|
||||||
*/
|
*/
|
||||||
public abstract void setNextReader(LeafReaderContext readerContext) throws IOException;
|
public abstract void setNextReader(LeafReaderContext readerContext) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the current Scorer
|
||||||
|
*/
|
||||||
|
public abstract void setScorer(Scorable scorer) throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Advance the GroupSelector's iterator to the given document
|
* Advance the GroupSelector's iterator to the given document
|
||||||
*/
|
*/
|
||||||
|
@ -53,12 +59,12 @@ public abstract class GroupSelector<T> {
|
||||||
*
|
*
|
||||||
* N.B. this object may be reused, for a persistent version use {@link #copyValue()}
|
* N.B. this object may be reused, for a persistent version use {@link #copyValue()}
|
||||||
*/
|
*/
|
||||||
public abstract T currentValue();
|
public abstract T currentValue() throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return a copy of the group value of the current document
|
* @return a copy of the group value of the current document
|
||||||
*/
|
*/
|
||||||
public abstract T copyValue();
|
public abstract T copyValue() throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set a restriction on the group values returned by this selector
|
* Set a restriction on the group values returned by this selector
|
||||||
|
|
|
@ -71,6 +71,10 @@ public class GroupingSearch {
|
||||||
this(new TermGroupSelector(groupField), null);
|
this(new TermGroupSelector(groupField), null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a <code>GroupingSearch</code> instance that groups documents using a {@link GroupSelector}
|
||||||
|
* @param groupSelector a {@link GroupSelector} that defines groups for this GroupingSearch
|
||||||
|
*/
|
||||||
public GroupingSearch(GroupSelector<?> groupSelector) {
|
public GroupingSearch(GroupSelector<?> groupSelector) {
|
||||||
this(groupSelector, null);
|
this(groupSelector, null);
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search.grouping;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
/**
 * A half-open interval of long values: {@link #min} belongs to the range,
 * {@link #max} does not.
 */
public class LongRange {

  /** Lower bound of this range (inclusive) */
  public long min;

  /** Upper bound of this range (exclusive) */
  public long max;

  /**
   * Builds a long range that starts at {@code min} (inclusive) and ends at {@code max} (exclusive)
   */
  public LongRange(long min, long max) {
    this.min = min;
    this.max = max;
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder("LongRange(");
    sb.append(min).append(", ").append(max).append(")");
    return sb.toString();
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o == null || o.getClass() != getClass()) {
      return false;
    }
    LongRange other = (LongRange) o;
    if (other.min != min) {
      return false;
    }
    return other.max == max;
  }

  @Override
  public int hashCode() {
    // Produces exactly the value of Objects.hash(min, max)
    int h = 31 + Long.hashCode(min);
    return 31 * h + Long.hashCode(max);
  }
}
|
|
@ -0,0 +1,67 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search.grouping;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Groups double values into ranges
|
||||||
|
*/
|
||||||
|
public class LongRangeFactory {
|
||||||
|
|
||||||
|
private final long min;
|
||||||
|
private final long width;
|
||||||
|
private final long max;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new LongRangeFactory
|
||||||
|
* @param min a minimum value; all longs below this value are grouped into a single range
|
||||||
|
* @param width a standard width; all ranges between {@code min} and {@code max} are this wide,
|
||||||
|
* with the exception of the final range which may be up to this width. Ranges
|
||||||
|
* are inclusive at the lower end, and exclusive at the upper end.
|
||||||
|
* @param max a maximum value; all longs above this value are grouped into a single range
|
||||||
|
*/
|
||||||
|
public LongRangeFactory(long min, long width, long max) {
|
||||||
|
this.min = min;
|
||||||
|
this.width = width;
|
||||||
|
this.max = max;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds the LongRange that a value should be grouped into
|
||||||
|
* @param value the value to group
|
||||||
|
* @param reuse an existing LongRange object to reuse
|
||||||
|
*/
|
||||||
|
public LongRange getRange(long value, LongRange reuse) {
|
||||||
|
if (reuse == null)
|
||||||
|
reuse = new LongRange(Long.MIN_VALUE, Long.MAX_VALUE);
|
||||||
|
if (value < min) {
|
||||||
|
reuse.max = min;
|
||||||
|
reuse.min = Long.MIN_VALUE;
|
||||||
|
return reuse;
|
||||||
|
}
|
||||||
|
if (value >= max) {
|
||||||
|
reuse.min = max;
|
||||||
|
reuse.max = Long.MAX_VALUE;
|
||||||
|
return reuse;
|
||||||
|
}
|
||||||
|
long bucket = (value - min) / width;
|
||||||
|
reuse.min = min + (bucket * width);
|
||||||
|
reuse.max = reuse.min + width;
|
||||||
|
return reuse;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,101 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search.grouping;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.search.DoubleValuesSource;
|
||||||
|
import org.apache.lucene.search.LongValues;
|
||||||
|
import org.apache.lucene.search.LongValuesSource;
|
||||||
|
import org.apache.lucene.search.Scorable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A GroupSelector implementation that groups documents by long values
|
||||||
|
*/
|
||||||
|
public class LongRangeGroupSelector extends GroupSelector<LongRange> {
|
||||||
|
|
||||||
|
private final LongValuesSource source;
|
||||||
|
private final LongRangeFactory rangeFactory;
|
||||||
|
|
||||||
|
private Set<LongRange> inSecondPass;
|
||||||
|
private boolean includeEmpty = true;
|
||||||
|
private boolean positioned;
|
||||||
|
private LongRange current;
|
||||||
|
|
||||||
|
private LeafReaderContext context;
|
||||||
|
private LongValues values;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new LongRangeGroupSelector
|
||||||
|
* @param source a LongValuesSource to retrieve long values per document
|
||||||
|
* @param rangeFactory a LongRangeFactory that defines how to group the long values into range buckets
|
||||||
|
*/
|
||||||
|
public LongRangeGroupSelector(LongValuesSource source, LongRangeFactory rangeFactory) {
|
||||||
|
this.source = source;
|
||||||
|
this.rangeFactory = rangeFactory;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setNextReader(LeafReaderContext readerContext) throws IOException {
|
||||||
|
this.context = readerContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorable scorer) throws IOException {
|
||||||
|
this.values = source.getValues(context, DoubleValuesSource.fromScorer(scorer));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public State advanceTo(int doc) throws IOException {
|
||||||
|
positioned = values.advanceExact(doc);
|
||||||
|
if (positioned == false) {
|
||||||
|
return includeEmpty ? State.ACCEPT : State.SKIP;
|
||||||
|
}
|
||||||
|
this.current = rangeFactory.getRange(values.longValue(), this.current);
|
||||||
|
if (inSecondPass == null) {
|
||||||
|
return State.ACCEPT;
|
||||||
|
}
|
||||||
|
return inSecondPass.contains(this.current) ? State.ACCEPT : State.SKIP;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LongRange currentValue() throws IOException {
|
||||||
|
return positioned ? this.current : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LongRange copyValue() throws IOException {
|
||||||
|
return positioned ? new LongRange(this.current.min, this.current.max) : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setGroups(Collection<SearchGroup<LongRange>> searchGroups) {
|
||||||
|
inSecondPass = new HashSet<>();
|
||||||
|
includeEmpty = false;
|
||||||
|
for (SearchGroup<LongRange> group : searchGroups) {
|
||||||
|
if (group.groupValue == null)
|
||||||
|
includeEmpty = true;
|
||||||
|
else
|
||||||
|
inSecondPass.add(group.groupValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -78,6 +78,7 @@ public class SecondPassGroupingCollector<T> extends SimpleCollector {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setScorer(Scorable scorer) throws IOException {
|
public void setScorer(Scorable scorer) throws IOException {
|
||||||
|
groupSelector.setScorer(scorer);
|
||||||
groupReducer.setScorer(scorer);
|
groupReducer.setScorer(scorer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@ import java.util.Map;
|
||||||
import org.apache.lucene.index.DocValues;
|
import org.apache.lucene.index.DocValues;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.SortedDocValues;
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
|
import org.apache.lucene.search.Scorable;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.BytesRefHash;
|
import org.apache.lucene.util.BytesRefHash;
|
||||||
|
|
||||||
|
@ -64,6 +65,9 @@ public class TermGroupSelector extends GroupSelector<BytesRef> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorable scorer) throws IOException { }
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public State advanceTo(int doc) throws IOException {
|
public State advanceTo(int doc) throws IOException {
|
||||||
if (this.docValues.advanceExact(doc) == false) {
|
if (this.docValues.advanceExact(doc) == false) {
|
||||||
|
|
|
@ -26,6 +26,7 @@ import java.util.Set;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.queries.function.FunctionValues;
|
import org.apache.lucene.queries.function.FunctionValues;
|
||||||
import org.apache.lucene.queries.function.ValueSource;
|
import org.apache.lucene.queries.function.ValueSource;
|
||||||
|
import org.apache.lucene.search.Scorable;
|
||||||
import org.apache.lucene.util.mutable.MutableValue;
|
import org.apache.lucene.util.mutable.MutableValue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -56,6 +57,9 @@ public class ValueSourceGroupSelector extends GroupSelector<MutableValue> {
|
||||||
this.filler = values.getValueFiller();
|
this.filler = values.getValueFiller();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorable scorer) throws IOException { }
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public State advanceTo(int doc) throws IOException {
|
public State advanceTo(int doc) throws IOException {
|
||||||
this.filler.fillValue(doc);
|
this.filler.fillValue(doc);
|
||||||
|
@ -67,7 +71,7 @@ public class ValueSourceGroupSelector extends GroupSelector<MutableValue> {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public MutableValue currentValue() {
|
public MutableValue currentValue() throws IOException {
|
||||||
return filler.getValue();
|
return filler.getValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -28,11 +28,9 @@
|
||||||
* <p>Grouping requires a number of inputs:</p>
|
* <p>Grouping requires a number of inputs:</p>
|
||||||
*
|
*
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li><code>groupField</code>: this is the field used for grouping.
|
* <li><code>groupSelector</code>: this defines how groups are created
|
||||||
* For example, if you use the <code>author</code> field then each
|
* from values per-document. The grouping module ships with
|
||||||
* group has all books by the same author. Documents that don't
|
* selectors for grouping by term, and by long and double ranges.
|
||||||
* have this field are grouped under a single group with
|
|
||||||
* a <code>null</code> group value.
|
|
||||||
*
|
*
|
||||||
* <li><code>groupSort</code>: how the groups are sorted. For sorting
|
* <li><code>groupSort</code>: how the groups are sorted. For sorting
|
||||||
* purposes, each group is "represented" by the highest-sorted
|
* purposes, each group is "represented" by the highest-sorted
|
||||||
|
@ -80,6 +78,10 @@
|
||||||
* the value of a {@link org.apache.lucene.index.SortedDocValues} field</li>
|
* the value of a {@link org.apache.lucene.index.SortedDocValues} field</li>
|
||||||
* <li>{@link org.apache.lucene.search.grouping.ValueSourceGroupSelector} groups based on
|
* <li>{@link org.apache.lucene.search.grouping.ValueSourceGroupSelector} groups based on
|
||||||
* the value of a {@link org.apache.lucene.queries.function.ValueSource}</li>
|
* the value of a {@link org.apache.lucene.queries.function.ValueSource}</li>
|
||||||
|
* <li>{@link org.apache.lucene.search.grouping.DoubleRangeGroupSelector} groups based on
|
||||||
|
* the value of a {@link org.apache.lucene.search.DoubleValuesSource}</li>
|
||||||
|
* <li>{@link org.apache.lucene.search.grouping.LongRangeGroupSelector} groups based on
|
||||||
|
* the value of a {@link org.apache.lucene.search.LongValuesSource}</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* <p>Known limitations:</p>
|
* <p>Known limitations:</p>
|
||||||
|
@ -137,17 +139,10 @@
|
||||||
* writer.addDocuments(oneGroup);
|
* writer.addDocuments(oneGroup);
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* Then, at search time, do this up front:
|
* Then, at search time:
|
||||||
*
|
*
|
||||||
* <pre class="prettyprint">
|
* <pre class="prettyprint">
|
||||||
* // Set this once in your app & save away for reusing across all queries:
|
* Query groupEndDocs = new TermQuery(new Term("groupEnd", "x"));
|
||||||
* Filter groupEndDocs = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupEnd", "x"))));
|
|
||||||
* </pre>
|
|
||||||
*
|
|
||||||
* Finally, do this per search:
|
|
||||||
*
|
|
||||||
* <pre class="prettyprint">
|
|
||||||
* // Per search:
|
|
||||||
* BlockGroupingCollector c = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, groupEndDocs);
|
* BlockGroupingCollector c = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, groupEndDocs);
|
||||||
* s.search(new TermQuery(new Term("content", searchTerm)), c);
|
* s.search(new TermQuery(new Term("content", searchTerm)), c);
|
||||||
* TopGroups groupsResult = c.getTopGroups(withinGroupSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
|
* TopGroups groupsResult = c.getTopGroups(withinGroupSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search.grouping;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.DoublePoint;
|
||||||
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.DocValuesFieldExistsQuery;
|
||||||
|
import org.apache.lucene.search.DoubleValuesSource;
|
||||||
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
|
||||||
|
public class DoubleRangeGroupSelectorTest extends BaseGroupSelectorTestCase<DoubleRange> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void addGroupField(Document document, int id) {
|
||||||
|
if (rarely()) {
|
||||||
|
return; // missing value
|
||||||
|
}
|
||||||
|
// numbers between 0 and 1000, groups are 100 wide from 100 to 900
|
||||||
|
double value = random().nextDouble() * 1000;
|
||||||
|
document.add(new DoublePoint("double", value));
|
||||||
|
document.add(new NumericDocValuesField("double", Double.doubleToLongBits(value)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected GroupSelector<DoubleRange> getGroupSelector() {
|
||||||
|
return new DoubleRangeGroupSelector(DoubleValuesSource.fromDoubleField("double"),
|
||||||
|
new DoubleRangeFactory(100, 100, 900));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Query filterQuery(DoubleRange groupValue) {
|
||||||
|
if (groupValue == null) {
|
||||||
|
return new BooleanQuery.Builder()
|
||||||
|
.add(new MatchAllDocsQuery(), BooleanClause.Occur.FILTER)
|
||||||
|
.add(new DocValuesFieldExistsQuery("double"), BooleanClause.Occur.MUST_NOT)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
return DoublePoint.newRangeQuery("double", groupValue.min, Math.nextDown(groupValue.max));
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,59 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search.grouping;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.LongPoint;
|
||||||
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.DocValuesFieldExistsQuery;
|
||||||
|
import org.apache.lucene.search.LongValuesSource;
|
||||||
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
|
||||||
|
public class LongRangeGroupSelectorTest extends BaseGroupSelectorTestCase<LongRange> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void addGroupField(Document document, int id) {
|
||||||
|
if (rarely()) {
|
||||||
|
return; // missing value
|
||||||
|
}
|
||||||
|
// numbers between 0 and 1000, groups are 100 wide from 100 to 900
|
||||||
|
long value = random().nextInt(1000);
|
||||||
|
document.add(new LongPoint("long", value));
|
||||||
|
document.add(new NumericDocValuesField("long", value));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected GroupSelector<LongRange> getGroupSelector() {
|
||||||
|
return new LongRangeGroupSelector(LongValuesSource.fromLongField("long"),
|
||||||
|
new LongRangeFactory(100, 100, 900));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Query filterQuery(LongRange groupValue) {
|
||||||
|
if (groupValue == null) {
|
||||||
|
return new BooleanQuery.Builder()
|
||||||
|
.add(new MatchAllDocsQuery(), BooleanClause.Occur.FILTER)
|
||||||
|
.add(new DocValuesFieldExistsQuery("long"), BooleanClause.Occur.MUST_NOT)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
return LongPoint.newRangeQuery("long", groupValue.min, groupValue.max - 1);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,39 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search.grouping;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
public class TestDoubleRangeFactory extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void test() {
|
||||||
|
|
||||||
|
DoubleRangeFactory factory = new DoubleRangeFactory(10, 10, 50);
|
||||||
|
DoubleRange scratch = new DoubleRange(0, 0);
|
||||||
|
|
||||||
|
assertEquals(new DoubleRange(Double.MIN_VALUE, 10), factory.getRange(4, scratch));
|
||||||
|
assertEquals(new DoubleRange(10, 20), factory.getRange(10, scratch));
|
||||||
|
assertEquals(new DoubleRange(20, 30), factory.getRange(20, scratch));
|
||||||
|
assertEquals(new DoubleRange(10, 20), factory.getRange(15, scratch));
|
||||||
|
assertEquals(new DoubleRange(30, 40), factory.getRange(35, scratch));
|
||||||
|
assertEquals(new DoubleRange(50, Double.MAX_VALUE), factory.getRange(50, scratch));
|
||||||
|
assertEquals(new DoubleRange(50, Double.MAX_VALUE), factory.getRange(500, scratch));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,39 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search.grouping;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
public class TestLongRangeFactory extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void test() {
|
||||||
|
|
||||||
|
LongRangeFactory factory = new LongRangeFactory(10, 10, 50);
|
||||||
|
LongRange scratch = new LongRange(0, 0);
|
||||||
|
|
||||||
|
assertEquals(new LongRange(Long.MIN_VALUE, 10), factory.getRange(4, scratch));
|
||||||
|
assertEquals(new LongRange(10, 20), factory.getRange(10, scratch));
|
||||||
|
assertEquals(new LongRange(20, 30), factory.getRange(20, scratch));
|
||||||
|
assertEquals(new LongRange(10, 20), factory.getRange(15, scratch));
|
||||||
|
assertEquals(new LongRange(30, 40), factory.getRange(35, scratch));
|
||||||
|
assertEquals(new LongRange(50, Long.MAX_VALUE), factory.getRange(50, scratch));
|
||||||
|
assertEquals(new LongRange(50, Long.MAX_VALUE), factory.getRange(500, scratch));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue