mirror of https://github.com/apache/lucene.git
Github#11869: Add RangeOnRangeFacetCounts (#11901)
This commit is contained in:
parent
6f477e5831
commit
cbfed77fd3
|
@ -21,14 +21,22 @@ import java.io.IOException;
|
|||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
class BinaryRangeDocValues extends BinaryDocValues {
|
||||
/** A binary representation of a range that wraps a BinaryDocValues field */
|
||||
public class BinaryRangeDocValues extends BinaryDocValues {
|
||||
private final BinaryDocValues in;
|
||||
private byte[] packedValue;
|
||||
private final int numDims;
|
||||
private final int numBytesPerDimension;
|
||||
private int docID = -1;
|
||||
|
||||
BinaryRangeDocValues(BinaryDocValues in, int numDims, int numBytesPerDimension) {
|
||||
/**
|
||||
* Constructor for BinaryRangeDocValues
|
||||
*
|
||||
* @param in the binary doc values source field
|
||||
* @param numDims the number of dimensions in each doc values field
|
||||
* @param numBytesPerDimension size of each dimension (2 * encoded value size)
|
||||
*/
|
||||
public BinaryRangeDocValues(BinaryDocValues in, int numDims, int numBytesPerDimension) {
|
||||
assert in != null;
|
||||
this.in = in;
|
||||
this.numBytesPerDimension = numBytesPerDimension;
|
||||
|
@ -82,6 +90,11 @@ class BinaryRangeDocValues extends BinaryDocValues {
|
|||
return in.binaryValue();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the packed value that represents this range
|
||||
*
|
||||
* @return the packed value that represents this range
|
||||
*/
|
||||
public byte[] getPackedValue() {
|
||||
return packedValue;
|
||||
}
|
||||
|
|
|
@ -131,7 +131,7 @@ public class DoubleRange extends Field {
|
|||
*
|
||||
* <p>example for 4 dimensions (8 bytes per dimension value): minD1 ... minD4 | maxD1 ... maxD4
|
||||
*/
|
||||
static void verifyAndEncode(double[] min, double[] max, byte[] bytes) {
|
||||
public static void verifyAndEncode(double[] min, double[] max, byte[] bytes) {
|
||||
for (int d = 0, i = 0, j = min.length * BYTES; d < min.length; ++d, i += BYTES, j += BYTES) {
|
||||
if (Double.isNaN(min[d])) {
|
||||
throw new IllegalArgumentException(
|
||||
|
|
|
@ -131,7 +131,7 @@ public class LongRange extends Field {
|
|||
*
|
||||
* <p>example for 4 dimensions (8 bytes per dimension value): minD1 ... minD4 | maxD1 ... maxD4
|
||||
*/
|
||||
static void verifyAndEncode(long[] min, long[] max, byte[] bytes) {
|
||||
public static void verifyAndEncode(long[] min, long[] max, byte[] bytes) {
|
||||
for (int d = 0, i = 0, j = min.length * BYTES; d < min.length; ++d, i += BYTES, j += BYTES) {
|
||||
if (min[d] > max[d]) {
|
||||
throw new IllegalArgumentException(
|
||||
|
|
|
@ -251,7 +251,7 @@ public abstract class RangeFieldQuery extends Query {
|
|||
}
|
||||
|
||||
@Override
|
||||
boolean matches(
|
||||
public boolean matches(
|
||||
byte[] queryPackedValue,
|
||||
byte[] packedValue,
|
||||
int numDims,
|
||||
|
@ -307,7 +307,11 @@ public abstract class RangeFieldQuery extends Query {
|
|||
int dim,
|
||||
ByteArrayComparator comparator);
|
||||
|
||||
boolean matches(
|
||||
/**
|
||||
* Compares every dim for 2 encoded ranges and returns true if all dims match. Matching
|
||||
* implementation is based on the QueryType.
|
||||
*/
|
||||
public boolean matches(
|
||||
byte[] queryPackedValue,
|
||||
byte[] packedValue,
|
||||
int numDims,
|
||||
|
|
|
@ -28,4 +28,5 @@ module org.apache.lucene.facet {
|
|||
exports org.apache.lucene.facet.taxonomy.directory;
|
||||
exports org.apache.lucene.facet.taxonomy.writercache;
|
||||
exports org.apache.lucene.facet.facetset;
|
||||
exports org.apache.lucene.facet.rangeonrange;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.facet.rangeonrange;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
|
||||
/** Represents a double range for RangeOnRange faceting */
|
||||
public class DoubleRange extends Range {
|
||||
/** Minimum (inclusive). */
|
||||
public final double[] min;
|
||||
|
||||
/** Maximum (inclusive). */
|
||||
public final double[] max;
|
||||
|
||||
/**
|
||||
* Represents a double range for RangeOnRange faceting
|
||||
*
|
||||
* @param label the name of the range
|
||||
* @param minIn the minimum
|
||||
* @param minInclusive if the minimum is inclusive
|
||||
* @param maxIn the maximum
|
||||
* @param maxInclusive if the maximum is inclusive
|
||||
*/
|
||||
public DoubleRange(
|
||||
String label, double minIn, boolean minInclusive, double maxIn, boolean maxInclusive) {
|
||||
super(label, 1);
|
||||
|
||||
if (Double.isNaN(minIn)) {
|
||||
throw new IllegalArgumentException("min cannot be NaN");
|
||||
}
|
||||
if (minInclusive == false) {
|
||||
minIn = Math.nextUp(minIn);
|
||||
}
|
||||
|
||||
if (Double.isNaN(maxIn)) {
|
||||
throw new IllegalArgumentException("max cannot be NaN");
|
||||
}
|
||||
if (maxInclusive == false) {
|
||||
// Why no Math.nextDown?
|
||||
maxIn = Math.nextAfter(maxIn, Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
if (minIn > maxIn) {
|
||||
failNoMatch();
|
||||
}
|
||||
|
||||
this.min = new double[] {minIn};
|
||||
this.max = new double[] {maxIn};
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a double range for RangeOnRange faceting
|
||||
*
|
||||
* @param label the name of the range
|
||||
* @param min the minimum, inclusive
|
||||
* @param max the maximum, inclusive
|
||||
*/
|
||||
public DoubleRange(String label, double[] min, double[] max) {
|
||||
super(label, min.length);
|
||||
checkArgs(min, max);
|
||||
this.min = min;
|
||||
this.max = max;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "DoubleRange(label: "
|
||||
+ label
|
||||
+ ", min: "
|
||||
+ Arrays.toString(min)
|
||||
+ ", max: "
|
||||
+ Arrays.toString(max)
|
||||
+ ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
DoubleRange doubleRange = (DoubleRange) o;
|
||||
return Arrays.equals(min, doubleRange.min)
|
||||
&& Arrays.equals(max, doubleRange.max)
|
||||
&& label.equals(doubleRange.label)
|
||||
&& dims == doubleRange.dims;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(label, Arrays.hashCode(min), Arrays.hashCode(max), dims);
|
||||
}
|
||||
|
||||
/** validate the arguments */
|
||||
private void checkArgs(final double[] min, final double[] max) {
|
||||
if (min == null || max == null || min.length == 0 || max.length == 0) {
|
||||
failNoMatch();
|
||||
}
|
||||
if (min.length != max.length) {
|
||||
failNoMatch();
|
||||
}
|
||||
|
||||
for (int i = 0; i < min.length; i++) {
|
||||
if (min[i] > max[i]) {
|
||||
failNoMatch();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,94 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.facet.rangeonrange;
|
||||
|
||||
import static org.apache.lucene.document.DoubleRange.verifyAndEncode;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.document.RangeFieldQuery;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
/**
|
||||
* Represents counts for double range on range faceting. To be more specific, this means that given
|
||||
* a range (or list of ranges), this class will count all the documents in the index (or that match
|
||||
* a fast match query) that contain ranges that "match" the provided ranges. These ranges are
|
||||
* specified by the field parameter and expected to be of type {@link
|
||||
* org.apache.lucene.document.DoubleRangeDocValuesField}. Matching is defined by the queryType
|
||||
* param, you can see the type of matching supported by looking at {@link
|
||||
* org.apache.lucene.document.RangeFieldQuery.QueryType}. In addition, this class supports
|
||||
* multidimensional ranges. A multidimensional range will be counted as a match if every dimension
|
||||
* matches the corresponding indexed range's dimension.
|
||||
*/
|
||||
public class DoubleRangeOnRangeFacetCounts extends RangeOnRangeFacetCounts {
|
||||
|
||||
/**
|
||||
* Constructor without the fast match query, see other constructor description for more details.
|
||||
*/
|
||||
public DoubleRangeOnRangeFacetCounts(
|
||||
String field,
|
||||
FacetsCollector hits,
|
||||
RangeFieldQuery.QueryType queryType,
|
||||
DoubleRange... ranges)
|
||||
throws IOException {
|
||||
super(
|
||||
field,
|
||||
hits,
|
||||
queryType,
|
||||
null,
|
||||
Double.BYTES,
|
||||
getEncodedRanges(ranges),
|
||||
Range.getLabelsFromRanges(ranges));
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents counts for double range on range faceting. See class javadoc for more details.
|
||||
*
|
||||
* @param field specifies a {@link org.apache.lucene.document.DoubleRangeDocValuesField} that will
|
||||
* define the indexed ranges
|
||||
* @param hits hits we want to count against
|
||||
* @param queryType type of intersection we want to count (IE: range intersection, range contains,
|
||||
* etc.)
|
||||
* @param fastMatchQuery query to quickly discard hits using some heuristic
|
||||
* @param ranges ranges we want the counts of
|
||||
* @throws IOException low level exception
|
||||
*/
|
||||
public DoubleRangeOnRangeFacetCounts(
|
||||
String field,
|
||||
FacetsCollector hits,
|
||||
RangeFieldQuery.QueryType queryType,
|
||||
Query fastMatchQuery,
|
||||
DoubleRange... ranges)
|
||||
throws IOException {
|
||||
super(
|
||||
field,
|
||||
hits,
|
||||
queryType,
|
||||
fastMatchQuery,
|
||||
Double.BYTES,
|
||||
getEncodedRanges(ranges),
|
||||
Range.getLabelsFromRanges(ranges));
|
||||
}
|
||||
|
||||
private static byte[][] getEncodedRanges(DoubleRange... ranges) {
|
||||
byte[][] result = new byte[ranges.length][2 * Double.BYTES * ranges[0].dims];
|
||||
for (int i = 0; i < ranges.length; i++) {
|
||||
verifyAndEncode(ranges[i].min, ranges[i].max, result[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.facet.rangeonrange;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
|
||||
/** Represents a long range for RangeOnRange faceting */
|
||||
public class LongRange extends Range {
|
||||
/** Minimum (inclusive). */
|
||||
public final long[] min;
|
||||
|
||||
/** Maximum (inclusive). */
|
||||
public final long[] max;
|
||||
|
||||
/**
|
||||
* Represents a single dimensional long range for RangeOnRange faceting
|
||||
*
|
||||
* @param label the name of the range
|
||||
* @param minIn the minimum
|
||||
* @param minInclusive if the minimum is inclusive
|
||||
* @param maxIn the maximum
|
||||
* @param maxInclusive if the maximum is inclusive
|
||||
*/
|
||||
public LongRange(
|
||||
String label, long minIn, boolean minInclusive, long maxIn, boolean maxInclusive) {
|
||||
super(label, 1);
|
||||
|
||||
if (minInclusive == false) {
|
||||
if (minIn != Long.MAX_VALUE) {
|
||||
minIn++;
|
||||
} else {
|
||||
failNoMatch();
|
||||
}
|
||||
}
|
||||
|
||||
if (maxInclusive == false) {
|
||||
if (maxIn != Long.MIN_VALUE) {
|
||||
maxIn--;
|
||||
} else {
|
||||
failNoMatch();
|
||||
}
|
||||
}
|
||||
|
||||
if (minIn > maxIn) {
|
||||
failNoMatch();
|
||||
}
|
||||
|
||||
this.min = new long[] {minIn};
|
||||
this.max = new long[] {maxIn};
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a multidimensional long range for RangeOnRange faceting
|
||||
*
|
||||
* @param label the name of the range
|
||||
* @param min the minimum, inclusive
|
||||
* @param max the maximum, inclusive
|
||||
*/
|
||||
public LongRange(String label, long[] min, long[] max) {
|
||||
super(label, min.length);
|
||||
checkArgs(min, max);
|
||||
this.min = min;
|
||||
this.max = max;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "LongRange(label: "
|
||||
+ label
|
||||
+ ", min: "
|
||||
+ Arrays.toString(min)
|
||||
+ ", max: "
|
||||
+ Arrays.toString(max)
|
||||
+ ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
LongRange longRange = (LongRange) o;
|
||||
return Arrays.equals(min, longRange.min)
|
||||
&& Arrays.equals(max, longRange.max)
|
||||
&& label.equals(longRange.label)
|
||||
&& dims == longRange.dims;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(label, Arrays.hashCode(min), Arrays.hashCode(max), dims);
|
||||
}
|
||||
|
||||
private void checkArgs(final long[] min, final long[] max) {
|
||||
if (min == null || max == null || min.length == 0 || max.length == 0) {
|
||||
failNoMatch();
|
||||
}
|
||||
if (min.length != max.length) {
|
||||
failNoMatch();
|
||||
}
|
||||
|
||||
for (int i = 0; i < min.length; i++) {
|
||||
if (min[i] > max[i]) {
|
||||
failNoMatch();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.facet.rangeonrange;
|
||||
|
||||
import static org.apache.lucene.document.LongRange.verifyAndEncode;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.document.RangeFieldQuery;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
||||
/**
|
||||
* Represents counts for long range on range faceting. To be more specific, this means that given a
|
||||
* range (or list of ranges), this class will count all the documents in the {@link FacetsCollector}
|
||||
* (or that match a fast match query) that contain ranges that "match" the provided ranges. These
|
||||
* ranges are specified by the field parameter and expected to be of type {@link
|
||||
* org.apache.lucene.document.LongRangeDocValuesField}. Matching is defined by the queryType param,
|
||||
* you can see the type of matching supported by looking at {@link
|
||||
* org.apache.lucene.document.RangeFieldQuery.QueryType}. In addition, this class supports
|
||||
* multidimensional ranges. A multidimensional range will be counted as a match if every dimension
|
||||
* matches the corresponding indexed range's dimension.
|
||||
*/
|
||||
public class LongRangeOnRangeFacetCounts extends RangeOnRangeFacetCounts {
|
||||
|
||||
/**
|
||||
* Constructor without the fast match query, see other constructor description for more details.
|
||||
*/
|
||||
public LongRangeOnRangeFacetCounts(
|
||||
String field, FacetsCollector hits, RangeFieldQuery.QueryType queryType, LongRange... ranges)
|
||||
throws IOException {
|
||||
super(
|
||||
field,
|
||||
hits,
|
||||
queryType,
|
||||
null,
|
||||
Long.BYTES,
|
||||
getEncodedRanges(ranges),
|
||||
Range.getLabelsFromRanges(ranges));
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents counts for long range on range faceting. See class javadoc for more details.
|
||||
*
|
||||
* @param field specifies a {@link org.apache.lucene.document.LongRangeDocValuesField} that will
|
||||
* define the indexed ranges
|
||||
* @param hits hits we want to count against
|
||||
* @param queryType type of intersection we want to count (IE: range intersection, range contains,
|
||||
* etc.)
|
||||
* @param fastMatchQuery query to quickly discard hits using some heuristic
|
||||
* @param ranges ranges we want the counts of
|
||||
* @throws IOException low level exception
|
||||
*/
|
||||
public LongRangeOnRangeFacetCounts(
|
||||
String field,
|
||||
FacetsCollector hits,
|
||||
RangeFieldQuery.QueryType queryType,
|
||||
Query fastMatchQuery,
|
||||
LongRange... ranges)
|
||||
throws IOException {
|
||||
super(
|
||||
field,
|
||||
hits,
|
||||
queryType,
|
||||
fastMatchQuery,
|
||||
Long.BYTES,
|
||||
getEncodedRanges(ranges),
|
||||
Range.getLabelsFromRanges(ranges));
|
||||
}
|
||||
|
||||
private static byte[][] getEncodedRanges(LongRange... ranges) {
|
||||
byte[][] result = new byte[ranges.length][2 * Long.BYTES * ranges[0].dims];
|
||||
for (int i = 0; i < ranges.length; i++) {
|
||||
verifyAndEncode(ranges[i].min, ranges[i].max, result[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.facet.rangeonrange;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
 * Common parent of the labeled ranges used for RangeOnRange faceting: it carries the label and the
 * dimension count, and offers the shared failure helper.
 *
 * @lucene.experimental
 */
public abstract class Range {

  /** Human-readable name of this range; never null. */
  public final String label;

  /** How many dimensions this range spans. */
  public final int dims;

  /**
   * Sole constructor.
   *
   * @param label the name of the range
   * @param dims the number of dimensions of the range
   * @throws NullPointerException if {@code label} is null
   */
  protected Range(String label, int dims) {
    if (label == null) {
      throw new NullPointerException("label must not be null");
    }
    this.dims = dims;
    this.label = label;
  }

  /**
   * Rejects a range that cannot match anything.
   *
   * @throws IllegalArgumentException always, naming this range's label
   */
  protected void failNoMatch() {
    final String message = "range \"" + label + "\" matches nothing";
    throw new IllegalArgumentException(message);
  }

  /**
   * Collects the labels of the given ranges.
   *
   * @param ranges the ranges to read the labels from
   * @return a new array holding the label of every range, in the same order
   */
  protected static String[] getLabelsFromRanges(Range[] ranges) {
    final String[] names = new String[ranges.length];
    Arrays.setAll(names, idx -> ranges[idx].label);
    return names;
  }
}
|
|
@ -0,0 +1,206 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.facet.rangeonrange;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.document.BinaryRangeDocValues;
|
||||
import org.apache.lucene.document.RangeFieldQuery;
|
||||
import org.apache.lucene.facet.FacetCountsWithFilterQuery;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.LabelAndValue;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
abstract class RangeOnRangeFacetCounts extends FacetCountsWithFilterQuery {
|
||||
|
||||
private final String[] labels;
|
||||
|
||||
/** Counts, initialized in by subclass. */
|
||||
private final int[] counts;
|
||||
|
||||
/** Our field name. */
|
||||
private final String field;
|
||||
|
||||
/** Total number of hits. */
|
||||
private int totCount;
|
||||
|
||||
protected RangeOnRangeFacetCounts(
|
||||
String field,
|
||||
FacetsCollector hits,
|
||||
RangeFieldQuery.QueryType queryType,
|
||||
Query fastMatchQuery,
|
||||
int numEncodedValueBytes,
|
||||
byte[][] encodedRanges,
|
||||
String[] labels)
|
||||
throws IOException {
|
||||
super(fastMatchQuery);
|
||||
|
||||
assert encodedRanges.length == labels.length;
|
||||
assert encodedRanges[0].length % (2 * numEncodedValueBytes) == 0;
|
||||
|
||||
this.field = field;
|
||||
this.labels = labels;
|
||||
this.counts = new int[encodedRanges.length];
|
||||
|
||||
count(field, hits.getMatchingDocs(), encodedRanges, numEncodedValueBytes, queryType);
|
||||
}
|
||||
|
||||
/** Counts from the provided field. */
|
||||
protected void count(
|
||||
String field,
|
||||
List<FacetsCollector.MatchingDocs> matchingDocs,
|
||||
byte[][] encodedRanges,
|
||||
int numEncodedValueBytes,
|
||||
RangeFieldQuery.QueryType queryType)
|
||||
throws IOException {
|
||||
// TODO: We currently just exhaustively check the ranges in each document with every range in
|
||||
// the ranges array.
|
||||
// We might be able to do something more efficient here by grouping the ranges array into a
|
||||
// space partitioning
|
||||
// data structure of some sort.
|
||||
|
||||
int dims = encodedRanges[0].length / (2 * numEncodedValueBytes);
|
||||
ArrayUtil.ByteArrayComparator comparator =
|
||||
ArrayUtil.getUnsignedComparator(numEncodedValueBytes);
|
||||
|
||||
int missingCount = 0;
|
||||
|
||||
for (FacetsCollector.MatchingDocs hits : matchingDocs) {
|
||||
|
||||
BinaryRangeDocValues binaryRangeDocValues =
|
||||
new BinaryRangeDocValues(
|
||||
DocValues.getBinary(hits.context.reader(), field), dims, numEncodedValueBytes);
|
||||
|
||||
final DocIdSetIterator it = createIterator(hits);
|
||||
if (it == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
totCount += hits.totalHits;
|
||||
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
|
||||
if (binaryRangeDocValues.advanceExact(doc)) {
|
||||
boolean hasValidRange = false;
|
||||
for (int range = 0; range < encodedRanges.length; range++) {
|
||||
byte[] encodedRange = encodedRanges[range];
|
||||
byte[] packedRange = binaryRangeDocValues.getPackedValue();
|
||||
assert encodedRange.length == packedRange.length;
|
||||
if (queryType.matches(
|
||||
encodedRange, packedRange, dims, numEncodedValueBytes, comparator)) {
|
||||
counts[range]++;
|
||||
hasValidRange = true;
|
||||
}
|
||||
}
|
||||
if (hasValidRange == false) {
|
||||
missingCount++;
|
||||
}
|
||||
} else {
|
||||
missingCount++;
|
||||
}
|
||||
doc = it.nextDoc();
|
||||
}
|
||||
}
|
||||
totCount -= missingCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* <p>NOTE: This implementation guarantees that ranges will be returned in the order specified by
|
||||
* the user when calling the constructor.
|
||||
*/
|
||||
@Override
|
||||
public FacetResult getAllChildren(String dim, String... path) throws IOException {
|
||||
validateDimAndPathForGetChildren(dim, path);
|
||||
LabelAndValue[] labelValues = new LabelAndValue[counts.length];
|
||||
for (int i = 0; i < counts.length; i++) {
|
||||
labelValues[i] = new LabelAndValue(labels[i], counts[i]);
|
||||
}
|
||||
return new FacetResult(dim, path, totCount, labelValues, labelValues.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
validateTopN(topN);
|
||||
validateDimAndPathForGetChildren(dim, path);
|
||||
|
||||
PriorityQueue<Entry> pq =
|
||||
new PriorityQueue<>(Math.min(topN, counts.length)) {
|
||||
@Override
|
||||
protected boolean lessThan(Entry a, Entry b) {
|
||||
int cmp = Integer.compare(a.count, b.count);
|
||||
if (cmp == 0) {
|
||||
cmp = b.label.compareTo(a.label);
|
||||
}
|
||||
return cmp < 0;
|
||||
}
|
||||
};
|
||||
|
||||
int childCount = 0;
|
||||
Entry e = null;
|
||||
for (int i = 0; i < counts.length; i++) {
|
||||
if (counts[i] != 0) {
|
||||
childCount++;
|
||||
if (e == null) {
|
||||
e = new Entry();
|
||||
}
|
||||
e.label = labels[i];
|
||||
e.count = counts[i];
|
||||
e = pq.insertWithOverflow(e);
|
||||
}
|
||||
}
|
||||
|
||||
LabelAndValue[] results = new LabelAndValue[pq.size()];
|
||||
while (pq.size() != 0) {
|
||||
Entry entry = pq.pop();
|
||||
assert entry != null;
|
||||
results[pq.size()] = new LabelAndValue(entry.label, entry.count);
|
||||
}
|
||||
return new FacetResult(dim, path, totCount, results, childCount);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getAllDims(int topN) throws IOException {
|
||||
validateTopN(topN);
|
||||
return Collections.singletonList(getTopChildren(topN, field));
|
||||
}
|
||||
|
||||
private void validateDimAndPathForGetChildren(String dim, String... path) {
|
||||
if (dim.equals(field) == false) {
|
||||
throw new IllegalArgumentException(
|
||||
"invalid dim \"" + dim + "\"; should be \"" + field + "\"");
|
||||
}
|
||||
if (path.length != 0) {
|
||||
throw new IllegalArgumentException("path.length should be 0");
|
||||
}
|
||||
}
|
||||
|
||||
private static final class Entry {
|
||||
int count;
|
||||
String label;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** Provides range on range faceting capabilities. */
|
||||
package org.apache.lucene.facet.rangeonrange;
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue