LUCENE-7381: add point based DoubleRangeField and RangeFieldQuery for indexing and querying on Ranges up to 4 dimensions

This commit is contained in:
Nicholas Knize 2016-07-15 14:54:44 -05:00
parent d5779335aa
commit 7dfcfcc718
6 changed files with 1154 additions and 0 deletions

View File

@ -11,6 +11,9 @@ API Changes
New Features
* LUCENE-7381: Add point based DoubleRangeField and RangeFieldQuery for
indexing and querying on Ranges up to 4 dimensions (Nick Knize)
* LUCENE-6968: LSH Filter (Tommaso Teofili, Andy Hind, Cao Manh Dat)
* LUCENE-7302: IndexWriter methods that change the index now return a

View File

@ -0,0 +1,262 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import org.apache.lucene.document.RangeFieldQuery.QueryType;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
/**
* An indexed Double Range field.
* <p>
* This field indexes dimensional ranges defined as min/max pairs. It supports
* up to a maximum of 4 dimensions (indexed as 8 numeric values). With 1 dimension representing a single double range,
* 2 dimensions representing a bounding box, 3 dimensions a bounding cube, and 4 dimensions a tesseract.
* <p>
* Multiple values for the same field in one document is supported, and open ended ranges can be defined using
* {@code Double.NEGATIVE_INFINITY} and {@code Double.POSITIVE_INFINITY}.
*
* <p>
* This field defines the following static factory methods for common search operations over double ranges:
* <ul>
* <li>{@link #newIntersectsQuery newIntersectsQuery()} matches ranges that intersect the defined search range.
* <li>{@link #newWithinQuery newWithinQuery()} matches ranges that are within the defined search range.
* <li>{@link #newContainsQuery newContainsQuery()} matches ranges that contain the defined search range.
* </ul>
*/
public class DoubleRangeField extends Field {
  /** stores double values so number of bytes is 8 */
  public static final int BYTES = Double.BYTES;

  /**
   * Create a new DoubleRangeField type, from min/max parallel arrays
   *
   * @param name field name. must not be null.
   * @param min range min values; each entry is the min value for the dimension
   * @param max range max values; each entry is the max value for the dimension
   * @throws IllegalArgumentException if more than 4 dimensions are given or the ranges are
   *         rejected by {@link #setRangeValues(double[], double[])}
   */
  public DoubleRangeField(String name, final double[] min, final double[] max) {
    super(name, getType(min.length));
    setRangeValues(min, max);
  }

  /** builds the frozen field type for the given number of range dimensions */
  private static FieldType getType(int dimensions) {
    if (dimensions > 4) {
      throw new IllegalArgumentException("DoubleRangeField does not support greater than 4 dimensions");
    }
    FieldType ft = new FieldType();
    // dimensions is set as 2*dimension size (min/max per dimension)
    ft.setDimensions(dimensions*2, BYTES);
    ft.freeze();
    return ft;
  }

  /**
   * Changes the values of the field.
   * <p>
   * All values are validated before anything is encoded, so a rejected call leaves the
   * previously stored ranges untouched.
   *
   * @param min array of min values. (accepts {@code Double.NEGATIVE_INFINITY})
   * @param max array of max values. (accepts {@code Double.POSITIVE_INFINITY})
   * @throws IllegalArgumentException if {@code min} or {@code max} is invalid
   */
  public void setRangeValues(double[] min, double[] max) {
    checkArgs(min, max);
    if (min.length*2 != type.pointDimensionCount() || max.length*2 != type.pointDimensionCount()) {
      throw new IllegalArgumentException("field (name=" + name + ") uses " + type.pointDimensionCount()/2
          + " dimensions; cannot change to (incoming) " + min.length + " dimensions");
    }

    final byte[] bytes;
    if (fieldsData == null) {
      bytes = new byte[BYTES*2*min.length];
      fieldsData = new BytesRef(bytes);
    } else {
      // the dimension count was verified above, so the existing buffer can be reused
      bytes = ((BytesRef)fieldsData).bytes;
    }
    verifyAndEncode(min, max, bytes);
  }

  /** validate the arguments: non-null, non-empty, equal length and at most 4 dimensions */
  private static void checkArgs(final double[] min, final double[] max) {
    if (min == null || max == null || min.length == 0 || max.length == 0) {
      throw new IllegalArgumentException("min/max range values cannot be null or empty");
    }
    if (min.length != max.length) {
      throw new IllegalArgumentException("min/max ranges must agree");
    }
    if (min.length > 4) {
      throw new IllegalArgumentException("DoubleRangeField does not support greater than 4 dimensions");
    }
  }

  /**
   * Encodes the min, max ranges into a newly allocated byte array
   */
  private static byte[] encode(double[] min, double[] max) {
    checkArgs(min, max);
    byte[] b = new byte[BYTES*2*min.length];
    verifyAndEncode(min, max, b);
    return b;
  }

  /**
   * encode the ranges into a sortable byte array ({@code Double.NaN} not allowed)
   * <p>
   * example for 4 dimensions (8 bytes per dimension value):
   * minD1 ... minD4 | maxD1 ... maxD4
   *
   * @throws IllegalArgumentException if any value is {@code Double.NaN} or a min value is greater
   *         than its max value; in that case {@code bytes} is not modified
   */
  static void verifyAndEncode(double[] min, double[] max, byte[] bytes) {
    // first pass: validate every dimension so that a failure never leaves bytes half-written
    for (int d=0; d<min.length; ++d) {
      if (Double.isNaN(min[d])) {
        throw new IllegalArgumentException("invalid min value (" + Double.NaN + ")" + " in DoubleRangeField");
      }
      if (Double.isNaN(max[d])) {
        throw new IllegalArgumentException("invalid max value (" + Double.NaN + ")" + " in DoubleRangeField");
      }
      if (min[d] > max[d]) {
        throw new IllegalArgumentException("min value (" + min[d] + ") is greater than max value (" + max[d] + ")");
      }
    }
    // second pass: mins fill the first half of the array, maxes fill the second half
    for (int d=0,i=0,j=min.length*BYTES; d<min.length; ++d, i+=BYTES, j+=BYTES) {
      encode(min[d], bytes, i);
      encode(max[d], bytes, j);
    }
  }

  /** encode the given value into the byte array at the defined offset */
  private static void encode(double val, byte[] bytes, int offset) {
    NumericUtils.longToSortableBytes(NumericUtils.doubleToSortableLong(val), bytes, offset);
  }

  /** verifies that the requested dimension is a valid zero-based index for this field */
  private void checkDimension(int dimension) {
    if (dimension < 0 || dimension >= type.pointDimensionCount()/2) {
      throw new IllegalArgumentException("dimension request (" + dimension +
          ") out of bounds for field (name=" + name + " dimensions=" + type.pointDimensionCount()/2 + "). ");
    }
  }

  /**
   * Get the min value for the given dimension
   * @param dimension the zero-based dimension index
   * @return the decoded min value
   * @throws IllegalArgumentException if {@code dimension} is negative or not less than the number of dimensions
   */
  public double getMin(int dimension) {
    checkDimension(dimension);
    return decodeMin(((BytesRef)fieldsData).bytes, dimension);
  }

  /**
   * Get the max value for the given dimension
   * @param dimension the zero-based dimension index
   * @return the decoded max value
   * @throws IllegalArgumentException if {@code dimension} is negative or not less than the number of dimensions
   */
  public double getMax(int dimension) {
    checkDimension(dimension);
    return decodeMax(((BytesRef)fieldsData).bytes, dimension);
  }

  /** decodes the min value (for the defined dimension) from the encoded input byte array */
  static double decodeMin(byte[] b, int dimension) {
    int offset = dimension*BYTES;
    return NumericUtils.sortableLongToDouble(NumericUtils.sortableBytesToLong(b, offset));
  }

  /** decodes the max value (for the defined dimension) from the encoded input byte array */
  static double decodeMax(byte[] b, int dimension) {
    // max values are stored in the second half of the array
    int offset = b.length/2 + dimension*BYTES;
    return NumericUtils.sortableLongToDouble(NumericUtils.sortableBytesToLong(b, offset));
  }

  /**
   * Create a query for matching indexed ranges that intersect the defined range.
   * @param field field name. must not be null.
   * @param min array of min values. (accepts {@code Double.NEGATIVE_INFINITY})
   * @param max array of max values. (accepts {@code Double.POSITIVE_INFINITY})
   * @return query for matching intersecting ranges (overlap, within, or contains)
   * @throws IllegalArgumentException if {@code field} is null, {@code min} or {@code max} is invalid
   */
  public static Query newIntersectsQuery(String field, final double[] min, final double[] max) {
    // encode() validates min/max before min.length is evaluated
    return new RangeFieldQuery(field, encode(min, max), min.length, QueryType.INTERSECTS) {
      @Override
      protected String toString(byte[] ranges, int dimension) {
        return DoubleRangeField.toString(ranges, dimension);
      }
    };
  }

  /**
   * Create a query for matching indexed ranges that contain the defined range.
   * @param field field name. must not be null.
   * @param min array of min values. (accepts {@code Double.NEGATIVE_INFINITY})
   * @param max array of max values. (accepts {@code Double.POSITIVE_INFINITY})
   * @return query for matching ranges that contain the defined range
   * @throws IllegalArgumentException if {@code field} is null, {@code min} or {@code max} is invalid
   */
  public static Query newContainsQuery(String field, final double[] min, final double[] max) {
    return new RangeFieldQuery(field, encode(min, max), min.length, QueryType.CONTAINS) {
      @Override
      protected String toString(byte[] ranges, int dimension) {
        return DoubleRangeField.toString(ranges, dimension);
      }
    };
  }

  /**
   * Create a query for matching indexed ranges that are within the defined range.
   * @param field field name. must not be null.
   * @param min array of min values. (accepts {@code Double.NEGATIVE_INFINITY})
   * @param max array of max values. (accepts {@code Double.POSITIVE_INFINITY})
   * @return query for matching ranges within the defined range
   * @throws IllegalArgumentException if {@code field} is null, {@code min} or {@code max} is invalid
   */
  public static Query newWithinQuery(String field, final double[] min, final double[] max) {
    // no explicit checkArgs call needed: encode() performs the same validation
    return new RangeFieldQuery(field, encode(min, max), min.length, QueryType.WITHIN) {
      @Override
      protected String toString(byte[] ranges, int dimension) {
        return DoubleRangeField.toString(ranges, dimension);
      }
    };
  }

  /**
   * Renders the field as {@code SimpleClassName <name:[min : max] [min : max] ...>},
   * with one bracketed range per dimension.
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append(getClass().getSimpleName());
    sb.append(" <");
    sb.append(name);
    sb.append(':');
    byte[] b = ((BytesRef)fieldsData).bytes;
    // the field type stores 2 point dimensions (min and max) per range dimension
    final int rangeDims = type.pointDimensionCount()/2;
    sb.append(toString(b, 0));
    for (int d=1; d<rangeDims; ++d) {
      sb.append(' ');
      sb.append(toString(b, d));
    }
    sb.append('>');
    return sb.toString();
  }

  /**
   * Returns the String representation for the range at the given dimension
   * @param ranges the encoded ranges, never null
   * @param dimension the dimension of interest
   * @return The string representation for the range at the provided dimension
   */
  private static String toString(byte[] ranges, int dimension) {
    return "[" + Double.toString(decodeMin(ranges, dimension)) + " : "
        + Double.toString(decodeMax(ranges, dimension)) + "]";
  }
}

View File

@ -0,0 +1,313 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.StringHelper;
/**
* Query class for searching {@code RangeField} types by a defined {@link Relation}.
*/
abstract class RangeFieldQuery extends Query {
  /** field name */
  final String field;
  /** how an indexed range must relate to the query range in order to match:
   * intersects, within, or contains (see {@link QueryType}) */
  final QueryType queryType;
  /** number of dimensions - max 4 */
  final int numDims;
  /** ranges encoded as a sortable byte array */
  final byte[] ranges;
  /** number of bytes per dimension */
  final int bytesPerDim;

  /** Used by {@code RangeFieldQuery} to check how each internal or leaf node relates to the query. */
  enum QueryType {
    /** Use this for intersects queries. */
    INTERSECTS,
    /** Use this for within queries. */
    WITHIN,
    /** Use this for contains */
    CONTAINS
  }

  /**
   * Create a query for searching indexed ranges that match the provided relation.
   * @param field field name. must not be null.
   * @param ranges encoded range values; this is done by the {@code RangeField} implementation
   * @param numDims number of range dimensions (1 to 4); {@code ranges} holds a min and a max value for each
   * @param queryType the query relation
   * @throws IllegalArgumentException if any argument is null, {@code ranges} is empty,
   *         or {@code numDims} is outside 1..4
   */
  RangeFieldQuery(String field, final byte[] ranges, final int numDims, final QueryType queryType) {
    checkArgs(field, ranges, numDims);
    if (queryType == null) {
      throw new IllegalArgumentException("Query type cannot be null");
    }
    this.field = field;
    this.queryType = queryType;
    this.numDims = numDims;
    this.ranges = ranges;
    // ranges holds numDims min values followed by numDims max values
    this.bytesPerDim = ranges.length / (2*numDims);
  }

  /** check input arguments */
  private static void checkArgs(String field, final byte[] ranges, final int numDims) {
    if (field == null) {
      throw new IllegalArgumentException("field must not be null");
    }
    if (numDims < 1) {
      // guards the division by 2*numDims in the constructor
      throw new IllegalArgumentException("dimension size must be at least 1");
    }
    if (numDims > 4) {
      throw new IllegalArgumentException("dimension size cannot be greater than 4");
    }
    if (ranges == null || ranges.length == 0) {
      throw new IllegalArgumentException("encoded ranges cannot be null or empty");
    }
  }

  /** Check indexed field info against the provided query data. */
  private void checkFieldInfo(FieldInfo fieldInfo) {
    if (fieldInfo.getPointDimensionCount()/2 != numDims) {
      throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims="
          + fieldInfo.getPointDimensionCount()/2 + " but this query has numDims=" + numDims);
    }
  }

  @Override
  public final Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return new ConstantScoreWeight(this) {
      final RangeFieldComparator comparator = new RangeFieldComparator();

      /** walks the point values of the segment and collects every document matching the query relation */
      private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values) throws IOException {
        DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
        values.intersect(field,
            new IntersectVisitor() {
              DocIdSetBuilder.BulkAdder adder;

              @Override
              public void grow(int count) {
                adder = result.grow(count);
              }

              @Override
              public void visit(int docID) throws IOException {
                adder.add(docID);
              }

              @Override
              public void visit(int docID, byte[] leaf) throws IOException {
                // add the document iff:
                if (// target is within cell and queryType is INTERSECTS or CONTAINS:
                    (comparator.isWithin(leaf) && queryType != QueryType.WITHIN)
                    // target contains cell and queryType is INTERSECTS or WITHIN:
                    || (comparator.contains(leaf) && queryType != QueryType.CONTAINS)
                    // target is not disjoint (crosses) and queryType is INTERSECTS
                    || (comparator.isDisjoint(leaf) == false && queryType == QueryType.INTERSECTS)) {
                  adder.add(docID);
                }
              }

              @Override
              public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                byte[] node = getInternalRange(minPackedValue, maxPackedValue);
                // compute range relation for BKD traversal
                if (comparator.isDisjoint(node)) {
                  return Relation.CELL_OUTSIDE_QUERY;
                }
                if (comparator.contains(node)) {
                  if (queryType != QueryType.CONTAINS) {
                    // every range in the cell lies within the target, so all of them match
                    return Relation.CELL_INSIDE_QUERY;
                  }
                  // CONTAINS query: a range inside the cell can only contain the target if it is
                  // equal to it, which requires the target to also lie within the cell bounds
                  return comparator.isWithin(node) ? Relation.CELL_CROSSES_QUERY : Relation.CELL_OUTSIDE_QUERY;
                }
                // target is within or intersects cell; continue traversing:
                return Relation.CELL_CROSSES_QUERY;
              }
            });
        return result.build();
      }

      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        LeafReader reader = context.reader();
        PointValues values = reader.getPointValues();
        if (values == null) {
          // no docs in this segment indexed any ranges
          return null;
        }
        FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        if (fieldInfo == null) {
          // no docs in this segment indexed this field
          return null;
        }
        checkFieldInfo(fieldInfo);

        // All docs can be accepted without visiting them only when every doc has a value and the
        // query contains the bounding range of the whole segment. That proves each indexed range
        // is within (and so intersects) the query; it proves nothing for CONTAINS queries.
        boolean allDocsMatch = false;
        if (queryType != QueryType.CONTAINS && values.getDocCount(field) == reader.maxDoc()) {
          byte[] range = getInternalRange(values.getMinPackedValue(field), values.getMaxPackedValue(field));
          allDocsMatch = comparator.contains(range);
        }

        DocIdSetIterator iterator = allDocsMatch
            ? DocIdSetIterator.all(reader.maxDoc())
            : buildMatchingDocIdSet(reader, values).iterator();
        return new ConstantScoreScorer(this, score(), iterator);
      }

      /** get an encoded byte representation of the internal node; this is
       *  the lower half of the min array and the upper half of the max array */
      private byte[] getInternalRange(byte[] min, byte[] max) {
        byte[] range = new byte[min.length];
        final int dimSize = numDims * bytesPerDim;
        System.arraycopy(min, 0, range, 0, dimSize);
        System.arraycopy(max, dimSize, range, dimSize, dimSize);
        return range;
      }
    };
  }

  /**
   * RangeFieldComparator class provides the core comparison logic for accepting or rejecting indexed
   * {@code RangeField} types based on the defined query range and relation.
   */
  class RangeFieldComparator {
    /** check if the query is outside the candidate range */
    private boolean isDisjoint(final byte[] range) {
      for (int d=0; d<numDims; ++d) {
        if (compareMinMax(range, d) > 0 || compareMaxMin(range, d) < 0) {
          return true;
        }
      }
      return false;
    }

    /** check if query is within candidate range */
    private boolean isWithin(final byte[] range) {
      for (int d=0; d<numDims; ++d) {
        if (compareMinMin(range, d) < 0 || compareMaxMax(range, d) > 0) {
          return false;
        }
      }
      return true;
    }

    /** check if query contains candidate range */
    private boolean contains(final byte[] range) {
      for (int d=0; d<numDims; ++d) {
        if (compareMinMin(range, d) > 0 || compareMaxMax(range, d) < 0) {
          return false;
        }
      }
      return true;
    }

    /** compare the encoded min value (for the defined query dimension) with the encoded min value in the byte array */
    private int compareMinMin(byte[] b, int dimension) {
      // convert dimension to offset:
      dimension *= bytesPerDim;
      return StringHelper.compare(bytesPerDim, ranges, dimension, b, dimension);
    }

    /** compare the encoded min value (for the defined query dimension) with the encoded max value in the byte array */
    private int compareMinMax(byte[] b, int dimension) {
      // convert dimension to offset:
      dimension *= bytesPerDim;
      return StringHelper.compare(bytesPerDim, ranges, dimension, b, numDims * bytesPerDim + dimension);
    }

    /** compare the encoded max value (for the defined query dimension) with the encoded min value in the byte array */
    private int compareMaxMin(byte[] b, int dimension) {
      // convert dimension to offset:
      dimension *= bytesPerDim;
      return StringHelper.compare(bytesPerDim, ranges, numDims * bytesPerDim + dimension, b, dimension);
    }

    /** compare the encoded max value (for the defined query dimension) with the encoded max value in the byte array */
    private int compareMaxMax(byte[] b, int dimension) {
      // convert dimension to max offset:
      dimension = numDims * bytesPerDim + dimension * bytesPerDim;
      return StringHelper.compare(bytesPerDim, ranges, dimension, b, dimension);
    }
  }

  @Override
  public int hashCode() {
    int hash = classHash();
    hash = 31 * hash + field.hashCode();
    hash = 31 * hash + numDims;
    hash = 31 * hash + queryType.hashCode();
    hash = 31 * hash + Arrays.hashCode(ranges);
    return hash;
  }

  @Override
  public final boolean equals(Object o) {
    return sameClassAs(o) &&
        equalsTo(getClass().cast(o));
  }

  /** compares field name, dimension count, encoded ranges and query type */
  protected boolean equalsTo(RangeFieldQuery other) {
    return Objects.equals(field, other.field) &&
        numDims == other.numDims &&
        Arrays.equals(ranges, other.ranges) &&
        other.queryType == queryType;
  }

  @Override
  public String toString(String field) {
    StringBuilder sb = new StringBuilder();
    // only prefix the field name when it differs from the default field passed in
    if (this.field.equals(field) == false) {
      sb.append(this.field);
      sb.append(':');
    }
    sb.append("<ranges:");
    sb.append(toString(ranges, 0));
    for (int d=1; d<numDims; ++d) {
      sb.append(' ');
      sb.append(toString(ranges, d));
    }
    sb.append('>');

    return sb.toString();
  }

  /**
   * Returns a string of a single range in a human-readable format for debugging.
   * This is used by {@link #toString(String)}.
   *
   * @param ranges encoded ranges, never null
   * @param dimension dimension of the particular value
   * @return human readable value for debugging
   */
  protected abstract String toString(byte[] ranges, int dimension);
}

View File

@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import org.apache.lucene.util.LuceneTestCase;
/**
* Random testing for RangeField type.
**/
public class TestDoubleRangeField extends LuceneTestCase {
  private static final String FIELD_NAME = "rangeField";

  /** NaN is rejected both as a min and as a max value */
  public void testIllegalNaNValues() {
    assertRejected(new double[] {Double.NaN}, new double[] {5}, "invalid min value");
    assertRejected(new double[] {5}, new double[] {Double.NaN}, "invalid max value");
  }

  /** min and max arrays of different lengths are rejected */
  public void testUnevenArrays() {
    assertRejected(new double[] {5, 6}, new double[] {5}, "min/max ranges must agree");
  }

  /** more than 4 dimensions are rejected */
  public void testOversizeDimensions() {
    assertRejected(new double[] {1, 2, 3, 4, 5}, new double[] {5}, "does not support greater than 4 dimensions");
  }

  /** a min value above the matching max value is rejected */
  public void testMinGreaterThanMax() {
    assertRejected(new double[] {3, 4}, new double[] {1, 2}, "is greater than max value");
  }

  /**
   * Asserts that adding a {@code DoubleRangeField} built from the given arrays to a fresh document
   * fails with an {@code IllegalArgumentException} whose message contains {@code expectedFragment}.
   */
  private static void assertRejected(double[] min, double[] max, String expectedFragment) {
    final Document target = new Document();
    final IllegalArgumentException thrown = expectThrows(IllegalArgumentException.class,
        () -> target.add(new DoubleRangeField(FIELD_NAME, min, max)));
    assertTrue(thrown.getMessage().contains(expectedFragment));
  }
}

View File

@ -0,0 +1,403 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
/**
* Abstract class to do basic tests for a RangeField query.
*/
public abstract class BaseRangeFieldQueryTestCase extends LuceneTestCase {
/** Creates the range field under test from parallel per-dimension min/max arrays; the result is added to each indexed test document. */
protected abstract Field newRangeField(double[] min, double[] max);
/** Creates the query under test that is checked against the expectation for {@code Box.QueryType.INTERSECTS}. */
protected abstract Query newIntersectsQuery(double[] min, double[] max);
/** Creates the query under test that is checked against the expectation for {@code Box.QueryType.CONTAINS}. */
protected abstract Query newContainsQuery(double[] min, double[] max);
/** Creates the query under test that is checked against the expectation for {@code Box.QueryType.WITHIN}. */
protected abstract Query newWithinQuery(double[] min, double[] max);
/**
 * Chooses the number of range dimensions used for one randomized run.
 * Returns {@code random().nextInt(4) + 1}; presumably 1..4 inclusive, assuming {@code random()}
 * follows the {@code java.util.Random} contract.
 */
protected int dimension() {
return random().nextInt(4) + 1;
}
/** Runs the randomized index/query check with a base count of 10 documents, single-valued only. */
public void testRandomTiny() throws Exception {
// Make sure single-leaf-node case is OK:
doTestRandom(10, false);
}
/** Runs the randomized index/query check with a base count of 10000 documents, single-valued only. */
public void testRandomMedium() throws Exception {
doTestRandom(10000, false);
}
/** Runs the randomized index/query check with a base count of 200000 documents, single-valued only; annotated {@code @Nightly}. */
@Nightly
public void testRandomBig() throws Exception {
doTestRandom(200000, false);
}
/** Runs the randomized index/query check with a base count of 10000 documents, allowing documents with more than one range. */
public void testMultiValued() throws Exception {
doTestRandom(10000, true);
}
/**
 * Generates the random boxes for one test run and hands them to {@code verify}.
 * <p>
 * Roughly one document in twenty gets no box (flagged by a NaN in {@code min[0]}), and some
 * documents deliberately share a whole box, or a single bound, with an earlier document.
 *
 * @param count base number of documents (passed through {@code atLeast})
 * @param multiValued whether documents may carry more than one box
 */
private void doTestRandom(int count, boolean multiValued) throws Exception {
  final int numDocs = atLeast(count);
  final int dimensions = dimension();

  if (VERBOSE) {
    System.out.println("TEST: numDocs=" + numDocs);
  }

  final Box[][] boxes = new Box[numDocs][];
  final boolean haveRealDoc = true;

  nextdoc:
  for (int id = 0; id < numDocs; ++id) {
    final int x = random().nextInt(20);
    if (boxes[id] == null) {
      boxes[id] = new Box[] {nextBox(dimensions)};
    }

    if (x == 17) {
      // some docs don't have a box:
      boxes[id][0].min[0] = Double.NaN;
      if (VERBOSE) {
        System.out.println(" id=" + id + " is missing");
      }
      continue;
    }

    if (multiValued && random().nextBoolean()) {
      // randomly add multi valued documents (up to 2 fields)
      final int valueCount = random().nextInt(2) + 1;
      boxes[id] = new Box[valueCount];
      for (int slot = 0; slot < valueCount; ++slot) {
        boxes[id][slot] = nextBox(dimensions);
      }
    }

    if (id == 0 || x >= 9 || !haveRealDoc) {
      // nothing to share with an earlier document
      continue;
    }

    // pick an earlier document that has a box; give up on this doc after more than id misses
    int oldID = random().nextInt(id);
    int misses = 0;
    while (Double.isNaN(boxes[oldID][0].min[0])) {
      if (++misses > id) {
        continue nextdoc;
      }
      oldID = random().nextInt(id);
    }

    // the current doc may be multi-valued while the old one is not, so always use the first box
    final Box current = boxes[id][0];
    final Box previous = boxes[oldID][0];

    if (x == dimensions * 2) {
      // Fully identical box
      for (int d = 0; d < dimensions; ++d) {
        current.min[d] = previous.min[d];
        current.max[d] = previous.max[d];
      }
      if (VERBOSE) {
        System.out.println(" id=" + id + " box=" + boxes[id] + " (same box as doc=" + oldID + ")");
      }
    } else if (x < dimensions * 2) {
      // share a single bound of dimension x/2 with the old box
      final int d = x / 2;
      if (dimensions % 2 == 0) {
        current.setVal(d, previous.min[d]);
        if (VERBOSE) {
          System.out.println(" id=" + id + " box=" + boxes[id] + " (same min[" + d + "] as doc=" + oldID + ")");
        }
      } else {
        current.setVal(d, previous.max[d]);
        if (VERBOSE) {
          System.out.println(" id=" + id + " box=" + boxes[id] + " (same max[" + d + "] as doc=" + oldID + ")");
        }
      }
    }
  }

  verify(boxes);
}
/**
 * Indexes one document per entry of {@code boxes} (randomly deleting some), then runs random
 * intersects/contains/within queries and compares every hit against the brute-force expectation.
 *
 * @param boxes per-document boxes; a NaN in {@code boxes[id][0].min[0]} marks a document without ranges
 */
private void verify(Box[][] boxes) throws Exception {
  final IndexWriterConfig config = newIndexWriterConfig();
  // Else seeds may not reproduce:
  config.setMergeScheduler(new SerialMergeScheduler());
  // Else we can get O(N^2) merging
  final int maxBuffered = config.getMaxBufferedDocs();
  if (maxBuffered != -1 && maxBuffered < boxes.length/100) {
    config.setMaxBufferedDocs(boxes.length/100);
  }

  final Directory dir = boxes.length > 50000
      ? newFSDirectory(createTempDir(getClass().getSimpleName()))
      : newDirectory();

  final Set<Integer> deleted = new HashSet<>();
  final IndexWriter writer = new IndexWriter(dir, config);
  for (int id = 0; id < boxes.length; ++id) {
    final Document doc = new Document();
    doc.add(newStringField("id", ""+id, Field.Store.NO));
    doc.add(new NumericDocValuesField("id", id));
    if (!Double.isNaN(boxes[id][0].min[0])) {
      for (Box box : boxes[id]) {
        doc.add(newRangeField(box.min, box.max));
      }
    }
    writer.addDocument(doc);

    // occasionally delete an earlier (or the current) document
    if (id > 0 && random().nextInt(100) == 1) {
      final int idToDelete = random().nextInt(id);
      writer.deleteDocuments(new Term("id", ""+idToDelete));
      deleted.add(idToDelete);
      if (VERBOSE) {
        System.out.println(" delete id=" + idToDelete);
      }
    }
  }

  if (random().nextBoolean()) {
    writer.forceMerge(1);
  }
  final IndexReader reader = DirectoryReader.open(writer);
  writer.close();
  final IndexSearcher searcher = newSearcher(reader);

  final int dimensions = boxes[0][0].min.length;
  final int iters = atLeast(25);
  final NumericDocValues docIDToID = MultiDocValues.getNumericValues(reader, "id");
  final Bits liveDocs = MultiFields.getLiveDocs(searcher.getIndexReader());
  final int maxDoc = searcher.getIndexReader().maxDoc();

  for (int iter = 0; iter < iters; ++iter) {
    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " s=" + searcher);
    }

    // occasionally test open ended bounding boxes
    final Box queryBox = nextBox(dimensions);
    final Query query;
    final Box.QueryType queryType;
    switch (random().nextInt(3)) {
      case 0:
        queryType = Box.QueryType.INTERSECTS;
        query = newIntersectsQuery(queryBox.min, queryBox.max);
        break;
      case 1:
        queryType = Box.QueryType.CONTAINS;
        query = newContainsQuery(queryBox.min, queryBox.max);
        break;
      default:
        queryType = Box.QueryType.WITHIN;
        query = newWithinQuery(queryBox.min, queryBox.max);
        break;
    }

    if (VERBOSE) {
      System.out.println(" query=" + query);
    }

    // collect all hits as top-level doc ids
    final FixedBitSet hits = new FixedBitSet(maxDoc);
    searcher.search(query, new SimpleCollector() {
      private int docBase;

      @Override
      public void collect(int doc) {
        hits.set(docBase + doc);
      }

      @Override
      protected void doSetNextReader(LeafReaderContext context) throws IOException {
        docBase = context.docBase;
      }

      @Override
      public boolean needsScores() { return false; }
    });

    for (int docID = 0; docID < maxDoc; ++docID) {
      final int id = (int) docIDToID.get(docID);
      final boolean isDeleted = liveDocs != null && !liveDocs.get(docID);
      // deleted documents and documents without a box must never match
      final boolean expected = !isDeleted
          && !Double.isNaN(boxes[id][0].min[0])
          && expectedResult(queryBox, boxes[id], queryType);

      if (hits.get(docID) == expected) {
        continue;
      }

      final boolean multi = boxes[id].length > 1;
      final StringBuilder msg = new StringBuilder();
      if (expected) {
        msg.append("FAILS: id=" + id + (multi ? " (MultiValue) " : " ") + "should match but did not\n");
      } else {
        msg.append("FAIL: id=" + id + " should not match but did\n");
      }
      msg.append(" queryBox=" + queryBox + "\n");
      msg.append(" box" + (multi ? "es=" : "=") + boxes[id][0]);
      for (int n = 1; n < boxes[id].length; ++n) {
        msg.append(", ");
        msg.append(boxes[id][n]);
      }
      msg.append("\n queryType=" + queryType + "\n");
      msg.append(" deleted?=" + isDeleted);
      fail("wrong hit (first of possibly more):\n\n" + msg);
    }
  }
  IOUtils.close(reader, dir);
}
/**
 * Computes the expected outcome of a query against a (possibly multi-valued) document.
 *
 * @param queryBox the box the query was built from
 * @param box all boxes indexed for the document
 * @param queryType the relation the query asks for
 * @return {@code true} as soon as one of the document's boxes satisfies the query,
 *         {@code false} when none does
 */
protected boolean expectedResult(Box queryBox, Box[] box, Box.QueryType queryType) {
  for (Box candidate : box) {
    // a single matching value is enough for the document to be a hit
    if (expectedBBoxQueryResult(queryBox, candidate, queryType)) {
      return true;
    }
  }
  return false;
}
/**
 * Computes the expected outcome of a query against one indexed box.
 *
 * @param queryBox the box the query was built from
 * @param box a single indexed box
 * @param queryType the relation the query asks for
 * @return for {@code INTERSECTS}, whether the boxes are related at all (i.e. not disjoint);
 *         for any other type, whether {@code box.relate(queryBox)} is exactly that type
 */
protected boolean expectedBBoxQueryResult(Box queryBox, Box box, Box.QueryType queryType) {
  final Box.QueryType actual = box.relate(queryBox);
  // relate() yields null only for disjoint boxes, so any non-null relation counts as an intersection
  return (queryType == Box.QueryType.INTERSECTS)
      ? actual != null
      : actual == queryType;
}
/**
 * Produces the next random bound used when building test boxes.
 *
 * @return positive or negative infinity when {@code rarely()} fires (to exercise open ended
 *         ranges); otherwise {@code random().nextDouble()} scaled by 100 and shifted down by 50
 */
protected double nextDoubleInternal() {
  if (!rarely()) {
    final double bound = 100 / 2;
    final double span = bound + bound;
    return span * random().nextDouble() - bound;
  }
  // open ended value: pick the sign at random
  if (random().nextBoolean()) {
    return Double.POSITIVE_INFINITY;
  }
  return Double.NEGATIVE_INFINITY;
}
/**
 * Builds a random box with the requested number of dimensions.
 * <p>
 * Two values are drawn per dimension from {@link #nextDoubleInternal()}; the {@code Box}
 * constructor is left to put each pair into min/max order.
 *
 * @param dimensions number of dimensions of the box
 * @return a new randomly populated box
 */
protected Box nextBox(int dimensions) {
  final double[] lower = new double[dimensions];
  final double[] upper = new double[dimensions];
  int d = 0;
  while (d < dimensions) {
    lower[d] = nextDoubleInternal();
    upper[d] = nextDoubleInternal();
    d++;
  }
  return new Box(lower, upper);
}
/**
 * Test-side model of an n-dimensional range, stored as one min and one max value per dimension.
 */
protected static class Box {
  double[] min;
  double[] max;

  /** Relations reported by {@link #relate(Box)} and requested by queries. */
  enum QueryType { INTERSECTS, WITHIN, CONTAINS }

  /**
   * Creates a box from two corner arrays. The inputs are copied, and for every dimension the
   * smaller of the two values becomes the min and the larger becomes the max.
   *
   * @param min one corner; must be non-null, non-empty and as long as {@code max}
   * @param max the opposite corner
   */
  Box(double[] min, double[] max) {
    assert min != null && max != null && min.length > 0 && max.length > 0
        : "test box: min/max cannot be null or empty";
    assert min.length == max.length : "test box: min/max length do not agree";
    final double[] lows = new double[min.length];
    final double[] highs = new double[max.length];
    for (int i = 0; i < min.length; i++) {
      final double a = min[i];
      final double b = max[i];
      lows[i] = Math.min(a, b);
      highs[i] = Math.max(a, b);
    }
    this.min = lows;
    this.max = highs;
  }

  /**
   * Overwrites one bound of the given dimension: the min when {@code val} is not greater than
   * the current min, otherwise the max.
   *
   * @param dimension index of the dimension to modify
   * @param val the new bound value
   */
  protected void setVal(int dimension, double val) {
    final double[] target = (val <= min[dimension]) ? min : max;
    target[dimension] = val;
  }

  /**
   * Relates this box to {@code other}, dimension by dimension, with inclusive bounds.
   *
   * @param other the box to compare against
   * @return {@code null} if the boxes are disjoint in any dimension; {@code WITHIN} if this box
   *         lies inside {@code other} in every dimension (equal boxes are reported as WITHIN
   *         because that check runs first); {@code CONTAINS} if this box encloses {@code other}
   *         in every dimension; {@code INTERSECTS} otherwise
   */
  QueryType relate(Box other) {
    final int dims = this.min.length;
    // disjoint in a single dimension means disjoint overall
    for (int i = 0; i < dims; i++) {
      if (this.min[i] > other.max[i] || this.max[i] < other.min[i]) {
        return null;
      }
    }
    if (fitsInside(this, other, dims)) {
      return QueryType.WITHIN;
    }
    if (fitsInside(other, this, dims)) {
      return QueryType.CONTAINS;
    }
    return QueryType.INTERSECTS;
  }

  /** Returns true when {@code inner} lies inside {@code outer} in each of the first {@code dims} dimensions. */
  private static boolean fitsInside(Box inner, Box outer, int dims) {
    for (int i = 0; i < dims; i++) {
      // keep the negated conjunction: flipping the comparisons would change the result for NaN
      if (!(inner.min[i] >= outer.min[i] && inner.max[i] <= outer.max[i])) {
        return false;
      }
    }
    return true;
  }

  /** Renders the box as {@code Box(min0 TO max0, min1 TO max1, ...)}. */
  @Override
  public String toString() {
    final StringBuilder out = new StringBuilder("Box(");
    out.append(min[0]).append(" TO ").append(max[0]);
    int i = 1;
    while (i < min.length) {
      out.append(", ").append(min[i]).append(" TO ").append(max[i]);
      i++;
    }
    return out.append(")").toString();
  }
}
}

View File

@ -0,0 +1,106 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleRangeField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
/**
* Random testing for RangeFieldQueries. Testing rigor inspired by {@code BaseGeoPointTestCase}
*/
public class TestDoubleRangeFieldQueries extends BaseRangeFieldQueryTestCase {
private static final String FIELD_NAME = "rangeField";
protected DoubleRangeField newRangeField(double[] min, double[] max) {
return new DoubleRangeField(FIELD_NAME, min, max);
}
protected Query newIntersectsQuery(double[] min, double[] max) {
return DoubleRangeField.newIntersectsQuery(FIELD_NAME, min, max);
}
protected Query newContainsQuery(double[] min, double[] max) {
return DoubleRangeField.newContainsQuery(FIELD_NAME, min, max);
}
protected Query newWithinQuery(double[] min, double[] max) {
return DoubleRangeField.newWithinQuery(FIELD_NAME, min, max);
}
/** Basic test */
public void testBasics() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
// intersects (within)
Document document = new Document();
document.add(new DoubleRangeField(FIELD_NAME, new double[] {-10.0, -10.0}, new double[] {9.1, 10.1}));
writer.addDocument(document);
// intersects (crosses)
document = new Document();
document.add(new DoubleRangeField(FIELD_NAME, new double[] {10.0, -10.0}, new double[] {20.0, 10.0}));
writer.addDocument(document);
// intersects (contains)
document = new Document();
document.add(new DoubleRangeField(FIELD_NAME, new double[] {-20.0, -20.0}, new double[] {30.0, 30.1}));
writer.addDocument(document);
// intersects (crosses)
document = new Document();
document.add(new DoubleRangeField(FIELD_NAME, new double[] {-11.1, -11.2}, new double[] {1.23, 11.5}));
writer.addDocument(document);
// intersects (crosses)
document = new Document();
document.add(new DoubleRangeField(FIELD_NAME, new double[] {12.33, 1.2}, new double[] {15.1, 29.9}));
writer.addDocument(document);
// disjoint
document = new Document();
document.add(new DoubleRangeField(FIELD_NAME, new double[] {-122.33, 1.2}, new double[] {-115.1, 29.9}));
writer.addDocument(document);
// intersects (crosses)
document = new Document();
document.add(new DoubleRangeField(FIELD_NAME, new double[] {Double.NEGATIVE_INFINITY, 1.2}, new double[] {-11.0, 29.9}));
writer.addDocument(document);
// equal (within, contains, intersects)
document = new Document();
document.add(new DoubleRangeField(FIELD_NAME, new double[] {-11, -15}, new double[] {15, 20}));
writer.addDocument(document);
// search
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
assertEquals(7, searcher.count(DoubleRangeField.newIntersectsQuery(FIELD_NAME,
new double[] {-11.0, -15.0}, new double[] {15.0, 20.0})));
assertEquals(2, searcher.count(DoubleRangeField.newWithinQuery(FIELD_NAME,
new double[] {-11.0, -15.0}, new double[] {15.0, 20.0})));
assertEquals(2, searcher.count(DoubleRangeField.newContainsQuery(FIELD_NAME,
new double[] {-11.0, -15.0}, new double[] {15.0, 20.0})));
reader.close();
writer.close();
dir.close();
}
}