LUCENE-7048: add XXXPoint.newSetQuery to match documents with any values from the specified set (this is the analog of TermsQuery, for points)

Merge branch 'point_set_query'
This commit is contained in:
Mike McCandless 2016-02-25 13:08:35 -05:00
commit 446ce8604e
14 changed files with 1444 additions and 20 deletions

View File

@ -16,9 +16,17 @@
*/ */
package org.apache.lucene.document; package org.apache.lucene.document;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.StringHelper;
/** /**
* An indexed binary field. * An indexed binary field.
@ -32,6 +40,7 @@ import org.apache.lucene.util.BytesRef;
* <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point. * <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point.
* <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range. * <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range.
* <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space. * <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of 1D values.
* </ul> * </ul>
*/ */
public final class BinaryPoint extends Field { public final class BinaryPoint extends Field {
@ -198,4 +207,63 @@ public final class BinaryPoint extends Field {
} }
}; };
} }
/**
* Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
*
* @param field field name. must not be {@code null}.
* @param valuesIn all values to match
*/
public static Query newSetQuery(String field, byte[]... valuesIn) throws IOException {
// Make sure all byte[] have the same length
int bytesPerDim = -1;
for(byte[] value : valuesIn) {
if (bytesPerDim == -1) {
bytesPerDim = value.length;
} else if (value.length != bytesPerDim) {
throw new IllegalArgumentException("all byte[] must be the same length, but saw " + bytesPerDim + " and " + value.length);
}
}
if (bytesPerDim == -1) {
// There are no points, and we cannot guess the bytesPerDim here, so we return an equivalent query:
return new MatchNoDocsQuery();
}
// Don't unexpectedly change the user's incoming values array:
byte[][] values = valuesIn.clone();
Arrays.sort(values,
new Comparator<byte[]>() {
@Override
public int compare(byte[] a, byte[] b) {
return StringHelper.compare(a.length, a, 0, b, 0);
}
});
final BytesRef value = new BytesRef(new byte[bytesPerDim]);
return new PointInSetQuery(field, 1, bytesPerDim,
new BytesRefIterator() {
int upto;
@Override
public BytesRef next() {
if (upto == values.length) {
return null;
} else {
value.bytes = values[upto];
upto++;
return value;
}
}
}) {
@Override
protected String toString(byte[] value) {
return new BytesRef(value).toString();
}
};
}
} }

View File

@ -16,9 +16,14 @@
*/ */
package org.apache.lucene.document; package org.apache.lucene.document;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
/** /**
@ -33,6 +38,7 @@ import org.apache.lucene.util.NumericUtils;
* <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point. * <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point.
* <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range. * <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range.
* <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space. * <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of 1D values.
* </ul> * </ul>
*/ */
public final class DoublePoint extends Field { public final class DoublePoint extends Field {
@ -135,12 +141,12 @@ public final class DoublePoint extends Field {
/** Encode single double dimension */ /** Encode single double dimension */
public static void encodeDimension(double value, byte dest[], int offset) { public static void encodeDimension(double value, byte dest[], int offset) {
NumericUtils.longToBytesDirect(NumericUtils.doubleToSortableLong(value), dest, offset); NumericUtils.longToBytes(NumericUtils.doubleToSortableLong(value), dest, offset);
} }
/** Decode single double dimension */ /** Decode single double dimension */
public static double decodeDimension(byte value[], int offset) { public static double decodeDimension(byte value[], int offset) {
return NumericUtils.sortableLongToDouble(NumericUtils.bytesToLongDirect(value, offset)); return NumericUtils.sortableLongToDouble(NumericUtils.bytesToLong(value, offset));
} }
// static methods for generating queries // static methods for generating queries
@ -214,4 +220,43 @@ public final class DoublePoint extends Field {
} }
}; };
} }
/**
* Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
*
* @param field field name. must not be {@code null}.
* @param valuesIn all values to match
*/
public static Query newSetQuery(String field, double... valuesIn) throws IOException {
// Don't unexpectedly change the user's incoming values array:
double[] values = valuesIn.clone();
Arrays.sort(values);
final BytesRef value = new BytesRef(new byte[Double.BYTES]);
return new PointInSetQuery(field, 1, Double.BYTES,
new BytesRefIterator() {
int upto;
@Override
public BytesRef next() {
if (upto == values.length) {
return null;
} else {
encodeDimension(values[upto], value.bytes, 0);
upto++;
return value;
}
}
}) {
@Override
protected String toString(byte[] value) {
assert value.length == Double.BYTES;
return Double.toString(decodeDimension(value, 0));
}
};
}
} }

View File

@ -16,9 +16,14 @@
*/ */
package org.apache.lucene.document; package org.apache.lucene.document;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
/** /**
@ -33,6 +38,7 @@ import org.apache.lucene.util.NumericUtils;
* <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point. * <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point.
* <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range. * <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range.
* <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space. * <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of 1D values.
* </ul> * </ul>
*/ */
public final class FloatPoint extends Field { public final class FloatPoint extends Field {
@ -135,12 +141,12 @@ public final class FloatPoint extends Field {
/** Encode single float dimension */ /** Encode single float dimension */
public static void encodeDimension(float value, byte dest[], int offset) { public static void encodeDimension(float value, byte dest[], int offset) {
NumericUtils.intToBytesDirect(NumericUtils.floatToSortableInt(value), dest, offset); NumericUtils.intToBytes(NumericUtils.floatToSortableInt(value), dest, offset);
} }
/** Decode single float dimension */ /** Decode single float dimension */
public static float decodeDimension(byte value[], int offset) { public static float decodeDimension(byte value[], int offset) {
return NumericUtils.sortableIntToFloat(NumericUtils.bytesToIntDirect(value, offset)); return NumericUtils.sortableIntToFloat(NumericUtils.bytesToInt(value, offset));
} }
// static methods for generating queries // static methods for generating queries
@ -214,4 +220,43 @@ public final class FloatPoint extends Field {
} }
}; };
} }
/**
* Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
*
* @param field field name. must not be {@code null}.
* @param valuesIn all values to match
*/
public static Query newSetQuery(String field, float... valuesIn) throws IOException {
// Don't unexpectedly change the user's incoming values array:
float[] values = valuesIn.clone();
Arrays.sort(values);
final BytesRef value = new BytesRef(new byte[Float.BYTES]);
return new PointInSetQuery(field, 1, Float.BYTES,
new BytesRefIterator() {
int upto;
@Override
public BytesRef next() {
if (upto == values.length) {
return null;
} else {
encodeDimension(values[upto], value.bytes, 0);
upto++;
return value;
}
}
}) {
@Override
protected String toString(byte[] value) {
assert value.length == Float.BYTES;
return Float.toString(decodeDimension(value, 0));
}
};
}
} }

View File

@ -16,9 +16,14 @@
*/ */
package org.apache.lucene.document; package org.apache.lucene.document;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
/** /**
@ -33,6 +38,7 @@ import org.apache.lucene.util.NumericUtils;
* <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point. * <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point.
* <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range. * <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range.
* <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space. * <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of 1D values.
* </ul> * </ul>
*/ */
public final class IntPoint extends Field { public final class IntPoint extends Field {
@ -214,4 +220,43 @@ public final class IntPoint extends Field {
} }
}; };
} }
/**
* Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
*
* @param field field name. must not be {@code null}.
* @param valuesIn all values to match
*/
public static Query newSetQuery(String field, int... valuesIn) throws IOException {
// Don't unexpectedly change the user's incoming values array:
int[] values = valuesIn.clone();
Arrays.sort(values);
final BytesRef value = new BytesRef(new byte[Integer.BYTES]);
return new PointInSetQuery(field, 1, Integer.BYTES,
new BytesRefIterator() {
int upto;
@Override
public BytesRef next() {
if (upto == values.length) {
return null;
} else {
encodeDimension(values[upto], value.bytes, 0);
upto++;
return value;
}
}
}) {
@Override
protected String toString(byte[] value) {
assert value.length == Integer.BYTES;
return Integer.toString(decodeDimension(value, 0));
}
};
}
} }

View File

@ -16,9 +16,14 @@
*/ */
package org.apache.lucene.document; package org.apache.lucene.document;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
/** /**
@ -33,6 +38,7 @@ import org.apache.lucene.util.NumericUtils;
* <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point. * <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point.
* <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range. * <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range.
* <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space. * <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of 1D values.
* </ul> * </ul>
*/ */
public final class LongPoint extends Field { public final class LongPoint extends Field {
@ -214,4 +220,43 @@ public final class LongPoint extends Field {
} }
}; };
} }
/**
* Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
*
* @param field field name. must not be {@code null}.
* @param valuesIn all values to match
*/
public static Query newSetQuery(String field, long... valuesIn) throws IOException {
// Don't unexpectedly change the user's incoming values array:
long[] values = valuesIn.clone();
Arrays.sort(values);
final BytesRef value = new BytesRef(new byte[Long.BYTES]);
return new PointInSetQuery(field, 1, Long.BYTES,
new BytesRefIterator() {
int upto;
@Override
public BytesRef next() {
if (upto == values.length) {
return null;
} else {
encodeDimension(values[upto], value.bytes, 0);
upto++;
return value;
}
}
}) {
@Override
protected String toString(byte[] value) {
assert value.length == Long.BYTES;
return Long.toString(decodeDimension(value, 0));
}
};
}
} }

View File

@ -67,7 +67,7 @@ public class PrefixCodedTerms implements Accountable {
add(term.field(), term.bytes()); add(term.field(), term.bytes());
} }
/** add a term */ /** add a term. This fully consumes in the incoming {@link BytesRef}. */
public void add(String field, BytesRef bytes) { public void add(String field, BytesRef bytes) {
assert lastTerm.equals(new Term("")) || new Term(field, bytes).compareTo(lastTerm) > 0; assert lastTerm.equals(new Term("")) || new Term(field, bytes).compareTo(lastTerm) > 0;

View File

@ -0,0 +1,370 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
/** Finds all documents whose point value, previously indexed with e.g. {@link org.apache.lucene.document.LongPoint}, is contained in the
* specified set */
public class PointInSetQuery extends Query {
// A little bit overkill for us, since all of our "terms" are always in the same field:
final PrefixCodedTerms sortedPackedPoints;
final int sortedPackedPointsHashCode;
final String field;
final int numDims;
final int bytesPerDim;
/** The {@code packedPoints} iterator must be in sorted order. */
protected PointInSetQuery(String field, int numDims, int bytesPerDim, BytesRefIterator packedPoints) throws IOException {
this.field = field;
if (bytesPerDim < 1 || bytesPerDim > PointValues.MAX_NUM_BYTES) {
throw new IllegalArgumentException("bytesPerDim must be > 0 and <= " + PointValues.MAX_NUM_BYTES + "; got " + bytesPerDim);
}
this.bytesPerDim = bytesPerDim;
if (numDims < 1 || numDims > PointValues.MAX_DIMENSIONS) {
throw new IllegalArgumentException("numDims must be > 0 and <= " + PointValues.MAX_DIMENSIONS + "; got " + numDims);
}
this.numDims = numDims;
// In the 1D case this works well (the more points, the more common prefixes they share, typically), but in
// the > 1 D case, where we are only looking at the first dimension's prefix bytes, it can at worst not hurt:
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
BytesRefBuilder previous = null;
BytesRef current;
while ((current = packedPoints.next()) != null) {
if (current.length != numDims * bytesPerDim) {
throw new IllegalArgumentException("packed point length should be " + (numDims * bytesPerDim) + " but got " + current.length + "; field=\"" + field + "\" numDims=" + numDims + " bytesPerDim=" + bytesPerDim);
}
if (previous == null) {
previous = new BytesRefBuilder();
} else {
int cmp = previous.get().compareTo(current);
if (cmp == 0) {
continue; // deduplicate
} else if (cmp > 0) {
throw new IllegalArgumentException("values are out of order: saw " + previous + " before " + current);
}
}
builder.add(field, current);
previous.copyBytes(current);
}
sortedPackedPoints = builder.finish();
sortedPackedPointsHashCode = sortedPackedPoints.hashCode();
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
// We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
// This is an inverted structure and should be used in the first pass:
return new ConstantScoreWeight(this) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
PointValues values = reader.getPointValues();
if (values == null) {
// No docs in this segment indexed any points
return null;
}
FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
if (fieldInfo == null) {
// No docs in this segment indexed this field at all
return null;
}
if (fieldInfo.getPointDimensionCount() != numDims) {
throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims=" + fieldInfo.getPointDimensionCount() + " but this query has numDims=" + numDims);
}
if (fieldInfo.getPointNumBytes() != bytesPerDim) {
throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + fieldInfo.getPointNumBytes() + " but this query has bytesPerDim=" + bytesPerDim);
}
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
int[] hitCount = new int[1];
if (numDims == 1) {
// We optimize this common case, effectively doing a merge sort of the indexed values vs the queried set:
values.intersect(field, new MergePointVisitor(sortedPackedPoints.iterator(), hitCount, result));
} else {
// NOTE: this is naive implementation, where for each point we re-walk the KD tree to intersect. We could instead do a similar
// optimization as the 1D case, but I think it'd mean building a query-time KD tree so we could efficiently intersect against the
// index, which is probably tricky!
SinglePointVisitor visitor = new SinglePointVisitor(hitCount, result);
TermIterator iterator = sortedPackedPoints.iterator();
for (BytesRef point = iterator.next(); point != null; point = iterator.next()) {
visitor.setPoint(point);
values.intersect(field, visitor);
}
}
// NOTE: hitCount[0] will be over-estimate in multi-valued case
return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
}
};
}
/** Essentially does a merge sort, only collecting hits when the indexed point and query point are the same. This is an optimization,
* used in the 1D case. */
private class MergePointVisitor implements IntersectVisitor {
private final DocIdSetBuilder result;
private final int[] hitCount;
private final TermIterator iterator;
private BytesRef nextQueryPoint;
private final BytesRef scratch = new BytesRef();
public MergePointVisitor(TermIterator iterator, int[] hitCount, DocIdSetBuilder result) throws IOException {
this.hitCount = hitCount;
this.result = result;
this.iterator = iterator;
nextQueryPoint = iterator.next();
scratch.length = bytesPerDim;
}
@Override
public void grow(int count) {
result.grow(count);
}
@Override
public void visit(int docID) {
hitCount[0]++;
result.add(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
scratch.bytes = packedValue;
while (nextQueryPoint != null) {
int cmp = nextQueryPoint.compareTo(scratch);
if (cmp == 0) {
// Query point equals index point, so collect and return
hitCount[0]++;
result.add(docID);
break;
} else if (cmp < 0) {
// Query point is before index point, so we move to next query point
nextQueryPoint = iterator.next();
} else {
// Query point is after index point, so we don't collect and we return:
break;
}
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
while (nextQueryPoint != null) {
scratch.bytes = minPackedValue;
int cmpMin = nextQueryPoint.compareTo(scratch);
if (cmpMin < 0) {
// query point is before the start of this cell
nextQueryPoint = iterator.next();
continue;
}
scratch.bytes = maxPackedValue;
int cmpMax = nextQueryPoint.compareTo(scratch);
if (cmpMax > 0) {
// query point is after the end of this cell
return Relation.CELL_OUTSIDE_QUERY;
}
if (cmpMin == 0 && cmpMax == 0) {
// NOTE: we only hit this if we are on a cell whose min and max values are exactly equal to our point,
// which can easily happen if many (> 1024) docs share this one value
return Relation.CELL_INSIDE_QUERY;
} else {
return Relation.CELL_CROSSES_QUERY;
}
}
// We exhausted all points in the query:
return Relation.CELL_OUTSIDE_QUERY;
}
}
/** IntersectVisitor that queries against a highly degenerate shape: a single point. This is used in the > 1D case. */
private class SinglePointVisitor implements IntersectVisitor {
private final DocIdSetBuilder result;
private final int[] hitCount;
private final byte[] pointBytes;
public SinglePointVisitor(int[] hitCount, DocIdSetBuilder result) {
this.hitCount = hitCount;
this.result = result;
this.pointBytes = new byte[bytesPerDim * numDims];
}
public void setPoint(BytesRef point) {
// we verified this up front in query's ctor:
assert point.length == pointBytes.length;
System.arraycopy(point.bytes, point.offset, pointBytes, 0, pointBytes.length);
}
@Override
public void grow(int count) {
result.grow(count);
}
@Override
public void visit(int docID) {
hitCount[0]++;
result.add(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
assert packedValue.length == pointBytes.length;
if (Arrays.equals(packedValue, pointBytes)) {
// The point for this doc matches the point we are querying on
hitCount[0]++;
result.add(docID);
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
boolean crosses = false;
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
int cmpMin = StringHelper.compare(bytesPerDim, minPackedValue, offset, pointBytes, offset);
if (cmpMin > 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
int cmpMax = StringHelper.compare(bytesPerDim, maxPackedValue, offset, pointBytes, offset);
if (cmpMax < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
if (cmpMin != 0 || cmpMax != 0) {
crosses = true;
}
}
if (crosses) {
return Relation.CELL_CROSSES_QUERY;
} else {
// NOTE: we only hit this if we are on a cell whose min and max values are exactly equal to our point,
// which can easily happen if many docs share this one value
return Relation.CELL_INSIDE_QUERY;
}
}
}
@Override
public int hashCode() {
int hash = super.hashCode();
hash += sortedPackedPointsHashCode^0x14fa55fb;
hash += numDims^0x14fa55fb;
hash += bytesPerDim^0x14fa55fb;
return hash;
}
@Override
public boolean equals(Object other) {
if (super.equals(other)) {
final PointInSetQuery q = (PointInSetQuery) other;
return q.numDims == numDims &&
q.bytesPerDim == bytesPerDim &&
q.sortedPackedPointsHashCode == sortedPackedPointsHashCode &&
q.sortedPackedPoints.equals(sortedPackedPoints);
}
return false;
}
@Override
public String toString(String field) {
final StringBuilder sb = new StringBuilder();
if (this.field.equals(field) == false) {
sb.append(this.field);
sb.append(':');
}
sb.append("{");
TermIterator iterator = sortedPackedPoints.iterator();
byte[] pointBytes = new byte[numDims * bytesPerDim];
boolean first = true;
for (BytesRef point = iterator.next(); point != null; point = iterator.next()) {
if (first == false) {
sb.append(" ");
}
first = false;
System.arraycopy(point.bytes, point.offset, pointBytes, 0, pointBytes.length);
sb.append(toString(pointBytes));
}
sb.append("}");
return sb.toString();
}
/**
* Returns a string of a single value in a human-readable format for debugging.
* This is used by {@link #toString()}.
*
* The default implementation encodes the individual byte values.
*
* @param value single value, never null
* @return human readable value for debugging
*/
protected String toString(byte[] value) {
assert value != null;
StringBuilder sb = new StringBuilder();
sb.append("binary(");
for (int i = 0; i < value.length; i++) {
if (i > 0) {
sb.append(' ');
}
sb.append(Integer.toHexString(value[i] & 0xFF));
}
sb.append(')');
return sb.toString();
}
}

View File

@ -158,27 +158,18 @@ public final class NumericUtils {
public static void intToBytes(int x, byte[] dest, int offset) { public static void intToBytes(int x, byte[] dest, int offset) {
// Flip the sign bit, so negative ints sort before positive ints correctly: // Flip the sign bit, so negative ints sort before positive ints correctly:
x ^= 0x80000000; x ^= 0x80000000;
intToBytesDirect(x, dest, offset);
}
public static void intToBytesDirect(int x, byte[] dest, int offset) {
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
dest[offset+i] = (byte) (x >> 24-i*8); dest[offset+i] = (byte) (x >> 24-i*8);
} }
} }
public static int bytesToInt(byte[] src, int index) { public static int bytesToInt(byte[] src, int offset) {
int x = bytesToIntDirect(src, index);
// Re-flip the sign bit to restore the original value:
return x ^ 0x80000000;
}
public static int bytesToIntDirect(byte[] src, int offset) {
int x = 0; int x = 0;
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
x |= (src[offset+i] & 0xff) << (24-i*8); x |= (src[offset+i] & 0xff) << (24-i*8);
} }
return x; // Re-flip the sign bit to restore the original value:
return x ^ 0x80000000;
} }
public static void longToBytes(long v, byte[] bytes, int offset) { public static void longToBytes(long v, byte[] bytes, int offset) {

View File

@ -22,10 +22,14 @@ import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.BitSet; import java.util.BitSet;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch; import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointFormat; import org.apache.lucene.codecs.PointFormat;
@ -57,9 +61,11 @@ import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import org.junit.BeforeClass; import org.junit.BeforeClass;
@ -92,6 +98,114 @@ public class TestPointQueries extends LuceneTestCase {
} }
} }
public void testBasicInts() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
doc.add(new IntPoint("point", -7));
w.addDocument(doc);
doc = new Document();
doc.add(new IntPoint("point", 0));
w.addDocument(doc);
doc = new Document();
doc.add(new IntPoint("point", 3));
w.addDocument(doc);
DirectoryReader r = DirectoryReader.open(w);
IndexSearcher s = new IndexSearcher(r);
assertEquals(2, s.count(IntPoint.newRangeQuery("point", -8, false, 1, false)));
assertEquals(3, s.count(IntPoint.newRangeQuery("point", -7, true, 3, true)));
assertEquals(1, s.count(IntPoint.newExactQuery("point", -7)));
assertEquals(0, s.count(IntPoint.newExactQuery("point", -6)));
w.close();
r.close();
dir.close();
}
public void testBasicFloats() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
doc.add(new FloatPoint("point", -7.0f));
w.addDocument(doc);
doc = new Document();
doc.add(new FloatPoint("point", 0.0f));
w.addDocument(doc);
doc = new Document();
doc.add(new FloatPoint("point", 3.0f));
w.addDocument(doc);
DirectoryReader r = DirectoryReader.open(w);
IndexSearcher s = new IndexSearcher(r);
assertEquals(2, s.count(FloatPoint.newRangeQuery("point", -8.0f, false, 1.0f, false)));
assertEquals(3, s.count(FloatPoint.newRangeQuery("point", -7.0f, true, 3.0f, true)));
assertEquals(1, s.count(FloatPoint.newExactQuery("point", -7.0f)));
assertEquals(0, s.count(FloatPoint.newExactQuery("point", -6.0f)));
w.close();
r.close();
dir.close();
}
public void testBasicLongs() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
doc.add(new LongPoint("point", -7));
w.addDocument(doc);
doc = new Document();
doc.add(new LongPoint("point", 0));
w.addDocument(doc);
doc = new Document();
doc.add(new LongPoint("point", 3));
w.addDocument(doc);
DirectoryReader r = DirectoryReader.open(w);
IndexSearcher s = new IndexSearcher(r);
assertEquals(2, s.count(LongPoint.newRangeQuery("point", -8L, false, 1L, false)));
assertEquals(3, s.count(LongPoint.newRangeQuery("point", -7L, true, 3L, true)));
assertEquals(1, s.count(LongPoint.newExactQuery("point", -7L)));
assertEquals(0, s.count(LongPoint.newExactQuery("point", -6L)));
w.close();
r.close();
dir.close();
}
public void testBasicDoubles() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
doc.add(new DoublePoint("point", -7.0));
w.addDocument(doc);
doc = new Document();
doc.add(new DoublePoint("point", 0.0));
w.addDocument(doc);
doc = new Document();
doc.add(new DoublePoint("point", 3.0));
w.addDocument(doc);
DirectoryReader r = DirectoryReader.open(w);
IndexSearcher s = new IndexSearcher(r);
assertEquals(2, s.count(DoublePoint.newRangeQuery("point", -8.0, false, 1.0, false)));
assertEquals(3, s.count(DoublePoint.newRangeQuery("point", -7.0, true, 3.0, true)));
assertEquals(1, s.count(DoublePoint.newExactQuery("point", -7.0)));
assertEquals(0, s.count(DoublePoint.newExactQuery("point", -6.0)));
w.close();
r.close();
dir.close();
}
public void testAllEqual() throws Exception { public void testAllEqual() throws Exception {
int numValues = atLeast(10000); int numValues = atLeast(10000);
long value = randomValue(false); long value = randomValue(false);
@ -1113,4 +1227,601 @@ public class TestPointQueries extends LuceneTestCase {
new boolean[] { false, true }).toString()); new boolean[] { false, true }).toString());
} }
private int[] toArray(Set<Integer> valuesSet) {
int[] values = new int[valuesSet.size()];
int upto = 0;
for(Integer value : valuesSet) {
values[upto++] = value;
}
return values;
}
private static int randomIntValue(Integer min, Integer max) {
if (min == null) {
return random().nextInt();
} else {
return TestUtil.nextInt(random(), min, max);
}
}
public void testRandomPointInSetQuery() throws Exception {
boolean useNarrowRange = random().nextBoolean();
final Integer valueMin;
final Integer valueMax;
int numValues;
if (useNarrowRange) {
int gap = random().nextInt(100);
valueMin = random().nextInt(Integer.MAX_VALUE-gap);
valueMax = valueMin + gap;
numValues = TestUtil.nextInt(random(), 1, gap+1);
} else {
valueMin = null;
valueMax = null;
numValues = TestUtil.nextInt(random(), 1, 100);
}
final Set<Integer> valuesSet = new HashSet<>();
while (valuesSet.size() < numValues) {
valuesSet.add(randomIntValue(valueMin, valueMax));
}
int[] values = toArray(valuesSet);
int numDocs = TestUtil.nextInt(random(), 1, 10000);
if (VERBOSE) {
System.out.println("TEST: numValues=" + numValues + " numDocs=" + numDocs);
}
Directory dir;
if (numDocs > 100000) {
dir = newFSDirectory(createTempDir("TestPointQueries"));
} else {
dir = newDirectory();
}
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(getCodec());
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
int[] docValues = new int[numDocs];
for(int i=0;i<numDocs;i++) {
int x = values[random().nextInt(values.length)];
Document doc = new Document();
doc.add(new IntPoint("int", x));
docValues[i] = x;
w.addDocument(doc);
}
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println(" forceMerge(1)");
}
w.forceMerge(1);
}
final IndexReader r = w.getReader();
w.close();
IndexSearcher s = newSearcher(r);
int numThreads = TestUtil.nextInt(random(), 2, 5);
if (VERBOSE) {
System.out.println("TEST: use " + numThreads + " query threads; searcher=" + s);
}
List<Thread> threads = new ArrayList<>();
final int iters = atLeast(100);
final CountDownLatch startingGun = new CountDownLatch(1);
final AtomicBoolean failed = new AtomicBoolean();
for(int i=0;i<numThreads;i++) {
Thread thread = new Thread() {
@Override
public void run() {
try {
_run();
} catch (Exception e) {
failed.set(true);
throw new RuntimeException(e);
}
}
private void _run() throws Exception {
startingGun.await();
for (int iter=0;iter<iters && failed.get() == false;iter++) {
int numValidValuesToQuery = random().nextInt(values.length);
Set<Integer> valuesToQuery = new HashSet<>();
while (valuesToQuery.size() < numValidValuesToQuery) {
valuesToQuery.add(values[random().nextInt(values.length)]);
}
int numExtraValuesToQuery = random().nextInt(20);
while (valuesToQuery.size() < numValidValuesToQuery + numExtraValuesToQuery) {
valuesToQuery.add(random().nextInt());
}
int expectedCount = 0;
for(int value : docValues) {
if (valuesToQuery.contains(value)) {
expectedCount++;
}
}
if (VERBOSE) {
System.out.println("TEST: thread=" + Thread.currentThread() + " values=" + valuesToQuery + " expectedCount=" + expectedCount);
}
assertEquals(expectedCount, s.count(IntPoint.newSetQuery("int", toArray(valuesToQuery))));
}
}
};
thread.setName("T" + i);
thread.start();
threads.add(thread);
}
startingGun.countDown();
for(Thread thread : threads) {
thread.join();
}
IOUtils.close(r, dir);
}
// TODO: in the future, if there is demand for real usage, we can "graduate" this test-only query factory as IntPoint.newMultiSetQuery or
// something (and same for other XXXPoint classes):
private static Query newMultiDimIntSetQuery(String field, final int numDims, int... valuesIn) throws IOException {
if (valuesIn.length % numDims != 0) {
throw new IllegalArgumentException("incongruent number of values: valuesIn.length=" + valuesIn.length + " but numDims=" + numDims);
}
// Pack all values:
byte[][] packedValues = new byte[valuesIn.length / numDims][];
for(int i=0;i<packedValues.length;i++) {
byte[] packedValue = new byte[numDims * Integer.BYTES];
packedValues[i] = packedValue;
for(int dim=0;dim<numDims;dim++) {
IntPoint.encodeDimension(valuesIn[i*numDims+dim], packedValue, dim*Integer.BYTES);
}
}
// Sort:
Arrays.sort(packedValues,
new Comparator<byte[]>() {
@Override
public int compare(byte[] a, byte[] b) {
return StringHelper.compare(a.length, a, 0, b, 0);
}
});
final BytesRef value = new BytesRef();
value.length = numDims * Integer.BYTES;
return new PointInSetQuery(field,
numDims,
Integer.BYTES,
new BytesRefIterator() {
int upto;
@Override
public BytesRef next() {
if (upto >= packedValues.length) {
return null;
}
value.bytes = packedValues[upto];
upto++;
return value;
}
}) {
@Override
protected String toString(byte[] value) {
assert value.length == numDims * Integer.BYTES;
StringBuilder sb = new StringBuilder();
for(int dim=0;dim<numDims;dim++) {
if (dim > 0) {
sb.append(',');
}
sb.append(Integer.toString(IntPoint.decodeDimension(value, dim*Integer.BYTES)));
}
return sb.toString();
}
};
}
public void testBasicMultiDimPointInSetQuery() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(getCodec());
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new IntPoint("int", 17, 42));
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
IndexSearcher s = newSearcher(r);
assertEquals(0, s.count(newMultiDimIntSetQuery("int", 2, 17, 41)));
assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, 17, 42)));
assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, -7, -7, 17, 42)));
assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, 17, 42, -14, -14)));
w.close();
r.close();
dir.close();
}
public void testBasicMultiValueMultiDimPointInSetQuery() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(getCodec());
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new IntPoint("int", 17, 42));
doc.add(new IntPoint("int", 34, 79));
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
IndexSearcher s = newSearcher(r);
assertEquals(0, s.count(newMultiDimIntSetQuery("int", 2, 17, 41)));
assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, 17, 42)));
assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, 17, 42, 34, 79)));
assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, -7, -7, 17, 42)));
assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, -7, -7, 34, 79)));
assertEquals(1, s.count(newMultiDimIntSetQuery("int", 2, 17, 42, -14, -14)));
assertEquals("int:{-14,-14 17,42}", newMultiDimIntSetQuery("int", 2, 17, 42, -14, -14).toString());
w.close();
r.close();
dir.close();
}
public void testManyEqualValuesMultiDimPointInSetQuery() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(getCodec());
IndexWriter w = new IndexWriter(dir, iwc);
int zeroCount = 0;
for(int i=0;i<10000;i++) {
int x = random().nextInt(2);
if (x == 0) {
zeroCount++;
}
Document doc = new Document();
doc.add(new IntPoint("int", x, x));
w.addDocument(doc);
}
IndexReader r = DirectoryReader.open(w);
IndexSearcher s = newSearcher(r);
assertEquals(zeroCount, s.count(newMultiDimIntSetQuery("int", 2, 0, 0)));
assertEquals(10000-zeroCount, s.count(newMultiDimIntSetQuery("int", 2, 1, 1)));
assertEquals(0, s.count(newMultiDimIntSetQuery("int", 2, 2, 2)));
w.close();
r.close();
dir.close();
}
public void testInvalidMultiDimPointInSetQuery() throws Exception {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class,
() -> {
newMultiDimIntSetQuery("int", 2, 3, 4, 5);
});
assertEquals("incongruent number of values: valuesIn.length=3 but numDims=2", expected.getMessage());
}
public void testBasicPointInSetQuery() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(getCodec());
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new IntPoint("int", 17));
doc.add(new LongPoint("long", 17L));
doc.add(new FloatPoint("float", 17.0f));
doc.add(new DoublePoint("double", 17.0));
doc.add(new BinaryPoint("bytes", new byte[] {0, 17}));
w.addDocument(doc);
doc = new Document();
doc.add(new IntPoint("int", 42));
doc.add(new LongPoint("long", 42L));
doc.add(new FloatPoint("float", 42.0f));
doc.add(new DoublePoint("double", 42.0));
doc.add(new BinaryPoint("bytes", new byte[] {0, 42}));
w.addDocument(doc);
doc = new Document();
doc.add(new IntPoint("int", 97));
doc.add(new LongPoint("long", 97L));
doc.add(new FloatPoint("float", 97.0f));
doc.add(new DoublePoint("double", 97.0));
doc.add(new BinaryPoint("bytes", new byte[] {0, 97}));
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
IndexSearcher s = newSearcher(r);
assertEquals(0, s.count(IntPoint.newSetQuery("int", 16)));
assertEquals(1, s.count(IntPoint.newSetQuery("int", 17)));
assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 97, 42)));
assertEquals(3, s.count(IntPoint.newSetQuery("int", -7, 17, 42, 97)));
assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 20, 42, 97)));
assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 105, 42, 97)));
assertEquals(0, s.count(LongPoint.newSetQuery("long", 16)));
assertEquals(1, s.count(LongPoint.newSetQuery("long", 17)));
assertEquals(3, s.count(LongPoint.newSetQuery("long", 17, 97, 42)));
assertEquals(3, s.count(LongPoint.newSetQuery("long", -7, 17, 42, 97)));
assertEquals(3, s.count(LongPoint.newSetQuery("long", 17, 20, 42, 97)));
assertEquals(3, s.count(LongPoint.newSetQuery("long", 17, 105, 42, 97)));
assertEquals(0, s.count(FloatPoint.newSetQuery("float", 16)));
assertEquals(1, s.count(FloatPoint.newSetQuery("float", 17)));
assertEquals(3, s.count(FloatPoint.newSetQuery("float", 17, 97, 42)));
assertEquals(3, s.count(FloatPoint.newSetQuery("float", -7, 17, 42, 97)));
assertEquals(3, s.count(FloatPoint.newSetQuery("float", 17, 20, 42, 97)));
assertEquals(3, s.count(FloatPoint.newSetQuery("float", 17, 105, 42, 97)));
assertEquals(0, s.count(DoublePoint.newSetQuery("double", 16)));
assertEquals(1, s.count(DoublePoint.newSetQuery("double", 17)));
assertEquals(3, s.count(DoublePoint.newSetQuery("double", 17, 97, 42)));
assertEquals(3, s.count(DoublePoint.newSetQuery("double", -7, 17, 42, 97)));
assertEquals(3, s.count(DoublePoint.newSetQuery("double", 17, 20, 42, 97)));
assertEquals(3, s.count(DoublePoint.newSetQuery("double", 17, 105, 42, 97)));
assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 16})));
assertEquals(1, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17})));
assertEquals(3, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17}, new byte[] {0, 97}, new byte[] {0, 42})));
assertEquals(3, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, -7}, new byte[] {0, 17}, new byte[] {0, 42}, new byte[] {0, 97})));
assertEquals(3, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17}, new byte[] {0, 20}, new byte[] {0, 42}, new byte[] {0, 97})));
assertEquals(3, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17}, new byte[] {0, 105}, new byte[] {0, 42}, new byte[] {0, 97})));
w.close();
r.close();
dir.close();
}
public void testBasicMultiValuedPointInSetQuery() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(getCodec());
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new IntPoint("int", 17));
doc.add(new IntPoint("int", 42));
doc.add(new LongPoint("long", 17L));
doc.add(new LongPoint("long", 42L));
doc.add(new FloatPoint("float", 17.0f));
doc.add(new FloatPoint("float", 42.0f));
doc.add(new DoublePoint("double", 17.0));
doc.add(new DoublePoint("double", 42.0));
doc.add(new BinaryPoint("bytes", new byte[] {0, 17}));
doc.add(new BinaryPoint("bytes", new byte[] {0, 42}));
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
IndexSearcher s = newSearcher(r);
assertEquals(0, s.count(IntPoint.newSetQuery("int", 16)));
assertEquals(1, s.count(IntPoint.newSetQuery("int", 17)));
assertEquals(1, s.count(IntPoint.newSetQuery("int", 17, 97, 42)));
assertEquals(1, s.count(IntPoint.newSetQuery("int", -7, 17, 42, 97)));
assertEquals(0, s.count(IntPoint.newSetQuery("int", 16, 20, 41, 97)));
assertEquals(0, s.count(LongPoint.newSetQuery("long", 16)));
assertEquals(1, s.count(LongPoint.newSetQuery("long", 17)));
assertEquals(1, s.count(LongPoint.newSetQuery("long", 17, 97, 42)));
assertEquals(1, s.count(LongPoint.newSetQuery("long", -7, 17, 42, 97)));
assertEquals(0, s.count(LongPoint.newSetQuery("long", 16, 20, 41, 97)));
assertEquals(0, s.count(FloatPoint.newSetQuery("float", 16)));
assertEquals(1, s.count(FloatPoint.newSetQuery("float", 17)));
assertEquals(1, s.count(FloatPoint.newSetQuery("float", 17, 97, 42)));
assertEquals(1, s.count(FloatPoint.newSetQuery("float", -7, 17, 42, 97)));
assertEquals(0, s.count(FloatPoint.newSetQuery("float", 16, 20, 41, 97)));
assertEquals(0, s.count(DoublePoint.newSetQuery("double", 16)));
assertEquals(1, s.count(DoublePoint.newSetQuery("double", 17)));
assertEquals(1, s.count(DoublePoint.newSetQuery("double", 17, 97, 42)));
assertEquals(1, s.count(DoublePoint.newSetQuery("double", -7, 17, 42, 97)));
assertEquals(0, s.count(DoublePoint.newSetQuery("double", 16, 20, 41, 97)));
assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 16})));
assertEquals(1, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17})));
assertEquals(1, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17}, new byte[] {0, 97}, new byte[] {0, 42})));
assertEquals(1, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, -7}, new byte[] {0, 17}, new byte[] {0, 42}, new byte[] {0, 97})));
assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 16}, new byte[] {0, 20}, new byte[] {0, 41}, new byte[] {0, 97})));
w.close();
r.close();
dir.close();
}
public void testEmptyPointInSetQuery() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(getCodec());
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new IntPoint("int", 17));
doc.add(new LongPoint("long", 17L));
doc.add(new FloatPoint("float", 17.0f));
doc.add(new DoublePoint("double", 17.0));
doc.add(new BinaryPoint("bytes", new byte[] {0, 17}));
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
IndexSearcher s = newSearcher(r);
assertEquals(0, s.count(IntPoint.newSetQuery("int")));
assertEquals(0, s.count(LongPoint.newSetQuery("long")));
assertEquals(0, s.count(FloatPoint.newSetQuery("float")));
assertEquals(0, s.count(DoublePoint.newSetQuery("double")));
assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes")));
w.close();
r.close();
dir.close();
}
public void testPointInSetQueryManyEqualValues() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(getCodec());
IndexWriter w = new IndexWriter(dir, iwc);
int zeroCount = 0;
for(int i=0;i<10000;i++) {
int x = random().nextInt(2);
if (x == 0) {
zeroCount++;
}
Document doc = new Document();
doc.add(new IntPoint("int", x));
doc.add(new LongPoint("long", (long) x));
doc.add(new FloatPoint("float", (float) x));
doc.add(new DoublePoint("double", (double) x));
doc.add(new BinaryPoint("bytes", new byte[] {(byte) x}));
w.addDocument(doc);
}
IndexReader r = DirectoryReader.open(w);
IndexSearcher s = newSearcher(r);
assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0)));
assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0, -7)));
assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 7, 0)));
assertEquals(10000-zeroCount, s.count(IntPoint.newSetQuery("int", 1)));
assertEquals(0, s.count(IntPoint.newSetQuery("int", 2)));
assertEquals(zeroCount, s.count(LongPoint.newSetQuery("long", 0)));
assertEquals(zeroCount, s.count(LongPoint.newSetQuery("long", 0, -7)));
assertEquals(zeroCount, s.count(LongPoint.newSetQuery("long", 7, 0)));
assertEquals(10000-zeroCount, s.count(LongPoint.newSetQuery("long", 1)));
assertEquals(0, s.count(LongPoint.newSetQuery("long", 2)));
assertEquals(zeroCount, s.count(FloatPoint.newSetQuery("float", 0)));
assertEquals(zeroCount, s.count(FloatPoint.newSetQuery("float", 0, -7)));
assertEquals(zeroCount, s.count(FloatPoint.newSetQuery("float", 7, 0)));
assertEquals(10000-zeroCount, s.count(FloatPoint.newSetQuery("float", 1)));
assertEquals(0, s.count(FloatPoint.newSetQuery("float", 2)));
assertEquals(zeroCount, s.count(DoublePoint.newSetQuery("double", 0)));
assertEquals(zeroCount, s.count(DoublePoint.newSetQuery("double", 0, -7)));
assertEquals(zeroCount, s.count(DoublePoint.newSetQuery("double", 7, 0)));
assertEquals(10000-zeroCount, s.count(DoublePoint.newSetQuery("double", 1)));
assertEquals(0, s.count(DoublePoint.newSetQuery("double", 2)));
assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0})));
assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0}, new byte[] {-7})));
assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {7}, new byte[] {0})));
assertEquals(10000-zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {1})));
assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {2})));
w.close();
r.close();
dir.close();
}
public void testPointInSetQueryManyEqualValuesWithBigGap() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(getCodec());
IndexWriter w = new IndexWriter(dir, iwc);
int zeroCount = 0;
for(int i=0;i<10000;i++) {
int x = 200 * random().nextInt(2);
if (x == 0) {
zeroCount++;
}
Document doc = new Document();
doc.add(new IntPoint("int", x));
doc.add(new LongPoint("long", (long) x));
doc.add(new FloatPoint("float", (float) x));
doc.add(new DoublePoint("double", (double) x));
doc.add(new BinaryPoint("bytes", new byte[] {(byte) x}));
w.addDocument(doc);
}
IndexReader r = DirectoryReader.open(w);
IndexSearcher s = newSearcher(r);
assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0)));
assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0, -7)));
assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 7, 0)));
assertEquals(10000-zeroCount, s.count(IntPoint.newSetQuery("int", 200)));
assertEquals(0, s.count(IntPoint.newSetQuery("int", 2)));
assertEquals(zeroCount, s.count(LongPoint.newSetQuery("long", 0)));
assertEquals(zeroCount, s.count(LongPoint.newSetQuery("long", 0, -7)));
assertEquals(zeroCount, s.count(LongPoint.newSetQuery("long", 7, 0)));
assertEquals(10000-zeroCount, s.count(LongPoint.newSetQuery("long", 200)));
assertEquals(0, s.count(LongPoint.newSetQuery("long", 2)));
assertEquals(zeroCount, s.count(FloatPoint.newSetQuery("float", 0)));
assertEquals(zeroCount, s.count(FloatPoint.newSetQuery("float", 0, -7)));
assertEquals(zeroCount, s.count(FloatPoint.newSetQuery("float", 7, 0)));
assertEquals(10000-zeroCount, s.count(FloatPoint.newSetQuery("float", 200)));
assertEquals(0, s.count(FloatPoint.newSetQuery("float", 2)));
assertEquals(zeroCount, s.count(DoublePoint.newSetQuery("double", 0)));
assertEquals(zeroCount, s.count(DoublePoint.newSetQuery("double", 0, -7)));
assertEquals(zeroCount, s.count(DoublePoint.newSetQuery("double", 7, 0)));
assertEquals(10000-zeroCount, s.count(DoublePoint.newSetQuery("double", 200)));
assertEquals(0, s.count(DoublePoint.newSetQuery("double", 2)));
assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0})));
assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0}, new byte[] {-7})));
assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {7}, new byte[] {0})));
assertEquals(10000-zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {(byte) 200})));
assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {2})));
w.close();
r.close();
dir.close();
}
public void testInvalidPointInSetQuery() throws Exception {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class,
() -> {
new PointInSetQuery("foo", 3, 4,
new BytesRefIterator() {
@Override
public BytesRef next() {
return new BytesRef(new byte[3]);
}
});
});
assertEquals("packed point length should be 12 but got 3; field=\"foo\" numDims=3 bytesPerDim=4", expected.getMessage());
}
public void testInvalidPointInSetBinaryQuery() throws Exception {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class,
() -> {
BinaryPoint.newSetQuery("bytes", new byte[] {2}, new byte[0]);
});
assertEquals("all byte[] must be the same length, but saw 1 and 0", expected.getMessage());
}
public void testPointInSetQueryToString() throws Exception {
// int
assertEquals("int:{-42 18}", IntPoint.newSetQuery("int", -42, 18).toString());
// long
assertEquals("long:{-42 18}", LongPoint.newSetQuery("long", -42L, 18L).toString());
// float
assertEquals("float:{-42.0 18.0}", FloatPoint.newSetQuery("float", -42.0f, 18.0f).toString());
// double
assertEquals("double:{-42.0 18.0}", DoublePoint.newSetQuery("double", -42.0, 18.0).toString());
// binary
assertEquals("bytes:{[12] [2a]}", BinaryPoint.newSetQuery("bytes", new byte[] {42}, new byte[] {18}).toString());
}
} }

View File

@ -16,11 +16,15 @@
*/ */
package org.apache.lucene.document; package org.apache.lucene.document;
import java.io.IOException;
import java.math.BigInteger; import java.math.BigInteger;
import java.util.Arrays;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
/** /**
@ -35,6 +39,7 @@ import org.apache.lucene.util.NumericUtils;
* <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point. * <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point.
* <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range. * <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range.
* <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space. * <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of 1D values.
* </ul> * </ul>
*/ */
public class BigIntegerPoint extends Field { public class BigIntegerPoint extends Field {
@ -214,4 +219,43 @@ public class BigIntegerPoint extends Field {
} }
}; };
} }
/**
* Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
*
* @param field field name. must not be {@code null}.
* @param valuesIn all values to match
*/
public static Query newSetQuery(String field, BigInteger... valuesIn) throws IOException {
// Don't unexpectedly change the user's incoming values array:
BigInteger[] values = valuesIn.clone();
Arrays.sort(values);
final BytesRef value = new BytesRef(new byte[BYTES]);
return new PointInSetQuery(field, 1, BYTES,
new BytesRefIterator() {
int upto;
@Override
public BytesRef next() {
if (upto == values.length) {
return null;
} else {
encodeDimension(values[upto], value.bytes, 0);
upto++;
return value;
}
}
}) {
@Override
protected String toString(byte[] value) {
assert value.length == BYTES;
return decodeDimension(value, 0).toString();
}
};
}
} }

View File

@ -16,12 +16,16 @@
*/ */
package org.apache.lucene.document; package org.apache.lucene.document;
import java.io.IOException;
import java.net.InetAddress; import java.net.InetAddress;
import java.net.UnknownHostException; import java.net.UnknownHostException;
import java.util.Arrays;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
/** /**
* An indexed 128-bit {@code InetAddress} field. * An indexed 128-bit {@code InetAddress} field.
@ -35,6 +39,7 @@ import org.apache.lucene.util.BytesRef;
* <li>{@link #newExactQuery newExactQuery()} for matching an exact network address. * <li>{@link #newExactQuery newExactQuery()} for matching an exact network address.
* <li>{@link #newPrefixQuery newPrefixQuery()} for matching a network based on CIDR prefix. * <li>{@link #newPrefixQuery newPrefixQuery()} for matching a network based on CIDR prefix.
* <li>{@link #newRangeQuery newRangeQuery()} for matching arbitrary network address ranges. * <li>{@link #newRangeQuery newRangeQuery()} for matching arbitrary network address ranges.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of 1D values.
* </ul> * </ul>
* <p> * <p>
* This field supports both IPv4 and IPv6 addresses: IPv4 addresses are converted * This field supports both IPv4 and IPv6 addresses: IPv4 addresses are converted
@ -194,11 +199,11 @@ public class InetAddressPoint extends Field {
public static Query newRangeQuery(String field, InetAddress lowerValue, boolean lowerInclusive, InetAddress upperValue, boolean upperInclusive) { public static Query newRangeQuery(String field, InetAddress lowerValue, boolean lowerInclusive, InetAddress upperValue, boolean upperInclusive) {
byte[][] lowerBytes = new byte[1][]; byte[][] lowerBytes = new byte[1][];
if (lowerValue != null) { if (lowerValue != null) {
lowerBytes[0] = InetAddressPoint.encode(lowerValue); lowerBytes[0] = encode(lowerValue);
} }
byte[][] upperBytes = new byte[1][]; byte[][] upperBytes = new byte[1][];
if (upperValue != null) { if (upperValue != null) {
upperBytes[0] = InetAddressPoint.encode(upperValue); upperBytes[0] = encode(upperValue);
} }
return new PointRangeQuery(field, lowerBytes, new boolean[] { lowerInclusive }, upperBytes, new boolean[] { upperInclusive }) { return new PointRangeQuery(field, lowerBytes, new boolean[] { lowerInclusive }, upperBytes, new boolean[] { upperInclusive }) {
@Override @Override
@ -207,4 +212,44 @@ public class InetAddressPoint extends Field {
} }
}; };
} }
/**
* Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
*
* @param field field name. must not be {@code null}.
* @param valuesIn all values to match
*/
public static Query newSetQuery(String field, InetAddress... valuesIn) throws IOException {
// Don't unexpectedly change the user's incoming values array:
InetAddress[] values = valuesIn.clone();
Arrays.sort(values);
final BytesRef value = new BytesRef(new byte[BYTES]);
return new PointInSetQuery(field, 1, BYTES,
new BytesRefIterator() {
int upto;
@Override
public BytesRef next() {
if (upto == values.length) {
return null;
} else {
value.bytes = encode(values[upto]);
assert value.bytes.length == value.length;
upto++;
return value;
}
}
}) {
@Override
protected String toString(byte[] value) {
assert value.length == BYTES;
return decode(value).getHostAddress();
}
};
}
} }

View File

@ -43,6 +43,9 @@ public class TestBigIntegerPoint extends LuceneTestCase {
IndexSearcher searcher = newSearcher(reader); IndexSearcher searcher = newSearcher(reader);
assertEquals(1, searcher.count(BigIntegerPoint.newExactQuery("field", large))); assertEquals(1, searcher.count(BigIntegerPoint.newExactQuery("field", large)));
assertEquals(1, searcher.count(BigIntegerPoint.newRangeQuery("field", large.subtract(BigInteger.ONE), false, large.add(BigInteger.ONE), false))); assertEquals(1, searcher.count(BigIntegerPoint.newRangeQuery("field", large.subtract(BigInteger.ONE), false, large.add(BigInteger.ONE), false)));
assertEquals(1, searcher.count(BigIntegerPoint.newSetQuery("field", large)));
assertEquals(0, searcher.count(BigIntegerPoint.newSetQuery("field", large.subtract(BigInteger.ONE))));
assertEquals(0, searcher.count(BigIntegerPoint.newSetQuery("field")));
reader.close(); reader.close();
writer.close(); writer.close();
@ -83,5 +86,13 @@ public class TestBigIntegerPoint extends LuceneTestCase {
public void testToString() throws Exception { public void testToString() throws Exception {
assertEquals("BigIntegerPoint <field:1>", new BigIntegerPoint("field", BigInteger.ONE).toString()); assertEquals("BigIntegerPoint <field:1>", new BigIntegerPoint("field", BigInteger.ONE).toString());
assertEquals("BigIntegerPoint <field:1,-2>", new BigIntegerPoint("field", BigInteger.ONE, BigInteger.valueOf(-2)).toString()); assertEquals("BigIntegerPoint <field:1,-2>", new BigIntegerPoint("field", BigInteger.ONE, BigInteger.valueOf(-2)).toString());
assertEquals("field:[1 TO 1]", BigIntegerPoint.newExactQuery("field", BigInteger.ONE).toString());
assertEquals("field:{1 TO 17]", BigIntegerPoint.newRangeQuery("field", BigInteger.ONE, false, BigInteger.valueOf(17), true).toString());
assertEquals("field:{1 TO 17],[0 TO 42}", BigIntegerPoint.newMultiRangeQuery("field",
new BigInteger[] {BigInteger.ONE, BigInteger.ZERO},
new boolean[] {false, true},
new BigInteger[] {BigInteger.valueOf(17), BigInteger.valueOf(42)},
new boolean[] {true, false}).toString());
assertEquals("field:{1}", BigIntegerPoint.newSetQuery("field", BigInteger.ONE).toString());
} }
} }

View File

@ -44,6 +44,9 @@ public class TestInetAddressPoint extends LuceneTestCase {
assertEquals(1, searcher.count(InetAddressPoint.newExactQuery("field", address))); assertEquals(1, searcher.count(InetAddressPoint.newExactQuery("field", address)));
assertEquals(1, searcher.count(InetAddressPoint.newPrefixQuery("field", address, 24))); assertEquals(1, searcher.count(InetAddressPoint.newPrefixQuery("field", address, 24)));
assertEquals(1, searcher.count(InetAddressPoint.newRangeQuery("field", InetAddress.getByName("1.2.3.3"), false, InetAddress.getByName("1.2.3.5"), false))); assertEquals(1, searcher.count(InetAddressPoint.newRangeQuery("field", InetAddress.getByName("1.2.3.3"), false, InetAddress.getByName("1.2.3.5"), false)));
assertEquals(1, searcher.count(InetAddressPoint.newSetQuery("field", InetAddress.getByName("1.2.3.4"))));
assertEquals(0, searcher.count(InetAddressPoint.newSetQuery("field", InetAddress.getByName("1.2.3.3"))));
assertEquals(0, searcher.count(InetAddressPoint.newSetQuery("field")));
reader.close(); reader.close();
writer.close(); writer.close();
@ -83,5 +86,6 @@ public class TestInetAddressPoint extends LuceneTestCase {
assertEquals("field:[1.2.3.0 TO 1.2.3.255]", InetAddressPoint.newPrefixQuery("field", InetAddress.getByName("1.2.3.4"), 24).toString()); assertEquals("field:[1.2.3.0 TO 1.2.3.255]", InetAddressPoint.newPrefixQuery("field", InetAddress.getByName("1.2.3.4"), 24).toString());
assertEquals("field:[fdc8:57ed:f042:ad1:0:0:0:0 TO fdc8:57ed:f042:ad1:ffff:ffff:ffff:ffff]", InetAddressPoint.newPrefixQuery("field", InetAddress.getByName("fdc8:57ed:f042:0ad1:f66d:4ff:fe90:ce0c"), 64).toString()); assertEquals("field:[fdc8:57ed:f042:ad1:0:0:0:0 TO fdc8:57ed:f042:ad1:ffff:ffff:ffff:ffff]", InetAddressPoint.newPrefixQuery("field", InetAddress.getByName("fdc8:57ed:f042:0ad1:f66d:4ff:fe90:ce0c"), 64).toString());
assertEquals("field:{fdc8:57ed:f042:ad1:f66d:4ff:fe90:ce0c}", InetAddressPoint.newSetQuery("field", InetAddress.getByName("fdc8:57ed:f042:0ad1:f66d:4ff:fe90:ce0c")).toString());
} }
} }