add BigIntegerPoint.newSetQuery; fix Float/DoublePoint to encode/decode correctly

This commit is contained in:
Mike McCandless 2016-02-24 16:39:13 -05:00
parent 1c3d52d6b5
commit ae70920378
12 changed files with 137 additions and 32 deletions

View File

@ -37,6 +37,7 @@ import org.apache.lucene.util.NumericUtils;
* <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point.
* <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range.
* <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of 1D values.
* </ul>
*/
public final class DoublePoint extends Field {
@ -139,12 +140,12 @@ public final class DoublePoint extends Field {
/** Encode single double dimension */
public static void encodeDimension(double value, byte dest[], int offset) {
NumericUtils.longToBytesDirect(NumericUtils.doubleToSortableLong(value), dest, offset);
NumericUtils.longToBytes(NumericUtils.doubleToSortableLong(value), dest, offset);
}
/** Decode single double dimension */
public static double decodeDimension(byte value[], int offset) {
return NumericUtils.sortableLongToDouble(NumericUtils.bytesToLongDirect(value, offset));
return NumericUtils.sortableLongToDouble(NumericUtils.bytesToLong(value, offset));
}
// static methods for generating queries
@ -223,7 +224,7 @@ public final class DoublePoint extends Field {
* Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
*
* @param field field name. must not be {@code null}.
* @param valuesIn all int values to match
* @param valuesIn all values to match
*/
public static Query newSetQuery(String field, double... valuesIn) throws IOException {

View File

@ -37,6 +37,7 @@ import org.apache.lucene.util.NumericUtils;
* <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point.
* <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range.
* <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of 1D values.
* </ul>
*/
public final class FloatPoint extends Field {
@ -139,12 +140,12 @@ public final class FloatPoint extends Field {
/** Encode single float dimension */
public static void encodeDimension(float value, byte dest[], int offset) {
NumericUtils.intToBytesDirect(NumericUtils.floatToSortableInt(value), dest, offset);
NumericUtils.intToBytes(NumericUtils.floatToSortableInt(value), dest, offset);
}
/** Decode single float dimension */
public static float decodeDimension(byte value[], int offset) {
return NumericUtils.sortableIntToFloat(NumericUtils.bytesToIntDirect(value, offset));
return NumericUtils.sortableIntToFloat(NumericUtils.bytesToInt(value, offset));
}
// static methods for generating queries
@ -223,7 +224,7 @@ public final class FloatPoint extends Field {
* Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
*
* @param field field name. must not be {@code null}.
* @param valuesIn all int values to match
* @param valuesIn all values to match
*/
public static Query newSetQuery(String field, float... valuesIn) throws IOException {
@ -232,8 +233,6 @@ public final class FloatPoint extends Field {
Arrays.sort(values);
System.out.println("VALUES: " + Arrays.toString(values));
final BytesRef value = new BytesRef(new byte[Float.BYTES]);
return new PointInSetQuery(field, 1, Float.BYTES,
@ -248,7 +247,6 @@ public final class FloatPoint extends Field {
} else {
encodeDimension(values[upto], value.bytes, 0);
upto++;
System.out.println("ret: " + value);
return value;
}
}

View File

@ -37,6 +37,7 @@ import org.apache.lucene.util.NumericUtils;
* <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point.
* <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range.
* <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of 1D values.
* </ul>
*/
public final class IntPoint extends Field {
@ -223,7 +224,7 @@ public final class IntPoint extends Field {
* Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
*
* @param field field name. must not be {@code null}.
* @param valuesIn all int values to match
* @param valuesIn all values to match
*/
public static Query newSetQuery(String field, int... valuesIn) throws IOException {

View File

@ -37,6 +37,7 @@ import org.apache.lucene.util.NumericUtils;
* <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point.
* <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range.
* <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of 1D values.
* </ul>
*/
public final class LongPoint extends Field {
@ -223,7 +224,7 @@ public final class LongPoint extends Field {
* Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
*
* @param field field name. must not be {@code null}.
* @param valuesIn all int values to match
* @param valuesIn all values to match
*/
public static Query newSetQuery(String field, long... valuesIn) throws IOException {

View File

@ -42,6 +42,7 @@ import org.apache.lucene.util.StringHelper;
/** Finds all documents whose point value, previously indexed with e.g. {@link org.apache.lucene.document.LongPoint}, is contained in the
* specified set */
// nocommit explain that the 1D case must be pre-sorted
public class PointInSetQuery extends Query {
// A little bit overkill for us, since all of our "terms" are always in the same field:
final PrefixCodedTerms sortedPackedPoints;
@ -57,7 +58,7 @@ public class PointInSetQuery extends Query {
throw new IllegalArgumentException("bytesPerDim must be > 0 and <= " + PointValues.MAX_NUM_BYTES + "; got " + bytesPerDim);
}
this.bytesPerDim = bytesPerDim;
if (numDims < 1 || bytesPerDim > PointValues.MAX_DIMENSIONS) {
if (numDims < 1 || numDims > PointValues.MAX_DIMENSIONS) {
throw new IllegalArgumentException("numDims must be > 0 and <= " + PointValues.MAX_DIMENSIONS + "; got " + numDims);
}
this.numDims = numDims;
@ -73,6 +74,7 @@ public class PointInSetQuery extends Query {
}
if (previous == null) {
previous = new BytesRefBuilder();
// nocommit detect out-of-order 1D case
} else if (previous.get().equals(current)) {
continue; // deduplicate
}

View File

@ -158,27 +158,18 @@ public final class NumericUtils {
public static void intToBytes(int x, byte[] dest, int offset) {
// Flip the sign bit, so negative ints sort before positive ints correctly:
x ^= 0x80000000;
intToBytesDirect(x, dest, offset);
}
public static void intToBytesDirect(int x, byte[] dest, int offset) {
for (int i = 0; i < 4; i++) {
dest[offset+i] = (byte) (x >> 24-i*8);
}
}
public static int bytesToInt(byte[] src, int index) {
int x = bytesToIntDirect(src, index);
// Re-flip the sign bit to restore the original value:
return x ^ 0x80000000;
}
public static int bytesToIntDirect(byte[] src, int offset) {
public static int bytesToInt(byte[] src, int offset) {
int x = 0;
for (int i = 0; i < 4; i++) {
x |= (src[offset+i] & 0xff) << (24-i*8);
}
return x;
// Re-flip the sign bit to restore the original value:
return x ^ 0x80000000;
}
public static void longToBytes(long v, byte[] bytes, int offset) {

View File

@ -28,6 +28,7 @@ import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointFormat;
@ -95,6 +96,60 @@ public class TestPointQueries extends LuceneTestCase {
}
}
public void testBasicFloats() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
doc.add(new FloatPoint("point", -7.0f));
w.addDocument(doc);
doc = new Document();
doc.add(new FloatPoint("point", 0.0f));
w.addDocument(doc);
doc = new Document();
doc.add(new FloatPoint("point", 3.0f));
w.addDocument(doc);
DirectoryReader r = DirectoryReader.open(w);
IndexSearcher s = new IndexSearcher(r);
assertEquals(2, s.count(FloatPoint.newRangeQuery("point", -8.0f, false, 1.0f, false)));
assertEquals(3, s.count(FloatPoint.newRangeQuery("point", -7.0f, true, 3.0f, true)));
assertEquals(1, s.count(FloatPoint.newExactQuery("point", -7.0f)));
assertEquals(0, s.count(FloatPoint.newExactQuery("point", -6.0f)));
w.close();
r.close();
dir.close();
}
public void testBasicDoubles() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
doc.add(new DoublePoint("point", -7.0));
w.addDocument(doc);
doc = new Document();
doc.add(new DoublePoint("point", 0.0));
w.addDocument(doc);
doc = new Document();
doc.add(new DoublePoint("point", 3.0));
w.addDocument(doc);
DirectoryReader r = DirectoryReader.open(w);
IndexSearcher s = new IndexSearcher(r);
assertEquals(2, s.count(DoublePoint.newRangeQuery("point", -8.0, false, 1.0, false)));
assertEquals(3, s.count(DoublePoint.newRangeQuery("point", -7.0, true, 3.0, true)));
assertEquals(1, s.count(DoublePoint.newExactQuery("point", -7.0)));
assertEquals(0, s.count(DoublePoint.newExactQuery("point", -6.0)));
w.close();
r.close();
dir.close();
}
public void testAllEqual() throws Exception {
int numValues = atLeast(10000);
long value = randomValue(false);

View File

@ -16,11 +16,15 @@
*/
package org.apache.lucene.document;
import java.io.IOException;
import java.math.BigInteger;
import java.util.Arrays;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.NumericUtils;
/**
@ -34,6 +38,7 @@ import org.apache.lucene.util.NumericUtils;
* <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point.
* <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range.
* <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of 1D values.
* </ul>
*/
public class BigIntegerPoint extends Field {
@ -214,5 +219,42 @@ public class BigIntegerPoint extends Field {
};
}
// nocommit newSetQuery
/**
* Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
*
* @param field field name. must not be {@code null}.
* @param valuesIn all values to match
*/
public static Query newSetQuery(String field, BigInteger... valuesIn) throws IOException {
// Don't unexpectedly change the user's incoming values array:
BigInteger[] values = valuesIn.clone();
Arrays.sort(values);
final BytesRef value = new BytesRef(new byte[BYTES]);
return new PointInSetQuery(field, 1, BYTES,
new BytesRefIterator() {
int upto;
@Override
public BytesRef next() {
if (upto == values.length) {
return null;
} else {
encodeDimension(values[upto], value.bytes, 0);
upto++;
return value;
}
}
}) {
@Override
protected String toString(byte[] value) {
assert value.length == BYTES;
return decodeDimension(value, 0).toString();
}
};
}
}

View File

@ -16,12 +16,16 @@
*/
package org.apache.lucene.document;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Arrays;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
/**
* A field indexing {@link InetAddress} dimensionally such that finding
@ -34,7 +38,7 @@ import org.apache.lucene.util.BytesRef;
* <li>{@link #newExactQuery newExactQuery()} for matching an exact network address.
* <li>{@link #newPrefixQuery newPrefixQuery()} for matching a network based on CIDR prefix.
* <li>{@link #newRangeQuery newRangeQuery()} for matching arbitrary network address ranges.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of addresses.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of 1D values.
* </ul>
* <p>
* This field supports both IPv4 and IPv6 addresses: IPv4 addresses are converted
@ -212,7 +216,7 @@ public class InetAddressPoint extends Field {
* Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
*
* @param field field name. must not be {@code null}.
* @param valuesIn all int values to match
* @param valuesIn all values to match
*/
public static Query newSetQuery(String field, InetAddress... valuesIn) throws IOException {
@ -233,7 +237,8 @@ public class InetAddressPoint extends Field {
if (upto == values.length) {
return null;
} else {
encode(values[upto], value.bytes, 0);
value.bytes = encode(values[upto]);
assert value.bytes.length == value.length;
upto++;
return value;
}

View File

@ -129,8 +129,6 @@ public class LatLonPoint extends Field {
return x / LON_SCALE;
}
// nocommit newSetQuery
/** Turns quantized value from byte array back into a double. */
public static double decodeLon(byte[] src, int offset) {
return decodeLon(NumericUtils.bytesToInt(src, offset));

View File

@ -43,6 +43,8 @@ public class TestBigIntegerPoint extends LuceneTestCase {
IndexSearcher searcher = newSearcher(reader);
assertEquals(1, searcher.count(BigIntegerPoint.newExactQuery("field", large)));
assertEquals(1, searcher.count(BigIntegerPoint.newRangeQuery("field", large.subtract(BigInteger.ONE), false, large.add(BigInteger.ONE), false)));
assertEquals(1, searcher.count(BigIntegerPoint.newSetQuery("field", large)));
assertEquals(0, searcher.count(BigIntegerPoint.newSetQuery("field", large.subtract(BigInteger.ONE))));
reader.close();
writer.close();
@ -83,5 +85,13 @@ public class TestBigIntegerPoint extends LuceneTestCase {
public void testToString() throws Exception {
assertEquals("BigIntegerPoint <field:1>", new BigIntegerPoint("field", BigInteger.ONE).toString());
assertEquals("BigIntegerPoint <field:1,-2>", new BigIntegerPoint("field", BigInteger.ONE, BigInteger.valueOf(-2)).toString());
assertEquals("field:[1 TO 1]", BigIntegerPoint.newExactQuery("field", BigInteger.ONE).toString());
assertEquals("field:{1 TO 17]", BigIntegerPoint.newRangeQuery("field", BigInteger.ONE, false, BigInteger.valueOf(17), true).toString());
assertEquals("field:{1 TO 17],[0 TO 42}", BigIntegerPoint.newMultiRangeQuery("field",
new BigInteger[] {BigInteger.ONE, BigInteger.ZERO},
new boolean[] {false, true},
new BigInteger[] {BigInteger.valueOf(17), BigInteger.valueOf(42)},
new boolean[] {true, false}).toString());
assertEquals("field:{1}", BigIntegerPoint.newSetQuery("field", BigInteger.ONE).toString());
}
}

View File

@ -44,8 +44,8 @@ public class TestInetAddressPoint extends LuceneTestCase {
assertEquals(1, searcher.count(InetAddressPoint.newExactQuery("field", address)));
assertEquals(1, searcher.count(InetAddressPoint.newPrefixQuery("field", address, 24)));
assertEquals(1, searcher.count(InetAddressPoint.newRangeQuery("field", InetAddress.getByName("1.2.3.3"), false, InetAddress.getByName("1.2.3.5"), false)));
assertEquals(1, searcher.count(InetAddressPoint.newSetQuery("field", InetAddress.getByName("1.2.3.3")));
assertEquals(0, searcher.count(InetAddressPoint.newSetQuery("field", InetAddress.getByName("1.2.3.4")));
assertEquals(1, searcher.count(InetAddressPoint.newSetQuery("field", InetAddress.getByName("1.2.3.4"))));
assertEquals(0, searcher.count(InetAddressPoint.newSetQuery("field", InetAddress.getByName("1.2.3.3"))));
reader.close();
writer.close();
@ -85,5 +85,6 @@ public class TestInetAddressPoint extends LuceneTestCase {
assertEquals("field:[1.2.3.0 TO 1.2.3.255]", InetAddressPoint.newPrefixQuery("field", InetAddress.getByName("1.2.3.4"), 24).toString());
assertEquals("field:[fdc8:57ed:f042:ad1:0:0:0:0 TO fdc8:57ed:f042:ad1:ffff:ffff:ffff:ffff]", InetAddressPoint.newPrefixQuery("field", InetAddress.getByName("fdc8:57ed:f042:0ad1:f66d:4ff:fe90:ce0c"), 64).toString());
assertEquals("field:{fdc8:57ed:f042:ad1:f66d:4ff:fe90:ce0c}", InetAddressPoint.newSetQuery("field", InetAddress.getByName("fdc8:57ed:f042:0ad1:f66d:4ff:fe90:ce0c")).toString());
}
}