add BinaryPoint.newSetQuery

This commit is contained in:
Mike McCandless 2016-02-24 17:45:56 -05:00
parent c056f4b388
commit 41a336f1e5
3 changed files with 124 additions and 13 deletions

View File

@ -16,9 +16,16 @@
*/
package org.apache.lucene.document;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.StringHelper;
/**
* A binary field that is indexed dimensionally such that finding
@ -31,6 +38,7 @@ import org.apache.lucene.util.BytesRef;
* <li>{@link #newExactQuery newExactQuery()} for matching an exact 1D point.
* <li>{@link #newRangeQuery newRangeQuery()} for matching a 1D range.
* <li>{@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space.
* <li>{@link #newSetQuery newSetQuery()} for matching a set of 1D values.
* </ul>
*/
public final class BinaryPoint extends Field {
@ -197,4 +205,64 @@ public final class BinaryPoint extends Field {
}
};
}
/**
* Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}.
*
* @param field field name. must not be {@code null}.
* @param valuesIn all values to match
*/
public static Query newSetQuery(String field, byte[]... valuesIn) throws IOException {
// Make sure all byte[] have the same length
int bytesPerDim = -1;
for(byte[] value : valuesIn) {
if (bytesPerDim == -1) {
bytesPerDim = value.length;
} else if (value.length != bytesPerDim) {
throw new IllegalArgumentException("all byte[] must be the same length, but saw " + bytesPerDim + " and " + value.length);
}
}
// Don't unexpectedly change the user's incoming values array:
byte[][] values = valuesIn.clone();
Arrays.sort(values,
new Comparator<byte[]>() {
@Override
public int compare(byte[] a, byte[] b) {
return StringHelper.compare(a.length, a, 0, b, 0);
}
});
// Silliness:
if (bytesPerDim == -1) {
// nocommit make sure this is tested
bytesPerDim = 1;
}
final BytesRef value = new BytesRef(new byte[bytesPerDim]);
return new PointInSetQuery(field, 1, bytesPerDim,
new BytesRefIterator() {
int upto;
@Override
public BytesRef next() {
if (upto == values.length) {
return null;
} else {
value.bytes = values[upto];
upto++;
return value;
}
}
}) {
@Override
protected String toString(byte[] value) {
return new BytesRef(value).toString();
}
};
}
}

View File

@ -116,7 +116,6 @@ public class PointInSetQuery extends Query {
if (fieldInfo.getPointNumBytes() != bytesPerDim) {
throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + fieldInfo.getPointNumBytes() + " but this query has bytesPerDim=" + bytesPerDim);
}
int bytesPerDim = fieldInfo.getPointNumBytes();
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());

View File

@ -1383,6 +1383,7 @@ public class TestPointQueries extends LuceneTestCase {
doc.add(new LongPoint("long", 17L));
doc.add(new FloatPoint("float", 17.0f));
doc.add(new DoublePoint("double", 17.0));
doc.add(new BinaryPoint("bytes", new byte[] {0, 17}));
w.addDocument(doc);
doc = new Document();
@ -1390,6 +1391,7 @@ public class TestPointQueries extends LuceneTestCase {
doc.add(new LongPoint("long", 42L));
doc.add(new FloatPoint("float", 42.0f));
doc.add(new DoublePoint("double", 42.0));
doc.add(new BinaryPoint("bytes", new byte[] {0, 42}));
w.addDocument(doc);
doc = new Document();
@ -1397,6 +1399,7 @@ public class TestPointQueries extends LuceneTestCase {
doc.add(new LongPoint("long", 97L));
doc.add(new FloatPoint("float", 97.0f));
doc.add(new DoublePoint("double", 97.0));
doc.add(new BinaryPoint("bytes", new byte[] {0, 97}));
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
@ -1415,19 +1418,27 @@ public class TestPointQueries extends LuceneTestCase {
assertEquals(3, s.count(LongPoint.newSetQuery("long", 17, 20, 42, 97)));
assertEquals(3, s.count(LongPoint.newSetQuery("long", 17, 105, 42, 97)));
assertEquals(0, s.count(LongPoint.newSetQuery("float", 16)));
assertEquals(1, s.count(LongPoint.newSetQuery("float", 17)));
assertEquals(3, s.count(LongPoint.newSetQuery("float", 17, 97, 42)));
assertEquals(3, s.count(LongPoint.newSetQuery("float", -7, 17, 42, 97)));
assertEquals(3, s.count(LongPoint.newSetQuery("float", 17, 20, 42, 97)));
assertEquals(3, s.count(LongPoint.newSetQuery("float", 17, 105, 42, 97)));
assertEquals(0, s.count(FloatPoint.newSetQuery("float", 16)));
assertEquals(1, s.count(FloatPoint.newSetQuery("float", 17)));
assertEquals(3, s.count(FloatPoint.newSetQuery("float", 17, 97, 42)));
assertEquals(3, s.count(FloatPoint.newSetQuery("float", -7, 17, 42, 97)));
assertEquals(3, s.count(FloatPoint.newSetQuery("float", 17, 20, 42, 97)));
assertEquals(3, s.count(FloatPoint.newSetQuery("float", 17, 105, 42, 97)));
assertEquals(0, s.count(LongPoint.newSetQuery("double", 16)));
assertEquals(1, s.count(LongPoint.newSetQuery("double", 17)));
assertEquals(3, s.count(LongPoint.newSetQuery("double", 17, 97, 42)));
assertEquals(3, s.count(LongPoint.newSetQuery("double", -7, 17, 42, 97)));
assertEquals(3, s.count(LongPoint.newSetQuery("double", 17, 20, 42, 97)));
assertEquals(3, s.count(LongPoint.newSetQuery("double", 17, 105, 42, 97)));
assertEquals(0, s.count(DoublePoint.newSetQuery("double", 16)));
assertEquals(1, s.count(DoublePoint.newSetQuery("double", 17)));
assertEquals(3, s.count(DoublePoint.newSetQuery("double", 17, 97, 42)));
assertEquals(3, s.count(DoublePoint.newSetQuery("double", -7, 17, 42, 97)));
assertEquals(3, s.count(DoublePoint.newSetQuery("double", 17, 20, 42, 97)));
assertEquals(3, s.count(DoublePoint.newSetQuery("double", 17, 105, 42, 97)));
// nocommit make sure invalid bytes length hits iae
assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 16})));
assertEquals(1, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17})));
assertEquals(3, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17}, new byte[] {0, 97}, new byte[] {0, 42})));
assertEquals(3, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, -7}, new byte[] {0, 17}, new byte[] {0, 42}, new byte[] {0, 97})));
assertEquals(3, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17}, new byte[] {0, 20}, new byte[] {0, 42}, new byte[] {0, 97})));
assertEquals(3, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17}, new byte[] {0, 105}, new byte[] {0, 42}, new byte[] {0, 97})));
w.close();
r.close();
@ -1449,6 +1460,8 @@ public class TestPointQueries extends LuceneTestCase {
doc.add(new FloatPoint("float", 42.0f));
doc.add(new DoublePoint("double", 17.0));
doc.add(new DoublePoint("double", 42.0));
doc.add(new BinaryPoint("bytes", new byte[] {0, 17}));
doc.add(new BinaryPoint("bytes", new byte[] {0, 42}));
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
@ -1477,6 +1490,12 @@ public class TestPointQueries extends LuceneTestCase {
assertEquals(1, s.count(DoublePoint.newSetQuery("double", -7, 17, 42, 97)));
assertEquals(0, s.count(DoublePoint.newSetQuery("double", 16, 20, 41, 97)));
assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 16})));
assertEquals(1, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17})));
assertEquals(1, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17}, new byte[] {0, 97}, new byte[] {0, 42})));
assertEquals(1, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, -7}, new byte[] {0, 17}, new byte[] {0, 42}, new byte[] {0, 97})));
assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 16}, new byte[] {0, 20}, new byte[] {0, 41}, new byte[] {0, 97})));
w.close();
r.close();
dir.close();
@ -1499,6 +1518,7 @@ public class TestPointQueries extends LuceneTestCase {
doc.add(new LongPoint("long", (long) x));
doc.add(new FloatPoint("float", (float) x));
doc.add(new DoublePoint("double", (double) x));
doc.add(new BinaryPoint("bytes", new byte[] {(byte) x}));
w.addDocument(doc);
}
@ -1528,6 +1548,12 @@ public class TestPointQueries extends LuceneTestCase {
assertEquals(10000-zeroCount, s.count(DoublePoint.newSetQuery("double", 1)));
assertEquals(0, s.count(DoublePoint.newSetQuery("double", 2)));
assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0})));
assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0}, new byte[] {-7})));
assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {7}, new byte[] {0})));
assertEquals(10000-zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {1})));
assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {2})));
w.close();
r.close();
dir.close();
@ -1550,6 +1576,7 @@ public class TestPointQueries extends LuceneTestCase {
doc.add(new LongPoint("long", (long) x));
doc.add(new FloatPoint("float", (float) x));
doc.add(new DoublePoint("double", (double) x));
doc.add(new BinaryPoint("bytes", new byte[] {(byte) x}));
w.addDocument(doc);
}
@ -1579,6 +1606,12 @@ public class TestPointQueries extends LuceneTestCase {
assertEquals(10000-zeroCount, s.count(DoublePoint.newSetQuery("double", 200)));
assertEquals(0, s.count(DoublePoint.newSetQuery("double", 2)));
assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0})));
assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0}, new byte[] {-7})));
assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {7}, new byte[] {0})));
assertEquals(10000-zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {(byte) 200})));
assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {2})));
w.close();
r.close();
dir.close();
@ -1598,6 +1631,14 @@ public class TestPointQueries extends LuceneTestCase {
assertEquals("packed point length should be 12 but got 3; field=\"foo\" numDims=3 bytesPerDim=4", expected.getMessage());
}
public void testInvalidPointInSetBinaryQuery() throws Exception {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class,
() -> {
BinaryPoint.newSetQuery("bytes", new byte[] {2}, new byte[0]);
});
assertEquals("all byte[] must be the same length, but saw 1 and 0", expected.getMessage());
}
public void testPointInSetQueryToString() throws Exception {
// int
assertEquals("int:{-42 18}", IntPoint.newSetQuery("int", -42, 18).toString());
@ -1610,5 +1651,8 @@ public class TestPointQueries extends LuceneTestCase {
// double
assertEquals("double:{-42.0 18.0}", DoublePoint.newSetQuery("double", -42.0, 18.0).toString());
// binary
assertEquals("bytes:{[12] [2a]}", BinaryPoint.newSetQuery("bytes", new byte[] {42}, new byte[] {18}).toString());
}
}