From 41a336f1e591c65fc5a6e8c85530367661e8907d Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Wed, 24 Feb 2016 17:45:56 -0500 Subject: [PATCH] add BinaryPoint.newSetQuery --- .../apache/lucene/document/BinaryPoint.java | 68 +++++++++++++++++++ .../apache/lucene/search/PointInSetQuery.java | 1 - .../lucene/search/TestPointQueries.java | 68 +++++++++++++++---- 3 files changed, 124 insertions(+), 13 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/document/BinaryPoint.java b/lucene/core/src/java/org/apache/lucene/document/BinaryPoint.java index 6a3cb856704..b8cbc1e96b6 100644 --- a/lucene/core/src/java/org/apache/lucene/document/BinaryPoint.java +++ b/lucene/core/src/java/org/apache/lucene/document/BinaryPoint.java @@ -16,9 +16,16 @@ */ package org.apache.lucene.document; +import java.io.IOException; +import java.util.Arrays; +import java.util.Comparator; + +import org.apache.lucene.search.PointInSetQuery; import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefIterator; +import org.apache.lucene.util.StringHelper; /** * A binary field that is indexed dimensionally such that finding @@ -31,6 +38,7 @@ import org.apache.lucene.util.BytesRef; *
  • {@link #newExactQuery newExactQuery()} for matching an exact 1D point. *
  • {@link #newRangeQuery newRangeQuery()} for matching a 1D range. *
  • {@link #newMultiRangeQuery newMultiRangeQuery()} for matching points/ranges in n-dimensional space. + *
  • {@link #newSetQuery newSetQuery()} for matching a set of 1D values. * */ public final class BinaryPoint extends Field { @@ -197,4 +205,64 @@ public final class BinaryPoint extends Field { } }; } + + /** + * Create a query matching any of the specified 1D values. This is the points equivalent of {@code TermsQuery}. + * + * @param field field name. must not be {@code null}. + * @param valuesIn all values to match + */ + public static Query newSetQuery(String field, byte[]... valuesIn) throws IOException { + + // Make sure all byte[] have the same length + int bytesPerDim = -1; + for(byte[] value : valuesIn) { + if (bytesPerDim == -1) { + bytesPerDim = value.length; + } else if (value.length != bytesPerDim) { + throw new IllegalArgumentException("all byte[] must be the same length, but saw " + bytesPerDim + " and " + value.length); + } + } + + // Don't unexpectedly change the user's incoming values array: + byte[][] values = valuesIn.clone(); + + Arrays.sort(values, + new Comparator() { + @Override + public int compare(byte[] a, byte[] b) { + return StringHelper.compare(a.length, a, 0, b, 0); + } + }); + + // Silliness: + if (bytesPerDim == -1) { + // nocommit make sure this is tested + bytesPerDim = 1; + } + + final BytesRef value = new BytesRef(new byte[bytesPerDim]); + + return new PointInSetQuery(field, 1, bytesPerDim, + new BytesRefIterator() { + + int upto; + + @Override + public BytesRef next() { + if (upto == values.length) { + return null; + } else { + value.bytes = values[upto]; + upto++; + return value; + } + } + }) { + @Override + protected String toString(byte[] value) { + return new BytesRef(value).toString(); + } + }; + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java index c00973ecb73..d34e3f79a93 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java @@ -116,7 +116,6 @@ public class PointInSetQuery extends Query { if (fieldInfo.getPointNumBytes() != bytesPerDim) { throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + fieldInfo.getPointNumBytes() + " but this query has bytesPerDim=" + bytesPerDim); } - int bytesPerDim = fieldInfo.getPointNumBytes(); DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java index 51d914d0126..095ddc93043 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java @@ -1383,6 +1383,7 @@ public class TestPointQueries extends LuceneTestCase { doc.add(new LongPoint("long", 17L)); doc.add(new FloatPoint("float", 17.0f)); doc.add(new DoublePoint("double", 17.0)); + doc.add(new BinaryPoint("bytes", new byte[] {0, 17})); w.addDocument(doc); doc = new Document(); @@ -1390,6 +1391,7 @@ public class TestPointQueries extends LuceneTestCase { doc.add(new LongPoint("long", 42L)); doc.add(new FloatPoint("float", 42.0f)); doc.add(new DoublePoint("double", 42.0)); + doc.add(new BinaryPoint("bytes", new byte[] {0, 42})); w.addDocument(doc); doc = new Document(); @@ -1397,6 +1399,7 @@ public class TestPointQueries extends LuceneTestCase { doc.add(new LongPoint("long", 97L)); doc.add(new FloatPoint("float", 97.0f)); doc.add(new DoublePoint("double", 97.0)); + doc.add(new BinaryPoint("bytes", new byte[] {0, 97})); w.addDocument(doc); IndexReader r = DirectoryReader.open(w); @@ -1415,19 +1418,27 @@ public class TestPointQueries extends LuceneTestCase { assertEquals(3, s.count(LongPoint.newSetQuery("long", 17, 20, 42, 97))); assertEquals(3, s.count(LongPoint.newSetQuery("long", 17, 105, 42, 97))); - assertEquals(0, s.count(LongPoint.newSetQuery("float", 16))); - assertEquals(1, s.count(LongPoint.newSetQuery("float", 17))); - assertEquals(3, s.count(LongPoint.newSetQuery("float", 17, 97, 42))); - assertEquals(3, s.count(LongPoint.newSetQuery("float", -7, 17, 42, 97))); - assertEquals(3, s.count(LongPoint.newSetQuery("float", 17, 20, 42, 97))); - assertEquals(3, s.count(LongPoint.newSetQuery("float", 17, 105, 42, 97))); + assertEquals(0, s.count(FloatPoint.newSetQuery("float", 16))); + assertEquals(1, s.count(FloatPoint.newSetQuery("float", 17))); + assertEquals(3, s.count(FloatPoint.newSetQuery("float", 17, 97, 42))); + assertEquals(3, s.count(FloatPoint.newSetQuery("float", -7, 17, 42, 97))); + assertEquals(3, s.count(FloatPoint.newSetQuery("float", 17, 20, 42, 97))); + assertEquals(3, s.count(FloatPoint.newSetQuery("float", 17, 105, 42, 97))); - assertEquals(0, s.count(LongPoint.newSetQuery("double", 16))); - assertEquals(1, s.count(LongPoint.newSetQuery("double", 17))); - assertEquals(3, s.count(LongPoint.newSetQuery("double", 17, 97, 42))); - assertEquals(3, s.count(LongPoint.newSetQuery("double", -7, 17, 42, 97))); - assertEquals(3, s.count(LongPoint.newSetQuery("double", 17, 20, 42, 97))); - assertEquals(3, s.count(LongPoint.newSetQuery("double", 17, 105, 42, 97))); + assertEquals(0, s.count(DoublePoint.newSetQuery("double", 16))); + assertEquals(1, s.count(DoublePoint.newSetQuery("double", 17))); + assertEquals(3, s.count(DoublePoint.newSetQuery("double", 17, 97, 42))); + assertEquals(3, s.count(DoublePoint.newSetQuery("double", -7, 17, 42, 97))); + assertEquals(3, s.count(DoublePoint.newSetQuery("double", 17, 20, 42, 97))); + assertEquals(3, s.count(DoublePoint.newSetQuery("double", 17, 105, 42, 97))); + + // nocommit make sure invalid bytes length hits iae + assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 16}))); + assertEquals(1, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17}))); + assertEquals(3, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17}, new byte[] {0, 97}, new byte[] {0, 42}))); + assertEquals(3, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, -7}, new byte[] {0, 17}, new byte[] {0, 42}, new byte[] {0, 97}))); + assertEquals(3, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17}, new byte[] {0, 20}, new byte[] {0, 42}, new byte[] {0, 97}))); + assertEquals(3, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17}, new byte[] {0, 105}, new byte[] {0, 42}, new byte[] {0, 97}))); w.close(); r.close(); @@ -1449,6 +1460,8 @@ public class TestPointQueries extends LuceneTestCase { doc.add(new FloatPoint("float", 42.0f)); doc.add(new DoublePoint("double", 17.0)); doc.add(new DoublePoint("double", 42.0)); + doc.add(new BinaryPoint("bytes", new byte[] {0, 17})); + doc.add(new BinaryPoint("bytes", new byte[] {0, 42})); w.addDocument(doc); IndexReader r = DirectoryReader.open(w); @@ -1477,6 +1490,12 @@ public class TestPointQueries extends LuceneTestCase { assertEquals(1, s.count(DoublePoint.newSetQuery("double", -7, 17, 42, 97))); assertEquals(0, s.count(DoublePoint.newSetQuery("double", 16, 20, 41, 97))); + assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 16}))); + assertEquals(1, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17}))); + assertEquals(1, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 17}, new byte[] {0, 97}, new byte[] {0, 42}))); + assertEquals(1, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, -7}, new byte[] {0, 17}, new byte[] {0, 42}, new byte[] {0, 97}))); + assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0, 16}, new byte[] {0, 20}, new byte[] {0, 41}, new byte[] {0, 97}))); + w.close(); r.close(); dir.close(); @@ -1499,6 +1518,7 @@ public class TestPointQueries extends LuceneTestCase { doc.add(new LongPoint("long", (long) x)); doc.add(new FloatPoint("float", (float) x)); doc.add(new DoublePoint("double", (double) x)); + doc.add(new BinaryPoint("bytes", new byte[] {(byte) x})); w.addDocument(doc); } @@ -1528,6 +1548,12 @@ public class TestPointQueries extends LuceneTestCase { assertEquals(10000-zeroCount, s.count(DoublePoint.newSetQuery("double", 1))); assertEquals(0, s.count(DoublePoint.newSetQuery("double", 2))); + assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0}))); + assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0}, new byte[] {-7}))); + assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {7}, new byte[] {0}))); + assertEquals(10000-zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {1}))); + assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {2}))); + w.close(); r.close(); dir.close(); @@ -1550,6 +1576,7 @@ public class TestPointQueries extends LuceneTestCase { doc.add(new LongPoint("long", (long) x)); doc.add(new FloatPoint("float", (float) x)); doc.add(new DoublePoint("double", (double) x)); + doc.add(new BinaryPoint("bytes", new byte[] {(byte) x})); w.addDocument(doc); } @@ -1579,6 +1606,12 @@ public class TestPointQueries extends LuceneTestCase { assertEquals(10000-zeroCount, s.count(DoublePoint.newSetQuery("double", 200))); assertEquals(0, s.count(DoublePoint.newSetQuery("double", 2))); + assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0}))); + assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {0}, new byte[] {-7}))); + assertEquals(zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {7}, new byte[] {0}))); + assertEquals(10000-zeroCount, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {(byte) 200}))); + assertEquals(0, s.count(BinaryPoint.newSetQuery("bytes", new byte[] {2}))); + w.close(); r.close(); dir.close(); @@ -1598,6 +1631,14 @@ public class TestPointQueries extends LuceneTestCase { assertEquals("packed point length should be 12 but got 3; field=\"foo\" numDims=3 bytesPerDim=4", expected.getMessage()); } + public void testInvalidPointInSetBinaryQuery() throws Exception { + IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, + () -> { + BinaryPoint.newSetQuery("bytes", new byte[] {2}, new byte[0]); + }); + assertEquals("all byte[] must be the same length, but saw 1 and 0", expected.getMessage()); + } + public void testPointInSetQueryToString() throws Exception { // int assertEquals("int:{-42 18}", IntPoint.newSetQuery("int", -42, 18).toString()); @@ -1610,5 +1651,8 @@ public class TestPointQueries extends LuceneTestCase { // double assertEquals("double:{-42.0 18.0}", DoublePoint.newSetQuery("double", -42.0, 18.0).toString()); + + // binary + assertEquals("bytes:{[12] [2a]}", BinaryPoint.newSetQuery("bytes", new byte[] {42}, new byte[] {18}).toString()); } }