From 1654818e9814b99a75d2d4f4ac590813fab2f10a Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Tue, 23 Feb 2016 17:12:50 -0500 Subject: [PATCH] more tests; move factory method to IntPoint --- .../org/apache/lucene/document/IntPoint.java | 40 +++++++++++ .../apache/lucene/search/PointInSetQuery.java | 66 ++++++++--------- .../lucene/search/TestPointQueries.java | 70 +++++++++++++++++-- 3 files changed, 133 insertions(+), 43 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/document/IntPoint.java b/lucene/core/src/java/org/apache/lucene/document/IntPoint.java index be91bfdea07..42091d95281 100644 --- a/lucene/core/src/java/org/apache/lucene/document/IntPoint.java +++ b/lucene/core/src/java/org/apache/lucene/document/IntPoint.java @@ -16,7 +16,12 @@ */ package org.apache.lucene.document; +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.search.PointInSetQuery; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.NumericUtils; /** An int field that is indexed dimensionally such that finding @@ -88,6 +93,41 @@ public final class IntPoint extends Field { super(name, pack(point), getType(point.length)); } + /** Returns a query efficiently finding all documents that indexed the provided 1D int values */ + public static PointInSetQuery newSetQuery(String field, int... valuesIn) throws IOException { + + // Don't unexpectedly change the user's incoming array: + int[] values = valuesIn.clone(); + + Arrays.sort(values); + + final BytesRef value = new BytesRef(new byte[Integer.BYTES]); + value.length = Integer.BYTES; + + return new PointInSetQuery(field, 1, Integer.BYTES, + new BytesRefIterator() { + + int upto; + + @Override + public BytesRef next() { + if (upto == values.length) { + return null; + } else { + IntPoint.encodeDimension(values[upto], value.bytes, 0); + upto++; + return value; + } + } + }) { + @Override + protected String toString(byte[] value) { + assert value.length == Integer.BYTES; + return Integer.toString(decodeDimension(value, 0)); + } + }; + } + @Override public String toString() { StringBuilder result = new StringBuilder(); diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java index bbc9a540ebe..adb9c530fea 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java @@ -80,40 +80,6 @@ public class PointInSetQuery extends Query { sortedPackedPointsHashCode = sortedPackedPoints.hashCode(); } - /** Use in the 1D case when you indexed 1D int values using {@link org.apache.lucene.document.IntPoint} */ - public static PointInSetQuery newIntSet(String field, int... valuesIn) { - - // Don't unexpectedly change the user's incoming array: - int[] values = valuesIn.clone(); - - Arrays.sort(values); - - final BytesRef value = new BytesRef(new byte[Integer.BYTES]); - value.length = Integer.BYTES; - - try { - return new PointInSetQuery(field, 1, Integer.BYTES, - new BytesRefIterator() { - - int upto; - - @Override - public BytesRef next() { - if (upto == values.length) { - return null; - } else { - IntPoint.encodeDimension(values[upto], value.bytes, 0); - upto++; - return value; - } - } - }); - } catch (IOException bogus) { - // Should never happen ;) - throw new RuntimeException(bogus); - } - } - @Override public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { @@ -354,18 +320,44 @@ public class PointInSetQuery extends Query { sb.append(getClass().getSimpleName()); sb.append(':'); if (this.field.equals(field) == false) { - sb.append("field="); + sb.append(" field="); sb.append(this.field); sb.append(':'); } + sb.append(" points:"); + TermIterator iterator = sortedPackedPoints.iterator(); + byte[] pointBytes = new byte[numDims * bytesPerDim]; for (BytesRef point = iterator.next(); point != null; point = iterator.next()) { sb.append(' '); - // nocommit fix me to convert back to the numbers/etc.: - sb.append(point); + System.arraycopy(point.bytes, point.offset, pointBytes, 0, pointBytes.length); + sb.append(toString(pointBytes)); } return sb.toString(); } + + /** + * Returns a string of a single value in a human-readable format for debugging. + * This is used by {@link #toString()}. + * + * The default implementation encodes the individual byte values. + * + * @param value single value, never null + * @return human readable value for debugging + */ + protected String toString(byte[] value) { + assert value != null; + StringBuilder sb = new StringBuilder(); + sb.append("binary("); + for (int i = 0; i < value.length; i++) { + if (i > 0) { + sb.append(' '); + } + sb.append(Integer.toHexString(value[i] & 0xFF)); + } + sb.append(')'); + return sb.toString(); + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java index ea432f7ab2d..c48cacb530d 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java @@ -1139,12 +1139,70 @@ public class TestPointQueries extends LuceneTestCase { IndexReader r = DirectoryReader.open(w); IndexSearcher s = newSearcher(r); - assertEquals(0, s.count(PointInSetQuery.newIntSet("int", 16))); - assertEquals(1, s.count(PointInSetQuery.newIntSet("int", 17))); - assertEquals(3, s.count(PointInSetQuery.newIntSet("int", 17, 97, 42))); - assertEquals(3, s.count(PointInSetQuery.newIntSet("int", -7, 17, 42, 97))); - assertEquals(3, s.count(PointInSetQuery.newIntSet("int", 17, 20, 42, 97))); - assertEquals(3, s.count(PointInSetQuery.newIntSet("int", 17, 105, 42, 97))); + assertEquals(0, s.count(IntPoint.newSetQuery("int", 16))); + assertEquals(1, s.count(IntPoint.newSetQuery("int", 17))); + assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 97, 42))); + assertEquals(3, s.count(IntPoint.newSetQuery("int", -7, 17, 42, 97))); + assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 20, 42, 97))); + assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 105, 42, 97))); + w.close(); + r.close(); + dir.close(); + } + + public void testPointInSetQueryManyEqualValues() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(getCodec()); + IndexWriter w = new IndexWriter(dir, iwc); + + int zeroCount = 0; + for(int i=0;i<10000;i++) { + int x = random().nextInt(2); + if (x == 0) { + zeroCount++; + } + Document doc = new Document(); + doc.add(new IntPoint("int", x)); + w.addDocument(doc); + } + + IndexReader r = DirectoryReader.open(w); + IndexSearcher s = newSearcher(r); + assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0))); + assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0, -7))); + assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 7, 0))); + assertEquals(10000-zeroCount, s.count(IntPoint.newSetQuery("int", 1))); + assertEquals(0, s.count(IntPoint.newSetQuery("int", 2))); + w.close(); + r.close(); + dir.close(); + } + + public void testPointInSetQueryManyEqualValuesBigGap() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setCodec(getCodec()); + IndexWriter w = new IndexWriter(dir, iwc); + + int zeroCount = 0; + for(int i=0;i<10000;i++) { + int x = 200 * random().nextInt(2); + if (x == 0) { + zeroCount++; + } + Document doc = new Document(); + doc.add(new IntPoint("int", x)); + w.addDocument(doc); + } + + IndexReader r = DirectoryReader.open(w); + IndexSearcher s = newSearcher(r); + assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0))); + assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0, -7))); + assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 7, 0))); + assertEquals(10000-zeroCount, s.count(IntPoint.newSetQuery("int", 200))); + assertEquals(0, s.count(IntPoint.newSetQuery("int", 2))); w.close(); r.close(); dir.close();