more tests; move factory method to IntPoint

This commit is contained in:
Mike McCandless 2016-02-23 17:12:50 -05:00
parent 96ed42bb13
commit 1654818e98
3 changed files with 133 additions and 43 deletions

View File

@ -16,7 +16,12 @@
*/
package org.apache.lucene.document;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.NumericUtils;
/** An int field that is indexed dimensionally such that finding
@ -88,6 +93,41 @@ public final class IntPoint extends Field {
super(name, pack(point), getType(point.length));
}
/** Returns a query efficiently finding all documents that indexed the provided 1D int values */
public static PointInSetQuery newSetQuery(String field, int... valuesIn) throws IOException {
// Don't unexpectedly change the user's incoming array:
int[] values = valuesIn.clone();
Arrays.sort(values);
final BytesRef value = new BytesRef(new byte[Integer.BYTES]);
value.length = Integer.BYTES;
return new PointInSetQuery(field, 1, Integer.BYTES,
new BytesRefIterator() {
int upto;
@Override
public BytesRef next() {
if (upto == values.length) {
return null;
} else {
IntPoint.encodeDimension(values[upto], value.bytes, 0);
upto++;
return value;
}
}
}) {
@Override
protected String toString(byte[] value) {
assert value.length == Integer.BYTES;
return Integer.toString(decodeDimension(value, 0));
}
};
}
@Override
public String toString() {
StringBuilder result = new StringBuilder();

View File

@ -80,40 +80,6 @@ public class PointInSetQuery extends Query {
sortedPackedPointsHashCode = sortedPackedPoints.hashCode();
}
/** Use in the 1D case when you indexed 1D int values using {@link org.apache.lucene.document.IntPoint} */
public static PointInSetQuery newIntSet(String field, int... valuesIn) {
// Don't unexpectedly change the user's incoming array:
int[] values = valuesIn.clone();
Arrays.sort(values);
final BytesRef value = new BytesRef(new byte[Integer.BYTES]);
value.length = Integer.BYTES;
try {
return new PointInSetQuery(field, 1, Integer.BYTES,
new BytesRefIterator() {
int upto;
@Override
public BytesRef next() {
if (upto == values.length) {
return null;
} else {
IntPoint.encodeDimension(values[upto], value.bytes, 0);
upto++;
return value;
}
}
});
} catch (IOException bogus) {
// Should never happen ;)
throw new RuntimeException(bogus);
}
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
@ -359,13 +325,39 @@ public class PointInSetQuery extends Query {
sb.append(':');
}
sb.append(" points:");
TermIterator iterator = sortedPackedPoints.iterator();
byte[] pointBytes = new byte[numDims * bytesPerDim];
for (BytesRef point = iterator.next(); point != null; point = iterator.next()) {
sb.append(' ');
// nocommit fix me to convert back to the numbers/etc.:
sb.append(point);
System.arraycopy(point.bytes, point.offset, pointBytes, 0, pointBytes.length);
sb.append(toString(pointBytes));
}
return sb.toString();
}
/**
* Returns a string of a single value in a human-readable format for debugging.
* This is used by {@link #toString()}.
*
* The default implementation encodes the individual byte values.
*
* @param value single value, never null
* @return human readable value for debugging
*/
protected String toString(byte[] value) {
assert value != null;
StringBuilder sb = new StringBuilder();
sb.append("binary(");
for (int i = 0; i < value.length; i++) {
if (i > 0) {
sb.append(' ');
}
sb.append(Integer.toHexString(value[i] & 0xFF));
}
sb.append(')');
return sb.toString();
}
}

View File

@ -1139,12 +1139,70 @@ public class TestPointQueries extends LuceneTestCase {
IndexReader r = DirectoryReader.open(w);
IndexSearcher s = newSearcher(r);
assertEquals(0, s.count(PointInSetQuery.newIntSet("int", 16)));
assertEquals(1, s.count(PointInSetQuery.newIntSet("int", 17)));
assertEquals(3, s.count(PointInSetQuery.newIntSet("int", 17, 97, 42)));
assertEquals(3, s.count(PointInSetQuery.newIntSet("int", -7, 17, 42, 97)));
assertEquals(3, s.count(PointInSetQuery.newIntSet("int", 17, 20, 42, 97)));
assertEquals(3, s.count(PointInSetQuery.newIntSet("int", 17, 105, 42, 97)));
assertEquals(0, s.count(IntPoint.newSetQuery("int", 16)));
assertEquals(1, s.count(IntPoint.newSetQuery("int", 17)));
assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 97, 42)));
assertEquals(3, s.count(IntPoint.newSetQuery("int", -7, 17, 42, 97)));
assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 20, 42, 97)));
assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 105, 42, 97)));
w.close();
r.close();
dir.close();
}
public void testPointInSetQueryManyEqualValues() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(getCodec());
IndexWriter w = new IndexWriter(dir, iwc);
int zeroCount = 0;
for(int i=0;i<10000;i++) {
int x = random().nextInt(2);
if (x == 0) {
zeroCount++;
}
Document doc = new Document();
doc.add(new IntPoint("int", x));
w.addDocument(doc);
}
IndexReader r = DirectoryReader.open(w);
IndexSearcher s = newSearcher(r);
assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0)));
assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0, -7)));
assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 7, 0)));
assertEquals(10000-zeroCount, s.count(IntPoint.newSetQuery("int", 1)));
assertEquals(0, s.count(IntPoint.newSetQuery("int", 2)));
w.close();
r.close();
dir.close();
}
public void testPointInSetQueryManyEqualValuesBigGap() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(getCodec());
IndexWriter w = new IndexWriter(dir, iwc);
int zeroCount = 0;
for(int i=0;i<10000;i++) {
int x = 200 * random().nextInt(2);
if (x == 0) {
zeroCount++;
}
Document doc = new Document();
doc.add(new IntPoint("int", x));
w.addDocument(doc);
}
IndexReader r = DirectoryReader.open(w);
IndexSearcher s = newSearcher(r);
assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0)));
assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0, -7)));
assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 7, 0)));
assertEquals(10000-zeroCount, s.count(IntPoint.newSetQuery("int", 200)));
assertEquals(0, s.count(IntPoint.newSetQuery("int", 2)));
w.close();
r.close();
dir.close();