LUCENE-6881: cutover all BKD implementations to the codec's implementation

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1713278 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2015-11-08 18:18:19 +00:00
parent 17fbe3a6cd
commit f12cb52a3b
80 changed files with 2292 additions and 8445 deletions

View File

@ -49,6 +49,9 @@ New Features
* LUCENE-6879: Allow to define custom CharTokenizer instances without
subclassing using Java 8 lambdas or method references. (Uwe Schindler)
* LUCENE-6881: Cutover all BKD implementations to dimensional values
(Mike McCandless)
API Changes
* LUCENE-3312: The API of oal.document was restructured to

View File

@ -148,7 +148,7 @@ class SimpleTextDimensionalWriter extends DimensionalWriter {
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
-return Relation.QUERY_CROSSES_CELL;
+return Relation.CELL_CROSSES_QUERY;
}
});
indexFPs.put(fieldInfo.name, writer.finish(dataOut));

View File

@ -77,7 +77,7 @@ public abstract class DimensionalWriter implements Closeable {
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
// Forces this segment's DimensionalReader to always visit all docs + values:
-return Relation.QUERY_CROSSES_CELL;
+return Relation.CELL_CROSSES_QUERY;
}
});
}

View File

@ -1,6 +1,5 @@
package org.apache.lucene.codecs.lucene60;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -97,7 +96,7 @@ public class Lucene60DimensionalWriter extends DimensionalWriter implements Closeable
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
-return Relation.QUERY_CROSSES_CELL;
+return Relation.CELL_CROSSES_QUERY;
}
});

View File

@ -18,6 +18,8 @@ package org.apache.lucene.document;
*/
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.bkd.BKDUtil;
/** A field that is indexed dimensionally such that finding
* all documents within an N-dimensional shape at search time is
@ -65,6 +67,9 @@ public final class DimensionalField extends Field {
if (point.length == 0) {
throw new IllegalArgumentException("point cannot be 0 dimensions");
}
if (point.length == 1) {
return new BytesRef(point[0]);
}
int bytesPerDim = -1;
for(byte[] dim : point) {
if (dim == null) {
@ -86,19 +91,20 @@ public final class DimensionalField extends Field {
return new BytesRef(packed);
}
-/** Sugar API: indexes a one-dimensional point */
-public DimensionalField(String name, byte[] dim1) {
-super(name, dim1, getType(1, dim1.length));
-}
+private static BytesRef pack(long... point) {
+if (point == null) {
+throw new IllegalArgumentException("point cannot be null");
+}
+if (point.length == 0) {
+throw new IllegalArgumentException("point cannot be 0 dimensions");
+}
+byte[] packed = new byte[point.length * RamUsageEstimator.NUM_BYTES_LONG];
+for(int dim=0;dim<point.length;dim++) {
+BKDUtil.longToBytes(point[dim], packed, dim);
+}
-/** Sugar API: indexes a two-dimensional point */
-public DimensionalField(String name, byte[] dim1, byte[] dim2) {
-super(name, pack(dim1, dim2), getType(2, dim1.length));
-}
-/** Sugar API: indexes a three-dimensional point */
-public DimensionalField(String name, byte[] dim1, byte[] dim2, byte[] dim3) {
-super(name, pack(dim1, dim2, dim3), getType(3, dim1.length));
-}
+return new BytesRef(packed);
+}
/** General purpose API: creates a new DimensionalField, indexing the
@ -108,10 +114,21 @@ public final class DimensionalField extends Field {
* @param point byte[][] value
* @throws IllegalArgumentException if the field name or value is null.
*/
-public DimensionalField(String name, byte[][] point) {
+public DimensionalField(String name, byte[]... point) {
super(name, pack(point), getType(point));
}
/** General purpose API: creates a new DimensionalField, indexing the
* provided N-dimensional long point.
*
* @param name field name
* @param point long[] value
* @throws IllegalArgumentException if the field name or value is null.
*/
public DimensionalField(String name, long... point) {
super(name, pack(point), getType(point.length, RamUsageEstimator.NUM_BYTES_LONG));
}
/** Expert API */
public DimensionalField(String name, byte[] packedPoint, FieldType type) {
super(name, packedPoint, type);

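For illustration, a minimal indexing sketch against the constructors added above (hypothetical snippet, not part of the commit; assumes an open IndexWriter and made-up field names):

import org.apache.lucene.document.DimensionalField;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;

void addPoints(IndexWriter writer) throws java.io.IOException {
  Document doc = new Document();
  // 1D long point: packed big-endian with the sign bit flipped
  // (BKDUtil.longToBytes) so negative values sort first:
  doc.add(new DimensionalField("timestamp", 1446999499000L));
  // 2D binary point via the new byte[]... varargs constructor;
  // every dimension must have the same byte length:
  byte[] lat = new byte[] {0x12, 0x34, 0x56, 0x78};
  byte[] lon = new byte[] {0x0a, 0x0b, 0x0c, 0x0d};
  doc.add(new DimensionalField("location", lat, lon));
  writer.addDocument(doc);
}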
View File

@ -1750,7 +1750,7 @@ public class CheckIndex implements Closeable {
// We always pretend the query shape is so complex that it crosses every cell, so
// that packedValue is passed for every document
-return DimensionalValues.Relation.QUERY_CROSSES_CELL;
+return DimensionalValues.Relation.CELL_CROSSES_QUERY;
}
private void checkPackedValue(String desc, byte[] packedValue, int docID) {

View File

@ -321,6 +321,11 @@ public abstract class CodecReader extends LeafReader implements Accountable {
if (getTermVectorsReader() != null) {
ramBytesUsed += getTermVectorsReader().ramBytesUsed();
}
// dimensional values
if (getDimensionalReader() != null) {
ramBytesUsed += getDimensionalReader().ramBytesUsed();
}
return ramBytesUsed;
}
@ -352,6 +357,11 @@ public abstract class CodecReader extends LeafReader implements Accountable {
if (getTermVectorsReader() != null) {
resources.add(Accountables.namedAccountable("term vectors", getTermVectorsReader()));
}
// dimensional values
if (getDimensionalReader() != null) {
resources.add(Accountables.namedAccountable("dimensional values", getDimensionalReader()));
}
return Collections.unmodifiableList(resources);
}

View File

@ -137,6 +137,9 @@ final class DefaultIndexingChain extends DocConsumer {
if (dimensionalWriter == null) {
// lazy init
DimensionalFormat fmt = state.segmentInfo.getCodec().dimensionalFormat();
if (fmt == null) {
throw new IllegalStateException("field=\"" + perField.fieldInfo.name + "\" was indexed dimensionally but codec does not support dimensional formats");
}
dimensionalWriter = fmt.fieldsWriter(state);
}

View File

@ -33,9 +33,9 @@ public abstract class DimensionalValues {
/** Return this if the cell is fully contained by the query */
CELL_INSIDE_QUERY,
/** Return this if the cell and query do not overlap */
-QUERY_OUTSIDE_CELL,
+CELL_OUTSIDE_QUERY,
/** Return this if the cell partially overlaps the query */
-QUERY_CROSSES_CELL
+CELL_CROSSES_QUERY
};
/** We recurse the BKD tree, using a provided instance of this to guide the recursion.
@ -53,6 +53,10 @@ public abstract class DimensionalValues {
/** Called for non-leaf cells to test how the cell relates to the query, to
* determine how to further recurse down the tree. */
Relation compare(byte[] minPackedValue, byte[] maxPackedValue);
/** Notifies the caller that this many documents (from one block) are about
* to be visited */
default void grow(int count) {};
}
/** Finds all documents and points matching the provided visitor.

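As a sketch of the visitor protocol above (hypothetical helper, not from this commit): count every indexed point of one field by reporting CELL_CROSSES_QUERY for every cell, the same trick CheckIndex uses elsewhere in this commit.

import java.io.IOException;
import org.apache.lucene.index.DimensionalValues;
import org.apache.lucene.index.DimensionalValues.IntersectVisitor;
import org.apache.lucene.index.DimensionalValues.Relation;

static long countPoints(DimensionalValues values, String field) throws IOException {
  long[] count = new long[1];
  values.intersect(field, new IntersectVisitor() {
    @Override
    public void visit(int docID) {
      // Only called for cells fully inside the query; unreachable
      // here since compare() never returns CELL_INSIDE_QUERY:
      count[0]++;
    }
    @Override
    public void visit(int docID, byte[] packedValue) {
      count[0]++; // crossing cells pass the packed value per doc
    }
    @Override
    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      return Relation.CELL_CROSSES_QUERY; // force per-value visits
    }
    // grow(int) keeps its default no-op body
  });
  return count[0];
}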
View File

@ -321,11 +321,11 @@ public class ParallelLeafReader extends LeafReader {
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
LeafReader reader = fieldToReader.get(fieldName);
if (reader == null) {
throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index dimensional values");
return;
}
DimensionalValues dimValues = reader.getDimensionalValues();
if (dimValues == null) {
throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index dimensional values");
return;
}
dimValues.intersect(fieldName, visitor);
}

View File

@ -0,0 +1,303 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import org.apache.lucene.document.DimensionalField;
import org.apache.lucene.index.DimensionalValues;
import org.apache.lucene.index.DimensionalValues.IntersectVisitor;
import org.apache.lucene.index.DimensionalValues.Relation;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.bkd.BKDUtil;
/** Searches for ranges in fields previously indexed using {@link DimensionalField}. In
* a 1D field this is a simple range query; in a multi-dimensional field it's a box shape. */
public class DimensionalRangeQuery extends Query {
final String field;
final int numDims;
final byte[][] lowerPoint;
final boolean[] lowerInclusive;
final byte[][] upperPoint;
final boolean[] upperInclusive;
// This is null only in the "fully open range" case
final Integer bytesPerDim;
public DimensionalRangeQuery(String field,
byte[][] lowerPoint, boolean[] lowerInclusive,
byte[][] upperPoint, boolean[] upperInclusive) {
this.field = field;
if (lowerPoint == null) {
throw new IllegalArgumentException("lowerPoint must not be null");
}
if (upperPoint == null) {
throw new IllegalArgumentException("upperPoint must not be null");
}
numDims = lowerPoint.length;
if (upperPoint.length != numDims) {
throw new IllegalArgumentException("lowerPoint has length=" + numDims + " but upperPoint has different length=" + upperPoint.length);
}
this.lowerPoint = lowerPoint;
this.lowerInclusive = lowerInclusive;
this.upperPoint = upperPoint;
this.upperInclusive = upperInclusive;
int bytesPerDim = -1;
for(byte[] value : lowerPoint) {
if (value != null) {
if (bytesPerDim == -1) {
bytesPerDim = value.length;
} else if (value.length != bytesPerDim) {
throw new IllegalArgumentException("all dimensions must have same bytes length, but saw " + bytesPerDim + " and " + value.length);
}
}
}
for(byte[] value : upperPoint) {
if (value != null) {
if (bytesPerDim == -1) {
bytesPerDim = value.length;
} else if (value.length != bytesPerDim) {
throw new IllegalArgumentException("all dimensions must have same bytes length, but saw " + bytesPerDim + " and " + value.length);
}
}
}
if (bytesPerDim == -1) {
this.bytesPerDim = null;
} else {
this.bytesPerDim = bytesPerDim;
}
}
/** Sugar constructor: use in the 1D case when you indexed 1D long values using {@link DimensionalField} */
public DimensionalRangeQuery(String field, Long lowerValue, boolean lowerInclusive, Long upperValue, boolean upperInclusive) {
this(field, pack(lowerValue), new boolean[] {lowerInclusive}, pack(upperValue), new boolean[] {upperInclusive});
}
/** Sugar constructor: use in the 1D case when you indexed binary values using {@link DimensionalField} */
public DimensionalRangeQuery(String field, byte[] lowerValue, boolean lowerInclusive, byte[] upperValue, boolean upperInclusive) {
this(field, new byte[][] {lowerValue}, new boolean[] {lowerInclusive}, new byte[][] {upperValue}, new boolean[] {upperInclusive});
}
private static byte[][] pack(Long value) {
if (value == null) {
// OK: open ended range
return new byte[1][];
}
byte[][] result = new byte[][] {new byte[RamUsageEstimator.NUM_BYTES_LONG]};
BKDUtil.longToBytes(value, result[0], 0);
return result;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
// We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
// This is an inverted structure and should be used in the first pass:
return new ConstantScoreWeight(this) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
DimensionalValues values = reader.getDimensionalValues();
if (values == null) {
// No docs in this segment indexed any field dimensionally
return null;
}
FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
if (fieldInfo == null) {
// No docs in this segment indexed this field at all
return null;
}
if (fieldInfo.getDimensionCount() != numDims) {
throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims=" + fieldInfo.getDimensionCount() + " but this query has numDims=" + numDims);
}
if (bytesPerDim != null && bytesPerDim.intValue() != fieldInfo.getDimensionNumBytes()) {
throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + fieldInfo.getDimensionNumBytes() + " but this query has bytesPerDim=" + bytesPerDim);
}
int bytesPerDim = fieldInfo.getDimensionNumBytes();
byte[] packedLowerIncl = new byte[numDims * bytesPerDim];
byte[] packedUpperIncl = new byte[numDims * bytesPerDim];
byte[] minValue = new byte[bytesPerDim];
byte[] maxValue = new byte[bytesPerDim];
Arrays.fill(maxValue, (byte) 0xff);
byte[] one = new byte[bytesPerDim];
one[bytesPerDim-1] = 1;
// Carefully pack lower and upper bounds, taking care of per-dim inclusive:
for(int dim=0;dim<numDims;dim++) {
if (lowerPoint[dim] != null) {
if (lowerInclusive[dim] == false) {
if (Arrays.equals(lowerPoint[dim], maxValue)) {
return null;
} else {
byte[] value = new byte[bytesPerDim];
BKDUtil.add(bytesPerDim, 0, lowerPoint[dim], one, value);
System.arraycopy(value, 0, packedLowerIncl, dim*bytesPerDim, bytesPerDim);
}
} else {
System.arraycopy(lowerPoint[dim], 0, packedLowerIncl, dim*bytesPerDim, bytesPerDim);
}
} else {
// Open-ended range: we just leave 0s in this packed dim for the lower value
}
if (upperPoint[dim] != null) {
if (upperInclusive[dim] == false) {
if (Arrays.equals(upperPoint[dim], minValue)) {
return null;
} else {
byte[] value = new byte[bytesPerDim];
BKDUtil.subtract(bytesPerDim, 0, upperPoint[dim], one, value);
System.arraycopy(value, 0, packedUpperIncl, dim*bytesPerDim, bytesPerDim);
}
} else {
System.arraycopy(upperPoint[dim], 0, packedUpperIncl, dim*bytesPerDim, bytesPerDim);
}
} else {
// Open-ended range: fill with max point for this dim:
System.arraycopy(maxValue, 0, packedUpperIncl, dim*bytesPerDim, bytesPerDim);
}
}
// Now packedLowerIncl and packedUpperIncl are inclusive, and non-empty space:
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
int[] hitCount = new int[1];
values.intersect(field,
new IntersectVisitor() {
@Override
public void grow(int count) {
result.grow(count);
}
@Override
public void visit(int docID) {
hitCount[0]++;
result.add(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (StringHelper.compare(bytesPerDim, packedValue, offset, packedLowerIncl, offset) < 0) {
// Doc's value is too low, in this dimension
return;
}
if (StringHelper.compare(bytesPerDim, packedValue, offset, packedUpperIncl, offset) > 0) {
// Doc's value is too high, in this dimension
return;
}
}
// Doc is in-bounds
hitCount[0]++;
result.add(docID);
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
boolean crosses = false;
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, packedUpperIncl, offset) > 0 ||
StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedLowerIncl, offset) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, packedLowerIncl, offset) < 0 ||
StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedUpperIncl, offset) > 0;
}
if (crosses) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}
});
// NOTE: hitCount[0] will be an over-estimate in the multi-valued case
return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
}
};
}
@Override
public int hashCode() {
int hash = super.hashCode();
hash += Arrays.hashCode(lowerPoint)^0x14fa55fb;
hash += Arrays.hashCode(upperPoint)^0x733fa5fe;
hash += Arrays.hashCode(lowerInclusive)^0x14fa55fb;
hash += Arrays.hashCode(upperInclusive)^0x733fa5fe;
hash += numDims^0x14fa55fb;
hash += Objects.hashCode(bytesPerDim);
return hash;
}
@Override
public boolean equals(Object other) {
if (super.equals(other)) {
final DimensionalRangeQuery q = (DimensionalRangeQuery) other;
return q.numDims == numDims &&
q.bytesPerDim == bytesPerDim &&
Arrays.equals(lowerPoint, q.lowerPoint) &&
Arrays.equals(lowerInclusive, q.lowerInclusive) &&
Arrays.equals(upperPoint, q.upperPoint) &&
Arrays.equals(upperInclusive, q.upperInclusive);
}
return false;
}
@Override
public String toString(String field) {
final StringBuilder sb = new StringBuilder();
sb.append(getClass().getSimpleName());
sb.append(':');
if (this.field.equals(field) == false) {
sb.append("field=");
sb.append(this.field);
sb.append(':');
}
return sb.append('[')
.append(Arrays.toString(lowerPoint))
.append(" TO ")
.append(Arrays.toString(upperPoint))
.append(']')
.toString();
}
}

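A hedged usage sketch for the query above (assumes the field was indexed as a 1D long DimensionalField; the field name and bounds are made up):

import java.io.IOException;
import org.apache.lucene.search.DimensionalRangeQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;

static TopDocs lastDay(IndexSearcher searcher) throws IOException {
  // Matches "timestamp" in [1446940800000, 1447027200000); a null
  // bound would leave that end of the range open. Exclusive bounds
  // are rewritten internally to inclusive ones by adding/subtracting
  // one in the packed byte space (BKDUtil.add/subtract):
  return searcher.search(
      new DimensionalRangeQuery("timestamp",
                                1446940800000L, true,
                                1447027200000L, false),
      10);
}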
View File

@ -380,4 +380,18 @@ public abstract class StringHelper {
return new BytesRef(bytes);
}
/** Compares a fixed length slice of two byte arrays interpreted as
* unsigned values. Returns positive int if a &gt; b, negative
* int if a &lt; b and 0 if a == b */
public static int compare(int count, byte[] a, int aOffset, byte[] b, int bOffset) {
for(int i=0;i<count;i++) {
int cmp = (a[aOffset+i]&0xff) - (b[bOffset+i]&0xff);
if (cmp != 0) {
return cmp;
}
}
return 0;
}
}

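A small illustration of why the & 0xff masking above matters (hypothetical snippet): Java bytes are signed, so without the mask 0x80 would compare below 0x7f.

byte[] a = new byte[] {(byte) 0x80}; // 128 unsigned, -128 signed
byte[] b = new byte[] {(byte) 0x7f}; // 127 either way

// Unsigned slice comparison: 128 > 127, so the result is positive:
assert StringHelper.compare(1, a, 0, b, 0) > 0;
// Raw signed subtraction would get it backwards: -128 - 127 < 0.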
View File

@ -113,6 +113,7 @@ public class BKDReader implements Accountable {
//System.out.println("R: addAll nodeID=" + nodeID);
if (nodeID >= leafNodeOffset) {
//System.out.println("ADDALL");
visitDocIDs(state.in, leafBlockFPs[nodeID-leafNodeOffset], state.visitor);
} else {
addAll(state, 2*nodeID);
@ -126,13 +127,12 @@ public class BKDReader implements Accountable {
// How many points are stored in this leaf cell:
int count = in.readVInt();
+visitor.grow(count);
-// TODO: especially for the 1D case, this was a decent speedup, because caller could know it should budget for around XXX docs:
-//state.docs.grow(count);
-int docID = 0;
for(int i=0;i<count;i++) {
-docID += in.readVInt();
-visitor.visit(docID);
+visitor.visit(in.readInt());
}
}
@ -145,16 +145,15 @@ public class BKDReader implements Accountable {
-// TODO: we could maybe pollute the IntersectVisitor API with a "grow" method if this maybe helps perf
-// enough (it did before, esp. for the 1D case):
-//state.docs.grow(count);
-int docID = 0;
for(int i=0;i<count;i++) {
-docID += in.readVInt();
-docIDs[i] = docID;
+docIDs[i] = in.readInt();
}
return count;
}
protected void visitDocValues(byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
+visitor.grow(count);
for(int i=0;i<count;i++) {
in.readBytes(scratchPackedValue, 0, scratchPackedValue.length);
visitor.visit(docIDs[i], scratchPackedValue);
@ -175,7 +174,7 @@ public class BKDReader implements Accountable {
Relation r = state.visitor.compare(cellMinPacked, cellMaxPacked);
-if (r == Relation.QUERY_OUTSIDE_CELL) {
+if (r == Relation.CELL_OUTSIDE_QUERY) {
// This cell is fully outside of the query shape: stop recursing
return;
} else if (r == Relation.CELL_INSIDE_QUERY) {
@ -187,6 +186,7 @@ public class BKDReader implements Accountable {
}
if (nodeID >= leafNodeOffset) {
//System.out.println("FILTER");
// Leaf node; scan and filter all points in this block:
int count = readDocIDs(state.in, leafBlockFPs[nodeID-leafNodeOffset], state.scratchDocIDs);

View File

@ -20,14 +20,14 @@ package org.apache.lucene.util.bkd;
import java.math.BigInteger;
import java.util.Arrays;
-/** Utility methods to convert to/from N-dimensional packed byte[] as numbers */
+/** Utility methods to convert to/from N-dimensional packed byte[] as unsigned numbers */
public final class BKDUtil {
private BKDUtil() {
// No instance
}
/** result = a - b, where a &gt;= b */
/** Result = a - b, where a &gt;= b, else {@code IllegalArgumentException} is thrown. */
public static void subtract(int bytesPerDim, int dim, byte[] a, byte[] b, byte[] result) {
int start = dim * bytesPerDim;
int end = start + bytesPerDim;
@ -43,10 +43,30 @@ public final class BKDUtil {
result[i-start] = (byte) diff;
}
if (borrow != 0) {
throw new IllegalArgumentException("a < b?");
throw new IllegalArgumentException("a < b");
}
}
/** Result = a + b, where a and b are unsigned. If there is an overflow, {@code IllegalArgumentException} is thrown. */
public static void add(int bytesPerDim, int dim, byte[] a, byte[] b, byte[] result) {
int start = dim * bytesPerDim;
int end = start + bytesPerDim;
int carry = 0;
for(int i=end-1;i>=start;i--) {
int digitSum = (a[i]&0xff) + (b[i]&0xff) + carry;
if (digitSum > 255) {
digitSum -= 256;
carry = 1;
} else {
carry = 0;
}
result[i-start] = (byte) digitSum;
}
if (carry != 0) {
throw new IllegalArgumentException("a + b overflows bytesPerDim=" + bytesPerDim);
}
}
/** Returns positive int if a &gt; b, negative int if a &lt; b and 0 if a == b */
public static int compare(int bytesPerDim, byte[] a, int aIndex, byte[] b, int bIndex) {
for(int i=0;i<bytesPerDim;i++) {
@ -93,6 +113,36 @@ public final class BKDUtil {
return x ^ 0x80000000;
}
public static void longToBytes(long v, byte[] bytes, int dim) {
// Flip the sign bit so negative longs sort before positive longs:
v ^= 0x8000000000000000L;
int offset = 8 * dim;
bytes[offset] = (byte) (v >> 56);
bytes[offset+1] = (byte) (v >> 48);
bytes[offset+2] = (byte) (v >> 40);
bytes[offset+3] = (byte) (v >> 32);
bytes[offset+4] = (byte) (v >> 24);
bytes[offset+5] = (byte) (v >> 16);
bytes[offset+6] = (byte) (v >> 8);
bytes[offset+7] = (byte) v;
}
public static long bytesToLong(byte[] bytes, int index) {
int offset = 8 * index;
long v = ((bytes[offset] & 0xffL) << 56) |
((bytes[offset+1] & 0xffL) << 48) |
((bytes[offset+2] & 0xffL) << 40) |
((bytes[offset+3] & 0xffL) << 32) |
((bytes[offset+4] & 0xffL) << 24) |
((bytes[offset+5] & 0xffL) << 16) |
((bytes[offset+6] & 0xffL) << 8) |
(bytes[offset+7] & 0xffL);
// Flip the sign bit back
v ^= 0x8000000000000000L;
return v;
}
public static void sortableBigIntBytes(byte[] bytes) {
bytes[0] ^= 0x80;
for(int i=1;i<bytes.length;i++) {

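A round-trip sketch of the sortable-long encoding above (hypothetical demo class, not from the commit): flipping the sign bit makes the unsigned byte order of the packed form agree with signed long order.

import org.apache.lucene.util.bkd.BKDUtil;

public class SortableLongDemo {
  public static void main(String[] args) {
    byte[] neg = new byte[8];
    byte[] pos = new byte[8];
    BKDUtil.longToBytes(-5L, neg, 0);
    BKDUtil.longToBytes(3L, pos, 0);
    // -5 < 3 as longs, and the packed bytes agree unsigned-lexicographically:
    assert BKDUtil.compare(8, neg, 0, pos, 0) < 0;
    // The decode flips the sign bit back, restoring the original value:
    assert BKDUtil.bytesToLong(neg, 0) == -5L;
  }
}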
View File

@ -240,17 +240,15 @@ public class BKDWriter implements Closeable {
@Override
protected int compare(int i, int j) {
if (dim != -1) {
-writer.readPackedValue(i, scratch1);
-writer.readPackedValue(j, scratch2);
-int cmp = BKDUtil.compare(bytesPerDim, scratch1, dim, scratch2, dim);
-if (cmp != 0) {
-return cmp;
-}
+writer.readPackedValue(i, scratch1);
+writer.readPackedValue(j, scratch2);
+int cmp = BKDUtil.compare(bytesPerDim, scratch1, dim, scratch2, dim);
+if (cmp != 0) {
+return cmp;
+}
// Tie-break
-int cmp = Integer.compare(writer.docIDs[i], writer.docIDs[j]);
+cmp = Integer.compare(writer.docIDs[i], writer.docIDs[j]);
if (cmp != 0) {
return cmp;
}
@ -422,9 +420,12 @@ public class BKDWriter implements Closeable {
boolean success = false;
try {
//long t0 = System.nanoTime();
for(int dim=0;dim<numDims;dim++) {
sortedPointWriters[dim] = new PathSlice(sort(dim), 0, pointCount);
}
//long t1 = System.nanoTime();
//System.out.println("sort time: " + ((t1-t0)/1000000.0) + " msec");
if (tempInput != null) {
tempDir.deleteFile(tempInput.getName());
@ -446,6 +447,8 @@ public class BKDWriter implements Closeable {
// If no exception, we should have cleaned everything up:
assert tempDir.getCreatedFiles().isEmpty();
//long t2 = System.nanoTime();
//System.out.println("write time: " + ((t2-t1)/1000000.0) + " msec");
success = true;
} finally {
@ -485,11 +488,8 @@ public class BKDWriter implements Closeable {
protected void writeLeafBlockDocs(IndexOutput out, int[] docIDs, int start, int count) throws IOException {
out.writeVInt(count);
-int lastDocID = 0;
for (int i=0;i<count;i++) {
-int docID = docIDs[start + i];
-out.writeVInt(docID - lastDocID);
-lastDocID = docID;
+out.writeInt(docIDs[start + i]);
}
}
@ -582,6 +582,7 @@ public class BKDWriter implements Closeable {
}
}
//System.out.println("SPLIT: " + splitDim);
return splitDim;
}
@ -633,9 +634,6 @@ public class BKDWriter implements Closeable {
// We ensured that maxPointsSortInHeap was >= maxPointsInLeafNode, so we better be in heap at this point:
HeapPointWriter heapSource = (HeapPointWriter) source.writer;
-// Sort by docID in the leaf so we can delta-vInt encode:
-sortHeapPointWriter(heapSource, Math.toIntExact(source.start), Math.toIntExact(source.count), -1);
// Save the block file pointer:
leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();

View File

@ -77,7 +77,7 @@ public class TestDimensionalValues extends LuceneTestCase {
new IntersectVisitor() {
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
-return Relation.QUERY_CROSSES_CELL;
+return Relation.CELL_CROSSES_QUERY;
}
public void visit(int docID) {
throw new IllegalStateException();
@ -119,7 +119,7 @@ public class TestDimensionalValues extends LuceneTestCase {
new IntersectVisitor() {
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
-return Relation.QUERY_CROSSES_CELL;
+return Relation.CELL_CROSSES_QUERY;
}
public void visit(int docID) {
throw new IllegalStateException();
@ -164,7 +164,7 @@ public class TestDimensionalValues extends LuceneTestCase {
new IntersectVisitor() {
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
-return Relation.QUERY_CROSSES_CELL;
+return Relation.CELL_CROSSES_QUERY;
}
public void visit(int docID) {
throw new IllegalStateException();
@ -411,14 +411,14 @@ public class TestDimensionalValues extends LuceneTestCase {
assert max.compareTo(min) >= 0;
if (max.compareTo(queryMin[dim]) < 0 || min.compareTo(queryMax[dim]) > 0) {
-return Relation.QUERY_OUTSIDE_CELL;
+return Relation.CELL_OUTSIDE_QUERY;
} else if (min.compareTo(queryMin[dim]) < 0 || max.compareTo(queryMax[dim]) > 0) {
crosses = true;
}
}
if (crosses) {
-return Relation.QUERY_CROSSES_CELL;
+return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
@ -1079,7 +1079,7 @@ public class TestDimensionalValues extends LuceneTestCase {
if (BKDUtil.compare(numBytesPerDim, maxPacked, dim, queryMin[dim], 0) < 0 ||
BKDUtil.compare(numBytesPerDim, minPacked, dim, queryMax[dim], 0) > 0) {
//System.out.println(" query_outside_cell");
-return Relation.QUERY_OUTSIDE_CELL;
+return Relation.CELL_OUTSIDE_QUERY;
} else if (BKDUtil.compare(numBytesPerDim, minPacked, dim, queryMin[dim], 0) < 0 ||
BKDUtil.compare(numBytesPerDim, maxPacked, dim, queryMax[dim], 0) > 0) {
crosses = true;
@ -1088,7 +1088,7 @@ public class TestDimensionalValues extends LuceneTestCase {
if (crosses) {
//System.out.println(" query_crosses_cell");
-return Relation.QUERY_CROSSES_CELL;
+return Relation.CELL_CROSSES_QUERY;
} else {
//System.out.println(" cell_inside_query");
return Relation.CELL_INSIDE_QUERY;

File diff suppressed because it is too large

View File

@ -91,11 +91,11 @@ public class TestBKD extends LuceneTestCase {
}
if (max < queryMin || min > queryMax) {
-return Relation.QUERY_OUTSIDE_CELL;
+return Relation.CELL_OUTSIDE_QUERY;
} else if (min >= queryMin && max <= queryMax) {
return Relation.CELL_INSIDE_QUERY;
} else {
-return Relation.QUERY_CROSSES_CELL;
+return Relation.CELL_CROSSES_QUERY;
}
}
});
@ -198,14 +198,14 @@ public class TestBKD extends LuceneTestCase {
assert max >= min;
if (max < queryMin[dim] || min > queryMax[dim]) {
-return Relation.QUERY_OUTSIDE_CELL;
+return Relation.CELL_OUTSIDE_QUERY;
} else if (min < queryMin[dim] || max > queryMax[dim]) {
crosses = true;
}
}
if (crosses) {
-return Relation.QUERY_CROSSES_CELL;
+return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
@ -319,14 +319,14 @@ public class TestBKD extends LuceneTestCase {
assert max.compareTo(min) >= 0;
if (max.compareTo(queryMin[dim]) < 0 || min.compareTo(queryMax[dim]) > 0) {
-return Relation.QUERY_OUTSIDE_CELL;
+return Relation.CELL_OUTSIDE_QUERY;
} else if (min.compareTo(queryMin[dim]) < 0 || max.compareTo(queryMax[dim]) > 0) {
crosses = true;
}
}
if (crosses) {
-return Relation.QUERY_CROSSES_CELL;
+return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
@ -517,6 +517,87 @@ public class TestBKD extends LuceneTestCase {
verify(docValuesArray, docIDsArray, numDims, numBytesPerDim);
}
public void testBKDUtilAdd() throws Exception {
int iters = atLeast(10000);
int numBytes = TestUtil.nextInt(random(), 1, 100);
for(int iter=0;iter<iters;iter++) {
BigInteger v1 = new BigInteger(8*numBytes-1, random());
BigInteger v2 = new BigInteger(8*numBytes-1, random());
byte[] v1Bytes = new byte[numBytes];
byte[] v1RawBytes = v1.toByteArray();
assert v1RawBytes.length <= numBytes;
System.arraycopy(v1RawBytes, 0, v1Bytes, v1Bytes.length-v1RawBytes.length, v1RawBytes.length);
byte[] v2Bytes = new byte[numBytes];
byte[] v2RawBytes = v2.toByteArray();
assert v1RawBytes.length <= numBytes;
System.arraycopy(v2RawBytes, 0, v2Bytes, v2Bytes.length-v2RawBytes.length, v2RawBytes.length);
byte[] result = new byte[numBytes];
BKDUtil.add(numBytes, 0, v1Bytes, v2Bytes, result);
BigInteger sum = v1.add(v2);
assertTrue("sum=" + sum + " v1=" + v1 + " v2=" + v2 + " but result=" + new BigInteger(1, result), sum.equals(new BigInteger(1, result)));
}
}
public void testIllegalBKDUtilAdd() throws Exception {
byte[] bytes = new byte[4];
Arrays.fill(bytes, (byte) 0xff);
byte[] one = new byte[4];
one[3] = 1;
try {
BKDUtil.add(4, 0, bytes, one, new byte[4]);
} catch (IllegalArgumentException iae) {
assertEquals("a + b overflows bytesPerDim=4", iae.getMessage());
}
}
public void testBKDUtilSubtract() throws Exception {
int iters = atLeast(10000);
int numBytes = TestUtil.nextInt(random(), 1, 100);
for(int iter=0;iter<iters;iter++) {
BigInteger v1 = new BigInteger(8*numBytes-1, random());
BigInteger v2 = new BigInteger(8*numBytes-1, random());
if (v1.compareTo(v2) < 0) {
BigInteger tmp = v1;
v1 = v2;
v2 = tmp;
}
byte[] v1Bytes = new byte[numBytes];
byte[] v1RawBytes = v1.toByteArray();
assert v1RawBytes.length <= numBytes: "length=" + v1RawBytes.length + " vs numBytes=" + numBytes;
System.arraycopy(v1RawBytes, 0, v1Bytes, v1Bytes.length-v1RawBytes.length, v1RawBytes.length);
byte[] v2Bytes = new byte[numBytes];
byte[] v2RawBytes = v2.toByteArray();
assert v2RawBytes.length <= numBytes;
assert v2RawBytes.length <= numBytes: "length=" + v2RawBytes.length + " vs numBytes=" + numBytes;
System.arraycopy(v2RawBytes, 0, v2Bytes, v2Bytes.length-v2RawBytes.length, v2RawBytes.length);
byte[] result = new byte[numBytes];
BKDUtil.subtract(numBytes, 0, v1Bytes, v2Bytes, result);
BigInteger diff = v1.subtract(v2);
assertTrue("diff=" + diff + " vs result=" + new BigInteger(result) + " v1=" + v1 + " v2=" + v2, diff.equals(new BigInteger(result)));
}
}
public void testIllegalBKDUtilSubtract() throws Exception {
byte[] v1 = new byte[4];
v1[3] = (byte) 0xf0;
byte[] v2 = new byte[4];
v2[3] = (byte) 0xf1;
try {
BKDUtil.subtract(4, 0, v1, v2, new byte[4]);
} catch (IllegalArgumentException iae) {
assertEquals("a < b", iae.getMessage());
}
}
/** docIDs can be null, for the single valued case, else it maps value to docID */
private void verify(byte[][][] docValues, int[] docIDs, int numDims, int numBytesPerDim) throws Exception {
try (Directory dir = getDirectory(docValues.length)) {
@ -627,7 +708,7 @@ public class TestBKD extends LuceneTestCase {
for(int dim=0;dim<numDims;dim++) {
if (BKDUtil.compare(numBytesPerDim, maxPacked, dim, queryMin[dim], 0) < 0 ||
BKDUtil.compare(numBytesPerDim, minPacked, dim, queryMax[dim], 0) > 0) {
-return Relation.QUERY_OUTSIDE_CELL;
+return Relation.CELL_OUTSIDE_QUERY;
} else if (BKDUtil.compare(numBytesPerDim, minPacked, dim, queryMin[dim], 0) < 0 ||
BKDUtil.compare(numBytesPerDim, maxPacked, dim, queryMax[dim], 0) > 0) {
crosses = true;
@ -635,7 +716,7 @@ public class TestBKD extends LuceneTestCase {
}
if (crosses) {
-return Relation.QUERY_CROSSES_CELL;
+return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}

View File

@ -32,8 +32,8 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.DimensionalField;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
@ -395,7 +395,7 @@ public abstract class SorterTestBase extends LuceneTestCase {
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
-return Relation.QUERY_CROSSES_CELL;
+return Relation.CELL_CROSSES_QUERY;
}
});
}

View File

@ -1,50 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DocValuesType;
/** Add this to a document to index lat/lon point, but be sure to use {@link BKDTreeDocValuesFormat} for the field. */
public final class BKDPointField extends Field {
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC);
TYPE.freeze();
}
/**
* Creates a new BKDPointField field with the specified lat and lon
* @param name field name
* @param lat double latitude
* @param lon double longitude
* @throws IllegalArgumentException if the field name is null or lat or lon are out of bounds
*/
public BKDPointField(String name, double lat, double lon) {
super(name, TYPE);
if (BKDTreeWriter.validLat(lat) == false) {
throw new IllegalArgumentException("invalid lat (" + lat + "): must be -90 to 90");
}
if (BKDTreeWriter.validLon(lon) == false) {
throw new IllegalArgumentException("invalid lon (" + lon + "): must be -180 to 180");
}
fieldsData = Long.valueOf(((long) BKDTreeWriter.encodeLat(lat) << 32) | (BKDTreeWriter.encodeLon(lon) & 0xffffffffL));
}
}

View File

@ -1,138 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
class BKDTreeDocValuesConsumer extends DocValuesConsumer implements Closeable {
final DocValuesConsumer delegate;
final int maxPointsInLeafNode;
final int maxPointsSortInHeap;
final IndexOutput out;
final Map<Integer,Long> fieldIndexFPs = new HashMap<>();
final SegmentWriteState state;
final Directory tempDir;
final String tempFileNamePrefix;
public BKDTreeDocValuesConsumer(Directory tempDir, String tempFileNamePrefix, DocValuesConsumer delegate, SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
BKDTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
this.delegate = delegate;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxPointsSortInHeap = maxPointsSortInHeap;
this.state = state;
String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BKDTreeDocValuesFormat.DATA_EXTENSION);
out = state.directory.createOutput(datFileName, state.context);
CodecUtil.writeIndexHeader(out, BKDTreeDocValuesFormat.DATA_CODEC_NAME, BKDTreeDocValuesFormat.DATA_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
}
@Override
public void close() throws IOException {
boolean success = false;
try {
CodecUtil.writeFooter(out);
success = true;
} finally {
if (success) {
IOUtils.close(delegate, out);
} else {
IOUtils.closeWhileHandlingException(delegate, out);
}
}
String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BKDTreeDocValuesFormat.META_EXTENSION);
IndexOutput metaOut = state.directory.createOutput(metaFileName, state.context);
success = false;
try {
CodecUtil.writeIndexHeader(metaOut, BKDTreeDocValuesFormat.META_CODEC_NAME, BKDTreeDocValuesFormat.META_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
metaOut.writeVInt(fieldIndexFPs.size());
for(Map.Entry<Integer,Long> ent : fieldIndexFPs.entrySet()) {
metaOut.writeVInt(ent.getKey());
metaOut.writeVLong(ent.getValue());
}
CodecUtil.writeFooter(metaOut);
success = true;
} finally {
if (success) {
IOUtils.close(metaOut);
} else {
IOUtils.closeWhileHandlingException(metaOut);
}
}
}
@Override
public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
delegate.addSortedNumericField(field, docToValueCount, values);
BKDTreeWriter writer = new BKDTreeWriter(tempDir, tempFileNamePrefix, maxPointsInLeafNode, maxPointsSortInHeap);
Iterator<Number> valueIt = values.iterator();
Iterator<Number> valueCountIt = docToValueCount.iterator();
for (int docID=0;docID<state.segmentInfo.maxDoc();docID++) {
assert valueCountIt.hasNext();
int count = valueCountIt.next().intValue();
for(int i=0;i<count;i++) {
assert valueIt.hasNext();
long value = valueIt.next().longValue();
int latEnc = (int) (value >> 32);
int lonEnc = (int) (value & 0xffffffff);
writer.add(latEnc, lonEnc, docID);
}
}
long indexStartFP = writer.finish(out);
fieldIndexFPs.put(field.number, indexStartFP);
}
@Override
public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) {
throw new UnsupportedOperationException();
}
@Override
public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) {
throw new UnsupportedOperationException();
}
@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) {
throw new UnsupportedOperationException();
}
}

View File

@ -1,109 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
/**
* A {@link DocValuesFormat} to efficiently index geo-spatial lat/lon points
* from {@link BKDPointField} for fast bounding-box ({@link BKDPointInBBoxQuery})
* and polygon ({@link BKDPointInPolygonQuery}) queries.
*
* <p>This wraps {@link Lucene54DocValuesFormat}, but saves its own BKD tree
* structures to disk for fast query-time intersection. See <a
* href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a>
* for details.
*
* <p>The BKD tree slices up 2D (lat/lon) space into smaller and
* smaller rectangles, until the smallest rectangles have approximately
* between X/2 and X (X default is 1024) points in them, at which point
* such leaf cells are written as a block to disk, while the index tree
structure, which records how space was sub-divided, is loaded into HEAP
at search time. At search time, the tree is recursed based on whether
the left or right child overlaps with the query shape, and once
* a leaf block is reached, all documents in that leaf block are collected
* if the cell is fully enclosed by the query shape, or filtered and then
* collected, if not.
*
* <p>The index is also quite compact, because docs only appear once in
* the tree (no "prefix terms").
*
* <p>In addition to the files written by {@link Lucene54DocValuesFormat}, this format writes:
* <ol>
* <li><tt>.kdd</tt>: BKD leaf data and index</li>
* <li><tt>.kdm</tt>: BKD metadata</li>
* </ol>
*
* <p>The disk format is experimental and free to change suddenly, and this code likely has new and exciting bugs!
*
* @lucene.experimental */
public class BKDTreeDocValuesFormat extends DocValuesFormat {
static final String DATA_CODEC_NAME = "BKDData";
static final int DATA_VERSION_START = 0;
static final int DATA_VERSION_CURRENT = DATA_VERSION_START;
static final String DATA_EXTENSION = "kdd";
static final String META_CODEC_NAME = "BKDMeta";
static final int META_VERSION_START = 0;
static final int META_VERSION_CURRENT = META_VERSION_START;
static final String META_EXTENSION = "kdm";
private final int maxPointsInLeafNode;
private final int maxPointsSortInHeap;
private final DocValuesFormat delegate = new Lucene54DocValuesFormat();
/** Default constructor */
public BKDTreeDocValuesFormat() {
this(BKDTreeWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDTreeWriter.DEFAULT_MAX_POINTS_SORT_IN_HEAP);
}
/** Creates this with custom configuration.
*
* @param maxPointsInLeafNode Maximum number of points in each leaf cell. Smaller values create a deeper tree with larger in-heap index and possibly
* faster searching. The default is 1024.
* @param maxPointsSortInHeap Maximum number of points where in-heap sort can be used. When the number of points exceeds this, a (slower)
* offline sort is used. The default is 128 * 1024.
*
* @lucene.experimental */
public BKDTreeDocValuesFormat(int maxPointsInLeafNode, int maxPointsSortInHeap) {
super("BKDTree");
BKDTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxPointsSortInHeap = maxPointsSortInHeap;
}
@Override
public DocValuesConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
return new BKDTreeDocValuesConsumer(state.directory, state.segmentInfo.name, delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
}
@Override
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
return new BKDTreeDocValuesProducer(delegate.fieldsProducer(state), state);
}
}

View File

@ -1,175 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
class BKDTreeDocValuesProducer extends DocValuesProducer {
private final Map<String,BKDTreeReader> treeReaders = new HashMap<>();
private final Map<Integer,Long> fieldToIndexFPs = new HashMap<>();
private final IndexInput datIn;
private final AtomicLong ramBytesUsed;
private final int maxDoc;
private final DocValuesProducer delegate;
private final boolean merging;
public BKDTreeDocValuesProducer(DocValuesProducer delegate, SegmentReadState state) throws IOException {
String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BKDTreeDocValuesFormat.META_EXTENSION);
ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaFileName, state.context);
CodecUtil.checkIndexHeader(metaIn, BKDTreeDocValuesFormat.META_CODEC_NAME, BKDTreeDocValuesFormat.META_VERSION_START, BKDTreeDocValuesFormat.META_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
int fieldCount = metaIn.readVInt();
for(int i=0;i<fieldCount;i++) {
int fieldNumber = metaIn.readVInt();
long indexFP = metaIn.readVLong();
fieldToIndexFPs.put(fieldNumber, indexFP);
}
CodecUtil.checkFooter(metaIn);
metaIn.close();
String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BKDTreeDocValuesFormat.DATA_EXTENSION);
datIn = state.directory.openInput(datFileName, state.context);
CodecUtil.checkIndexHeader(datIn, BKDTreeDocValuesFormat.DATA_CODEC_NAME, BKDTreeDocValuesFormat.DATA_VERSION_START, BKDTreeDocValuesFormat.DATA_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
maxDoc = state.segmentInfo.maxDoc();
this.delegate = delegate;
merging = false;
}
// clone for merge: we don't hang onto the BKDTrees we load
BKDTreeDocValuesProducer(BKDTreeDocValuesProducer orig) throws IOException {
assert Thread.holdsLock(orig);
datIn = orig.datIn.clone();
ramBytesUsed = new AtomicLong(orig.ramBytesUsed.get());
delegate = orig.delegate.getMergeInstance();
fieldToIndexFPs.putAll(orig.fieldToIndexFPs);
treeReaders.putAll(orig.treeReaders);
merging = true;
maxDoc = orig.maxDoc;
}
@Override
public synchronized SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
BKDTreeReader treeReader = treeReaders.get(field.name);
if (treeReader == null) {
// Lazy load
Long fp = fieldToIndexFPs.get(field.number);
if (fp == null) {
throw new IllegalArgumentException("this field was not indexed as a BKDPointField");
}
// LUCENE-6697: never do real IOPs with the original IndexInput because search
// threads can be concurrently cloning it:
IndexInput clone = datIn.clone();
clone.seek(fp);
treeReader = new BKDTreeReader(clone, maxDoc);
// Only hang onto the reader when we are not merging:
if (merging == false) {
treeReaders.put(field.name, treeReader);
ramBytesUsed.addAndGet(treeReader.ramBytesUsed());
}
}
return new BKDTreeSortedNumericDocValues(treeReader, delegate.getSortedNumeric(field));
}
@Override
public void close() throws IOException {
IOUtils.close(datIn, delegate);
}
@Override
public void checkIntegrity() throws IOException {
CodecUtil.checksumEntireFile(datIn);
}
@Override
public NumericDocValues getNumeric(FieldInfo field) {
throw new UnsupportedOperationException();
}
@Override
public BinaryDocValues getBinary(FieldInfo field) {
throw new UnsupportedOperationException();
}
@Override
public SortedDocValues getSorted(FieldInfo field) {
throw new UnsupportedOperationException();
}
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) {
throw new UnsupportedOperationException();
}
@Override
public Bits getDocsWithField(FieldInfo field) throws IOException {
return delegate.getDocsWithField(field);
}
@Override
public synchronized Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
for(Map.Entry<String,BKDTreeReader> ent : treeReaders.entrySet()) {
resources.add(Accountables.namedAccountable("field " + ent.getKey(), ent.getValue()));
}
resources.add(Accountables.namedAccountable("delegate", delegate));
return resources;
}
@Override
public synchronized DocValuesProducer getMergeInstance() throws IOException {
return new BKDTreeDocValuesProducer(this);
}
@Override
public long ramBytesUsed() {
return ramBytesUsed.get() + delegate.ramBytesUsed();
}
}

View File

@ -1,379 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.RamUsageEstimator;
/** Handles intersection of a shape with a BKD tree previously written with {@link BKDTreeWriter}.
*
* @lucene.experimental */
final class BKDTreeReader implements Accountable {
final private int[] splitValues;
final private int leafNodeOffset;
final private long[] leafBlockFPs;
final int maxDoc;
final IndexInput in;
enum Relation {CELL_INSIDE_SHAPE, SHAPE_CROSSES_CELL, SHAPE_OUTSIDE_CELL};
interface LatLonFilter {
// TODO: move DVs/encoding out on top: this method should just take a docID
boolean accept(double lat, double lon);
// TODO: move DVs/encoding out on top: this method should take ints and do its own decode
Relation compare(double latMin, double latMax, double lonMin, double lonMax);
}
public BKDTreeReader(IndexInput in, int maxDoc) throws IOException {
// Read index:
int numLeaves = in.readVInt();
leafNodeOffset = numLeaves;
// Tree is fully balanced binary tree, so number of nodes = numLeaves-1, except our nodeIDs are 1-based (splitValues[0] is unused):
splitValues = new int[numLeaves];
for(int i=0;i<numLeaves;i++) {
splitValues[i] = in.readInt();
}
leafBlockFPs = new long[numLeaves];
for(int i=0;i<numLeaves;i++) {
leafBlockFPs[i] = in.readVLong();
}
this.maxDoc = maxDoc;
this.in = in;
}
private static final class QueryState {
final IndexInput in;
byte[] scratch = new byte[16];
final ByteArrayDataInput scratchReader = new ByteArrayDataInput(scratch);
final DocIdSetBuilder docs;
final int latMinEnc;
final int latMaxEnc;
final int lonMinEnc;
final int lonMaxEnc;
final LatLonFilter latLonFilter;
final SortedNumericDocValues sndv;
public QueryState(IndexInput in, int maxDoc,
int latMinEnc, int latMaxEnc,
int lonMinEnc, int lonMaxEnc,
LatLonFilter latLonFilter,
SortedNumericDocValues sndv) {
this.in = in;
this.docs = new DocIdSetBuilder(maxDoc);
this.latMinEnc = latMinEnc;
this.latMaxEnc = latMaxEnc;
this.lonMinEnc = lonMinEnc;
this.lonMaxEnc = lonMaxEnc;
this.latLonFilter = latLonFilter;
this.sndv = sndv;
}
}
// TODO: move DVs/encoding out on top: this method should take ints, and encode should be done up above
public DocIdSet intersect(double latMin, double latMax, double lonMin, double lonMax, LatLonFilter filter, SortedNumericDocValues sndv) throws IOException {
if (BKDTreeWriter.validLat(latMin) == false) {
throw new IllegalArgumentException("invalid latMin: " + latMin);
}
if (BKDTreeWriter.validLat(latMax) == false) {
throw new IllegalArgumentException("invalid latMax: " + latMax);
}
if (BKDTreeWriter.validLon(lonMin) == false) {
throw new IllegalArgumentException("invalid lonMin: " + lonMin);
}
if (BKDTreeWriter.validLon(lonMax) == false) {
throw new IllegalArgumentException("invalid lonMax: " + lonMax);
}
int latMinEnc = BKDTreeWriter.encodeLat(latMin);
int latMaxEnc = BKDTreeWriter.encodeLat(latMax);
int lonMinEnc = BKDTreeWriter.encodeLon(lonMin);
int lonMaxEnc = BKDTreeWriter.encodeLon(lonMax);
QueryState state = new QueryState(in.clone(), maxDoc,
latMinEnc, latMaxEnc,
lonMinEnc, lonMaxEnc,
filter,
sndv);
int hitCount = intersect(state, 1,
BKDTreeWriter.encodeLat(-90.0),
BKDTreeWriter.encodeLat(Math.nextAfter(90.0, Double.POSITIVE_INFINITY)),
BKDTreeWriter.encodeLon(-180.0),
BKDTreeWriter.encodeLon(Math.nextAfter(180.0, Double.POSITIVE_INFINITY)));
// NOTE: hitCount is an over-estimate in the multi-valued case:
return state.docs.build(hitCount);
}
private boolean accept(QueryState state, int docID) throws IOException {
//System.out.println(" check accept docID=" + docID);
state.sndv.setDocument(docID);
// How many values this doc has:
int count = state.sndv.count();
for(int j=0;j<count;j++) {
long enc = state.sndv.valueAt(j);
int latEnc = (int) ((enc>>32) & 0xffffffffL);
int lonEnc = (int) (enc & 0xffffffffL);
//System.out.println(" lat=" + BKDTreeWriter.decodeLat(latEnc) + " lon=" + BKDTreeWriter.decodeLon(lonEnc));
if (latEnc >= state.latMinEnc &&
latEnc < state.latMaxEnc &&
lonEnc >= state.lonMinEnc &&
lonEnc < state.lonMaxEnc &&
(state.latLonFilter == null ||
state.latLonFilter.accept(BKDTreeWriter.decodeLat(latEnc), BKDTreeWriter.decodeLon(lonEnc)))) {
//System.out.println(" yes");
return true;
}
}
return false;
}
/** Fast path: this is called when the query rect fully encompasses all cells under this node. */
private int addAll(QueryState state, int nodeID) throws IOException {
//System.out.println(" addAll nodeID=" + nodeID);
//long latRange = (long) cellLatMaxEnc - (long) cellLatMinEnc;
//long lonRange = (long) cellLonMaxEnc - (long) cellLonMinEnc;
if (nodeID >= leafNodeOffset) {
//System.out.println(" leaf");
/*
System.out.println("A: " + BKDTreeWriter.decodeLat(cellLatMinEnc)
+ " " + BKDTreeWriter.decodeLat(cellLatMaxEnc)
+ " " + BKDTreeWriter.decodeLon(cellLonMinEnc)
+ " " + BKDTreeWriter.decodeLon(cellLonMaxEnc));
*/
// Leaf node
long fp = leafBlockFPs[nodeID-leafNodeOffset];
//System.out.println(" leaf nodeID=" + nodeID + " vs leafNodeOffset=" + leafNodeOffset + " fp=" + fp);
if (fp == 0) {
// Dead end node (adversary case):
return 0;
}
state.in.seek(fp);
//System.out.println(" seek to leafFP=" + fp);
// How many points are stored in this leaf cell:
int count = state.in.readVInt();
state.docs.grow(count);
for(int i=0;i<count;i++) {
int docID = state.in.readInt();
//System.out.println(" docID=" + docID);
assert accept(state, docID);
state.docs.add(docID);
}
return count;
} else {
int splitValue = splitValues[nodeID];
if (splitValue == Integer.MAX_VALUE) {
// Dead end node (adversary case):
return 0;
}
//System.out.println(" splitValue=" + splitValue);
//System.out.println(" addAll: inner");
int count = addAll(state, 2*nodeID);
count += addAll(state, 2*nodeID+1);
//System.out.println(" addAll: return count=" + count);
return count;
}
}
private int intersect(QueryState state,
int nodeID,
int cellLatMinEnc, int cellLatMaxEnc, int cellLonMinEnc, int cellLonMaxEnc)
throws IOException {
//System.out.println("\nBKD: intersect nodeID=" + nodeID + " lat=" + BKDTreeWriter.decodeLat(state.latMinEnc) + " TO " + BKDTreeWriter.decodeLat(state.latMaxEnc) +
//" lon=" + BKDTreeWriter.decodeLon(state.lonMinEnc) + " TO " + BKDTreeWriter.decodeLon(state.lonMaxEnc));
// 2.06 sec -> 1.52 sec for 225 OSM London queries:
if (state.latLonFilter != null) {
// Don't check the filter if the current cell fully contains the query bbox (just keep recursing in that case):
if (cellLatMinEnc > state.latMinEnc || cellLatMaxEnc < state.latMaxEnc ||
cellLonMinEnc > state.lonMinEnc || cellLonMaxEnc < state.lonMaxEnc) {
Relation r = state.latLonFilter.compare(BKDTreeWriter.decodeLat(cellLatMinEnc),
BKDTreeWriter.decodeLat(cellLatMaxEnc),
BKDTreeWriter.decodeLon(cellLonMinEnc),
BKDTreeWriter.decodeLon(cellLonMaxEnc));
// System.out.println("BKD.intersect cellLat=" + BKDTreeWriter.decodeLat(cellLatMinEnc) + " TO " + BKDTreeWriter.decodeLat(cellLatMaxEnc) + ", cellLon=" + BKDTreeWriter.decodeLon(cellLonMinEnc) + " TO " + BKDTreeWriter.decodeLon(cellLonMaxEnc) + " compare=" + r);
if (r == Relation.SHAPE_OUTSIDE_CELL) {
// This cell is fully outside of the query shape: stop recursing
return 0;
} else if (r == Relation.CELL_INSIDE_SHAPE) {
// This cell is fully inside of the query shape: recursively add all points in this cell without filtering
return addAll(state, nodeID);
} else {
// The cell crosses the shape boundary, so we fall through and do full filtering
}
} else {
//System.out.println(" straight recurse");
}
// TODO: clean this up: the bbox case should also just be a filter, and we should assert filter != null at the start
} else if (state.latMinEnc <= cellLatMinEnc && state.latMaxEnc >= cellLatMaxEnc && state.lonMinEnc <= cellLonMinEnc && state.lonMaxEnc >= cellLonMaxEnc) {
// Bbox query: optimize the case when the query fully contains this cell: we can
// recursively add all points without checking if they match the query:
return addAll(state, nodeID);
}
long latRange = (long) cellLatMaxEnc - (long) cellLatMinEnc;
long lonRange = (long) cellLonMaxEnc - (long) cellLonMinEnc;
int dim;
if (latRange >= lonRange) {
dim = 0;
} else {
dim = 1;
}
//System.out.println("\nintersect node=" + nodeID + " vs " + leafNodeOffset);
if (nodeID >= leafNodeOffset) {
// Leaf node; scan and filter all points in this block:
//System.out.println(" intersect leaf nodeID=" + nodeID + " vs leafNodeOffset=" + leafNodeOffset + " fp=" + leafBlockFPs[nodeID-leafNodeOffset]);
int hitCount = 0;
long fp = leafBlockFPs[nodeID-leafNodeOffset];
//System.out.println(" intersect leaf fp=" + fp);
if (fp == 0) {
// Dead end node (adversary case):
//System.out.println(" dead-end leaf");
return 0;
}
/*
System.out.println("I: " + BKDTreeWriter.decodeLat(cellLatMinEnc)
+ " " + BKDTreeWriter.decodeLat(cellLatMaxEnc)
+ " " + BKDTreeWriter.decodeLon(cellLonMinEnc)
+ " " + BKDTreeWriter.decodeLon(cellLonMaxEnc));
*/
state.in.seek(fp);
// How many points are stored in this leaf cell:
int count = state.in.readVInt();
state.docs.grow(count);
for(int i=0;i<count;i++) {
int docID = state.in.readInt();
if (accept(state, docID)) {
state.docs.add(docID);
hitCount++;
}
}
return hitCount;
} else {
int splitValue = splitValues[nodeID];
if (splitValue == Integer.MAX_VALUE) {
// Dead end node (adversary case):
//System.out.println(" dead-end sub-tree");
return 0;
}
//System.out.println(" splitValue=" + splitValue);
int count = 0;
if (dim == 0) {
//System.out.println(" split on lat=" + BKDTreeWriter.decodeLat(splitValue));
// Inner node split on lat:
// Left node:
if (state.latMinEnc < splitValue) {
//System.out.println(" recurse left");
count += intersect(state,
2*nodeID,
cellLatMinEnc, splitValue, cellLonMinEnc, cellLonMaxEnc);
} else {
//System.out.println(" no recurse left");
}
// Right node:
if (state.latMaxEnc >= splitValue) {
//System.out.println(" recurse right");
count += intersect(state,
2*nodeID+1,
splitValue, cellLatMaxEnc, cellLonMinEnc, cellLonMaxEnc);
} else {
//System.out.println(" no recurse right");
}
} else {
// Inner node split on lon:
assert dim == 1;
//System.out.println(" split on lon=" + BKDTreeWriter.decodeLon(splitValue));
// Left node:
if (state.lonMinEnc < splitValue) {
//System.out.println(" recurse left");
count += intersect(state,
2*nodeID,
cellLatMinEnc, cellLatMaxEnc, cellLonMinEnc, splitValue);
} else {
//System.out.println(" no recurse left");
}
// Right node:
if (state.lonMaxEnc >= splitValue) {
//System.out.println(" recurse right");
count += intersect(state,
2*nodeID+1,
cellLatMinEnc, cellLatMaxEnc, splitValue, cellLonMaxEnc);
} else {
//System.out.println(" no recurse right");
}
}
//System.out.println(" return nodeID=" + nodeID);
return count;
}
}
@Override
public long ramBytesUsed() {
return splitValues.length * RamUsageEstimator.NUM_BYTES_INT +
leafBlockFPs.length * RamUsageEstimator.NUM_BYTES_LONG;
}
}
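The traversal above never materializes tree nodes: node 1 is the root, node n's children are 2n and 2n+1, and any nodeID >= leafNodeOffset addresses a leaf block through leafBlockFPs. A minimal sketch of walking that implicit layout (hypothetical helper name printTree; the two arrays are the same ones the reader loads from the index):
// Sketch only, not part of this patch: mirrors the nodeID arithmetic that
// intersect() and addAll() use above.
static void printTree(int nodeID, int leafNodeOffset, int[] splitValues, long[] leafBlockFPs) {
  if (nodeID >= leafNodeOffset) {
    // Leaf: the array holds the file pointer of the on-disk docID block (0 marks a dead end):
    System.out.println("leaf " + nodeID + " fp=" + leafBlockFPs[nodeID - leafNodeOffset]);
  } else {
    // Inner node: the array holds the encoded lat or lon split value
    // (Integer.MAX_VALUE marks a dead-end sub-tree):
    System.out.println("inner " + nodeID + " split=" + splitValues[nodeID]);
    printTree(2 * nodeID, leafNodeOffset, splitValues, leafBlockFPs);      // left child
    printTree(2 * nodeID + 1, leafNodeOffset, splitValues, leafBlockFPs);  // right child
  }
}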

View File

@ -1,49 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.SortedNumericDocValues;
class BKDTreeSortedNumericDocValues extends SortedNumericDocValues {
final BKDTreeReader bkdTreeReader;
final SortedNumericDocValues delegate;
public BKDTreeSortedNumericDocValues(BKDTreeReader bkdTreeReader, SortedNumericDocValues delegate) {
this.bkdTreeReader = bkdTreeReader;
this.delegate = delegate;
}
public BKDTreeReader getBKDTreeReader() {
return bkdTreeReader;
}
@Override
public void setDocument(int doc) {
delegate.setDocument(doc);
}
@Override
public long valueAt(int index) {
return delegate.valueAt(index);
}
@Override
public int count() {
return delegate.count();
}
}

View File

@ -1,882 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.RamUsageEstimator;
// TODO
// - could we just "use postings" to map leaf -> docIDs?
// - the polygon query really should be 2-phase
// - if we could merge trees, we could drop delegating to wrapped DV?
// - we could also index "auto-prefix terms" here, and use better compression, and maybe only use for the "fully contained" case so we'd
// only index docIDs
// - the index could be efficiently encoded as an FST, so we don't have wasteful
// (monotonic) long[] leafBlockFPs; or we could use MonotonicLongValues ... but then
// the index is already plenty small: 60M OSM points --> 1.1 MB with 128 points
// per leaf, and you can reduce that by putting more points per leaf
// - we can quantize the split values to 2 bytes (short): http://people.csail.mit.edu/tmertens/papers/qkdtree.pdf
// - we could use threads while building; the higher nodes are very parallelizable
// - generalize to N dimensions? I think there are reasonable use cases here, e.g.
// 2 dimensional points to store houses, plus e.g. 3rd dimension for "household income"
// - geo3d integration should be straightforward? better accuracy, faster performance for small-poly-with-bbox cases? right now the poly
// check is very costly...
/** Recursively builds a BKD tree to assign all incoming points to smaller
* and smaller rectangles until the number of points in a given
 * rectangle is &lt;= the <code>maxPointsInLeafNode</code>. The tree is
* fully balanced, which means the leaf nodes will have between 50% and 100% of
* the requested <code>maxPointsInLeafNode</code>, except for the adversarial case
* of indexing exactly the same point many times.
*
* <p>
* See <a href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a> for details.
*
* <p>This consumes heap during writing: it allocates a <code>LongBitSet(numPoints)</code>,
 * and for any node with fewer than <code>maxPointsSortInHeap</code> points, it holds
* the points in memory as simple java arrays.
*
* <p>
* <b>NOTE</b>: This can write at most Integer.MAX_VALUE * <code>maxPointsInLeafNode</code> total points.
*
* @lucene.experimental */
class BKDTreeWriter {
// latEnc (int) + lonEnc (int) + ord (long) + docID (int)
static final int BYTES_PER_DOC = RamUsageEstimator.NUM_BYTES_LONG + 3 * RamUsageEstimator.NUM_BYTES_INT;
//static final boolean DEBUG = false;
public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 1024;
/** This works out to a max of ~10 MB peak heap tied up during writing. */
public static final int DEFAULT_MAX_POINTS_SORT_IN_HEAP = 128*1024;
private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
private final Directory tempDir;
private final String tempFileNamePrefix;
private OfflineSorter.ByteSequencesWriter offlineWriter;
private GrowingHeapLatLonWriter heapWriter;
private IndexOutput tempInput;
private final int maxPointsInLeafNode;
private final int maxPointsSortInHeap;
private long pointCount;
public BKDTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException {
this(tempDir, tempFileNamePrefix, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP);
}
// TODO: instead of maxPointsSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
public BKDTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxPointsSortInHeap = maxPointsSortInHeap;
// We buffer the first maxPointsSortInHeap points in heap, then cut over to offline sorting for additional points:
heapWriter = new GrowingHeapLatLonWriter(maxPointsSortInHeap);
}
public static void verifyParams(int maxPointsInLeafNode, int maxPointsSortInHeap) {
if (maxPointsInLeafNode <= 0) {
throw new IllegalArgumentException("maxPointsInLeafNode must be > 0; got " + maxPointsInLeafNode);
}
if (maxPointsInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) {
throw new IllegalArgumentException("maxPointsInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsInLeafNode);
}
if (maxPointsSortInHeap < maxPointsInLeafNode) {
throw new IllegalArgumentException("maxPointsSortInHeap must be >= maxPointsInLeafNode; got " + maxPointsSortInHeap + " vs maxPointsInLeafNode="+ maxPointsInLeafNode);
}
if (maxPointsSortInHeap > ArrayUtil.MAX_ARRAY_LENGTH) {
throw new IllegalArgumentException("maxPointsSortInHeap must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsSortInHeap);
}
}
public void add(double lat, double lon, int docID) throws IOException {
if (validLat(lat) == false) {
throw new IllegalArgumentException("invalid lat: " + lat);
}
if (validLon(lon) == false) {
throw new IllegalArgumentException("invalid lon: " + lon);
}
// Quantize to 32 bit precision, which is plenty: ~.0093 meter precision (longitude) at the equator
add(encodeLat(lat), encodeLon(lon), docID);
}
/** If the current segment has too many points then we switch over to temp files / offline sort. */
private void switchToOffline() throws IOException {
// For each .add we just append to this input file, then in .finish we sort this input and recursively build the tree:
tempInput = tempDir.createTempOutput(tempFileNamePrefix, "bkd", IOContext.DEFAULT);
offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput);
for(int i=0;i<pointCount;i++) {
scratchBytesOutput.reset(scratchBytes);
scratchBytesOutput.writeInt(heapWriter.latEncs[i]);
scratchBytesOutput.writeInt(heapWriter.lonEncs[i]);
scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
scratchBytesOutput.writeVLong(i);
// TODO: can/should OfflineSorter optimize the fixed-width case?
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
}
heapWriter = null;
}
void add(int latEnc, int lonEnc, int docID) throws IOException {
assert latEnc > Integer.MIN_VALUE;
assert latEnc < Integer.MAX_VALUE;
assert lonEnc > Integer.MIN_VALUE;
assert lonEnc < Integer.MAX_VALUE;
if (pointCount >= maxPointsSortInHeap) {
if (offlineWriter == null) {
switchToOffline();
}
scratchBytesOutput.reset(scratchBytes);
scratchBytesOutput.writeInt(latEnc);
scratchBytesOutput.writeInt(lonEnc);
scratchBytesOutput.writeVInt(docID);
scratchBytesOutput.writeVLong(pointCount);
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
} else {
// Not too many points added yet, continue using heap:
heapWriter.append(latEnc, lonEnc, pointCount, docID);
}
pointCount++;
}
/** Converts the incoming {@link ByteSequencesWriter} file to a fixed-width-per-entry file, because we need to be able to slice
* as we recurse in {@link #build}. */
private LatLonWriter convertToFixedWidth(String in) throws IOException {
BytesRefBuilder scratch = new BytesRefBuilder();
scratch.grow(BYTES_PER_DOC);
BytesRef bytes = scratch.get();
ByteArrayDataInput dataReader = new ByteArrayDataInput();
OfflineSorter.ByteSequencesReader reader = null;
LatLonWriter sortedWriter = null;
boolean success = false;
try {
reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE));
sortedWriter = getWriter(pointCount);
for (long i=0;i<pointCount;i++) {
boolean result = reader.read(scratch);
assert result;
dataReader.reset(bytes.bytes, bytes.offset, bytes.length);
int latEnc = dataReader.readInt();
int lonEnc = dataReader.readInt();
int docID = dataReader.readVInt();
long ord = dataReader.readVLong();
assert docID >= 0: "docID=" + docID;
assert latEnc > Integer.MIN_VALUE;
assert latEnc < Integer.MAX_VALUE;
assert lonEnc > Integer.MIN_VALUE;
assert lonEnc < Integer.MAX_VALUE;
sortedWriter.append(latEnc, lonEnc, ord, docID);
}
success = true;
} finally {
if (success) {
IOUtils.close(sortedWriter, reader);
} else {
IOUtils.closeWhileHandlingException(sortedWriter, reader);
try {
sortedWriter.destroy();
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
}
}
return sortedWriter;
}
private LatLonWriter sort(boolean lon) throws IOException {
if (heapWriter != null) {
// All buffered points are still in heap
assert pointCount < Integer.MAX_VALUE;
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
int docID = heapWriter.docIDs[i];
heapWriter.docIDs[i] = heapWriter.docIDs[j];
heapWriter.docIDs[j] = docID;
long ord = heapWriter.ords[i];
heapWriter.ords[i] = heapWriter.ords[j];
heapWriter.ords[j] = ord;
int latEnc = heapWriter.latEncs[i];
heapWriter.latEncs[i] = heapWriter.latEncs[j];
heapWriter.latEncs[j] = latEnc;
int lonEnc = heapWriter.lonEncs[i];
heapWriter.lonEncs[i] = heapWriter.lonEncs[j];
heapWriter.lonEncs[j] = lonEnc;
}
@Override
protected int compare(int i, int j) {
int cmp;
if (lon) {
cmp = Integer.compare(heapWriter.lonEncs[i], heapWriter.lonEncs[j]);
} else {
cmp = Integer.compare(heapWriter.latEncs[i], heapWriter.latEncs[j]);
}
if (cmp != 0) {
return cmp;
}
// Tie-break
cmp = Integer.compare(heapWriter.docIDs[i], heapWriter.docIDs[j]);
if (cmp != 0) {
return cmp;
}
return Long.compare(heapWriter.ords[i], heapWriter.ords[j]);
}
}.sort(0, (int) pointCount);
HeapLatLonWriter sorted = new HeapLatLonWriter((int) pointCount);
for(int i=0;i<pointCount;i++) {
sorted.append(heapWriter.latEncs[i],
heapWriter.lonEncs[i],
heapWriter.ords[i],
heapWriter.docIDs[i]);
}
sorted.close();
return sorted;
} else {
// Offline sort:
assert tempInput != null;
final ByteArrayDataInput reader = new ByteArrayDataInput();
Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
private final ByteArrayDataInput readerB = new ByteArrayDataInput();
@Override
public int compare(BytesRef a, BytesRef b) {
reader.reset(a.bytes, a.offset, a.length);
final int latAEnc = reader.readInt();
final int lonAEnc = reader.readInt();
final int docIDA = reader.readVInt();
final long ordA = reader.readVLong();
reader.reset(b.bytes, b.offset, b.length);
final int latBEnc = reader.readInt();
final int lonBEnc = reader.readInt();
final int docIDB = reader.readVInt();
final long ordB = reader.readVLong();
int cmp;
if (lon) {
cmp = Integer.compare(lonAEnc, lonBEnc);
} else {
cmp = Integer.compare(latAEnc, latBEnc);
}
if (cmp != 0) {
return cmp;
}
// Tie-break
cmp = Integer.compare(docIDA, docIDB);
if (cmp != 0) {
return cmp;
}
return Long.compare(ordA, ordB);
}
};
boolean success = false;
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp);
String sortedFileName = sorter.sort(tempInput.getName());
try {
LatLonWriter writer = convertToFixedWidth(sortedFileName);
success = true;
return writer;
} finally {
if (success) {
tempDir.deleteFile(sortedFileName);
} else {
IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFileName);
}
}
}
}
/** Writes the BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */
public long finish(IndexOutput out) throws IOException {
//System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapWriter);
if (offlineWriter != null) {
// This also closes the temp file output:
offlineWriter.close();
}
LongBitSet bitSet = new LongBitSet(pointCount);
long countPerLeaf = pointCount;
long innerNodeCount = 1;
while (countPerLeaf > maxPointsInLeafNode) {
countPerLeaf = (countPerLeaf+1)/2;
innerNodeCount *= 2;
}
//System.out.println("innerNodeCount=" + innerNodeCount);
if (1+2*innerNodeCount >= Integer.MAX_VALUE) {
throw new IllegalStateException("too many nodes; increase maxPointsInLeafNode (currently " + maxPointsInLeafNode + ") and reindex");
}
innerNodeCount--;
int numLeaves = (int) (innerNodeCount+1);
// Indexed by nodeID, but first (root) nodeID is 1
int[] splitValues = new int[numLeaves];
// +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 7)
long[] leafBlockFPs = new long[numLeaves];
// Make sure the math above "worked":
assert pointCount / splitValues.length <= maxPointsInLeafNode: "pointCount=" + pointCount + " splitValues.length=" + splitValues.length + " maxPointsInLeafNode=" + maxPointsInLeafNode;
//System.out.println(" avg pointsPerLeaf=" + (pointCount/splitValues.length));
// Sort all docs once by lat, once by lon:
LatLonWriter latSortedWriter = null;
LatLonWriter lonSortedWriter = null;
boolean success = false;
try {
lonSortedWriter = sort(true);
latSortedWriter = sort(false);
heapWriter = null;
build(1, numLeaves, new PathSlice(latSortedWriter, 0, pointCount),
new PathSlice(lonSortedWriter, 0, pointCount),
bitSet, out,
Integer.MIN_VALUE, Integer.MAX_VALUE,
Integer.MIN_VALUE, Integer.MAX_VALUE,
//encodeLat(-90.0), encodeLat(Math.nextAfter(90.0, Double.POSITIVE_INFINITY)),
//encodeLon(-180.0), encodeLon(Math.nextAfter(180.0, Double.POSITIVE_INFINITY)),
splitValues,
leafBlockFPs);
success = true;
} finally {
if (success) {
latSortedWriter.destroy();
lonSortedWriter.destroy();
if (tempInput != null) {
tempDir.deleteFile(tempInput.getName());
}
} else {
try {
latSortedWriter.destroy();
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
try {
lonSortedWriter.destroy();
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
if (tempInput != null) {
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName());
}
}
}
//System.out.println("Total nodes: " + innerNodeCount);
// Write index:
long indexFP = out.getFilePointer();
out.writeVInt(numLeaves);
// NOTE: splitValues[0] is unused, because nodeID is 1-based:
for (int i=0;i<splitValues.length;i++) {
out.writeInt(splitValues[i]);
}
for (int i=0;i<leafBlockFPs.length;i++) {
out.writeVLong(leafBlockFPs[i]);
}
return indexFP;
}
/** Sliced reference to points in a {@link LatLonWriter}. */
private static final class PathSlice {
final LatLonWriter writer;
final long start;
final long count;
public PathSlice(LatLonWriter writer, long start, long count) {
this.writer = writer;
this.start = start;
this.count = count;
}
@Override
public String toString() {
return "PathSlice(start=" + start + " count=" + count + " writer=" + writer + ")";
}
}
/** Marks bits for the ords (points) that belong in the left sub tree. */
private long markLeftTree(int splitDim, PathSlice source, LongBitSet bitSet, int[] splitValueRet,
int minLatEnc, int maxLatEnc, int minLonEnc, int maxLonEnc) throws IOException {
// This is the initial size of our left tree, but we may lower it below for the == case:
long leftCount = source.count / 2;
// Read the split value:
//if (DEBUG) System.out.println(" leftCount=" + leftCount + " vs " + source.count);
LatLonReader reader = source.writer.getReader(source.start + leftCount);
boolean success = false;
int splitValue;
try {
boolean result = reader.next();
assert result;
int latSplitEnc = reader.latEnc();
assert latSplitEnc >= minLatEnc && latSplitEnc < maxLatEnc: "latSplitEnc=" + latSplitEnc + " minLatEnc=" + minLatEnc + " maxLatEnc=" + maxLatEnc;
int lonSplitEnc = reader.lonEnc();
assert lonSplitEnc >= minLonEnc && lonSplitEnc < maxLonEnc: "lonSplitEnc=" + lonSplitEnc + " minLonEnc=" + minLonEnc + " maxLonEnc=" + maxLonEnc;
if (splitDim == 0) {
splitValue = latSplitEnc;
//if (DEBUG) System.out.println(" splitValue=" + decodeLat(splitValue));
} else {
splitValue = lonSplitEnc;
//if (DEBUG) System.out.println(" splitValue=" + decodeLon(splitValue));
}
success = true;
} finally {
if (success) {
IOUtils.close(reader);
} else {
IOUtils.closeWhileHandlingException(reader);
}
}
splitValueRet[0] = splitValue;
// Mark ords that fall into the left half, and also handle the == boundary case:
assert bitSet.cardinality() == 0: "cardinality=" + bitSet.cardinality();
success = false;
reader = source.writer.getReader(source.start);
try {
int lastValue = Integer.MIN_VALUE;
for (int i=0;i<leftCount;i++) {
boolean result = reader.next();
assert result;
int latEnc = reader.latEnc();
int lonEnc = reader.lonEnc();
int value;
if (splitDim == 0) {
value = latEnc;
} else {
value = lonEnc;
}
// Our input source is supposed to be sorted on the incoming dimension:
assert value >= lastValue;
lastValue = value;
if (value == splitValue) {
// TODO: we could simplify this, by allowing splitValue to be on either side?
// If we have identical points at the split, we move the count back to before the identical points:
leftCount = i;
break;
}
assert value < splitValue: "i=" + i + " value=" + value + " vs splitValue=" + splitValue;
long ord = reader.ord();
int docID = reader.docID();
assert docID >= 0: "docID=" + docID + " reader=" + reader;
// We should never see dup ords:
assert bitSet.get(ord) == false;
bitSet.set(ord);
}
success = true;
} finally {
if (success) {
IOUtils.close(reader);
} else {
IOUtils.closeWhileHandlingException(reader);
}
}
assert leftCount == bitSet.cardinality(): "leftCount=" + leftCount + " cardinality=" + bitSet.cardinality();
return leftCount;
}
/** The incoming PathSlice for the dim we will split is already partitioned/sorted. */
private void build(int nodeID, int leafNodeOffset,
PathSlice lastLatSorted,
PathSlice lastLonSorted,
LongBitSet bitSet,
IndexOutput out,
int minLatEnc, int maxLatEnc, int minLonEnc, int maxLonEnc,
int[] splitValues,
long[] leafBlockFPs) throws IOException {
PathSlice source;
PathSlice nextSource;
long latRange = (long) maxLatEnc - (long) minLatEnc;
long lonRange = (long) maxLonEnc - (long) minLonEnc;
assert lastLatSorted.count == lastLonSorted.count;
// Compute which dim we should split on at this level:
int splitDim;
if (latRange >= lonRange) {
// Split by lat:
splitDim = 0;
source = lastLatSorted;
nextSource = lastLonSorted;
} else {
// Split by lon:
splitDim = 1;
source = lastLonSorted;
nextSource = lastLatSorted;
}
long count = source.count;
//if (DEBUG) System.out.println("\nBUILD: nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset + " splitDim=" + splitDim + "\n lastLatSorted=" + lastLatSorted + "\n lastLonSorted=" + lastLonSorted + "\n count=" + count + " lat=" + decodeLat(minLatEnc) + " TO " + decodeLat(maxLatEnc) + " lon=" + decodeLon(minLonEnc) + " TO " + decodeLon(maxLonEnc));
if (count == 0) {
// Dead end in the tree, due to adversary cases, e.g. many identical points:
if (nodeID < splitValues.length) {
// Sentinel used to mark that the tree is dead under here:
splitValues[nodeID] = Integer.MAX_VALUE;
}
//if (DEBUG) System.out.println(" dead-end sub-tree");
return;
}
if (nodeID >= leafNodeOffset) {
// Leaf node: write block
//if (DEBUG) System.out.println(" leaf");
assert maxLatEnc > minLatEnc;
assert maxLonEnc > minLonEnc;
//System.out.println("\nleaf:\n lat range: " + ((long) maxLatEnc-minLatEnc));
//System.out.println(" lon range: " + ((long) maxLonEnc-minLonEnc));
// Sort by docID in the leaf so we get sequentiality at search time (may not matter?):
LatLonReader reader = source.writer.getReader(source.start);
// TODO: we can reuse this
int[] docIDs = new int[(int) count];
boolean success = false;
try {
for (int i=0;i<source.count;i++) {
// NOTE: we discard ord at this point; we only needed it temporarily
// during building to uniquely identify each point to properly handle
// the multi-valued case (one docID having multiple values):
// We also discard lat/lon, since at search time, we reside on the
// wrapped doc values for this:
boolean result = reader.next();
assert result;
docIDs[i] = reader.docID();
}
success = true;
} finally {
if (success) {
IOUtils.close(reader);
} else {
IOUtils.closeWhileHandlingException(reader);
}
}
Arrays.sort(docIDs);
// Dedup docIDs: for the multi-valued case where more than one value for the doc
// wound up in this leaf cell, we only need to store the docID once:
int lastDocID = -1;
int uniqueCount = 0;
for(int i=0;i<docIDs.length;i++) {
int docID = docIDs[i];
if (docID != lastDocID) {
uniqueCount++;
lastDocID = docID;
}
}
assert uniqueCount <= count;
long startFP = out.getFilePointer();
out.writeVInt(uniqueCount);
// Save the block file pointer:
leafBlockFPs[nodeID - leafNodeOffset] = startFP;
//System.out.println(" leafFP=" + startFP);
lastDocID = -1;
for (int i=0;i<docIDs.length;i++) {
// Absolute int encode; with "vInt of deltas" encoding, the .kdd size dropped from
// 697 MB -> 539 MB, but query time for 225 queries went from 1.65 sec -> 2.64 sec.
// I think if we also indexed prefix terms here we could do less costly compression
// on those lists:
int docID = docIDs[i];
if (docID != lastDocID) {
//System.out.println(" docID=" + docID);
out.writeInt(docID);
lastDocID = docID;
}
}
//long endFP = out.getFilePointer();
//System.out.println(" bytes/doc: " + ((endFP - startFP) / count));
} else {
// Inner node: partition/recurse
assert nodeID < splitValues.length: "nodeID=" + nodeID + " splitValues.length=" + splitValues.length;
int[] splitValueArray = new int[1];
long leftCount = markLeftTree(splitDim, source, bitSet, splitValueArray,
minLatEnc, maxLatEnc, minLonEnc, maxLonEnc);
int splitValue = splitValueArray[0];
// TODO: we could save split value in here so we don't have to re-open file later:
// Partition nextSource into sorted left and right sets, so we can recurse. This is somewhat hairy: we partition the next lon set
// according to how we had just partitioned the lat set, and vice versa:
LatLonWriter leftWriter = null;
LatLonWriter rightWriter = null;
LatLonReader reader = null;
boolean success = false;
int nextLeftCount = 0;
try {
leftWriter = getWriter(leftCount);
rightWriter = getWriter(count - leftCount);
//if (DEBUG) System.out.println(" partition:\n splitValueEnc=" + splitValue + "\n " + nextSource + "\n --> leftSorted=" + leftWriter + "\n --> rightSorted=" + rightWriter + ")");
reader = nextSource.writer.getReader(nextSource.start);
// TODO: we could compute the split value here for each sub-tree and save an O(N) pass on recursion, but makes code hairier and only
// changes the constant factor of building, not the big-oh:
for (int i=0;i<count;i++) {
boolean result = reader.next();
assert result;
int latEnc = reader.latEnc();
int lonEnc = reader.lonEnc();
long ord = reader.ord();
int docID = reader.docID();
assert docID >= 0: "docID=" + docID + " reader=" + reader;
if (bitSet.get(ord)) {
if (splitDim == 0) {
assert latEnc < splitValue: "latEnc=" + latEnc + " splitValue=" + splitValue;
} else {
assert lonEnc < splitValue: "lonEnc=" + lonEnc + " splitValue=" + splitValue;
}
leftWriter.append(latEnc, lonEnc, ord, docID);
nextLeftCount++;
} else {
if (splitDim == 0) {
assert latEnc >= splitValue: "latEnc=" + latEnc + " splitValue=" + splitValue;
} else {
assert lonEnc >= splitValue: "lonEnc=" + lonEnc + " splitValue=" + splitValue;
}
rightWriter.append(latEnc, lonEnc, ord, docID);
}
}
bitSet.clear(0, pointCount);
success = true;
} finally {
if (success) {
IOUtils.close(reader, leftWriter, rightWriter);
} else {
IOUtils.closeWhileHandlingException(reader, leftWriter, rightWriter);
}
}
assert leftCount == nextLeftCount: "leftCount=" + leftCount + " nextLeftCount=" + nextLeftCount;
success = false;
try {
if (splitDim == 0) {
//if (DEBUG) System.out.println(" recurse left");
build(2*nodeID, leafNodeOffset,
new PathSlice(source.writer, source.start, leftCount),
new PathSlice(leftWriter, 0, leftCount),
bitSet,
out,
minLatEnc, splitValue, minLonEnc, maxLonEnc,
splitValues, leafBlockFPs);
leftWriter.destroy();
//if (DEBUG) System.out.println(" recurse right");
build(2*nodeID+1, leafNodeOffset,
new PathSlice(source.writer, source.start+leftCount, count-leftCount),
new PathSlice(rightWriter, 0, count - leftCount),
bitSet,
out,
splitValue, maxLatEnc, minLonEnc, maxLonEnc,
splitValues, leafBlockFPs);
rightWriter.destroy();
} else {
//if (DEBUG) System.out.println(" recurse left");
build(2*nodeID, leafNodeOffset,
new PathSlice(leftWriter, 0, leftCount),
new PathSlice(source.writer, source.start, leftCount),
bitSet,
out,
minLatEnc, maxLatEnc, minLonEnc, splitValue,
splitValues, leafBlockFPs);
leftWriter.destroy();
//if (DEBUG) System.out.println(" recurse right");
build(2*nodeID+1, leafNodeOffset,
new PathSlice(rightWriter, 0, count-leftCount),
new PathSlice(source.writer, source.start+leftCount, count-leftCount),
bitSet,
out,
minLatEnc, maxLatEnc, splitValue, maxLonEnc,
splitValues, leafBlockFPs);
rightWriter.destroy();
}
success = true;
} finally {
if (success == false) {
try {
leftWriter.destroy();
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
try {
rightWriter.destroy();
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
}
}
splitValues[nodeID] = splitValue;
}
}
LatLonWriter getWriter(long count) throws IOException {
if (count < maxPointsSortInHeap) {
return new HeapLatLonWriter((int) count);
} else {
return new OfflineLatLonWriter(tempDir, tempFileNamePrefix, count);
}
}
// TODO: move/share all this into GeoUtils
// We allow one iota over the true max:
static final double MAX_LAT_INCL = Math.nextAfter(90.0D, Double.POSITIVE_INFINITY);
static final double MAX_LON_INCL = Math.nextAfter(180.0D, Double.POSITIVE_INFINITY);
static final double MIN_LAT_INCL = -90.0D;
static final double MIN_LON_INCL = -180.0D;
static boolean validLat(double lat) {
return Double.isNaN(lat) == false && lat >= MIN_LAT_INCL && lat <= MAX_LAT_INCL;
}
static boolean validLon(double lon) {
return Double.isNaN(lon) == false && lon >= MIN_LON_INCL && lon <= MAX_LON_INCL;
}
private static final int BITS = 32;
// -3 so that valid lat/lon values never hit Integer.MIN_VALUE or Integer.MAX_VALUE:
private static final double LON_SCALE = ((0x1L<<BITS)-3)/360.0D;
private static final double LAT_SCALE = ((0x1L<<BITS)-3)/180.0D;
/** Max quantization error for both lat and lon when encoding/decoding into 32 bits */
public static final double TOLERANCE = 1E-7;
/** Quantizes double (64 bit) latitude into 32 bits */
static int encodeLat(double lat) {
assert validLat(lat): "lat=" + lat;
long x = (long) (lat * LAT_SCALE);
// We use Integer.MAX_VALUE as a sentinel:
assert x < Integer.MAX_VALUE: "lat=" + lat + " mapped to Integer.MAX_VALUE + " + (x - Integer.MAX_VALUE);
assert x > Integer.MIN_VALUE: "lat=" + lat + " mapped to Integer.MIN_VALUE";
return (int) x;
}
/** Quantizes double (64 bit) longitude into 32 bits */
static int encodeLon(double lon) {
assert validLon(lon): "lon=" + lon;
long x = (long) (lon * LON_SCALE);
// We use Integer.MAX_VALUE as a sentinel:
assert x < Integer.MAX_VALUE;
assert x > Integer.MIN_VALUE;
return (int) x;
}
/** Turns quantized value from {@link #encodeLat} back into a double. */
static double decodeLat(int x) {
return x / LAT_SCALE;
}
/** Turns quantized value from {@link #encodeLon} back into a double. */
static double decodeLon(int x) {
return x / LON_SCALE;
}
}
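The quantization above maps each degree of latitude to roughly 2.4E7 integer steps, so truncation loses at most 1/LAT_SCALE ~ 4.2E-8 degrees, comfortably under TOLERANCE. A small round-trip sketch (the example latitude is arbitrary; the scale constant is copied from the class):
// Worked example of encodeLat/decodeLat:
double lat = 51.5074;                              // arbitrary example latitude
double LAT_SCALE = ((0x1L << 32) - 3) / 180.0D;    // same constant as above, ~2.4E7 steps/degree
int enc = (int) (lat * LAT_SCALE);                 // encodeLat: truncate into 32 bits
double back = enc / LAT_SCALE;                     // decodeLat
// |back - lat| < 1/LAT_SCALE ~ 4.2E-8, which is under TOLERANCE (1E-7):
assert Math.abs(back - lat) <= 1E-7;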

View File

@ -1,88 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
final class GrowingHeapLatLonWriter implements LatLonWriter {
int[] latEncs;
int[] lonEncs;
int[] docIDs;
long[] ords;
private int nextWrite;
final int maxSize;
public GrowingHeapLatLonWriter(int maxSize) {
latEncs = new int[16];
lonEncs = new int[16];
docIDs = new int[16];
ords = new long[16];
this.maxSize = maxSize;
}
private int[] growExact(int[] arr, int size) {
assert size > arr.length;
int[] newArr = new int[size];
System.arraycopy(arr, 0, newArr, 0, arr.length);
return newArr;
}
private long[] growExact(long[] arr, int size) {
assert size > arr.length;
long[] newArr = new long[size];
System.arraycopy(arr, 0, newArr, 0, arr.length);
return newArr;
}
@Override
public void append(int latEnc, int lonEnc, long ord, int docID) {
assert ord == nextWrite;
if (latEncs.length == nextWrite) {
int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, RamUsageEstimator.NUM_BYTES_INT));
assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite;
latEncs = growExact(latEncs, nextSize);
lonEncs = growExact(lonEncs, nextSize);
ords = growExact(ords, nextSize);
docIDs = growExact(docIDs, nextSize);
}
latEncs[nextWrite] = latEnc;
lonEncs[nextWrite] = lonEnc;
ords[nextWrite] = ord;
docIDs[nextWrite] = docID;
nextWrite++;
}
@Override
public LatLonReader getReader(long start) {
return new HeapLatLonReader(latEncs, lonEncs, ords, docIDs, (int) start, nextWrite);
}
@Override
public void close() {
}
@Override
public void destroy() {
}
@Override
public String toString() {
return "GrowingHeapLatLonWriter(count=" + nextWrite + " alloc=" + latEncs.length + ")";
}
}

View File

@ -1,67 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
final class HeapLatLonReader implements LatLonReader {
private int curRead;
final int[] latEncs;
final int[] lonEncs;
final long[] ords;
final int[] docIDs;
final int end;
HeapLatLonReader(int[] latEncs, int[] lonEncs, long[] ords, int[] docIDs, int start, int end) {
this.latEncs = latEncs;
this.lonEncs = lonEncs;
this.ords = ords;
this.docIDs = docIDs;
curRead = start-1;
this.end = end;
}
@Override
public boolean next() {
curRead++;
return curRead < end;
}
@Override
public int latEnc() {
return latEncs[curRead];
}
@Override
public int lonEnc() {
return lonEncs[curRead];
}
@Override
public int docID() {
return docIDs[curRead];
}
@Override
public long ord() {
return ords[curRead];
}
@Override
public void close() {
}
}

View File

@ -1,66 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
final class HeapLatLonWriter implements LatLonWriter {
final int[] latEncs;
final int[] lonEncs;
final int[] docIDs;
final long[] ords;
private int nextWrite;
private boolean closed;
public HeapLatLonWriter(int count) {
latEncs = new int[count];
lonEncs = new int[count];
docIDs = new int[count];
ords = new long[count];
}
@Override
public void append(int latEnc, int lonEnc, long ord, int docID) {
latEncs[nextWrite] = latEnc;
lonEncs[nextWrite] = lonEnc;
ords[nextWrite] = ord;
docIDs[nextWrite] = docID;
nextWrite++;
}
@Override
public LatLonReader getReader(long start) {
assert closed;
return new HeapLatLonReader(latEncs, lonEncs, ords, docIDs, (int) start, latEncs.length);
}
@Override
public void close() {
closed = true;
if (nextWrite != latEncs.length) {
throw new IllegalStateException("only wrote " + nextWrite + " values, but expected " + latEncs.length);
}
}
@Override
public void destroy() {
}
@Override
public String toString() {
return "HeapLatLonWriter(count=" + latEncs.length + ")";
}
}

View File

@ -1,31 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
interface LatLonReader extends Closeable {
boolean next() throws IOException;
int latEnc();
int lonEnc();
long ord();
int docID();
}

View File

@ -1,29 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
interface LatLonWriter extends Closeable {
void append(int latEnc, int lonEnc, long ord, int docID) throws IOException;
LatLonReader getReader(long start) throws IOException;
void destroy() throws IOException;
}
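A hypothetical lifecycle sketch of this contract: every implementation expects all points to be appended first, then close() (which validates the expected count), and only then getReader(); destroy() releases any backing temp file. The encoded values, ords and docIDs below are made up:
LatLonWriter writer = new HeapLatLonWriter(2);
writer.append(100, 200, 0L, 42);   // encoded lat, encoded lon, ord, docID
writer.append(300, 400, 1L, 43);
writer.close();                    // HeapLatLonWriter verifies exactly 2 points were written
LatLonReader reader = writer.getReader(0);
while (reader.next()) {
  int docID = reader.docID();      // points come back in append order
}
reader.close();
writer.destroy();                  // no-op for heap; deletes the temp file for offline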

View File

@ -1,78 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
final class OfflineLatLonReader implements LatLonReader {
final IndexInput in;
long countLeft;
private int latEnc;
private int lonEnc;
private long ord;
private int docID;
OfflineLatLonReader(Directory tempDir, String tempFileName, long start, long count) throws IOException {
in = tempDir.openInput(tempFileName, IOContext.READONCE);
in.seek(start * BKDTreeWriter.BYTES_PER_DOC);
this.countLeft = count;
}
@Override
public boolean next() throws IOException {
if (countLeft == 0) {
return false;
}
countLeft--;
latEnc = in.readInt();
lonEnc = in.readInt();
ord = in.readLong();
docID = in.readInt();
return true;
}
@Override
public int latEnc() {
return latEnc;
}
@Override
public int lonEnc() {
return lonEnc;
}
@Override
public long ord() {
return ord;
}
@Override
public int docID() {
return docID;
}
@Override
public void close() throws IOException {
in.close();
}
}

View File

@ -1,77 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
final class OfflineLatLonWriter implements LatLonWriter {
final Directory tempDir;
final byte[] scratchBytes = new byte[BKDTreeWriter.BYTES_PER_DOC];
final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
final IndexOutput out;
final long count;
private long countWritten;
private boolean closed;
public OfflineLatLonWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException {
this.tempDir = tempDir;
out = tempDir.createTempOutput(tempFileNamePrefix, "bkd", IOContext.DEFAULT);
this.count = count;
}
@Override
public void append(int latEnc, int lonEnc, long ord, int docID) throws IOException {
out.writeInt(latEnc);
out.writeInt(lonEnc);
out.writeLong(ord);
out.writeInt(docID);
countWritten++;
}
@Override
public LatLonReader getReader(long start) throws IOException {
assert closed;
return new OfflineLatLonReader(tempDir, out.getName(), start, count-start);
}
@Override
public void close() throws IOException {
closed = true;
out.close();
if (count != countWritten) {
throw new IllegalStateException("wrote " + countWritten + " values, but expected " + count);
}
}
@Override
public void destroy() throws IOException {
tempDir.deleteFile(out.getName());
}
@Override
public String toString() {
return "OfflineLatLonWriter(count=" + count + " tempFileName=" + out.getName() + ")";
}
}
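The seek in OfflineLatLonReader works only because every record is exactly BYTES_PER_DOC = 20 bytes (int latEnc + int lonEnc + long ord + int docID, written by append() above). A short sketch of the offset arithmetic, with an assumed slice start:
// BYTES_PER_DOC = 4 (latEnc) + 4 (lonEnc) + 8 (ord) + 4 (docID) = 20 bytes per record.
long start = 1_000_000L;                            // hypothetical slice start (record index)
long offset = start * BKDTreeWriter.BYTES_PER_DOC;  // = 20,000,000: seek target in the temp file
// OfflineLatLonReader seeks to this offset and then reads int, int, long, int per next().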

View File

@ -1,28 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- not a package-info.java, because we already defined this package in core/ -->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
This package contains a BKD spatial tree implementation for indexing lat/lon points and fast shape searching.
</body>
</html>

View File

@ -0,0 +1,88 @@
package org.apache.lucene.document;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.bkd.BKDUtil;
/** Add this to a document to index a lat/lon point dimensionally. */
public final class DimensionalLatLonField extends Field {
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDimensions(2, 4);
TYPE.freeze();
}
/**
* Creates a new DimensionalLatLonField with the specified lat and lon
* @param name field name
* @param lat double latitude
* @param lon double longitude
* @throws IllegalArgumentException if the field name is null or lat or lon are out of bounds
*/
public DimensionalLatLonField(String name, double lat, double lon) {
super(name, TYPE);
if (GeoUtils.isValidLat(lat) == false) {
throw new IllegalArgumentException("invalid lat (" + lat + "): must be -90 to 90");
}
if (GeoUtils.isValidLon(lon) == false) {
throw new IllegalArgumentException("invalid lon (" + lon + "): must be -180 to 180");
}
byte[] bytes = new byte[8];
BKDUtil.intToBytes(encodeLat(lat), bytes, 0);
BKDUtil.intToBytes(encodeLon(lon), bytes, 1);
fieldsData = new BytesRef(bytes);
}
public static final double TOLERANCE = 1E-7;
private static final int BITS = 32;
private static final double LON_SCALE = (0x1L<<BITS)/360.0D;
private static final double LAT_SCALE = (0x1L<<BITS)/180.0D;
/** Quantizes double (64 bit) latitude into 32 bits */
public static int encodeLat(double lat) {
assert GeoUtils.isValidLat(lat): "lat=" + lat;
long x = (long) (lat * LAT_SCALE);
assert x < Integer.MAX_VALUE: "lat=" + lat + " mapped to Integer.MAX_VALUE + " + (x - Integer.MAX_VALUE);
assert x > Integer.MIN_VALUE: "lat=" + lat + " mapped to Integer.MIN_VALUE";
return (int) x;
}
/** Quantizes double (64 bit) longitude into 32 bits */
public static int encodeLon(double lon) {
assert GeoUtils.isValidLon(lon): "lon=" + lon;
long x = (long) (lon * LON_SCALE);
assert x < Integer.MAX_VALUE;
assert x > Integer.MIN_VALUE;
return (int) x;
}
/** Turns quantized value from {@link #encodeLat} back into a double. */
public static double decodeLat(int x) {
return x / LAT_SCALE;
}
/** Turns quantized value from {@link #encodeLon} back into a double. */
public static double decodeLon(int x) {
return x / LON_SCALE;
}
}
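A minimal indexing sketch for the new field, using standard Lucene APIs; the index path, field name and analyzer choice are placeholders:
// Sketch only: imports assumed from org.apache.lucene.analysis.standard,
// org.apache.lucene.document, org.apache.lucene.index and org.apache.lucene.store.
Directory dir = FSDirectory.open(Paths.get("/tmp/index"));           // hypothetical path
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
Document doc = new Document();
// Packs 2 dimensions x 4 bytes each, per TYPE.setDimensions(2, 4) above:
doc.add(new DimensionalLatLonField("location", 40.7128, -74.0060));  // lat, lon
w.addDocument(doc);
w.close();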

View File

@ -1,84 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
final class GrowingHeapSliceWriter implements SliceWriter {
long[] values;
int[] docIDs;
long[] ords;
private int nextWrite;
final int maxSize;
public GrowingHeapSliceWriter(int maxSize) {
values = new long[16];
docIDs = new int[16];
ords = new long[16];
this.maxSize = maxSize;
}
private int[] growExact(int[] arr, int size) {
assert size > arr.length;
int[] newArr = new int[size];
System.arraycopy(arr, 0, newArr, 0, arr.length);
return newArr;
}
private long[] growExact(long[] arr, int size) {
assert size > arr.length;
long[] newArr = new long[size];
System.arraycopy(arr, 0, newArr, 0, arr.length);
return newArr;
}
@Override
public void append(long value, long ord, int docID) {
assert ord == nextWrite;
if (values.length == nextWrite) {
int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, RamUsageEstimator.NUM_BYTES_INT));
assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite;
values = growExact(values, nextSize);
ords = growExact(ords, nextSize);
docIDs = growExact(docIDs, nextSize);
}
values[nextWrite] = value;
ords[nextWrite] = ord;
docIDs[nextWrite] = docID;
nextWrite++;
}
@Override
public SliceReader getReader(long start) {
return new HeapSliceReader(values, ords, docIDs, (int) start, nextWrite);
}
@Override
public void close() {
}
@Override
public void destroy() {
}
@Override
public String toString() {
return "GrowingHeapSliceWriter(count=" + nextWrite + " alloc=" + values.length + ")";
}
}

View File

@ -1,60 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
final class HeapSliceReader implements SliceReader {
private int curRead;
final long[] values;
final long[] ords;
final int[] docIDs;
final int end;
HeapSliceReader(long[] values, long[] ords, int[] docIDs, int start, int end) {
this.values = values;
this.ords = ords;
this.docIDs = docIDs;
curRead = start-1;
this.end = end;
}
@Override
public boolean next() {
curRead++;
return curRead < end;
}
@Override
public long value() {
return values[curRead];
}
@Override
public int docID() {
return docIDs[curRead];
}
@Override
public long ord() {
return ords[curRead];
}
@Override
public void close() {
}
}

View File

@ -1,63 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
final class HeapSliceWriter implements SliceWriter {
final long[] values;
final int[] docIDs;
final long[] ords;
private int nextWrite;
private boolean closed;
public HeapSliceWriter(int count) {
values = new long[count];
docIDs = new int[count];
ords = new long[count];
}
@Override
public void append(long value, long ord, int docID) {
values[nextWrite] = value;
ords[nextWrite] = ord;
docIDs[nextWrite] = docID;
nextWrite++;
}
@Override
public SliceReader getReader(long start) {
assert closed;
return new HeapSliceReader(values, ords, docIDs, (int) start, values.length);
}
@Override
public void close() {
closed = true;
if (nextWrite != values.length) {
throw new IllegalStateException("only wrote " + nextWrite + " values, but expected " + values.length);
}
}
@Override
public void destroy() {
}
@Override
public String toString() {
return "HeapSliceWriter(count=" + values.length + ")";
}
}


@ -1,157 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import java.io.IOException;
/** Finds all previously indexed long values that fall within the specified range.
*
* <p>The field must be indexed with {@link RangeTreeDocValuesFormat}, and {@link SortedNumericDocValuesField} added per document.
*
* @lucene.experimental */
public class NumericRangeTreeQuery extends Query {
final String field;
final Long minValue;
final Long maxValue;
final boolean minInclusive;
final boolean maxInclusive;
// TODO: sugar for all numeric conversions?
/** Matches all values in the specified long range. */
public NumericRangeTreeQuery(String field, Long minValue, boolean minInclusive, Long maxValue, boolean maxInclusive) {
this.field = field;
this.minInclusive = minInclusive;
this.minValue = minValue;
this.maxInclusive = maxInclusive;
this.maxValue = maxValue;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
// I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
// used in the first pass:
return new ConstantScoreWeight(this) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
SortedNumericDocValues sdv = reader.getSortedNumericDocValues(field);
if (sdv == null) {
// No docs in this segment had this field
return null;
}
if (sdv instanceof RangeTreeSortedNumericDocValues == false) {
throw new IllegalStateException("field \"" + field + "\" was not indexed with RangeTreeDocValuesFormat: got: " + sdv);
}
RangeTreeSortedNumericDocValues treeDV = (RangeTreeSortedNumericDocValues) sdv;
RangeTreeReader tree = treeDV.getRangeTreeReader();
// lower
long minBoundIncl = (minValue == null) ? Long.MIN_VALUE : minValue.longValue();
if (minInclusive == false && minValue != null) {
if (minBoundIncl == Long.MAX_VALUE) {
return null;
}
minBoundIncl++;
}
// upper
long maxBoundIncl = (maxValue == null) ? Long.MAX_VALUE : maxValue.longValue();
if (maxInclusive == false && maxValue != null) {
if (maxBoundIncl == Long.MIN_VALUE) {
return null;
}
maxBoundIncl--;
}
if (maxBoundIncl < minBoundIncl) {
return null;
}
DocIdSet result = tree.intersect(minBoundIncl, maxBoundIncl, treeDV.delegate, context.reader().maxDoc());
final DocIdSetIterator disi = result.iterator();
return new ConstantScoreScorer(this, score(), disi);
}
};
}
@Override
public int hashCode() {
int hash = super.hashCode();
if (minValue != null) hash += minValue.hashCode()^0x14fa55fb;
if (maxValue != null) hash += maxValue.hashCode()^0x733fa5fe;
return hash +
(Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+
(Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe);
}
@Override
public boolean equals(Object other) {
if (super.equals(other)) {
final NumericRangeTreeQuery q = (NumericRangeTreeQuery) other;
return (
(q.minValue == null ? minValue == null : q.minValue.equals(minValue)) &&
(q.maxValue == null ? maxValue == null : q.maxValue.equals(maxValue)) &&
minInclusive == q.minInclusive &&
maxInclusive == q.maxInclusive
);
}
return false;
}
@Override
public String toString(String field) {
final StringBuilder sb = new StringBuilder();
sb.append(getClass().getSimpleName());
sb.append(':');
if (this.field.equals(field) == false) {
sb.append("field=");
sb.append(this.field);
sb.append(':');
}
return sb.append(minInclusive ? '[' : '{')
.append((minValue == null) ? "*" : minValue.toString())
.append(" TO ")
.append((maxValue == null) ? "*" : maxValue.toString())
.append(maxInclusive ? ']' : '}')
.toString();
}
}
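
A minimal query-side sketch, assuming an index whose "price" field was written with RangeTreeDocValuesFormat (the field name and the open IndexSearcher "searcher" are hypothetical; null bounds mean open-ended, per the scorer code above):

// 10 <= price < 100; pass null for an open-ended bound:
Query q = new NumericRangeTreeQuery("price", 10L, true, 100L, false);
TopDocs hits = searcher.search(q, 10);
System.out.println(hits.totalHits + " matching docs");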


@ -1,71 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
final class OfflineSliceReader implements SliceReader {
final IndexInput in;
private long countLeft;
private long value;
private long ord;
private int docID;
OfflineSliceReader(Directory tempDir, String tempFileName, long start, long count) throws IOException {
in = tempDir.openInput(tempFileName, IOContext.READONCE);
in.seek(start * RangeTreeWriter.BYTES_PER_DOC);
this.countLeft = count;
}
@Override
public boolean next() throws IOException {
if (countLeft == 0) {
return false;
}
countLeft--;
value = in.readLong();
ord = in.readLong();
docID = in.readInt();
return true;
}
@Override
public long value() {
return value;
}
@Override
public long ord() {
return ord;
}
@Override
public int docID() {
return docID;
}
@Override
public void close() throws IOException {
in.close();
}
}


@ -1,76 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
final class OfflineSliceWriter implements SliceWriter {
final Directory tempDir;
final byte[] scratchBytes = new byte[RangeTreeWriter.BYTES_PER_DOC];
final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
final IndexOutput tempFile;
final long count;
private boolean closed;
private long countWritten;
public OfflineSliceWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException {
this.tempDir = tempDir;
tempFile = tempDir.createTempOutput(tempFileNamePrefix, "rangetree", IOContext.DEFAULT);
this.count = count;
}
@Override
public void append(long value, long ord, int docID) throws IOException {
tempFile.writeLong(value);
tempFile.writeLong(ord);
tempFile.writeInt(docID);
countWritten++;
}
@Override
public SliceReader getReader(long start) throws IOException {
assert closed;
return new OfflineSliceReader(tempDir, tempFile.getName(), start, count-start);
}
@Override
public void close() throws IOException {
closed = true;
tempFile.close();
if (count != countWritten) {
throw new IllegalStateException("wrote " + countWritten + " values, but expected " + count);
}
}
@Override
public void destroy() throws IOException {
tempDir.deleteFile(tempFile.getName());
}
@Override
public String toString() {
return "OfflineSliceWriter(count=" + count + " tempFileName=" + tempFile.getName() + ")";
}
}
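
A standalone sketch of the fixed-width record layout the offline reader/writer pair above relies on: value (8 bytes) + ord (8 bytes) + docID (4 bytes) is 20 bytes per record, so getReader(start) is a single seek to start * BYTES_PER_DOC instead of a scan (plain Java, no Lucene dependency):

import java.nio.ByteBuffer;

public class FixedWidthLayoutDemo {
  static final int BYTES_PER_DOC = 8 + 8 + 4;

  public static void main(String[] args) {
    ByteBuffer buf = ByteBuffer.allocate(3 * BYTES_PER_DOC);
    for (int i = 0; i < 3; i++) {
      buf.putLong(100L + i);  // value
      buf.putLong(i);         // ord
      buf.putInt(i * 7);      // docID
    }
    // Random access to record 2 by offset alone, no scanning:
    buf.position(2 * BYTES_PER_DOC);
    System.out.println("value=" + buf.getLong()
        + " ord=" + buf.getLong() + " docID=" + buf.getInt());
  }
}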


@ -1,148 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
class RangeTreeDocValuesConsumer extends DocValuesConsumer implements Closeable {
final DocValuesConsumer delegate;
final int maxPointsInLeafNode;
final int maxPointsSortInHeap;
final IndexOutput out;
final Map<Integer,Long> fieldIndexFPs = new HashMap<>();
final SegmentWriteState state;
public RangeTreeDocValuesConsumer(DocValuesConsumer delegate, SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
RangeTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
this.delegate = delegate;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxPointsSortInHeap = maxPointsSortInHeap;
this.state = state;
String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.DATA_EXTENSION);
out = state.directory.createOutput(datFileName, state.context);
CodecUtil.writeIndexHeader(out, RangeTreeDocValuesFormat.DATA_CODEC_NAME, RangeTreeDocValuesFormat.DATA_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
}
@Override
public void close() throws IOException {
boolean success = false;
try {
CodecUtil.writeFooter(out);
success = true;
} finally {
if (success) {
IOUtils.close(delegate, out);
} else {
IOUtils.closeWhileHandlingException(delegate, out);
}
}
String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.META_EXTENSION);
IndexOutput metaOut = state.directory.createOutput(metaFileName, state.context);
success = false;
try {
CodecUtil.writeIndexHeader(metaOut, RangeTreeDocValuesFormat.META_CODEC_NAME, RangeTreeDocValuesFormat.META_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
metaOut.writeVInt(fieldIndexFPs.size());
for(Map.Entry<Integer,Long> ent : fieldIndexFPs.entrySet()) {
metaOut.writeVInt(ent.getKey());
metaOut.writeVLong(ent.getValue());
}
CodecUtil.writeFooter(metaOut);
success = true;
} finally {
if (success) {
IOUtils.close(metaOut);
} else {
IOUtils.closeWhileHandlingException(metaOut);
}
}
}
@Override
public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
delegate.addSortedNumericField(field, docToValueCount, values);
RangeTreeWriter writer = new RangeTreeWriter(state.directory, state.segmentInfo.name, maxPointsInLeafNode, maxPointsSortInHeap);
Iterator<Number> valueIt = values.iterator();
Iterator<Number> valueCountIt = docToValueCount.iterator();
//System.out.println("\nSNF: field=" + field.name);
for (int docID=0;docID<state.segmentInfo.maxDoc();docID++) {
assert valueCountIt.hasNext();
int count = valueCountIt.next().intValue();
for(int i=0;i<count;i++) {
assert valueIt.hasNext();
writer.add(valueIt.next().longValue(), docID);
}
}
long indexStartFP = writer.finish(out);
fieldIndexFPs.put(field.number, indexStartFP);
}
@Override
public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
throw new UnsupportedOperationException("use either SortedNumericDocValuesField or SortedSetDocValuesField");
}
@Override
public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) {
throw new UnsupportedOperationException("use either SortedNumericDocValuesField or SortedSetDocValuesField");
}
@Override
public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) {
throw new UnsupportedOperationException("use either SortedNumericDocValuesField or SortedSetDocValuesField");
}
@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
delegate.addSortedSetField(field, values, docToOrdCount, ords);
RangeTreeWriter writer = new RangeTreeWriter(state.directory, state.segmentInfo.name, maxPointsInLeafNode, maxPointsSortInHeap);
Iterator<Number> docToOrdCountIt = docToOrdCount.iterator();
Iterator<Number> ordsIt = ords.iterator();
//System.out.println("\nSSF: field=" + field.name);
for (int docID=0;docID<state.segmentInfo.maxDoc();docID++) {
assert docToOrdCountIt.hasNext();
int count = docToOrdCountIt.next().intValue();
for(int i=0;i<count;i++) {
assert ordsIt.hasNext();
long ord = ordsIt.next().longValue();
writer.add(ord, docID);
}
}
long indexStartFP = writer.finish(out);
fieldIndexFPs.put(field.number, indexStartFP);
}
}


@ -1,112 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import java.io.IOException;
/**
* A {@link DocValuesFormat} to efficiently index numeric values from
* {@link SortedNumericDocValuesField} or BytesRef values from {@link SortedSetDocValuesField}
* for numeric range queries using {@link NumericRangeTreeQuery} and arbitrary binary
* range queries using {@link SortedSetRangeTreeQuery}.
*
* <p>This wraps {@link Lucene54DocValuesFormat}, but saves its own numeric tree
* structures to disk for fast query-time intersection. See <a
* href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a>
* for details.
*
* <p>The numeric tree slices up 1D space into smaller and
* smaller ranges, until the smallest ranges have approximately
* between X/2 and X (X default is 1024) values in them, at which point
* such leaf cells are written as a block to disk, while the index tree
* structure, which records how space was sub-divided, is loaded into heap
* at search time. The tree is then recursed based on whether the left or
* right child overlaps with the query range, and once
* a leaf block is reached, all documents in that leaf block are collected
* if the cell is fully enclosed by the query shape, or filtered and then
* collected, if not.
*
* <p>The index is also quite compact, because docs only appear once in
* the tree (no "prefix terms").
*
* <p>In addition to the files written by {@link Lucene54DocValuesFormat}, this format writes:
* <ol>
* <li><tt>.ndd</tt>: numeric tree leaf data and index</li>
* <li><tt>.ndm</tt>: numeric tree metadata</li>
* </ol>
*
* <p>The disk format is experimental and free to change suddenly, and this code likely has new and exciting bugs!
*
* @lucene.experimental */
public class RangeTreeDocValuesFormat extends DocValuesFormat {
static final String DATA_CODEC_NAME = "RangeTreeData";
static final int DATA_VERSION_START = 0;
static final int DATA_VERSION_CURRENT = DATA_VERSION_START;
static final String DATA_EXTENSION = "ndd";
static final String META_CODEC_NAME = "RangeTreeMeta";
static final int META_VERSION_START = 0;
static final int META_VERSION_CURRENT = META_VERSION_START;
static final String META_EXTENSION = "ndm";
private final int maxPointsInLeafNode;
private final int maxPointsSortInHeap;
private final DocValuesFormat delegate = new Lucene54DocValuesFormat();
/** Default constructor */
public RangeTreeDocValuesFormat() {
this(RangeTreeWriter.DEFAULT_MAX_VALUES_IN_LEAF_NODE, RangeTreeWriter.DEFAULT_MAX_VALUES_SORT_IN_HEAP);
}
/** Creates this with custom configuration.
*
* @param maxPointsInLeafNode Maximum number of points in each leaf cell. Smaller values create a deeper tree with larger in-heap index and possibly
* faster searching. The default is 1024.
* @param maxPointsSortInHeap Maximum number of points where in-heap sort can be used. When the number of points exceeds this, a (slower)
* offline sort is used. The default is 128 * 1024.
*
* @lucene.experimental */
public RangeTreeDocValuesFormat(int maxPointsInLeafNode, int maxPointsSortInHeap) {
super("RangeTree");
RangeTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxPointsSortInHeap = maxPointsSortInHeap;
}
@Override
public DocValuesConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
return new RangeTreeDocValuesConsumer(delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
}
@Override
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
return new RangeTreeDocValuesProducer(delegate.fieldsProducer(state), state);
}
}
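
A hedged wiring sketch: doc values formats are installed per field by overriding the default codec's getDocValuesFormatForField hook. The codec class name below is an assumption for this trunk; adjust it to whatever the current default codec is:

// Route only the "price" field through the range tree (hypothetical field name):
IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
iwc.setCodec(new Lucene60Codec() {
  @Override
  public DocValuesFormat getDocValuesFormatForField(String field) {
    if ("price".equals(field)) {
      return new RangeTreeDocValuesFormat();
    }
    return super.getDocValuesFormatForField(field);
  }
});
// Then add per-document values as usual:
// doc.add(new SortedNumericDocValuesField("price", 42L));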


@ -1,196 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
class RangeTreeDocValuesProducer extends DocValuesProducer {
private final Map<String,RangeTreeReader> treeReaders = new HashMap<>();
private final Map<Integer,Long> fieldToIndexFPs = new HashMap<>();
private final IndexInput datIn;
private final AtomicLong ramBytesUsed;
private final int maxDoc;
private final DocValuesProducer delegate;
private final boolean merging;
public RangeTreeDocValuesProducer(DocValuesProducer delegate, SegmentReadState state) throws IOException {
String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.META_EXTENSION);
ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaFileName, state.context);
CodecUtil.checkIndexHeader(metaIn, RangeTreeDocValuesFormat.META_CODEC_NAME, RangeTreeDocValuesFormat.META_VERSION_START, RangeTreeDocValuesFormat.META_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
int fieldCount = metaIn.readVInt();
for(int i=0;i<fieldCount;i++) {
int fieldNumber = metaIn.readVInt();
long indexFP = metaIn.readVLong();
fieldToIndexFPs.put(fieldNumber, indexFP);
}
CodecUtil.checkFooter(metaIn);
metaIn.close();
String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.DATA_EXTENSION);
datIn = state.directory.openInput(datFileName, state.context);
CodecUtil.checkIndexHeader(datIn, RangeTreeDocValuesFormat.DATA_CODEC_NAME, RangeTreeDocValuesFormat.DATA_VERSION_START, RangeTreeDocValuesFormat.DATA_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
maxDoc = state.segmentInfo.maxDoc();
this.delegate = delegate;
merging = false;
}
// clone for merge: we don't hang onto the RangeTrees we load
RangeTreeDocValuesProducer(RangeTreeDocValuesProducer orig) throws IOException {
assert Thread.holdsLock(orig);
datIn = orig.datIn.clone();
ramBytesUsed = new AtomicLong(orig.ramBytesUsed.get());
delegate = orig.delegate.getMergeInstance();
fieldToIndexFPs.putAll(orig.fieldToIndexFPs);
treeReaders.putAll(orig.treeReaders);
merging = true;
maxDoc = orig.maxDoc;
}
@Override
public synchronized SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
RangeTreeReader treeReader = treeReaders.get(field.name);
if (treeReader == null) {
// Lazy load
Long fp = fieldToIndexFPs.get(field.number);
// The FieldInfos check has already ensured we are a DV field of this type, and the Codec ensures
// this DVFormat was used at write time:
assert fp != null;
// LUCENE-6697: never do real IOPs with the original IndexInput because search
// threads can be concurrently cloning it:
IndexInput clone = datIn.clone();
clone.seek(fp);
treeReader = new RangeTreeReader(clone);
// Only hang onto the reader when we are not merging:
if (merging == false) {
treeReaders.put(field.name, treeReader);
ramBytesUsed.addAndGet(treeReader.ramBytesUsed());
}
}
return new RangeTreeSortedNumericDocValues(treeReader, delegate.getSortedNumeric(field));
}
@Override
public void close() throws IOException {
IOUtils.close(datIn, delegate);
}
@Override
public void checkIntegrity() throws IOException {
CodecUtil.checksumEntireFile(datIn);
}
@Override
public NumericDocValues getNumeric(FieldInfo field) {
throw new UnsupportedOperationException();
}
@Override
public BinaryDocValues getBinary(FieldInfo field) {
throw new UnsupportedOperationException();
}
@Override
public SortedDocValues getSorted(FieldInfo field) {
throw new UnsupportedOperationException();
}
@Override
public synchronized SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
RangeTreeReader treeReader = treeReaders.get(field.name);
if (treeReader == null) {
// Lazy load
Long fp = fieldToIndexFPs.get(field.number);
// The FieldInfos check has already ensured we are a DV field of this type, and the Codec ensures
// this DVFormat was used at write time:
assert fp != null;
// LUCENE-6697: never do real IOPs with the original IndexInput because search
// threads can be concurrently cloning it:
IndexInput clone = datIn.clone();
clone.seek(fp);
treeReader = new RangeTreeReader(clone);
// Only hang onto the reader when we are not merging:
if (merging == false) {
treeReaders.put(field.name, treeReader);
ramBytesUsed.addAndGet(treeReader.ramBytesUsed());
}
}
return new RangeTreeSortedSetDocValues(treeReader, delegate.getSortedSet(field));
}
@Override
public Bits getDocsWithField(FieldInfo field) throws IOException {
return delegate.getDocsWithField(field);
}
@Override
public synchronized Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
for(Map.Entry<String,RangeTreeReader> ent : treeReaders.entrySet()) {
resources.add(Accountables.namedAccountable("field " + ent.getKey(), ent.getValue()));
}
resources.add(Accountables.namedAccountable("delegate", delegate));
return resources;
}
@Override
public synchronized DocValuesProducer getMergeInstance() throws IOException {
return new RangeTreeDocValuesProducer(this);
}
@Override
public long ramBytesUsed() {
return ramBytesUsed.get() + delegate.ramBytesUsed();
}
}


@ -1,202 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Arrays;
/** Handles intersection of a range with a numeric tree previously written with {@link RangeTreeWriter}.
*
* @lucene.experimental */
final class RangeTreeReader implements Accountable {
final private long[] blockFPs;
final private long[] blockMinValues;
final IndexInput in;
final long globalMaxValue;
final int approxDocsPerBlock;
public RangeTreeReader(IndexInput in) throws IOException {
// Read index:
int numLeaves = in.readVInt();
approxDocsPerBlock = in.readVInt();
blockMinValues = new long[numLeaves];
for(int i=0;i<numLeaves;i++) {
blockMinValues[i] = in.readLong();
}
blockFPs = new long[numLeaves];
for(int i=0;i<numLeaves;i++) {
blockFPs[i] = in.readVLong();
}
globalMaxValue = in.readLong();
this.in = in;
}
public long getMinValue() {
return blockMinValues[0];
}
public long getMaxValue() {
return globalMaxValue;
}
private static final class QueryState {
final IndexInput in;
final DocIdSetBuilder docs;
final long minValueIncl;
final long maxValueIncl;
final SortedNumericDocValues sndv;
public QueryState(IndexInput in, int maxDoc,
long minValueIncl, long maxValueIncl,
SortedNumericDocValues sndv) {
this.in = in;
this.docs = new DocIdSetBuilder(maxDoc);
this.minValueIncl = minValueIncl;
this.maxValueIncl = maxValueIncl;
this.sndv = sndv;
}
}
public DocIdSet intersect(long minIncl, long maxIncl, SortedNumericDocValues sndv, int maxDoc) throws IOException {
if (minIncl > maxIncl) {
return DocIdSet.EMPTY;
}
if (minIncl > globalMaxValue || maxIncl < blockMinValues[0]) {
return DocIdSet.EMPTY;
}
QueryState state = new QueryState(in.clone(), maxDoc,
minIncl, maxIncl,
sndv);
int startBlockIncl = Arrays.binarySearch(blockMinValues, minIncl);
if (startBlockIncl >= 0) {
// There can be dups here, when the same value is added many
// times. Also, we need the first block whose min is < minIncl:
while (startBlockIncl > 0 && blockMinValues[startBlockIncl] == minIncl) {
startBlockIncl--;
}
} else {
startBlockIncl = Math.max(-startBlockIncl-2, 0);
}
int endBlockIncl = Arrays.binarySearch(blockMinValues, maxIncl);
if (endBlockIncl >= 0) {
// There can be dups here, when the same value is added many
// times. Also, we need to walk past any later blocks whose min is == maxIncl:
while (endBlockIncl < blockMinValues.length-1 && blockMinValues[endBlockIncl] == maxIncl) {
endBlockIncl++;
}
} else {
endBlockIncl = Math.max(-endBlockIncl-2, 0);
}
assert startBlockIncl <= endBlockIncl;
state.in.seek(blockFPs[startBlockIncl]);
//System.out.println("startBlockIncl=" + startBlockIncl + " endBlockIncl=" + endBlockIncl);
// Rough estimate of how many hits we'll see. Note that in the degenerate case
// (index same value many times) this could be a big over-estimate, but in the typical
// case it's good:
state.docs.grow(approxDocsPerBlock * (endBlockIncl - startBlockIncl + 1));
int hitCount = 0;
for (int block=startBlockIncl;block<=endBlockIncl;block++) {
boolean doFilter = blockMinValues[block] <= minIncl || block == blockMinValues.length-1 || blockMinValues[block+1] >= maxIncl;
//System.out.println(" block=" + block + " min=" + blockMinValues[block] + " doFilter=" + doFilter);
int newCount;
if (doFilter) {
// We must filter each hit:
newCount = addSome(state);
} else {
newCount = addAll(state);
}
hitCount += newCount;
}
// NOTE: hitCount is an over-estimate in the multi-valued case:
return state.docs.build(hitCount);
}
/** Adds all docs from the current block. */
private int addAll(QueryState state) throws IOException {
// How many values are stored in this leaf cell:
int count = state.in.readVInt();
state.docs.grow(count);
for(int i=0;i<count;i++) {
int docID = state.in.readInt();
state.docs.add(docID);
}
return count;
}
/** Adds docs from the current block, filtering each hit against the query min/max. This
* is only needed on the boundary blocks. */
private int addSome(QueryState state) throws IOException {
int hitCount = 0;
// How many points are stored in this leaf cell:
int count = state.in.readVInt();
state.docs.grow(count);
for(int i=0;i<count;i++) {
int docID = state.in.readInt();
state.sndv.setDocument(docID);
// How many values this doc has:
int docValueCount = state.sndv.count();
for(int j=0;j<docValueCount;j++) {
long value = state.sndv.valueAt(j);
if (value >= state.minValueIncl && value <= state.maxValueIncl) {
state.docs.add(docID);
hitCount++;
// Stop processing values for this doc:
break;
}
}
}
return hitCount;
}
@Override
public long ramBytesUsed() {
return blockMinValues.length * RamUsageEstimator.NUM_BYTES_LONG +
blockFPs.length * RamUsageEstimator.NUM_BYTES_LONG;
}
}
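
A standalone sketch of the boundary-block selection in intersect() above: Arrays.binarySearch finds a candidate leaf, and the dup-walking loops handle the case where the same value was indexed so many times that consecutive blocks share the same minimum (plain Java, no Lucene dependency):

import java.util.Arrays;

public class BlockRangeDemo {
  public static void main(String[] args) {
    long[] blockMinValues = {0, 10, 10, 10, 20, 30};
    long minIncl = 10, maxIncl = 25;

    int start = Arrays.binarySearch(blockMinValues, minIncl);
    if (start >= 0) {
      // binarySearch may land on any duplicate; back up past the first one:
      while (start > 0 && blockMinValues[start] == minIncl) {
        start--;
      }
    } else {
      start = Math.max(-start - 2, 0);
    }

    int end = Arrays.binarySearch(blockMinValues, maxIncl);
    if (end >= 0) {
      while (end < blockMinValues.length - 1 && blockMinValues[end] == maxIncl) {
        end++;
      }
    } else {
      end = Math.max(-end - 2, 0);
    }
    System.out.println("visit blocks [" + start + ".." + end + "]");  // [0..4]
  }
}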


@ -1,49 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.SortedNumericDocValues;
class RangeTreeSortedNumericDocValues extends SortedNumericDocValues {
final RangeTreeReader rangeTreeReader;
final SortedNumericDocValues delegate;
public RangeTreeSortedNumericDocValues(RangeTreeReader rangeTreeReader, SortedNumericDocValues delegate) {
this.rangeTreeReader = rangeTreeReader;
this.delegate = delegate;
}
public RangeTreeReader getRangeTreeReader() {
return rangeTreeReader;
}
@Override
public void setDocument(int doc) {
delegate.setDocument(doc);
}
@Override
public long valueAt(int index) {
return delegate.valueAt(index);
}
@Override
public int count() {
return delegate.count();
}
}


@ -1,66 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
class RangeTreeSortedSetDocValues extends SortedSetDocValues {
final RangeTreeReader rangeTreeReader;
final SortedSetDocValues delegate;
public RangeTreeSortedSetDocValues(RangeTreeReader rangeTreeReader, SortedSetDocValues delegate) {
this.rangeTreeReader = rangeTreeReader;
this.delegate = delegate;
}
public RangeTreeReader getRangeTreeReader() {
return rangeTreeReader;
}
@Override
public long nextOrd() {
return delegate.nextOrd();
}
@Override
public void setDocument(int doc) {
delegate.setDocument(doc);
}
@Override
public BytesRef lookupOrd(long ord) {
return delegate.lookupOrd(ord);
}
@Override
public long getValueCount() {
return delegate.getValueCount();
}
@Override
public long lookupTerm(BytesRef key) {
return delegate.lookupTerm(key);
}
@Override
public TermsEnum termsEnum() {
return delegate.termsEnum();
}
}


@ -1,580 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.RamUsageEstimator;
// TODO
// - could we just "use postings" to map leaf -> docIDs?
// - we could also index "auto-prefix terms" here, and use better compression
// - the index could be efficiently encoded as an FST, so we don't have wasteful
// (monotonic) long[] leafBlockFPs; or we could use MonotonicLongValues ... but then
// the index is already plenty small: 60M OSM points --> 1.1 MB with 128 points
// per leaf, and you can reduce that by putting more points per leaf
// - we can quantize the split values to 2 bytes (short): http://people.csail.mit.edu/tmertens/papers/qkdtree.pdf
/** Recursively builds a 1d BKD tree to assign all incoming {@code long} values to smaller
* and smaller ranges until the number of values in a given
* range is &lt;= <code>maxValuesInLeafNode</code>. The tree is
* fully balanced, which means the leaf nodes will have between 50% and 100% of
* the requested <code>maxValuesInLeafNode</code>, except for the adversarial case
* of indexing exactly the same value many times.
*
* <p>
* See <a href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a> for details.
*
* <p>This consumes heap during writing: for any node with fewer than <code>maxValuesSortInHeap</code> values, it holds
* the values in memory as simple java arrays.
*
* <p>
* <b>NOTE</b>: This can write at most Integer.MAX_VALUE * <code>maxValuesInLeafNode</code> total values,
* which should be plenty since a Lucene index can have at most Integer.MAX_VALUE-1 documents.
*
* @lucene.experimental */
class RangeTreeWriter {
// value (long) + ord (long) + docID (int)
static final int BYTES_PER_DOC = 2 * RamUsageEstimator.NUM_BYTES_LONG + RamUsageEstimator.NUM_BYTES_INT;
public static final int DEFAULT_MAX_VALUES_IN_LEAF_NODE = 1024;
/** This works out to max of ~10 MB peak heap tied up during writing: */
public static final int DEFAULT_MAX_VALUES_SORT_IN_HEAP = 128*1024;
private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
private final Directory tempDir;
private final String tempFileNamePrefix;
private OfflineSorter.ByteSequencesWriter offlineWriter;
private GrowingHeapSliceWriter heapWriter;
private IndexOutput tempInput;
private final int maxValuesInLeafNode;
private final int maxValuesSortInHeap;
private long valueCount;
private long globalMinValue = Long.MAX_VALUE;
private long globalMaxValue = Long.MIN_VALUE;
public RangeTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException {
this(tempDir, tempFileNamePrefix, DEFAULT_MAX_VALUES_IN_LEAF_NODE, DEFAULT_MAX_VALUES_SORT_IN_HEAP);
}
// TODO: instead of maxValuesSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
public RangeTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxValuesInLeafNode, int maxValuesSortInHeap) throws IOException {
verifyParams(maxValuesInLeafNode, maxValuesSortInHeap);
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
this.maxValuesInLeafNode = maxValuesInLeafNode;
this.maxValuesSortInHeap = maxValuesSortInHeap;
// We write first maxValuesSortInHeap in heap, then cutover to offline for additional points:
heapWriter = new GrowingHeapSliceWriter(maxValuesSortInHeap);
}
public static void verifyParams(int maxValuesInLeafNode, int maxValuesSortInHeap) {
if (maxValuesInLeafNode <= 0) {
throw new IllegalArgumentException("maxValuesInLeafNode must be > 0; got " + maxValuesInLeafNode);
}
if (maxValuesInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) {
throw new IllegalArgumentException("maxValuesInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxValuesInLeafNode);
}
if (maxValuesSortInHeap < maxValuesInLeafNode) {
throw new IllegalArgumentException("maxValuesSortInHeap must be >= maxValuesInLeafNode; got " + maxValuesSortInHeap + " vs maxValuesInLeafNode="+ maxValuesInLeafNode);
}
if (maxValuesSortInHeap > ArrayUtil.MAX_ARRAY_LENGTH) {
throw new IllegalArgumentException("maxValuesSortInHeap must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxValuesSortInHeap);
}
}
/** If the current segment has too many values then we switch over to temp files / offline sort. */
private void switchToOffline() throws IOException {
// For each .add we just append to this input file, then in .finish we sort this input and recursively build the tree:
tempInput = tempDir.createTempOutput(tempFileNamePrefix, "rangetree", IOContext.DEFAULT);
offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput);
for(int i=0;i<valueCount;i++) {
scratchBytesOutput.reset(scratchBytes);
scratchBytesOutput.writeLong(heapWriter.values[i]);
scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
scratchBytesOutput.writeVLong(i);
// TODO: can/should OfflineSorter optimize the fixed-width case?
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
}
heapWriter = null;
}
void add(long value, int docID) throws IOException {
if (valueCount >= maxValuesSortInHeap) {
if (offlineWriter == null) {
switchToOffline();
}
scratchBytesOutput.reset(scratchBytes);
scratchBytesOutput.writeLong(value);
scratchBytesOutput.writeVInt(docID);
scratchBytesOutput.writeVLong(valueCount);
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
} else {
// Not too many points added yet, continue using heap:
heapWriter.append(value, valueCount, docID);
}
valueCount++;
globalMaxValue = Math.max(value, globalMaxValue);
globalMinValue = Math.min(value, globalMinValue);
}
/** Converts the incoming {@link ByteSequencesWriter} file to a fixed-width-per-entry file, because we need to be able to slice
* as we recurse in {@link #build}. */
private SliceWriter convertToFixedWidth(String in) throws IOException {
BytesRefBuilder scratch = new BytesRefBuilder();
scratch.grow(BYTES_PER_DOC);
BytesRef bytes = scratch.get();
ByteArrayDataInput dataReader = new ByteArrayDataInput();
OfflineSorter.ByteSequencesReader reader = null;
SliceWriter sortedWriter = null;
boolean success = false;
try {
reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE));
sortedWriter = getWriter(valueCount);
for (long i=0;i<valueCount;i++) {
boolean result = reader.read(scratch);
assert result;
dataReader.reset(bytes.bytes, bytes.offset, bytes.length);
long value = dataReader.readLong();
int docID = dataReader.readVInt();
assert docID >= 0: "docID=" + docID;
long ord = dataReader.readVLong();
sortedWriter.append(value, ord, docID);
}
success = true;
} finally {
if (success) {
IOUtils.close(sortedWriter, reader);
} else {
IOUtils.closeWhileHandlingException(sortedWriter, reader);
try {
sortedWriter.destroy();
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
}
}
return sortedWriter;
}
private SliceWriter sort() throws IOException {
if (heapWriter != null) {
assert valueCount < Integer.MAX_VALUE;
// All buffered points are still in heap
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
int docID = heapWriter.docIDs[i];
heapWriter.docIDs[i] = heapWriter.docIDs[j];
heapWriter.docIDs[j] = docID;
long ord = heapWriter.ords[i];
heapWriter.ords[i] = heapWriter.ords[j];
heapWriter.ords[j] = ord;
long value = heapWriter.values[i];
heapWriter.values[i] = heapWriter.values[j];
heapWriter.values[j] = value;
}
@Override
protected int compare(int i, int j) {
int cmp = Long.compare(heapWriter.values[i], heapWriter.values[j]);
if (cmp != 0) {
return cmp;
}
// Tie-break
cmp = Integer.compare(heapWriter.docIDs[i], heapWriter.docIDs[j]);
if (cmp != 0) {
return cmp;
}
return Long.compare(heapWriter.ords[i], heapWriter.ords[j]);
}
}.sort(0, (int) valueCount);
HeapSliceWriter sorted = new HeapSliceWriter((int) valueCount);
for(int i=0;i<valueCount;i++) {
sorted.append(heapWriter.values[i],
heapWriter.ords[i],
heapWriter.docIDs[i]);
}
sorted.close();
return sorted;
} else {
// Offline sort:
assert tempInput != null;
final ByteArrayDataInput reader = new ByteArrayDataInput();
Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
private final ByteArrayDataInput readerB = new ByteArrayDataInput();
@Override
public int compare(BytesRef a, BytesRef b) {
reader.reset(a.bytes, a.offset, a.length);
final long valueA = reader.readLong();
final int docIDA = reader.readVInt();
final long ordA = reader.readVLong();
reader.reset(b.bytes, b.offset, b.length);
final long valueB = reader.readLong();
final int docIDB = reader.readVInt();
final long ordB = reader.readVLong();
int cmp = Long.compare(valueA, valueB);
if (cmp != 0) {
return cmp;
}
// Tie-break
cmp = Integer.compare(docIDA, docIDB);
if (cmp != 0) {
return cmp;
}
return Long.compare(ordA, ordB);
}
};
boolean success = false;
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp);
String sortedFileName = sorter.sort(tempInput.getName());
try {
SliceWriter writer = convertToFixedWidth(sortedFileName);
success = true;
return writer;
} finally {
if (success) {
tempDir.deleteFile(sortedFileName);
} else {
IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFileName);
}
}
}
}
/** Writes the 1d BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */
public long finish(IndexOutput out) throws IOException {
if (offlineWriter != null) {
offlineWriter.close();
}
if (valueCount == 0) {
throw new IllegalStateException("at least one value must be indexed");
}
// TODO: we should use in-memory sort here, if number of points is small enough:
long countPerLeaf = valueCount;
long innerNodeCount = 1;
while (countPerLeaf > maxValuesInLeafNode) {
countPerLeaf = (countPerLeaf+1)/2;
innerNodeCount *= 2;
}
//System.out.println("innerNodeCount=" + innerNodeCount);
if (1+2*innerNodeCount >= Integer.MAX_VALUE) {
throw new IllegalStateException("too many nodes; increase maxValuesInLeafNode (currently " + maxValuesInLeafNode + ") and reindex");
}
innerNodeCount--;
int numLeaves = (int) (innerNodeCount+1);
// Indexed by nodeID, but first (root) nodeID is 1
long[] blockMinValues = new long[numLeaves];
// +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 7)
long[] leafBlockFPs = new long[numLeaves];
// Make sure the math above "worked":
assert valueCount / blockMinValues.length <= maxValuesInLeafNode: "valueCount=" + valueCount + " blockMinValues.length=" + blockMinValues.length + " maxValuesInLeafNode=" + maxValuesInLeafNode;
//System.out.println(" avg pointsPerLeaf=" + (valueCount/blockMinValues.length));
// Sort all docs by value:
SliceWriter sortedWriter = null;
boolean success = false;
try {
sortedWriter = sort();
heapWriter = null;
build(1, numLeaves,
new PathSlice(sortedWriter, 0, valueCount),
out,
globalMinValue, globalMaxValue,
blockMinValues,
leafBlockFPs);
success = true;
} finally {
if (success) {
sortedWriter.destroy();
if (tempInput != null) {
tempDir.deleteFile(tempInput.getName());
}
} else {
try {
sortedWriter.destroy();
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
if (tempInput != null) {
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName());
}
}
}
//System.out.println("Total nodes: " + innerNodeCount);
// Write index:
long indexFP = out.getFilePointer();
out.writeVInt(numLeaves);
out.writeVInt((int) (valueCount / numLeaves));
for (int i=0;i<blockMinValues.length;i++) {
out.writeLong(blockMinValues[i]);
}
for (int i=0;i<leafBlockFPs.length;i++) {
out.writeVLong(leafBlockFPs[i]);
}
out.writeLong(globalMaxValue);
return indexFP;
}
/** Sliced reference to points in an OfflineSorter.ByteSequencesWriter file. */
private static final class PathSlice {
final SliceWriter writer;
final long start;
final long count;
public PathSlice(SliceWriter writer, long start, long count) {
this.writer = writer;
this.start = start;
this.count = count;
}
@Override
public String toString() {
return "PathSlice(start=" + start + " count=" + count + " writer=" + writer + ")";
}
}
private long getSplitValue(PathSlice source, long leftCount, long minValue, long maxValue) throws IOException {
// Read the split value:
SliceReader reader = source.writer.getReader(source.start + leftCount);
boolean success = false;
long splitValue;
try {
boolean result = reader.next();
assert result;
splitValue = reader.value();
assert splitValue >= minValue && splitValue <= maxValue: "splitValue=" + splitValue + " minValue=" + minValue + " maxValue=" + maxValue + " reader=" + reader;
success = true;
} finally {
if (success) {
IOUtils.close(reader);
} else {
IOUtils.closeWhileHandlingException(reader);
}
}
return splitValue;
}
/** The incoming PathSlice for the dim we will split is already partitioned/sorted. */
private void build(int nodeID, int leafNodeOffset,
PathSlice source,
IndexOutput out,
long minValue, long maxValue,
long[] blockMinValues,
long[] leafBlockFPs) throws IOException {
long count = source.count;
if (source.writer instanceof OfflineSliceWriter && count <= maxValuesSortInHeap) {
// Cutover to heap:
SliceWriter writer = new HeapSliceWriter((int) count);
SliceReader reader = source.writer.getReader(source.start);
try {
for(int i=0;i<count;i++) {
boolean hasNext = reader.next();
assert hasNext;
writer.append(reader.value(), reader.ord(), reader.docID());
}
} finally {
IOUtils.close(reader, writer);
}
source = new PathSlice(writer, 0, count);
}
// We should never hit dead-end nodes on recursion even in the adversarial cases:
assert count > 0;
if (nodeID >= leafNodeOffset) {
// Leaf node: write block
assert maxValue >= minValue;
//System.out.println("\nleaf:\n lat range: " + ((long) maxLatEnc-minLatEnc));
//System.out.println(" lon range: " + ((long) maxLonEnc-minLonEnc));
// Sort by docID in the leaf so we can .or(DISI) at search time:
SliceReader reader = source.writer.getReader(source.start);
int[] docIDs = new int[(int) count];
boolean success = false;
try {
for (int i=0;i<source.count;i++) {
// NOTE: we discard ord at this point; we only needed it temporarily
// during building to uniquely identify each point to properly handle
// the multi-valued case (one docID having multiple values):
// We also discard the value itself, since at search time we rely on the
// wrapped doc values for this:
boolean result = reader.next();
assert result;
docIDs[i] = reader.docID();
}
success = true;
} finally {
if (success) {
IOUtils.close(reader);
} else {
IOUtils.closeWhileHandlingException(reader);
}
}
// TODO: not clear we need to do this anymore (we used to make a DISI over
// the block at search time), but maybe it buys some memory
// locality/sequentiality at search time?
Arrays.sort(docIDs);
// Dedup docIDs: for the multi-valued case where more than one value for the doc
// wound up in this leaf cell, we only need to store the docID once:
int lastDocID = -1;
int uniqueCount = 0;
for(int i=0;i<docIDs.length;i++) {
int docID = docIDs[i];
if (docID != lastDocID) {
uniqueCount++;
lastDocID = docID;
}
}
assert uniqueCount <= count;
// TODO: in theory we could compute exactly what this fp will be, since we fixed-width (writeInt) encode docID, and up-front we know
// how many docIDs are in every leaf since we don't do anything special about multiple splitValue boundary case?
long startFP = out.getFilePointer();
out.writeVInt(uniqueCount);
// Save the block file pointer:
int blockID = nodeID - leafNodeOffset;
leafBlockFPs[blockID] = startFP;
//System.out.println(" leafFP=" + startFP);
blockMinValues[blockID] = minValue;
lastDocID = -1;
for (int i=0;i<docIDs.length;i++) {
// Absolute int encode; with "vInt of deltas" encoding, the .kdd size dropped from
// 697 MB -> 539 MB, but query time for 225 queries went from 1.65 sec -> 2.64 sec.
// I think if we also indexed prefix terms here we could do less costly compression
// on those lists:
int docID = docIDs[i];
if (docID != lastDocID) {
out.writeInt(docID);
lastDocID = docID;
}
}
//long endFP = out.getFilePointer();
//System.out.println(" bytes/doc: " + ((endFP - startFP) / count));
} else {
// Inner node: sort, partition/recurse
assert nodeID < blockMinValues.length: "nodeID=" + nodeID + " blockMinValues.length=" + blockMinValues.length;
assert source.count == count;
long leftCount = source.count / 2;
// NOTE: we don't tweak leftCount for the boundary cases, which means at search time if we are looking for exactly splitValue then we
// must search both left and right trees:
long splitValue = getSplitValue(source, leftCount, minValue, maxValue);
build(2*nodeID, leafNodeOffset,
new PathSlice(source.writer, source.start, leftCount),
out,
minValue, splitValue,
blockMinValues, leafBlockFPs);
build(2*nodeID+1, leafNodeOffset,
new PathSlice(source.writer, source.start+leftCount, count-leftCount),
out,
splitValue, maxValue,
blockMinValues, leafBlockFPs);
}
}
SliceWriter getWriter(long count) throws IOException {
if (count < maxValuesSortInHeap) {
return new HeapSliceWriter((int) count);
} else {
return new OfflineSliceWriter(tempDir, tempFileNamePrefix, count);
}
}
}
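
A standalone sketch of the balanced leaf-count math in finish() above: countPerLeaf is halved until it fits maxValuesInLeafNode, and the matching power of two becomes the leaf count, which is what guarantees leaves hold between 50% and 100% of the limit (plain Java):

public class LeafCountDemo {
  public static void main(String[] args) {
    long valueCount = 100_000;
    int maxValuesInLeafNode = 1024;

    long countPerLeaf = valueCount;
    long numLeaves = 1;
    while (countPerLeaf > maxValuesInLeafNode) {
      countPerLeaf = (countPerLeaf + 1) / 2;
      numLeaves *= 2;
    }
    // 100000 values -> 128 leaves of <= 782 values each (between 512 and 1024):
    System.out.println("numLeaves=" + numLeaves + " countPerLeaf=" + countPerLeaf);
  }
}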


@ -1,31 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
/** Iterates over one slice of the sorted values. This abstracts away whether
* OfflineSorter or simple arrays in heap are used. */
interface SliceReader extends Closeable {
boolean next() throws IOException;
long value();
long ord();
int docID();
}


@ -1,29 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
interface SliceWriter extends Closeable {
void append(long value, long ord, int docID) throws IOException;
SliceReader getReader(long start) throws IOException;
void destroy() throws IOException;
}
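
For context, the removed SliceWriter/SliceReader pair abstracted over in-heap arrays versus OfflineSorter-backed temp files. A heap-only sketch of the contract (hypothetical class, not from this patch):

// Hypothetical heap-only sketch of the removed SliceWriter/SliceReader
// contract: append (value, ord, docID) triples, then cursor over them with
// next()/value()/ord()/docID(). The offline variant spilled to temp files
// via OfflineSorter when the count was too large to sort in heap.
class HeapSliceSketch {
  private final long[] values;
  private final long[] ords;
  private final int[] docIDs;
  private int count;
  private int pos = -1;

  HeapSliceSketch(int size) {
    values = new long[size];
    ords = new long[size];
    docIDs = new int[size];
  }

  void append(long value, long ord, int docID) {
    values[count] = value;
    ords[count] = ord;
    docIDs[count] = docID;
    count++;
  }

  boolean next() { return ++pos < count; }
  long value() { return values[pos]; }
  long ord() { return ords[pos]; }
  int docID() { return docIDs[pos]; }

  public static void main(String[] args) {
    HeapSliceSketch slice = new HeapSliceSketch(2);
    slice.append(42L, 0L, 7);
    slice.append(17L, 1L, 9);
    while (slice.next()) {
      System.out.println("value=" + slice.value() + " ord=" + slice.ord() + " docID=" + slice.docID());
    }
  }
}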

View File

@ -1,217 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
/** Finds all previously indexed values that fall within the specified {@link BytesRef} range.
*
* <p>The field must be indexed with {@link RangeTreeDocValuesFormat}, and {@link SortedSetDocValuesField} added per document.
*
* @lucene.experimental */
public class SortedSetRangeTreeQuery extends Query {
final String field;
final BytesRef minValue;
final BytesRef maxValue;
final boolean minInclusive;
final boolean maxInclusive;
/** Matches all values in the specified {@link BytesRef} range. */
public SortedSetRangeTreeQuery(String field, BytesRef minValue, boolean minInclusive, BytesRef maxValue, boolean maxInclusive) {
this.field = field;
this.minInclusive = minInclusive;
this.minValue = minValue;
this.maxInclusive = maxInclusive;
this.maxValue = maxValue;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
// I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
// used in the first pass:
return new ConstantScoreWeight(this) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
final SortedSetDocValues ssdv = reader.getSortedSetDocValues(field);
if (ssdv == null) {
// No docs in this segment had this field
return null;
}
if (ssdv instanceof RangeTreeSortedSetDocValues == false) {
throw new IllegalStateException("field \"" + field + "\" was not indexed with RangeTreeDocValuesFormat: got: " + ssdv);
}
RangeTreeSortedSetDocValues treeDV = (RangeTreeSortedSetDocValues) ssdv;
RangeTreeReader tree = treeDV.getRangeTreeReader();
/*
for(int i=0;i<treeDV.getValueCount();i++) {
System.out.println(" ord " + i + " -> " + treeDV.lookupOrd(i));
}
*/
// lower
final long minOrdIncl;
if (minValue == null) {
minOrdIncl = 0;
} else {
long ord = ssdv.lookupTerm(minValue);
if (ord >= 0) {
// Exact match
if (minInclusive) {
minOrdIncl = ord;
} else {
minOrdIncl = ord+1;
}
} else {
minOrdIncl = -ord-1;
}
}
// upper
final long maxOrdIncl;
if (maxValue == null) {
maxOrdIncl = Long.MAX_VALUE;
} else {
long ord = ssdv.lookupTerm(maxValue);
if (ord >= 0) {
// Exact match
if (maxInclusive) {
maxOrdIncl = ord;
} else {
maxOrdIncl = ord-1;
}
} else {
maxOrdIncl = -ord-2;
}
}
if (maxOrdIncl < minOrdIncl) {
// This can happen when the requested range lies entirely between 2 adjacent ords:
return null;
}
//System.out.println(reader + ": ORD: " + minOrdIncl + "-" + maxOrdIncl + "; " + minValue + " - " + maxValue);
// Just a "view" of only the ords from the SSDV, as an SNDV. Maybe we
// have this view implemented somewhere else already? It's not so bad that
// we are inefficient here (making 2 passes over the ords): this is only
// used in at most 2 leaf cells (the boundary cells).
SortedNumericDocValues ords = new SortedNumericDocValues() {
private long[] ords = new long[2];
private int count;
@Override
public void setDocument(int doc) {
ssdv.setDocument(doc);
long ord;
count = 0;
while ((ord = ssdv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
if (count == ords.length) {
ords = ArrayUtil.grow(ords, count+1);
}
ords[count++] = ord;
}
}
@Override
public int count() {
return count;
}
@Override
public long valueAt(int index) {
return ords[index];
}
};
DocIdSet result = tree.intersect(minOrdIncl, maxOrdIncl, ords, context.reader().maxDoc());
final DocIdSetIterator disi = result.iterator();
return new ConstantScoreScorer(this, score(), disi);
}
};
}
@Override
public int hashCode() {
int hash = super.hashCode();
if (minValue != null) hash += minValue.hashCode()^0x14fa55fb;
if (maxValue != null) hash += maxValue.hashCode()^0x733fa5fe;
return hash +
(Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+
(Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe);
}
@Override
public boolean equals(Object other) {
if (super.equals(other)) {
final SortedSetRangeTreeQuery q = (SortedSetRangeTreeQuery) other;
return (
(q.minValue == null ? minValue == null : q.minValue.equals(minValue)) &&
(q.maxValue == null ? maxValue == null : q.maxValue.equals(maxValue)) &&
minInclusive == q.minInclusive &&
maxInclusive == q.maxInclusive
);
}
return false;
}
@Override
public String toString(String field) {
final StringBuilder sb = new StringBuilder();
sb.append(getClass().getSimpleName());
sb.append(':');
if (this.field.equals(field) == false) {
sb.append("field=");
sb.append(this.field);
sb.append(':');
}
return sb.append(minInclusive ? '[' : '{')
.append((minValue == null) ? "*" : minValue.toString())
.append(" TO ")
.append((maxValue == null) ? "*" : maxValue.toString())
.append(maxInclusive ? ']' : '}')
.toString();
}
}
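
The minOrdIncl/maxOrdIncl arithmetic above leans on lookupTerm's negative-insertion-point encoding. A standalone sketch of the same bounds math (hypothetical class; Arrays.binarySearch uses the identical -(insertionPoint)-1 convention):

import java.util.Arrays;

// Hypothetical sketch of the inclusive-ord bounds computed by the removed
// query above, using Arrays.binarySearch in place of ssdv.lookupTerm.
class OrdBoundsSketch {
  static long minOrdIncl(String[] sortedTerms, String minValue, boolean minInclusive) {
    if (minValue == null) return 0;
    int ord = Arrays.binarySearch(sortedTerms, minValue);
    if (ord >= 0) {
      return minInclusive ? ord : ord + 1;  // exact match
    }
    return -ord - 1;  // first term >= minValue
  }

  static long maxOrdIncl(String[] sortedTerms, String maxValue, boolean maxInclusive) {
    if (maxValue == null) return Long.MAX_VALUE;
    int ord = Arrays.binarySearch(sortedTerms, maxValue);
    if (ord >= 0) {
      return maxInclusive ? ord : ord - 1;  // exact match
    }
    return -ord - 2;  // last term < maxValue
  }

  public static void main(String[] args) {
    String[] terms = {"b", "d", "f"};  // ords 0, 1, 2
    System.out.println(minOrdIncl(terms, "c", true));   // 1 ("d" is the first term >= "c")
    System.out.println(maxOrdIncl(terms, "e", true));   // 1 ("d" is the last term <= "e")
    System.out.println(minOrdIncl(terms, "d", false));  // 2 (exclusive exact match)
    // A range lying entirely between two adjacent terms yields max < min:
    System.out.println(minOrdIncl(terms, "c", true) > maxOrdIncl(terms, "c2", true));  // true
  }
}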

View File

@ -1,28 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- not a package-info.java, because we already defined this package in core/ -->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
This package contains a numeric tree implementation for indexing long values enabling fast range searching.
</body>
</html>

View File

@ -1,4 +1,4 @@
package org.apache.lucene.bkdtree;
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -19,35 +19,24 @@ package org.apache.lucene.bkdtree;
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.document.DimensionalLatLonField;
import org.apache.lucene.index.DimensionalValues;
import org.apache.lucene.index.DimensionalValues.IntersectVisitor;
import org.apache.lucene.index.DimensionalValues.Relation;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.bkd.BKDUtil;
/** Finds all previously indexed points that fall within the specified polygon.
*
* <p>The field must be indexed with {@link BKDTreeDocValuesFormat}, and {@link BKDPointField} added per document.
* <p>The field must be indexed using {@link DimensionalLatLonField} added per document.
*
* <p>Because this implementation cannot intersect each cell with the polygon, it will be costly especially for large polygons, as every
* possible point must be checked.
*
* <p><b>NOTE</b>: for fastest performance, this allocates FixedBitSet(maxDoc) for each segment. The score of each hit is the query boost.
*
* @lucene.experimental */
* @lucene.experimental */
public class BKDPointInPolygonQuery extends Query {
public class DimensionalPointInPolygonQuery extends Query {
final String field;
final double minLat;
final double maxLat;
@ -57,7 +46,7 @@ public class BKDPointInPolygonQuery extends Query {
final double[] polyLons;
/** The lats/lons must be clockwise or counter-clockwise. */
public BKDPointInPolygonQuery(String field, double[] polyLats, double[] polyLons) {
public DimensionalPointInPolygonQuery(String field, double[] polyLats, double[] polyLons) {
this.field = field;
if (polyLats.length != polyLons.length) {
throw new IllegalArgumentException("polyLats and polyLons must be equal length");
@ -83,13 +72,13 @@ public class BKDPointInPolygonQuery extends Query {
double maxLat = Double.NEGATIVE_INFINITY;
for(int i=0;i<polyLats.length;i++) {
double lat = polyLats[i];
if (BKDTreeWriter.validLat(lat) == false) {
if (GeoUtils.isValidLat(lat) == false) {
throw new IllegalArgumentException("polyLats[" + i + "]=" + lat + " is not a valid latitude");
}
minLat = Math.min(minLat, lat);
maxLat = Math.max(maxLat, lat);
double lon = polyLons[i];
if (BKDTreeWriter.validLon(lon) == false) {
if (GeoUtils.isValidLon(lon) == false) {
throw new IllegalArgumentException("polyLons[" + i + "]=" + lat + " is not a valid longitude");
}
minLon = Math.min(minLon, lon);
@ -115,42 +104,59 @@ public class BKDPointInPolygonQuery extends Query {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
SortedNumericDocValues sdv = reader.getSortedNumericDocValues(field);
if (sdv == null) {
// No docs in this segment had this field
DimensionalValues values = reader.getDimensionalValues();
if (values == null) {
// No docs in this segment had any dimensional fields
return null;
}
if (sdv instanceof BKDTreeSortedNumericDocValues == false) {
throw new IllegalStateException("field \"" + field + "\" was not indexed with BKDTreeDocValuesFormat: got: " + sdv);
}
BKDTreeSortedNumericDocValues treeDV = (BKDTreeSortedNumericDocValues) sdv;
BKDTreeReader tree = treeDV.getBKDTreeReader();
DocIdSet result = tree.intersect(minLat, maxLat, minLon, maxLon,
new BKDTreeReader.LatLonFilter() {
@Override
public boolean accept(double lat, double lon) {
return GeoUtils.pointInPolygon(polyLons, polyLats, lat, lon);
}
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
int[] hitCount = new int[1];
values.intersect(field,
new IntersectVisitor() {
@Override
public void visit(int docID) {
hitCount[0]++;
result.add(docID);
}
@Override
public BKDTreeReader.Relation compare(double cellLatMin, double cellLatMax, double cellLonMin, double cellLonMax) {
if (GeoUtils.rectWithinPoly(cellLonMin, cellLatMin, cellLonMax, cellLatMax,
polyLons, polyLats,
minLon, minLat, maxLon, maxLat)) {
return BKDTreeReader.Relation.CELL_INSIDE_SHAPE;
} else if (GeoUtils.rectCrossesPoly(cellLonMin, cellLatMin, cellLonMax, cellLatMax,
polyLons, polyLats,
minLon, minLat, maxLon, maxLat)) {
return BKDTreeReader.Relation.SHAPE_CROSSES_CELL;
} else {
return BKDTreeReader.Relation.SHAPE_OUTSIDE_CELL;
}
}
}, treeDV.delegate);
@Override
public void visit(int docID, byte[] packedValue) {
assert packedValue.length == 8;
double lat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(packedValue, 0));
double lon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(packedValue, 1));
if (GeoUtils.pointInPolygon(polyLons, polyLats, lat, lon)) {
hitCount[0]++;
result.add(docID);
}
}
return new ConstantScoreScorer(this, score(), result.iterator());
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
double cellMinLat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(minPackedValue, 0));
double cellMinLon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(minPackedValue, 1));
double cellMaxLat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(maxPackedValue, 0));
double cellMaxLon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(maxPackedValue, 1));
if (cellMinLat <= minLat && cellMaxLat >= maxLat && cellMinLon <= minLon && cellMaxLon >= maxLon) {
// Cell fully encloses the query
return Relation.CELL_CROSSES_QUERY;
} else if (GeoUtils.rectWithinPoly(cellMinLon, cellMinLat, cellMaxLon, cellMaxLat,
polyLons, polyLats,
minLon, minLat, maxLon, maxLat)) {
return Relation.CELL_INSIDE_QUERY;
} else if (GeoUtils.rectCrossesPoly(cellMinLon, cellMinLat, cellMaxLon, cellMaxLat,
polyLons, polyLats,
minLon, minLat, maxLon, maxLat)) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_OUTSIDE_QUERY;
}
}
});
// NOTE: hitCount[0] will be an over-estimate in the multi-valued case
return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
}
};
}
@ -162,7 +168,7 @@ public class BKDPointInPolygonQuery extends Query {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
BKDPointInPolygonQuery that = (BKDPointInPolygonQuery) o;
DimensionalPointInPolygonQuery that = (DimensionalPointInPolygonQuery) o;
if (Arrays.equals(polyLons, that.polyLons) == false) {
return false;
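
A condensed sketch of the relation ladder in compare() above (hypothetical names, not part of this patch): a cell that fully encloses the query's bounding box cannot be proven inside the polygon, so it degrades to CROSSES and falls back to per-point checks.

// Hypothetical sketch of the cell-vs-polygon relation ladder used above.
enum PolyRel { CELL_INSIDE_QUERY, CELL_CROSSES_QUERY, CELL_OUTSIDE_QUERY }

class PolyRelationSketch {
  static PolyRel relate(boolean cellEnclosesQueryBBox,
                        boolean cellWithinPoly,
                        boolean cellCrossesPoly) {
    if (cellEnclosesQueryBBox) {
      // The polygon sits somewhere inside this cell, but individual points in
      // the cell may still fall outside it, so every doc must be checked:
      return PolyRel.CELL_CROSSES_QUERY;
    } else if (cellWithinPoly) {
      return PolyRel.CELL_INSIDE_QUERY;   // every point in the cell matches
    } else if (cellCrossesPoly) {
      return PolyRel.CELL_CROSSES_QUERY;  // boundary cell: check each point
    } else {
      return PolyRel.CELL_OUTSIDE_QUERY;  // nothing in the cell can match
    }
  }

  public static void main(String[] args) {
    System.out.println(relate(true, false, false));   // CELL_CROSSES_QUERY
    System.out.println(relate(false, true, false));   // CELL_INSIDE_QUERY
    System.out.println(relate(false, false, false));  // CELL_OUTSIDE_QUERY
  }
}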

View File

@ -1,4 +1,4 @@
package org.apache.lucene.bkdtree;
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -18,33 +18,25 @@ package org.apache.lucene.bkdtree;
*/
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.document.DimensionalLatLonField;
import org.apache.lucene.index.DimensionalValues;
import org.apache.lucene.index.DimensionalValues.IntersectVisitor;
import org.apache.lucene.index.DimensionalValues.Relation;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.bkd.BKDUtil;
/** Finds all previously indexed points that fall within the specified bounding box.
*
* <p>The field must be indexed with {@link BKDTreeDocValuesFormat}, and {@link BKDPointField} added per document.
* <p>The field must be indexed using {@link DimensionalLatLonField} added per document.
*
* <p><b>NOTE</b>: for fastest performance, this allocates FixedBitSet(maxDoc) for each segment. The score of each hit is the query boost.
*
* @lucene.experimental */
* @lucene.experimental */
public class BKDPointInBBoxQuery extends Query {
public class DimensionalPointInRectQuery extends Query {
final String field;
final double minLat;
final double maxLat;
@ -52,18 +44,18 @@ public class BKDPointInBBoxQuery extends Query {
final double maxLon;
/** Matches all points &gt;= minLat, minLon and &lt;= maxLat, maxLon (all bounds inclusive). */
public BKDPointInBBoxQuery(String field, double minLat, double maxLat, double minLon, double maxLon) {
public DimensionalPointInRectQuery(String field, double minLat, double maxLat, double minLon, double maxLon) {
this.field = field;
if (BKDTreeWriter.validLat(minLat) == false) {
if (GeoUtils.isValidLat(minLat) == false) {
throw new IllegalArgumentException("minLat=" + minLat + " is not a valid latitude");
}
if (BKDTreeWriter.validLat(maxLat) == false) {
if (GeoUtils.isValidLat(maxLat) == false) {
throw new IllegalArgumentException("maxLat=" + maxLat + " is not a valid latitude");
}
if (BKDTreeWriter.validLon(minLon) == false) {
if (GeoUtils.isValidLon(minLon) == false) {
throw new IllegalArgumentException("minLon=" + minLon + " is not a valid longitude");
}
if (BKDTreeWriter.validLon(maxLon) == false) {
if (GeoUtils.isValidLon(maxLon) == false) {
throw new IllegalArgumentException("maxLon=" + maxLon + " is not a valid longitude");
}
this.minLon = minLon;
@ -82,21 +74,59 @@ public class BKDPointInBBoxQuery extends Query {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
SortedNumericDocValues sdv = reader.getSortedNumericDocValues(field);
if (sdv == null) {
// No docs in this segment had this field
DimensionalValues values = reader.getDimensionalValues();
if (values == null) {
// No docs in this segment had any dimensional fields
return null;
}
if (sdv instanceof BKDTreeSortedNumericDocValues == false) {
throw new IllegalStateException("field \"" + field + "\" was not indexed with BKDTreeDocValuesFormat: got: " + sdv);
}
BKDTreeSortedNumericDocValues treeDV = (BKDTreeSortedNumericDocValues) sdv;
BKDTreeReader tree = treeDV.getBKDTreeReader();
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
int[] hitCount = new int[1];
values.intersect(field,
new IntersectVisitor() {
@Override
public void grow(int count) {
result.grow(count);
}
DocIdSet result = tree.intersect(minLat, maxLat, minLon, maxLon, null, treeDV.delegate);
@Override
public void visit(int docID) {
hitCount[0]++;
result.add(docID);
}
return new ConstantScoreScorer(this, score(), result.iterator());
@Override
public void visit(int docID, byte[] packedValue) {
assert packedValue.length == 8;
double lat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(packedValue, 0));
double lon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(packedValue, 1));
if (lat >= minLat && lat <= maxLat && lon >= minLon && lon <= maxLon) {
hitCount[0]++;
result.add(docID);
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
double cellMinLat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(minPackedValue, 0));
double cellMinLon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(minPackedValue, 1));
double cellMaxLat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(maxPackedValue, 0));
double cellMaxLon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(maxPackedValue, 1));
if (minLat <= cellMinLat && maxLat >= cellMaxLat && minLon <= cellMinLon && maxLon >= cellMaxLon) {
return Relation.CELL_INSIDE_QUERY;
}
if (cellMaxLat < minLat || cellMinLat > maxLat || cellMaxLon < minLon || cellMinLon > maxLon) {
return Relation.CELL_OUTSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
});
// NOTE: hitCount[0] will be an over-estimate in the multi-valued case
return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
}
};
}
@ -111,9 +141,9 @@ public class BKDPointInBBoxQuery extends Query {
q.setDisableCoord(true);
// E.g.: maxLon = -179, minLon = 179
BKDPointInBBoxQuery left = new BKDPointInBBoxQuery(field, minLat, maxLat, BKDTreeWriter.MIN_LON_INCL, maxLon);
DimensionalPointInRectQuery left = new DimensionalPointInRectQuery(field, minLat, maxLat, GeoUtils.MIN_LON_INCL, maxLon);
q.add(new BooleanClause(left, BooleanClause.Occur.SHOULD));
BKDPointInBBoxQuery right = new BKDPointInBBoxQuery(field, minLat, maxLat, minLon, BKDTreeWriter.MAX_LON_INCL);
DimensionalPointInRectQuery right = new DimensionalPointInRectQuery(field, minLat, maxLat, minLon, GeoUtils.MAX_LON_INCL);
q.add(new BooleanClause(right, BooleanClause.Occur.SHOULD));
return new ConstantScoreQuery(q.build());
} else {
@ -133,8 +163,8 @@ public class BKDPointInBBoxQuery extends Query {
@Override
public boolean equals(Object other) {
if (super.equals(other) && other instanceof BKDPointInBBoxQuery) {
final BKDPointInBBoxQuery q = (BKDPointInBBoxQuery) other;
if (super.equals(other) && other instanceof DimensionalPointInRectQuery) {
final DimensionalPointInRectQuery q = (DimensionalPointInRectQuery) other;
return field.equals(q.field) &&
minLat == q.minLat &&
maxLat == q.maxLat &&
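
The compare() implementation above is the standard per-axis interval test; a self-contained sketch with plain doubles (hypothetical class, mirroring the logic rather than copied from the patch). The dateline rewrite earlier in this file then simply runs this same test over two longitude intervals OR'd together as SHOULD clauses.

// Hypothetical sketch of the query-box vs cell-box relation test above.
enum RectRel { CELL_INSIDE_QUERY, CELL_CROSSES_QUERY, CELL_OUTSIDE_QUERY }

class RectRelationSketch {
  // Query box: [minLat,maxLat] x [minLon,maxLon]; cell box likewise.
  static RectRel relate(double minLat, double maxLat, double minLon, double maxLon,
                        double cellMinLat, double cellMaxLat,
                        double cellMinLon, double cellMaxLon) {
    if (minLat <= cellMinLat && maxLat >= cellMaxLat
        && minLon <= cellMinLon && maxLon >= cellMaxLon) {
      return RectRel.CELL_INSIDE_QUERY;   // query swallows the cell: take all docs
    }
    if (cellMaxLat < minLat || cellMinLat > maxLat
        || cellMaxLon < minLon || cellMinLon > maxLon) {
      return RectRel.CELL_OUTSIDE_QUERY;  // disjoint on some axis: skip the cell
    }
    return RectRel.CELL_CROSSES_QUERY;    // partial overlap: check each point
  }

  public static void main(String[] args) {
    // Query: lat 0..10, lon 0..10
    System.out.println(relate(0, 10, 0, 10, 2, 4, 2, 4));    // CELL_INSIDE_QUERY
    System.out.println(relate(0, 10, 0, 10, 20, 30, 2, 4));  // CELL_OUTSIDE_QUERY
    System.out.println(relate(0, 10, 0, 10, 5, 15, 2, 4));   // CELL_CROSSES_QUERY
  }
}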

View File

@ -1,18 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.lucene.bkdtree.BKDTreeDocValuesFormat
org.apache.lucene.rangetree.RangeTreeDocValuesFormat

View File

@ -1,194 +0,0 @@
package org.apache.lucene.bkdtree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.lucene60.Lucene60Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BaseGeoPointTestCase;
import org.apache.lucene.util.GeoRect;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.SloppyMath;
import org.apache.lucene.util.TestUtil;
// TODO: can test framework assert we don't leak temp files?
public class TestBKDTree extends BaseGeoPointTestCase {
@Override
protected void addPointToDoc(String field, Document doc, double lat, double lon) {
doc.add(new BKDPointField(field, lat, lon));
}
@Override
protected Query newBBoxQuery(String field, GeoRect rect) {
return new BKDPointInBBoxQuery(field, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
}
@Override
protected Query newDistanceQuery(String field, double centerLat, double centerLon, double radiusMeters) {
// return new BKDDistanceQuery(field, centerLat, centerLon, radiusMeters);
return null;
}
@Override
protected Query newDistanceRangeQuery(String field, double centerLat, double centerLon, double minRadiusMeters, double radiusMeters) {
return null;
}
@Override
protected Query newPolygonQuery(String field, double[] lats, double[] lons) {
return new BKDPointInPolygonQuery(FIELD_NAME, lats, lons);
}
@Override
protected void initIndexWriterConfig(final String fieldName, IndexWriterConfig iwc) {
final DocValuesFormat dvFormat = getDocValuesFormat();
Codec codec = new Lucene60Codec() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
if (field.equals(fieldName)) {
return dvFormat;
} else {
return super.getDocValuesFormatForField(field);
}
}
};
iwc.setCodec(codec);
}
@Override
protected Boolean rectContainsPoint(GeoRect rect, double pointLat, double pointLon) {
assert Double.isNaN(pointLat) == false;
int rectLatMinEnc = BKDTreeWriter.encodeLat(rect.minLat);
int rectLatMaxEnc = BKDTreeWriter.encodeLat(rect.maxLat);
int rectLonMinEnc = BKDTreeWriter.encodeLon(rect.minLon);
int rectLonMaxEnc = BKDTreeWriter.encodeLon(rect.maxLon);
int pointLatEnc = BKDTreeWriter.encodeLat(pointLat);
int pointLonEnc = BKDTreeWriter.encodeLon(pointLon);
if (rect.minLon < rect.maxLon) {
return pointLatEnc >= rectLatMinEnc &&
pointLatEnc < rectLatMaxEnc &&
pointLonEnc >= rectLonMinEnc &&
pointLonEnc < rectLonMaxEnc;
} else {
// Rect crosses dateline:
return pointLatEnc >= rectLatMinEnc &&
pointLatEnc < rectLatMaxEnc &&
(pointLonEnc >= rectLonMinEnc ||
pointLonEnc < rectLonMaxEnc);
}
}
private static final double POLY_TOLERANCE = 1e-7;
@Override
protected Boolean polyRectContainsPoint(GeoRect rect, double pointLat, double pointLon) {
if (Math.abs(rect.minLat-pointLat) < POLY_TOLERANCE ||
Math.abs(rect.maxLat-pointLat) < POLY_TOLERANCE ||
Math.abs(rect.minLon-pointLon) < POLY_TOLERANCE ||
Math.abs(rect.maxLon-pointLon) < POLY_TOLERANCE) {
// The poly check quantizes slightly differently, so we allow for boundary cases to disagree
return null;
} else {
return rectContainsPoint(rect, pointLat, pointLon);
}
}
@Override
protected Boolean circleContainsPoint(double centerLat, double centerLon, double radiusMeters, double pointLat, double pointLon) {
double distanceKM = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon);
boolean result = distanceKM*1000.0 <= radiusMeters;
//System.out.println(" shouldMatch? centerLon=" + centerLon + " centerLat=" + centerLat + " pointLon=" + pointLon + " pointLat=" + pointLat + " result=" + result + " distanceMeters=" + (distanceKM * 1000));
return result;
}
@Override
protected Boolean distanceRangeContainsPoint(double centerLat, double centerLon, double minRadiusMeters, double radiusMeters, double pointLat, double pointLon) {
final double d = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon)*1000.0;
return d >= minRadiusMeters && d <= radiusMeters;
}
public void testEncodeDecode() throws Exception {
int iters = atLeast(10000);
boolean small = random().nextBoolean();
for(int iter=0;iter<iters;iter++) {
double lat = randomLat(small);
double latQuantized = BKDTreeWriter.decodeLat(BKDTreeWriter.encodeLat(lat));
assertEquals(lat, latQuantized, BKDTreeWriter.TOLERANCE);
double lon = randomLon(small);
double lonQuantized = BKDTreeWriter.decodeLon(BKDTreeWriter.encodeLon(lon));
assertEquals(lon, lonQuantized, BKDTreeWriter.TOLERANCE);
}
}
public void testEncodeDecodeMax() throws Exception {
int x = BKDTreeWriter.encodeLat(Math.nextAfter(90.0, Double.POSITIVE_INFINITY));
assertTrue(x < Integer.MAX_VALUE);
int y = BKDTreeWriter.encodeLon(Math.nextAfter(180.0, Double.POSITIVE_INFINITY));
assertTrue(y < Integer.MAX_VALUE);
}
public void testAccountableHasDelegate() throws Exception {
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(TestUtil.alwaysDocValuesFormat(getDocValuesFormat()));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new BKDPointField(FIELD_NAME, -18.2861, 147.7));
w.addDocument(doc);
IndexReader r = w.getReader();
// We can't wrap with "exotic" readers because the BKD query must see the BKDDVFormat:
IndexSearcher s = newSearcher(r, false);
// Need to run a query so the DV field is really loaded:
TopDocs hits = s.search(new BKDPointInBBoxQuery(FIELD_NAME, -30, 0, 140, 150), 1);
assertEquals(1, hits.totalHits);
assertTrue(Accountables.toString((Accountable) r.leaves().get(0).reader()).contains("delegate"));
IOUtils.close(r, w, dir);
}
private static DocValuesFormat getDocValuesFormat() {
int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
if (VERBOSE) {
System.out.println(" BKD params: maxPointsInLeaf=" + maxPointsInLeaf + " maxPointsSortInHeap=" + maxPointsSortInHeap);
}
return new BKDTreeDocValuesFormat(maxPointsInLeaf, maxPointsSortInHeap);
}
private Directory getDirectory() {
return noVirusChecker(newDirectory());
}
}

View File

@ -1,781 +0,0 @@
package org.apache.lucene.rangetree;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.lucene60.Lucene60Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.BeforeClass;
public class TestRangeTree extends LuceneTestCase {
// Controls what range of values we randomly generate, so we sometimes test narrow ranges:
static long valueMid;
static int valueRange;
@BeforeClass
public static void beforeClass() {
if (random().nextBoolean()) {
valueMid = random().nextLong();
if (random().nextBoolean()) {
// Wide range
valueRange = TestUtil.nextInt(random(), 1, Integer.MAX_VALUE);
} else {
// Narrow range
valueRange = TestUtil.nextInt(random(), 1, 100000);
}
if (VERBOSE) {
System.out.println("TEST: will generate long values " + valueMid + " +/- " + valueRange);
}
} else {
// All longs
valueRange = 0;
if (VERBOSE) {
System.out.println("TEST: will generate all long values");
}
}
}
public void testAllEqual() throws Exception {
int numValues = atLeast(10000);
long value = randomValue();
long[] values = new long[numValues];
FixedBitSet missing = new FixedBitSet(numValues);
if (VERBOSE) {
System.out.println("TEST: use same value=" + value);
}
for(int docID=0;docID<numValues;docID++) {
int x = random().nextInt(20);
if (x == 17) {
// Some docs don't have a point:
missing.set(docID);
if (VERBOSE) {
System.out.println(" doc=" + docID + " is missing");
}
continue;
}
values[docID] = value;
}
verify(missing, values);
}
public void testMultiValued() throws Exception {
int numValues = atLeast(10000);
// Every doc has 2 values:
long[] values = new long[2*numValues];
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
// We rely on docID order:
iwc.setMergePolicy(newLogMergePolicy());
Codec codec = TestUtil.alwaysDocValuesFormat(getDocValuesFormat());
iwc.setCodec(codec);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
for (int docID=0;docID<numValues;docID++) {
Document doc = new Document();
values[2*docID] = randomValue();
doc.add(new SortedNumericDocValuesField("value", values[2*docID]));
values[2*docID+1] = randomValue();
doc.add(new SortedNumericDocValuesField("value", values[2*docID+1]));
w.addDocument(doc);
}
if (random().nextBoolean()) {
w.forceMerge(1);
}
IndexReader r = w.getReader();
w.close();
// We can't wrap with "exotic" readers because the NumericRangeTreeQuery must see the RangeTreeDVFormat:
IndexSearcher s = newSearcher(r, false);
int iters = atLeast(100);
for (int iter=0;iter<iters;iter++) {
long lower = randomValue();
long upper = randomValue();
if (upper < lower) {
long x = lower;
lower = upper;
upper = x;
}
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " value=" + lower + " TO " + upper);
}
boolean includeLower = random().nextBoolean();
boolean includeUpper = random().nextBoolean();
Query query = new NumericRangeTreeQuery("value", lower, includeLower, upper, includeUpper);
final FixedBitSet hits = new FixedBitSet(r.maxDoc());
s.search(query, new SimpleCollector() {
private int docBase;
@Override
public boolean needsScores() {
return false;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase+doc);
}
});
for(int docID=0;docID<values.length/2;docID++) {
long docValue1 = values[2*docID];
long docValue2 = values[2*docID+1];
boolean expected = matches(lower, includeLower, upper, includeUpper, docValue1) ||
matches(lower, includeLower, upper, includeUpper, docValue2);
if (hits.get(docID) != expected) {
fail("docID=" + docID + " docValue1=" + docValue1 + " docValue2=" + docValue2 + " expected " + expected + " but got: " + hits.get(docID));
}
}
}
r.close();
dir.close();
}
public void testMultiValuedSortedSet() throws Exception {
int numValues = atLeast(10000);
// Every doc has 2 values:
long[] values = new long[2*numValues];
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
// We rely on docID order:
iwc.setMergePolicy(newLogMergePolicy());
Codec codec = TestUtil.alwaysDocValuesFormat(getDocValuesFormat());
iwc.setCodec(codec);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
for (int docID=0;docID<numValues;docID++) {
Document doc = new Document();
values[2*docID] = randomValue();
doc.add(new SortedSetDocValuesField("value", longToBytes(values[2*docID])));
values[2*docID+1] = randomValue();
doc.add(new SortedSetDocValuesField("value", longToBytes(values[2*docID+1])));
w.addDocument(doc);
}
if (random().nextBoolean()) {
w.forceMerge(1);
}
IndexReader r = w.getReader();
w.close();
// We can't wrap with "exotic" readers because the NumericRangeTreeQuery must see the RangeTreeDVFormat:
IndexSearcher s = newSearcher(r, false);
int iters = atLeast(100);
for (int iter=0;iter<iters;iter++) {
long lower = randomValue();
long upper = randomValue();
if (upper < lower) {
long x = lower;
lower = upper;
upper = x;
}
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " value=" + lower + " TO " + upper);
}
boolean includeLower = random().nextBoolean();
boolean includeUpper = random().nextBoolean();
Query query = new SortedSetRangeTreeQuery("value", longToBytes(lower), includeLower, longToBytes(upper), includeUpper);
final FixedBitSet hits = new FixedBitSet(r.maxDoc());
s.search(query, new SimpleCollector() {
private int docBase;
@Override
public boolean needsScores() {
return false;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase+doc);
}
});
for(int docID=0;docID<values.length/2;docID++) {
long docValue1 = values[2*docID];
long docValue2 = values[2*docID+1];
boolean expected = matches(lower, includeLower, upper, includeUpper, docValue1) ||
matches(lower, includeLower, upper, includeUpper, docValue2);
if (hits.get(docID) != expected) {
fail("docID=" + docID + " docValue1=" + docValue1 + " docValue2=" + docValue2 + " expected " + expected + " but got: " + hits.get(docID));
}
}
}
r.close();
dir.close();
}
public void testRandomTiny() throws Exception {
// Make sure single-leaf-node case is OK:
doTestRandom(10);
}
public void testRandomMedium() throws Exception {
doTestRandom(10000);
}
@Nightly
public void testRandomBig() throws Exception {
doTestRandom(200000);
}
private void doTestRandom(int count) throws Exception {
int numValues = atLeast(count);
if (VERBOSE) {
System.out.println("TEST: numValues=" + numValues);
}
long[] values = new long[numValues];
FixedBitSet missing = new FixedBitSet(numValues);
boolean haveRealDoc = false;
for (int docID=0;docID<numValues;docID++) {
int x = random().nextInt(20);
if (x == 17) {
// Some docs don't have a point:
missing.set(docID);
if (VERBOSE) {
System.out.println(" doc=" + docID + " is missing");
}
continue;
}
if (docID > 0 && x == 0 && haveRealDoc) {
int oldDocID;
while (true) {
oldDocID = random().nextInt(docID);
if (missing.get(oldDocID) == false) {
break;
}
}
// Identical to old value
values[docID] = values[oldDocID];
if (VERBOSE) {
System.out.println(" doc=" + docID + " value=" + values[docID] + " bytes=" + longToBytes(values[docID]) + " (same as doc=" + oldDocID + ")");
}
} else {
values[docID] = randomValue();
haveRealDoc = true;
if (VERBOSE) {
System.out.println(" doc=" + docID + " value=" + values[docID] + " bytes=" + longToBytes(values[docID]));
}
}
}
verify(missing, values);
}
private static void verify(Bits missing, long[] values) throws Exception {
IndexWriterConfig iwc = newIndexWriterConfig();
// Else we can get O(N^2) merging:
int mbd = iwc.getMaxBufferedDocs();
if (mbd != -1 && mbd < values.length/100) {
iwc.setMaxBufferedDocs(values.length/100);
}
final DocValuesFormat dvFormat = getDocValuesFormat();
Codec codec = new Lucene60Codec() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
if (field.equals("sn_value") || field.equals("ss_value")) {
return dvFormat;
} else {
return super.getDocValuesFormatForField(field);
}
}
};
iwc.setCodec(codec);
Directory dir;
if (values.length > 100000) {
dir = noVirusChecker(newFSDirectory(createTempDir("TestRangeTree")));
} else {
dir = getDirectory();
}
Set<Integer> deleted = new HashSet<>();
// RandomIndexWriter is too slow here:
IndexWriter w = new IndexWriter(dir, iwc);
for(int id=0;id<values.length;id++) {
Document doc = new Document();
doc.add(newStringField("id", ""+id, Field.Store.NO));
doc.add(new NumericDocValuesField("id", id));
if (missing.get(id) == false) {
doc.add(new SortedNumericDocValuesField("sn_value", values[id]));
doc.add(new SortedSetDocValuesField("ss_value", longToBytes(values[id])));
}
w.addDocument(doc);
if (id > 0 && random().nextInt(100) == 42) {
int idToDelete = random().nextInt(id);
w.deleteDocuments(new Term("id", ""+idToDelete));
deleted.add(idToDelete);
if (VERBOSE) {
System.out.println(" delete id=" + idToDelete);
}
}
}
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println(" forceMerge(1)");
}
w.forceMerge(1);
}
final IndexReader r = DirectoryReader.open(w, true);
w.close();
// We can't wrap with "exotic" readers because the NumericRangeTreeQuery must see the RangeTreeDVFormat:
IndexSearcher s = newSearcher(r, false);
int numThreads = TestUtil.nextInt(random(), 2, 5);
if (VERBOSE) {
System.out.println("TEST: use " + numThreads + " query threads");
}
List<Thread> threads = new ArrayList<>();
final int iters = atLeast(100);
final CountDownLatch startingGun = new CountDownLatch(1);
final AtomicBoolean failed = new AtomicBoolean();
for(int i=0;i<numThreads;i++) {
Thread thread = new Thread() {
@Override
public void run() {
try {
_run();
} catch (Exception e) {
failed.set(true);
throw new RuntimeException(e);
}
}
private void _run() throws Exception {
startingGun.await();
NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
for (int iter=0;iter<iters && failed.get() == false;iter++) {
long lower = randomValue();
long upper = randomValue();
if (upper < lower) {
long x = lower;
lower = upper;
upper = x;
}
if (VERBOSE) {
System.out.println("\n" + Thread.currentThread().getName() + ": TEST: iter=" + iter + " value=" + lower + " TO " + upper);
}
boolean includeLower = random().nextBoolean();
boolean includeUpper = random().nextBoolean();
Query query;
if (random().nextBoolean()) {
query = new NumericRangeTreeQuery("sn_value", lower, includeLower, upper, includeUpper);
} else {
query = new SortedSetRangeTreeQuery("ss_value", longToBytes(lower), includeLower, longToBytes(upper), includeUpper);
}
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": using query: " + query);
}
final FixedBitSet hits = new FixedBitSet(r.maxDoc());
s.search(query, new SimpleCollector() {
private int docBase;
@Override
public boolean needsScores() {
return false;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase+doc);
}
});
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": hitCount: " + hits.cardinality());
}
for(int docID=0;docID<r.maxDoc();docID++) {
int id = (int) docIDToID.get(docID);
boolean expected = missing.get(id) == false && deleted.contains(id) == false && matches(lower, includeLower, upper, includeUpper, values[id]);
if (hits.get(docID) != expected) {
// We do exact quantized comparison so the bbox query should never disagree:
fail(Thread.currentThread().getName() + ": iter=" + iter + " id=" + id + " docID=" + docID + " value=" + values[id] + " (range: " + lower + " TO " + upper + ") expected " + expected + " but got: " + hits.get(docID) + " deleted?=" + deleted.contains(id) + " query=" + query);
}
}
}
}
};
thread.setName("T" + i);
thread.start();
threads.add(thread);
}
startingGun.countDown();
for(Thread thread : threads) {
thread.join();
}
IOUtils.close(r, dir);
}
private static boolean matches(long lower, boolean includeLower, long upper, boolean includeUpper, long value) {
if (includeLower == false) {
if (lower == Long.MAX_VALUE) {
return false;
}
lower++;
}
if (includeUpper == false) {
if (upper == Long.MIN_VALUE) {
return false;
}
upper--;
}
return value >= lower && value <= upper;
}
private static long randomValue() {
if (valueRange == 0) {
return random().nextLong();
} else {
return valueMid + TestUtil.nextInt(random(), -valueRange, valueRange);
}
}
public void testAccountableHasDelegate() throws Exception {
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("value", 187));
w.addDocument(doc);
IndexReader r = w.getReader();
// We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
IndexSearcher s = newSearcher(r, false);
// Need to run a query so the DV field is really loaded:
TopDocs hits = s.search(new NumericRangeTreeQuery("value", -30L, true, 187L, true), 1);
assertEquals(1, hits.totalHits);
assertTrue(Accountables.toString((Accountable) r.leaves().get(0).reader()).contains("delegate"));
IOUtils.close(r, w, dir);
}
public void testMinMaxLong() throws Exception {
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("value", Long.MIN_VALUE));
w.addDocument(doc);
doc = new Document();
doc.add(new SortedNumericDocValuesField("value", Long.MAX_VALUE));
w.addDocument(doc);
IndexReader r = w.getReader();
// We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
IndexSearcher s = newSearcher(r, false);
assertEquals(1, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, 0L, true)));
assertEquals(1, s.count(new NumericRangeTreeQuery("value", 0L, true, Long.MAX_VALUE, true)));
assertEquals(2, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, true)));
IOUtils.close(r, w, dir);
}
public void testBasicSortedSet() throws Exception {
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("value", new BytesRef("abc")));
w.addDocument(doc);
doc = new Document();
doc.add(new SortedSetDocValuesField("value", new BytesRef("def")));
w.addDocument(doc);
IndexReader r = w.getReader();
// We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
IndexSearcher s = newSearcher(r, false);
assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("aaa"), true, new BytesRef("bbb"), true)));
assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("c"), true, new BytesRef("e"), true)));
assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("a"), true, new BytesRef("z"), true)));
assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", null, true, new BytesRef("abc"), true)));
assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("a"), true, new BytesRef("abc"), true)));
assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("a"), true, new BytesRef("abc"), false)));
assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("def"), true, null, false)));
assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("def"), true, new BytesRef("z"), true)));
assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("def"), false, new BytesRef("z"), true)));
IOUtils.close(r, w, dir);
}
public void testLongMinMaxNumeric() throws Exception {
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("value", Long.MIN_VALUE));
w.addDocument(doc);
doc = new Document();
doc.add(new SortedNumericDocValuesField("value", Long.MAX_VALUE));
w.addDocument(doc);
IndexReader r = w.getReader();
// We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
IndexSearcher s = newSearcher(r, false);
assertEquals(2, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, true)));
assertEquals(1, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, false)));
assertEquals(1, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, false, Long.MAX_VALUE, true)));
assertEquals(0, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, false, Long.MAX_VALUE, false)));
assertEquals(2, s.count(new NumericRangeTreeQuery("value", null, true, null, true)));
IOUtils.close(r, w, dir);
}
public void testLongMinMaxSortedSet() throws Exception {
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("value", longToBytes(Long.MIN_VALUE)));
w.addDocument(doc);
doc = new Document();
doc.add(new SortedSetDocValuesField("value", longToBytes(Long.MAX_VALUE)));
w.addDocument(doc);
IndexReader r = w.getReader();
// We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
IndexSearcher s = newSearcher(r, false);
assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), true, longToBytes(Long.MAX_VALUE), true)));
assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), true, longToBytes(Long.MAX_VALUE), false)));
assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), false, longToBytes(Long.MAX_VALUE), true)));
assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), false, longToBytes(Long.MAX_VALUE), false)));
assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", null, true, null, true)));
IOUtils.close(r, w, dir);
}
public void testSortedSetNoOrdsMatch() throws Exception {
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("value", new BytesRef("a")));
w.addDocument(doc);
doc = new Document();
doc.add(new SortedSetDocValuesField("value", new BytesRef("z")));
w.addDocument(doc);
IndexReader r = w.getReader();
// We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
IndexSearcher s = newSearcher(r, false);
assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("m"), true, new BytesRef("n"), false)));
assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", null, true, null, true)));
IOUtils.close(r, w, dir);
}
public void testNumericNoValuesMatch() throws Exception {
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("value", 17));
w.addDocument(doc);
doc = new Document();
doc.add(new SortedNumericDocValuesField("value", 22));
w.addDocument(doc);
IndexReader r = w.getReader();
// We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
IndexSearcher s = newSearcher(r, false);
assertEquals(0, s.count(new NumericRangeTreeQuery("value", 17L, true, 13L, false)));
IOUtils.close(r, w, dir);
}
public void testNoDocs() throws Exception {
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
w.addDocument(new Document());
IndexReader r = w.getReader();
// We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
IndexSearcher s = newSearcher(r, false);
assertEquals(0, s.count(new NumericRangeTreeQuery("value", 17L, true, 13L, false)));
IOUtils.close(r, w, dir);
}
private static BytesRef longToBytes(long v) {
// Flip the sign bit so negative longs sort before positive longs:
v ^= 0x8000000000000000L;
byte[] bytes = new byte[8];
bytes[0] = (byte) (v >> 56);
bytes[1] = (byte) (v >> 48);
bytes[2] = (byte) (v >> 40);
bytes[3] = (byte) (v >> 32);
bytes[4] = (byte) (v >> 24);
bytes[5] = (byte) (v >> 16);
bytes[6] = (byte) (v >> 8);
bytes[7] = (byte) v;
return new BytesRef(bytes);
}
/*
private static long bytesToLong(BytesRef bytes) {
long v = ((bytes.bytes[bytes.offset]&0xFFL) << 56) |
((bytes.bytes[bytes.offset+1]&0xFFL) << 48) |
((bytes.bytes[bytes.offset+2]&0xFFL) << 40) |
((bytes.bytes[bytes.offset+3]&0xFFL) << 32) |
((bytes.bytes[bytes.offset+4]&0xFFL) << 24) |
((bytes.bytes[bytes.offset+5]&0xFFL) << 16) |
((bytes.bytes[bytes.offset+6]&0xFFL) << 8) |
(bytes.bytes[bytes.offset+7]&0xFFL);
// Flip the sign bit back:
return v ^ 0x8000000000000000L;
}
*/
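
The sign-bit flip in longToBytes above is what makes unsigned lexicographic byte order agree with signed long order; a quick standalone check (hypothetical class, same flip-and-shift scheme):

// Standalone check (hypothetical) that flipping the sign bit makes
// unsigned lexicographic byte order match signed long order.
class SignFlipSketch {
  static byte[] encode(long v) {
    v ^= 0x8000000000000000L;  // flip sign bit
    byte[] b = new byte[8];
    for (int i = 0; i < 8; i++) {
      b[i] = (byte) (v >>> (56 - 8 * i));
    }
    return b;
  }

  static int compareUnsigned(byte[] a, byte[] b) {
    for (int i = 0; i < 8; i++) {
      int cmp = (a[i] & 0xFF) - (b[i] & 0xFF);
      if (cmp != 0) return cmp;
    }
    return 0;
  }

  public static void main(String[] args) {
    long[] vals = {Long.MIN_VALUE, -1L, 0L, 1L, Long.MAX_VALUE};
    for (int i = 1; i < vals.length; i++) {
      if (compareUnsigned(encode(vals[i - 1]), encode(vals[i])) >= 0) {
        throw new AssertionError("byte order disagrees at " + vals[i]);
      }
    }
    System.out.println("unsigned byte order matches signed long order");
  }
}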
private static DocValuesFormat getDocValuesFormat() {
int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
return new RangeTreeDocValuesFormat(maxPointsInLeaf, maxPointsSortInHeap);
}
private static Directory noVirusChecker(Directory dir) {
if (dir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
}
return dir;
}
private static Directory getDirectory() {
return noVirusChecker(newDirectory());
}
}

View File

@ -0,0 +1,124 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.DimensionalLatLonField;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BaseGeoPointTestCase;
import org.apache.lucene.util.GeoRect;
import org.apache.lucene.util.SloppyMath;
public class TestDimensionalQueries extends BaseGeoPointTestCase {
@Override
protected void addPointToDoc(String field, Document doc, double lat, double lon) {
doc.add(new DimensionalLatLonField(field, lat, lon));
}
@Override
protected Query newRectQuery(String field, GeoRect rect) {
return new DimensionalPointInRectQuery(field, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
}
@Override
protected Query newDistanceQuery(String field, double centerLat, double centerLon, double radiusMeters) {
// return new BKDDistanceQuery(field, centerLat, centerLon, radiusMeters);
return null;
}
@Override
protected Query newDistanceRangeQuery(String field, double centerLat, double centerLon, double minRadiusMeters, double radiusMeters) {
return null;
}
@Override
protected Query newPolygonQuery(String field, double[] lats, double[] lons) {
return new DimensionalPointInPolygonQuery(FIELD_NAME, lats, lons);
}
@Override
protected Boolean rectContainsPoint(GeoRect rect, double pointLat, double pointLon) {
assert Double.isNaN(pointLat) == false;
int rectLatMinEnc = DimensionalLatLonField.encodeLat(rect.minLat);
int rectLatMaxEnc = DimensionalLatLonField.encodeLat(rect.maxLat);
int rectLonMinEnc = DimensionalLatLonField.encodeLon(rect.minLon);
int rectLonMaxEnc = DimensionalLatLonField.encodeLon(rect.maxLon);
int pointLatEnc = DimensionalLatLonField.encodeLat(pointLat);
int pointLonEnc = DimensionalLatLonField.encodeLon(pointLon);
if (rect.minLon < rect.maxLon) {
return pointLatEnc >= rectLatMinEnc &&
pointLatEnc <= rectLatMaxEnc &&
pointLonEnc >= rectLonMinEnc &&
pointLonEnc <= rectLonMaxEnc;
} else {
// Rect crosses dateline:
return pointLatEnc >= rectLatMinEnc &&
pointLatEnc <= rectLatMaxEnc &&
(pointLonEnc >= rectLonMinEnc ||
pointLonEnc <= rectLonMaxEnc);
}
}
private static final double POLY_TOLERANCE = 1e-7;
@Override
protected Boolean polyRectContainsPoint(GeoRect rect, double pointLat, double pointLon) {
if (Math.abs(rect.minLat-pointLat) < POLY_TOLERANCE ||
Math.abs(rect.maxLat-pointLat) < POLY_TOLERANCE ||
Math.abs(rect.minLon-pointLon) < POLY_TOLERANCE ||
Math.abs(rect.maxLon-pointLon) < POLY_TOLERANCE) {
// The poly check quantizes slightly differently, so we allow for boundary cases to disagree
return null;
} else {
return rectContainsPoint(rect, pointLat, pointLon);
}
}
@Override
protected Boolean circleContainsPoint(double centerLat, double centerLon, double radiusMeters, double pointLat, double pointLon) {
double distanceKM = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon);
boolean result = distanceKM*1000.0 <= radiusMeters;
//System.out.println(" shouldMatch? centerLon=" + centerLon + " centerLat=" + centerLat + " pointLon=" + pointLon + " pointLat=" + pointLat + " result=" + result + " distanceMeters=" + (distanceKM * 1000));
return result;
}
@Override
protected Boolean distanceRangeContainsPoint(double centerLat, double centerLon, double minRadiusMeters, double radiusMeters, double pointLat, double pointLon) {
final double d = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon)*1000.0;
return d >= minRadiusMeters && d <= radiusMeters;
}
public void testEncodeDecode() throws Exception {
int iters = atLeast(10000);
boolean small = random().nextBoolean();
for(int iter=0;iter<iters;iter++) {
double lat = randomLat(small);
double latQuantized = DimensionalLatLonField.decodeLat(DimensionalLatLonField.encodeLat(lat));
assertEquals(lat, latQuantized, DimensionalLatLonField.TOLERANCE);
double lon = randomLon(small);
double lonQuantized = DimensionalLatLonField.decodeLon(DimensionalLatLonField.encodeLon(lon));
assertEquals(lon, lonQuantized, DimensionalLatLonField.TOLERANCE);
}
}
}
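For context, a minimal indexing/search sketch using the classes exercised by this test, mirroring the addPointToDoc/newRectQuery overrides above. This is illustrative only and assumes the default codec on this branch supports dimensional values:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DimensionalLatLonField;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.DimensionalPointInRectQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class DimensionalRectDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    Document doc = new Document();
    doc.add(new DimensionalLatLonField("point", 40.7128, -74.0060)); // lat, lon
    w.addDocument(doc);
    w.close();
    IndexSearcher s = new IndexSearcher(DirectoryReader.open(dir));
    // minLat, maxLat, minLon, maxLon, as in newRectQuery above:
    int hits = s.search(new DimensionalPointInRectQuery("point", 40.0, 41.0, -75.0, -74.0), 10).totalHits;
    System.out.println("hits=" + hits); // expected: 1
  }
}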

View File

@ -48,13 +48,19 @@ public class TestGeoPointQuery extends BaseGeoPointTestCase {
// error threshold for point-distance queries (in percent) NOTE: Guideline from USGS
private static final double DISTANCE_PCT_ERR = 0.005;
@Override
protected boolean forceSmall() {
// TODO: GeoUtils are potentially slow if we use small=false with heavy testing
return true;
}
@Override
protected void addPointToDoc(String field, Document doc, double lat, double lon) {
doc.add(new GeoPointField(field, lon, lat, Field.Store.NO));
}
@Override
protected Query newBBoxQuery(String field, GeoRect rect) {
protected Query newRectQuery(String field, GeoRect rect) {
return new GeoPointInBBoxQuery(field, rect.minLon, rect.minLat, rect.maxLon, rect.maxLat);
}

View File

@ -72,11 +72,18 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
originLat = GeoUtils.normalizeLat(GeoUtils.MIN_LAT_INCL + latRange + (GeoUtils.MAX_LAT_INCL - GeoUtils.MIN_LAT_INCL - 2 * latRange) * random().nextDouble());
}
/** Returns true if testing on non-small regions may be too slow (as with GeoPoint*Query) */
protected boolean forceSmall() {
return false;
}
// A particularly tricky adversary for BKD tree:
@Nightly
public void testSamePointManyTimes() throws Exception {
// For GeoPointQuery, only run this test nightly:
assumeTrue("GeoPoint*Query is too slow otherwise", TEST_NIGHTLY || forceSmall() == false);
int numPoints = atLeast(1000);
// TODO: GeoUtils are potentially slow if we use small=false with heavy testing
boolean small = random().nextBoolean();
// Every doc has 2 points:
@ -92,12 +99,13 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
verify(small, lats, lons);
}
@Nightly
public void testAllLatEqual() throws Exception {
// For GeoPointQuery, only run this test nightly:
assumeTrue("GeoPoint*Query is too slow otherwise", TEST_NIGHTLY || forceSmall() == false);
int numPoints = atLeast(10000);
// TODO: GeoUtils are potentially slow if we use small=false with heavy testing
// boolean small = random().nextBoolean();
boolean small = true;
boolean small = forceSmall() || random().nextBoolean();
double lat = randomLat(small);
double[] lats = new double[numPoints];
double[] lons = new double[numPoints];
@ -142,12 +150,13 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
verify(small, lats, lons);
}
@Nightly
public void testAllLonEqual() throws Exception {
// For GeoPointQuery, only run this test nightly:
assumeTrue("GeoPoint*Query is too slow otherwise", TEST_NIGHTLY || forceSmall() == false);
int numPoints = atLeast(10000);
// TODO: GeoUtils are potentially slow if we use small=false with heavy testing
// boolean small = random().nextBoolean();
boolean small = true;
boolean small = forceSmall() || random().nextBoolean();
double theLon = randomLon(small);
double[] lats = new double[numPoints];
double[] lons = new double[numPoints];
@ -194,8 +203,11 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
verify(small, lats, lons);
}
@Nightly
public void testMultiValued() throws Exception {
// For GeoPointQuery, only run this test nightly:
assumeTrue("GeoPoint*Query is too slow otherwise", TEST_NIGHTLY || forceSmall() == false);
int numPoints = atLeast(10000);
// Every doc has 2 points:
double[] lats = new double[2*numPoints];
@ -209,9 +221,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
// TODO: GeoUtils are potentially slow if we use small=false with heavy testing
boolean small = random().nextBoolean();
//boolean small = true;
for (int id=0;id<numPoints;id++) {
Document doc = new Document();
@ -231,6 +241,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
w.addDocument(doc);
}
// TODO: share w/ verify; just need parallel array of the expected ids
if (random().nextBoolean()) {
w.forceMerge(1);
}
@ -245,10 +256,10 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
GeoRect rect = randomRect(small, small == false);
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " bbox=" + rect);
System.out.println("\nTEST: iter=" + iter + " rect=" + rect);
}
Query query = newBBoxQuery(FIELD_NAME, rect);
Query query = newRectQuery(FIELD_NAME, rect);
final FixedBitSet hits = new FixedBitSet(r.maxDoc());
s.search(query, new SimpleCollector() {
@ -340,7 +351,6 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
double[] lats = new double[numPoints];
double[] lons = new double[numPoints];
// TODO: GeoUtils are potentially slow if we use small=false with heavy testing
boolean small = random().nextBoolean();
boolean haveRealDoc = false;
@ -424,6 +434,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
} else {
result = -90 + 180.0 * random().nextDouble();
}
// TODO: we should not do this here! it weakens the test, and users don't pre-quantize the lat/lons they send us:
return unscaleLat(scaleLat(result));
}
@ -434,6 +445,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
} else {
result = -180 + 360.0 * random().nextDouble();
}
// TODO: we should not do this here! it weakens the test, and users don't pre-quantize the lat/lons they send us:
return unscaleLon(scaleLon(result));
}
@ -463,7 +475,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
protected abstract void addPointToDoc(String field, Document doc, double lat, double lon);
protected abstract Query newBBoxQuery(String field, GeoRect bbox);
protected abstract Query newRectQuery(String field, GeoRect bbox);
protected abstract Query newDistanceQuery(String field, double centerLat, double centerLon, double radiusMeters);
@ -557,7 +569,6 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
if (mbd != -1 && mbd < lats.length/100) {
iwc.setMaxBufferedDocs(lats.length/100);
}
initIndexWriterConfig(FIELD_NAME, iwc);
Directory dir;
if (lats.length > 100000) {
dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
@ -631,15 +642,15 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
VerifyHits verifyHits;
if (random().nextBoolean()) {
// BBox: don't allow dateline crossing when testing small:
final GeoRect bbox = randomRect(small, small == false);
// Rect: don't allow dateline crossing when testing small:
final GeoRect rect = randomRect(small, small == false);
query = newBBoxQuery(FIELD_NAME, bbox);
query = newRectQuery(FIELD_NAME, rect);
verifyHits = new VerifyHits() {
@Override
protected Boolean shouldMatch(double pointLat, double pointLon) {
return rectContainsPoint(bbox, pointLat, pointLon);
return rectContainsPoint(rect, pointLat, pointLon);
}
@Override
protected void describe(int docID, double lat, double lon) {

View File

@ -1,343 +0,0 @@
package org.apache.lucene.bkdtree3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.RamUsageEstimator;
/** Handles intersection of a shape with a BKD tree previously written with {@link BKD3DTreeWriter}.
*
* @lucene.experimental */
final class BKD3DTreeReader implements Accountable {
final private int[] splitValues;
final private int leafNodeOffset;
final private long[] leafBlockFPs;
final int maxDoc;
final IndexInput in;
enum Relation {CELL_INSIDE_SHAPE, SHAPE_CROSSES_CELL, SHAPE_OUTSIDE_CELL, SHAPE_INSIDE_CELL};
interface ValueFilter {
boolean accept(int docID);
Relation compare(int cellXMin, int cellXMax, int cellYMin, int cellYMax, int cellZMin, int cellZMax);
}
public BKD3DTreeReader(IndexInput in, int maxDoc) throws IOException {
// Read index:
int numLeaves = in.readVInt();
leafNodeOffset = numLeaves;
// Tree is a fully balanced binary tree, so the number of inner nodes is numLeaves-1; nodeIDs are 1-based (splitValues[0] is unused):
splitValues = new int[numLeaves];
for(int i=0;i<numLeaves;i++) {
splitValues[i] = in.readInt();
}
leafBlockFPs = new long[numLeaves];
for(int i=0;i<numLeaves;i++) {
leafBlockFPs[i] = in.readVLong();
}
this.maxDoc = maxDoc;
this.in = in;
}
private static final class QueryState {
final IndexInput in;
byte[] scratch = new byte[16];
final ByteArrayDataInput scratchReader = new ByteArrayDataInput(scratch);
final DocIdSetBuilder docs;
final int xMin;
final int xMax;
final int yMin;
final int yMax;
final int zMin;
final int zMax;
final ValueFilter valueFilter;
public QueryState(IndexInput in, int maxDoc,
int xMin, int xMax,
int yMin, int yMax,
int zMin, int zMax,
ValueFilter valueFilter) {
this.in = in;
this.docs = new DocIdSetBuilder(maxDoc);
this.xMin = xMin;
this.xMax = xMax;
this.yMin = yMin;
this.yMax = yMax;
this.zMin = zMin;
this.zMax = zMax;
this.valueFilter = valueFilter;
}
}
public DocIdSet intersect(ValueFilter filter) throws IOException {
return intersect(Integer.MIN_VALUE, Integer.MAX_VALUE,
Integer.MIN_VALUE, Integer.MAX_VALUE,
Integer.MIN_VALUE, Integer.MAX_VALUE,
filter);
}
/** Optimized intersect which takes the 3D bbox for the query and uses that to avoid filter.compare calls
* when cells are clearly outside the bbox. */
public DocIdSet intersect(int xMin, int xMax, int yMin, int yMax, int zMin, int zMax, ValueFilter filter) throws IOException {
QueryState state = new QueryState(in.clone(), maxDoc,
xMin, xMax,
yMin, yMax,
zMin, zMax,
filter);
int hitCount = intersect(state, 1,
Integer.MIN_VALUE, Integer.MAX_VALUE,
Integer.MIN_VALUE, Integer.MAX_VALUE,
Integer.MIN_VALUE, Integer.MAX_VALUE);
// NOTE: hitCount is an over-estimate in the multi-valued case:
return state.docs.build(hitCount);
}
/** Fast path: this is called when the query rect fully encompasses all cells under this node. */
private int addAll(QueryState state, int nodeID) throws IOException {
//System.out.println(" addAll nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset);
if (nodeID >= leafNodeOffset) {
/*
System.out.println("A: " + BKDTreeWriter.decodeLat(cellLatMinEnc)
+ " " + BKDTreeWriter.decodeLat(cellLatMaxEnc)
+ " " + BKDTreeWriter.decodeLon(cellLonMinEnc)
+ " " + BKDTreeWriter.decodeLon(cellLonMaxEnc));
*/
// Leaf node
long fp = leafBlockFPs[nodeID-leafNodeOffset];
//System.out.println(" leaf fp=" + fp);
state.in.seek(fp);
//System.out.println(" seek to leafFP=" + fp);
// How many points are stored in this leaf cell:
int count = state.in.readVInt();
//System.out.println(" count=" + count);
state.docs.grow(count);
for(int i=0;i<count;i++) {
int docID = state.in.readInt();
state.docs.add(docID);
// Up above in the recursion we asked valueFilter to relate our cell, and it returned Relation.CELL_INSIDE_SHAPE
// so all docs inside this cell better be accepted by the filter:
// NOTE: this check would be too strict, because we lost precision in the pack/unpack (8 bytes to 4 bytes): a point that's a bit above/below the
// earth's surface due to that quantization may incorrectly evaluate as not inside the shape:
// assert state.valueFilter.accept(docID);
}
return count;
} else {
int count = addAll(state, 2*nodeID);
count += addAll(state, 2*nodeID+1);
return count;
}
}
private int intersect(QueryState state,
int nodeID,
int cellXMin, int cellXMax,
int cellYMin, int cellYMax,
int cellZMin, int cellZMax)
throws IOException {
//System.out.println("BKD3D.intersect nodeID=" + nodeID + " cellX=" + cellXMin + " TO " + cellXMax + ", cellY=" + cellYMin + " TO " + cellYMax + ", cellZ=" + cellZMin + " TO " + cellZMax);
if (cellXMin >= state.xMin ||
cellXMax <= state.xMax ||
cellYMin >= state.yMin ||
cellYMax <= state.yMax ||
cellZMin >= state.zMin ||
cellZMax <= state.zMax) {
// Only call the filter when the current cell does not fully contain the bbox:
Relation r = state.valueFilter.compare(cellXMin, cellXMax,
cellYMin, cellYMax,
cellZMin, cellZMax);
//System.out.println(" relation: " + r);
if (r == Relation.SHAPE_OUTSIDE_CELL) {
// This cell is fully outside of the query shape: stop recursing
return 0;
} else if (r == Relation.CELL_INSIDE_SHAPE) {
// This cell is fully inside of the query shape: recursively add all points in this cell without filtering
/*
System.out.println(Thread.currentThread() + ": switch to addAll at cell" +
" x=" + Geo3DDocValuesFormat.decodeValue(cellXMin) + " to " + Geo3DDocValuesFormat.decodeValue(cellXMax) +
" y=" + Geo3DDocValuesFormat.decodeValue(cellYMin) + " to " + Geo3DDocValuesFormat.decodeValue(cellYMax) +
" z=" + Geo3DDocValuesFormat.decodeValue(cellZMin) + " to " + Geo3DDocValuesFormat.decodeValue(cellZMax));
*/
return addAll(state, nodeID);
} else {
// The cell crosses the shape boundary, so we fall through and do full filtering
}
} else {
// The whole point of the incoming bbox (state.xMin/xMax/etc.) is that it is
// supposed to fully enclose the shape, so this cell we are visiting, which
// fully contains the query's bbox, better in turn fully contain the shape!
assert state.valueFilter.compare(cellXMin, cellXMax, cellYMin, cellYMax, cellZMin, cellZMax) == Relation.SHAPE_INSIDE_CELL: "got " + state.valueFilter.compare(cellXMin, cellXMax, cellYMin, cellYMax, cellZMin, cellZMax);
}
//System.out.println("\nintersect node=" + nodeID + " vs " + leafNodeOffset);
if (nodeID >= leafNodeOffset) {
//System.out.println(" leaf");
// Leaf node; scan and filter all points in this block:
//System.out.println(" intersect leaf nodeID=" + nodeID + " vs leafNodeOffset=" + leafNodeOffset + " fp=" + leafBlockFPs[nodeID-leafNodeOffset]);
int hitCount = 0;
long fp = leafBlockFPs[nodeID-leafNodeOffset];
/*
System.out.println("I: " + BKDTreeWriter.decodeLat(cellLatMinEnc)
+ " " + BKDTreeWriter.decodeLat(cellLatMaxEnc)
+ " " + BKDTreeWriter.decodeLon(cellLonMinEnc)
+ " " + BKDTreeWriter.decodeLon(cellLonMaxEnc));
*/
state.in.seek(fp);
// How many points are stored in this leaf cell:
int count = state.in.readVInt();
state.docs.grow(count);
//System.out.println(" count=" + count);
for(int i=0;i<count;i++) {
int docID = state.in.readInt();
//System.out.println(" check docID=" + docID);
if (state.valueFilter.accept(docID)) {
state.docs.add(docID);
hitCount++;
}
}
return hitCount;
} else {
//System.out.println(" non-leaf");
int splitDim = BKD3DTreeWriter.getSplitDim(cellXMin, cellXMax,
cellYMin, cellYMax,
cellZMin, cellZMax);
int splitValue = splitValues[nodeID];
int count = 0;
if (splitDim == 0) {
//System.out.println(" split on lat=" + splitValue);
// Inner node split on x:
// Left node:
if (state.xMin <= splitValue) {
//System.out.println(" recurse left");
count += intersect(state,
2*nodeID,
cellXMin, splitValue,
cellYMin, cellYMax,
cellZMin, cellZMax);
}
// Right node:
if (state.xMax >= splitValue) {
//System.out.println(" recurse right");
count += intersect(state,
2*nodeID+1,
splitValue, cellXMax,
cellYMin, cellYMax,
cellZMin, cellZMax);
}
} else if (splitDim == 1) {
// Inner node split on y:
// System.out.println(" split on lon=" + splitValue);
// Left node:
if (state.yMin <= splitValue) {
// System.out.println(" recurse left");
count += intersect(state,
2*nodeID,
cellXMin, cellXMax,
cellYMin, splitValue,
cellZMin, cellZMax);
}
// Right node:
if (state.yMax >= splitValue) {
// System.out.println(" recurse right");
count += intersect(state,
2*nodeID+1,
cellXMin, cellXMax,
splitValue, cellYMax,
cellZMin, cellZMax);
}
} else {
// Inner node split on z:
// System.out.println(" split on lon=" + splitValue);
// Left node:
if (state.zMin <= splitValue) {
// System.out.println(" recurse left");
count += intersect(state,
2*nodeID,
cellXMin, cellXMax,
cellYMin, cellYMax,
cellZMin, splitValue);
}
// Right node:
if (state.zMax >= splitValue) {
// System.out.println(" recurse right");
count += intersect(state,
2*nodeID+1,
cellXMin, cellXMax,
cellYMin, cellYMax,
splitValue, cellZMax);
}
}
return count;
}
}
@Override
public long ramBytesUsed() {
return splitValues.length * RamUsageEstimator.NUM_BYTES_INT +
leafBlockFPs.length * RamUsageEstimator.NUM_BYTES_LONG;
}
}
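To make the accept/compare contract above concrete, here is a hedged sketch of a ValueFilter for an axis-aligned integer box. It would have to live in org.apache.lucene.bkdtree3d since these classes are package-private, and docToPoint is an assumed decoder from docID to its x,y,z ints (e.g. backed by the wrapped doc values). The compare result drives pruning (SHAPE_OUTSIDE_CELL), the addAll fast path (CELL_INSIDE_SHAPE), or per-doc filtering:

// Sketch only, under the assumptions stated above.
static DocIdSet searchBox(BKD3DTreeReader reader,
                          final int xMin, final int xMax,
                          final int yMin, final int yMax,
                          final int zMin, final int zMax,
                          final java.util.function.IntFunction<int[]> docToPoint) throws IOException {
  return reader.intersect(xMin, xMax, yMin, yMax, zMin, zMax, new BKD3DTreeReader.ValueFilter() {
      @Override
      public boolean accept(int docID) {
        int[] p = docToPoint.apply(docID); // assumed: decodes this doc's x,y,z
        return p[0] >= xMin && p[0] <= xMax &&
               p[1] >= yMin && p[1] <= yMax &&
               p[2] >= zMin && p[2] <= zMax;
      }
      @Override
      public BKD3DTreeReader.Relation compare(int cellXMin, int cellXMax,
                                              int cellYMin, int cellYMax,
                                              int cellZMin, int cellZMax) {
        if (cellXMin >= xMin && cellXMax <= xMax &&
            cellYMin >= yMin && cellYMax <= yMax &&
            cellZMin >= zMin && cellZMax <= zMax) {
          // Cell fully inside the box: intersect() switches to the addAll fast path
          return BKD3DTreeReader.Relation.CELL_INSIDE_SHAPE;
        }
        if (cellXMin <= xMin && cellXMax >= xMax &&
            cellYMin <= yMin && cellYMax >= yMax &&
            cellZMin <= zMin && cellZMax >= zMax) {
          // Cell fully contains the box (the assert in intersect() requires this answer):
          return BKD3DTreeReader.Relation.SHAPE_INSIDE_CELL;
        }
        if (cellXMax < xMin || cellXMin > xMax ||
            cellYMax < yMin || cellYMin > yMax ||
            cellZMax < zMin || cellZMin > zMax) {
          return BKD3DTreeReader.Relation.SHAPE_OUTSIDE_CELL; // disjoint: prune subtree
        }
        return BKD3DTreeReader.Relation.SHAPE_CROSSES_CELL;   // boundary: filter per doc
      }
    });
}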

View File

@ -1,924 +0,0 @@
package org.apache.lucene.bkdtree3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.RamUsageEstimator;
// TODO
// - we could also index "auto-prefix terms" here, and use better compression, and maybe only use for the "fully contained" case so we'd
// only index docIDs
// - the index could be efficiently encoded as an FST, so we don't have wasteful
// (monotonic) long[] leafBlockFPs; or we could use MonotonicLongValues ... but then
// the index is already plenty small: 60M OSM points --> 1.1 MB with 128 points
// per leaf, and you can reduce that by putting more points per leaf
// - we can quantize the split values to 2 bytes (short): http://people.csail.mit.edu/tmertens/papers/qkdtree.pdf
// - we could use threads while building; the higher nodes are very parallelizable
// - generalize to N dimensions? I think there are reasonable use cases here, e.g.
// 2 dimensional points to store houses, plus e.g. 3rd dimension for "household income"
/** Recursively builds a BKD tree to assign all incoming points to smaller
* and smaller rectangles until the number of points in a given
* rectangle is &lt;= the <code>maxPointsInLeafNode</code>. The tree is
* fully balanced, which means the leaf nodes will have between 50% and 100% of
* the requested <code>maxPointsInLeafNode</code>, except for the adversarial case
* of indexing exactly the same point many times.
*
* <p>
* See <a href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a> for details.
*
* <p>This consumes heap during writing: it allocates a <code>LongBitSet(numPoints)</code>,
* and for any nodes with fewer than <code>maxPointsSortInHeap</code>, it holds
* the points in memory as simple java arrays.
*
* <p>
* <b>NOTE</b>: This can write at most Integer.MAX_VALUE * <code>maxPointsInLeafNode</code> total points.
*
* @lucene.experimental */
class BKD3DTreeWriter {
// x (int), y (int), z (int) + ord (long) + docID (int)
static final int BYTES_PER_DOC = RamUsageEstimator.NUM_BYTES_LONG + 4 * RamUsageEstimator.NUM_BYTES_INT;
//static final boolean DEBUG = false;
public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 1024;
/** This works out to max of ~10 MB peak heap tied up during writing: */
public static final int DEFAULT_MAX_POINTS_SORT_IN_HEAP = 128*1024;
private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
private final Directory tempDir;
private final String tempFileNamePrefix;
private OfflineSorter.ByteSequencesWriter offlineWriter;
private GrowingHeapWriter heapWriter;
private IndexOutput tempInput;
private final int maxPointsInLeafNode;
private final int maxPointsSortInHeap;
private long pointCount;
private final int[] scratchDocIDs;
public BKD3DTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException {
this(tempDir, tempFileNamePrefix, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP);
}
// TODO: instead of maxPointsSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
public BKD3DTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxPointsSortInHeap = maxPointsSortInHeap;
scratchDocIDs = new int[maxPointsInLeafNode];
// We write first maxPointsSortInHeap in heap, then cutover to offline for additional points:
heapWriter = new GrowingHeapWriter(maxPointsSortInHeap);
}
public static void verifyParams(int maxPointsInLeafNode, int maxPointsSortInHeap) {
if (maxPointsInLeafNode <= 0) {
throw new IllegalArgumentException("maxPointsInLeafNode must be > 0; got " + maxPointsInLeafNode);
}
if (maxPointsInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) {
throw new IllegalArgumentException("maxPointsInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsInLeafNode);
}
if (maxPointsSortInHeap < maxPointsInLeafNode) {
throw new IllegalArgumentException("maxPointsSortInHeap must be >= maxPointsInLeafNode; got " + maxPointsSortInHeap + " vs maxPointsInLeafNode="+ maxPointsInLeafNode);
}
if (maxPointsSortInHeap > ArrayUtil.MAX_ARRAY_LENGTH) {
throw new IllegalArgumentException("maxPointsSortInHeap must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsSortInHeap);
}
}
/** If the current segment has too many points then we switch over to temp files / offline sort. */
private void switchToOffline() throws IOException {
// For each .add we just append to this input file, then in .finish we sort this input and recursively build the tree:
tempInput = tempDir.createTempOutput(tempFileNamePrefix, "bkd3d", IOContext.DEFAULT);
offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput);
for(int i=0;i<pointCount;i++) {
scratchBytesOutput.reset(scratchBytes);
scratchBytesOutput.writeInt(heapWriter.xs[i]);
scratchBytesOutput.writeInt(heapWriter.ys[i]);
scratchBytesOutput.writeInt(heapWriter.zs[i]);
scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
scratchBytesOutput.writeVLong(i);
// TODO: can/should OfflineSorter optimize the fixed-width case?
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
}
heapWriter = null;
}
public void add(int x, int y, int z, int docID) throws IOException {
if (pointCount >= maxPointsSortInHeap) {
if (offlineWriter == null) {
switchToOffline();
}
scratchBytesOutput.reset(scratchBytes);
scratchBytesOutput.writeInt(x);
scratchBytesOutput.writeInt(y);
scratchBytesOutput.writeInt(z);
scratchBytesOutput.writeVInt(docID);
scratchBytesOutput.writeVLong(pointCount);
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
} else {
// Not too many points added yet, continue using heap:
heapWriter.append(x, y, z, pointCount, docID);
}
pointCount++;
}
/** Converts the incoming {@link ByteSequencesWriter} file into a fixed-width-per-entry file, because we need to be able to slice
* as we recurse in {@link #build}. */
private Writer convertToFixedWidth(String in) throws IOException {
BytesRefBuilder scratch = new BytesRefBuilder();
scratch.grow(BYTES_PER_DOC);
BytesRef bytes = scratch.get();
ByteArrayDataInput dataReader = new ByteArrayDataInput();
OfflineSorter.ByteSequencesReader reader = null;
Writer sortedWriter = null;
boolean success = false;
try {
reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE));
sortedWriter = getWriter(pointCount);
for (long i=0;i<pointCount;i++) {
boolean result = reader.read(scratch);
assert result;
dataReader.reset(bytes.bytes, bytes.offset, bytes.length);
int x = dataReader.readInt();
int y = dataReader.readInt();
int z = dataReader.readInt();
int docID = dataReader.readVInt();
long ord = dataReader.readVLong();
assert docID >= 0: "docID=" + docID;
sortedWriter.append(x, y, z, ord, docID);
}
success = true;
} finally {
if (success) {
IOUtils.close(sortedWriter, reader);
} else {
IOUtils.closeWhileHandlingException(sortedWriter, reader);
try {
sortedWriter.destroy();
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
}
}
return sortedWriter;
}
/** dim: 0=x, 1=y, 2=z */
private Writer sort(int dim) throws IOException {
if (heapWriter != null) {
assert pointCount < Integer.MAX_VALUE;
// All buffered points are still in heap
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
int docID = heapWriter.docIDs[i];
heapWriter.docIDs[i] = heapWriter.docIDs[j];
heapWriter.docIDs[j] = docID;
long ord = heapWriter.ords[i];
heapWriter.ords[i] = heapWriter.ords[j];
heapWriter.ords[j] = ord;
int x = heapWriter.xs[i];
heapWriter.xs[i] = heapWriter.xs[j];
heapWriter.xs[j] = x;
int y = heapWriter.ys[i];
heapWriter.ys[i] = heapWriter.ys[j];
heapWriter.ys[j] = y;
int z = heapWriter.zs[i];
heapWriter.zs[i] = heapWriter.zs[j];
heapWriter.zs[j] = z;
}
@Override
protected int compare(int i, int j) {
int cmp;
if (dim == 0) {
cmp = Integer.compare(heapWriter.xs[i], heapWriter.xs[j]);
} else if (dim == 1) {
cmp = Integer.compare(heapWriter.ys[i], heapWriter.ys[j]);
} else {
cmp = Integer.compare(heapWriter.zs[i], heapWriter.zs[j]);
}
if (cmp != 0) {
return cmp;
}
// Tie-break
cmp = Integer.compare(heapWriter.docIDs[i], heapWriter.docIDs[j]);
if (cmp != 0) {
return cmp;
}
return Long.compare(heapWriter.ords[i], heapWriter.ords[j]);
}
}.sort(0, (int) pointCount);
HeapWriter sorted = new HeapWriter((int) pointCount);
//System.out.println("sorted dim=" + dim);
for(int i=0;i<pointCount;i++) {
/*
System.out.println(" docID=" + heapWriter.docIDs[i] +
" x=" + heapWriter.xs[i] +
" y=" + heapWriter.ys[i] +
" z=" + heapWriter.zs[i]);
*/
sorted.append(heapWriter.xs[i],
heapWriter.ys[i],
heapWriter.zs[i],
heapWriter.ords[i],
heapWriter.docIDs[i]);
}
sorted.close();
return sorted;
} else {
// Offline sort:
assert tempInput != null;
final ByteArrayDataInput reader = new ByteArrayDataInput();
Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
@Override
public int compare(BytesRef a, BytesRef b) {
reader.reset(a.bytes, a.offset, a.length);
final int xa = reader.readInt();
final int ya = reader.readInt();
final int za = reader.readInt();
final int docIDA = reader.readVInt();
final long ordA = reader.readVLong();
reader.reset(b.bytes, b.offset, b.length);
final int xb = reader.readInt();
final int yb = reader.readInt();
final int zb = reader.readInt();
final int docIDB = reader.readVInt();
final long ordB = reader.readVLong();
int cmp;
if (dim == 0) {
cmp = Integer.compare(xa, xb);
} else if (dim == 1) {
cmp = Integer.compare(ya, yb);
} else {
cmp = Integer.compare(za, zb);
}
if (cmp != 0) {
return cmp;
}
// Tie-break
cmp = Integer.compare(docIDA, docIDB);
if (cmp != 0) {
return cmp;
}
return Long.compare(ordA, ordB);
}
};
boolean success = false;
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp);
String sortedFileName = sorter.sort(tempInput.getName());
try {
Writer writer = convertToFixedWidth(sortedFileName);
success = true;
return writer;
} finally {
if (success) {
tempDir.deleteFile(sortedFileName);
} else {
IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFileName);
}
}
}
}
/** Writes the BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */
public long finish(IndexOutput out) throws IOException {
//System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapWriter + " maxPointsInLeafNode=" + maxPointsInLeafNode);
if (offlineWriter != null) {
offlineWriter.close();
}
LongBitSet bitSet = new LongBitSet(pointCount);
long countPerLeaf = pointCount;
long innerNodeCount = 1;
while (countPerLeaf > maxPointsInLeafNode) {
countPerLeaf = (countPerLeaf+1)/2;
innerNodeCount *= 2;
}
//System.out.println("innerNodeCount=" + innerNodeCount + " countPerLeaf=" + countPerLeaf);
if (1+2*innerNodeCount >= Integer.MAX_VALUE) {
throw new IllegalStateException("too many nodes; increase maxPointsInLeafNode (currently " + maxPointsInLeafNode + ") and reindex");
}
innerNodeCount--;
int numLeaves = (int) (innerNodeCount+1);
//System.out.println(" numLeaves=" + numLeaves);
// Indexed by nodeID, but first (root) nodeID is 1
int[] splitValues = new int[numLeaves];
// +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 7)
long[] leafBlockFPs = new long[numLeaves];
// Make sure the math above "worked":
assert pointCount / splitValues.length <= maxPointsInLeafNode: "pointCount=" + pointCount + " splitValues.length=" + splitValues.length + " maxPointsInLeafNode=" + maxPointsInLeafNode;
//System.out.println(" avg pointsPerLeaf=" + (pointCount/splitValues.length));
// Sort all docs once by x, once by y, once by z:
Writer xSortedWriter = null;
Writer ySortedWriter = null;
Writer zSortedWriter = null;
boolean success = false;
try {
xSortedWriter = sort(0);
ySortedWriter = sort(1);
zSortedWriter = sort(2);
heapWriter = null;
build(1, numLeaves,
new PathSlice(xSortedWriter, 0, pointCount),
new PathSlice(ySortedWriter, 0, pointCount),
new PathSlice(zSortedWriter, 0, pointCount),
bitSet, out,
Integer.MIN_VALUE, Integer.MAX_VALUE,
Integer.MIN_VALUE, Integer.MAX_VALUE,
Integer.MIN_VALUE, Integer.MAX_VALUE,
splitValues,
leafBlockFPs);
success = true;
} finally {
if (success) {
xSortedWriter.destroy();
ySortedWriter.destroy();
zSortedWriter.destroy();
if (tempInput != null) {
tempDir.deleteFile(tempInput.getName());
}
} else {
try {
xSortedWriter.destroy();
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
try {
ySortedWriter.destroy();
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
try {
zSortedWriter.destroy();
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
if (tempInput != null) {
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName());
}
}
}
//System.out.println("Total nodes: " + innerNodeCount);
// Write index:
long indexFP = out.getFilePointer();
//System.out.println("indexFP=" + indexFP);
out.writeVInt(numLeaves);
// NOTE: splitValues[0] is unused, because nodeID is 1-based:
for (int i=0;i<splitValues.length;i++) {
out.writeInt(splitValues[i]);
}
for (int i=0;i<leafBlockFPs.length;i++) {
out.writeVLong(leafBlockFPs[i]);
}
return indexFP;
}
/** Sliced reference to points in an OfflineSorter.ByteSequencesWriter file. */
private static final class PathSlice {
final Writer writer;
final long start;
final long count;
public PathSlice(Writer writer, long start, long count) {
this.writer = writer;
this.start = start;
this.count = count;
}
@Override
public String toString() {
return "PathSlice(start=" + start + " count=" + count + " writer=" + writer + ")";
}
}
/** Marks bits for the ords (points) that belong in the left sub tree. */
private int markLeftTree(int splitDim, PathSlice source, LongBitSet bitSet,
int minX, int maxX,
int minY, int maxY,
int minZ, int maxZ) throws IOException {
// This is the size of our left tree
long leftCount = source.count / 2;
// Read the split value:
//if (DEBUG) System.out.println(" leftCount=" + leftCount + " vs " + source.count);
Reader reader = source.writer.getReader(source.start + leftCount);
boolean success = false;
int splitValue;
try {
boolean result = reader.next();
assert result;
int x = reader.x();
assert x >= minX && x <= maxX: "x=" + x + " minX=" + minX + " maxX=" + maxX;
int y = reader.y();
assert y >= minY && y <= maxY: "y=" + y + " minY=" + minY + " maxY=" + maxY;
int z = reader.z();
assert z >= minZ && z <= maxZ: "z=" + z + " minZ=" + minZ + " maxZ=" + maxZ;
if (splitDim == 0) {
splitValue = x;
} else if (splitDim == 1) {
splitValue = y;
} else {
splitValue = z;
}
success = true;
} finally {
if (success) {
IOUtils.close(reader);
} else {
IOUtils.closeWhileHandlingException(reader);
}
}
// Mark ords that fall into the left half, and also handle the == boundary case:
assert bitSet.cardinality() == 0: "cardinality=" + bitSet.cardinality();
success = false;
reader = source.writer.getReader(source.start);
try {
int lastValue = Integer.MIN_VALUE;
for (int i=0;i<leftCount;i++) {
boolean result = reader.next();
assert result;
int x = reader.x();
int y = reader.y();
int z = reader.z();
int value;
if (splitDim == 0) {
value = x;
} else if (splitDim == 1) {
value = y;
} else {
value = z;
}
// Our input source is supposed to be sorted on the incoming dimension:
assert value >= lastValue;
lastValue = value;
assert value <= splitValue: "i=" + i + " value=" + value + " vs splitValue=" + splitValue;
long ord = reader.ord();
int docID = reader.docID();
assert docID >= 0: "docID=" + docID + " reader=" + reader;
// We should never see dup ords:
assert bitSet.get(ord) == false;
bitSet.set(ord);
}
success = true;
} finally {
if (success) {
IOUtils.close(reader);
} else {
IOUtils.closeWhileHandlingException(reader);
}
}
assert leftCount == bitSet.cardinality(): "leftCount=" + leftCount + " cardinality=" + bitSet.cardinality();
return splitValue;
}
// Split on the dim with the largest range:
static int getSplitDim(int minX, int maxX, int minY, int maxY, int minZ, int maxZ) {
long xRange = (long) maxX - (long) minX;
long yRange = (long) maxY - (long) minY;
long zRange = (long) maxZ - (long) minZ;
if (xRange > yRange) {
if (xRange > zRange) {
return 0;
} else {
return 2;
}
} else if (yRange > zRange) {
return 1;
} else {
return 2;
}
}
/** The incoming PathSlice for the dim we will split is already partitioned/sorted. */
private void build(int nodeID, int leafNodeOffset,
PathSlice lastXSorted,
PathSlice lastYSorted,
PathSlice lastZSorted,
LongBitSet bitSet,
IndexOutput out,
int minX, int maxX,
int minY, int maxY,
int minZ, int maxZ,
int[] splitValues,
long[] leafBlockFPs) throws IOException {
long count = lastXSorted.count;
assert count > 0;
assert count <= ArrayUtil.MAX_ARRAY_LENGTH;
assert count == lastYSorted.count;
assert count == lastZSorted.count;
//if (DEBUG) System.out.println("\nBUILD: nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset + "\n lastXSorted=" + lastXSorted + "\n lastYSorted=" + lastYSorted + "\n lastZSorted=" + lastZSorted + "\n count=" + lastXSorted.count + " x=" + minX + " TO " + maxX + " y=" + minY + " TO " + maxY + " z=" + minZ + " TO " + maxZ);
if (nodeID >= leafNodeOffset) {
// Leaf node: write block
//if (DEBUG) System.out.println(" leaf");
assert maxX >= minX;
assert maxY >= minY;
assert maxZ >= minZ;
//System.out.println("\nleaf:\n lat range: " + ((long) maxLatEnc-minLatEnc));
//System.out.println(" lon range: " + ((long) maxLonEnc-minLonEnc));
// Sort by docID in the leaf so we get sequentiality at search time (may not matter?):
Reader reader = lastXSorted.writer.getReader(lastXSorted.start);
assert count <= scratchDocIDs.length: "count=" + count + " scratchDocIDs.length=" + scratchDocIDs.length;
boolean success = false;
try {
for (int i=0;i<count;i++) {
// NOTE: we discard ord at this point; we only needed it temporarily
// during building to uniquely identify each point to properly handle
// the multi-valued case (one docID having multiple values):
// We also discard x/y/z, since at search time, we rely on the
// wrapped doc values for this:
boolean result = reader.next();
assert result;
scratchDocIDs[i] = reader.docID();
}
success = true;
} finally {
if (success) {
IOUtils.close(reader);
} else {
IOUtils.closeWhileHandlingException(reader);
}
}
Arrays.sort(scratchDocIDs, 0, (int) count);
// Dedup docIDs: for the multi-valued case where more than one value for the doc
// wound up in this leaf cell, we only need to store the docID once:
int lastDocID = -1;
int uniqueCount = 0;
for(int i=0;i<count;i++) {
int docID = scratchDocIDs[i];
if (docID != lastDocID) {
uniqueCount++;
lastDocID = docID;
}
}
assert uniqueCount <= count;
long startFP = out.getFilePointer();
out.writeVInt(uniqueCount);
// Save the block file pointer:
leafBlockFPs[nodeID - leafNodeOffset] = startFP;
//System.out.println(" leafFP=" + startFP);
lastDocID = -1;
for (int i=0;i<count;i++) {
// Absolute int encode; with "vInt of deltas" encoding, the .kdd size dropped from
// 697 MB -> 539 MB, but query time for 225 queries went from 1.65 sec -> 2.64 sec.
// I think if we also indexed prefix terms here we could do less costly compression
// on those lists:
int docID = scratchDocIDs[i];
if (docID != lastDocID) {
out.writeInt(docID);
//System.out.println(" write docID=" + docID);
lastDocID = docID;
}
}
//long endFP = out.getFilePointer();
//System.out.println(" bytes/doc: " + ((endFP - startFP) / count));
} else {
int splitDim = getSplitDim(minX, maxX, minY, maxY, minZ, maxZ);
//System.out.println(" splitDim=" + splitDim);
PathSlice source;
if (splitDim == 0) {
source = lastXSorted;
} else if (splitDim == 1) {
source = lastYSorted;
} else {
source = lastZSorted;
}
// We let ties go to either side, so we should never get down to count == 0, even
// in adversarial case (all values are the same):
assert count > 0;
// Inner node: partition/recurse
//if (DEBUG) System.out.println(" non-leaf");
assert nodeID < splitValues.length: "nodeID=" + nodeID + " splitValues.length=" + splitValues.length;
int splitValue = markLeftTree(splitDim, source, bitSet,
minX, maxX,
minY, maxY,
minZ, maxZ);
long leftCount = count/2;
// TODO: we could save split value in here so we don't have to re-open file later:
// Partition the other (not split) dims into sorted left and right sets, so we can recurse.
// This is somewhat hairy: we partition the next X, Y set according to how we had just
// partitioned the Z set, etc.
Writer[] leftWriters = new Writer[3];
Writer[] rightWriters = new Writer[3];
for(int dim=0;dim<3;dim++) {
if (dim == splitDim) {
continue;
}
Writer leftWriter = null;
Writer rightWriter = null;
Reader reader = null;
boolean success = false;
int nextLeftCount = 0;
PathSlice nextSource;
if (dim == 0) {
nextSource = lastXSorted;
} else if (dim == 1) {
nextSource = lastYSorted;
} else {
nextSource = lastZSorted;
}
try {
leftWriter = getWriter(leftCount);
rightWriter = getWriter(nextSource.count - leftCount);
assert nextSource.count == count;
reader = nextSource.writer.getReader(nextSource.start);
// TODO: we could compute the split value here for each sub-tree and save an O(N) pass on recursion, but makes code hairier and only
// changes the constant factor of building, not the big-oh:
for (int i=0;i<count;i++) {
boolean result = reader.next();
assert result;
int x = reader.x();
int y = reader.y();
int z = reader.z();
long ord = reader.ord();
int docID = reader.docID();
assert docID >= 0: "docID=" + docID + " reader=" + reader;
//System.out.println(" i=" + i + " x=" + x + " ord=" + ord + " docID=" + docID);
if (bitSet.get(ord)) {
if (splitDim == 0) {
assert x <= splitValue: "x=" + x + " splitValue=" + splitValue;
} else if (splitDim == 1) {
assert y <= splitValue: "y=" + y + " splitValue=" + splitValue;
} else {
assert z <= splitValue: "z=" + z + " splitValue=" + splitValue;
}
leftWriter.append(x, y, z, ord, docID);
nextLeftCount++;
} else {
if (splitDim == 0) {
assert x >= splitValue: "x=" + x + " splitValue=" + splitValue;
} else if (splitDim == 1) {
assert y >= splitValue: "y=" + y + " splitValue=" + splitValue;
} else {
assert z >= splitValue: "z=" + z + " splitValue=" + splitValue;
}
rightWriter.append(x, y, z, ord, docID);
}
}
success = true;
} finally {
if (success) {
IOUtils.close(reader, leftWriter, rightWriter);
} else {
IOUtils.closeWhileHandlingException(reader, leftWriter, rightWriter);
}
}
assert leftCount == nextLeftCount: "leftCount=" + leftCount + " nextLeftCount=" + nextLeftCount;
leftWriters[dim] = leftWriter;
rightWriters[dim] = rightWriter;
}
bitSet.clear(0, pointCount);
long rightCount = count - leftCount;
boolean success = false;
try {
if (splitDim == 0) {
build(2*nodeID, leafNodeOffset,
new PathSlice(source.writer, source.start, leftCount),
new PathSlice(leftWriters[1], 0, leftCount),
new PathSlice(leftWriters[2], 0, leftCount),
bitSet,
out,
minX, splitValue,
minY, maxY,
minZ, maxZ,
splitValues, leafBlockFPs);
leftWriters[1].destroy();
leftWriters[2].destroy();
build(2*nodeID+1, leafNodeOffset,
new PathSlice(source.writer, source.start+leftCount, rightCount),
new PathSlice(rightWriters[1], 0, rightCount),
new PathSlice(rightWriters[2], 0, rightCount),
bitSet,
out,
splitValue, maxX,
minY, maxY,
minZ, maxZ,
splitValues, leafBlockFPs);
rightWriters[1].destroy();
rightWriters[2].destroy();
} else if (splitDim == 1) {
build(2*nodeID, leafNodeOffset,
new PathSlice(leftWriters[0], 0, leftCount),
new PathSlice(source.writer, source.start, leftCount),
new PathSlice(leftWriters[2], 0, leftCount),
bitSet,
out,
minX, maxX,
minY, splitValue,
minZ, maxZ,
splitValues, leafBlockFPs);
leftWriters[0].destroy();
leftWriters[2].destroy();
build(2*nodeID+1, leafNodeOffset,
new PathSlice(rightWriters[0], 0, rightCount),
new PathSlice(source.writer, source.start+leftCount, rightCount),
new PathSlice(rightWriters[2], 0, rightCount),
bitSet,
out,
minX, maxX,
splitValue, maxY,
minZ, maxZ,
splitValues, leafBlockFPs);
rightWriters[0].destroy();
rightWriters[2].destroy();
} else {
build(2*nodeID, leafNodeOffset,
new PathSlice(leftWriters[0], 0, leftCount),
new PathSlice(leftWriters[1], 0, leftCount),
new PathSlice(source.writer, source.start, leftCount),
bitSet,
out,
minX, maxX,
minY, maxY,
minZ, splitValue,
splitValues, leafBlockFPs);
leftWriters[0].destroy();
leftWriters[1].destroy();
build(2*nodeID+1, leafNodeOffset,
new PathSlice(rightWriters[0], 0, rightCount),
new PathSlice(rightWriters[1], 0, rightCount),
new PathSlice(source.writer, source.start+leftCount, rightCount),
bitSet,
out,
minX, maxX,
minY, maxY,
splitValue, maxZ,
splitValues, leafBlockFPs);
rightWriters[0].destroy();
rightWriters[1].destroy();
}
success = true;
} finally {
if (success == false) {
for(Writer writer : leftWriters) {
if (writer != null) {
try {
writer.destroy();
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
}
}
for(Writer writer : rightWriters) {
if (writer != null) {
try {
writer.destroy();
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
}
}
}
}
splitValues[nodeID] = splitValue;
}
}
Writer getWriter(long count) throws IOException {
if (count < maxPointsSortInHeap) {
return new HeapWriter((int) count);
} else {
return new OfflineWriter(tempDir, tempFileNamePrefix, count);
}
}
}
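For intuition, the leaf sizing math in finish() above can be traced with a small worked example (a sketch, not part of this file): countPerLeaf is halved, rounding up, until it fits in a leaf, while the leaf count doubles per level.

// Sketch only: mirrors the sizing loop in finish().
long pointCount = 10_000_000;
int maxPointsInLeafNode = 1024;
long countPerLeaf = pointCount;
long numLeaves = 1;
while (countPerLeaf > maxPointsInLeafNode) {
  countPerLeaf = (countPerLeaf + 1) / 2; // ceil halving, as in finish()
  numLeaves *= 2;                        // leaf count doubles per level
}
// Here: numLeaves == 16384 and countPerLeaf == 611, i.e. each leaf ends up with
// roughly 610 points, between 50% and 100% of maxPointsInLeafNode, matching the
// "fully balanced" claim in the class javadoc.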

View File

@ -1,42 +0,0 @@
package org.apache.lucene.bkdtree3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
class Geo3DBinaryDocValues extends BinaryDocValues {
final BKD3DTreeReader bkdTreeReader;
final BinaryDocValues delegate;
final double planetMax;
public Geo3DBinaryDocValues(BKD3DTreeReader bkdTreeReader, BinaryDocValues delegate, double planetMax) {
this.bkdTreeReader = bkdTreeReader;
this.delegate = delegate;
this.planetMax = planetMax;
}
public BKD3DTreeReader getBKD3DTreeReader() {
return bkdTreeReader;
}
@Override
public BytesRef get(int docID) {
return delegate.get(docID);
}
}

View File

@ -1,145 +0,0 @@
package org.apache.lucene.bkdtree3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.geo3d.PlanetModel;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
class Geo3DDocValuesConsumer extends DocValuesConsumer implements Closeable {
final DocValuesConsumer delegate;
final int maxPointsInLeafNode;
final int maxPointsSortInHeap;
final IndexOutput out;
final Map<Integer,Long> fieldIndexFPs = new HashMap<>();
final SegmentWriteState state;
final Directory tempDir;
final String tempFileNamePrefix;
public Geo3DDocValuesConsumer(Directory tempDir, String tempFileNamePrefix, PlanetModel planetModel, DocValuesConsumer delegate,
SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
BKD3DTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
this.delegate = delegate;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxPointsSortInHeap = maxPointsSortInHeap;
this.state = state;
String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Geo3DDocValuesFormat.DATA_EXTENSION);
out = state.directory.createOutput(datFileName, state.context);
CodecUtil.writeIndexHeader(out, Geo3DDocValuesFormat.DATA_CODEC_NAME, Geo3DDocValuesFormat.DATA_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
// We write the max for this PlanetModel into the index so we know we are decoding correctly at search time, and so we can also do
// best-effort check that the search time PlanetModel "matches":
out.writeLong(Double.doubleToLongBits(planetModel.getMaximumMagnitude()));
}
@Override
public void close() throws IOException {
boolean success = false;
try {
CodecUtil.writeFooter(out);
success = true;
} finally {
if (success) {
IOUtils.close(delegate, out);
} else {
IOUtils.closeWhileHandlingException(delegate, out);
}
}
String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Geo3DDocValuesFormat.META_EXTENSION);
IndexOutput metaOut = state.directory.createOutput(metaFileName, state.context);
success = false;
try {
CodecUtil.writeIndexHeader(metaOut, Geo3DDocValuesFormat.META_CODEC_NAME, Geo3DDocValuesFormat.META_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
metaOut.writeVInt(fieldIndexFPs.size());
for(Map.Entry<Integer,Long> ent : fieldIndexFPs.entrySet()) {
metaOut.writeVInt(ent.getKey());
metaOut.writeVLong(ent.getValue());
}
CodecUtil.writeFooter(metaOut);
success = true;
} finally {
if (success) {
IOUtils.close(metaOut);
} else {
IOUtils.closeWhileHandlingException(metaOut);
}
}
}
@Override
public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
delegate.addBinaryField(field, values);
BKD3DTreeWriter writer = new BKD3DTreeWriter(tempDir, tempFileNamePrefix, maxPointsInLeafNode, maxPointsSortInHeap);
Iterator<BytesRef> valuesIt = values.iterator();
for (int docID=0;docID<state.segmentInfo.maxDoc();docID++) {
assert valuesIt.hasNext();
BytesRef value = valuesIt.next();
// TODO: we should allow multi-valued here, just appended into the BDV
// 3 ints packed into byte[]
if (value != null) {
assert value.length == 12;
int x = Geo3DDocValuesFormat.readInt(value.bytes, value.offset);
int y = Geo3DDocValuesFormat.readInt(value.bytes, value.offset+4);
int z = Geo3DDocValuesFormat.readInt(value.bytes, value.offset+8);
writer.add(x, y, z, docID);
}
}
long indexStartFP = writer.finish(out);
fieldIndexFPs.put(field.number, indexStartFP);
}
@Override
public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) {
throw new UnsupportedOperationException();
}
@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) {
throw new UnsupportedOperationException();
}
}
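For reference, a hedged sketch of producing the 12-byte packed value that addBinaryField above expects; planetModel, point, and doc are assumed to be in scope, and encodeValue/writeInt are the package-private helpers in Geo3DDocValuesFormat (the layout is big-endian, 4 bytes per int):

// Sketch only, under the assumptions stated above.
double planetMax = planetModel.getMaximumMagnitude();
int x = Geo3DDocValuesFormat.encodeValue(planetMax, point.x);
int y = Geo3DDocValuesFormat.encodeValue(planetMax, point.y);
int z = Geo3DDocValuesFormat.encodeValue(planetMax, point.z);
byte[] bytes = new byte[12];
Geo3DDocValuesFormat.writeInt(x, bytes, 0);
Geo3DDocValuesFormat.writeInt(y, bytes, 4);
Geo3DDocValuesFormat.writeInt(z, bytes, 8);
doc.add(new BinaryDocValuesField("location", new BytesRef(bytes)));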

View File

@ -1,167 +0,0 @@
package org.apache.lucene.bkdtree3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat;
import org.apache.lucene.geo3d.PlanetModel;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import java.io.IOException;
/**
* A {@link DocValuesFormat} to efficiently index geo-spatial 3D x,y,z points
* from {@link Geo3DPointField} for fast shape intersection queries using
* {@link PointInGeo3DShapeQuery}
*
* <p>This wraps {@link Lucene54DocValuesFormat}, but saves its own BKD tree
* structures to disk for fast query-time intersection. See <a
* href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a>
* for details.
*
* <p>The BKD tree slices up 3D x,y,z space into smaller and
* smaller 3D rectangles, until the smallest rectangles have approximately
* between X/2 and X (X default is 1024) points in them, at which point
* such leaf cells are written as a block to disk, while the index tree
* structure that records how space was sub-divided is loaded into heap
* at search time. The tree is then recursed based on whether
* each of the left and right children overlaps with the query shape, and once
* a leaf block is reached, all documents in that leaf block are collected
* if the cell is fully enclosed by the query shape, or filtered and then
* collected, if not.
*
* <p>The index is also quite compact, because docs only appear once in
* the tree (no "prefix terms").
*
* <p>In addition to the files written by {@link Lucene54DocValuesFormat}, this format writes:
* <ol>
* <li><tt>.g3dd</tt>: BKD leaf data and index</li>
* <li><tt>.g3dm</tt>: BKD metadata</li>
* </ol>
*
* <p>The disk format is experimental and free to change suddenly, and this code
* likely has new and exciting bugs!
*
* @lucene.experimental */
public class Geo3DDocValuesFormat extends DocValuesFormat {
static final String DATA_CODEC_NAME = "Geo3DData";
static final int DATA_VERSION_START = 0;
static final int DATA_VERSION_CURRENT = DATA_VERSION_START;
static final String DATA_EXTENSION = "g3dd";
static final String META_CODEC_NAME = "Geo3DMeta";
static final int META_VERSION_START = 0;
static final int META_VERSION_CURRENT = META_VERSION_START;
static final String META_EXTENSION = "g3dm";
private final int maxPointsInLeafNode;
private final int maxPointsSortInHeap;
private final DocValuesFormat delegate = new Lucene54DocValuesFormat();
private final PlanetModel planetModel;
/** Default constructor */
public Geo3DDocValuesFormat() {
this(PlanetModel.WGS84, BKD3DTreeWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKD3DTreeWriter.DEFAULT_MAX_POINTS_SORT_IN_HEAP);
}
/** Creates this with custom configuration.
*
* @param planetModel the {@link PlanetModel} to use; this is only used when writing
* @param maxPointsInLeafNode Maximum number of points in each leaf cell. Smaller values create a deeper tree with larger in-heap index and possibly
* faster searching. The default is 1024.
* @param maxPointsSortInHeap Maximum number of points where in-heap sort can be used. When the number of points exceeds this, a (slower)
* offline sort is used. The default is 128 * 1024.
*
* @lucene.experimental */
public Geo3DDocValuesFormat(PlanetModel planetModel, int maxPointsInLeafNode, int maxPointsSortInHeap) {
super("BKD3DTree");
BKD3DTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxPointsSortInHeap = maxPointsSortInHeap;
this.planetModel = planetModel;
}
@Override
public DocValuesConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
return new Geo3DDocValuesConsumer(state.directory, state.segmentInfo.name, planetModel, delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
}
@Override
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
return new Geo3DDocValuesProducer(delegate.fieldsProducer(state), state);
}
/** Clips the incoming value to the allowed min/max range before encoding, instead of throwing an exception. */
static int encodeValueLenient(double planetMax, double x) {
if (x > planetMax) {
x = planetMax;
} else if (x < -planetMax) {
x = -planetMax;
}
return encodeValue(planetMax, x);
}
static int encodeValue(double planetMax, double x) {
if (x > planetMax) {
throw new IllegalArgumentException("value=" + x + " is out-of-bounds (greater than planetMax=" + planetMax + ")");
}
if (x < -planetMax) {
throw new IllegalArgumentException("value=" + x + " is out-of-bounds (less than than -planetMax=" + -planetMax + ")");
}
long y = Math.round (x * (Integer.MAX_VALUE / planetMax));
assert y >= Integer.MIN_VALUE;
assert y <= Integer.MAX_VALUE;
return (int) y;
}
/** Center decode */
static double decodeValueCenter(double planetMax, int x) {
return x * (planetMax / Integer.MAX_VALUE);
}
/** More negative decode, at bottom of cell */
static double decodeValueMin(double planetMax, int x) {
return (((double)x) - 0.5) * (planetMax / Integer.MAX_VALUE);
}
/** More positive decode, at top of cell */
static double decodeValueMax(double planetMax, int x) {
return (((double)x) + 0.5) * (planetMax / Integer.MAX_VALUE);
}
static int readInt(byte[] bytes, int offset) {
return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset+1] & 0xFF) << 16)
| ((bytes[offset+2] & 0xFF) << 8) | (bytes[offset+3] & 0xFF);
}
static void writeInt(int value, byte[] bytes, int offset) {
bytes[offset] = (byte) ((value >> 24) & 0xff);
bytes[offset+1] = (byte) ((value >> 16) & 0xff);
bytes[offset+2] = (byte) ((value >> 8) & 0xff);
bytes[offset+3] = (byte) (value & 0xff);
}
}
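To make the recursion described in the class javadoc concrete, here is a minimal, self-contained sketch of the intersect walk. The Node and Visitor types below are simplified stand-ins invented for illustration; only the control flow (prune on CELL_OUTSIDE_QUERY, bulk-collect on CELL_INSIDE_QUERY, filter per point on CELL_CROSSES_QUERY) mirrors the javadoc. This is not the codec's actual implementation.

// Hedged sketch: simplified stand-in types, not the codec's classes.
class BKDIntersectSketch {
  enum Relation { CELL_INSIDE_QUERY, CELL_CROSSES_QUERY, CELL_OUTSIDE_QUERY }

  interface Visitor {
    Relation compare(byte[] minPackedValue, byte[] maxPackedValue);
    void visit(int docID);                     // cell fully inside the shape
    void visit(int docID, byte[] packedValue); // crossing cell: filter first
  }

  static class Node {
    byte[] minPackedValue, maxPackedValue;
    Node left, right;                          // null for leaves
    int[] docIDs;                              // leaf payload
    byte[][] packedValues;
    boolean isLeaf() { return left == null; }
  }

  static void intersect(Node node, Visitor visitor) {
    switch (visitor.compare(node.minPackedValue, node.maxPackedValue)) {
      case CELL_OUTSIDE_QUERY:
        return;                                // prune the whole subtree
      case CELL_INSIDE_QUERY:
        collectAll(node, visitor);             // no per-point filtering needed
        return;
      default:                                 // CELL_CROSSES_QUERY
        if (node.isLeaf()) {
          for (int i = 0; i < node.docIDs.length; i++) {
            visitor.visit(node.docIDs[i], node.packedValues[i]);
          }
        } else {
          intersect(node.left, visitor);
          intersect(node.right, visitor);
        }
    }
  }

  static void collectAll(Node node, Visitor visitor) {
    if (node.isLeaf()) {
      for (int docID : node.docIDs) {
        visitor.visit(docID);
      }
    } else {
      collectAll(node.left, visitor);
      collectAll(node.right, visitor);
    }
  }
}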

View File

@ -1,177 +0,0 @@
package org.apache.lucene.bkdtree3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
class Geo3DDocValuesProducer extends DocValuesProducer {
private final Map<String,BKD3DTreeReader> treeReaders = new HashMap<>();
private final Map<Integer,Long> fieldToIndexFPs = new HashMap<>();
private final IndexInput datIn;
private final AtomicLong ramBytesUsed;
private final int maxDoc;
private final DocValuesProducer delegate;
private final boolean merging;
private final double planetMax;
public Geo3DDocValuesProducer(DocValuesProducer delegate, SegmentReadState state) throws IOException {
String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Geo3DDocValuesFormat.META_EXTENSION);
ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaFileName, state.context);
CodecUtil.checkIndexHeader(metaIn, Geo3DDocValuesFormat.META_CODEC_NAME, Geo3DDocValuesFormat.META_VERSION_START, Geo3DDocValuesFormat.META_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
int fieldCount = metaIn.readVInt();
for(int i=0;i<fieldCount;i++) {
int fieldNumber = metaIn.readVInt();
long indexFP = metaIn.readVLong();
fieldToIndexFPs.put(fieldNumber, indexFP);
}
CodecUtil.checkFooter(metaIn);
metaIn.close();
String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Geo3DDocValuesFormat.DATA_EXTENSION);
datIn = state.directory.openInput(datFileName, state.context);
CodecUtil.checkIndexHeader(datIn, Geo3DDocValuesFormat.DATA_CODEC_NAME, Geo3DDocValuesFormat.DATA_VERSION_START, Geo3DDocValuesFormat.DATA_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
planetMax = Double.longBitsToDouble(datIn.readLong());
ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
maxDoc = state.segmentInfo.maxDoc();
this.delegate = delegate;
merging = false;
}
// clone for merge: we don't hang onto the Geo3Ds we load
Geo3DDocValuesProducer(Geo3DDocValuesProducer orig) throws IOException {
assert Thread.holdsLock(orig);
datIn = orig.datIn.clone();
ramBytesUsed = new AtomicLong(orig.ramBytesUsed.get());
delegate = orig.delegate.getMergeInstance();
fieldToIndexFPs.putAll(orig.fieldToIndexFPs);
treeReaders.putAll(orig.treeReaders);
merging = true;
maxDoc = orig.maxDoc;
planetMax = orig.planetMax;
}
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void close() throws IOException {
IOUtils.close(datIn, delegate);
}
@Override
public void checkIntegrity() throws IOException {
CodecUtil.checksumEntireFile(datIn);
}
@Override
public NumericDocValues getNumeric(FieldInfo field) {
throw new UnsupportedOperationException();
}
@Override
public synchronized BinaryDocValues getBinary(FieldInfo field) throws IOException {
BKD3DTreeReader treeReader = treeReaders.get(field.name);
if (treeReader == null) {
// Lazy load
Long fp = fieldToIndexFPs.get(field.number);
if (fp == null) {
throw new IllegalArgumentException("this field was not indexed as a BKDPointField");
}
// LUCENE-6697: never do real IOPs with the original IndexInput because search
// threads can be concurrently cloning it:
IndexInput clone = datIn.clone();
clone.seek(fp);
treeReader = new BKD3DTreeReader(clone, maxDoc);
// Only hang onto the reader when we are not merging:
if (merging == false) {
treeReaders.put(field.name, treeReader);
ramBytesUsed.addAndGet(treeReader.ramBytesUsed());
}
}
return new Geo3DBinaryDocValues(treeReader, delegate.getBinary(field), planetMax);
}
@Override
public SortedDocValues getSorted(FieldInfo field) {
throw new UnsupportedOperationException();
}
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) {
throw new UnsupportedOperationException();
}
@Override
public Bits getDocsWithField(FieldInfo field) throws IOException {
return delegate.getDocsWithField(field);
}
@Override
public synchronized Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
for(Map.Entry<String,BKD3DTreeReader> ent : treeReaders.entrySet()) {
resources.add(Accountables.namedAccountable("field " + ent.getKey(), ent.getValue()));
}
resources.add(Accountables.namedAccountable("delegate", delegate));
return resources;
}
@Override
public synchronized DocValuesProducer getMergeInstance() throws IOException {
return new Geo3DDocValuesProducer(this);
}
@Override
public long ramBytesUsed() {
return ramBytesUsed.get() + delegate.ramBytesUsed();
}
}

View File

@ -1,92 +0,0 @@
package org.apache.lucene.bkdtree3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
final class GrowingHeapWriter implements Writer {
int[] xs;
int[] ys;
int[] zs;
int[] docIDs;
long[] ords;
private int nextWrite;
final int maxSize;
public GrowingHeapWriter(int maxSize) {
xs = new int[16];
ys = new int[16];
zs = new int[16];
docIDs = new int[16];
ords = new long[16];
this.maxSize = maxSize;
}
private int[] growExact(int[] arr, int size) {
assert size > arr.length;
int[] newArr = new int[size];
System.arraycopy(arr, 0, newArr, 0, arr.length);
return newArr;
}
private long[] growExact(long[] arr, int size) {
assert size > arr.length;
long[] newArr = new long[size];
System.arraycopy(arr, 0, newArr, 0, arr.length);
return newArr;
}
@Override
public void append(int x, int y, int z, long ord, int docID) {
assert ord == nextWrite;
if (xs.length == nextWrite) {
int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, RamUsageEstimator.NUM_BYTES_INT));
assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite;
xs = growExact(xs, nextSize);
ys = growExact(ys, nextSize);
zs = growExact(zs, nextSize);
ords = growExact(ords, nextSize);
docIDs = growExact(docIDs, nextSize);
}
xs[nextWrite] = x;
ys[nextWrite] = y;
zs[nextWrite] = z;
ords[nextWrite] = ord;
docIDs[nextWrite] = docID;
nextWrite++;
}
@Override
public Reader getReader(long start) {
return new HeapReader(xs, ys, zs, ords, docIDs, (int) start, nextWrite);
}
@Override
public void close() {
}
@Override
public void destroy() {
}
@Override
public String toString() {
return "GrowingHeapWriter(count=" + nextWrite + " alloc=" + xs.length + ")";
}
}

View File

@ -1,73 +0,0 @@
package org.apache.lucene.bkdtree3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
final class HeapReader implements Reader {
private int curRead;
final int[] xs;
final int[] ys;
final int[] zs;
final long[] ords;
final int[] docIDs;
final int end;
HeapReader(int[] xs, int[] ys, int[] zs, long[] ords, int[] docIDs, int start, int end) {
this.xs = xs;
this.ys = ys;
this.zs = zs;
this.ords = ords;
this.docIDs = docIDs;
curRead = start-1;
this.end = end;
}
@Override
public boolean next() {
curRead++;
return curRead < end;
}
@Override
public int x() {
return xs[curRead];
}
@Override
public int y() {
return ys[curRead];
}
@Override
public int z() {
return zs[curRead];
}
@Override
public int docID() {
return docIDs[curRead];
}
@Override
public long ord() {
return ords[curRead];
}
@Override
public void close() {
}
}

View File

@ -1,69 +0,0 @@
package org.apache.lucene.bkdtree3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
final class HeapWriter implements Writer {
final int[] xs;
final int[] ys;
final int[] zs;
final int[] docIDs;
final long[] ords;
private int nextWrite;
private boolean closed;
public HeapWriter(int count) {
xs = new int[count];
ys = new int[count];
zs = new int[count];
docIDs = new int[count];
ords = new long[count];
}
@Override
public void append(int x, int y, int z, long ord, int docID) {
xs[nextWrite] = x;
ys[nextWrite] = y;
zs[nextWrite] = z;
ords[nextWrite] = ord;
docIDs[nextWrite] = docID;
nextWrite++;
}
@Override
public Reader getReader(long start) {
assert closed;
return new HeapReader(xs, ys, zs, ords, docIDs, (int) start, xs.length);
}
@Override
public void close() {
closed = true;
if (nextWrite != xs.length) {
throw new IllegalStateException("only wrote " + nextWrite + " values, but expected " + xs.length);
}
}
@Override
public void destroy() {
}
@Override
public String toString() {
return "HeapWriter(count=" + xs.length + ")";
}
}

View File

@ -1,84 +0,0 @@
package org.apache.lucene.bkdtree3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
final class OfflineReader implements Reader {
final IndexInput in;
long countLeft;
private int x;
private int y;
private int z;
private long ord;
private int docID;
OfflineReader(Directory tempDir, String tempFileName, long start, long count) throws IOException {
in = tempDir.openInput(tempFileName, IOContext.READONCE);
in.seek(start * BKD3DTreeWriter.BYTES_PER_DOC);
this.countLeft = count;
}
@Override
public boolean next() throws IOException {
if (countLeft == 0) {
return false;
}
countLeft--;
x = in.readInt();
y = in.readInt();
z = in.readInt();
ord = in.readLong();
docID = in.readInt();
return true;
}
@Override
public int x() {
return x;
}
@Override
public int y() {
return y;
}
@Override
public int z() {
return z;
}
@Override
public long ord() {
return ord;
}
@Override
public int docID() {
return docID;
}
@Override
public void close() throws IOException {
in.close();
}
}

View File

@ -1,77 +0,0 @@
package org.apache.lucene.bkdtree3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
final class OfflineWriter implements Writer {
final Directory tempDir;
final IndexOutput out;
final byte[] scratchBytes = new byte[BKD3DTreeWriter.BYTES_PER_DOC];
final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
final long count;
private long countWritten;
private boolean closed;
public OfflineWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException {
this.tempDir = tempDir;
out = tempDir.createTempOutput(tempFileNamePrefix, "bkd3d", IOContext.DEFAULT);
this.count = count;
}
@Override
public void append(int x, int y, int z, long ord, int docID) throws IOException {
out.writeInt(x);
out.writeInt(y);
out.writeInt(z);
out.writeLong(ord);
out.writeInt(docID);
countWritten++;
}
@Override
public Reader getReader(long start) throws IOException {
assert closed;
return new OfflineReader(tempDir, out.getName(), start, count-start);
}
@Override
public void close() throws IOException {
closed = true;
out.close();
if (count != countWritten) {
throw new IllegalStateException("wrote " + countWritten + " values, but expected " + count);
}
}
@Override
public void destroy() throws IOException {
tempDir.deleteFile(out.getName());
}
@Override
public String toString() {
return "OfflineWriter(count=" + count + " tempFileName=" + out.getName() + ")";
}
}

View File

@ -1,222 +0,0 @@
package org.apache.lucene.bkdtree3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.geo3d.GeoArea;
import org.apache.lucene.geo3d.GeoAreaFactory;
import org.apache.lucene.geo3d.GeoShape;
import org.apache.lucene.geo3d.PlanetModel;
import org.apache.lucene.geo3d.XYZBounds;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
/** Finds all previously indexed points that fall within the specified polygon.
*
* <p>The field must be indexed with {@link Geo3DDocValuesFormat}, and {@link Geo3DPointField} added per document.
*
* <p>Because this implementation cannot intersect each cell with the polygon, it will be costly, especially for large polygons, as every
* possible point must be checked.
*
* <p><b>NOTE</b>: for fastest performance, this allocates FixedBitSet(maxDoc) for each segment. The score of each hit is the query boost.
*
* @lucene.experimental */
public class PointInGeo3DShapeQuery extends Query {
final String field;
final PlanetModel planetModel;
final GeoShape shape;
/** The lats/lons must be clockwise or counter-clockwise. */
public PointInGeo3DShapeQuery(PlanetModel planetModel, String field, GeoShape shape) {
this.field = field;
this.planetModel = planetModel;
this.shape = shape;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
// I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
// used in the first pass:
return new ConstantScoreWeight(this) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
BinaryDocValues bdv = reader.getBinaryDocValues(field);
if (bdv == null) {
// No docs in this segment had this field
return null;
}
if (bdv instanceof Geo3DBinaryDocValues == false) {
throw new IllegalStateException("field \"" + field + "\" was not indexed with Geo3DBinaryDocValuesFormat: got: " + bdv);
}
final Geo3DBinaryDocValues treeDV = (Geo3DBinaryDocValues) bdv;
BKD3DTreeReader tree = treeDV.getBKD3DTreeReader();
XYZBounds bounds = new XYZBounds();
shape.getBounds(bounds);
final double planetMax = planetModel.getMaximumMagnitude();
if (planetMax != treeDV.planetMax) {
throw new IllegalStateException(planetModel + " is not the same one used during indexing: planetMax=" + planetMax + " vs indexing planetMax=" + treeDV.planetMax);
}
/*
GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel,
bounds.getMinimumX(),
bounds.getMaximumX(),
bounds.getMinimumY(),
bounds.getMaximumY(),
bounds.getMinimumZ(),
bounds.getMaximumZ());
assert xyzSolid.getRelationship(shape) == GeoArea.WITHIN || xyzSolid.getRelationship(shape) == GeoArea.OVERLAPS: "expected WITHIN (1) or OVERLAPS (2) but got " + xyzSolid.getRelationship(shape) + "; shape="+shape+"; XYZSolid="+xyzSolid;
*/
DocIdSet result = tree.intersect(Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMinimumX()),
Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMaximumX()),
Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMinimumY()),
Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMaximumY()),
Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMinimumZ()),
Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMaximumZ()),
new BKD3DTreeReader.ValueFilter() {
@Override
public boolean accept(int docID) {
//System.out.println(" accept? docID=" + docID);
BytesRef bytes = treeDV.get(docID);
if (bytes == null) {
//System.out.println(" false (null)");
return false;
}
assert bytes.length == 12;
double x = Geo3DDocValuesFormat.decodeValueCenter(treeDV.planetMax, Geo3DDocValuesFormat.readInt(bytes.bytes, bytes.offset));
double y = Geo3DDocValuesFormat.decodeValueCenter(treeDV.planetMax, Geo3DDocValuesFormat.readInt(bytes.bytes, bytes.offset+4));
double z = Geo3DDocValuesFormat.decodeValueCenter(treeDV.planetMax, Geo3DDocValuesFormat.readInt(bytes.bytes, bytes.offset+8));
// System.out.println(" accept docID=" + docID + " point: x=" + x + " y=" + y + " z=" + z);
// True if x,y,z is within shape
//System.out.println(" x=" + x + " y=" + y + " z=" + z);
//System.out.println(" ret: " + shape.isWithin(x, y, z));
return shape.isWithin(x, y, z);
}
@Override
public BKD3DTreeReader.Relation compare(int cellXMinEnc, int cellXMaxEnc, int cellYMinEnc, int cellYMaxEnc, int cellZMinEnc, int cellZMaxEnc) {
assert cellXMinEnc <= cellXMaxEnc;
assert cellYMinEnc <= cellYMaxEnc;
assert cellZMinEnc <= cellZMaxEnc;
// Because the BKD tree operates in quantized (64 bit -> 32 bit) space, and the cell bounds
// here are inclusive, we need to extend the bounds to the largest un-quantized values that
// could quantize into these bounds. The encoding (Geo3DDocValuesFormat.encodeValue) does
// a Math.round from double to long, so e.g. 1.4 -> 1, and -1.4 -> -1:
double cellXMin = Geo3DDocValuesFormat.decodeValueMin(treeDV.planetMax, cellXMinEnc);
double cellXMax = Geo3DDocValuesFormat.decodeValueMax(treeDV.planetMax, cellXMaxEnc);
double cellYMin = Geo3DDocValuesFormat.decodeValueMin(treeDV.planetMax, cellYMinEnc);
double cellYMax = Geo3DDocValuesFormat.decodeValueMax(treeDV.planetMax, cellYMaxEnc);
double cellZMin = Geo3DDocValuesFormat.decodeValueMin(treeDV.planetMax, cellZMinEnc);
double cellZMax = Geo3DDocValuesFormat.decodeValueMax(treeDV.planetMax, cellZMaxEnc);
//System.out.println(" compare: x=" + cellXMin + "-" + cellXMax + " y=" + cellYMin + "-" + cellYMax + " z=" + cellZMin + "-" + cellZMax);
GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel, cellXMin, cellXMax, cellYMin, cellYMax, cellZMin, cellZMax);
switch(xyzSolid.getRelationship(shape)) {
case GeoArea.CONTAINS:
// Shape fully contains the cell
//System.out.println(" inside");
return BKD3DTreeReader.Relation.CELL_INSIDE_SHAPE;
case GeoArea.OVERLAPS:
// They do overlap but neither contains the other:
//System.out.println(" crosses1");
return BKD3DTreeReader.Relation.SHAPE_CROSSES_CELL;
case GeoArea.WITHIN:
// Cell fully contains the shape:
//System.out.println(" crosses2");
return BKD3DTreeReader.Relation.SHAPE_INSIDE_CELL;
case GeoArea.DISJOINT:
// They do not overlap at all
//System.out.println(" outside");
return BKD3DTreeReader.Relation.SHAPE_OUTSIDE_CELL;
default:
assert false;
return BKD3DTreeReader.Relation.SHAPE_CROSSES_CELL;
}
}
});
final DocIdSetIterator disi = result.iterator();
return new ConstantScoreScorer(this, score(), disi);
}
};
}
@Override
@SuppressWarnings({"unchecked","rawtypes"})
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
PointInGeo3DShapeQuery that = (PointInGeo3DShapeQuery) o;
return planetModel.equals(that.planetModel) && shape.equals(that.shape);
}
@Override
public final int hashCode() {
int result = super.hashCode();
result = 31 * result + planetModel.hashCode();
result = 31 * result + shape.hashCode();
return result;
}
@Override
public String toString(String field) {
final StringBuilder sb = new StringBuilder();
sb.append(getClass().getSimpleName());
sb.append(':');
if (this.field.equals(field) == false) {
sb.append(" field=");
sb.append(this.field);
sb.append(':');
}
sb.append("PlanetModel: ");
sb.append(planetModel);
sb.append(" Shape: ");
sb.append(shape);
return sb.toString();
}
}

View File

@ -1,31 +0,0 @@
package org.apache.lucene.bkdtree3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
interface Reader extends Closeable {
boolean next() throws IOException;
int x();
int y();
int z();
long ord();
int docID();
}

View File

@ -1,29 +0,0 @@
package org.apache.lucene.bkdtree3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
interface Writer extends Closeable {
void append(int x, int y, int z, long ord, int docID) throws IOException;
Reader getReader(long start) throws IOException;
void destroy() throws IOException;
}
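Since HeapWriter, GrowingHeapWriter and OfflineWriter all implement this interface, a caller can pick a spill strategy based on the expected point count. A hedged sketch of that selection follows; the real decision logic lives in BKD3DTreeWriter, which is not shown in this change, so the threshold handling here is an assumption, and the class is assumed to sit in org.apache.lucene.bkdtree3d for package access.

// Hedged sketch, assuming the writer spills offline past a heap threshold;
// BKD3DTreeWriter's actual selection logic is not shown in this change.
import java.io.IOException;
import org.apache.lucene.store.Directory;

class WriterSelectorSketch {
  static Writer newWriter(Directory tempDir, String tempFileNamePrefix,
                          long count, int maxPointsSortInHeap) throws IOException {
    if (count <= maxPointsSortInHeap) {
      return new HeapWriter((int) count);                         // sort entirely in heap
    } else {
      return new OfflineWriter(tempDir, tempFileNamePrefix, count); // spill to temp file
    }
  }
}

After exactly count points have been appended and close() called, getReader(start) replays them from the given starting ordinal, regardless of which implementation was chosen.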

View File

@ -1,21 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Fast "indexed point inside geo3d shape" query implementation.
*/
package org.apache.lucene.bkdtree3d;

View File

@ -1,4 +1,4 @@
package org.apache.lucene.bkdtree3d;
package org.apache.lucene.geo3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -17,24 +17,23 @@ package org.apache.lucene.bkdtree3d;
* limitations under the License.
*/
import org.apache.lucene.geo3d.PlanetModel;
import org.apache.lucene.geo3d.GeoPoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.bkd.BKDUtil;
// TODO: allow multi-valued, packing all points into a single BytesRef
/** Add this to a document to index lat/lon point, but be sure to use {@link Geo3DDocValuesFormat} for the field.
/** Add this to a document to index lat/lon or x/y/z point, indexed as a dimensional value.
* Multiple values are allowed: just add multiple Geo3DPointField instances to the document with the
* same field name.
*
* @lucene.experimental */
public final class Geo3DPointField extends Field {
/** Indexing {@link FieldType}. */
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValuesType(DocValuesType.BINARY);
TYPE.setDimensions(3, RamUsageEstimator.NUM_BYTES_INT);
TYPE.freeze();
}
@ -62,9 +61,9 @@ public final class Geo3DPointField extends Field {
private void fillFieldsData(double planetMax, double x, double y, double z) {
byte[] bytes = new byte[12];
Geo3DDocValuesFormat.writeInt(Geo3DDocValuesFormat.encodeValue(planetMax, x), bytes, 0);
Geo3DDocValuesFormat.writeInt(Geo3DDocValuesFormat.encodeValue(planetMax, y), bytes, 4);
Geo3DDocValuesFormat.writeInt(Geo3DDocValuesFormat.encodeValue(planetMax, z), bytes, 8);
BKDUtil.intToBytes(Geo3DUtil.encodeValue(planetMax, x), bytes, 0);
BKDUtil.intToBytes(Geo3DUtil.encodeValue(planetMax, y), bytes, 1);
BKDUtil.intToBytes(Geo3DUtil.encodeValue(planetMax, z), bytes, 2);
fieldsData = new BytesRef(bytes);
}
}
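Note the offset change in this hunk: the old writeInt took a byte offset (0, 4, 8), while BKDUtil.intToBytes takes a dimension index (0, 1, 2), each int occupying four bytes. The stand-in below only illustrates that layout; BKDUtil's real encoding is not shown in this change (it may also need to keep values byte-sortable), so treat the helper as hypothetical.

// Hypothetical stand-in showing the dimension-indexed layout implied here:
// dimension i lands at byte offset i*4 in the packed array.
class PackingDemo {
  static void intToBytes(int x, byte[] dest, int index) {
    int off = index * 4;                  // dimension index -> byte offset
    dest[off]     = (byte) (x >> 24);
    dest[off + 1] = (byte) (x >> 16);
    dest[off + 2] = (byte) (x >> 8);
    dest[off + 3] = (byte) x;
  }

  public static void main(String[] args) {
    byte[] bytes = new byte[12];          // 3 dims x 4 bytes, as in the hunk
    intToBytes(0x11111111, bytes, 0);     // x
    intToBytes(0x22222222, bytes, 1);     // y
    intToBytes(0x33333333, bytes, 2);     // z
    System.out.println(java.util.Arrays.toString(bytes));
  }
}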

View File

@ -0,0 +1,60 @@
package org.apache.lucene.geo3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
class Geo3DUtil {
/** Clips the incoming value to the allowed min/max range before encoding, instead of throwing an exception. */
public static int encodeValueLenient(double planetMax, double x) {
if (x > planetMax) {
x = planetMax;
} else if (x < -planetMax) {
x = -planetMax;
}
return encodeValue(planetMax, x);
}
public static int encodeValue(double planetMax, double x) {
if (x > planetMax) {
throw new IllegalArgumentException("value=" + x + " is out-of-bounds (greater than planetMax=" + planetMax + ")");
}
if (x < -planetMax) {
throw new IllegalArgumentException("value=" + x + " is out-of-bounds (less than than -planetMax=" + -planetMax + ")");
}
long y = Math.round (x * (Integer.MAX_VALUE / planetMax));
assert y >= Integer.MIN_VALUE;
assert y <= Integer.MAX_VALUE;
return (int) y;
}
/** Center decode */
public static double decodeValueCenter(double planetMax, int x) {
return x * (planetMax / Integer.MAX_VALUE);
}
/** More negative decode, at bottom of cell */
public static double decodeValueMin(double planetMax, int x) {
return (((double)x) - 0.5) * (planetMax / Integer.MAX_VALUE);
}
/** More positive decode, at top of cell */
public static double decodeValueMax(double planetMax, int x) {
return (((double)x) + 0.5) * (planetMax / Integer.MAX_VALUE);
}
}
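A quick worked example of the quantization round-trip these helpers implement. The planetMax value below is an assumption (roughly the WGS84 maximum magnitude); the arithmetic simply inlines encodeValue and the decodeValue* methods above.

// Self-contained demo of the quantization arithmetic; planetMax is assumed.
public class Geo3DQuantizeDemo {
  public static void main(String[] args) {
    double planetMax = 1.0011188539924791; // assumption: ~WGS84 maximum magnitude
    double x = 0.7345267;
    int enc = (int) Math.round(x * (Integer.MAX_VALUE / planetMax));
    double center = enc * (planetMax / Integer.MAX_VALUE);
    double min = (enc - 0.5) * (planetMax / Integer.MAX_VALUE);
    double max = (enc + 0.5) * (planetMax / Integer.MAX_VALUE);
    // center is within half a quantum of x, and [min,max] brackets every
    // double that Math.round would map onto enc; this is why cell bounds are
    // widened with decodeValueMin/decodeValueMax before comparing shapes.
    System.out.println("enc=" + enc + " center=" + center +
                       " cell=[" + min + "," + max + "]");
  }
}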

View File

@ -0,0 +1,205 @@
package org.apache.lucene.geo3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.DimensionalValues;
import org.apache.lucene.index.DimensionalValues.IntersectVisitor;
import org.apache.lucene.index.DimensionalValues.Relation;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.bkd.BKDUtil;
/** Finds all previously indexed points that fall within the specified polygon.
*
* <p>The field must be indexed using {@link Geo3DPointField}.
*
* @lucene.experimental */
public class PointInGeo3DShapeQuery extends Query {
final String field;
final PlanetModel planetModel;
final GeoShape shape;
/** The lats/lons must be clockwise or counter-clockwise. */
public PointInGeo3DShapeQuery(PlanetModel planetModel, String field, GeoShape shape) {
this.field = field;
this.planetModel = planetModel;
this.shape = shape;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
// I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
// used in the first pass:
return new ConstantScoreWeight(this) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
DimensionalValues values = reader.getDimensionalValues();
if (values == null) {
return null;
}
/*
XYZBounds bounds = new XYZBounds();
shape.getBounds(bounds);
final double planetMax = planetModel.getMaximumMagnitude();
if (planetMax != treeDV.planetMax) {
throw new IllegalStateException(planetModel + " is not the same one used during indexing: planetMax=" + planetMax + " vs indexing planetMax=" + treeDV.planetMax);
}
*/
/*
GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel,
bounds.getMinimumX(),
bounds.getMaximumX(),
bounds.getMinimumY(),
bounds.getMaximumY(),
bounds.getMinimumZ(),
bounds.getMaximumZ());
assert xyzSolid.getRelationship(shape) == GeoArea.WITHIN || xyzSolid.getRelationship(shape) == GeoArea.OVERLAPS: "expected WITHIN (1) or OVERLAPS (2) but got " + xyzSolid.getRelationship(shape) + "; shape="+shape+"; XYZSolid="+xyzSolid;
*/
double planetMax = planetModel.getMaximumMagnitude();
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
int[] hitCount = new int[1];
values.intersect(field,
new IntersectVisitor() {
@Override
public void visit(int docID) {
result.add(docID);
hitCount[0]++;
}
@Override
public void visit(int docID, byte[] packedValue) {
assert packedValue.length == 12;
double x = Geo3DUtil.decodeValueCenter(planetMax, BKDUtil.bytesToInt(packedValue, 0));
double y = Geo3DUtil.decodeValueCenter(planetMax, BKDUtil.bytesToInt(packedValue, 1));
double z = Geo3DUtil.decodeValueCenter(planetMax, BKDUtil.bytesToInt(packedValue, 2));
if (shape.isWithin(x, y, z)) {
result.add(docID);
hitCount[0]++;
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
// Because the dimensional format operates in quantized (64 bit -> 32 bit) space, and the cell bounds
// here are inclusive, we need to extend the bounds to the largest un-quantized values that
// could quantize into these bounds. The encoding (Geo3DUtil.encodeValue) does
// a Math.round from double to long, so e.g. 1.4 -> 1, and -1.4 -> -1:
double xMin = Geo3DUtil.decodeValueMin(planetMax, BKDUtil.bytesToInt(minPackedValue, 0));
double xMax = Geo3DUtil.decodeValueMax(planetMax, BKDUtil.bytesToInt(maxPackedValue, 0));
double yMin = Geo3DUtil.decodeValueMin(planetMax, BKDUtil.bytesToInt(minPackedValue, 1));
double yMax = Geo3DUtil.decodeValueMax(planetMax, BKDUtil.bytesToInt(maxPackedValue, 1));
double zMin = Geo3DUtil.decodeValueMin(planetMax, BKDUtil.bytesToInt(minPackedValue, 2));
double zMax = Geo3DUtil.decodeValueMax(planetMax, BKDUtil.bytesToInt(maxPackedValue, 2));
//System.out.println(" compare: x=" + cellXMin + "-" + cellXMax + " y=" + cellYMin + "-" + cellYMax + " z=" + cellZMin + "-" + cellZMax);
assert xMin <= xMax;
assert yMin <= yMax;
assert zMin <= zMax;
GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel, xMin, xMax, yMin, yMax, zMin, zMax);
switch(xyzSolid.getRelationship(shape)) {
case GeoArea.CONTAINS:
// Shape fully contains the cell
//System.out.println(" inside");
return Relation.CELL_INSIDE_QUERY;
case GeoArea.OVERLAPS:
// They do overlap but neither contains the other:
//System.out.println(" crosses1");
return Relation.CELL_CROSSES_QUERY;
case GeoArea.WITHIN:
// Cell fully contains the shape:
//System.out.println(" crosses2");
// return Relation.SHAPE_INSIDE_CELL;
return Relation.CELL_CROSSES_QUERY;
case GeoArea.DISJOINT:
// They do not overlap at all
//System.out.println(" outside");
return Relation.CELL_OUTSIDE_QUERY;
default:
assert false;
return Relation.CELL_CROSSES_QUERY;
}
}
});
// NOTE: hitCount[0] will be an over-estimate in the multi-valued case
return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
}
};
}
@Override
@SuppressWarnings({"unchecked","rawtypes"})
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
PointInGeo3DShapeQuery that = (PointInGeo3DShapeQuery) o;
return planetModel.equals(that.planetModel) && shape.equals(that.shape);
}
@Override
public final int hashCode() {
int result = super.hashCode();
result = 31 * result + planetModel.hashCode();
result = 31 * result + shape.hashCode();
return result;
}
@Override
public String toString(String field) {
final StringBuilder sb = new StringBuilder();
sb.append(getClass().getSimpleName());
sb.append(':');
if (this.field.equals(field) == false) {
sb.append(" field=");
sb.append(this.field);
sb.append(':');
}
sb.append("PlanetModel: ");
sb.append(planetModel);
sb.append(" Shape: ");
sb.append(shape);
return sb.toString();
}
}
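For context, here is a minimal end-to-end usage sketch mirroring the test code later in this change; the null analyzer, RAMDirectory, coordinates, and one-degree circle radius are illustrative choices, not prescribed by the API.

// Hedged usage sketch based on the test code below; parameters are illustrative.
import org.apache.lucene.document.Document;
import org.apache.lucene.geo3d.Geo3DPointField;
import org.apache.lucene.geo3d.GeoCircleFactory;
import org.apache.lucene.geo3d.PlanetModel;
import org.apache.lucene.geo3d.PointInGeo3DShapeQuery;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class Geo3DQueryDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    // null analyzer is fine here: this sketch indexes no tokenized fields
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
    Document doc = new Document();
    doc.add(new Geo3DPointField("field", PlanetModel.WGS84,
                                Math.toRadians(50.7345267), Math.toRadians(-97.5303555)));
    w.addDocument(doc);
    IndexReader r = DirectoryReader.open(w, true);
    IndexSearcher s = new IndexSearcher(r);
    // one-degree circle around a nearby point should match the indexed doc
    TopDocs hits = s.search(new PointInGeo3DShapeQuery(PlanetModel.WGS84, "field",
        GeoCircleFactory.makeGeoCircle(PlanetModel.WGS84, Math.toRadians(50),
                                       Math.toRadians(-97), Math.PI / 180.)), 1);
    System.out.println("hits: " + hits.totalHits);
    r.close(); w.close(); dir.close();
  }
}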

View File

@ -1,17 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat

View File

@ -1,4 +1,4 @@
package org.apache.lucene.bkdtree3d;
package org.apache.lucene.geo3d;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -28,21 +28,15 @@ import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.lucene60.Lucene60Codec;
import org.apache.lucene.codecs.DimensionalFormat;
import org.apache.lucene.codecs.DimensionalReader;
import org.apache.lucene.codecs.DimensionalWriter;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.lucene60.Lucene60DimensionalReader;
import org.apache.lucene.codecs.lucene60.Lucene60DimensionalWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.geo3d.GeoArea;
import org.apache.lucene.geo3d.GeoAreaFactory;
import org.apache.lucene.geo3d.GeoBBoxFactory;
import org.apache.lucene.geo3d.GeoCircleFactory;
import org.apache.lucene.geo3d.GeoPath;
import org.apache.lucene.geo3d.GeoPoint;
import org.apache.lucene.geo3d.GeoPolygonFactory;
import org.apache.lucene.geo3d.GeoShape;
import org.apache.lucene.geo3d.PlanetModel;
import org.apache.lucene.geo3d.XYZBounds;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@ -50,16 +44,13 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
@ -69,12 +60,6 @@ import org.junit.BeforeClass;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueCenter;
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueMax;
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueMin;
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.encodeValue;
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.encodeValueLenient;
public class TestGeo3DPointField extends LuceneTestCase {
private static boolean smallBBox;
@ -87,12 +72,39 @@ public class TestGeo3DPointField extends LuceneTestCase {
}
}
private static Codec getCodec() {
if (Codec.getDefault().getName().equals("Lucene60")) {
int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
double maxMBSortInHeap = 0.1 + (3*random().nextDouble());
if (VERBOSE) {
System.out.println("TEST: using Lucene60DimensionalFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
}
return new FilterCodec("Lucene60", Codec.getDefault()) {
@Override
public DimensionalFormat dimensionalFormat() {
return new DimensionalFormat() {
@Override
public DimensionalWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60DimensionalWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
}
@Override
public DimensionalReader fieldsReader(SegmentReadState readState) throws IOException {
return new Lucene60DimensionalReader(readState);
}
};
}
};
} else {
return Codec.getDefault();
}
}
public void testBasic() throws Exception {
Directory dir = getDirectory();
int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Geo3DDocValuesFormat(PlanetModel.WGS84, maxPointsInLeaf, maxPointsSortInHeap)));
iwc.setCodec(getCodec());
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new Geo3DPointField("field", PlanetModel.WGS84, toRadians(50.7345267), toRadians(-97.5303555)));
@ -108,126 +120,10 @@ public class TestGeo3DPointField extends LuceneTestCase {
dir.close();
}
public void testPlanetModelChanged() throws Exception {
Directory dir = getDirectory();
int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Geo3DDocValuesFormat(PlanetModel.WGS84, maxPointsInLeaf, maxPointsSortInHeap)));
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new Geo3DPointField("field", PlanetModel.WGS84, toRadians(50.7345267), toRadians(-97.5303555)));
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w, true);
IndexSearcher s = new IndexSearcher(r);
try {
s.search(new PointInGeo3DShapeQuery(PlanetModel.SPHERE,
"field",
GeoCircleFactory.makeGeoCircle(PlanetModel.WGS84, toRadians(50), toRadians(-97), Math.PI/180.)), 1);
fail("did not hit exc");
} catch (IllegalStateException ise) {
// expected
}
w.close();
r.close();
dir.close();
}
private static double toRadians(double degrees) {
return Math.PI*(degrees/180.0);
}
public void testBKDBasic() throws Exception {
Directory dir = getDirectory();
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
BKD3DTreeWriter w = new BKD3DTreeWriter(dir, "bkd3d");
w.add(0, 0, 0, 0);
w.add(1, 1, 1, 1);
w.add(-1, -1, -1, 2);
long indexFP = w.finish(out);
out.close();
IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
in.seek(indexFP);
BKD3DTreeReader r = new BKD3DTreeReader(in, 3);
DocIdSet hits = r.intersect(Integer.MIN_VALUE, Integer.MAX_VALUE,
Integer.MIN_VALUE, Integer.MAX_VALUE,
Integer.MIN_VALUE, Integer.MAX_VALUE,
new BKD3DTreeReader.ValueFilter() {
@Override
public boolean accept(int docID) {
return true;
}
@Override
public BKD3DTreeReader.Relation compare(int xMin, int xMax,
int yMin, int yMax,
int zMin, int zMax) {
return BKD3DTreeReader.Relation.SHAPE_INSIDE_CELL;
}
});
DocIdSetIterator disi = hits.iterator();
assertEquals(0, disi.nextDoc());
assertEquals(1, disi.nextDoc());
assertEquals(2, disi.nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());
in.close();
dir.close();
}
static class Point {
final double x;
final double y;
final double z;
public Point(double x, double y, double z) {
this.x = x;
this.y = y;
this.z = z;
}
@Override
public String toString() {
return "x=" + x + " y=" + y + " z=" + z;
}
}
private static class Range {
final double min;
final double max;
public Range(double min, double max) {
this.min = min;
this.max = max;
}
@Override
public String toString() {
return min + " TO " + max;
}
}
private double randomCoord(PlanetModel planetModel) {
return planetModel.getMaximumMagnitude() * 2*(random().nextDouble()-0.5);
}
private Range randomRange(PlanetModel planetModel) {
double x = randomCoord(planetModel);
double y = randomCoord(planetModel);
if (x < y) {
return new Range(x, y);
} else {
return new Range(y, x);
}
}
private static PlanetModel getPlanetModel() {
if (random().nextBoolean()) {
// Use one of the earth models:
@ -243,161 +139,6 @@ public class TestGeo3DPointField extends LuceneTestCase {
}
}
public void testBKDRandom() throws Exception {
List<Point> points = new ArrayList<>();
int numPoints = atLeast(10000);
Directory dir = getDirectory();
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
PlanetModel planetModel = getPlanetModel();
final double planetMax = planetModel.getMaximumMagnitude();
BKD3DTreeWriter w = new BKD3DTreeWriter(dir, "bkd3d", maxPointsInLeaf, maxPointsSortInHeap);
for(int docID=0;docID<numPoints;docID++) {
Point point;
if (docID > 0 && random().nextInt(30) == 17) {
// Dup point
point = points.get(random().nextInt(points.size()));
} else {
point = new Point(randomCoord(planetModel),
randomCoord(planetModel),
randomCoord(planetModel));
}
if (VERBOSE) {
System.err.println(" docID=" + docID + " point=" + point);
System.err.println(" x=" + encodeValue(planetMax, point.x) +
" y=" + encodeValue(planetMax, point.y) +
" z=" + encodeValue(planetMax, point.z));
}
points.add(point);
w.add(encodeValue(planetMax, point.x),
encodeValue(planetMax, point.y),
encodeValue(planetMax, point.z),
docID);
}
long indexFP = w.finish(out);
out.close();
IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
in.seek(indexFP);
BKD3DTreeReader r = new BKD3DTreeReader(in, numPoints);
int numIters = atLeast(100);
for(int iter=0;iter<numIters;iter++) {
// bbox
Range x = randomRange(planetModel);
Range y = randomRange(planetModel);
Range z = randomRange(planetModel);
int xMinEnc = encodeValue(planetMax, x.min);
int xMaxEnc = encodeValue(planetMax, x.max);
int yMinEnc = encodeValue(planetMax, y.min);
int yMaxEnc = encodeValue(planetMax, y.max);
int zMinEnc = encodeValue(planetMax, z.min);
int zMaxEnc = encodeValue(planetMax, z.max);
if (VERBOSE) {
System.err.println("\nTEST: iter=" + iter + " bbox: x=" + x + " (" + xMinEnc + " TO " + xMaxEnc+ ")" + " y=" + y + " (" + yMinEnc + " TO " + yMaxEnc + ")" + " z=" + z + " (" + zMinEnc + " TO " + zMaxEnc + ")" );
}
DocIdSet hits = r.intersect(xMinEnc, xMaxEnc,
yMinEnc, yMaxEnc,
zMinEnc, zMaxEnc,
new BKD3DTreeReader.ValueFilter() {
@Override
public boolean accept(int docID) {
Point point = points.get(docID);
//System.out.println(" accept docID=" + docID + " point=" + point + " (x=" + encodeValue(point.x) + " y=" + encodeValue(point.y) + " z=" + encodeValue(point.z) + ")");
// System.out.println(" accept docID=" + docID + " point: x=" + point.x + " y=" + point.y + " z=" + point.z);
int xEnc = encodeValue(planetMax, point.x);
int yEnc = encodeValue(planetMax, point.y);
int zEnc = encodeValue(planetMax, point.z);
boolean accept = xEnc >= xMinEnc && xEnc <= xMaxEnc &&
yEnc >= yMinEnc && yEnc <= yMaxEnc &&
zEnc >= zMinEnc && zEnc <= zMaxEnc;
//System.out.println(" " + accept);
return accept;
}
@Override
public BKD3DTreeReader.Relation compare(int cellXMin, int cellXMax,
int cellYMin, int cellYMax,
int cellZMin, int cellZMax) {
if (cellXMin > xMaxEnc || cellXMax < xMinEnc) {
return BKD3DTreeReader.Relation.SHAPE_OUTSIDE_CELL;
}
if (cellYMin > yMaxEnc || cellYMax < yMinEnc) {
return BKD3DTreeReader.Relation.SHAPE_OUTSIDE_CELL;
}
if (cellZMin > zMaxEnc || cellZMax < zMinEnc) {
return BKD3DTreeReader.Relation.SHAPE_OUTSIDE_CELL;
}
if (cellXMin >= xMinEnc && cellXMax <= xMaxEnc &&
cellYMin >= yMinEnc && cellYMax <= yMaxEnc &&
cellZMin >= zMinEnc && cellZMax <= zMaxEnc) {
return BKD3DTreeReader.Relation.CELL_INSIDE_SHAPE;
}
if (xMinEnc >= cellXMin && xMaxEnc <= cellXMax &&
yMinEnc >= cellYMin && yMaxEnc <= cellYMax &&
zMinEnc >= cellZMin && zMaxEnc <= cellZMax) {
return BKD3DTreeReader.Relation.SHAPE_INSIDE_CELL;
}
return BKD3DTreeReader.Relation.SHAPE_CROSSES_CELL;
}
});
DocIdSetIterator disi = hits.iterator();
FixedBitSet matches = new FixedBitSet(numPoints);
while (true) {
int nextHit = disi.nextDoc();
if (nextHit == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
matches.set(nextHit);
}
if (VERBOSE) {
System.err.println(" total hits: " + matches.cardinality());
}
for(int docID=0;docID<numPoints;docID++) {
Point point = points.get(docID);
boolean actual = matches.get(docID);
// We must quantize exactly as BKD tree does else we'll get false failures
int xEnc = encodeValue(planetMax, point.x);
int yEnc = encodeValue(planetMax, point.y);
int zEnc = encodeValue(planetMax, point.z);
boolean expected = xEnc >= xMinEnc && xEnc <= xMaxEnc &&
yEnc >= yMinEnc && yEnc <= yMaxEnc &&
zEnc >= zMinEnc && zEnc <= zMaxEnc;
if (expected != actual) {
System.out.println("docID=" + docID + " is wrong: expected=" + expected + " actual=" + actual);
System.out.println(" x=" + point.x + " (" + xEnc + ")" + " y=" + point.y + " (" + yEnc + ")" + " z=" + point.z + " (" + zEnc + ")");
fail("wrong match");
}
}
}
in.close();
dir.close();
}
private static class Cell {
static int nextCellID;
@ -426,9 +167,9 @@ public class TestGeo3DPointField extends LuceneTestCase {
/** Returns true if the quantized point lies within this cell, inclusive on all bounds. */
public boolean contains(double planetMax, GeoPoint point) {
int docX = encodeValue(planetMax, point.x);
int docY = encodeValue(planetMax, point.y);
int docZ = encodeValue(planetMax, point.z);
int docX = Geo3DUtil.encodeValue(planetMax, point.x);
int docY = Geo3DUtil.encodeValue(planetMax, point.y);
int docZ = Geo3DUtil.encodeValue(planetMax, point.z);
return docX >= xMinEnc && docX <= xMaxEnc &&
docY >= yMinEnc && docY <= yMaxEnc &&
@ -442,9 +183,9 @@ public class TestGeo3DPointField extends LuceneTestCase {
}
private static GeoPoint quantize(double planetMax, GeoPoint point) {
return new GeoPoint(decodeValueCenter(planetMax, encodeValue(planetMax, point.x)),
decodeValueCenter(planetMax, encodeValue(planetMax, point.y)),
decodeValueCenter(planetMax, encodeValue(planetMax, point.z)));
return new GeoPoint(Geo3DUtil.decodeValueCenter(planetMax, Geo3DUtil.encodeValue(planetMax, point.x)),
Geo3DUtil.decodeValueCenter(planetMax, Geo3DUtil.encodeValue(planetMax, point.y)),
Geo3DUtil.decodeValueCenter(planetMax, Geo3DUtil.encodeValue(planetMax, point.z)));
}
/** Tests consistency of GeoArea.getRelationship vs GeoShape.isWithin */
@ -488,12 +229,12 @@ public class TestGeo3DPointField extends LuceneTestCase {
// Start with the root cell that fully contains the shape:
Cell root = new Cell(null,
encodeValueLenient(planetMax, bounds.getMinimumX()),
encodeValueLenient(planetMax, bounds.getMaximumX()),
encodeValueLenient(planetMax, bounds.getMinimumY()),
encodeValueLenient(planetMax, bounds.getMaximumY()),
encodeValueLenient(planetMax, bounds.getMinimumZ()),
encodeValueLenient(planetMax, bounds.getMaximumZ()),
Geo3DUtil.encodeValueLenient(planetMax, bounds.getMinimumX()),
Geo3DUtil.encodeValueLenient(planetMax, bounds.getMaximumX()),
Geo3DUtil.encodeValueLenient(planetMax, bounds.getMinimumY()),
Geo3DUtil.encodeValueLenient(planetMax, bounds.getMaximumY()),
Geo3DUtil.encodeValueLenient(planetMax, bounds.getMinimumZ()),
Geo3DUtil.encodeValueLenient(planetMax, bounds.getMaximumZ()),
0);
if (VERBOSE) {
@ -534,14 +275,14 @@ public class TestGeo3DPointField extends LuceneTestCase {
} else {
GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel,
decodeValueMin(planetMax, cell.xMinEnc), decodeValueMax(planetMax, cell.xMaxEnc),
decodeValueMin(planetMax, cell.yMinEnc), decodeValueMax(planetMax, cell.yMaxEnc),
decodeValueMin(planetMax, cell.zMinEnc), decodeValueMax(planetMax, cell.zMaxEnc));
Geo3DUtil.decodeValueMin(planetMax, cell.xMinEnc), Geo3DUtil.decodeValueMax(planetMax, cell.xMaxEnc),
Geo3DUtil.decodeValueMin(planetMax, cell.yMinEnc), Geo3DUtil.decodeValueMax(planetMax, cell.yMaxEnc),
Geo3DUtil.decodeValueMin(planetMax, cell.zMinEnc), Geo3DUtil.decodeValueMax(planetMax, cell.zMaxEnc));
if (VERBOSE) {
log.println(" minx="+decodeValueMin(planetMax, cell.xMinEnc)+" maxx="+decodeValueMax(planetMax, cell.xMaxEnc)+
" miny="+decodeValueMin(planetMax, cell.yMinEnc)+" maxy="+decodeValueMax(planetMax, cell.yMaxEnc)+
" minz="+decodeValueMin(planetMax, cell.zMinEnc)+" maxz="+decodeValueMax(planetMax, cell.zMaxEnc));
log.println(" minx="+Geo3DUtil.decodeValueMin(planetMax, cell.xMinEnc)+" maxx="+Geo3DUtil.decodeValueMax(planetMax, cell.xMaxEnc)+
" miny="+Geo3DUtil.decodeValueMin(planetMax, cell.yMinEnc)+" maxy="+Geo3DUtil.decodeValueMax(planetMax, cell.yMaxEnc)+
" minz="+Geo3DUtil.decodeValueMin(planetMax, cell.zMinEnc)+" maxz="+Geo3DUtil.decodeValueMax(planetMax, cell.zMaxEnc));
}
switch (xyzSolid.getRelationship(shape)) {
@ -898,8 +639,6 @@ public class TestGeo3DPointField extends LuceneTestCase {
}
private static void verify(double[] lats, double[] lons) throws Exception {
int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
IndexWriterConfig iwc = newIndexWriterConfig();
PlanetModel planetModel = getPlanetModel();
@ -909,18 +648,7 @@ public class TestGeo3DPointField extends LuceneTestCase {
if (mbd != -1 && mbd < lats.length/100) {
iwc.setMaxBufferedDocs(lats.length/100);
}
final DocValuesFormat dvFormat = new Geo3DDocValuesFormat(planetModel, maxPointsInLeaf, maxPointsSortInHeap);
Codec codec = new Lucene60Codec() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
if (field.equals("point")) {
return dvFormat;
} else {
return super.getDocValuesFormatForField(field);
}
}
};
iwc.setCodec(codec);
iwc.setCodec(getCodec());
Directory dir;
if (lats.length > 100000) {
dir = noVirusChecker(newFSDirectory(createTempDir("TestBKDTree")));