mirror of https://github.com/apache/lucene.git
LUCENE-6881: cutover all BKD implementations to the codec's implementation
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1713278 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 17fbe3a6cd
commit f12cb52a3b
@@ -49,6 +49,9 @@ New Features
* LUCENE-6879: Allow to define custom CharTokenizer instances without
  subclassing using Java 8 lambdas or method references. (Uwe Schindler)

+* LUCENE-6881: Cutover all BKD implementations to dimensional values
+  (Mike McCandless)
+
API Changes

* LUCENE-3312: The API of oal.document was restructured to

@@ -148,7 +148,7 @@ class SimpleTextDimensionalWriter extends DimensionalWriter {

      @Override
      public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
-       return Relation.QUERY_CROSSES_CELL;
+       return Relation.CELL_CROSSES_QUERY;
      }
    });
    indexFPs.put(fieldInfo.name, writer.finish(dataOut));

@@ -77,7 +77,7 @@ public abstract class DimensionalWriter implements Closeable {
      @Override
      public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
        // Forces this segment's DimensionalReader to always visit all docs + values:
-       return Relation.QUERY_CROSSES_CELL;
+       return Relation.CELL_CROSSES_QUERY;
      }
    });
  }

@@ -1,6 +1,5 @@
package org.apache.lucene.codecs.lucene60;
-

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with

@@ -97,7 +96,7 @@ public class Lucene60DimensionalWriter extends DimensionalWriter implements Closeable {

      @Override
      public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
-       return Relation.QUERY_CROSSES_CELL;
+       return Relation.CELL_CROSSES_QUERY;
      }
    });

@@ -18,6 +18,8 @@ package org.apache.lucene.document;
 */

import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.bkd.BKDUtil;

/** A field that is indexed dimensionally such that finding
 *  all documents within an N-dimensional at search time is

@@ -65,6 +67,9 @@ public final class DimensionalField extends Field {
    if (point.length == 0) {
      throw new IllegalArgumentException("point cannot be 0 dimensions");
    }
+   if (point.length == 1) {
+     return new BytesRef(point[0]);
+   }
    int bytesPerDim = -1;
    for(byte[] dim : point) {
      if (dim == null) {

@@ -86,19 +91,20 @@ public final class DimensionalField extends Field {
    return new BytesRef(packed);
  }

- /** Sugar API: indexes a one-dimensional point */
- public DimensionalField(String name, byte[] dim1) {
-   super(name, dim1, getType(1, dim1.length));
- }
-
- /** Sugar API: indexes a two-dimensional point */
- public DimensionalField(String name, byte[] dim1, byte[] dim2) {
-   super(name, pack(dim1, dim2), getType(2, dim1.length));
- }
-
- /** Sugar API: indexes a three-dimensional point */
- public DimensionalField(String name, byte[] dim1, byte[] dim2, byte[] dim3) {
-   super(name, pack(dim1, dim2, dim3), getType(3, dim1.length));
- }
+ private static BytesRef pack(long... point) {
+   if (point == null) {
+     throw new IllegalArgumentException("point cannot be null");
+   }
+   if (point.length == 0) {
+     throw new IllegalArgumentException("point cannot be 0 dimensions");
+   }
+   byte[] packed = new byte[point.length * RamUsageEstimator.NUM_BYTES_LONG];
+
+   for(int dim=0;dim<point.length;dim++) {
+     BKDUtil.longToBytes(point[dim], packed, dim);
+   }
+
+   return new BytesRef(packed);
+ }

  /** General purpose API: creates a new DimensionalField, indexing the

@@ -108,10 +114,21 @@ public final class DimensionalField extends Field {
   * @param point byte[][] value
   * @throws IllegalArgumentException if the field name or value is null.
   */
- public DimensionalField(String name, byte[][] point) {
+ public DimensionalField(String name, byte[]... point) {
    super(name, pack(point), getType(point));
  }

+ /** General purpose API: creates a new DimensionalField, indexing the
+  *  provided N-dimensional long point.
+  *
+  *  @param name field name
+  *  @param point long[] value
+  *  @throws IllegalArgumentException if the field name or value is null.
+  */
+ public DimensionalField(String name, long... point) {
+   super(name, pack(point), getType(point.length, RamUsageEstimator.NUM_BYTES_LONG));
+ }
+
  /** Expert API */
  public DimensionalField(String name, byte[] packedPoint, FieldType type) {
    super(name, packedPoint, type);

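The hunk above adds a long-based sugar constructor beside the general byte[]... form. A brief usage sketch, not part of this commit (field name and values are invented):

    Document doc = new Document();
    // Each long is packed big-endian with its sign bit flipped (see BKDUtil.longToBytes
    // below), so the packed bytes sort in the same order as the original longs:
    doc.add(new DimensionalField("location", 25L, -112L));  // a 2D long point
    indexWriter.addDocument(doc);  // assumes an IndexWriter whose codec provides a dimensional format
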
@@ -1750,7 +1750,7 @@ public class CheckIndex implements Closeable {

    // We always pretend the query shape is so complex that it crosses every cell, so
    // that packedValue is passed for every document
-   return DimensionalValues.Relation.QUERY_CROSSES_CELL;
+   return DimensionalValues.Relation.CELL_CROSSES_QUERY;
  }

  private void checkPackedValue(String desc, byte[] packedValue, int docID) {

@@ -321,6 +321,11 @@ public abstract class CodecReader extends LeafReader implements Accountable {
    if (getTermVectorsReader() != null) {
      ramBytesUsed += getTermVectorsReader().ramBytesUsed();
    }
+
+   // dimensional values
+   if (getDimensionalReader() != null) {
+     ramBytesUsed += getDimensionalReader().ramBytesUsed();
+   }

    return ramBytesUsed;
  }

@@ -352,6 +357,11 @@ public abstract class CodecReader extends LeafReader implements Accountable {
    if (getTermVectorsReader() != null) {
      resources.add(Accountables.namedAccountable("term vectors", getTermVectorsReader()));
    }
+
+   // dimensional values
+   if (getDimensionalReader() != null) {
+     resources.add(Accountables.namedAccountable("dimensional values", getDimensionalReader()));
+   }

    return Collections.unmodifiableList(resources);
  }

@@ -137,6 +137,9 @@ final class DefaultIndexingChain extends DocConsumer {
    if (dimensionalWriter == null) {
      // lazy init
      DimensionalFormat fmt = state.segmentInfo.getCodec().dimensionalFormat();
+     if (fmt == null) {
+       throw new IllegalStateException("field=\"" + perField.fieldInfo.name + "\" was indexed dimensionally but codec does not support dimensional formats");
+     }
      dimensionalWriter = fmt.fieldsWriter(state);
    }

@@ -33,9 +33,9 @@ public abstract class DimensionalValues {
    /** Return this if the cell is fully contained by the query */
    CELL_INSIDE_QUERY,
    /** Return this if the cell and query do not overlap */
-   QUERY_OUTSIDE_CELL,
+   CELL_OUTSIDE_QUERY,
    /** Return this if the cell partially overlaps the query */
-   QUERY_CROSSES_CELL
+   CELL_CROSSES_QUERY
  };

  /** We recurse the BKD tree, using a provided instance of this to guide the recursion.

@@ -53,6 +53,10 @@ public abstract class DimensionalValues {
    /** Called for non-leaf cells to test how the cell relates to the query, to
     *  determine how to further recurse down the tree. */
    Relation compare(byte[] minPackedValue, byte[] maxPackedValue);
+
+   /** Notifies the caller that this many documents (from one block) are about
+    *  to be visited */
+   default void grow(int count) {};
  }

  /** Finds all documents and points matching the provided visitor.

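With the cell-relative relation names and the new grow hook above, every visitor in this patch follows the same shape. A minimal sketch, not part of this commit ('collected' and matches() are invented stand-ins):

    IntersectVisitor visitor = new IntersectVisitor() {
      @Override
      public void visit(int docID) {
        collected.add(docID);            // cell fully inside the query: no value check needed
      }
      @Override
      public void visit(int docID, byte[] packedValue) {
        if (matches(packedValue)) {      // cell crosses the query: filter each value
          collected.add(docID);
        }
      }
      @Override
      public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
        return Relation.CELL_CROSSES_QUERY;  // always visiting every value is correct, just slower
      }
      // grow(int) is optional: the default added above is a no-op
    };
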
@@ -321,11 +321,11 @@ public class ParallelLeafReader extends LeafReader {
  public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
    LeafReader reader = fieldToReader.get(fieldName);
    if (reader == null) {
-     throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index dimensional values");
+     return;
    }
    DimensionalValues dimValues = reader.getDimensionalValues();
    if (dimValues == null) {
-     throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index dimensional values");
+     return;
    }
    dimValues.intersect(fieldName, visitor);
  }

@@ -0,0 +1,303 @@
package org.apache.lucene.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;

import org.apache.lucene.document.DimensionalField;
import org.apache.lucene.index.DimensionalValues;
import org.apache.lucene.index.DimensionalValues.IntersectVisitor;
import org.apache.lucene.index.DimensionalValues.Relation;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.bkd.BKDUtil;

/** Searches for ranges in fields previously indexed using {@link DimensionalField}.  In
 *  a 1D field this is a simple range query; in a multi-dimensional field it's a box shape. */

public class DimensionalRangeQuery extends Query {
  final String field;
  final int numDims;
  final byte[][] lowerPoint;
  final boolean[] lowerInclusive;
  final byte[][] upperPoint;
  final boolean[] upperInclusive;
  // This is null only in the "fully open range" case
  final Integer bytesPerDim;

  public DimensionalRangeQuery(String field,
                               byte[][] lowerPoint, boolean[] lowerInclusive,
                               byte[][] upperPoint, boolean[] upperInclusive) {
    this.field = field;
    if (lowerPoint == null) {
      throw new IllegalArgumentException("lowerPoint must not be null");
    }
    if (upperPoint == null) {
      throw new IllegalArgumentException("upperPoint must not be null");
    }
    numDims = lowerPoint.length;
    if (upperPoint.length != numDims) {
      throw new IllegalArgumentException("lowerPoint has length=" + numDims + " but upperPoint has different length=" + upperPoint.length);
    }
    this.lowerPoint = lowerPoint;
    this.lowerInclusive = lowerInclusive;
    this.upperPoint = upperPoint;
    this.upperInclusive = upperInclusive;

    int bytesPerDim = -1;
    for(byte[] value : lowerPoint) {
      if (value != null) {
        if (bytesPerDim == -1) {
          bytesPerDim = value.length;
        } else if (value.length != bytesPerDim) {
          throw new IllegalArgumentException("all dimensions must have same bytes length, but saw " + bytesPerDim + " and " + value.length);
        }
      }
    }
    for(byte[] value : upperPoint) {
      if (value != null) {
        if (bytesPerDim == -1) {
          bytesPerDim = value.length;
        } else if (value.length != bytesPerDim) {
          throw new IllegalArgumentException("all dimensions must have same bytes length, but saw " + bytesPerDim + " and " + value.length);
        }
      }
    }
    if (bytesPerDim == -1) {
      this.bytesPerDim = null;
    } else {
      this.bytesPerDim = bytesPerDim;
    }
  }

  /** Sugar constructor: use in the 1D case when you indexed 1D long values using {@link DimensionalField} */
  public DimensionalRangeQuery(String field, Long lowerValue, boolean lowerInclusive, Long upperValue, boolean upperInclusive) {
    this(field, pack(lowerValue), new boolean[] {lowerInclusive}, pack(upperValue), new boolean[] {upperInclusive});
  }

  /** Sugar constructor: use in the 1D case when you indexed binary values using {@link DimensionalField} */
  public DimensionalRangeQuery(String field, byte[] lowerValue, boolean lowerInclusive, byte[] upperValue, boolean upperInclusive) {
    this(field, new byte[][] {lowerValue}, new boolean[] {lowerInclusive}, new byte[][] {upperValue}, new boolean[] {upperInclusive});
  }

  private static byte[][] pack(Long value) {
    if (value == null) {
      // OK: open ended range
      return new byte[1][];
    }
    byte[][] result = new byte[][] {new byte[RamUsageEstimator.NUM_BYTES_LONG]};
    BKDUtil.longToBytes(value, result[0], 0);
    return result;
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {

    // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
    // This is an inverted structure and should be used in the first pass:

    return new ConstantScoreWeight(this) {

      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        LeafReader reader = context.reader();
        DimensionalValues values = reader.getDimensionalValues();
        if (values == null) {
          // No docs in this segment indexed any field dimensionally
          return null;
        }
        FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        if (fieldInfo == null) {
          // No docs in this segment indexed this field at all
          return null;
        }
        if (fieldInfo.getDimensionCount() != numDims) {
          throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims=" + fieldInfo.getDimensionCount() + " but this query has numDims=" + numDims);
        }
        if (bytesPerDim != null && bytesPerDim.intValue() != fieldInfo.getDimensionNumBytes()) {
          throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + fieldInfo.getDimensionNumBytes() + " but this query has bytesPerDim=" + bytesPerDim);
        }
        int bytesPerDim = fieldInfo.getDimensionNumBytes();

        byte[] packedLowerIncl = new byte[numDims * bytesPerDim];
        byte[] packedUpperIncl = new byte[numDims * bytesPerDim];

        byte[] minValue = new byte[bytesPerDim];
        byte[] maxValue = new byte[bytesPerDim];
        Arrays.fill(maxValue, (byte) 0xff);

        byte[] one = new byte[bytesPerDim];
        one[bytesPerDim-1] = 1;

        // Carefully pack lower and upper bounds, taking care of per-dim inclusive:
        for(int dim=0;dim<numDims;dim++) {
          if (lowerPoint[dim] != null) {
            if (lowerInclusive[dim] == false) {
              if (Arrays.equals(lowerPoint[dim], maxValue)) {
                return null;
              } else {
                byte[] value = new byte[bytesPerDim];
                BKDUtil.add(bytesPerDim, 0, lowerPoint[dim], one, value);
                System.arraycopy(value, 0, packedLowerIncl, dim*bytesPerDim, bytesPerDim);
              }
            } else {
              System.arraycopy(lowerPoint[dim], 0, packedLowerIncl, dim*bytesPerDim, bytesPerDim);
            }
          } else {
            // Open-ended range: we just leave 0s in this packed dim for the lower value
          }

          if (upperPoint[dim] != null) {
            if (upperInclusive[dim] == false) {
              if (Arrays.equals(upperPoint[dim], minValue)) {
                return null;
              } else {
                byte[] value = new byte[bytesPerDim];
                BKDUtil.subtract(bytesPerDim, 0, upperPoint[dim], one, value);
                System.arraycopy(value, 0, packedUpperIncl, dim*bytesPerDim, bytesPerDim);
              }
            } else {
              System.arraycopy(upperPoint[dim], 0, packedUpperIncl, dim*bytesPerDim, bytesPerDim);
            }
          } else {
            // Open-ended range: fill with max point for this dim:
            System.arraycopy(maxValue, 0, packedUpperIncl, dim*bytesPerDim, bytesPerDim);
          }
        }

        // Now packedLowerIncl and packedUpperIncl are inclusive, and non-empty space:

        DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());

        int[] hitCount = new int[1];
        values.intersect(field,
                         new IntersectVisitor() {

                           @Override
                           public void grow(int count) {
                             result.grow(count);
                           }

                           @Override
                           public void visit(int docID) {
                             hitCount[0]++;
                             result.add(docID);
                           }

                           @Override
                           public void visit(int docID, byte[] packedValue) {
                             for(int dim=0;dim<numDims;dim++) {
                               int offset = dim*bytesPerDim;
                               if (StringHelper.compare(bytesPerDim, packedValue, offset, packedLowerIncl, offset) < 0) {
                                 // Doc's value is too low, in this dimension
                                 return;
                               }
                               if (StringHelper.compare(bytesPerDim, packedValue, offset, packedUpperIncl, offset) > 0) {
                                 // Doc's value is too high, in this dimension
                                 return;
                               }
                             }

                             // Doc is in-bounds
                             hitCount[0]++;
                             result.add(docID);
                           }

                           @Override
                           public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {

                             boolean crosses = false;

                             for(int dim=0;dim<numDims;dim++) {
                               int offset = dim*bytesPerDim;

                               if (StringHelper.compare(bytesPerDim, minPackedValue, offset, packedUpperIncl, offset) > 0 ||
                                   StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedLowerIncl, offset) < 0) {
                                 return Relation.CELL_OUTSIDE_QUERY;
                               }

                               crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, packedLowerIncl, offset) < 0 ||
                                          StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedUpperIncl, offset) > 0;
                             }

                             if (crosses) {
                               return Relation.CELL_CROSSES_QUERY;
                             } else {
                               return Relation.CELL_INSIDE_QUERY;
                             }
                           }
                         });

        // NOTE: hitCount[0] will be over-estimate in multi-valued case
        return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
      }
    };
  }

  @Override
  public int hashCode() {
    int hash = super.hashCode();
    hash += Arrays.hashCode(lowerPoint)^0x14fa55fb;
    hash += Arrays.hashCode(upperPoint)^0x733fa5fe;
    hash += Arrays.hashCode(lowerInclusive)^0x14fa55fb;
    hash += Arrays.hashCode(upperInclusive)^0x733fa5fe;
    hash += numDims^0x14fa55fb;
    hash += Objects.hashCode(bytesPerDim);
    return hash;
  }

  @Override
  public boolean equals(Object other) {
    if (super.equals(other)) {
      final DimensionalRangeQuery q = (DimensionalRangeQuery) other;
      return q.numDims == numDims &&
             q.bytesPerDim == bytesPerDim &&
             Arrays.equals(lowerPoint, q.lowerPoint) &&
             Arrays.equals(lowerInclusive, q.lowerInclusive) &&
             Arrays.equals(upperPoint, q.upperPoint) &&
             Arrays.equals(upperInclusive, q.upperInclusive);
    }

    return false;
  }

  @Override
  public String toString(String field) {
    final StringBuilder sb = new StringBuilder();
    sb.append(getClass().getSimpleName());
    sb.append(':');
    if (this.field.equals(field) == false) {
      sb.append("field=");
      sb.append(this.field);
      sb.append(':');
    }

    return sb.append('[')
      .append(Arrays.toString(lowerPoint))
      .append(" TO ")
      .append(Arrays.toString(upperPoint))
      .append(']')
      .toString();
  }
}

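A hedged usage sketch of the new query's 1D long sugar constructor (names and values invented, not from this commit):

    // Matches docs whose "price" point lies in [10, 100], both ends inclusive:
    Query q = new DimensionalRangeQuery("price", 10L, true, 100L, true);
    TopDocs hits = searcher.search(q, 10);  // assumes an IndexSearcher over a dimensionally-indexed field

Passing null for lowerValue or upperValue leaves that end of the range open, per the pack(Long) helper, which returns a one-element array holding null for the open case.
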
@@ -380,4 +380,18 @@ public abstract class StringHelper {

    return new BytesRef(bytes);
  }
+
+ /** Compares a fixed length slice of two byte arrays interpreted as
+  *  unsigned values.  Returns positive int if a > b, negative
+  *  int if a < b and 0 if a == b */
+ public static int compare(int count, byte[] a, int aOffset, byte[] b, int bOffset) {
+   for(int i=0;i<count;i++) {
+     int cmp = (a[aOffset+i]&0xff) - (b[bOffset+i]&0xff);
+     if (cmp != 0) {
+       return cmp;
+     }
+   }
+
+   return 0;
+ }
}

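The new helper compares slices as unsigned bytes, which is what makes the big-endian packed encodings in this patch comparable byte-by-byte. An illustrative check, not part of this commit:

    byte[] a = new byte[] {0x00, (byte) 0xff};
    byte[] b = new byte[] {0x01, 0x00};
    // (byte) 0xff is negative as a signed Java byte, but the &0xff masking above
    // compares it as 255; a still sorts first because its first byte 0x00 < 0x01:
    assert StringHelper.compare(2, a, 0, b, 0) < 0;
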
@@ -113,6 +113,7 @@ public class BKDReader implements Accountable {
    //System.out.println("R: addAll nodeID=" + nodeID);

    if (nodeID >= leafNodeOffset) {
+     //System.out.println("ADDALL");
      visitDocIDs(state.in, leafBlockFPs[nodeID-leafNodeOffset], state.visitor);
    } else {
      addAll(state, 2*nodeID);

@@ -126,13 +127,12 @@ public class BKDReader implements Accountable {

    // How many points are stored in this leaf cell:
    int count = in.readVInt();
+   visitor.grow(count);

    // TODO: especially for the 1D case, this was a decent speedup, because caller could know it should budget for around XXX docs:
    //state.docs.grow(count);
-   int docID = 0;
    for(int i=0;i<count;i++) {
-     docID += in.readVInt();
-     visitor.visit(docID);
+     visitor.visit(in.readInt());
    }
  }

@@ -145,16 +145,15 @@ public class BKDReader implements Accountable {
    // TODO: we could maybe pollute the IntersectVisitor API with a "grow" method if this maybe helps perf
    // enough (it did before, esp. for the 1D case):
    //state.docs.grow(count);
-   int docID = 0;
    for(int i=0;i<count;i++) {
-     docID += in.readVInt();
-     docIDs[i] = docID;
+     docIDs[i] = in.readInt();
    }

    return count;
  }

  protected void visitDocValues(byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
+   visitor.grow(count);
    for(int i=0;i<count;i++) {
      in.readBytes(scratchPackedValue, 0, scratchPackedValue.length);
      visitor.visit(docIDs[i], scratchPackedValue);

@@ -175,7 +174,7 @@ public class BKDReader implements Accountable {

    Relation r = state.visitor.compare(cellMinPacked, cellMaxPacked);

-   if (r == Relation.QUERY_OUTSIDE_CELL) {
+   if (r == Relation.CELL_OUTSIDE_QUERY) {
      // This cell is fully outside of the query shape: stop recursing
      return;
    } else if (r == Relation.CELL_INSIDE_QUERY) {

@@ -187,6 +186,7 @@ public class BKDReader implements Accountable {
    }

    if (nodeID >= leafNodeOffset) {
+     //System.out.println("FILTER");
      // Leaf node; scan and filter all points in this block:
      int count = readDocIDs(state.in, leafBlockFPs[nodeID-leafNodeOffset], state.scratchDocIDs);

@@ -20,14 +20,14 @@ package org.apache.lucene.util.bkd;
import java.math.BigInteger;
import java.util.Arrays;

-/** Utility methods to convert to/from N-dimensional packed byte[] as numbers */
+/** Utility methods to convert to/from N-dimensional packed byte[] as unsigned numbers */
public final class BKDUtil {

  private BKDUtil() {
    // No instance
  }

- /** result = a - b, where a >= b */
+ /** Result = a - b, where a >= b, else {@code IllegalArgumentException} is thrown. */
  public static void subtract(int bytesPerDim, int dim, byte[] a, byte[] b, byte[] result) {
    int start = dim * bytesPerDim;
    int end = start + bytesPerDim;

@@ -43,10 +43,30 @@ public final class BKDUtil {
      result[i-start] = (byte) diff;
    }
    if (borrow != 0) {
-     throw new IllegalArgumentException("a < b?");
+     throw new IllegalArgumentException("a < b");
    }
  }

+ /** Result = a + b, where a and b are unsigned.  If there is an overflow, {@code IllegalArgumentException} is thrown. */
+ public static void add(int bytesPerDim, int dim, byte[] a, byte[] b, byte[] result) {
+   int start = dim * bytesPerDim;
+   int end = start + bytesPerDim;
+   int carry = 0;
+   for(int i=end-1;i>=start;i--) {
+     int digitSum = (a[i]&0xff) + (b[i]&0xff) + carry;
+     if (digitSum > 255) {
+       digitSum -= 256;
+       carry = 1;
+     } else {
+       carry = 0;
+     }
+     result[i-start] = (byte) digitSum;
+   }
+   if (carry != 0) {
+     throw new IllegalArgumentException("a + b overflows bytesPerDim=" + bytesPerDim);
+   }
+ }
+
  /** Returns positive int if a > b, negative int if a < b and 0 if a == b */
  public static int compare(int bytesPerDim, byte[] a, int aIndex, byte[] b, int bIndex) {
    for(int i=0;i<bytesPerDim;i++) {

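add and subtract treat each bytesPerDim-wide slice as a big-endian unsigned integer, writing the answer into result[0..bytesPerDim). An illustrative round trip, not part of this commit:

    byte[] a    = {0, 0, 0, (byte) 0xff};   // 255
    byte[] one  = {0, 0, 0, 1};             // 1
    byte[] sum  = new byte[4];
    byte[] diff = new byte[4];
    BKDUtil.add(4, 0, a, one, sum);         // sum  == {0, 0, 1, 0}, i.e. 256
    BKDUtil.subtract(4, 0, sum, one, diff); // diff == {0, 0, 0, (byte) 0xff} again
    // add throws IllegalArgumentException on overflow; subtract throws when a < b

DimensionalRangeQuery (above) relies on exactly this to turn exclusive range bounds into inclusive ones by adding or subtracting one.
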
@@ -93,6 +113,36 @@ public final class BKDUtil {
    return x ^ 0x80000000;
  }

+ public static void longToBytes(long v, byte[] bytes, int dim) {
+   // Flip the sign bit so negative longs sort before positive longs:
+   v ^= 0x8000000000000000L;
+   int offset = 8 * dim;
+   bytes[offset] = (byte) (v >> 56);
+   bytes[offset+1] = (byte) (v >> 48);
+   bytes[offset+2] = (byte) (v >> 40);
+   bytes[offset+3] = (byte) (v >> 32);
+   bytes[offset+4] = (byte) (v >> 24);
+   bytes[offset+5] = (byte) (v >> 16);
+   bytes[offset+6] = (byte) (v >> 8);
+   bytes[offset+7] = (byte) v;
+ }
+
+ public static long bytesToLong(byte[] bytes, int index) {
+   int offset = 8 * index;
+   long v = ((bytes[offset] & 0xffL) << 56) |
+     ((bytes[offset+1] & 0xffL) << 48) |
+     ((bytes[offset+2] & 0xffL) << 40) |
+     ((bytes[offset+3] & 0xffL) << 32) |
+     ((bytes[offset+4] & 0xffL) << 24) |
+     ((bytes[offset+5] & 0xffL) << 16) |
+     ((bytes[offset+6] & 0xffL) << 8) |
+     (bytes[offset+7] & 0xffL);
+
+   // Flip the sign bit back
+   v ^= 0x8000000000000000L;
+   return v;
+ }
+
  public static void sortableBigIntBytes(byte[] bytes) {
    bytes[0] ^= 0x80;
    for(int i=1;i<bytes.length;i++) {

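longToBytes flips the sign bit before writing big-endian bytes, so unsigned byte order matches signed long order; bytesToLong undoes both steps. An illustrative check, not part of this commit:

    byte[] packed = new byte[16];
    BKDUtil.longToBytes(-5L, packed, 0);  // dim 0 occupies bytes 0-7
    BKDUtil.longToBytes(42L, packed, 1);  // dim 1 occupies bytes 8-15
    assert BKDUtil.bytesToLong(packed, 0) == -5L;
    assert BKDUtil.bytesToLong(packed, 1) == 42L;
    // -5 < 42, so dim 0's bytes compare below dim 1's under unsigned comparison:
    assert BKDUtil.compare(8, packed, 0, packed, 1) < 0;
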
@@ -240,17 +240,15 @@ public class BKDWriter implements Closeable {

    @Override
    protected int compare(int i, int j) {
-     if (dim != -1) {
-       writer.readPackedValue(i, scratch1);
-       writer.readPackedValue(j, scratch2);
-       int cmp = BKDUtil.compare(bytesPerDim, scratch1, dim, scratch2, dim);
-       if (cmp != 0) {
-         return cmp;
-       }
-     }
+     writer.readPackedValue(i, scratch1);
+     writer.readPackedValue(j, scratch2);
+     int cmp = BKDUtil.compare(bytesPerDim, scratch1, dim, scratch2, dim);
+     if (cmp != 0) {
+       return cmp;
+     }

      // Tie-break
-     int cmp = Integer.compare(writer.docIDs[i], writer.docIDs[j]);
+     cmp = Integer.compare(writer.docIDs[i], writer.docIDs[j]);
      if (cmp != 0) {
        return cmp;
      }

@@ -422,9 +420,12 @@ public class BKDWriter implements Closeable {

    boolean success = false;
    try {
+     //long t0 = System.nanoTime();
      for(int dim=0;dim<numDims;dim++) {
        sortedPointWriters[dim] = new PathSlice(sort(dim), 0, pointCount);
      }
+     //long t1 = System.nanoTime();
+     //System.out.println("sort time: " + ((t1-t0)/1000000.0) + " msec");

      if (tempInput != null) {
        tempDir.deleteFile(tempInput.getName());

@@ -446,6 +447,8 @@ public class BKDWriter implements Closeable {

      // If no exception, we should have cleaned everything up:
      assert tempDir.getCreatedFiles().isEmpty();
+     //long t2 = System.nanoTime();
+     //System.out.println("write time: " + ((t2-t1)/1000000.0) + " msec");

      success = true;
    } finally {

@@ -485,11 +488,8 @@ public class BKDWriter implements Closeable {
  protected void writeLeafBlockDocs(IndexOutput out, int[] docIDs, int start, int count) throws IOException {
    out.writeVInt(count);

-   int lastDocID = 0;
    for (int i=0;i<count;i++) {
-     int docID = docIDs[start + i];
-     out.writeVInt(docID - lastDocID);
-     lastDocID = docID;
+     out.writeInt(docIDs[start + i]);
    }
  }

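Note on the change above: leaf docIDs were previously sorted and delta-vInt encoded, and are now written as fixed 4-byte ints, matching the BKDReader hunks earlier that switched from docID += in.readVInt() to in.readInt(). A comment-form contrast, not from the patch:

    // Old: requires docIDs sorted ascending within the leaf block
    //   out.writeVInt(docID - lastDocID);
    // New: order-independent, always 4 bytes per docID
    //   out.writeInt(docID);

The docID sort that the old encoding depended on is removed two hunks below.
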
@@ -582,6 +582,7 @@ public class BKDWriter implements Closeable {
      }
    }

+   //System.out.println("SPLIT: " + splitDim);
    return splitDim;
  }

@@ -633,9 +634,6 @@ public class BKDWriter implements Closeable {
    // We ensured that maxPointsSortInHeap was >= maxPointsInLeafNode, so we better be in heap at this point:
    HeapPointWriter heapSource = (HeapPointWriter) source.writer;

-   // Sort by docID in the leaf so we can delta-vInt encode:
-   sortHeapPointWriter(heapSource, Math.toIntExact(source.start), Math.toIntExact(source.count), -1);
-
    // Save the block file pointer:
    leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();

@@ -77,7 +77,7 @@ public class TestDimensionalValues extends LuceneTestCase {
    new IntersectVisitor() {
      @Override
      public Relation compare(byte[] minPacked, byte[] maxPacked) {
-       return Relation.QUERY_CROSSES_CELL;
+       return Relation.CELL_CROSSES_QUERY;
      }
      public void visit(int docID) {
        throw new IllegalStateException();

@@ -119,7 +119,7 @@ public class TestDimensionalValues extends LuceneTestCase {
    new IntersectVisitor() {
      @Override
      public Relation compare(byte[] minPacked, byte[] maxPacked) {
-       return Relation.QUERY_CROSSES_CELL;
+       return Relation.CELL_CROSSES_QUERY;
      }
      public void visit(int docID) {
        throw new IllegalStateException();

@@ -164,7 +164,7 @@ public class TestDimensionalValues extends LuceneTestCase {
    new IntersectVisitor() {
      @Override
      public Relation compare(byte[] minPacked, byte[] maxPacked) {
-       return Relation.QUERY_CROSSES_CELL;
+       return Relation.CELL_CROSSES_QUERY;
      }
      public void visit(int docID) {
        throw new IllegalStateException();

@@ -411,14 +411,14 @@ public class TestDimensionalValues extends LuceneTestCase {
          assert max.compareTo(min) >= 0;

          if (max.compareTo(queryMin[dim]) < 0 || min.compareTo(queryMax[dim]) > 0) {
-           return Relation.QUERY_OUTSIDE_CELL;
+           return Relation.CELL_OUTSIDE_QUERY;
          } else if (min.compareTo(queryMin[dim]) < 0 || max.compareTo(queryMax[dim]) > 0) {
            crosses = true;
          }
        }

        if (crosses) {
-         return Relation.QUERY_CROSSES_CELL;
+         return Relation.CELL_CROSSES_QUERY;
        } else {
          return Relation.CELL_INSIDE_QUERY;
        }

@@ -1079,7 +1079,7 @@ public class TestDimensionalValues extends LuceneTestCase {
          if (BKDUtil.compare(numBytesPerDim, maxPacked, dim, queryMin[dim], 0) < 0 ||
              BKDUtil.compare(numBytesPerDim, minPacked, dim, queryMax[dim], 0) > 0) {
            //System.out.println("  query_outside_cell");
-           return Relation.QUERY_OUTSIDE_CELL;
+           return Relation.CELL_OUTSIDE_QUERY;
          } else if (BKDUtil.compare(numBytesPerDim, minPacked, dim, queryMin[dim], 0) < 0 ||
                     BKDUtil.compare(numBytesPerDim, maxPacked, dim, queryMax[dim], 0) > 0) {
            crosses = true;

@@ -1088,7 +1088,7 @@ public class TestDimensionalValues extends LuceneTestCase {

        if (crosses) {
          //System.out.println("  query_crosses_cell");
-         return Relation.QUERY_CROSSES_CELL;
+         return Relation.CELL_CROSSES_QUERY;
        } else {
          //System.out.println("  cell_inside_query");
          return Relation.CELL_INSIDE_QUERY;

File diff suppressed because it is too large
@@ -91,11 +91,11 @@ public class TestBKD extends LuceneTestCase {
        }

        if (max < queryMin || min > queryMax) {
-         return Relation.QUERY_OUTSIDE_CELL;
+         return Relation.CELL_OUTSIDE_QUERY;
        } else if (min >= queryMin && max <= queryMax) {
          return Relation.CELL_INSIDE_QUERY;
        } else {
-         return Relation.QUERY_CROSSES_CELL;
+         return Relation.CELL_CROSSES_QUERY;
        }
      }
    });

@@ -198,14 +198,14 @@ public class TestBKD extends LuceneTestCase {
          assert max >= min;

          if (max < queryMin[dim] || min > queryMax[dim]) {
-           return Relation.QUERY_OUTSIDE_CELL;
+           return Relation.CELL_OUTSIDE_QUERY;
          } else if (min < queryMin[dim] || max > queryMax[dim]) {
            crosses = true;
          }
        }

        if (crosses) {
-         return Relation.QUERY_CROSSES_CELL;
+         return Relation.CELL_CROSSES_QUERY;
        } else {
          return Relation.CELL_INSIDE_QUERY;
        }

@@ -319,14 +319,14 @@ public class TestBKD extends LuceneTestCase {
          assert max.compareTo(min) >= 0;

          if (max.compareTo(queryMin[dim]) < 0 || min.compareTo(queryMax[dim]) > 0) {
-           return Relation.QUERY_OUTSIDE_CELL;
+           return Relation.CELL_OUTSIDE_QUERY;
          } else if (min.compareTo(queryMin[dim]) < 0 || max.compareTo(queryMax[dim]) > 0) {
            crosses = true;
          }
        }

        if (crosses) {
-         return Relation.QUERY_CROSSES_CELL;
+         return Relation.CELL_CROSSES_QUERY;
        } else {
          return Relation.CELL_INSIDE_QUERY;
        }

@@ -517,6 +517,87 @@ public class TestBKD extends LuceneTestCase {
    verify(docValuesArray, docIDsArray, numDims, numBytesPerDim);
  }

+ public void testBKDUtilAdd() throws Exception {
+   int iters = atLeast(10000);
+   int numBytes = TestUtil.nextInt(random(), 1, 100);
+   for(int iter=0;iter<iters;iter++) {
+     BigInteger v1 = new BigInteger(8*numBytes-1, random());
+     BigInteger v2 = new BigInteger(8*numBytes-1, random());
+
+     byte[] v1Bytes = new byte[numBytes];
+     byte[] v1RawBytes = v1.toByteArray();
+     assert v1RawBytes.length <= numBytes;
+     System.arraycopy(v1RawBytes, 0, v1Bytes, v1Bytes.length-v1RawBytes.length, v1RawBytes.length);
+
+     byte[] v2Bytes = new byte[numBytes];
+     byte[] v2RawBytes = v2.toByteArray();
+     assert v2RawBytes.length <= numBytes;
+     System.arraycopy(v2RawBytes, 0, v2Bytes, v2Bytes.length-v2RawBytes.length, v2RawBytes.length);
+
+     byte[] result = new byte[numBytes];
+     BKDUtil.add(numBytes, 0, v1Bytes, v2Bytes, result);
+
+     BigInteger sum = v1.add(v2);
+     assertTrue("sum=" + sum + " v1=" + v1 + " v2=" + v2 + " but result=" + new BigInteger(1, result), sum.equals(new BigInteger(1, result)));
+   }
+ }
+
+ public void testIllegalBKDUtilAdd() throws Exception {
+   byte[] bytes = new byte[4];
+   Arrays.fill(bytes, (byte) 0xff);
+   byte[] one = new byte[4];
+   one[3] = 1;
+   try {
+     BKDUtil.add(4, 0, bytes, one, new byte[4]);
+   } catch (IllegalArgumentException iae) {
+     assertEquals("a + b overflows bytesPerDim=4", iae.getMessage());
+   }
+ }
+
+ public void testBKDUtilSubtract() throws Exception {
+   int iters = atLeast(10000);
+   int numBytes = TestUtil.nextInt(random(), 1, 100);
+   for(int iter=0;iter<iters;iter++) {
+     BigInteger v1 = new BigInteger(8*numBytes-1, random());
+     BigInteger v2 = new BigInteger(8*numBytes-1, random());
+     if (v1.compareTo(v2) < 0) {
+       BigInteger tmp = v1;
+       v1 = v2;
+       v2 = tmp;
+     }
+
+     byte[] v1Bytes = new byte[numBytes];
+     byte[] v1RawBytes = v1.toByteArray();
+     assert v1RawBytes.length <= numBytes: "length=" + v1RawBytes.length + " vs numBytes=" + numBytes;
+     System.arraycopy(v1RawBytes, 0, v1Bytes, v1Bytes.length-v1RawBytes.length, v1RawBytes.length);
+
+     byte[] v2Bytes = new byte[numBytes];
+     byte[] v2RawBytes = v2.toByteArray();
+     assert v2RawBytes.length <= numBytes: "length=" + v2RawBytes.length + " vs numBytes=" + numBytes;
+     System.arraycopy(v2RawBytes, 0, v2Bytes, v2Bytes.length-v2RawBytes.length, v2RawBytes.length);
+
+     byte[] result = new byte[numBytes];
+     BKDUtil.subtract(numBytes, 0, v1Bytes, v2Bytes, result);
+
+     BigInteger diff = v1.subtract(v2);
+
+     assertTrue("diff=" + diff + " vs result=" + new BigInteger(result) + " v1=" + v1 + " v2=" + v2, diff.equals(new BigInteger(result)));
+   }
+ }
+
+ public void testIllegalBKDUtilSubtract() throws Exception {
+   byte[] v1 = new byte[4];
+   v1[3] = (byte) 0xf0;
+   byte[] v2 = new byte[4];
+   v2[3] = (byte) 0xf1;
+   try {
+     BKDUtil.subtract(4, 0, v1, v2, new byte[4]);
+   } catch (IllegalArgumentException iae) {
+     assertEquals("a < b", iae.getMessage());
+   }
+ }
+
  /** docIDs can be null, for the single valued case, else it maps value to docID */
  private void verify(byte[][][] docValues, int[] docIDs, int numDims, int numBytesPerDim) throws Exception {
    try (Directory dir = getDirectory(docValues.length)) {

@@ -627,7 +708,7 @@ public class TestBKD extends LuceneTestCase {
        for(int dim=0;dim<numDims;dim++) {
          if (BKDUtil.compare(numBytesPerDim, maxPacked, dim, queryMin[dim], 0) < 0 ||
              BKDUtil.compare(numBytesPerDim, minPacked, dim, queryMax[dim], 0) > 0) {
-           return Relation.QUERY_OUTSIDE_CELL;
+           return Relation.CELL_OUTSIDE_QUERY;
          } else if (BKDUtil.compare(numBytesPerDim, minPacked, dim, queryMin[dim], 0) < 0 ||
                     BKDUtil.compare(numBytesPerDim, maxPacked, dim, queryMax[dim], 0) > 0) {
            crosses = true;

@@ -635,7 +716,7 @@ public class TestBKD extends LuceneTestCase {
        }

        if (crosses) {
-         return Relation.QUERY_CROSSES_CELL;
+         return Relation.CELL_CROSSES_QUERY;
        } else {
          return Relation.CELL_INSIDE_QUERY;
        }

@@ -32,8 +32,8 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.document.DimensionalField;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;

@@ -395,7 +395,7 @@ public abstract class SorterTestBase extends LuceneTestCase {

      @Override
      public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
-       return Relation.QUERY_CROSSES_CELL;
+       return Relation.CELL_CROSSES_QUERY;
      }
    });
  }

@@ -1,50 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DocValuesType;

/** Add this to a document to index lat/lon point, but be sure to use {@link BKDTreeDocValuesFormat} for the field. */
public final class BKDPointField extends Field {

  public static final FieldType TYPE = new FieldType();
  static {
    TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC);
    TYPE.freeze();
  }

  /**
   * Creates a new BKDPointField field with the specified lat and lon
   * @param name field name
   * @param lat double latitude
   * @param lon double longitude
   * @throws IllegalArgumentException if the field name is null or lat or lon are out of bounds
   */
  public BKDPointField(String name, double lat, double lon) {
    super(name, TYPE);
    if (BKDTreeWriter.validLat(lat) == false) {
      throw new IllegalArgumentException("invalid lat (" + lat + "): must be -90 to 90");
    }
    if (BKDTreeWriter.validLon(lon) == false) {
      throw new IllegalArgumentException("invalid lon (" + lon + "): must be -180 to 180");
    }
    fieldsData = Long.valueOf(((long) BKDTreeWriter.encodeLat(lat) << 32) | (BKDTreeWriter.encodeLon(lon) & 0xffffffffL));
  }
}

@@ -1,138 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

class BKDTreeDocValuesConsumer extends DocValuesConsumer implements Closeable {
  final DocValuesConsumer delegate;
  final int maxPointsInLeafNode;
  final int maxPointsSortInHeap;
  final IndexOutput out;
  final Map<Integer,Long> fieldIndexFPs = new HashMap<>();
  final SegmentWriteState state;
  final Directory tempDir;
  final String tempFileNamePrefix;

  public BKDTreeDocValuesConsumer(Directory tempDir, String tempFileNamePrefix, DocValuesConsumer delegate, SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
    BKDTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
    this.tempDir = tempDir;
    this.tempFileNamePrefix = tempFileNamePrefix;
    this.delegate = delegate;
    this.maxPointsInLeafNode = maxPointsInLeafNode;
    this.maxPointsSortInHeap = maxPointsSortInHeap;
    this.state = state;
    String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BKDTreeDocValuesFormat.DATA_EXTENSION);
    out = state.directory.createOutput(datFileName, state.context);
    CodecUtil.writeIndexHeader(out, BKDTreeDocValuesFormat.DATA_CODEC_NAME, BKDTreeDocValuesFormat.DATA_VERSION_CURRENT,
                               state.segmentInfo.getId(), state.segmentSuffix);
  }

  @Override
  public void close() throws IOException {
    boolean success = false;
    try {
      CodecUtil.writeFooter(out);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(delegate, out);
      } else {
        IOUtils.closeWhileHandlingException(delegate, out);
      }
    }

    String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BKDTreeDocValuesFormat.META_EXTENSION);
    IndexOutput metaOut = state.directory.createOutput(metaFileName, state.context);
    success = false;
    try {
      CodecUtil.writeIndexHeader(metaOut, BKDTreeDocValuesFormat.META_CODEC_NAME, BKDTreeDocValuesFormat.META_VERSION_CURRENT,
                                 state.segmentInfo.getId(), state.segmentSuffix);
      metaOut.writeVInt(fieldIndexFPs.size());
      for(Map.Entry<Integer,Long> ent : fieldIndexFPs.entrySet()) {
        metaOut.writeVInt(ent.getKey());
        metaOut.writeVLong(ent.getValue());
      }
      CodecUtil.writeFooter(metaOut);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(metaOut);
      } else {
        IOUtils.closeWhileHandlingException(metaOut);
      }
    }
  }

  @Override
  public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
    delegate.addSortedNumericField(field, docToValueCount, values);
    BKDTreeWriter writer = new BKDTreeWriter(tempDir, tempFileNamePrefix, maxPointsInLeafNode, maxPointsSortInHeap);
    Iterator<Number> valueIt = values.iterator();
    Iterator<Number> valueCountIt = docToValueCount.iterator();
    for (int docID=0;docID<state.segmentInfo.maxDoc();docID++) {
      assert valueCountIt.hasNext();
      int count = valueCountIt.next().intValue();
      for(int i=0;i<count;i++) {
        assert valueIt.hasNext();
        long value = valueIt.next().longValue();
        int latEnc = (int) (value >> 32);
        int lonEnc = (int) (value & 0xffffffff);
        writer.add(latEnc, lonEnc, docID);
      }
    }

    long indexStartFP = writer.finish(out);

    fieldIndexFPs.put(field.number, indexStartFP);
  }

  @Override
  public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) {
    throw new UnsupportedOperationException();
  }

  @Override
  public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) {
    throw new UnsupportedOperationException();
  }

  @Override
  public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) {
    throw new UnsupportedOperationException();
  }
}

@@ -1,109 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;

/**
 * A {@link DocValuesFormat} to efficiently index geo-spatial lat/lon points
 * from {@link BKDPointField} for fast bounding-box ({@link BKDPointInBBoxQuery})
 * and polygon ({@link BKDPointInPolygonQuery}) queries.
 *
 * <p>This wraps {@link Lucene54DocValuesFormat}, but saves its own BKD tree
 * structures to disk for fast query-time intersection. See <a
 * href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a>
 * for details.
 *
 * <p>The BKD tree slices up 2D (lat/lon) space into smaller and
 * smaller rectangles, until the smallest rectangles have approximately
 * between X/2 and X (X default is 1024) points in them, at which point
 * such leaf cells are written as a block to disk, while the index tree
 * structure, which records how space was sub-divided, is loaded into HEAP
 * at search time. At search time, the tree is recursed based on whether
 * each of the left or right children overlaps with the query shape, and once
 * a leaf block is reached, all documents in that leaf block are collected
 * if the cell is fully enclosed by the query shape, or filtered and then
 * collected, if not.
 *
 * <p>The index is also quite compact, because docs only appear once in
 * the tree (no "prefix terms").
 *
 * <p>In addition to the files written by {@link Lucene54DocValuesFormat}, this format writes:
 * <ol>
 *   <li><tt>.kdd</tt>: BKD leaf data and index</li>
 *   <li><tt>.kdm</tt>: BKD metadata</li>
 * </ol>
 *
 * <p>The disk format is experimental and free to change suddenly, and this code likely has new and exciting bugs!
 *
 * @lucene.experimental */

public class BKDTreeDocValuesFormat extends DocValuesFormat {

  static final String DATA_CODEC_NAME = "BKDData";
  static final int DATA_VERSION_START = 0;
  static final int DATA_VERSION_CURRENT = DATA_VERSION_START;
  static final String DATA_EXTENSION = "kdd";

  static final String META_CODEC_NAME = "BKDMeta";
  static final int META_VERSION_START = 0;
  static final int META_VERSION_CURRENT = META_VERSION_START;
  static final String META_EXTENSION = "kdm";

  private final int maxPointsInLeafNode;
  private final int maxPointsSortInHeap;

  private final DocValuesFormat delegate = new Lucene54DocValuesFormat();

  /** Default constructor */
  public BKDTreeDocValuesFormat() {
    this(BKDTreeWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDTreeWriter.DEFAULT_MAX_POINTS_SORT_IN_HEAP);
  }

  /** Creates this with custom configuration.
   *
   * @param maxPointsInLeafNode Maximum number of points in each leaf cell. Smaller values create a deeper tree with larger in-heap index and possibly
   *    faster searching. The default is 1024.
   * @param maxPointsSortInHeap Maximum number of points where in-heap sort can be used. When the number of points exceeds this, a (slower)
   *    offline sort is used. The default is 128 * 1024.
   *
   * @lucene.experimental */
  public BKDTreeDocValuesFormat(int maxPointsInLeafNode, int maxPointsSortInHeap) {
    super("BKDTree");
    BKDTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
    this.maxPointsInLeafNode = maxPointsInLeafNode;
    this.maxPointsSortInHeap = maxPointsSortInHeap;
  }

  @Override
  public DocValuesConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
    return new BKDTreeDocValuesConsumer(state.directory, state.segmentInfo.name, delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
  }

  @Override
  public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
    return new BKDTreeDocValuesProducer(delegate.fieldsProducer(state), state);
  }
}

@@ -1,175 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;

class BKDTreeDocValuesProducer extends DocValuesProducer {

  private final Map<String,BKDTreeReader> treeReaders = new HashMap<>();
  private final Map<Integer,Long> fieldToIndexFPs = new HashMap<>();

  private final IndexInput datIn;
  private final AtomicLong ramBytesUsed;
  private final int maxDoc;
  private final DocValuesProducer delegate;
  private final boolean merging;

  public BKDTreeDocValuesProducer(DocValuesProducer delegate, SegmentReadState state) throws IOException {
    String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BKDTreeDocValuesFormat.META_EXTENSION);
    ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaFileName, state.context);
    CodecUtil.checkIndexHeader(metaIn, BKDTreeDocValuesFormat.META_CODEC_NAME, BKDTreeDocValuesFormat.META_VERSION_START, BKDTreeDocValuesFormat.META_VERSION_CURRENT,
                               state.segmentInfo.getId(), state.segmentSuffix);
    int fieldCount = metaIn.readVInt();
    for(int i=0;i<fieldCount;i++) {
      int fieldNumber = metaIn.readVInt();
      long indexFP = metaIn.readVLong();
      fieldToIndexFPs.put(fieldNumber, indexFP);
    }
    CodecUtil.checkFooter(metaIn);
    metaIn.close();

    String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BKDTreeDocValuesFormat.DATA_EXTENSION);
    datIn = state.directory.openInput(datFileName, state.context);
    CodecUtil.checkIndexHeader(datIn, BKDTreeDocValuesFormat.DATA_CODEC_NAME, BKDTreeDocValuesFormat.DATA_VERSION_START, BKDTreeDocValuesFormat.DATA_VERSION_CURRENT,
                               state.segmentInfo.getId(), state.segmentSuffix);
    ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
    maxDoc = state.segmentInfo.maxDoc();
    this.delegate = delegate;
    merging = false;
  }

  // clone for merge: we don't hang onto the BKDTrees we load
  BKDTreeDocValuesProducer(BKDTreeDocValuesProducer orig) throws IOException {
    assert Thread.holdsLock(orig);
    datIn = orig.datIn.clone();
    ramBytesUsed = new AtomicLong(orig.ramBytesUsed.get());
    delegate = orig.delegate.getMergeInstance();
    fieldToIndexFPs.putAll(orig.fieldToIndexFPs);
    treeReaders.putAll(orig.treeReaders);
    merging = true;
    maxDoc = orig.maxDoc;
  }

  @Override
  public synchronized SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
    BKDTreeReader treeReader = treeReaders.get(field.name);
    if (treeReader == null) {
      // Lazy load
      Long fp = fieldToIndexFPs.get(field.number);
      if (fp == null) {
        throw new IllegalArgumentException("this field was not indexed as a BKDPointField");
      }

      // LUCENE-6697: never do real IOPs with the original IndexInput because search
      // threads can be concurrently cloning it:
      IndexInput clone = datIn.clone();

      clone.seek(fp);
      treeReader = new BKDTreeReader(clone, maxDoc);

      // Only hang onto the reader when we are not merging:
      if (merging == false) {
        treeReaders.put(field.name, treeReader);
        ramBytesUsed.addAndGet(treeReader.ramBytesUsed());
      }
    }

    return new BKDTreeSortedNumericDocValues(treeReader, delegate.getSortedNumeric(field));
  }

  @Override
  public void close() throws IOException {
    IOUtils.close(datIn, delegate);
  }

  @Override
  public void checkIntegrity() throws IOException {
    CodecUtil.checksumEntireFile(datIn);
  }

  @Override
  public NumericDocValues getNumeric(FieldInfo field) {
    throw new UnsupportedOperationException();
  }

  @Override
  public BinaryDocValues getBinary(FieldInfo field) {
    throw new UnsupportedOperationException();
  }

  @Override
  public SortedDocValues getSorted(FieldInfo field) {
    throw new UnsupportedOperationException();
  }

  @Override
  public SortedSetDocValues getSortedSet(FieldInfo field) {
    throw new UnsupportedOperationException();
  }

  @Override
  public Bits getDocsWithField(FieldInfo field) throws IOException {
    return delegate.getDocsWithField(field);
  }

  @Override
  public synchronized Collection<Accountable> getChildResources() {
    List<Accountable> resources = new ArrayList<>();
    for(Map.Entry<String,BKDTreeReader> ent : treeReaders.entrySet()) {
      resources.add(Accountables.namedAccountable("field " + ent.getKey(), ent.getValue()));
    }
    resources.add(Accountables.namedAccountable("delegate", delegate));

    return resources;
  }

  @Override
  public synchronized DocValuesProducer getMergeInstance() throws IOException {
|
||||
return new BKDTreeDocValuesProducer(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return ramBytesUsed.get() + delegate.ramBytesUsed();
|
||||
}
|
||||
}
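The LUCENE-6697 comment in getSortedNumeric is the central invariant of this producer: the shared datIn is opened once and never seeked directly; every lookup seeks a private clone. A minimal sketch of the same pattern, with a hypothetical openTree helper that is not part of this patch:

import java.io.IOException;
import org.apache.lucene.store.IndexInput;

class CloneBeforeSeek {
  private final IndexInput datIn;  // shared; its file pointer is never moved

  CloneBeforeSeek(IndexInput datIn) {
    this.datIn = datIn;
  }

  // Safe under concurrency: each caller seeks its own cheap clone,
  // so search threads never race on one shared file pointer.
  BKDTreeReader openTree(long indexFP, int maxDoc) throws IOException {
    IndexInput clone = datIn.clone();
    clone.seek(indexFP);
    return new BKDTreeReader(clone, maxDoc);
  }
}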
@ -1,379 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.RamUsageEstimator;

/** Handles intersection of a shape with a BKD tree previously written with {@link BKDTreeWriter}.
 *
 * @lucene.experimental */

final class BKDTreeReader implements Accountable {
  final private int[] splitValues;
  final private int leafNodeOffset;
  final private long[] leafBlockFPs;
  final int maxDoc;
  final IndexInput in;

  enum Relation {CELL_INSIDE_SHAPE, SHAPE_CROSSES_CELL, SHAPE_OUTSIDE_CELL};

  interface LatLonFilter {
    // TODO: move DVs/encoding out on top: this method should just take a docID
    boolean accept(double lat, double lon);
    // TODO: move DVs/encoding out on top: this method should take ints and do its own decode
    Relation compare(double latMin, double latMax, double lonMin, double lonMax);
  }

  public BKDTreeReader(IndexInput in, int maxDoc) throws IOException {

    // Read index:
    int numLeaves = in.readVInt();
    leafNodeOffset = numLeaves;

    // Tree is fully balanced binary tree, so number of nodes = numLeaves-1, except our nodeIDs are 1-based (splitValues[0] is unused):
    splitValues = new int[numLeaves];
    for(int i=0;i<numLeaves;i++) {
      splitValues[i] = in.readInt();
    }
    leafBlockFPs = new long[numLeaves];
    for(int i=0;i<numLeaves;i++) {
      leafBlockFPs[i] = in.readVLong();
    }

    this.maxDoc = maxDoc;
    this.in = in;
  }

  private static final class QueryState {
    final IndexInput in;
    byte[] scratch = new byte[16];
    final ByteArrayDataInput scratchReader = new ByteArrayDataInput(scratch);
    final DocIdSetBuilder docs;
    final int latMinEnc;
    final int latMaxEnc;
    final int lonMinEnc;
    final int lonMaxEnc;
    final LatLonFilter latLonFilter;
    final SortedNumericDocValues sndv;

    public QueryState(IndexInput in, int maxDoc,
                      int latMinEnc, int latMaxEnc,
                      int lonMinEnc, int lonMaxEnc,
                      LatLonFilter latLonFilter,
                      SortedNumericDocValues sndv) {
      this.in = in;
      this.docs = new DocIdSetBuilder(maxDoc);
      this.latMinEnc = latMinEnc;
      this.latMaxEnc = latMaxEnc;
      this.lonMinEnc = lonMinEnc;
      this.lonMaxEnc = lonMaxEnc;
      this.latLonFilter = latLonFilter;
      this.sndv = sndv;
    }
  }

  // TODO: move DVs/encoding out on top: this method should take ints, and encode should be done up above
  public DocIdSet intersect(double latMin, double latMax, double lonMin, double lonMax, LatLonFilter filter, SortedNumericDocValues sndv) throws IOException {
    if (BKDTreeWriter.validLat(latMin) == false) {
      throw new IllegalArgumentException("invalid latMin: " + latMin);
    }
    if (BKDTreeWriter.validLat(latMax) == false) {
      throw new IllegalArgumentException("invalid latMax: " + latMax);
    }
    if (BKDTreeWriter.validLon(lonMin) == false) {
      throw new IllegalArgumentException("invalid lonMin: " + lonMin);
    }
    if (BKDTreeWriter.validLon(lonMax) == false) {
      throw new IllegalArgumentException("invalid lonMax: " + lonMax);
    }

    int latMinEnc = BKDTreeWriter.encodeLat(latMin);
    int latMaxEnc = BKDTreeWriter.encodeLat(latMax);
    int lonMinEnc = BKDTreeWriter.encodeLon(lonMin);
    int lonMaxEnc = BKDTreeWriter.encodeLon(lonMax);

    QueryState state = new QueryState(in.clone(), maxDoc,
                                      latMinEnc, latMaxEnc,
                                      lonMinEnc, lonMaxEnc,
                                      filter,
                                      sndv);

    int hitCount = intersect(state, 1,
                             BKDTreeWriter.encodeLat(-90.0),
                             BKDTreeWriter.encodeLat(Math.nextAfter(90.0, Double.POSITIVE_INFINITY)),
                             BKDTreeWriter.encodeLon(-180.0),
                             BKDTreeWriter.encodeLon(Math.nextAfter(180.0, Double.POSITIVE_INFINITY)));

    // NOTE: hitCount is an over-estimate in the multi-valued case:
    return state.docs.build(hitCount);
  }

  private boolean accept(QueryState state, int docID) throws IOException {
    //System.out.println(" check accept docID=" + docID);
    state.sndv.setDocument(docID);
    // How many values this doc has:
    int count = state.sndv.count();
    for(int j=0;j<count;j++) {
      long enc = state.sndv.valueAt(j);

      int latEnc = (int) ((enc>>32) & 0xffffffffL);
      int lonEnc = (int) (enc & 0xffffffffL);
      //System.out.println(" lat=" + BKDTreeWriter.decodeLat(latEnc) + " lon=" + BKDTreeWriter.decodeLon(lonEnc));

      if (latEnc >= state.latMinEnc &&
          latEnc < state.latMaxEnc &&
          lonEnc >= state.lonMinEnc &&
          lonEnc < state.lonMaxEnc &&
          (state.latLonFilter == null ||
           state.latLonFilter.accept(BKDTreeWriter.decodeLat(latEnc), BKDTreeWriter.decodeLon(lonEnc)))) {
        //System.out.println(" yes");
        return true;
      }
    }

    return false;
  }
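accept() assumes each doc value packs the encoded latitude into the upper 32 bits of a long and the encoded longitude into the lower 32. A round-trip sketch of that packing (the pack step itself lives on the indexing side, not in this file):

// Pack two encoded ints into one long, then unpack exactly as accept() does:
int latEnc = -123456789;
int lonEnc = 987654321;
long enc = (((long) latEnc) << 32) | (lonEnc & 0xffffffffL);  // mask avoids sign extension

int latBack = (int) ((enc >> 32) & 0xffffffffL);
int lonBack = (int) (enc & 0xffffffffL);
assert latBack == latEnc && lonBack == lonEnc;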

  /** Fast path: this is called when the query rect fully encompasses all cells under this node. */
  private int addAll(QueryState state, int nodeID) throws IOException {
    //System.out.println(" addAll nodeID=" + nodeID);
    //long latRange = (long) cellLatMaxEnc - (long) cellLatMinEnc;
    //long lonRange = (long) cellLonMaxEnc - (long) cellLonMinEnc;

    if (nodeID >= leafNodeOffset) {
      //System.out.println(" leaf");

      /*
      System.out.println("A: " + BKDTreeWriter.decodeLat(cellLatMinEnc)
                       + " " + BKDTreeWriter.decodeLat(cellLatMaxEnc)
                       + " " + BKDTreeWriter.decodeLon(cellLonMinEnc)
                       + " " + BKDTreeWriter.decodeLon(cellLonMaxEnc));
      */

      // Leaf node
      long fp = leafBlockFPs[nodeID-leafNodeOffset];
      //System.out.println(" leaf nodeID=" + nodeID + " vs leafNodeOffset=" + leafNodeOffset + " fp=" + fp);
      if (fp == 0) {
        // Dead end node (adversary case):
        return 0;
      }
      state.in.seek(fp);

      //System.out.println(" seek to leafFP=" + fp);
      // How many points are stored in this leaf cell:
      int count = state.in.readVInt();
      state.docs.grow(count);
      for(int i=0;i<count;i++) {
        int docID = state.in.readInt();
        //System.out.println(" docID=" + docID);
        assert accept(state, docID);
        state.docs.add(docID);
      }

      return count;
    } else {
      int splitValue = splitValues[nodeID];

      if (splitValue == Integer.MAX_VALUE) {
        // Dead end node (adversary case):
        return 0;
      }

      //System.out.println(" splitValue=" + splitValue);

      //System.out.println(" addAll: inner");
      int count = addAll(state, 2*nodeID);
      count += addAll(state, 2*nodeID+1);
      //System.out.println(" addAll: return count=" + count);
      return count;
    }
  }

  private int intersect(QueryState state,
                        int nodeID,
                        int cellLatMinEnc, int cellLatMaxEnc, int cellLonMinEnc, int cellLonMaxEnc)
    throws IOException {

    //System.out.println("\nBKD: intersect nodeID=" + nodeID + " lat=" + BKDTreeWriter.decodeLat(state.latMinEnc) + " TO " + BKDTreeWriter.decodeLat(state.latMaxEnc) +
    //" lon=" + BKDTreeWriter.decodeLon(state.lonMinEnc) + " TO " + BKDTreeWriter.decodeLon(state.lonMaxEnc));

    // 2.06 sec -> 1.52 sec for 225 OSM London queries:
    if (state.latLonFilter != null) {

      // Don't check the filter if the current cell fully contains the query bbox (just keep recursing in that case):
      if (cellLatMinEnc > state.latMinEnc || cellLatMaxEnc < state.latMaxEnc ||
          cellLonMinEnc > state.lonMinEnc || cellLonMaxEnc < state.lonMaxEnc) {

        Relation r = state.latLonFilter.compare(BKDTreeWriter.decodeLat(cellLatMinEnc),
                                                BKDTreeWriter.decodeLat(cellLatMaxEnc),
                                                BKDTreeWriter.decodeLon(cellLonMinEnc),
                                                BKDTreeWriter.decodeLon(cellLonMaxEnc));
        // System.out.println("BKD.intersect cellLat=" + BKDTreeWriter.decodeLat(cellLatMinEnc) + " TO " + BKDTreeWriter.decodeLat(cellLatMaxEnc) + ", cellLon=" + BKDTreeWriter.decodeLon(cellLonMinEnc) + " TO " + BKDTreeWriter.decodeLon(cellLonMaxEnc) + " compare=" + r);
        if (r == Relation.SHAPE_OUTSIDE_CELL) {
          // This cell is fully outside of the query shape: stop recursing
          return 0;
        } else if (r == Relation.CELL_INSIDE_SHAPE) {
          // This cell is fully inside of the query shape: recursively add all points in this cell without filtering
          return addAll(state, nodeID);
        } else {
          // The cell crosses the shape boundary, so we fall through and do full filtering
        }
      } else {
        //System.out.println(" straight recurse");
      }
      // TODO: clean this up: the bbox case should also just be a filter, and we should assert filter != null at the start
    } else if (state.latMinEnc <= cellLatMinEnc && state.latMaxEnc >= cellLatMaxEnc && state.lonMinEnc <= cellLonMinEnc && state.lonMaxEnc >= cellLonMaxEnc) {
      // Bbox query: optimize the case when the query fully contains this cell: we can
      // recursively add all points without checking if they match the query:
      return addAll(state, nodeID);
    }

    long latRange = (long) cellLatMaxEnc - (long) cellLatMinEnc;
    long lonRange = (long) cellLonMaxEnc - (long) cellLonMinEnc;

    int dim;
    if (latRange >= lonRange) {
      dim = 0;
    } else {
      dim = 1;
    }

    //System.out.println("\nintersect node=" + nodeID + " vs " + leafNodeOffset);

    if (nodeID >= leafNodeOffset) {

      // Leaf node; scan and filter all points in this block:
      //System.out.println(" intersect leaf nodeID=" + nodeID + " vs leafNodeOffset=" + leafNodeOffset + " fp=" + leafBlockFPs[nodeID-leafNodeOffset]);
      int hitCount = 0;

      long fp = leafBlockFPs[nodeID-leafNodeOffset];
      //System.out.println(" intersect leaf fp=" + fp);
      if (fp == 0) {
        // Dead end node (adversary case):
        //System.out.println(" dead-end leaf");
        return 0;
      }

      /*
      System.out.println("I: " + BKDTreeWriter.decodeLat(cellLatMinEnc)
                       + " " + BKDTreeWriter.decodeLat(cellLatMaxEnc)
                       + " " + BKDTreeWriter.decodeLon(cellLonMinEnc)
                       + " " + BKDTreeWriter.decodeLon(cellLonMaxEnc));
      */

      state.in.seek(fp);

      // How many points are stored in this leaf cell:
      int count = state.in.readVInt();

      state.docs.grow(count);
      for(int i=0;i<count;i++) {
        int docID = state.in.readInt();
        if (accept(state, docID)) {
          state.docs.add(docID);
          hitCount++;
        }
      }

      return hitCount;

    } else {

      int splitValue = splitValues[nodeID];

      if (splitValue == Integer.MAX_VALUE) {
        // Dead end node (adversary case):
        //System.out.println(" dead-end sub-tree");
        return 0;
      }

      //System.out.println(" splitValue=" + splitValue);

      int count = 0;

      if (dim == 0) {

        //System.out.println(" split on lat=" + BKDTreeWriter.decodeLat(splitValue));

        // Inner node split on lat:

        // Left node:
        if (state.latMinEnc < splitValue) {
          //System.out.println(" recurse left");
          count += intersect(state,
                             2*nodeID,
                             cellLatMinEnc, splitValue, cellLonMinEnc, cellLonMaxEnc);
        } else {
          //System.out.println(" no recurse left");
        }

        // Right node:
        if (state.latMaxEnc >= splitValue) {
          //System.out.println(" recurse right");
          count += intersect(state,
                             2*nodeID+1,
                             splitValue, cellLatMaxEnc, cellLonMinEnc, cellLonMaxEnc);
        } else {
          //System.out.println(" no recurse right");
        }

      } else {
        // Inner node split on lon:
        assert dim == 1;

        //System.out.println(" split on lon=" + BKDTreeWriter.decodeLon(splitValue));

        // Left node:
        if (state.lonMinEnc < splitValue) {
          //System.out.println(" recurse left");
          count += intersect(state,
                             2*nodeID,
                             cellLatMinEnc, cellLatMaxEnc, cellLonMinEnc, splitValue);
        } else {
          //System.out.println(" no recurse left");
        }

        // Right node:
        if (state.lonMaxEnc >= splitValue) {
          //System.out.println(" recurse right");
          count += intersect(state,
                             2*nodeID+1,
                             cellLatMinEnc, cellLatMaxEnc, splitValue, cellLonMaxEnc);
        } else {
          //System.out.println(" no recurse right");
        }
      }
      //System.out.println(" return nodeID=" + nodeID);
      return count;
    }
  }

  @Override
  public long ramBytesUsed() {
    return splitValues.length * RamUsageEstimator.NUM_BYTES_INT +
      leafBlockFPs.length * RamUsageEstimator.NUM_BYTES_LONG;
  }
}
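Both addAll and intersect walk a heap-ordered implicit tree: node 1 is the root, node n has children 2n and 2n+1, and any nodeID >= leafNodeOffset is a leaf whose block lives at leafBlockFPs[nodeID - leafNodeOffset]. A standalone illustration of that numbering (not from the patch):

class ImplicitTree {
  // For numLeaves = 4 (so leafNodeOffset == 4) the node IDs are laid out as:
  //         1
  //       /   \
  //      2     3
  //     / \   / \
  //    4   5 6   7    <- leaves: ordinals 0..3 into leafBlockFPs
  static boolean isLeaf(int nodeID, int leafNodeOffset) {
    return nodeID >= leafNodeOffset;
  }
  static int leftChild(int nodeID)  { return 2 * nodeID; }
  static int rightChild(int nodeID) { return 2 * nodeID + 1; }
  static int leafOrdinal(int nodeID, int leafNodeOffset) {
    return nodeID - leafNodeOffset;
  }
}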
@ -1,49 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.SortedNumericDocValues;

class BKDTreeSortedNumericDocValues extends SortedNumericDocValues {
  final BKDTreeReader bkdTreeReader;
  final SortedNumericDocValues delegate;

  public BKDTreeSortedNumericDocValues(BKDTreeReader bkdTreeReader, SortedNumericDocValues delegate) {
    this.bkdTreeReader = bkdTreeReader;
    this.delegate = delegate;
  }

  public BKDTreeReader getBKDTreeReader() {
    return bkdTreeReader;
  }

  @Override
  public void setDocument(int doc) {
    delegate.setDocument(doc);
  }

  @Override
  public long valueAt(int index) {
    return delegate.valueAt(index);
  }

  @Override
  public int count() {
    return delegate.count();
  }
}
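This wrapper exists so the tree travels with the segment's doc values; a consumer detects it with an instanceof check and unwraps. A hedged sketch of a query-side helper (bboxHits is hypothetical; intersect's signature is the one from BKDTreeReader above):

import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSet;

class BBoxHelper {
  static DocIdSet bboxHits(LeafReader reader, String field,
                           double minLat, double maxLat,
                           double minLon, double maxLon) throws IOException {
    SortedNumericDocValues sndv = reader.getSortedNumericDocValues(field);
    if (sndv instanceof BKDTreeSortedNumericDocValues == false) {
      throw new IllegalStateException("field \"" + field + "\" was not indexed with the BKD tree format");
    }
    BKDTreeReader tree = ((BKDTreeSortedNumericDocValues) sndv).getBKDTreeReader();
    // A null LatLonFilter means a plain bounding-box match; sndv is consulted per candidate doc:
    return tree.intersect(minLat, maxLat, minLon, maxLon, null, sndv);
  }
}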
@ -1,882 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.RamUsageEstimator;

// TODO
//   - could we just "use postings" to map leaf -> docIDs?
//   - the polygon query really should be 2-phase
//   - if we could merge trees, we could drop delegating to wrapped DV?
//   - we could also index "auto-prefix terms" here, and use better compression, and maybe only use for the "fully contained" case so we'd
//     only index docIDs
//   - the index could be efficiently encoded as an FST, so we don't have wasteful
//     (monotonic) long[] leafBlockFPs; or we could use MonotonicLongValues ... but then
//     the index is already plenty small: 60M OSM points --> 1.1 MB with 128 points
//     per leaf, and you can reduce that by putting more points per leaf
//   - we can quantize the split values to 2 bytes (short): http://people.csail.mit.edu/tmertens/papers/qkdtree.pdf
//   - we could use threads while building; the higher nodes are very parallelizable
//   - generalize to N dimensions? i think there are reasonable use cases here, e.g.
//     2 dimensional points to store houses, plus e.g. 3rd dimension for "household income"
//   - geo3d integration should be straightforward? better accuracy, faster performance for small-poly-with-bbox cases? right now the poly
//     check is very costly...

/** Recursively builds a BKD tree to assign all incoming points to smaller
 *  and smaller rectangles until the number of points in a given
 *  rectangle is <= the <code>maxPointsInLeafNode</code>.  The tree is
 *  fully balanced, which means the leaf nodes will have between 50% and 100% of
 *  the requested <code>maxPointsInLeafNode</code>, except for the adversarial case
 *  of indexing exactly the same point many times.
 *
 *  <p>
 *  See <a href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a> for details.
 *
 *  <p>This consumes heap during writing: it allocates a <code>LongBitSet(numPoints)</code>,
 *  and for any nodes with fewer than <code>maxPointsSortInHeap</code>, it holds
 *  the points in memory as simple java arrays.
 *
 *  <p>
 *  <b>NOTE</b>: This can write at most Integer.MAX_VALUE * <code>maxPointsInLeafNode</code> total points.
 *
 * @lucene.experimental */

class BKDTreeWriter {

  // latEnc (int) + lonEnc (int) + ord (long) + docID (int)
  static final int BYTES_PER_DOC = RamUsageEstimator.NUM_BYTES_LONG + 3 * RamUsageEstimator.NUM_BYTES_INT;

  //static final boolean DEBUG = false;

  public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 1024;

  /** This works out to max of ~10 MB peak heap tied up during writing: */
  public static final int DEFAULT_MAX_POINTS_SORT_IN_HEAP = 128*1024;

  private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
  private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);

  private final Directory tempDir;
  private final String tempFileNamePrefix;

  private OfflineSorter.ByteSequencesWriter offlineWriter;
  private GrowingHeapLatLonWriter heapWriter;

  private IndexOutput tempInput;
  private final int maxPointsInLeafNode;
  private final int maxPointsSortInHeap;

  private long pointCount;

  public BKDTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException {
    this(tempDir, tempFileNamePrefix, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP);
  }

  // TODO: instead of maxPointsSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
  public BKDTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
    verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
    this.tempDir = tempDir;
    this.tempFileNamePrefix = tempFileNamePrefix;
    this.maxPointsInLeafNode = maxPointsInLeafNode;
    this.maxPointsSortInHeap = maxPointsSortInHeap;

    // We write first maxPointsSortInHeap in heap, then cutover to offline for additional points:
    heapWriter = new GrowingHeapLatLonWriter(maxPointsSortInHeap);
  }

  public static void verifyParams(int maxPointsInLeafNode, int maxPointsSortInHeap) {
    if (maxPointsInLeafNode <= 0) {
      throw new IllegalArgumentException("maxPointsInLeafNode must be > 0; got " + maxPointsInLeafNode);
    }
    if (maxPointsInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) {
      throw new IllegalArgumentException("maxPointsInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsInLeafNode);
    }
    if (maxPointsSortInHeap < maxPointsInLeafNode) {
      throw new IllegalArgumentException("maxPointsSortInHeap must be >= maxPointsInLeafNode; got " + maxPointsSortInHeap + " vs maxPointsInLeafNode="+ maxPointsInLeafNode);
    }
    if (maxPointsSortInHeap > ArrayUtil.MAX_ARRAY_LENGTH) {
      throw new IllegalArgumentException("maxPointsSortInHeap must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsSortInHeap);
    }
  }

  public void add(double lat, double lon, int docID) throws IOException {
    if (validLat(lat) == false) {
      throw new IllegalArgumentException("invalid lat: " + lat);
    }
    if (validLon(lon) == false) {
      throw new IllegalArgumentException("invalid lon: " + lon);
    }

    // Quantize to 32 bit precision, which is plenty: ~.0093 meter precision (longitude) at the equator
    add(encodeLat(lat), encodeLon(lon), docID);
  }

  /** If the current segment has too many points then we switch over to temp files / offline sort. */
  private void switchToOffline() throws IOException {

    // For each .add we just append to this input file, then in .finish we sort this input and recursively build the tree:
    tempInput = tempDir.createTempOutput(tempFileNamePrefix, "bkd", IOContext.DEFAULT);
    offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput);
    for(int i=0;i<pointCount;i++) {
      scratchBytesOutput.reset(scratchBytes);
      scratchBytesOutput.writeInt(heapWriter.latEncs[i]);
      scratchBytesOutput.writeInt(heapWriter.lonEncs[i]);
      scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
      scratchBytesOutput.writeVLong(i);
      // TODO: can/should OfflineSorter optimize the fixed-width case?
      offlineWriter.write(scratchBytes, 0, scratchBytes.length);
    }

    heapWriter = null;
  }

  void add(int latEnc, int lonEnc, int docID) throws IOException {
    assert latEnc > Integer.MIN_VALUE;
    assert latEnc < Integer.MAX_VALUE;
    assert lonEnc > Integer.MIN_VALUE;
    assert lonEnc < Integer.MAX_VALUE;

    if (pointCount >= maxPointsSortInHeap) {
      if (offlineWriter == null) {
        switchToOffline();
      }
      scratchBytesOutput.reset(scratchBytes);
      scratchBytesOutput.writeInt(latEnc);
      scratchBytesOutput.writeInt(lonEnc);
      scratchBytesOutput.writeVInt(docID);
      scratchBytesOutput.writeVLong(pointCount);
      offlineWriter.write(scratchBytes, 0, scratchBytes.length);
    } else {
      // Not too many points added yet, continue using heap:
      heapWriter.append(latEnc, lonEnc, pointCount, docID);
    }

    pointCount++;
  }

  /** Changes incoming {@link ByteSequencesWriter} file to a fixed-width-per-entry file, because we need to be able to slice
   *  as we recurse in {@link #build}. */
  private LatLonWriter convertToFixedWidth(String in) throws IOException {
    BytesRefBuilder scratch = new BytesRefBuilder();
    scratch.grow(BYTES_PER_DOC);
    BytesRef bytes = scratch.get();
    ByteArrayDataInput dataReader = new ByteArrayDataInput();

    OfflineSorter.ByteSequencesReader reader = null;
    LatLonWriter sortedWriter = null;
    boolean success = false;
    try {
      reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE));
      sortedWriter = getWriter(pointCount);
      for (long i=0;i<pointCount;i++) {
        boolean result = reader.read(scratch);
        assert result;
        dataReader.reset(bytes.bytes, bytes.offset, bytes.length);
        int latEnc = dataReader.readInt();
        int lonEnc = dataReader.readInt();
        int docID = dataReader.readVInt();
        long ord = dataReader.readVLong();
        assert docID >= 0: "docID=" + docID;
        assert latEnc > Integer.MIN_VALUE;
        assert latEnc < Integer.MAX_VALUE;
        assert lonEnc > Integer.MIN_VALUE;
        assert lonEnc < Integer.MAX_VALUE;
        sortedWriter.append(latEnc, lonEnc, ord, docID);
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(sortedWriter, reader);
      } else {
        IOUtils.closeWhileHandlingException(sortedWriter, reader);
        try {
          sortedWriter.destroy();
        } catch (Throwable t) {
          // Suppress to keep throwing original exc
        }
      }
    }

    return sortedWriter;
  }

  private LatLonWriter sort(boolean lon) throws IOException {
    if (heapWriter != null) {
      // All buffered points are still in heap

      assert pointCount < Integer.MAX_VALUE;

      new InPlaceMergeSorter() {
        @Override
        protected void swap(int i, int j) {
          int docID = heapWriter.docIDs[i];
          heapWriter.docIDs[i] = heapWriter.docIDs[j];
          heapWriter.docIDs[j] = docID;

          long ord = heapWriter.ords[i];
          heapWriter.ords[i] = heapWriter.ords[j];
          heapWriter.ords[j] = ord;

          int latEnc = heapWriter.latEncs[i];
          heapWriter.latEncs[i] = heapWriter.latEncs[j];
          heapWriter.latEncs[j] = latEnc;

          int lonEnc = heapWriter.lonEncs[i];
          heapWriter.lonEncs[i] = heapWriter.lonEncs[j];
          heapWriter.lonEncs[j] = lonEnc;
        }

        @Override
        protected int compare(int i, int j) {
          int cmp;
          if (lon) {
            cmp = Integer.compare(heapWriter.lonEncs[i], heapWriter.lonEncs[j]);
          } else {
            cmp = Integer.compare(heapWriter.latEncs[i], heapWriter.latEncs[j]);
          }
          if (cmp != 0) {
            return cmp;
          }

          // Tie-break
          cmp = Integer.compare(heapWriter.docIDs[i], heapWriter.docIDs[j]);
          if (cmp != 0) {
            return cmp;
          }

          return Long.compare(heapWriter.ords[i], heapWriter.ords[j]);
        }
      }.sort(0, (int) pointCount);

      HeapLatLonWriter sorted = new HeapLatLonWriter((int) pointCount);
      for(int i=0;i<pointCount;i++) {
        sorted.append(heapWriter.latEncs[i],
                      heapWriter.lonEncs[i],
                      heapWriter.ords[i],
                      heapWriter.docIDs[i]);
      }
      sorted.close();

      return sorted;
    } else {

      // Offline sort:
      assert tempInput != null;

      final ByteArrayDataInput reader = new ByteArrayDataInput();
      Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
        private final ByteArrayDataInput readerB = new ByteArrayDataInput();

        @Override
        public int compare(BytesRef a, BytesRef b) {
          reader.reset(a.bytes, a.offset, a.length);
          final int latAEnc = reader.readInt();
          final int lonAEnc = reader.readInt();
          final int docIDA = reader.readVInt();
          final long ordA = reader.readVLong();

          reader.reset(b.bytes, b.offset, b.length);
          final int latBEnc = reader.readInt();
          final int lonBEnc = reader.readInt();
          final int docIDB = reader.readVInt();
          final long ordB = reader.readVLong();

          int cmp;
          if (lon) {
            cmp = Integer.compare(lonAEnc, lonBEnc);
          } else {
            cmp = Integer.compare(latAEnc, latBEnc);
          }
          if (cmp != 0) {
            return cmp;
          }

          // Tie-break
          cmp = Integer.compare(docIDA, docIDB);
          if (cmp != 0) {
            return cmp;
          }

          return Long.compare(ordA, ordB);
        }
      };

      boolean success = false;
      OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp);
      String sortedFileName = sorter.sort(tempInput.getName());
      try {
        LatLonWriter writer = convertToFixedWidth(sortedFileName);
        success = true;
        return writer;
      } finally {
        if (success) {
          tempDir.deleteFile(sortedFileName);
        } else {
          IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFileName);
        }
      }
    }
  }

  /** Writes the BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */
  public long finish(IndexOutput out) throws IOException {
    //System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapWriter);

    if (offlineWriter != null) {
      // This also closes the temp file output:
      offlineWriter.close();
    }

    LongBitSet bitSet = new LongBitSet(pointCount);

    long countPerLeaf = pointCount;
    long innerNodeCount = 1;

    while (countPerLeaf > maxPointsInLeafNode) {
      countPerLeaf = (countPerLeaf+1)/2;
      innerNodeCount *= 2;
    }

    //System.out.println("innerNodeCount=" + innerNodeCount);

    if (1+2*innerNodeCount >= Integer.MAX_VALUE) {
      throw new IllegalStateException("too many nodes; increase maxPointsInLeafNode (currently " + maxPointsInLeafNode + ") and reindex");
    }

    innerNodeCount--;

    int numLeaves = (int) (innerNodeCount+1);

    // Indexed by nodeID, but first (root) nodeID is 1
    int[] splitValues = new int[numLeaves];

    // +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 7)
    long[] leafBlockFPs = new long[numLeaves];

    // Make sure the math above "worked":
    assert pointCount / splitValues.length <= maxPointsInLeafNode: "pointCount=" + pointCount + " splitValues.length=" + splitValues.length + " maxPointsInLeafNode=" + maxPointsInLeafNode;
    //System.out.println(" avg pointsPerLeaf=" + (pointCount/splitValues.length));

    // Sort all docs once by lat, once by lon:
    LatLonWriter latSortedWriter = null;
    LatLonWriter lonSortedWriter = null;

    boolean success = false;
    try {
      lonSortedWriter = sort(true);
      latSortedWriter = sort(false);
      heapWriter = null;

      build(1, numLeaves, new PathSlice(latSortedWriter, 0, pointCount),
            new PathSlice(lonSortedWriter, 0, pointCount),
            bitSet, out,
            Integer.MIN_VALUE, Integer.MAX_VALUE,
            Integer.MIN_VALUE, Integer.MAX_VALUE,
            //encodeLat(-90.0), encodeLat(Math.nextAfter(90.0, Double.POSITIVE_INFINITY)),
            //encodeLon(-180.0), encodeLon(Math.nextAfter(180.0, Double.POSITIVE_INFINITY)),
            splitValues,
            leafBlockFPs);
      success = true;
    } finally {
      if (success) {
        latSortedWriter.destroy();
        lonSortedWriter.destroy();
        if (tempInput != null) {
          tempDir.deleteFile(tempInput.getName());
        }
      } else {
        try {
          latSortedWriter.destroy();
        } catch (Throwable t) {
          // Suppress to keep throwing original exc
        }
        try {
          lonSortedWriter.destroy();
        } catch (Throwable t) {
          // Suppress to keep throwing original exc
        }
        if (tempInput != null) {
          IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName());
        }
      }
    }

    //System.out.println("Total nodes: " + innerNodeCount);

    // Write index:
    long indexFP = out.getFilePointer();
    out.writeVInt(numLeaves);

    // NOTE: splitValues[0] is unused, because nodeID is 1-based:
    for (int i=0;i<splitValues.length;i++) {
      out.writeInt(splitValues[i]);
    }
    for (int i=0;i<leafBlockFPs.length;i++) {
      out.writeVLong(leafBlockFPs[i]);
    }

    return indexFP;
  }
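The sizing loop in finish() rounds the leaf count up to the next power of two, which is what makes the implicit-tree numbering in BKDTreeReader work. A worked trace with illustrative numbers (pointCount = 5000, maxPointsInLeafNode = 1024):

long pointCount = 5000;
int maxPointsInLeafNode = 1024;
long countPerLeaf = pointCount;            // 5000
long innerNodeCount = 1;
while (countPerLeaf > maxPointsInLeafNode) {
  countPerLeaf = (countPerLeaf + 1) / 2;   // 2500, 1250, 625
  innerNodeCount *= 2;                     //    2,    4,   8
}
innerNodeCount--;                          // 7 inner nodes (nodeIDs 1..7)
int numLeaves = (int) (innerNodeCount + 1);  // 8 leaves of ~625 points each
// splitValues and leafBlockFPs both get numLeaves entries; since nodeIDs
// are 1-based, splitValues[0] stays unused.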

  /** Sliced reference to points in an OfflineSorter.ByteSequencesWriter file. */
  private static final class PathSlice {
    final LatLonWriter writer;
    final long start;
    final long count;

    public PathSlice(LatLonWriter writer, long start, long count) {
      this.writer = writer;
      this.start = start;
      this.count = count;
    }

    @Override
    public String toString() {
      return "PathSlice(start=" + start + " count=" + count + " writer=" + writer + ")";
    }
  }

  /** Marks bits for the ords (points) that belong in the left sub tree. */
  private long markLeftTree(int splitDim, PathSlice source, LongBitSet bitSet, int[] splitValueRet,
                            int minLatEnc, int maxLatEnc, int minLonEnc, int maxLonEnc) throws IOException {

    // This is the initial size of our left tree, but we may lower it below for == case:
    long leftCount = source.count / 2;

    // Read the split value:
    //if (DEBUG) System.out.println(" leftCount=" + leftCount + " vs " + source.count);
    LatLonReader reader = source.writer.getReader(source.start + leftCount);
    boolean success = false;
    int splitValue;
    try {
      boolean result = reader.next();
      assert result;

      int latSplitEnc = reader.latEnc();
      assert latSplitEnc >= minLatEnc && latSplitEnc < maxLatEnc: "latSplitEnc=" + latSplitEnc + " minLatEnc=" + minLatEnc + " maxLatEnc=" + maxLatEnc;

      int lonSplitEnc = reader.lonEnc();
      assert lonSplitEnc >= minLonEnc && lonSplitEnc < maxLonEnc: "lonSplitEnc=" + lonSplitEnc + " minLonEnc=" + minLonEnc + " maxLonEnc=" + maxLonEnc;

      if (splitDim == 0) {
        splitValue = latSplitEnc;
        //if (DEBUG) System.out.println(" splitValue=" + decodeLat(splitValue));
      } else {
        splitValue = lonSplitEnc;
        //if (DEBUG) System.out.println(" splitValue=" + decodeLon(splitValue));
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(reader);
      } else {
        IOUtils.closeWhileHandlingException(reader);
      }
    }

    splitValueRet[0] = splitValue;

    // Mark ords that fall into the left half, and also handle the == boundary case:
    assert bitSet.cardinality() == 0: "cardinality=" + bitSet.cardinality();

    success = false;
    reader = source.writer.getReader(source.start);
    try {
      int lastValue = Integer.MIN_VALUE;
      for (int i=0;i<leftCount;i++) {
        boolean result = reader.next();
        assert result;
        int latEnc = reader.latEnc();
        int lonEnc = reader.lonEnc();

        int value;
        if (splitDim == 0) {
          value = latEnc;
        } else {
          value = lonEnc;
        }

        // Our input source is supposed to be sorted on the incoming dimension:
        assert value >= lastValue;
        lastValue = value;

        if (value == splitValue) {
          // TODO: we could simplify this, by allowing splitValue to be on either side?
          // If we have identical points at the split, we move the count back to before the identical points:
          leftCount = i;
          break;
        }
        assert value < splitValue: "i=" + i + " value=" + value + " vs splitValue=" + splitValue;
        long ord = reader.ord();
        int docID = reader.docID();
        assert docID >= 0: "docID=" + docID + " reader=" + reader;

        // We should never see dup ords:
        assert bitSet.get(ord) == false;
        bitSet.set(ord);
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(reader);
      } else {
        IOUtils.closeWhileHandlingException(reader);
      }
    }

    assert leftCount == bitSet.cardinality(): "leftCount=" + leftCount + " cardinality=" + bitSet.cardinality();

    return leftCount;
  }
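The value == splitValue branch is what keeps runs of identical values on one side: instead of splitting a run, leftCount is pulled back to the start of the run so every duplicate lands in the right subtree. A small trace with illustrative values:

int[] sorted = {10, 17, 23, 23, 23, 31, 40, 52};  // one dim, already sorted
long leftCount = sorted.length / 2;               // 4
int splitValue = sorted[(int) leftCount];         // 23
for (int i = 0; i < leftCount; i++) {
  if (sorted[i] == splitValue) {
    leftCount = i;                                // pulled back to 2
    break;
  }
}
// left subtree:  {10, 17}                  (strictly < splitValue)
// right subtree: {23, 23, 23, 31, 40, 52}  (>= splitValue)
// With every value identical, leftCount becomes 0 and build() marks the
// dead child via the Integer.MAX_VALUE sentinel.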

  /** The incoming PathSlice for the dim we will split is already partitioned/sorted. */
  private void build(int nodeID, int leafNodeOffset,
                     PathSlice lastLatSorted,
                     PathSlice lastLonSorted,
                     LongBitSet bitSet,
                     IndexOutput out,
                     int minLatEnc, int maxLatEnc, int minLonEnc, int maxLonEnc,
                     int[] splitValues,
                     long[] leafBlockFPs) throws IOException {

    PathSlice source;
    PathSlice nextSource;

    long latRange = (long) maxLatEnc - (long) minLatEnc;
    long lonRange = (long) maxLonEnc - (long) minLonEnc;

    assert lastLatSorted.count == lastLonSorted.count;

    // Compute which dim we should split on at this level:
    int splitDim;
    if (latRange >= lonRange) {
      // Split by lat:
      splitDim = 0;
      source = lastLatSorted;
      nextSource = lastLonSorted;
    } else {
      // Split by lon:
      splitDim = 1;
      source = lastLonSorted;
      nextSource = lastLatSorted;
    }

    long count = source.count;

    //if (DEBUG) System.out.println("\nBUILD: nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset + " splitDim=" + splitDim + "\n lastLatSorted=" + lastLatSorted + "\n lastLonSorted=" + lastLonSorted + "\n count=" + count + " lat=" + decodeLat(minLatEnc) + " TO " + decodeLat(maxLatEnc) + " lon=" + decodeLon(minLonEnc) + " TO " + decodeLon(maxLonEnc));

    if (count == 0) {
      // Dead end in the tree, due to adversary cases, e.g. many identical points:
      if (nodeID < splitValues.length) {
        // Sentinel used to mark that the tree is dead under here:
        splitValues[nodeID] = Integer.MAX_VALUE;
      }
      //if (DEBUG) System.out.println(" dead-end sub-tree");
      return;
    }

    if (nodeID >= leafNodeOffset) {
      // Leaf node: write block
      //if (DEBUG) System.out.println(" leaf");
      assert maxLatEnc > minLatEnc;
      assert maxLonEnc > minLonEnc;

      //System.out.println("\nleaf:\n lat range: " + ((long) maxLatEnc-minLatEnc));
      //System.out.println(" lon range: " + ((long) maxLonEnc-minLonEnc));

      // Sort by docID in the leaf so we get sequentiality at search time (may not matter?):
      LatLonReader reader = source.writer.getReader(source.start);

      // TODO: we can reuse this
      int[] docIDs = new int[(int) count];

      boolean success = false;
      try {
        for (int i=0;i<source.count;i++) {

          // NOTE: we discard ord at this point; we only needed it temporarily
          // during building to uniquely identify each point to properly handle
          // the multi-valued case (one docID having multiple values):

          // We also discard lat/lon, since at search time, we rely on the
          // wrapped doc values for this:

          boolean result = reader.next();
          assert result;
          docIDs[i] = reader.docID();
        }
        success = true;
      } finally {
        if (success) {
          IOUtils.close(reader);
        } else {
          IOUtils.closeWhileHandlingException(reader);
        }
      }

      Arrays.sort(docIDs);

      // Dedup docIDs: for the multi-valued case where more than one value for the doc
      // wound up in this leaf cell, we only need to store the docID once:
      int lastDocID = -1;
      int uniqueCount = 0;
      for(int i=0;i<docIDs.length;i++) {
        int docID = docIDs[i];
        if (docID != lastDocID) {
          uniqueCount++;
          lastDocID = docID;
        }
      }
      assert uniqueCount <= count;

      long startFP = out.getFilePointer();
      out.writeVInt(uniqueCount);

      // Save the block file pointer:
      leafBlockFPs[nodeID - leafNodeOffset] = startFP;
      //System.out.println(" leafFP=" + startFP);

      lastDocID = -1;
      for (int i=0;i<docIDs.length;i++) {
        // Absolute int encode; with "vInt of deltas" encoding, the .kdd size dropped from
        // 697 MB -> 539 MB, but query time for 225 queries went from 1.65 sec -> 2.64 sec.
        // I think if we also indexed prefix terms here we could do less costly compression
        // on those lists:
        int docID = docIDs[i];
        if (docID != lastDocID) {
          //System.out.println(" docID=" + docID);
          out.writeInt(docID);
          lastDocID = docID;
        }
      }
      //long endFP = out.getFilePointer();
      //System.out.println(" bytes/doc: " + ((endFP - startFP) / count));
    } else {
      // Inner node: partition/recurse

      assert nodeID < splitValues.length: "nodeID=" + nodeID + " splitValues.length=" + splitValues.length;

      int[] splitValueArray = new int[1];

      long leftCount = markLeftTree(splitDim, source, bitSet, splitValueArray,
                                    minLatEnc, maxLatEnc, minLonEnc, maxLonEnc);
      int splitValue = splitValueArray[0];

      // TODO: we could save split value in here so we don't have to re-open file later:

      // Partition nextSource into sorted left and right sets, so we can recurse.  This is somewhat hairy: we partition the next lon set
      // according to how we had just partitioned the lat set, and vice/versa:

      LatLonWriter leftWriter = null;
      LatLonWriter rightWriter = null;
      LatLonReader reader = null;

      boolean success = false;

      int nextLeftCount = 0;

      try {
        leftWriter = getWriter(leftCount);
        rightWriter = getWriter(count - leftCount);

        //if (DEBUG) System.out.println(" partition:\n splitValueEnc=" + splitValue + "\n " + nextSource + "\n --> leftSorted=" + leftWriter + "\n --> rightSorted=" + rightWriter + ")");
        reader = nextSource.writer.getReader(nextSource.start);

        // TODO: we could compute the split value here for each sub-tree and save an O(N) pass on recursion, but makes code hairier and only
        // changes the constant factor of building, not the big-oh:
        for (int i=0;i<count;i++) {
          boolean result = reader.next();
          assert result;
          int latEnc = reader.latEnc();
          int lonEnc = reader.lonEnc();
          long ord = reader.ord();
          int docID = reader.docID();
          assert docID >= 0: "docID=" + docID + " reader=" + reader;
          if (bitSet.get(ord)) {
            if (splitDim == 0) {
              assert latEnc < splitValue: "latEnc=" + latEnc + " splitValue=" + splitValue;
            } else {
              assert lonEnc < splitValue: "lonEnc=" + lonEnc + " splitValue=" + splitValue;
            }
            leftWriter.append(latEnc, lonEnc, ord, docID);
            nextLeftCount++;
          } else {
            if (splitDim == 0) {
              assert latEnc >= splitValue: "latEnc=" + latEnc + " splitValue=" + splitValue;
            } else {
              assert lonEnc >= splitValue: "lonEnc=" + lonEnc + " splitValue=" + splitValue;
            }
            rightWriter.append(latEnc, lonEnc, ord, docID);
          }
        }
        bitSet.clear(0, pointCount);
        success = true;
      } finally {
        if (success) {
          IOUtils.close(reader, leftWriter, rightWriter);
        } else {
          IOUtils.closeWhileHandlingException(reader, leftWriter, rightWriter);
        }
      }

      assert leftCount == nextLeftCount: "leftCount=" + leftCount + " nextLeftCount=" + nextLeftCount;

      success = false;
      try {
        if (splitDim == 0) {
          //if (DEBUG) System.out.println(" recurse left");
          build(2*nodeID, leafNodeOffset,
                new PathSlice(source.writer, source.start, leftCount),
                new PathSlice(leftWriter, 0, leftCount),
                bitSet,
                out,
                minLatEnc, splitValue, minLonEnc, maxLonEnc,
                splitValues, leafBlockFPs);
          leftWriter.destroy();

          //if (DEBUG) System.out.println(" recurse right");
          build(2*nodeID+1, leafNodeOffset,
                new PathSlice(source.writer, source.start+leftCount, count-leftCount),
                new PathSlice(rightWriter, 0, count - leftCount),
                bitSet,
                out,
                splitValue, maxLatEnc, minLonEnc, maxLonEnc,
                splitValues, leafBlockFPs);
          rightWriter.destroy();
        } else {
          //if (DEBUG) System.out.println(" recurse left");
          build(2*nodeID, leafNodeOffset,
                new PathSlice(leftWriter, 0, leftCount),
                new PathSlice(source.writer, source.start, leftCount),
                bitSet,
                out,
                minLatEnc, maxLatEnc, minLonEnc, splitValue,
                splitValues, leafBlockFPs);

          leftWriter.destroy();

          //if (DEBUG) System.out.println(" recurse right");
          build(2*nodeID+1, leafNodeOffset,
                new PathSlice(rightWriter, 0, count-leftCount),
                new PathSlice(source.writer, source.start+leftCount, count-leftCount),
                bitSet,
                out,
                minLatEnc, maxLatEnc, splitValue, maxLonEnc,
                splitValues, leafBlockFPs);
          rightWriter.destroy();
        }
        success = true;
      } finally {
        if (success == false) {
          try {
            leftWriter.destroy();
          } catch (Throwable t) {
            // Suppress to keep throwing original exc
          }
          try {
            rightWriter.destroy();
          } catch (Throwable t) {
            // Suppress to keep throwing original exc
          }
        }
      }

      splitValues[nodeID] = splitValue;
    }
  }

  LatLonWriter getWriter(long count) throws IOException {
    if (count < maxPointsSortInHeap) {
      return new HeapLatLonWriter((int) count);
    } else {
      return new OfflineLatLonWriter(tempDir, tempFileNamePrefix, count);
    }
  }

  // TODO: move/share all this into GeoUtils

  // We allow one iota over the true max:
  static final double MAX_LAT_INCL = Math.nextAfter(90.0D, Double.POSITIVE_INFINITY);
  static final double MAX_LON_INCL = Math.nextAfter(180.0D, Double.POSITIVE_INFINITY);
  static final double MIN_LAT_INCL = -90.0D;
  static final double MIN_LON_INCL = -180.0D;

  static boolean validLat(double lat) {
    return Double.isNaN(lat) == false && lat >= MIN_LAT_INCL && lat <= MAX_LAT_INCL;
  }

  static boolean validLon(double lon) {
    return Double.isNaN(lon) == false && lon >= MIN_LON_INCL && lon <= MAX_LON_INCL;
  }

  private static final int BITS = 32;

  // -3 so valid lat/lon never hit the Integer.MIN_VALUE nor Integer.MAX_VALUE:
  private static final double LON_SCALE = ((0x1L<<BITS)-3)/360.0D;
  private static final double LAT_SCALE = ((0x1L<<BITS)-3)/180.0D;

  /** Max quantization error for both lat and lon when encoding/decoding into 32 bits */
  public static final double TOLERANCE = 1E-7;

  /** Quantizes double (64 bit) latitude into 32 bits */
  static int encodeLat(double lat) {
    assert validLat(lat): "lat=" + lat;
    long x = (long) (lat * LAT_SCALE);
    // We use Integer.MAX_VALUE as a sentinel:
    assert x < Integer.MAX_VALUE: "lat=" + lat + " mapped to Integer.MAX_VALUE + " + (x - Integer.MAX_VALUE);
    assert x > Integer.MIN_VALUE: "lat=" + lat + " mapped to Integer.MIN_VALUE";
    return (int) x;
  }

  /** Quantizes double (64 bit) longitude into 32 bits */
  static int encodeLon(double lon) {
    assert validLon(lon): "lon=" + lon;
    long x = (long) (lon * LON_SCALE);
    // We use Integer.MAX_VALUE as a sentinel:
    assert x < Integer.MAX_VALUE;
    assert x > Integer.MIN_VALUE;
    return (int) x;
  }

  /** Turns quantized value from {@link #encodeLat} back into a double. */
  static double decodeLat(int x) {
    return x / LAT_SCALE;
  }

  /** Turns quantized value from {@link #encodeLon} back into a double. */
  static double decodeLon(int x) {
    return x / LON_SCALE;
  }
}
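With (2^32 - 3) steps spread across 180 degrees of latitude, one quantization step is about 4.2e-8 degrees, comfortably inside the advertised TOLERANCE of 1E-7. A standalone round-trip check mirroring encodeLat/decodeLat (illustrative input value):

class QuantizationDemo {
  static final int BITS = 32;
  static final double LAT_SCALE = ((0x1L << BITS) - 3) / 180.0D;

  public static void main(String[] args) {
    double lat = 37.7749;
    int enc = (int) (lat * LAT_SCALE);   // quantize (truncates toward zero)
    double back = enc / LAT_SCALE;       // decode
    double err = Math.abs(back - lat);   // < one step ~ 4.2e-8 degrees
    System.out.println(lat + " -> " + enc + " -> " + back + " (err=" + err + ")");
  }
}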
@ -1,88 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;

final class GrowingHeapLatLonWriter implements LatLonWriter {
  int[] latEncs;
  int[] lonEncs;
  int[] docIDs;
  long[] ords;
  private int nextWrite;
  final int maxSize;

  public GrowingHeapLatLonWriter(int maxSize) {
    latEncs = new int[16];
    lonEncs = new int[16];
    docIDs = new int[16];
    ords = new long[16];
    this.maxSize = maxSize;
  }

  private int[] growExact(int[] arr, int size) {
    assert size > arr.length;
    int[] newArr = new int[size];
    System.arraycopy(arr, 0, newArr, 0, arr.length);
    return newArr;
  }

  private long[] growExact(long[] arr, int size) {
    assert size > arr.length;
    long[] newArr = new long[size];
    System.arraycopy(arr, 0, newArr, 0, arr.length);
    return newArr;
  }

  @Override
  public void append(int latEnc, int lonEnc, long ord, int docID) {
    assert ord == nextWrite;
    if (latEncs.length == nextWrite) {
      int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, RamUsageEstimator.NUM_BYTES_INT));
      assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite;
      latEncs = growExact(latEncs, nextSize);
      lonEncs = growExact(lonEncs, nextSize);
      ords = growExact(ords, nextSize);
      docIDs = growExact(docIDs, nextSize);
    }
    latEncs[nextWrite] = latEnc;
    lonEncs[nextWrite] = lonEnc;
    ords[nextWrite] = ord;
    docIDs[nextWrite] = docID;
    nextWrite++;
  }

  @Override
  public LatLonReader getReader(long start) {
    return new HeapLatLonReader(latEncs, lonEncs, ords, docIDs, (int) start, nextWrite);
  }

  @Override
  public void close() {
  }

  @Override
  public void destroy() {
  }

  @Override
  public String toString() {
    return "GrowingHeapLatLonWriter(count=" + nextWrite + " alloc=" + latEncs.length + ")";
  }
}
|
|
@@ -1,67 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

final class HeapLatLonReader implements LatLonReader {
  private int curRead;
  final int[] latEncs;
  final int[] lonEncs;
  final long[] ords;
  final int[] docIDs;
  final int end;

  HeapLatLonReader(int[] latEncs, int[] lonEncs, long[] ords, int[] docIDs, int start, int end) {
    this.latEncs = latEncs;
    this.lonEncs = lonEncs;
    this.ords = ords;
    this.docIDs = docIDs;
    curRead = start-1;
    this.end = end;
  }

  @Override
  public boolean next() {
    curRead++;
    return curRead < end;
  }

  @Override
  public int latEnc() {
    return latEncs[curRead];
  }

  @Override
  public int lonEnc() {
    return lonEncs[curRead];
  }

  @Override
  public int docID() {
    return docIDs[curRead];
  }

  @Override
  public long ord() {
    return ords[curRead];
  }

  @Override
  public void close() {
  }
}
@@ -1,66 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

final class HeapLatLonWriter implements LatLonWriter {
  final int[] latEncs;
  final int[] lonEncs;
  final int[] docIDs;
  final long[] ords;
  private int nextWrite;
  private boolean closed;

  public HeapLatLonWriter(int count) {
    latEncs = new int[count];
    lonEncs = new int[count];
    docIDs = new int[count];
    ords = new long[count];
  }

  @Override
  public void append(int latEnc, int lonEnc, long ord, int docID) {
    latEncs[nextWrite] = latEnc;
    lonEncs[nextWrite] = lonEnc;
    ords[nextWrite] = ord;
    docIDs[nextWrite] = docID;
    nextWrite++;
  }

  @Override
  public LatLonReader getReader(long start) {
    assert closed;
    return new HeapLatLonReader(latEncs, lonEncs, ords, docIDs, (int) start, latEncs.length);
  }

  @Override
  public void close() {
    closed = true;
    if (nextWrite != latEncs.length) {
      throw new IllegalStateException("only wrote " + nextWrite + " values, but expected " + latEncs.length);
    }
  }

  @Override
  public void destroy() {
  }

  @Override
  public String toString() {
    return "HeapLatLonWriter(count=" + latEncs.length + ")";
  }
}
@@ -1,31 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Closeable;
import java.io.IOException;

/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
interface LatLonReader extends Closeable {
  boolean next() throws IOException;
  int latEnc();
  int lonEnc();
  long ord();
  int docID();
}
@@ -1,29 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Closeable;
import java.io.IOException;

/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
interface LatLonWriter extends Closeable {
  void append(int latEnc, int lonEnc, long ord, int docID) throws IOException;
  LatLonReader getReader(long start) throws IOException;
  void destroy() throws IOException;
}
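
Together these two package-private interfaces define the writer/reader protocol the old BKD tree builder drove: values are appended in ord order, the writer is closed (the fixed-size variants verify the expected count there), and a reader is then opened at a start offset. A minimal lifecycle sketch of the API being removed here, assuming in-package access and made-up encoded values:

    LatLonWriter writer = new HeapLatLonWriter(2);
    writer.append(100, 200, 0, 7);   // latEnc, lonEnc, ord, docID
    writer.append(101, 201, 1, 9);
    writer.close();                  // HeapLatLonWriter verifies exactly 2 values were written
    try (LatLonReader reader = writer.getReader(0)) {
      while (reader.next()) {
        int docID = reader.docID();  // 7, then 9
      }
    }
    writer.destroy();                // no-op for heap variants; deletes the temp file offline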
@@ -1,78 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;

final class OfflineLatLonReader implements LatLonReader {
  final IndexInput in;
  long countLeft;
  private int latEnc;
  private int lonEnc;
  private long ord;
  private int docID;

  OfflineLatLonReader(Directory tempDir, String tempFileName, long start, long count) throws IOException {
    in = tempDir.openInput(tempFileName, IOContext.READONCE);
    in.seek(start * BKDTreeWriter.BYTES_PER_DOC);
    this.countLeft = count;
  }

  @Override
  public boolean next() throws IOException {
    if (countLeft == 0) {
      return false;
    }
    countLeft--;
    latEnc = in.readInt();
    lonEnc = in.readInt();
    ord = in.readLong();
    docID = in.readInt();
    return true;
  }

  @Override
  public int latEnc() {
    return latEnc;
  }

  @Override
  public int lonEnc() {
    return lonEnc;
  }

  @Override
  public long ord() {
    return ord;
  }

  @Override
  public int docID() {
    return docID;
  }

  @Override
  public void close() throws IOException {
    in.close();
  }
}
@@ -1,77 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;

final class OfflineLatLonWriter implements LatLonWriter {

  final Directory tempDir;
  final byte[] scratchBytes = new byte[BKDTreeWriter.BYTES_PER_DOC];
  final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
  final IndexOutput out;
  final long count;
  private long countWritten;
  private boolean closed;

  public OfflineLatLonWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException {
    this.tempDir = tempDir;
    out = tempDir.createTempOutput(tempFileNamePrefix, "bkd", IOContext.DEFAULT);
    this.count = count;
  }

  @Override
  public void append(int latEnc, int lonEnc, long ord, int docID) throws IOException {
    out.writeInt(latEnc);
    out.writeInt(lonEnc);
    out.writeLong(ord);
    out.writeInt(docID);
    countWritten++;
  }

  @Override
  public LatLonReader getReader(long start) throws IOException {
    assert closed;
    return new OfflineLatLonReader(tempDir, out.getName(), start, count-start);
  }

  @Override
  public void close() throws IOException {
    closed = true;
    out.close();
    if (count != countWritten) {
      throw new IllegalStateException("wrote " + countWritten + " values, but expected " + count);
    }
  }

  @Override
  public void destroy() throws IOException {
    tempDir.deleteFile(out.getName());
  }

  @Override
  public String toString() {
    return "OfflineLatLonWriter(count=" + count + " tempFileName=" + out.getName() + ")";
  }
}
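
The offline pair implies a fixed on-disk record: append() writes int latEnc, int lonEnc, long ord, int docID, and the reader seeks by start * BKDTreeWriter.BYTES_PER_DOC. BKDTreeWriter itself is outside this hunk, but for that seek arithmetic to line up the constant presumably works out to:

    // 4 (latEnc) + 4 (lonEnc) + 8 (ord) + 4 (docID) = 20 bytes per record
    static final int BYTES_PER_DOC = Integer.BYTES + Integer.BYTES + Long.BYTES + Integer.BYTES;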
@@ -1,28 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<!-- not a package-info.java, because we already defined this package in core/ -->

<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
This package contains a BKD spatial tree implementation for indexing lat/lon points and fast shape searching.
</body>
</html>
@@ -0,0 +1,88 @@
package org.apache.lucene.document;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.bkd.BKDUtil;

/** Add this to a document to index lat/lon point dimensionally */
public final class DimensionalLatLonField extends Field {

  public static final FieldType TYPE = new FieldType();
  static {
    TYPE.setDimensions(2, 4);
    TYPE.freeze();
  }

  /**
   * Creates a new DimensionalLatLonField with the specified lat and lon
   * @param name field name
   * @param lat double latitude
   * @param lon double longitude
   * @throws IllegalArgumentException if the field name is null or lat or lon are out of bounds
   */
  public DimensionalLatLonField(String name, double lat, double lon) {
    super(name, TYPE);
    if (GeoUtils.isValidLat(lat) == false) {
      throw new IllegalArgumentException("invalid lat (" + lat + "): must be -90 to 90");
    }
    if (GeoUtils.isValidLon(lon) == false) {
      throw new IllegalArgumentException("invalid lon (" + lon + "): must be -180 to 180");
    }
    byte[] bytes = new byte[8];
    BKDUtil.intToBytes(encodeLat(lat), bytes, 0);
    BKDUtil.intToBytes(encodeLon(lon), bytes, 1);
    fieldsData = new BytesRef(bytes);
  }

  public static final double TOLERANCE = 1E-7;

  private static final int BITS = 32;

  private static final double LON_SCALE = (0x1L<<BITS)/360.0D;
  private static final double LAT_SCALE = (0x1L<<BITS)/180.0D;

  /** Quantizes double (64 bit) latitude into 32 bits */
  public static int encodeLat(double lat) {
    assert GeoUtils.isValidLat(lat): "lat=" + lat;
    long x = (long) (lat * LAT_SCALE);
    assert x < Integer.MAX_VALUE: "lat=" + lat + " mapped to Integer.MAX_VALUE + " + (x - Integer.MAX_VALUE);
    assert x > Integer.MIN_VALUE: "lat=" + lat + " mapped to Integer.MIN_VALUE";
    return (int) x;
  }

  /** Quantizes double (64 bit) longitude into 32 bits */
  public static int encodeLon(double lon) {
    assert GeoUtils.isValidLon(lon): "lon=" + lon;
    long x = (long) (lon * LON_SCALE);
    assert x < Integer.MAX_VALUE;
    assert x > Integer.MIN_VALUE;
    return (int) x;
  }

  /** Turns quantized value from {@link #encodeLat} back into a double. */
  public static double decodeLat(int x) {
    return x / LAT_SCALE;
  }

  /** Turns quantized value from {@link #encodeLon} back into a double. */
  public static double decodeLon(int x) {
    return x / LON_SCALE;
  }
}
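
encodeLat/encodeLon map degrees onto a 32-bit grid, so one quantization step is 180/2^32 (about 4.2e-8 degrees) for latitude and 360/2^32 (about 8.4e-8) for longitude, both below the class's TOLERANCE of 1E-7. A round-trip sketch using the public helpers above, with an arbitrary coordinate:

    double lat = 40.7128, lon = -74.0060;
    int latEnc = DimensionalLatLonField.encodeLat(lat);
    int lonEnc = DimensionalLatLonField.encodeLon(lon);
    // Decoding recovers the original to within one quantization step:
    assert Math.abs(DimensionalLatLonField.decodeLat(latEnc) - lat) < DimensionalLatLonField.TOLERANCE;
    assert Math.abs(DimensionalLatLonField.decodeLon(lonEnc) - lon) < DimensionalLatLonField.TOLERANCE;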
@@ -1,84 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;

final class GrowingHeapSliceWriter implements SliceWriter {
  long[] values;
  int[] docIDs;
  long[] ords;
  private int nextWrite;
  final int maxSize;

  public GrowingHeapSliceWriter(int maxSize) {
    values = new long[16];
    docIDs = new int[16];
    ords = new long[16];
    this.maxSize = maxSize;
  }

  private int[] growExact(int[] arr, int size) {
    assert size > arr.length;
    int[] newArr = new int[size];
    System.arraycopy(arr, 0, newArr, 0, arr.length);
    return newArr;
  }

  private long[] growExact(long[] arr, int size) {
    assert size > arr.length;
    long[] newArr = new long[size];
    System.arraycopy(arr, 0, newArr, 0, arr.length);
    return newArr;
  }

  @Override
  public void append(long value, long ord, int docID) {
    assert ord == nextWrite;
    if (values.length == nextWrite) {
      int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, RamUsageEstimator.NUM_BYTES_INT));
      assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite;
      values = growExact(values, nextSize);
      ords = growExact(ords, nextSize);
      docIDs = growExact(docIDs, nextSize);
    }
    values[nextWrite] = value;
    ords[nextWrite] = ord;
    docIDs[nextWrite] = docID;
    nextWrite++;
  }

  @Override
  public SliceReader getReader(long start) {
    return new HeapSliceReader(values, ords, docIDs, (int) start, nextWrite);
  }

  @Override
  public void close() {
  }

  @Override
  public void destroy() {
  }

  @Override
  public String toString() {
    return "GrowingHeapSliceWriter(count=" + nextWrite + " alloc=" + values.length + ")";
  }
}
@@ -1,60 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

final class HeapSliceReader implements SliceReader {
  private int curRead;
  final long[] values;
  final long[] ords;
  final int[] docIDs;
  final int end;

  HeapSliceReader(long[] values, long[] ords, int[] docIDs, int start, int end) {
    this.values = values;
    this.ords = ords;
    this.docIDs = docIDs;
    curRead = start-1;
    this.end = end;
  }

  @Override
  public boolean next() {
    curRead++;
    return curRead < end;
  }

  @Override
  public long value() {
    return values[curRead];
  }

  @Override
  public int docID() {
    return docIDs[curRead];
  }

  @Override
  public long ord() {
    return ords[curRead];
  }

  @Override
  public void close() {
  }
}
@@ -1,63 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

final class HeapSliceWriter implements SliceWriter {
  final long[] values;
  final int[] docIDs;
  final long[] ords;
  private int nextWrite;
  private boolean closed;

  public HeapSliceWriter(int count) {
    values = new long[count];
    docIDs = new int[count];
    ords = new long[count];
  }

  @Override
  public void append(long value, long ord, int docID) {
    values[nextWrite] = value;
    ords[nextWrite] = ord;
    docIDs[nextWrite] = docID;
    nextWrite++;
  }

  @Override
  public SliceReader getReader(long start) {
    assert closed;
    return new HeapSliceReader(values, ords, docIDs, (int) start, values.length);
  }

  @Override
  public void close() {
    closed = true;
    if (nextWrite != values.length) {
      throw new IllegalStateException("only wrote " + nextWrite + " values, but expected " + values.length);
    }
  }

  @Override
  public void destroy() {
  }

  @Override
  public String toString() {
    return "HeapSliceWriter(count=" + values.length + ")";
  }
}
@@ -1,157 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;

import java.io.IOException;

/** Finds all previously indexed long values that fall within the specified range.
 *
 * <p>The field must be indexed with {@link RangeTreeDocValuesFormat}, and {@link SortedNumericDocValuesField} added per document.
 *
 * @lucene.experimental */

public class NumericRangeTreeQuery extends Query {
  final String field;
  final Long minValue;
  final Long maxValue;
  final boolean minInclusive;
  final boolean maxInclusive;

  // TODO: sugar for all numeric conversions?

  /** Matches all values in the specified long range. */
  public NumericRangeTreeQuery(String field, Long minValue, boolean minInclusive, Long maxValue, boolean maxInclusive) {
    this.field = field;
    this.minInclusive = minInclusive;
    this.minValue = minValue;
    this.maxInclusive = maxInclusive;
    this.maxValue = maxValue;
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {

    // I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
    // used in the first pass:

    return new ConstantScoreWeight(this) {

      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        LeafReader reader = context.reader();
        SortedNumericDocValues sdv = reader.getSortedNumericDocValues(field);
        if (sdv == null) {
          // No docs in this segment had this field
          return null;
        }

        if (sdv instanceof RangeTreeSortedNumericDocValues == false) {
          throw new IllegalStateException("field \"" + field + "\" was not indexed with RangeTreeDocValuesFormat: got: " + sdv);
        }
        RangeTreeSortedNumericDocValues treeDV = (RangeTreeSortedNumericDocValues) sdv;
        RangeTreeReader tree = treeDV.getRangeTreeReader();

        // lower
        long minBoundIncl = (minValue == null) ? Long.MIN_VALUE : minValue.longValue();

        if (minInclusive == false && minValue != null) {
          if (minBoundIncl == Long.MAX_VALUE) {
            return null;
          }
          minBoundIncl++;
        }

        // upper
        long maxBoundIncl = (maxValue == null) ? Long.MAX_VALUE : maxValue.longValue();
        if (maxInclusive == false && maxValue != null) {
          if (maxBoundIncl == Long.MIN_VALUE) {
            return null;
          }
          maxBoundIncl--;
        }

        if (maxBoundIncl < minBoundIncl) {
          return null;
        }

        DocIdSet result = tree.intersect(minBoundIncl, maxBoundIncl, treeDV.delegate, context.reader().maxDoc());

        final DocIdSetIterator disi = result.iterator();

        return new ConstantScoreScorer(this, score(), disi);
      }
    };
  }

  @Override
  public int hashCode() {
    int hash = super.hashCode();
    if (minValue != null) hash += minValue.hashCode()^0x14fa55fb;
    if (maxValue != null) hash += maxValue.hashCode()^0x733fa5fe;
    return hash +
      (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+
      (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe);
  }

  @Override
  public boolean equals(Object other) {
    if (super.equals(other)) {
      final NumericRangeTreeQuery q = (NumericRangeTreeQuery) other;
      return (
        (q.minValue == null ? minValue == null : q.minValue.equals(minValue)) &&
        (q.maxValue == null ? maxValue == null : q.maxValue.equals(maxValue)) &&
        minInclusive == q.minInclusive &&
        maxInclusive == q.maxInclusive
      );
    }

    return false;
  }

  @Override
  public String toString(String field) {
    final StringBuilder sb = new StringBuilder();
    sb.append(getClass().getSimpleName());
    sb.append(':');
    if (this.field.equals(field) == false) {
      sb.append("field=");
      sb.append(this.field);
      sb.append(':');
    }

    return sb.append(minInclusive ? '[' : '{')
      .append((minValue == null) ? "*" : minValue.toString())
      .append(" TO ")
      .append((maxValue == null) ? "*" : maxValue.toString())
      .append(maxInclusive ? ']' : '}')
      .toString();
  }
}
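
A usage sketch for the query above, assuming an IndexSearcher named searcher over an index whose hypothetical "price" field was written with RangeTreeDocValuesFormat and indexed per document as a SortedNumericDocValuesField; null bounds mean open-ended:

    // Matches 10 <= price < 100:
    Query q = new NumericRangeTreeQuery("price", 10L, true, 100L, false);
    TopDocs hits = searcher.search(q, 10);

    // Open-ended lower bound: price <= 0
    Query nonPositive = new NumericRangeTreeQuery("price", null, true, 0L, true);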
@@ -1,71 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;

final class OfflineSliceReader implements SliceReader {
  final IndexInput in;
  private long countLeft;
  private long value;
  private long ord;
  private int docID;

  OfflineSliceReader(Directory tempDir, String tempFileName, long start, long count) throws IOException {
    in = tempDir.openInput(tempFileName, IOContext.READONCE);
    in.seek(start * RangeTreeWriter.BYTES_PER_DOC);
    this.countLeft = count;
  }

  @Override
  public boolean next() throws IOException {
    if (countLeft == 0) {
      return false;
    }
    countLeft--;
    value = in.readLong();
    ord = in.readLong();
    docID = in.readInt();
    return true;
  }

  @Override
  public long value() {
    return value;
  }

  @Override
  public long ord() {
    return ord;
  }

  @Override
  public int docID() {
    return docID;
  }

  @Override
  public void close() throws IOException {
    in.close();
  }
}
@@ -1,76 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;

final class OfflineSliceWriter implements SliceWriter {

  final Directory tempDir;
  final byte[] scratchBytes = new byte[RangeTreeWriter.BYTES_PER_DOC];
  final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
  final IndexOutput tempFile;
  final long count;
  private boolean closed;
  private long countWritten;

  public OfflineSliceWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException {
    this.tempDir = tempDir;
    tempFile = tempDir.createTempOutput(tempFileNamePrefix, "rangetree", IOContext.DEFAULT);
    this.count = count;
  }

  @Override
  public void append(long value, long ord, int docID) throws IOException {
    tempFile.writeLong(value);
    tempFile.writeLong(ord);
    tempFile.writeInt(docID);
    countWritten++;
  }

  @Override
  public SliceReader getReader(long start) throws IOException {
    assert closed;
    return new OfflineSliceReader(tempDir, tempFile.getName(), start, count-start);
  }

  @Override
  public void close() throws IOException {
    closed = true;
    tempFile.close();
    if (count != countWritten) {
      throw new IllegalStateException("wrote " + countWritten + " values, but expected " + count);
    }
  }

  @Override
  public void destroy() throws IOException {
    tempDir.deleteFile(tempFile.getName());
  }

  @Override
  public String toString() {
    return "OfflineSliceWriter(count=" + count + " tempFileName=" + tempFile.getName() + ")";
  }
}
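
The slice record mirrors the lat/lon one, just with a single long value instead of two encoded ints: append() writes long value, long ord, int docID, so RangeTreeWriter.BYTES_PER_DOC presumably also comes to 8 + 8 + 4 = 20 bytes, the stride OfflineSliceReader's seek depends on:

    static final int BYTES_PER_DOC = Long.BYTES + Long.BYTES + Integer.BYTES; // 20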
@@ -1,148 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

class RangeTreeDocValuesConsumer extends DocValuesConsumer implements Closeable {
  final DocValuesConsumer delegate;
  final int maxPointsInLeafNode;
  final int maxPointsSortInHeap;
  final IndexOutput out;
  final Map<Integer,Long> fieldIndexFPs = new HashMap<>();
  final SegmentWriteState state;

  public RangeTreeDocValuesConsumer(DocValuesConsumer delegate, SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
    RangeTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
    this.delegate = delegate;
    this.maxPointsInLeafNode = maxPointsInLeafNode;
    this.maxPointsSortInHeap = maxPointsSortInHeap;
    this.state = state;
    String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.DATA_EXTENSION);
    out = state.directory.createOutput(datFileName, state.context);
    CodecUtil.writeIndexHeader(out, RangeTreeDocValuesFormat.DATA_CODEC_NAME, RangeTreeDocValuesFormat.DATA_VERSION_CURRENT,
                               state.segmentInfo.getId(), state.segmentSuffix);
  }

  @Override
  public void close() throws IOException {
    boolean success = false;
    try {
      CodecUtil.writeFooter(out);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(delegate, out);
      } else {
        IOUtils.closeWhileHandlingException(delegate, out);
      }
    }

    String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.META_EXTENSION);
    IndexOutput metaOut = state.directory.createOutput(metaFileName, state.context);
    success = false;
    try {
      CodecUtil.writeIndexHeader(metaOut, RangeTreeDocValuesFormat.META_CODEC_NAME, RangeTreeDocValuesFormat.META_VERSION_CURRENT,
                                 state.segmentInfo.getId(), state.segmentSuffix);
      metaOut.writeVInt(fieldIndexFPs.size());
      for(Map.Entry<Integer,Long> ent : fieldIndexFPs.entrySet()) {
        metaOut.writeVInt(ent.getKey());
        metaOut.writeVLong(ent.getValue());
      }
      CodecUtil.writeFooter(metaOut);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(metaOut);
      } else {
        IOUtils.closeWhileHandlingException(metaOut);
      }
    }
  }

  @Override
  public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
    delegate.addSortedNumericField(field, docToValueCount, values);
    RangeTreeWriter writer = new RangeTreeWriter(state.directory, state.segmentInfo.name, maxPointsInLeafNode, maxPointsSortInHeap);
    Iterator<Number> valueIt = values.iterator();
    Iterator<Number> valueCountIt = docToValueCount.iterator();
    //System.out.println("\nSNF: field=" + field.name);
    for (int docID=0;docID<state.segmentInfo.maxDoc();docID++) {
      assert valueCountIt.hasNext();
      int count = valueCountIt.next().intValue();
      for(int i=0;i<count;i++) {
        assert valueIt.hasNext();
        writer.add(valueIt.next().longValue(), docID);
      }
    }

    long indexStartFP = writer.finish(out);

    fieldIndexFPs.put(field.number, indexStartFP);
  }

  @Override
  public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
    throw new UnsupportedOperationException("use either SortedNumericDocValuesField or SortedSetDocValuesField");
  }

  @Override
  public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) {
    throw new UnsupportedOperationException("use either SortedNumericDocValuesField or SortedSetDocValuesField");
  }

  @Override
  public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) {
    throw new UnsupportedOperationException("use either SortedNumericDocValuesField or SortedSetDocValuesField");
  }

  @Override
  public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
    delegate.addSortedSetField(field, values, docToOrdCount, ords);
    RangeTreeWriter writer = new RangeTreeWriter(state.directory, state.segmentInfo.name, maxPointsInLeafNode, maxPointsSortInHeap);
    Iterator<Number> docToOrdCountIt = docToOrdCount.iterator();
    Iterator<Number> ordsIt = ords.iterator();
    //System.out.println("\nSSF: field=" + field.name);
    for (int docID=0;docID<state.segmentInfo.maxDoc();docID++) {
      assert docToOrdCountIt.hasNext();
      int count = docToOrdCountIt.next().intValue();
      for(int i=0;i<count;i++) {
        assert ordsIt.hasNext();
        long ord = ordsIt.next().longValue();
        writer.add(ord, docID);
      }
    }

    long indexStartFP = writer.finish(out);

    fieldIndexFPs.put(field.number, indexStartFP);
  }
}
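
For reference, the metadata file written by the consumer's close() (and parsed back by the producer further below) has a simple shape; a sketch inferred from the writeVInt/writeVLong calls above:

    // .ndm layout:
    //   IndexHeader ("RangeTreeMeta", version, segment id, segment suffix)
    //   VInt  fieldCount
    //   fieldCount times: { VInt fieldNumber, VLong indexStartFP into the .ndd file }
    //   Footer (checksum)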
@@ -1,112 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;

import java.io.IOException;

/**
 * A {@link DocValuesFormat} to efficiently index numeric values
 * from {@link SortedNumericDocValuesField} or BytesRef values from {@link SortedSetDocValuesField}
 * for numeric range queries using {@link NumericRangeTreeQuery} and arbitrary binary
 * range queries using {@link SortedSetRangeTreeQuery}.
 *
 * <p>This wraps {@link Lucene54DocValuesFormat}, but saves its own numeric tree
 * structures to disk for fast query-time intersection. See <a
 * href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a>
 * for details.
 *
 * <p>The numeric tree slices up 1D space into smaller and
 * smaller ranges, until the smallest ranges have approximately
 * between X/2 and X (X default is 1024) values in them, at which point
 * such leaf cells are written as a block to disk, while the index tree
 * structure, which records how space was sub-divided, is loaded into heap
 * at search time. The tree is then recursed based on whether
 * the left or right child overlaps with the query range, and once
 * a leaf block is reached, all documents in that leaf block are collected
 * if the cell is fully enclosed by the query range, or filtered and then
 * collected, if not.
 *
 * <p>The index is also quite compact, because docs only appear once in
 * the tree (no "prefix terms").
 *
 * <p>In addition to the files written by {@link Lucene54DocValuesFormat}, this format writes:
 * <ol>
 *   <li><tt>.ndd</tt>: numeric tree leaf data and index</li>
 *   <li><tt>.ndm</tt>: numeric tree metadata</li>
 * </ol>
 *
 * <p>The disk format is experimental and free to change suddenly, and this code likely has new and exciting bugs!
 *
 * @lucene.experimental */

public class RangeTreeDocValuesFormat extends DocValuesFormat {

  static final String DATA_CODEC_NAME = "RangeTreeData";
  static final int DATA_VERSION_START = 0;
  static final int DATA_VERSION_CURRENT = DATA_VERSION_START;
  static final String DATA_EXTENSION = "ndd";

  static final String META_CODEC_NAME = "RangeTreeMeta";
  static final int META_VERSION_START = 0;
  static final int META_VERSION_CURRENT = META_VERSION_START;
  static final String META_EXTENSION = "ndm";

  private final int maxPointsInLeafNode;
  private final int maxPointsSortInHeap;

  private final DocValuesFormat delegate = new Lucene54DocValuesFormat();

  /** Default constructor */
  public RangeTreeDocValuesFormat() {
    this(RangeTreeWriter.DEFAULT_MAX_VALUES_IN_LEAF_NODE, RangeTreeWriter.DEFAULT_MAX_VALUES_SORT_IN_HEAP);
  }

  /** Creates this with custom configuration.
   *
   * @param maxPointsInLeafNode Maximum number of points in each leaf cell. Smaller values create a deeper tree with larger in-heap index and possibly
   *    faster searching. The default is 1024.
   * @param maxPointsSortInHeap Maximum number of points where in-heap sort can be used. When the number of points exceeds this, a (slower)
   *    offline sort is used. The default is 128 * 1024.
   *
   * @lucene.experimental */
  public RangeTreeDocValuesFormat(int maxPointsInLeafNode, int maxPointsSortInHeap) {
    super("RangeTree");
    RangeTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
    this.maxPointsInLeafNode = maxPointsInLeafNode;
    this.maxPointsSortInHeap = maxPointsSortInHeap;
  }

  @Override
  public DocValuesConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
    return new RangeTreeDocValuesConsumer(delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
  }

  @Override
  public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
    return new RangeTreeDocValuesProducer(delegate.fieldsProducer(state), state);
  }
}
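
Wiring the format in follows the usual per-field codec override; a hedged sketch, assuming a default codec of this era (e.g. Lucene54Codec) with the standard getDocValuesFormatForField hook and a hypothetical "price" field:

    final DocValuesFormat rangeTree = new RangeTreeDocValuesFormat();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setCodec(new Lucene54Codec() {
      @Override
      public DocValuesFormat getDocValuesFormatForField(String field) {
        // Route only the range-tree field; everything else keeps the default format:
        return "price".equals(field) ? rangeTree : super.getDocValuesFormatForField(field);
      }
    });
    // At index time each document then adds:
    doc.add(new SortedNumericDocValuesField("price", 42L));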
@ -1,196 +0,0 @@
|
|||
package org.apache.lucene.rangetree;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Accountables;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
class RangeTreeDocValuesProducer extends DocValuesProducer {
|
||||
|
||||
private final Map<String,RangeTreeReader> treeReaders = new HashMap<>();
|
||||
private final Map<Integer,Long> fieldToIndexFPs = new HashMap<>();
|
||||
|
||||
private final IndexInput datIn;
|
||||
private final AtomicLong ramBytesUsed;
|
||||
private final int maxDoc;
|
||||
private final DocValuesProducer delegate;
|
||||
private final boolean merging;
|
||||
|
||||
public RangeTreeDocValuesProducer(DocValuesProducer delegate, SegmentReadState state) throws IOException {
|
||||
String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.META_EXTENSION);
|
||||
ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaFileName, state.context);
|
||||
CodecUtil.checkIndexHeader(metaIn, RangeTreeDocValuesFormat.META_CODEC_NAME, RangeTreeDocValuesFormat.META_VERSION_START, RangeTreeDocValuesFormat.META_VERSION_CURRENT,
|
||||
state.segmentInfo.getId(), state.segmentSuffix);
|
||||
int fieldCount = metaIn.readVInt();
|
||||
for(int i=0;i<fieldCount;i++) {
|
||||
int fieldNumber = metaIn.readVInt();
|
||||
long indexFP = metaIn.readVLong();
|
||||
fieldToIndexFPs.put(fieldNumber, indexFP);
|
||||
}
|
||||
CodecUtil.checkFooter(metaIn);
|
||||
metaIn.close();
|
||||
|
||||
String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, RangeTreeDocValuesFormat.DATA_EXTENSION);
|
||||
datIn = state.directory.openInput(datFileName, state.context);
|
||||
CodecUtil.checkIndexHeader(datIn, RangeTreeDocValuesFormat.DATA_CODEC_NAME, RangeTreeDocValuesFormat.DATA_VERSION_START, RangeTreeDocValuesFormat.DATA_VERSION_CURRENT,
|
||||
state.segmentInfo.getId(), state.segmentSuffix);
|
||||
ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
|
||||
maxDoc = state.segmentInfo.maxDoc();
|
||||
this.delegate = delegate;
|
||||
merging = false;
|
||||
}
|
||||
|
||||
// clone for merge: we don't hang onto the RangeTrees we load
|
||||
RangeTreeDocValuesProducer(RangeTreeDocValuesProducer orig) throws IOException {
|
||||
assert Thread.holdsLock(orig);
|
||||
datIn = orig.datIn.clone();
|
||||
ramBytesUsed = new AtomicLong(orig.ramBytesUsed.get());
|
||||
delegate = orig.delegate.getMergeInstance();
|
||||
fieldToIndexFPs.putAll(orig.fieldToIndexFPs);
|
||||
treeReaders.putAll(orig.treeReaders);
|
||||
merging = true;
|
||||
maxDoc = orig.maxDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
  public synchronized SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
    RangeTreeReader treeReader = treeReaders.get(field.name);
    if (treeReader == null) {
      // Lazy load
      Long fp = fieldToIndexFPs.get(field.number);

      // The FieldInfos check has already ensured we are a DV field of this type, and the Codec ensures
      // this DVFormat was used at write time:
      assert fp != null;

      // LUCENE-6697: never do real IOPs with the original IndexInput because search
      // threads can be concurrently cloning it:
      IndexInput clone = datIn.clone();
      clone.seek(fp);
      treeReader = new RangeTreeReader(clone);

      // Only hang onto the reader when we are not merging:
      if (merging == false) {
        treeReaders.put(field.name, treeReader);
        ramBytesUsed.addAndGet(treeReader.ramBytesUsed());
      }
    }

    return new RangeTreeSortedNumericDocValues(treeReader, delegate.getSortedNumeric(field));
  }

  @Override
  public void close() throws IOException {
    IOUtils.close(datIn, delegate);
  }

  @Override
  public void checkIntegrity() throws IOException {
    CodecUtil.checksumEntireFile(datIn);
  }

  @Override
  public NumericDocValues getNumeric(FieldInfo field) {
    throw new UnsupportedOperationException();
  }

  @Override
  public BinaryDocValues getBinary(FieldInfo field) {
    throw new UnsupportedOperationException();
  }

  @Override
  public SortedDocValues getSorted(FieldInfo field) {
    throw new UnsupportedOperationException();
  }

  @Override
  public synchronized SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
    RangeTreeReader treeReader = treeReaders.get(field.name);
    if (treeReader == null) {
      // Lazy load
      Long fp = fieldToIndexFPs.get(field.number);

      // The FieldInfos check has already ensured we are a DV field of this type, and the Codec ensures
      // this DVFormat was used at write time:
      assert fp != null;

      // LUCENE-6697: never do real IOPs with the original IndexInput because search
      // threads can be concurrently cloning it:
      IndexInput clone = datIn.clone();
      clone.seek(fp);
      treeReader = new RangeTreeReader(clone);

      // Only hang onto the reader when we are not merging:
      if (merging == false) {
        treeReaders.put(field.name, treeReader);
        ramBytesUsed.addAndGet(treeReader.ramBytesUsed());
      }
    }

    return new RangeTreeSortedSetDocValues(treeReader, delegate.getSortedSet(field));
  }

  @Override
  public Bits getDocsWithField(FieldInfo field) throws IOException {
    return delegate.getDocsWithField(field);
  }

  @Override
  public synchronized Collection<Accountable> getChildResources() {
    List<Accountable> resources = new ArrayList<>();
    for(Map.Entry<String,RangeTreeReader> ent : treeReaders.entrySet()) {
      resources.add(Accountables.namedAccountable("field " + ent.getKey(), ent.getValue()));
    }
    resources.add(Accountables.namedAccountable("delegate", delegate));

    return resources;
  }

  @Override
  public synchronized DocValuesProducer getMergeInstance() throws IOException {
    return new RangeTreeDocValuesProducer(this);
  }

  @Override
  public long ramBytesUsed() {
    return ramBytesUsed.get() + delegate.ramBytesUsed();
  }
}
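
The lazy-load path above follows the LUCENE-6697 rule: never seek or read the shared IndexInput directly, only a per-reader clone(), since concurrent search threads may be cloning it at the same time. A minimal sketch of that idiom, with illustrative class and method names that are not part of the patch:

import java.io.IOException;
import org.apache.lucene.store.IndexInput;

// Illustration only: the clone-then-seek idiom the producer uses for lazy loading.
final class LazyIndexSlice {
  private final IndexInput datIn; // shared; never used for actual reads

  LazyIndexSlice(IndexInput datIn) {
    this.datIn = datIn;
  }

  IndexInput openAt(long fp) throws IOException {
    IndexInput clone = datIn.clone(); // cheap, thread-private view
    clone.seek(fp);                   // safe: only the clone's position moves
    return clone;
  }
}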

@ -1,202 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.RamUsageEstimator;

import java.io.IOException;
import java.util.Arrays;

/** Handles intersection of a range with a numeric tree previously written with {@link RangeTreeWriter}.
 *
 * @lucene.experimental */

final class RangeTreeReader implements Accountable {
  private final long[] blockFPs;
  private final long[] blockMinValues;
  final IndexInput in;
  final long globalMaxValue;
  final int approxDocsPerBlock;

  public RangeTreeReader(IndexInput in) throws IOException {

    // Read index:
    int numLeaves = in.readVInt();
    approxDocsPerBlock = in.readVInt();

    blockMinValues = new long[numLeaves];
    for(int i=0;i<numLeaves;i++) {
      blockMinValues[i] = in.readLong();
    }
    blockFPs = new long[numLeaves];
    for(int i=0;i<numLeaves;i++) {
      blockFPs[i] = in.readVLong();
    }
    globalMaxValue = in.readLong();

    this.in = in;
  }

  public long getMinValue() {
    return blockMinValues[0];
  }

  public long getMaxValue() {
    return globalMaxValue;
  }

  private static final class QueryState {
    final IndexInput in;
    final DocIdSetBuilder docs;
    final long minValueIncl;
    final long maxValueIncl;
    final SortedNumericDocValues sndv;

    public QueryState(IndexInput in, int maxDoc,
                      long minValueIncl, long maxValueIncl,
                      SortedNumericDocValues sndv) {
      this.in = in;
      this.docs = new DocIdSetBuilder(maxDoc);
      this.minValueIncl = minValueIncl;
      this.maxValueIncl = maxValueIncl;
      this.sndv = sndv;
    }
  }

  public DocIdSet intersect(long minIncl, long maxIncl, SortedNumericDocValues sndv, int maxDoc) throws IOException {

    if (minIncl > maxIncl) {
      return DocIdSet.EMPTY;
    }

    if (minIncl > globalMaxValue || maxIncl < blockMinValues[0]) {
      return DocIdSet.EMPTY;
    }

    QueryState state = new QueryState(in.clone(), maxDoc,
                                      minIncl, maxIncl,
                                      sndv);

    int startBlockIncl = Arrays.binarySearch(blockMinValues, minIncl);
    if (startBlockIncl >= 0) {
      // There can be dups here, when the same value is added many
      // times. Also, we need the first block whose min is < minIncl:
      while (startBlockIncl > 0 && blockMinValues[startBlockIncl] == minIncl) {
        startBlockIncl--;
      }
    } else {
      startBlockIncl = Math.max(-startBlockIncl-2, 0);
    }

    int endBlockIncl = Arrays.binarySearch(blockMinValues, maxIncl);
    if (endBlockIncl >= 0) {
      // There can be dups here, when the same value is added many
      // times. Also, we need to include all blocks whose min equals maxIncl:
      while (endBlockIncl < blockMinValues.length-1 && blockMinValues[endBlockIncl] == maxIncl) {
        endBlockIncl++;
      }
    } else {
      endBlockIncl = Math.max(-endBlockIncl-2, 0);
    }

    assert startBlockIncl <= endBlockIncl;

    state.in.seek(blockFPs[startBlockIncl]);

    //System.out.println("startBlockIncl=" + startBlockIncl + " endBlockIncl=" + endBlockIncl);

    // Rough estimate of how many hits we'll see. Note that in the degenerate case
    // (index same value many times) this could be a big over-estimate, but in the typical
    // case it's good:
    state.docs.grow(approxDocsPerBlock * (endBlockIncl - startBlockIncl + 1));

    int hitCount = 0;
    for (int block=startBlockIncl;block<=endBlockIncl;block++) {
      boolean doFilter = blockMinValues[block] <= minIncl || block == blockMinValues.length-1 || blockMinValues[block+1] >= maxIncl;
      //System.out.println("  block=" + block + " min=" + blockMinValues[block] + " doFilter=" + doFilter);

      int newCount;
      if (doFilter) {
        // We must filter each hit:
        newCount = addSome(state);
      } else {
        newCount = addAll(state);
      }

      hitCount += newCount;
    }

    // NOTE: hitCount is an over-estimate in the multi-valued case:
    return state.docs.build(hitCount);
  }

  /** Adds all docs from the current block. */
  private int addAll(QueryState state) throws IOException {
    // How many values are stored in this leaf cell:
    int count = state.in.readVInt();
    state.docs.grow(count);
    for(int i=0;i<count;i++) {
      int docID = state.in.readInt();
      state.docs.add(docID);
    }

    return count;
  }

  /** Adds docs from the current block, filtering each hit against the query min/max. This
   *  is only needed on the boundary blocks. */
  private int addSome(QueryState state) throws IOException {
    int hitCount = 0;

    // How many values are stored in this leaf cell:
    int count = state.in.readVInt();
    state.docs.grow(count);
    for(int i=0;i<count;i++) {
      int docID = state.in.readInt();
      state.sndv.setDocument(docID);

      // How many values this doc has:
      int docValueCount = state.sndv.count();

      for(int j=0;j<docValueCount;j++) {
        long value = state.sndv.valueAt(j);

        if (value >= state.minValueIncl && value <= state.maxValueIncl) {
          state.docs.add(docID);
          hitCount++;

          // Stop processing values for this doc:
          break;
        }
      }
    }

    return hitCount;
  }

  @Override
  public long ramBytesUsed() {
    return blockMinValues.length * RamUsageEstimator.NUM_BYTES_LONG +
           blockFPs.length * RamUsageEstimator.NUM_BYTES_LONG;
  }
}
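
A query consumes this reader one segment at a time. A sketch of that call pattern, assuming `reader` is the field's RangeTreeReader, `sndv` its SortedNumericDocValues, and `maxDoc` the segment's doc count (the bounds 17 and 42 are made up):

// Collect all docs with at least one value in [17, 42] for one segment:
DocIdSet hits = reader.intersect(17L, 42L, sndv, maxDoc);
DocIdSetIterator disi = hits.iterator();
if (disi != null) {
  for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
    // ... consume matching docID ...
  }
}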

@ -1,49 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.SortedNumericDocValues;

class RangeTreeSortedNumericDocValues extends SortedNumericDocValues {
  final RangeTreeReader rangeTreeReader;
  final SortedNumericDocValues delegate;

  public RangeTreeSortedNumericDocValues(RangeTreeReader rangeTreeReader, SortedNumericDocValues delegate) {
    this.rangeTreeReader = rangeTreeReader;
    this.delegate = delegate;
  }

  public RangeTreeReader getRangeTreeReader() {
    return rangeTreeReader;
  }

  @Override
  public void setDocument(int doc) {
    delegate.setDocument(doc);
  }

  @Override
  public long valueAt(int index) {
    return delegate.valueAt(index);
  }

  @Override
  public int count() {
    return delegate.count();
  }
}

@ -1,66 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

class RangeTreeSortedSetDocValues extends SortedSetDocValues {
  final RangeTreeReader rangeTreeReader;
  final SortedSetDocValues delegate;

  public RangeTreeSortedSetDocValues(RangeTreeReader rangeTreeReader, SortedSetDocValues delegate) {
    this.rangeTreeReader = rangeTreeReader;
    this.delegate = delegate;
  }

  public RangeTreeReader getRangeTreeReader() {
    return rangeTreeReader;
  }

  @Override
  public long nextOrd() {
    return delegate.nextOrd();
  }

  @Override
  public void setDocument(int doc) {
    delegate.setDocument(doc);
  }

  @Override
  public BytesRef lookupOrd(long ord) {
    return delegate.lookupOrd(ord);
  }

  @Override
  public long getValueCount() {
    return delegate.getValueCount();
  }

  @Override
  public long lookupTerm(BytesRef key) {
    return delegate.lookupTerm(key);
  }

  @Override
  public TermsEnum termsEnum() {
    return delegate.termsEnum();
  }
}

@ -1,580 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.RamUsageEstimator;

// TODO
//   - could we just "use postings" to map leaf -> docIDs?
//   - we could also index "auto-prefix terms" here, and use better compression
//   - the index could be efficiently encoded as an FST, so we don't have wasteful
//     (monotonic) long[] leafBlockFPs; or we could use MonotonicLongValues ... but then
//     the index is already plenty small: 60M OSM points --> 1.1 MB with 128 points
//     per leaf, and you can reduce that by putting more points per leaf
//   - we can quantize the split values to 2 bytes (short): http://people.csail.mit.edu/tmertens/papers/qkdtree.pdf

/** Recursively builds a 1d BKD tree to assign all incoming {@code long} values to smaller
 *  and smaller ranges until the number of values in a given
 *  range is <= the <code>maxValuesInLeafNode</code>. The tree is
 *  fully balanced, which means the leaf nodes will have between 50% and 100% of
 *  the requested <code>maxValuesInLeafNode</code>, except for the adversarial case
 *  of indexing exactly the same value many times.
 *
 *  <p>
 *  See <a href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a> for details.
 *
 *  <p>This consumes heap during writing: for any nodes with fewer than <code>maxValuesSortInHeap</code> values, it holds
 *  the values in memory as simple java arrays.
 *
 *  <p>
 *  <b>NOTE</b>: This can write at most Integer.MAX_VALUE * <code>maxValuesInLeafNode</code> total values,
 *  which should be plenty since a Lucene index can have at most Integer.MAX_VALUE-1 documents.
 *
 *  @lucene.experimental */

class RangeTreeWriter {

  // value (long) + ord (long) + docID (int)
  static final int BYTES_PER_DOC = 2 * RamUsageEstimator.NUM_BYTES_LONG + RamUsageEstimator.NUM_BYTES_INT;

  public static final int DEFAULT_MAX_VALUES_IN_LEAF_NODE = 1024;

  /** This works out to max of ~10 MB peak heap tied up during writing: */
  public static final int DEFAULT_MAX_VALUES_SORT_IN_HEAP = 128*1024;

  private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
  private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);

  private final Directory tempDir;
  private final String tempFileNamePrefix;

  private OfflineSorter.ByteSequencesWriter offlineWriter;
  private GrowingHeapSliceWriter heapWriter;

  private IndexOutput tempInput;
  private final int maxValuesInLeafNode;
  private final int maxValuesSortInHeap;

  private long valueCount;
  private long globalMinValue = Long.MAX_VALUE;
  private long globalMaxValue = Long.MIN_VALUE;

  public RangeTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException {
    this(tempDir, tempFileNamePrefix, DEFAULT_MAX_VALUES_IN_LEAF_NODE, DEFAULT_MAX_VALUES_SORT_IN_HEAP);
  }

  // TODO: instead of maxValuesSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
  public RangeTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxValuesInLeafNode, int maxValuesSortInHeap) throws IOException {
    verifyParams(maxValuesInLeafNode, maxValuesSortInHeap);
    this.tempDir = tempDir;
    this.tempFileNamePrefix = tempFileNamePrefix;
    this.maxValuesInLeafNode = maxValuesInLeafNode;
    this.maxValuesSortInHeap = maxValuesSortInHeap;

    // We write the first maxValuesSortInHeap values in heap, then cut over to offline sort for additional values:
    heapWriter = new GrowingHeapSliceWriter(maxValuesSortInHeap);
  }

  public static void verifyParams(int maxValuesInLeafNode, int maxValuesSortInHeap) {
    if (maxValuesInLeafNode <= 0) {
      throw new IllegalArgumentException("maxValuesInLeafNode must be > 0; got " + maxValuesInLeafNode);
    }
    if (maxValuesInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) {
      throw new IllegalArgumentException("maxValuesInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxValuesInLeafNode);
    }
    if (maxValuesSortInHeap < maxValuesInLeafNode) {
      throw new IllegalArgumentException("maxValuesSortInHeap must be >= maxValuesInLeafNode; got " + maxValuesSortInHeap + " vs maxValuesInLeafNode=" + maxValuesInLeafNode);
    }
    if (maxValuesSortInHeap > ArrayUtil.MAX_ARRAY_LENGTH) {
      throw new IllegalArgumentException("maxValuesSortInHeap must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxValuesSortInHeap);
    }
  }

  /** If the current segment has too many values then we switch over to temp files / offline sort. */
  private void switchToOffline() throws IOException {

    // For each .add we just append to this input file, then in .finish we sort this input and recursively build the tree:
    tempInput = tempDir.createTempOutput(tempFileNamePrefix, "rangetree", IOContext.DEFAULT);
    offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput);
    for(int i=0;i<valueCount;i++) {
      scratchBytesOutput.reset(scratchBytes);
      scratchBytesOutput.writeLong(heapWriter.values[i]);
      scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
      scratchBytesOutput.writeVLong(i);
      // TODO: can/should OfflineSorter optimize the fixed-width case?
      offlineWriter.write(scratchBytes, 0, scratchBytes.length);
    }

    heapWriter = null;
  }

  void add(long value, int docID) throws IOException {
    if (valueCount >= maxValuesSortInHeap) {
      if (offlineWriter == null) {
        switchToOffline();
      }
      scratchBytesOutput.reset(scratchBytes);
      scratchBytesOutput.writeLong(value);
      scratchBytesOutput.writeVInt(docID);
      scratchBytesOutput.writeVLong(valueCount);
      offlineWriter.write(scratchBytes, 0, scratchBytes.length);
    } else {
      // Not too many values added yet, continue using heap:
      heapWriter.append(value, valueCount, docID);
    }

    valueCount++;
    globalMaxValue = Math.max(value, globalMaxValue);
    globalMinValue = Math.min(value, globalMinValue);
  }

  /** Changes the incoming {@link ByteSequencesWriter} file to a fixed-width-per-entry file, because we need to be able to slice
   *  as we recurse in {@link #build}. */
  private SliceWriter convertToFixedWidth(String in) throws IOException {
    BytesRefBuilder scratch = new BytesRefBuilder();
    scratch.grow(BYTES_PER_DOC);
    BytesRef bytes = scratch.get();
    ByteArrayDataInput dataReader = new ByteArrayDataInput();

    OfflineSorter.ByteSequencesReader reader = null;
    SliceWriter sortedWriter = null;
    boolean success = false;
    try {
      reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE));
      sortedWriter = getWriter(valueCount);
      for (long i=0;i<valueCount;i++) {
        boolean result = reader.read(scratch);
        assert result;
        dataReader.reset(bytes.bytes, bytes.offset, bytes.length);
        long value = dataReader.readLong();
        int docID = dataReader.readVInt();
        assert docID >= 0: "docID=" + docID;
        long ord = dataReader.readVLong();
        sortedWriter.append(value, ord, docID);
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(sortedWriter, reader);
      } else {
        IOUtils.closeWhileHandlingException(sortedWriter, reader);
        try {
          sortedWriter.destroy();
        } catch (Throwable t) {
          // Suppress to keep throwing original exc
        }
      }
    }

    return sortedWriter;
  }

  private SliceWriter sort() throws IOException {
    if (heapWriter != null) {

      assert valueCount < Integer.MAX_VALUE;

      // All buffered values are still in heap
      new InPlaceMergeSorter() {
        @Override
        protected void swap(int i, int j) {
          int docID = heapWriter.docIDs[i];
          heapWriter.docIDs[i] = heapWriter.docIDs[j];
          heapWriter.docIDs[j] = docID;

          long ord = heapWriter.ords[i];
          heapWriter.ords[i] = heapWriter.ords[j];
          heapWriter.ords[j] = ord;

          long value = heapWriter.values[i];
          heapWriter.values[i] = heapWriter.values[j];
          heapWriter.values[j] = value;
        }

        @Override
        protected int compare(int i, int j) {
          int cmp = Long.compare(heapWriter.values[i], heapWriter.values[j]);
          if (cmp != 0) {
            return cmp;
          }

          // Tie-break
          cmp = Integer.compare(heapWriter.docIDs[i], heapWriter.docIDs[j]);
          if (cmp != 0) {
            return cmp;
          }

          return Long.compare(heapWriter.ords[i], heapWriter.ords[j]);
        }
      }.sort(0, (int) valueCount);

      HeapSliceWriter sorted = new HeapSliceWriter((int) valueCount);
      for(int i=0;i<valueCount;i++) {
        sorted.append(heapWriter.values[i],
                      heapWriter.ords[i],
                      heapWriter.docIDs[i]);
      }
      sorted.close();

      return sorted;
    } else {

      // Offline sort:
      assert tempInput != null;

      final ByteArrayDataInput readerA = new ByteArrayDataInput();
      Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
        private final ByteArrayDataInput readerB = new ByteArrayDataInput();

        @Override
        public int compare(BytesRef a, BytesRef b) {
          readerA.reset(a.bytes, a.offset, a.length);
          final long valueA = readerA.readLong();
          final int docIDA = readerA.readVInt();
          final long ordA = readerA.readVLong();

          readerB.reset(b.bytes, b.offset, b.length);
          final long valueB = readerB.readLong();
          final int docIDB = readerB.readVInt();
          final long ordB = readerB.readVLong();

          int cmp = Long.compare(valueA, valueB);
          if (cmp != 0) {
            return cmp;
          }

          // Tie-break
          cmp = Integer.compare(docIDA, docIDB);
          if (cmp != 0) {
            return cmp;
          }

          return Long.compare(ordA, ordB);
        }
      };

      boolean success = false;
      OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp);
      String sortedFileName = sorter.sort(tempInput.getName());
      try {
        SliceWriter writer = convertToFixedWidth(sortedFileName);
        success = true;
        return writer;
      } finally {
        if (success) {
          tempDir.deleteFile(sortedFileName);
        } else {
          IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFileName);
        }
      }
    }
  }

  /** Writes the 1d BKD tree to the provided {@link IndexOutput} and returns the file offset where the index was written. */
  public long finish(IndexOutput out) throws IOException {

    if (offlineWriter != null) {
      offlineWriter.close();
    }

    if (valueCount == 0) {
      throw new IllegalStateException("at least one value must be indexed");
    }

    // TODO: we should use in-memory sort here, if number of values is small enough:

    long countPerLeaf = valueCount;
    long innerNodeCount = 1;

    while (countPerLeaf > maxValuesInLeafNode) {
      countPerLeaf = (countPerLeaf+1)/2;
      innerNodeCount *= 2;
    }

    //System.out.println("innerNodeCount=" + innerNodeCount);

    if (1+2*innerNodeCount >= Integer.MAX_VALUE) {
      throw new IllegalStateException("too many nodes; increase maxValuesInLeafNode (currently " + maxValuesInLeafNode + ") and reindex");
    }

    innerNodeCount--;

    int numLeaves = (int) (innerNodeCount+1);

    // Indexed by nodeID, but first (root) nodeID is 1
    long[] blockMinValues = new long[numLeaves];

    // +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 7)
    long[] leafBlockFPs = new long[numLeaves];

    // Make sure the math above "worked":
    assert valueCount / blockMinValues.length <= maxValuesInLeafNode: "valueCount=" + valueCount + " blockMinValues.length=" + blockMinValues.length + " maxValuesInLeafNode=" + maxValuesInLeafNode;
    //System.out.println("  avg pointsPerLeaf=" + (valueCount/blockMinValues.length));

    // Sort all docs by value:
    SliceWriter sortedWriter = null;

    boolean success = false;
    try {
      sortedWriter = sort();
      heapWriter = null;

      build(1, numLeaves,
            new PathSlice(sortedWriter, 0, valueCount),
            out,
            globalMinValue, globalMaxValue,
            blockMinValues,
            leafBlockFPs);
      success = true;
    } finally {
      if (success) {
        sortedWriter.destroy();
        if (tempInput != null) {
          tempDir.deleteFile(tempInput.getName());
        }
      } else {
        try {
          sortedWriter.destroy();
        } catch (Throwable t) {
          // Suppress to keep throwing original exc
        }
        if (tempInput != null) {
          IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName());
        }
      }
    }

    //System.out.println("Total nodes: " + innerNodeCount);

    // Write index:
    long indexFP = out.getFilePointer();
    out.writeVInt(numLeaves);
    out.writeVInt((int) (valueCount / numLeaves));

    for (int i=0;i<blockMinValues.length;i++) {
      out.writeLong(blockMinValues[i]);
    }
    for (int i=0;i<leafBlockFPs.length;i++) {
      out.writeVLong(leafBlockFPs[i]);
    }
    out.writeLong(globalMaxValue);

    return indexFP;
  }

  /** Sliced reference to points in an OfflineSorter.ByteSequencesWriter file. */
  private static final class PathSlice {
    final SliceWriter writer;
    final long start;
    final long count;

    public PathSlice(SliceWriter writer, long start, long count) {
      this.writer = writer;
      this.start = start;
      this.count = count;
    }

    @Override
    public String toString() {
      return "PathSlice(start=" + start + " count=" + count + " writer=" + writer + ")";
    }
  }

  private long getSplitValue(PathSlice source, long leftCount, long minValue, long maxValue) throws IOException {

    // Read the split value:
    SliceReader reader = source.writer.getReader(source.start + leftCount);
    boolean success = false;
    long splitValue;
    try {
      boolean result = reader.next();
      assert result;
      splitValue = reader.value();
      assert splitValue >= minValue && splitValue <= maxValue: "splitValue=" + splitValue + " minValue=" + minValue + " maxValue=" + maxValue + " reader=" + reader;
      success = true;
    } finally {
      if (success) {
        IOUtils.close(reader);
      } else {
        IOUtils.closeWhileHandlingException(reader);
      }
    }

    return splitValue;
  }

  /** The incoming PathSlice for the dim we will split is already partitioned/sorted. */
  private void build(int nodeID, int leafNodeOffset,
                     PathSlice source,
                     IndexOutput out,
                     long minValue, long maxValue,
                     long[] blockMinValues,
                     long[] leafBlockFPs) throws IOException {

    long count = source.count;

    if (source.writer instanceof OfflineSliceWriter && count <= maxValuesSortInHeap) {
      // Cut over to heap:
      SliceWriter writer = new HeapSliceWriter((int) count);
      SliceReader reader = source.writer.getReader(source.start);
      try {
        for(int i=0;i<count;i++) {
          boolean hasNext = reader.next();
          assert hasNext;
          writer.append(reader.value(), reader.ord(), reader.docID());
        }
      } finally {
        IOUtils.close(reader, writer);
      }
      source = new PathSlice(writer, 0, count);
    }

    // We should never hit dead-end nodes on recursion even in the adversarial cases:
    assert count > 0;

    if (nodeID >= leafNodeOffset) {
      // Leaf node: write block
      assert maxValue >= minValue;

      // Sort by docID in the leaf so we can .or(DISI) at search time:
      SliceReader reader = source.writer.getReader(source.start);

      int[] docIDs = new int[(int) count];

      boolean success = false;
      try {
        for (int i=0;i<source.count;i++) {

          // NOTE: we discard ord at this point; we only needed it temporarily
          // during building to uniquely identify each point to properly handle
          // the multi-valued case (one docID having multiple values):

          // We also discard the value, since at search time we rely on the
          // wrapped doc values for this:

          boolean result = reader.next();
          assert result;
          docIDs[i] = reader.docID();
        }
        success = true;
      } finally {
        if (success) {
          IOUtils.close(reader);
        } else {
          IOUtils.closeWhileHandlingException(reader);
        }
      }

      // TODO: not clear we need to do this anymore (we used to make a DISI over
      // the block at search time), but maybe it buys some memory
      // locality/sequentiality at search time?
      Arrays.sort(docIDs);

      // Dedup docIDs: for the multi-valued case where more than one value for the doc
      // wound up in this leaf cell, we only need to store the docID once:
      int lastDocID = -1;
      int uniqueCount = 0;
      for(int i=0;i<docIDs.length;i++) {
        int docID = docIDs[i];
        if (docID != lastDocID) {
          uniqueCount++;
          lastDocID = docID;
        }
      }
      assert uniqueCount <= count;

      // TODO: in theory we could compute exactly what this fp will be, since we fixed-width (writeInt) encode docID, and up-front we know
      // how many docIDs are in every leaf since we don't do anything special about multiple splitValue boundary case?
      long startFP = out.getFilePointer();
      out.writeVInt(uniqueCount);

      // Save the block file pointer:
      int blockID = nodeID - leafNodeOffset;
      leafBlockFPs[blockID] = startFP;
      //System.out.println("  leafFP=" + startFP);

      blockMinValues[blockID] = minValue;

      lastDocID = -1;
      for (int i=0;i<docIDs.length;i++) {
        // Absolute int encode; with "vInt of deltas" encoding, the .kdd size dropped from
        // 697 MB -> 539 MB, but query time for 225 queries went from 1.65 sec -> 2.64 sec.
        // I think if we also indexed prefix terms here we could do less costly compression
        // on those lists:
        int docID = docIDs[i];
        if (docID != lastDocID) {
          out.writeInt(docID);
          lastDocID = docID;
        }
      }
      //long endFP = out.getFilePointer();
      //System.out.println("  bytes/doc: " + ((endFP - startFP) / count));
    } else {
      // Inner node: sort, partition/recurse

      assert nodeID < blockMinValues.length: "nodeID=" + nodeID + " blockMinValues.length=" + blockMinValues.length;

      assert source.count == count;

      long leftCount = source.count / 2;

      // NOTE: we don't tweak leftCount for the boundary cases, which means at search time if we are looking for exactly splitValue then we
      // must search both left and right trees:
      long splitValue = getSplitValue(source, leftCount, minValue, maxValue);

      build(2*nodeID, leafNodeOffset,
            new PathSlice(source.writer, source.start, leftCount),
            out,
            minValue, splitValue,
            blockMinValues, leafBlockFPs);

      build(2*nodeID+1, leafNodeOffset,
            new PathSlice(source.writer, source.start+leftCount, count-leftCount),
            out,
            splitValue, maxValue,
            blockMinValues, leafBlockFPs);
    }
  }

  SliceWriter getWriter(long count) throws IOException {
    if (count < maxValuesSortInHeap) {
      return new HeapSliceWriter((int) count);
    } else {
      return new OfflineSliceWriter(tempDir, tempFileNamePrefix, count);
    }
  }
}
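
The leaf-sizing loop in finish() is easier to see with concrete numbers. The values below are chosen only for illustration; the loop itself is copied from the method above:

// With valueCount = 5000 and maxValuesInLeafNode = 1024, countPerLeaf halves
// 5000 -> 2500 -> 1250 -> 625 while innerNodeCount doubles 1 -> 2 -> 4 -> 8,
// so the tree gets 8 leaves of ~625 values each: between 50% and 100% of the
// maximum, exactly as the class javadoc promises.
long valueCount = 5000;
int maxValuesInLeafNode = 1024;
long countPerLeaf = valueCount;
long innerNodeCount = 1;
while (countPerLeaf > maxValuesInLeafNode) {
  countPerLeaf = (countPerLeaf + 1) / 2;
  innerNodeCount *= 2;
}
int numLeaves = (int) innerNodeCount; // finish() reaches the same value via decrement-then-add-one
System.out.println("numLeaves=" + numLeaves + " countPerLeaf=" + countPerLeaf);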

@ -1,31 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Closeable;
import java.io.IOException;

/** Iterates over one slice of the sorted values. This abstracts away whether
 *  OfflineSorter or simple arrays in heap are used. */
interface SliceReader extends Closeable {
  boolean next() throws IOException;
  long value();
  long ord();
  int docID();
}

@ -1,29 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Closeable;
import java.io.IOException;

/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
interface SliceWriter extends Closeable {
  void append(long value, long ord, int docID) throws IOException;
  SliceReader getReader(long start) throws IOException;
  void destroy() throws IOException;
}
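
These two small interfaces are the entire storage abstraction the writer recurses over. A deliberately naive heap-backed pair, written here only to illustrate the contract (the real HeapSliceWriter/OfflineSliceWriter referenced elsewhere in this patch are more careful about bounds and lifecycle):

// Illustration only: a tiny heap-backed SliceWriter/SliceReader pair.
class SimpleHeapSliceWriter implements SliceWriter {
  private final long[] values, ords;
  private final int[] docIDs;
  private int upto;

  SimpleHeapSliceWriter(int size) {
    values = new long[size];
    ords = new long[size];
    docIDs = new int[size];
  }

  @Override
  public void append(long value, long ord, int docID) {
    values[upto] = value;
    ords[upto] = ord;
    docIDs[upto] = docID;
    upto++;
  }

  @Override
  public SliceReader getReader(long start) {
    return new SliceReader() {
      private int i = (int) start - 1;

      @Override public boolean next() { return ++i < upto; }
      @Override public long value() { return values[i]; }
      @Override public long ord() { return ords[i]; }
      @Override public int docID() { return docIDs[i]; }
      @Override public void close() {}
    };
  }

  @Override public void destroy() {}
  @Override public void close() {}
}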

@ -1,217 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;

/** Finds all previously indexed values that fall within the specified {@link BytesRef} range.
 *
 * <p>The field must be indexed with {@link RangeTreeDocValuesFormat}, and {@link SortedSetDocValuesField} added per document.
 *
 * @lucene.experimental */

public class SortedSetRangeTreeQuery extends Query {
  final String field;
  final BytesRef minValue;
  final BytesRef maxValue;
  final boolean minInclusive;
  final boolean maxInclusive;

  /** Matches all values in the specified {@link BytesRef} range. */
  public SortedSetRangeTreeQuery(String field, BytesRef minValue, boolean minInclusive, BytesRef maxValue, boolean maxInclusive) {
    this.field = field;
    this.minInclusive = minInclusive;
    this.minValue = minValue;
    this.maxInclusive = maxInclusive;
    this.maxValue = maxValue;
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {

    // I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
    // used in the first pass:

    return new ConstantScoreWeight(this) {

      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        LeafReader reader = context.reader();
        final SortedSetDocValues ssdv = reader.getSortedSetDocValues(field);
        if (ssdv == null) {
          // No docs in this segment had this field
          return null;
        }

        if (ssdv instanceof RangeTreeSortedSetDocValues == false) {
          throw new IllegalStateException("field \"" + field + "\" was not indexed with RangeTreeDocValuesFormat: got: " + ssdv);
        }
        RangeTreeSortedSetDocValues treeDV = (RangeTreeSortedSetDocValues) ssdv;
        RangeTreeReader tree = treeDV.getRangeTreeReader();

        /*
        for(int i=0;i<treeDV.getValueCount();i++) {
          System.out.println("  ord " + i + " -> " + treeDV.lookupOrd(i));
        }
        */

        // lower
        final long minOrdIncl;
        if (minValue == null) {
          minOrdIncl = 0;
        } else {
          long ord = ssdv.lookupTerm(minValue);
          if (ord >= 0) {
            // Exact match
            if (minInclusive) {
              minOrdIncl = ord;
            } else {
              minOrdIncl = ord+1;
            }
          } else {
            minOrdIncl = -ord-1;
          }
        }

        // upper
        final long maxOrdIncl;
        if (maxValue == null) {
          maxOrdIncl = Long.MAX_VALUE;
        } else {
          long ord = ssdv.lookupTerm(maxValue);
          if (ord >= 0) {
            // Exact match
            if (maxInclusive) {
              maxOrdIncl = ord;
            } else {
              maxOrdIncl = ord-1;
            }
          } else {
            maxOrdIncl = -ord-2;
          }
        }

        if (maxOrdIncl < minOrdIncl) {
          // This can happen when the requested range lies entirely between 2 adjacent ords:
          return null;
        }

        //System.out.println(reader + ": ORD: " + minOrdIncl + "-" + maxOrdIncl + "; " + minValue + " - " + maxValue);

        // Just a "view" of only the ords from the SSDV, as an SNDV. Maybe we
        // have this view implemented somewhere else already? It's not so bad that
        // we are inefficient here (making 2 passes over the ords): this is only
        // used in at most 2 leaf cells (the boundary cells).
        SortedNumericDocValues ords = new SortedNumericDocValues() {

          private long[] ords = new long[2];
          private int count;

          @Override
          public void setDocument(int doc) {
            ssdv.setDocument(doc);
            long ord;
            count = 0;
            while ((ord = ssdv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
              if (count == ords.length) {
                ords = ArrayUtil.grow(ords, count+1);
              }
              ords[count++] = ord;
            }
          }

          @Override
          public int count() {
            return count;
          }

          @Override
          public long valueAt(int index) {
            return ords[index];
          }
        };

        DocIdSet result = tree.intersect(minOrdIncl, maxOrdIncl, ords, context.reader().maxDoc());

        final DocIdSetIterator disi = result.iterator();

        return new ConstantScoreScorer(this, score(), disi);
      }
    };
  }

  @Override
  public int hashCode() {
    int hash = super.hashCode();
    if (minValue != null) hash += minValue.hashCode()^0x14fa55fb;
    if (maxValue != null) hash += maxValue.hashCode()^0x733fa5fe;
    return hash +
      (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+
      (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe);
  }

  @Override
  public boolean equals(Object other) {
    if (super.equals(other)) {
      final SortedSetRangeTreeQuery q = (SortedSetRangeTreeQuery) other;
      return (
        (q.minValue == null ? minValue == null : q.minValue.equals(minValue)) &&
        (q.maxValue == null ? maxValue == null : q.maxValue.equals(maxValue)) &&
        minInclusive == q.minInclusive &&
        maxInclusive == q.maxInclusive
      );
    }

    return false;
  }

  @Override
  public String toString(String field) {
    final StringBuilder sb = new StringBuilder();
    sb.append(getClass().getSimpleName());
    sb.append(':');
    if (this.field.equals(field) == false) {
      sb.append("field=");
      sb.append(this.field);
      sb.append(':');
    }

    return sb.append(minInclusive ? '[' : '{')
      .append((minValue == null) ? "*" : minValue.toString())
      .append(" TO ")
      .append((maxValue == null) ? "*" : maxValue.toString())
      .append(maxInclusive ? ']' : '}')
      .toString();
  }
}
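
The min/max ord resolution in scorer() leans on SortedSetDocValues.lookupTerm, which returns the ord on an exact match and -insertionPoint-1 otherwise. A compact restatement of that arithmetic as illustrative helpers (these are not part of the patch):

// lookupTerm returns ord >= 0 on exact match, else -insertionPoint-1.
static long minOrdInclusive(long lookupResult, boolean inclusive) {
  if (lookupResult >= 0) {
    return inclusive ? lookupResult : lookupResult + 1;
  }
  return -lookupResult - 1;   // first ord at or above the probe term
}

static long maxOrdInclusive(long lookupResult, boolean inclusive) {
  if (lookupResult >= 0) {
    return inclusive ? lookupResult : lookupResult - 1;
  }
  return -lookupResult - 2;   // last ord below the probe term
}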

@ -1,28 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<!-- not a package-info.java, because we already defined this package in core/ -->

<html>
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
This package contains a numeric tree implementation for indexing long values, enabling fast range searching.
</body>
</html>

@ -1,4 +1,4 @@
package org.apache.lucene.bkdtree;
package org.apache.lucene.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more

@ -19,35 +19,24 @@ package org.apache.lucene.bkdtree;

import java.io.IOException;
import java.util.Arrays;
import java.util.Set;

import org.apache.lucene.document.DimensionalLatLonField;
import org.apache.lucene.index.DimensionalValues;
import org.apache.lucene.index.DimensionalValues.IntersectVisitor;
import org.apache.lucene.index.DimensionalValues.Relation;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.bkd.BKDUtil;

/** Finds all previously indexed points that fall within the specified polygon.
 *
 * <p>The field must be indexed with {@link BKDTreeDocValuesFormat}, and {@link BKDPointField} added per document.
 * <p>The field must be indexed using {@link DimensionalLatLonField}, added per document.
 *
 * <p>Because this implementation cannot intersect each cell with the polygon, it will be costly especially for large polygons, as every
 * possible point must be checked.
 *
 * <p><b>NOTE</b>: for fastest performance, this allocates FixedBitSet(maxDoc) for each segment. The score of each hit is the query boost.
 *
 * @lucene.experimental */

public class BKDPointInPolygonQuery extends Query {
public class DimensionalPointInPolygonQuery extends Query {
  final String field;
  final double minLat;
  final double maxLat;

@ -57,7 +46,7 @@ public class BKDPointInPolygonQuery extends Query {
  final double[] polyLons;

  /** The lats/lons must be clockwise or counter-clockwise. */
  public BKDPointInPolygonQuery(String field, double[] polyLats, double[] polyLons) {
  public DimensionalPointInPolygonQuery(String field, double[] polyLats, double[] polyLons) {
    this.field = field;
    if (polyLats.length != polyLons.length) {
      throw new IllegalArgumentException("polyLats and polyLons must be equal length");

@ -83,13 +72,13 @@ public class BKDPointInPolygonQuery extends Query {
    double maxLat = Double.NEGATIVE_INFINITY;
    for(int i=0;i<polyLats.length;i++) {
      double lat = polyLats[i];
      if (BKDTreeWriter.validLat(lat) == false) {
      if (GeoUtils.isValidLat(lat) == false) {
        throw new IllegalArgumentException("polyLats[" + i + "]=" + lat + " is not a valid latitude");
      }
      minLat = Math.min(minLat, lat);
      maxLat = Math.max(maxLat, lat);
      double lon = polyLons[i];
      if (BKDTreeWriter.validLon(lon) == false) {
      if (GeoUtils.isValidLon(lon) == false) {
        throw new IllegalArgumentException("polyLons[" + i + "]=" + lon + " is not a valid longitude");
      }
      minLon = Math.min(minLon, lon);

@ -115,42 +104,59 @@ public class BKDPointInPolygonQuery extends Query {
      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        LeafReader reader = context.reader();
        SortedNumericDocValues sdv = reader.getSortedNumericDocValues(field);
        if (sdv == null) {
          // No docs in this segment had this field
        DimensionalValues values = reader.getDimensionalValues();
        if (values == null) {
          // No docs in this segment had any dimensional fields
          return null;
        }

        if (sdv instanceof BKDTreeSortedNumericDocValues == false) {
          throw new IllegalStateException("field \"" + field + "\" was not indexed with BKDTreeDocValuesFormat: got: " + sdv);
        }
        BKDTreeSortedNumericDocValues treeDV = (BKDTreeSortedNumericDocValues) sdv;
        BKDTreeReader tree = treeDV.getBKDTreeReader();

        DocIdSet result = tree.intersect(minLat, maxLat, minLon, maxLon,
                                         new BKDTreeReader.LatLonFilter() {
                                           @Override
                                           public boolean accept(double lat, double lon) {
                                             return GeoUtils.pointInPolygon(polyLons, polyLats, lat, lon);
                                           }
        DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
        int[] hitCount = new int[1];
        values.intersect(field,
                         new IntersectVisitor() {
                           @Override
                           public void visit(int docID) {
                             hitCount[0]++;
                             result.add(docID);
                           }

                           @Override
                           public BKDTreeReader.Relation compare(double cellLatMin, double cellLatMax, double cellLonMin, double cellLonMax) {
                             if (GeoUtils.rectWithinPoly(cellLonMin, cellLatMin, cellLonMax, cellLatMax,
                                                         polyLons, polyLats,
                                                         minLon, minLat, maxLon, maxLat)) {
                               return BKDTreeReader.Relation.CELL_INSIDE_SHAPE;
                             } else if (GeoUtils.rectCrossesPoly(cellLonMin, cellLatMin, cellLonMax, cellLatMax,
                                                                 polyLons, polyLats,
                                                                 minLon, minLat, maxLon, maxLat)) {
                               return BKDTreeReader.Relation.SHAPE_CROSSES_CELL;
                             } else {
                               return BKDTreeReader.Relation.SHAPE_OUTSIDE_CELL;
                             }
                           }
                         }, treeDV.delegate);
                           @Override
                           public void visit(int docID, byte[] packedValue) {
                             assert packedValue.length == 8;
                             double lat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(packedValue, 0));
                             double lon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(packedValue, 1));
                             if (GeoUtils.pointInPolygon(polyLons, polyLats, lat, lon)) {
                               hitCount[0]++;
                               result.add(docID);
                             }
                           }

        return new ConstantScoreScorer(this, score(), result.iterator());
                           @Override
                           public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                             double cellMinLat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(minPackedValue, 0));
                             double cellMinLon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(minPackedValue, 1));
                             double cellMaxLat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(maxPackedValue, 0));
                             double cellMaxLon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(maxPackedValue, 1));

                             if (cellMinLat <= minLat && cellMaxLat >= maxLat && cellMinLon <= minLon && cellMaxLon >= maxLon) {
                               // Cell fully encloses the query
                               return Relation.CELL_CROSSES_QUERY;
                             } else if (GeoUtils.rectWithinPoly(cellMinLon, cellMinLat, cellMaxLon, cellMaxLat,
                                                                polyLons, polyLats,
                                                                minLon, minLat, maxLon, maxLat)) {
                               return Relation.CELL_INSIDE_QUERY;
                             } else if (GeoUtils.rectCrossesPoly(cellMinLon, cellMinLat, cellMaxLon, cellMaxLat,
                                                                 polyLons, polyLats,
                                                                 minLon, minLat, maxLon, maxLat)) {
                               return Relation.CELL_CROSSES_QUERY;
                             } else {
                               return Relation.CELL_OUTSIDE_QUERY;
                             }
                           }
                         });

        // NOTE: hitCount[0] will be an over-estimate in the multi-valued case
        return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
      }
    };
  }
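The rewritten scorer above is the general dimensional-values traversal: compare lets the BKD tree bulk-include or skip whole cells, while visit(docID, packedValue) filters only the boundary cells point by point. For contrast, here is the same protocol stripped to one dimension. The field name and bounds are made up, `values` (DimensionalValues) and `result` (DocIdSetBuilder) are assumed in scope, and BKDUtil.bytesToLong is assumed as the read-side counterpart of the longToBytes call used elsewhere in this commit, so verify the actual BKDUtil signatures before relying on this:

// Illustration only: a 1D [min, max] range filter via the intersect protocol.
final long min = 17L, max = 42L;
values.intersect("price", new DimensionalValues.IntersectVisitor() {
  @Override
  public void visit(int docID) {
    // Cell was reported CELL_INSIDE_QUERY: every value in it matches.
    result.add(docID);
  }

  @Override
  public void visit(int docID, byte[] packedValue) {
    // Cell crosses the query boundary: check each value individually.
    long v = BKDUtil.bytesToLong(packedValue, 0);  // assumed helper
    if (v >= min && v <= max) {
      result.add(docID);
    }
  }

  @Override
  public DimensionalValues.Relation compare(byte[] minPacked, byte[] maxPacked) {
    long cellMin = BKDUtil.bytesToLong(minPacked, 0);  // assumed helper
    long cellMax = BKDUtil.bytesToLong(maxPacked, 0);
    if (cellMax < min || cellMin > max) {
      return DimensionalValues.Relation.CELL_OUTSIDE_QUERY;
    }
    if (cellMin >= min && cellMax <= max) {
      return DimensionalValues.Relation.CELL_INSIDE_QUERY;
    }
    return DimensionalValues.Relation.CELL_CROSSES_QUERY;
  }
});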

@ -162,7 +168,7 @@ public class BKDPointInPolygonQuery extends Query {
    if (o == null || getClass() != o.getClass()) return false;
    if (!super.equals(o)) return false;

    BKDPointInPolygonQuery that = (BKDPointInPolygonQuery) o;
    DimensionalPointInPolygonQuery that = (DimensionalPointInPolygonQuery) o;

    if (Arrays.equals(polyLons, that.polyLons) == false) {
      return false;
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.bkdtree;
|
||||
package org.apache.lucene.search;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -18,33 +18,25 @@ package org.apache.lucene.bkdtree;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.document.DimensionalLatLonField;
|
||||
import org.apache.lucene.index.DimensionalValues;
|
||||
import org.apache.lucene.index.DimensionalValues.IntersectVisitor;
|
||||
import org.apache.lucene.index.DimensionalValues.Relation;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.ConstantScoreWeight;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.DocIdSetBuilder;
|
||||
import org.apache.lucene.util.GeoUtils;
|
||||
import org.apache.lucene.util.bkd.BKDUtil;
|
||||
|
||||
/** Finds all previously indexed points that fall within the specified boundings box.
|
||||
*
|
||||
* <p>The field must be indexed with {@link BKDTreeDocValuesFormat}, and {@link BKDPointField} added per document.
|
||||
* <p>The field must be indexed with using {@link DimensionalLatLonField} added per document.
|
||||
*
|
||||
* <p><b>NOTE</b>: for fastest performance, this allocates FixedBitSet(maxDoc) for each segment. The score of each hit is the query boost.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
* @lucene.experimental */
|
||||
|
||||
public class BKDPointInBBoxQuery extends Query {
|
||||
public class DimensionalPointInRectQuery extends Query {
|
||||
final String field;
|
||||
final double minLat;
|
||||
final double maxLat;
|
||||
|

@ -52,18 +44,18 @@ public class BKDPointInBBoxQuery extends Query {
  final double maxLon;

  /** Matches all points >= minLon, minLat (inclusive) and < maxLon, maxLat (exclusive). */
  public BKDPointInBBoxQuery(String field, double minLat, double maxLat, double minLon, double maxLon) {
  public DimensionalPointInRectQuery(String field, double minLat, double maxLat, double minLon, double maxLon) {
    this.field = field;
    if (BKDTreeWriter.validLat(minLat) == false) {
    if (GeoUtils.isValidLat(minLat) == false) {
      throw new IllegalArgumentException("minLat=" + minLat + " is not a valid latitude");
    }
    if (BKDTreeWriter.validLat(maxLat) == false) {
    if (GeoUtils.isValidLat(maxLat) == false) {
      throw new IllegalArgumentException("maxLat=" + maxLat + " is not a valid latitude");
    }
    if (BKDTreeWriter.validLon(minLon) == false) {
    if (GeoUtils.isValidLon(minLon) == false) {
      throw new IllegalArgumentException("minLon=" + minLon + " is not a valid longitude");
    }
    if (BKDTreeWriter.validLon(maxLon) == false) {
    if (GeoUtils.isValidLon(maxLon) == false) {
      throw new IllegalArgumentException("maxLon=" + maxLon + " is not a valid longitude");
    }
    this.minLon = minLon;
|
@@ -82,21 +74,59 @@ public class BKDPointInBBoxQuery extends Query {
      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        LeafReader reader = context.reader();
        SortedNumericDocValues sdv = reader.getSortedNumericDocValues(field);
        if (sdv == null) {
          // No docs in this segment had this field
        DimensionalValues values = reader.getDimensionalValues();
        if (values == null) {
          // No docs in this segment had any dimensional fields
          return null;
        }

        if (sdv instanceof BKDTreeSortedNumericDocValues == false) {
          throw new IllegalStateException("field \"" + field + "\" was not indexed with BKDTreeDocValuesFormat: got: " + sdv);
        }
        BKDTreeSortedNumericDocValues treeDV = (BKDTreeSortedNumericDocValues) sdv;
        BKDTreeReader tree = treeDV.getBKDTreeReader();
        DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
        int[] hitCount = new int[1];
        values.intersect(field,
                         new IntersectVisitor() {
                           @Override
                           public void grow(int count) {
                             result.grow(count);
                           }

        DocIdSet result = tree.intersect(minLat, maxLat, minLon, maxLon, null, treeDV.delegate);
                           @Override
                           public void visit(int docID) {
                             hitCount[0]++;
                             result.add(docID);
                           }

        return new ConstantScoreScorer(this, score(), result.iterator());
                           @Override
                           public void visit(int docID, byte[] packedValue) {
                             assert packedValue.length == 8;
                             double lat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(packedValue, 0));
                             double lon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(packedValue, 1));
                             if (lat >= minLat && lat <= maxLat && lon >= minLon && lon <= maxLon) {
                               hitCount[0]++;
                               result.add(docID);
                             }
                           }

                           @Override
                           public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                             double cellMinLat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(minPackedValue, 0));
                             double cellMinLon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(minPackedValue, 1));
                             double cellMaxLat = DimensionalLatLonField.decodeLat(BKDUtil.bytesToInt(maxPackedValue, 0));
                             double cellMaxLon = DimensionalLatLonField.decodeLon(BKDUtil.bytesToInt(maxPackedValue, 1));

                             if (minLat <= cellMinLat && maxLat >= cellMaxLat && minLon <= cellMinLon && maxLon >= cellMaxLon) {
                               return Relation.CELL_INSIDE_QUERY;
                             }

                             if (cellMaxLat < minLat || cellMinLat > maxLat || cellMaxLon < minLon || cellMinLon > maxLon) {
                               return Relation.CELL_OUTSIDE_QUERY;
                             }

                             return Relation.CELL_CROSSES_QUERY;
                           }
                         });

        // NOTE: hitCount[0] will be over-estimate in multi-valued case
        return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
      }
    };
  }
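The visitor above is the heart of the cutover: compare() lets the dimensional reader prune whole cells (CELL_OUTSIDE_QUERY), bulk-collect fully contained cells through visit(int docID) without decoding any values (CELL_INSIDE_QUERY), and pay the per-value decode in visit(int docID, byte[] packedValue) only for crossing cells. A minimal one-dimensional sketch of the same protocol (the lo/hi bounds and the surrounding result builder are assumed, not from this patch):

  values.intersect("value",
                   new IntersectVisitor() {
                     @Override
                     public void grow(int count) {
                       result.grow(count);           // reserve room before a bulk add
                     }

                     @Override
                     public void visit(int docID) {
                       result.add(docID);            // cell fully inside query: no value check
                     }

                     @Override
                     public void visit(int docID, byte[] packedValue) {
                       int v = BKDUtil.bytesToInt(packedValue, 0);
                       if (v >= lo && v <= hi) {     // crossing cell: filter each value
                         result.add(docID);
                       }
                     }

                     @Override
                     public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                       int cellMin = BKDUtil.bytesToInt(minPackedValue, 0);
                       int cellMax = BKDUtil.bytesToInt(maxPackedValue, 0);
                       if (cellMax < lo || cellMin > hi) {
                         return Relation.CELL_OUTSIDE_QUERY;   // prune this subtree
                       }
                       if (lo <= cellMin && cellMax <= hi) {
                         return Relation.CELL_INSIDE_QUERY;    // collect without decoding
                       }
                       return Relation.CELL_CROSSES_QUERY;     // recurse / check each value
                     }
                   });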
@@ -111,9 +141,9 @@ public class BKDPointInBBoxQuery extends Query {
      q.setDisableCoord(true);

      // E.g.: maxLon = -179, minLon = 179
      BKDPointInBBoxQuery left = new BKDPointInBBoxQuery(field, minLat, maxLat, BKDTreeWriter.MIN_LON_INCL, maxLon);
      DimensionalPointInRectQuery left = new DimensionalPointInRectQuery(field, minLat, maxLat, GeoUtils.MIN_LON_INCL, maxLon);
      q.add(new BooleanClause(left, BooleanClause.Occur.SHOULD));
      BKDPointInBBoxQuery right = new BKDPointInBBoxQuery(field, minLat, maxLat, minLon, BKDTreeWriter.MAX_LON_INCL);
      DimensionalPointInRectQuery right = new DimensionalPointInRectQuery(field, minLat, maxLat, minLon, GeoUtils.MAX_LON_INCL);
      q.add(new BooleanClause(right, BooleanClause.Occur.SHOULD));
      return new ConstantScoreQuery(q.build());
    } else {
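Concretely, a rect whose minLon is greater than its maxLon is taken to cross the dateline, and rewrite() splits it into two non-crossing halves joined as SHOULD clauses. For example (field name and coordinates illustrative):

  // lon from 179 east across the dateline to -179:
  Query q = new DimensionalPointInRectQuery("location", -10, 10, 179, -179);
  // rewrite() produces, roughly:
  //   ConstantScoreQuery(
  //     DimensionalPointInRectQuery(location, -10, 10, GeoUtils.MIN_LON_INCL, -179) SHOULD
  //     DimensionalPointInRectQuery(location, -10, 10, 179, GeoUtils.MAX_LON_INCL) SHOULD)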
@@ -133,8 +163,8 @@ public class BKDPointInBBoxQuery extends Query {

  @Override
  public boolean equals(Object other) {
    if (super.equals(other) && other instanceof BKDPointInBBoxQuery) {
      final BKDPointInBBoxQuery q = (BKDPointInBBoxQuery) other;
    if (super.equals(other) && other instanceof DimensionalPointInRectQuery) {
      final DimensionalPointInRectQuery q = (DimensionalPointInRectQuery) other;
      return field.equals(q.field) &&
        minLat == q.minLat &&
        maxLat == q.maxLat &&
@@ -1,18 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

org.apache.lucene.bkdtree.BKDTreeDocValuesFormat
org.apache.lucene.rangetree.RangeTreeDocValuesFormat
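For context: the deleted file above is the META-INF/services registration through which Java's service loader discovered these two DocValuesFormat implementations by name. With the formats cut over to the codec's dimensional values, the registrations go away, so a lookup along these lines would now fail (the format name string here is my assumption, not shown in this patch):

  // Hypothetical SPI lookup of the removed format:
  DocValuesFormat fmt = DocValuesFormat.forName("BKDTree");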
@@ -1,194 +0,0 @@
package org.apache.lucene.bkdtree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.lucene60.Lucene60Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BaseGeoPointTestCase;
import org.apache.lucene.util.GeoRect;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.SloppyMath;
import org.apache.lucene.util.TestUtil;

// TODO: can test framework assert we don't leak temp files?

public class TestBKDTree extends BaseGeoPointTestCase {

  @Override
  protected void addPointToDoc(String field, Document doc, double lat, double lon) {
    doc.add(new BKDPointField(field, lat, lon));
  }

  @Override
  protected Query newBBoxQuery(String field, GeoRect rect) {
    return new BKDPointInBBoxQuery(field, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
  }

  @Override
  protected Query newDistanceQuery(String field, double centerLat, double centerLon, double radiusMeters) {
    // return new BKDDistanceQuery(field, centerLat, centerLon, radiusMeters);
    return null;
  }

  @Override
  protected Query newDistanceRangeQuery(String field, double centerLat, double centerLon, double minRadiusMeters, double radiusMeters) {
    return null;
  }

  @Override
  protected Query newPolygonQuery(String field, double[] lats, double[] lons) {
    return new BKDPointInPolygonQuery(FIELD_NAME, lats, lons);
  }

  @Override
  protected void initIndexWriterConfig(final String fieldName, IndexWriterConfig iwc) {
    final DocValuesFormat dvFormat = getDocValuesFormat();
    Codec codec = new Lucene60Codec() {
      @Override
      public DocValuesFormat getDocValuesFormatForField(String field) {
        if (field.equals(fieldName)) {
          return dvFormat;
        } else {
          return super.getDocValuesFormatForField(field);
        }
      }
    };
    iwc.setCodec(codec);
  }

  @Override
  protected Boolean rectContainsPoint(GeoRect rect, double pointLat, double pointLon) {

    assert Double.isNaN(pointLat) == false;

    int rectLatMinEnc = BKDTreeWriter.encodeLat(rect.minLat);
    int rectLatMaxEnc = BKDTreeWriter.encodeLat(rect.maxLat);
    int rectLonMinEnc = BKDTreeWriter.encodeLon(rect.minLon);
    int rectLonMaxEnc = BKDTreeWriter.encodeLon(rect.maxLon);

    int pointLatEnc = BKDTreeWriter.encodeLat(pointLat);
    int pointLonEnc = BKDTreeWriter.encodeLon(pointLon);

    if (rect.minLon < rect.maxLon) {
      return pointLatEnc >= rectLatMinEnc &&
        pointLatEnc < rectLatMaxEnc &&
        pointLonEnc >= rectLonMinEnc &&
        pointLonEnc < rectLonMaxEnc;
    } else {
      // Rect crosses dateline:
      return pointLatEnc >= rectLatMinEnc &&
        pointLatEnc < rectLatMaxEnc &&
        (pointLonEnc >= rectLonMinEnc ||
         pointLonEnc < rectLonMaxEnc);
    }
  }

  private static final double POLY_TOLERANCE = 1e-7;

  @Override
  protected Boolean polyRectContainsPoint(GeoRect rect, double pointLat, double pointLon) {
    if (Math.abs(rect.minLat-pointLat) < POLY_TOLERANCE ||
        Math.abs(rect.maxLat-pointLat) < POLY_TOLERANCE ||
        Math.abs(rect.minLon-pointLon) < POLY_TOLERANCE ||
        Math.abs(rect.maxLon-pointLon) < POLY_TOLERANCE) {
      // The poly check quantizes slightly differently, so we allow for boundary cases to disagree
      return null;
    } else {
      return rectContainsPoint(rect, pointLat, pointLon);
    }
  }

  @Override
  protected Boolean circleContainsPoint(double centerLat, double centerLon, double radiusMeters, double pointLat, double pointLon) {
    double distanceKM = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon);
    boolean result = distanceKM*1000.0 <= radiusMeters;
    //System.out.println(" shouldMatch? centerLon=" + centerLon + " centerLat=" + centerLat + " pointLon=" + pointLon + " pointLat=" + pointLat + " result=" + result + " distanceMeters=" + (distanceKM * 1000));
    return result;
  }

  @Override
  protected Boolean distanceRangeContainsPoint(double centerLat, double centerLon, double minRadiusMeters, double radiusMeters, double pointLat, double pointLon) {
    final double d = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon)*1000.0;
    return d >= minRadiusMeters && d <= radiusMeters;
  }

  public void testEncodeDecode() throws Exception {
    int iters = atLeast(10000);
    boolean small = random().nextBoolean();
    for(int iter=0;iter<iters;iter++) {
      double lat = randomLat(small);
      double latQuantized = BKDTreeWriter.decodeLat(BKDTreeWriter.encodeLat(lat));
      assertEquals(lat, latQuantized, BKDTreeWriter.TOLERANCE);

      double lon = randomLon(small);
      double lonQuantized = BKDTreeWriter.decodeLon(BKDTreeWriter.encodeLon(lon));
      assertEquals(lon, lonQuantized, BKDTreeWriter.TOLERANCE);
    }
  }

  public void testEncodeDecodeMax() throws Exception {
    int x = BKDTreeWriter.encodeLat(Math.nextAfter(90.0, Double.POSITIVE_INFINITY));
    assertTrue(x < Integer.MAX_VALUE);

    int y = BKDTreeWriter.encodeLon(Math.nextAfter(180.0, Double.POSITIVE_INFINITY));
    assertTrue(y < Integer.MAX_VALUE);
  }

  public void testAccountableHasDelegate() throws Exception {
    Directory dir = getDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setCodec(TestUtil.alwaysDocValuesFormat(getDocValuesFormat()));
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new BKDPointField(FIELD_NAME, -18.2861, 147.7));
    w.addDocument(doc);
    IndexReader r = w.getReader();

    // We can't wrap with "exotic" readers because the BKD query must see the BKDDVFormat:
    IndexSearcher s = newSearcher(r, false);
    // Need to run a query so the DV field is really loaded:
    TopDocs hits = s.search(new BKDPointInBBoxQuery(FIELD_NAME, -30, 0, 140, 150), 1);
    assertEquals(1, hits.totalHits);
    assertTrue(Accountables.toString((Accountable) r.leaves().get(0).reader()).contains("delegate"));
    IOUtils.close(r, w, dir);
  }

  private static DocValuesFormat getDocValuesFormat() {
    int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
    int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
    if (VERBOSE) {
      System.out.println(" BKD params: maxPointsInLeaf=" + maxPointsInLeaf + " maxPointsSortInHeap=" + maxPointsSortInHeap);
    }
    return new BKDTreeDocValuesFormat(maxPointsInLeaf, maxPointsSortInHeap);
  }

  private Directory getDirectory() {
    return noVirusChecker(newDirectory());
  }
}
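The testEncodeDecode round trip above only holds to within a quantization tolerance. A self-contained sketch of that property with a hypothetical int quantizer (the scale factor is an assumption for illustration, not BKDTreeWriter's actual encoding):

  // Hypothetical: map [-90, 90] degrees onto the int range with a fixed scale.
  static final double SCALE = Integer.MAX_VALUE / 90.0;

  static int encodeLat(double lat) {
    return (int) Math.floor(lat * SCALE);   // quantize down to an int bucket
  }

  static double decodeLat(int enc) {
    return enc / SCALE;                     // bucket back to degrees
  }

  // The round trip loses at most one bucket width:
  double lat = 12.3456789;
  assert Math.abs(decodeLat(encodeLat(lat)) - lat) <= 1.0 / SCALE;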
@@ -1,781 +0,0 @@
package org.apache.lucene.rangetree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.lucene60.Lucene60Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.BeforeClass;

public class TestRangeTree extends LuceneTestCase {

  // Controls what range of values we randomly generate, so we sometimes test narrow ranges:
  static long valueMid;
  static int valueRange;

  @BeforeClass
  public static void beforeClass() {
    if (random().nextBoolean()) {
      valueMid = random().nextLong();
      if (random().nextBoolean()) {
        // Wide range
        valueRange = TestUtil.nextInt(random(), 1, Integer.MAX_VALUE);
      } else {
        // Narrow range
        valueRange = TestUtil.nextInt(random(), 1, 100000);
      }
      if (VERBOSE) {
        System.out.println("TEST: will generate long values " + valueMid + " +/- " + valueRange);
      }
    } else {
      // All longs
      valueRange = 0;
      if (VERBOSE) {
        System.out.println("TEST: will generate all long values");
      }
    }
  }

  public void testAllEqual() throws Exception {
    int numValues = atLeast(10000);
    long value = randomValue();
    long[] values = new long[numValues];
    FixedBitSet missing = new FixedBitSet(numValues);

    if (VERBOSE) {
      System.out.println("TEST: use same value=" + value);
    }

    for(int docID=0;docID<numValues;docID++) {
      int x = random().nextInt(20);
      if (x == 17) {
        // Some docs don't have a point:
        missing.set(docID);
        if (VERBOSE) {
          System.out.println(" doc=" + docID + " is missing");
        }
        continue;
      }
      values[docID] = value;
    }

    verify(missing, values);
  }

  public void testMultiValued() throws Exception {
    int numValues = atLeast(10000);
    // Every doc has 2 values:
    long[] values = new long[2*numValues];
    Directory dir = getDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();

    // We rely on docID order:
    iwc.setMergePolicy(newLogMergePolicy());
    Codec codec = TestUtil.alwaysDocValuesFormat(getDocValuesFormat());
    iwc.setCodec(codec);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

    for (int docID=0;docID<numValues;docID++) {
      Document doc = new Document();
      values[2*docID] = randomValue();
      doc.add(new SortedNumericDocValuesField("value", values[2*docID]));
      values[2*docID+1] = randomValue();
      doc.add(new SortedNumericDocValuesField("value", values[2*docID+1]));
      w.addDocument(doc);
    }

    if (random().nextBoolean()) {
      w.forceMerge(1);
    }
    IndexReader r = w.getReader();
    w.close();
    // We can't wrap with "exotic" readers because the NumericRangeTreeQuery must see the RangeTreeDVFormat:
    IndexSearcher s = newSearcher(r, false);

    int iters = atLeast(100);
    for (int iter=0;iter<iters;iter++) {
      long lower = randomValue();
      long upper = randomValue();

      if (upper < lower) {
        long x = lower;
        lower = upper;
        upper = x;
      }

      if (VERBOSE) {
        System.out.println("\nTEST: iter=" + iter + " value=" + lower + " TO " + upper);
      }

      boolean includeLower = random().nextBoolean();
      boolean includeUpper = random().nextBoolean();
      Query query = new NumericRangeTreeQuery("value", lower, includeLower, upper, includeUpper);

      final FixedBitSet hits = new FixedBitSet(r.maxDoc());
      s.search(query, new SimpleCollector() {

          private int docBase;

          @Override
          public boolean needsScores() {
            return false;
          }

          @Override
          protected void doSetNextReader(LeafReaderContext context) throws IOException {
            docBase = context.docBase;
          }

          @Override
          public void collect(int doc) {
            hits.set(docBase+doc);
          }
        });

      for(int docID=0;docID<values.length/2;docID++) {
        long docValue1 = values[2*docID];
        long docValue2 = values[2*docID+1];
        boolean expected = matches(lower, includeLower, upper, includeUpper, docValue1) ||
          matches(lower, includeLower, upper, includeUpper, docValue2);

        if (hits.get(docID) != expected) {
          fail("docID=" + docID + " docValue1=" + docValue1 + " docValue2=" + docValue2 + " expected " + expected + " but got: " + hits.get(docID));
        }
      }
    }
    r.close();
    dir.close();
  }

  public void testMultiValuedSortedSet() throws Exception {
    int numValues = atLeast(10000);
    // Every doc has 2 values:
    long[] values = new long[2*numValues];
    Directory dir = getDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();

    // We rely on docID order:
    iwc.setMergePolicy(newLogMergePolicy());
    Codec codec = TestUtil.alwaysDocValuesFormat(getDocValuesFormat());
    iwc.setCodec(codec);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

    for (int docID=0;docID<numValues;docID++) {
      Document doc = new Document();
      values[2*docID] = randomValue();
      doc.add(new SortedSetDocValuesField("value", longToBytes(values[2*docID])));
      values[2*docID+1] = randomValue();
      doc.add(new SortedSetDocValuesField("value", longToBytes(values[2*docID+1])));
      w.addDocument(doc);
    }

    if (random().nextBoolean()) {
      w.forceMerge(1);
    }
    IndexReader r = w.getReader();
    w.close();
    // We can't wrap with "exotic" readers because the NumericRangeTreeQuery must see the RangeTreeDVFormat:
    IndexSearcher s = newSearcher(r, false);

    int iters = atLeast(100);
    for (int iter=0;iter<iters;iter++) {
      long lower = randomValue();
      long upper = randomValue();

      if (upper < lower) {
        long x = lower;
        lower = upper;
        upper = x;
      }

      if (VERBOSE) {
        System.out.println("\nTEST: iter=" + iter + " value=" + lower + " TO " + upper);
      }

      boolean includeLower = random().nextBoolean();
      boolean includeUpper = random().nextBoolean();
      Query query = new SortedSetRangeTreeQuery("value", longToBytes(lower), includeLower, longToBytes(upper), includeUpper);

      final FixedBitSet hits = new FixedBitSet(r.maxDoc());
      s.search(query, new SimpleCollector() {

          private int docBase;

          @Override
          public boolean needsScores() {
            return false;
          }

          @Override
          protected void doSetNextReader(LeafReaderContext context) throws IOException {
            docBase = context.docBase;
          }

          @Override
          public void collect(int doc) {
            hits.set(docBase+doc);
          }
        });

      for(int docID=0;docID<values.length/2;docID++) {
        long docValue1 = values[2*docID];
        long docValue2 = values[2*docID+1];
        boolean expected = matches(lower, includeLower, upper, includeUpper, docValue1) ||
          matches(lower, includeLower, upper, includeUpper, docValue2);

        if (hits.get(docID) != expected) {
          fail("docID=" + docID + " docValue1=" + docValue1 + " docValue2=" + docValue2 + " expected " + expected + " but got: " + hits.get(docID));
        }
      }
    }
    r.close();
    dir.close();
  }

  public void testRandomTiny() throws Exception {
    // Make sure single-leaf-node case is OK:
    doTestRandom(10);
  }

  public void testRandomMedium() throws Exception {
    doTestRandom(10000);
  }

  @Nightly
  public void testRandomBig() throws Exception {
    doTestRandom(200000);
  }

  private void doTestRandom(int count) throws Exception {

    int numValues = atLeast(count);

    if (VERBOSE) {
      System.out.println("TEST: numValues=" + numValues);
    }

    long[] values = new long[numValues];
    FixedBitSet missing = new FixedBitSet(numValues);

    boolean haveRealDoc = false;

    for (int docID=0;docID<numValues;docID++) {
      int x = random().nextInt(20);
      if (x == 17) {
        // Some docs don't have a point:
        missing.set(docID);
        if (VERBOSE) {
          System.out.println(" doc=" + docID + " is missing");
        }
        continue;
      }

      if (docID > 0 && x == 0 && haveRealDoc) {
        int oldDocID;
        while (true) {
          oldDocID = random().nextInt(docID);
          if (missing.get(oldDocID) == false) {
            break;
          }
        }

        // Identical to old value
        values[docID] = values[oldDocID];
        if (VERBOSE) {
          System.out.println(" doc=" + docID + " value=" + values[docID] + " bytes=" + longToBytes(values[docID]) + " (same as doc=" + oldDocID + ")");
        }
      } else {
        values[docID] = randomValue();
        haveRealDoc = true;
        if (VERBOSE) {
          System.out.println(" doc=" + docID + " value=" + values[docID] + " bytes=" + longToBytes(values[docID]));
        }
      }
    }

    verify(missing, values);
  }

  private static void verify(Bits missing, long[] values) throws Exception {
    IndexWriterConfig iwc = newIndexWriterConfig();

    // Else we can get O(N^2) merging:
    int mbd = iwc.getMaxBufferedDocs();
    if (mbd != -1 && mbd < values.length/100) {
      iwc.setMaxBufferedDocs(values.length/100);
    }
    final DocValuesFormat dvFormat = getDocValuesFormat();
    Codec codec = new Lucene60Codec() {
      @Override
      public DocValuesFormat getDocValuesFormatForField(String field) {
        if (field.equals("sn_value") || field.equals("ss_value")) {
          return dvFormat;
        } else {
          return super.getDocValuesFormatForField(field);
        }
      }
    };
    iwc.setCodec(codec);
    Directory dir;
    if (values.length > 100000) {
      dir = noVirusChecker(newFSDirectory(createTempDir("TestRangeTree")));
    } else {
      dir = getDirectory();
    }
    Set<Integer> deleted = new HashSet<>();
    // RandomIndexWriter is too slow here:
    IndexWriter w = new IndexWriter(dir, iwc);
    for(int id=0;id<values.length;id++) {
      Document doc = new Document();
      doc.add(newStringField("id", ""+id, Field.Store.NO));
      doc.add(new NumericDocValuesField("id", id));
      if (missing.get(id) == false) {
        doc.add(new SortedNumericDocValuesField("sn_value", values[id]));
        doc.add(new SortedSetDocValuesField("ss_value", longToBytes(values[id])));
      }
      w.addDocument(doc);
      if (id > 0 && random().nextInt(100) == 42) {
        int idToDelete = random().nextInt(id);
        w.deleteDocuments(new Term("id", ""+idToDelete));
        deleted.add(idToDelete);
        if (VERBOSE) {
          System.out.println(" delete id=" + idToDelete);
        }
      }
    }
    if (random().nextBoolean()) {
      if (VERBOSE) {
        System.out.println(" forceMerge(1)");
      }
      w.forceMerge(1);
    }
    final IndexReader r = DirectoryReader.open(w, true);
    w.close();

    // We can't wrap with "exotic" readers because the NumericRangeTreeQuery must see the RangeTreeDVFormat:
    IndexSearcher s = newSearcher(r, false);

    int numThreads = TestUtil.nextInt(random(), 2, 5);

    if (VERBOSE) {
      System.out.println("TEST: use " + numThreads + " query threads");
    }

    List<Thread> threads = new ArrayList<>();
    final int iters = atLeast(100);

    final CountDownLatch startingGun = new CountDownLatch(1);
    final AtomicBoolean failed = new AtomicBoolean();

    for(int i=0;i<numThreads;i++) {
      Thread thread = new Thread() {
          @Override
          public void run() {
            try {
              _run();
            } catch (Exception e) {
              failed.set(true);
              throw new RuntimeException(e);
            }
          }

          private void _run() throws Exception {
            startingGun.await();

            NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");

            for (int iter=0;iter<iters && failed.get() == false;iter++) {
              long lower = randomValue();
              long upper = randomValue();

              if (upper < lower) {
                long x = lower;
                lower = upper;
                upper = x;
              }

              if (VERBOSE) {
                System.out.println("\n" + Thread.currentThread().getName() + ": TEST: iter=" + iter + " value=" + lower + " TO " + upper);
              }

              boolean includeLower = random().nextBoolean();
              boolean includeUpper = random().nextBoolean();
              Query query;
              if (random().nextBoolean()) {
                query = new NumericRangeTreeQuery("sn_value", lower, includeLower, upper, includeUpper);
              } else {
                query = new SortedSetRangeTreeQuery("ss_value", longToBytes(lower), includeLower, longToBytes(upper), includeUpper);
              }

              if (VERBOSE) {
                System.out.println(Thread.currentThread().getName() + ": using query: " + query);
              }

              final FixedBitSet hits = new FixedBitSet(r.maxDoc());
              s.search(query, new SimpleCollector() {

                  private int docBase;

                  @Override
                  public boolean needsScores() {
                    return false;
                  }

                  @Override
                  protected void doSetNextReader(LeafReaderContext context) throws IOException {
                    docBase = context.docBase;
                  }

                  @Override
                  public void collect(int doc) {
                    hits.set(docBase+doc);
                  }
                });

              if (VERBOSE) {
                System.out.println(Thread.currentThread().getName() + ": hitCount: " + hits.cardinality());
              }

              for(int docID=0;docID<r.maxDoc();docID++) {
                int id = (int) docIDToID.get(docID);
                boolean expected = missing.get(id) == false && deleted.contains(id) == false && matches(lower, includeLower, upper, includeUpper, values[id]);
                if (hits.get(docID) != expected) {
                  // We do exact quantized comparison so the bbox query should never disagree:
                  fail(Thread.currentThread().getName() + ": iter=" + iter + " id=" + id + " docID=" + docID + " value=" + values[id] + " (range: " + lower + " TO " + upper + ") expected " + expected + " but got: " + hits.get(docID) + " deleted?=" + deleted.contains(id) + " query=" + query);
                }
              }
            }
          }
        };
      thread.setName("T" + i);
      thread.start();
      threads.add(thread);
    }
    startingGun.countDown();
    for(Thread thread : threads) {
      thread.join();
    }
    IOUtils.close(r, dir);
  }

  private static boolean matches(long lower, boolean includeLower, long upper, boolean includeUpper, long value) {
    if (includeLower == false) {
      if (lower == Long.MAX_VALUE) {
        return false;
      }
      lower++;
    }
    if (includeUpper == false) {
      if (upper == Long.MIN_VALUE) {
        return false;
      }
      upper--;
    }

    return value >= lower && value <= upper;
  }
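A quick worked example of the bound normalization in matches() above (all values illustrative): an exclusive bound is folded into an inclusive one by stepping it inward, and an exclusive bound already at the type's edge can never match:

  matches(5L, false, 10L, true, 5L);    // false: exclusive lower 5 is normalized to 6
  matches(5L, false, 10L, true, 6L);    // true:  6 is within [6, 10]
  matches(Long.MAX_VALUE, false, Long.MAX_VALUE, true, Long.MAX_VALUE);
                                        // false: no long is > Long.MAX_VALUE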
  private static long randomValue() {
    if (valueRange == 0) {
      return random().nextLong();
    } else {
      return valueMid + TestUtil.nextInt(random(), -valueRange, valueRange);
    }
  }

  public void testAccountableHasDelegate() throws Exception {
    Directory dir = getDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
    iwc.setCodec(codec);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new SortedNumericDocValuesField("value", 187));
    w.addDocument(doc);
    IndexReader r = w.getReader();

    // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
    IndexSearcher s = newSearcher(r, false);
    // Need to run a query so the DV field is really loaded:
    TopDocs hits = s.search(new NumericRangeTreeQuery("value", -30L, true, 187L, true), 1);
    assertEquals(1, hits.totalHits);
    assertTrue(Accountables.toString((Accountable) r.leaves().get(0).reader()).contains("delegate"));
    IOUtils.close(r, w, dir);
  }

  public void testMinMaxLong() throws Exception {
    Directory dir = getDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
    iwc.setCodec(codec);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new SortedNumericDocValuesField("value", Long.MIN_VALUE));
    w.addDocument(doc);

    doc = new Document();
    doc.add(new SortedNumericDocValuesField("value", Long.MAX_VALUE));
    w.addDocument(doc);

    IndexReader r = w.getReader();

    // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
    IndexSearcher s = newSearcher(r, false);

    assertEquals(1, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, 0L, true)));
    assertEquals(1, s.count(new NumericRangeTreeQuery("value", 0L, true, Long.MAX_VALUE, true)));
    assertEquals(2, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, true)));

    IOUtils.close(r, w, dir);
  }

  public void testBasicSortedSet() throws Exception {
    Directory dir = getDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
    iwc.setCodec(codec);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new SortedSetDocValuesField("value", new BytesRef("abc")));
    w.addDocument(doc);
    doc = new Document();
    doc.add(new SortedSetDocValuesField("value", new BytesRef("def")));
    w.addDocument(doc);

    IndexReader r = w.getReader();

    // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
    IndexSearcher s = newSearcher(r, false);

    assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("aaa"), true, new BytesRef("bbb"), true)));
    assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("c"), true, new BytesRef("e"), true)));
    assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("a"), true, new BytesRef("z"), true)));

    assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", null, true, new BytesRef("abc"), true)));
    assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("a"), true, new BytesRef("abc"), true)));
    assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("a"), true, new BytesRef("abc"), false)));

    assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("def"), true, null, false)));
    assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("def"), true, new BytesRef("z"), true)));
    assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("def"), false, new BytesRef("z"), true)));

    IOUtils.close(r, w, dir);
  }

  public void testLongMinMaxNumeric() throws Exception {
    Directory dir = getDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
    iwc.setCodec(codec);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new SortedNumericDocValuesField("value", Long.MIN_VALUE));
    w.addDocument(doc);
    doc = new Document();
    doc.add(new SortedNumericDocValuesField("value", Long.MAX_VALUE));
    w.addDocument(doc);

    IndexReader r = w.getReader();

    // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
    IndexSearcher s = newSearcher(r, false);

    assertEquals(2, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, true)));
    assertEquals(1, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, false)));
    assertEquals(1, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, false, Long.MAX_VALUE, true)));
    assertEquals(0, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, false, Long.MAX_VALUE, false)));

    assertEquals(2, s.count(new NumericRangeTreeQuery("value", null, true, null, true)));

    IOUtils.close(r, w, dir);
  }

  public void testLongMinMaxSortedSet() throws Exception {
    Directory dir = getDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
    iwc.setCodec(codec);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new SortedSetDocValuesField("value", longToBytes(Long.MIN_VALUE)));
    w.addDocument(doc);
    doc = new Document();
    doc.add(new SortedSetDocValuesField("value", longToBytes(Long.MAX_VALUE)));
    w.addDocument(doc);

    IndexReader r = w.getReader();

    // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
    IndexSearcher s = newSearcher(r, false);

    assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), true, longToBytes(Long.MAX_VALUE), true)));
    assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), true, longToBytes(Long.MAX_VALUE), false)));
    assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), false, longToBytes(Long.MAX_VALUE), true)));
    assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), false, longToBytes(Long.MAX_VALUE), false)));

    assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", null, true, null, true)));

    IOUtils.close(r, w, dir);
  }

  public void testSortedSetNoOrdsMatch() throws Exception {
    Directory dir = getDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
    iwc.setCodec(codec);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new SortedSetDocValuesField("value", new BytesRef("a")));
    w.addDocument(doc);
    doc = new Document();
    doc.add(new SortedSetDocValuesField("value", new BytesRef("z")));
    w.addDocument(doc);

    IndexReader r = w.getReader();

    // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
    IndexSearcher s = newSearcher(r, false);
    assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("m"), true, new BytesRef("n"), false)));

    assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", null, true, null, true)));

    IOUtils.close(r, w, dir);
  }

  public void testNumericNoValuesMatch() throws Exception {
    Directory dir = getDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
    iwc.setCodec(codec);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new SortedNumericDocValuesField("value", 17));
    w.addDocument(doc);
    doc = new Document();
    doc.add(new SortedNumericDocValuesField("value", 22));
    w.addDocument(doc);

    IndexReader r = w.getReader();

    // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
    IndexSearcher s = newSearcher(r, false);
    assertEquals(0, s.count(new NumericRangeTreeQuery("value", 17L, true, 13L, false)));

    IOUtils.close(r, w, dir);
  }

  public void testNoDocs() throws Exception {
    Directory dir = getDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
    iwc.setCodec(codec);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    w.addDocument(new Document());

    IndexReader r = w.getReader();

    // We can't wrap with "exotic" readers because the query must see the RangeTreeDVFormat:
    IndexSearcher s = newSearcher(r, false);
    assertEquals(0, s.count(new NumericRangeTreeQuery("value", 17L, true, 13L, false)));

    IOUtils.close(r, w, dir);
  }

  private static BytesRef longToBytes(long v) {
    // Flip the sign bit so negative longs sort before positive longs:
    v ^= 0x8000000000000000L;
    byte[] bytes = new byte[8];
    bytes[0] = (byte) (v >> 56);
    bytes[1] = (byte) (v >> 48);
    bytes[2] = (byte) (v >> 40);
    bytes[3] = (byte) (v >> 32);
    bytes[4] = (byte) (v >> 24);
    bytes[5] = (byte) (v >> 16);
    bytes[6] = (byte) (v >> 8);
    bytes[7] = (byte) v;
    return new BytesRef(bytes);
  }
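The sign-bit flip in longToBytes above is what makes the unsigned, byte-wise BytesRef order agree with signed long order; without it, negative longs (high bit set) would sort after positive ones. A tiny self-contained check (illustrative only):

  BytesRef a = longToBytes(-1L);
  BytesRef b = longToBytes(0L);
  BytesRef c = longToBytes(1L);
  assert a.compareTo(b) < 0 && b.compareTo(c) < 0;   // signed order preserved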
  /*
  private static long bytesToLong(BytesRef bytes) {
    long v = ((bytes.bytes[bytes.offset]&0xFFL) << 56) |
      ((bytes.bytes[bytes.offset+1]&0xFFL) << 48) |
      ((bytes.bytes[bytes.offset+2]&0xFFL) << 40) |
      ((bytes.bytes[bytes.offset+3]&0xFFL) << 32) |
      ((bytes.bytes[bytes.offset+4]&0xFFL) << 24) |
      ((bytes.bytes[bytes.offset+5]&0xFFL) << 16) |
      ((bytes.bytes[bytes.offset+6]&0xFFL) << 8) |
      (bytes.bytes[bytes.offset+7]&0xFFL);
    // Flip the sign bit back:
    return v ^ 0x8000000000000000L;
  }
  */

  private static DocValuesFormat getDocValuesFormat() {
    int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
    int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
    return new RangeTreeDocValuesFormat(maxPointsInLeaf, maxPointsSortInHeap);
  }

  private static Directory noVirusChecker(Directory dir) {
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
    }
    return dir;
  }

  private static Directory getDirectory() {
    return noVirusChecker(newDirectory());
  }
}
@@ -0,0 +1,124 @@
package org.apache.lucene.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.DimensionalLatLonField;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BaseGeoPointTestCase;
import org.apache.lucene.util.GeoRect;
import org.apache.lucene.util.SloppyMath;

public class TestDimensionalQueries extends BaseGeoPointTestCase {

  @Override
  protected void addPointToDoc(String field, Document doc, double lat, double lon) {
    doc.add(new DimensionalLatLonField(field, lat, lon));
  }

  @Override
  protected Query newRectQuery(String field, GeoRect rect) {
    return new DimensionalPointInRectQuery(field, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
  }

  @Override
  protected Query newDistanceQuery(String field, double centerLat, double centerLon, double radiusMeters) {
    // return new BKDDistanceQuery(field, centerLat, centerLon, radiusMeters);
    return null;
  }

  @Override
  protected Query newDistanceRangeQuery(String field, double centerLat, double centerLon, double minRadiusMeters, double radiusMeters) {
    return null;
  }

  @Override
  protected Query newPolygonQuery(String field, double[] lats, double[] lons) {
    return new DimensionalPointInPolygonQuery(FIELD_NAME, lats, lons);
  }

  @Override
  protected Boolean rectContainsPoint(GeoRect rect, double pointLat, double pointLon) {

    assert Double.isNaN(pointLat) == false;

    int rectLatMinEnc = DimensionalLatLonField.encodeLat(rect.minLat);
    int rectLatMaxEnc = DimensionalLatLonField.encodeLat(rect.maxLat);
    int rectLonMinEnc = DimensionalLatLonField.encodeLon(rect.minLon);
    int rectLonMaxEnc = DimensionalLatLonField.encodeLon(rect.maxLon);

    int pointLatEnc = DimensionalLatLonField.encodeLat(pointLat);
    int pointLonEnc = DimensionalLatLonField.encodeLon(pointLon);

    if (rect.minLon < rect.maxLon) {
      return pointLatEnc >= rectLatMinEnc &&
        pointLatEnc <= rectLatMaxEnc &&
        pointLonEnc >= rectLonMinEnc &&
        pointLonEnc <= rectLonMaxEnc;
    } else {
      // Rect crosses dateline:
      return pointLatEnc >= rectLatMinEnc &&
        pointLatEnc <= rectLatMaxEnc &&
        (pointLonEnc >= rectLonMinEnc ||
         pointLonEnc <= rectLonMaxEnc);
    }
  }

  private static final double POLY_TOLERANCE = 1e-7;

  @Override
  protected Boolean polyRectContainsPoint(GeoRect rect, double pointLat, double pointLon) {
    if (Math.abs(rect.minLat-pointLat) < POLY_TOLERANCE ||
        Math.abs(rect.maxLat-pointLat) < POLY_TOLERANCE ||
        Math.abs(rect.minLon-pointLon) < POLY_TOLERANCE ||
        Math.abs(rect.maxLon-pointLon) < POLY_TOLERANCE) {
      // The poly check quantizes slightly differently, so we allow for boundary cases to disagree
      return null;
    } else {
      return rectContainsPoint(rect, pointLat, pointLon);
    }
  }

  @Override
  protected Boolean circleContainsPoint(double centerLat, double centerLon, double radiusMeters, double pointLat, double pointLon) {
    double distanceKM = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon);
    boolean result = distanceKM*1000.0 <= radiusMeters;
    //System.out.println(" shouldMatch? centerLon=" + centerLon + " centerLat=" + centerLat + " pointLon=" + pointLon + " pointLat=" + pointLat + " result=" + result + " distanceMeters=" + (distanceKM * 1000));
    return result;
  }

  @Override
  protected Boolean distanceRangeContainsPoint(double centerLat, double centerLon, double minRadiusMeters, double radiusMeters, double pointLat, double pointLon) {
    final double d = SloppyMath.haversin(centerLat, centerLon, pointLat, pointLon)*1000.0;
    return d >= minRadiusMeters && d <= radiusMeters;
  }

  public void testEncodeDecode() throws Exception {
    int iters = atLeast(10000);
    boolean small = random().nextBoolean();
    for(int iter=0;iter<iters;iter++) {
      double lat = randomLat(small);
      double latQuantized = DimensionalLatLonField.decodeLat(DimensionalLatLonField.encodeLat(lat));
      assertEquals(lat, latQuantized, DimensionalLatLonField.TOLERANCE);

      double lon = randomLon(small);
      double lonQuantized = DimensionalLatLonField.decodeLon(DimensionalLatLonField.encodeLon(lon));
      assertEquals(lon, lonQuantized, DimensionalLatLonField.TOLERANCE);
    }
  }
}
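Note how rectContainsPoint above treats minLon > maxLon as a dateline-crossing rect, flipping the longitude test from a conjunction to a disjunction. The rule in isolation (a sketch with plain doubles standing in for the encoded ints):

  static boolean lonInRect(double lon, double minLon, double maxLon) {
    if (minLon <= maxLon) {
      return lon >= minLon && lon <= maxLon;   // ordinary rect
    } else {
      return lon >= minLon || lon <= maxLon;   // rect wraps across the dateline
    }
  }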
@@ -48,13 +48,19 @@ public class TestGeoPointQuery extends BaseGeoPointTestCase {
  // error threshold for point-distance queries (in percent) NOTE: Guideline from USGS
  private static final double DISTANCE_PCT_ERR = 0.005;

  @Override
  protected boolean forceSmall() {
    // TODO: GeoUtils are potentially slow if we use small=false with heavy testing
    return true;
  }

  @Override
  protected void addPointToDoc(String field, Document doc, double lat, double lon) {
    doc.add(new GeoPointField(field, lon, lat, Field.Store.NO));
  }

  @Override
  protected Query newBBoxQuery(String field, GeoRect rect) {
  protected Query newRectQuery(String field, GeoRect rect) {
    return new GeoPointInBBoxQuery(field, rect.minLon, rect.minLat, rect.maxLon, rect.maxLat);
  }

@@ -72,11 +72,18 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
    originLat = GeoUtils.normalizeLat(GeoUtils.MIN_LAT_INCL + latRange + (GeoUtils.MAX_LAT_INCL - GeoUtils.MIN_LAT_INCL - 2 * latRange) * random().nextDouble());
  }

  /** Return true when testing on a non-small region may be too slow (GeoPoint*Query) */
  protected boolean forceSmall() {
    return false;
  }

  // A particularly tricky adversary for BKD tree:
  @Nightly
  public void testSamePointManyTimes() throws Exception {

    // For GeoPointQuery, only run this test nightly:
    assumeTrue("GeoPoint*Query is too slow otherwise", TEST_NIGHTLY || forceSmall() == false);

    int numPoints = atLeast(1000);
    // TODO: GeoUtils are potentially slow if we use small=false with heavy testing
    boolean small = random().nextBoolean();

    // Every doc has 2 points:
@@ -92,12 +99,13 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
    verify(small, lats, lons);
  }

  @Nightly
  public void testAllLatEqual() throws Exception {

    // For GeoPointQuery, only run this test nightly:
    assumeTrue("GeoPoint*Query is too slow otherwise", TEST_NIGHTLY || forceSmall() == false);

    int numPoints = atLeast(10000);
    // TODO: GeoUtils are potentially slow if we use small=false with heavy testing
    // boolean small = random().nextBoolean();
    boolean small = true;
    boolean small = forceSmall() || random().nextBoolean();
    double lat = randomLat(small);
    double[] lats = new double[numPoints];
    double[] lons = new double[numPoints];

@@ -142,12 +150,13 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
    verify(small, lats, lons);
  }

  @Nightly
  public void testAllLonEqual() throws Exception {

    // For GeoPointQuery, only run this test nightly:
    assumeTrue("GeoPoint*Query is too slow otherwise", TEST_NIGHTLY || forceSmall() == false);

    int numPoints = atLeast(10000);
    // TODO: GeoUtils are potentially slow if we use small=false with heavy testing
    // boolean small = random().nextBoolean();
    boolean small = true;
    boolean small = forceSmall() || random().nextBoolean();
    double theLon = randomLon(small);
    double[] lats = new double[numPoints];
    double[] lons = new double[numPoints];
@@ -194,8 +203,11 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
    verify(small, lats, lons);
  }

  @Nightly
  public void testMultiValued() throws Exception {

    // For GeoPointQuery, only run this test nightly:
    assumeTrue("GeoPoint*Query is too slow otherwise", TEST_NIGHTLY || forceSmall() == false);

    int numPoints = atLeast(10000);
    // Every doc has 2 points:
    double[] lats = new double[2*numPoints];

@@ -209,9 +221,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

    // TODO: GeoUtils are potentially slow if we use small=false with heavy testing
    boolean small = random().nextBoolean();
    //boolean small = true;

    for (int id=0;id<numPoints;id++) {
      Document doc = new Document();

@@ -231,6 +241,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
      w.addDocument(doc);
    }

    // TODO: share w/ verify; just need parallel array of the expected ids
    if (random().nextBoolean()) {
      w.forceMerge(1);
    }

@@ -245,10 +256,10 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
      GeoRect rect = randomRect(small, small == false);

      if (VERBOSE) {
        System.out.println("\nTEST: iter=" + iter + " bbox=" + rect);
        System.out.println("\nTEST: iter=" + iter + " rect=" + rect);
      }

      Query query = newBBoxQuery(FIELD_NAME, rect);
      Query query = newRectQuery(FIELD_NAME, rect);

      final FixedBitSet hits = new FixedBitSet(r.maxDoc());
      s.search(query, new SimpleCollector() {
@ -340,7 +351,6 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
|
|||
double[] lats = new double[numPoints];
|
||||
double[] lons = new double[numPoints];
|
||||
|
||||
// TODO: GeoUtils are potentially slow if we use small=false with heavy testing
|
||||
boolean small = random().nextBoolean();
|
||||
|
||||
boolean haveRealDoc = false;
|
||||
|
@ -424,6 +434,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
|
|||
} else {
|
||||
result = -90 + 180.0 * random().nextDouble();
|
||||
}
|
||||
// TODO: we should not do this here! it weakens the test, and users don't pre-quantize the lat/lons they send us:
|
||||
return unscaleLat(scaleLat(result));
|
||||
}
|
||||
|
||||
|
@ -434,6 +445,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
|
|||
} else {
|
||||
result = -180 + 360.0 * random().nextDouble();
|
||||
}
|
||||
// TODO: we should not do this here! it weakens the test, and users don't pre-quantize the lat/lons they send us:
|
||||
return unscaleLon(scaleLon(result));
|
||||
}
|
||||
|
||||
|
@ -463,7 +475,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
|
|||
|
||||
protected abstract void addPointToDoc(String field, Document doc, double lat, double lon);
|
||||
|
||||
protected abstract Query newBBoxQuery(String field, GeoRect bbox);
|
||||
protected abstract Query newRectQuery(String field, GeoRect bbox);
|
||||
|
||||
protected abstract Query newDistanceQuery(String field, double centerLat, double centerLon, double radiusMeters);
|
||||
|
||||
|
@ -557,7 +569,6 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
|
|||
if (mbd != -1 && mbd < lats.length/100) {
|
||||
iwc.setMaxBufferedDocs(lats.length/100);
|
||||
}
|
||||
initIndexWriterConfig(FIELD_NAME, iwc);
|
||||
Directory dir;
|
||||
if (lats.length > 100000) {
|
||||
dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
|
||||
|
@ -631,15 +642,15 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
|
|||
VerifyHits verifyHits;
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
// BBox: don't allow dateline crossing when testing small:
|
||||
final GeoRect bbox = randomRect(small, small == false);
|
||||
// Rect: don't allow dateline crossing when testing small:
|
||||
final GeoRect rect = randomRect(small, small == false);
|
||||
|
||||
query = newBBoxQuery(FIELD_NAME, bbox);
|
||||
query = newRectQuery(FIELD_NAME, rect);
|
||||
|
||||
verifyHits = new VerifyHits() {
|
||||
@Override
|
||||
protected Boolean shouldMatch(double pointLat, double pointLon) {
|
||||
return rectContainsPoint(bbox, pointLat, pointLon);
|
||||
return rectContainsPoint(rect, pointLat, pointLon);
|
||||
}
|
||||
@Override
|
||||
protected void describe(int docID, double lat, double lon) {
|
||||
|
|
|
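(For context: a concrete subclass of BaseGeoPointTestCase only has to say how a point is
indexed and how each query flavor is built; the base class drives the random verification.
A minimal sketch, where MyPointField, MyRectQuery and MyDistanceQuery are hypothetical
stand-ins for whichever point encoding is under test:

  public class TestMyGeoPoint extends BaseGeoPointTestCase {
    @Override
    protected void addPointToDoc(String field, Document doc, double lat, double lon) {
      doc.add(new MyPointField(field, lat, lon));    // hypothetical field type
    }

    @Override
    protected Query newRectQuery(String field, GeoRect rect) {
      return new MyRectQuery(field, rect);           // hypothetical rect query
    }

    @Override
    protected Query newDistanceQuery(String field, double centerLat, double centerLon, double radiusMeters) {
      return new MyDistanceQuery(field, centerLat, centerLon, radiusMeters);   // hypothetical distance query
    }
  }
)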
@ -1,343 +0,0 @@
package org.apache.lucene.bkdtree3d;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.RamUsageEstimator;

/** Handles intersection of a shape with a BKD tree previously written with {@link BKD3DTreeWriter}.
 *
 * @lucene.experimental */

final class BKD3DTreeReader implements Accountable {
  final private int[] splitValues;
  final private int leafNodeOffset;
  final private long[] leafBlockFPs;
  final int maxDoc;
  final IndexInput in;

  enum Relation {CELL_INSIDE_SHAPE, SHAPE_CROSSES_CELL, SHAPE_OUTSIDE_CELL, SHAPE_INSIDE_CELL};

  interface ValueFilter {
    boolean accept(int docID);
    Relation compare(int cellXMin, int cellXMax, int cellYMin, int cellYMax, int cellZMin, int cellZMax);
  }

  public BKD3DTreeReader(IndexInput in, int maxDoc) throws IOException {

    // Read index:
    int numLeaves = in.readVInt();
    leafNodeOffset = numLeaves;

    // Tree is fully balanced binary tree, so number of nodes = numLeaves-1, except our nodeIDs are 1-based (splitValues[0] is unused):
    splitValues = new int[numLeaves];
    for(int i=0;i<numLeaves;i++) {
      splitValues[i] = in.readInt();
    }
    leafBlockFPs = new long[numLeaves];
    for(int i=0;i<numLeaves;i++) {
      leafBlockFPs[i] = in.readVLong();
    }

    this.maxDoc = maxDoc;
    this.in = in;
  }

  private static final class QueryState {
    final IndexInput in;
    byte[] scratch = new byte[16];
    final ByteArrayDataInput scratchReader = new ByteArrayDataInput(scratch);
    final DocIdSetBuilder docs;
    final int xMin;
    final int xMax;
    final int yMin;
    final int yMax;
    final int zMin;
    final int zMax;
    final ValueFilter valueFilter;

    public QueryState(IndexInput in, int maxDoc,
                      int xMin, int xMax,
                      int yMin, int yMax,
                      int zMin, int zMax,
                      ValueFilter valueFilter) {
      this.in = in;
      this.docs = new DocIdSetBuilder(maxDoc);
      this.xMin = xMin;
      this.xMax = xMax;
      this.yMin = yMin;
      this.yMax = yMax;
      this.zMin = zMin;
      this.zMax = zMax;
      this.valueFilter = valueFilter;
    }
  }

  public DocIdSet intersect(ValueFilter filter) throws IOException {
    return intersect(Integer.MIN_VALUE, Integer.MAX_VALUE,
                     Integer.MIN_VALUE, Integer.MAX_VALUE,
                     Integer.MIN_VALUE, Integer.MAX_VALUE,
                     filter);
  }

  /** Optimized intersect which takes the 3D bbox for the query and uses that to avoid filter.compare calls
   *  when cells are clearly outside the bbox. */
  public DocIdSet intersect(int xMin, int xMax, int yMin, int yMax, int zMin, int zMax, ValueFilter filter) throws IOException {

    QueryState state = new QueryState(in.clone(), maxDoc,
                                      xMin, xMax,
                                      yMin, yMax,
                                      zMin, zMax,
                                      filter);

    int hitCount = intersect(state, 1,
                             Integer.MIN_VALUE, Integer.MAX_VALUE,
                             Integer.MIN_VALUE, Integer.MAX_VALUE,
                             Integer.MIN_VALUE, Integer.MAX_VALUE);

    // NOTE: hitCount is an over-estimate in the multi-valued case:
    return state.docs.build(hitCount);
  }

  /** Fast path: this is called when the query rect fully encompasses all cells under this node. */
  private int addAll(QueryState state, int nodeID) throws IOException {
    //System.out.println("  addAll nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset);

    if (nodeID >= leafNodeOffset) {

      /*
      System.out.println("A: " + BKDTreeWriter.decodeLat(cellLatMinEnc)
      + " " + BKDTreeWriter.decodeLat(cellLatMaxEnc)
      + " " + BKDTreeWriter.decodeLon(cellLonMinEnc)
      + " " + BKDTreeWriter.decodeLon(cellLonMaxEnc));
      */

      // Leaf node
      long fp = leafBlockFPs[nodeID-leafNodeOffset];
      //System.out.println("  leaf fp=" + fp);
      state.in.seek(fp);

      //System.out.println("  seek to leafFP=" + fp);
      // How many points are stored in this leaf cell:
      int count = state.in.readVInt();
      //System.out.println("  count=" + count);
      state.docs.grow(count);
      for(int i=0;i<count;i++) {
        int docID = state.in.readInt();
        state.docs.add(docID);

        // Up above in the recursion we asked valueFilter to relate our cell, and it returned Relation.CELL_INSIDE_SHAPE
        // so all docs inside this cell better be accepted by the filter:

        // NOTE: this is too anal, because we lost precision in the pack/unpack (8 bytes to 4 bytes), a point that's a bit above/below the
        // earth's surface due to that quantization may incorrectly evaluate as not inside the shape:
        // assert state.valueFilter.accept(docID);
      }

      return count;
    } else {
      int count = addAll(state, 2*nodeID);
      count += addAll(state, 2*nodeID+1);
      return count;
    }
  }

  private int intersect(QueryState state,
                        int nodeID,
                        int cellXMin, int cellXMax,
                        int cellYMin, int cellYMax,
                        int cellZMin, int cellZMax)
    throws IOException {

    //System.out.println("BKD3D.intersect nodeID=" + nodeID + " cellX=" + cellXMin + " TO " + cellXMax + ", cellY=" + cellYMin + " TO " + cellYMax + ", cellZ=" + cellZMin + " TO " + cellZMax);

    if (cellXMin >= state.xMin ||
        cellXMax <= state.xMax ||
        cellYMin >= state.yMin ||
        cellYMax <= state.yMax ||
        cellZMin >= state.zMin ||
        cellZMax <= state.zMax) {

      // Only call the filter when the current cell does not fully contain the bbox:
      Relation r = state.valueFilter.compare(cellXMin, cellXMax,
                                             cellYMin, cellYMax,
                                             cellZMin, cellZMax);
      //System.out.println("  relation: " + r);

      if (r == Relation.SHAPE_OUTSIDE_CELL) {
        // This cell is fully outside of the query shape: stop recursing
        return 0;
      } else if (r == Relation.CELL_INSIDE_SHAPE) {
        // This cell is fully inside of the query shape: recursively add all points in this cell without filtering

        /*
        System.out.println(Thread.currentThread() + ": switch to addAll at cell" +
        " x=" + Geo3DDocValuesFormat.decodeValue(cellXMin) + " to " + Geo3DDocValuesFormat.decodeValue(cellXMax) +
        " y=" + Geo3DDocValuesFormat.decodeValue(cellYMin) + " to " + Geo3DDocValuesFormat.decodeValue(cellYMax) +
        " z=" + Geo3DDocValuesFormat.decodeValue(cellZMin) + " to " + Geo3DDocValuesFormat.decodeValue(cellZMax));
        */
        return addAll(state, nodeID);
      } else {
        // The cell crosses the shape boundary, so we fall through and do full filtering
      }
    } else {
      // The whole point of the incoming bbox (state.xMin/xMax/etc.) is that it is
      // supposed to fully enclose the shape, so this cell we are visiting, which
      // fully contains the query's bbox, better in turn fully contain the shape!
      assert state.valueFilter.compare(cellXMin, cellXMax, cellYMin, cellYMax, cellZMin, cellZMax) == Relation.SHAPE_INSIDE_CELL: "got " + state.valueFilter.compare(cellXMin, cellXMax, cellYMin, cellYMax, cellZMin, cellZMax);
    }

    //System.out.println("\nintersect node=" + nodeID + " vs " + leafNodeOffset);

    if (nodeID >= leafNodeOffset) {
      //System.out.println("  leaf");
      // Leaf node; scan and filter all points in this block:
      //System.out.println("  intersect leaf nodeID=" + nodeID + " vs leafNodeOffset=" + leafNodeOffset + " fp=" + leafBlockFPs[nodeID-leafNodeOffset]);
      int hitCount = 0;

      long fp = leafBlockFPs[nodeID-leafNodeOffset];

      /*
      System.out.println("I: " + BKDTreeWriter.decodeLat(cellLatMinEnc)
      + " " + BKDTreeWriter.decodeLat(cellLatMaxEnc)
      + " " + BKDTreeWriter.decodeLon(cellLonMinEnc)
      + " " + BKDTreeWriter.decodeLon(cellLonMaxEnc));
      */

      state.in.seek(fp);

      // How many points are stored in this leaf cell:
      int count = state.in.readVInt();

      state.docs.grow(count);
      //System.out.println("  count=" + count);
      for(int i=0;i<count;i++) {
        int docID = state.in.readInt();
        //System.out.println("  check docID=" + docID);
        if (state.valueFilter.accept(docID)) {
          state.docs.add(docID);
          hitCount++;
        }
      }

      return hitCount;

    } else {

      //System.out.println("  non-leaf");

      int splitDim = BKD3DTreeWriter.getSplitDim(cellXMin, cellXMax,
                                                 cellYMin, cellYMax,
                                                 cellZMin, cellZMax);

      int splitValue = splitValues[nodeID];

      int count = 0;

      if (splitDim == 0) {

        //System.out.println("  split on lat=" + splitValue);

        // Inner node split on x:

        // Left node:
        if (state.xMin <= splitValue) {
          //System.out.println("  recurse left");
          count += intersect(state,
                             2*nodeID,
                             cellXMin, splitValue,
                             cellYMin, cellYMax,
                             cellZMin, cellZMax);
        }

        // Right node:
        if (state.xMax >= splitValue) {
          //System.out.println("  recurse right");
          count += intersect(state,
                             2*nodeID+1,
                             splitValue, cellXMax,
                             cellYMin, cellYMax,
                             cellZMin, cellZMax);
        }

      } else if (splitDim == 1) {
        // Inner node split on y:

        // System.out.println("  split on lon=" + splitValue);

        // Left node:
        if (state.yMin <= splitValue) {
          // System.out.println("  recurse left");
          count += intersect(state,
                             2*nodeID,
                             cellXMin, cellXMax,
                             cellYMin, splitValue,
                             cellZMin, cellZMax);
        }

        // Right node:
        if (state.yMax >= splitValue) {
          // System.out.println("  recurse right");
          count += intersect(state,
                             2*nodeID+1,
                             cellXMin, cellXMax,
                             splitValue, cellYMax,
                             cellZMin, cellZMax);
        }
      } else {
        // Inner node split on z:

        // System.out.println("  split on lon=" + splitValue);

        // Left node:
        if (state.zMin <= splitValue) {
          // System.out.println("  recurse left");
          count += intersect(state,
                             2*nodeID,
                             cellXMin, cellXMax,
                             cellYMin, cellYMax,
                             cellZMin, splitValue);
        }

        // Right node:
        if (state.zMax >= splitValue) {
          // System.out.println("  recurse right");
          count += intersect(state,
                             2*nodeID+1,
                             cellXMin, cellXMax,
                             cellYMin, cellYMax,
                             splitValue, cellZMax);
        }
      }

      return count;
    }
  }

  @Override
  public long ramBytesUsed() {
    return splitValues.length * RamUsageEstimator.NUM_BYTES_INT +
      leafBlockFPs.length * RamUsageEstimator.NUM_BYTES_LONG;
  }
}
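(For reference: all search-time traversal of this tree is driven through the ValueFilter
callback above. A minimal sketch of intersecting an already-encoded 3D box, where
xMin/xMax/yMin/yMax/zMin/zMax are assumed to be already-encoded ints supplied by the
caller; a real filter would also re-check each doc's exact point in accept():

  DocIdSet hits = reader.intersect(xMin, xMax, yMin, yMax, zMin, zMax,
      new BKD3DTreeReader.ValueFilter() {
        @Override
        public boolean accept(int docID) {
          return true;   // sketch only: accept everything a crossing cell hands us
        }

        @Override
        public BKD3DTreeReader.Relation compare(int cellXMin, int cellXMax,
                                                int cellYMin, int cellYMax,
                                                int cellZMin, int cellZMax) {
          if (cellXMax < xMin || cellXMin > xMax ||
              cellYMax < yMin || cellYMin > yMax ||
              cellZMax < zMin || cellZMin > zMax) {
            // Cell and box are disjoint: this subtree is pruned
            return BKD3DTreeReader.Relation.SHAPE_OUTSIDE_CELL;
          } else if (cellXMin >= xMin && cellXMax <= xMax &&
                     cellYMin >= yMin && cellYMax <= yMax &&
                     cellZMin >= zMin && cellZMax <= zMax) {
            // Cell fully inside the box: triggers the addAll fast path, no per-doc filtering
            return BKD3DTreeReader.Relation.CELL_INSIDE_SHAPE;
          } else {
            return BKD3DTreeReader.Relation.SHAPE_CROSSES_CELL;
          }
        }
      });
)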
@ -1,924 +0,0 @@
package org.apache.lucene.bkdtree3d;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.RamUsageEstimator;

// TODO
//   - we could also index "auto-prefix terms" here, and use better compression, and maybe only use for the "fully contained" case so we'd
//     only index docIDs
//   - the index could be efficiently encoded as an FST, so we don't have wasteful
//     (monotonic) long[] leafBlockFPs; or we could use MonotonicLongValues ... but then
//     the index is already plenty small: 60M OSM points --> 1.1 MB with 128 points
//     per leaf, and you can reduce that by putting more points per leaf
//   - we can quantize the split values to 2 bytes (short): http://people.csail.mit.edu/tmertens/papers/qkdtree.pdf
//   - we could use threads while building; the higher nodes are very parallelizable
//   - generalize to N dimensions? I think there are reasonable use cases here, e.g.
//     2 dimensional points to store houses, plus e.g. 3rd dimension for "household income"

/** Recursively builds a BKD tree to assign all incoming points to smaller
 *  and smaller rectangles until the number of points in a given
 *  rectangle is <= the <code>maxPointsInLeafNode</code>.  The tree is
 *  fully balanced, which means the leaf nodes will have between 50% and 100% of
 *  the requested <code>maxPointsInLeafNode</code>, except for the adversarial case
 *  of indexing exactly the same point many times.
 *
 *  <p>
 *  See <a href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a> for details.
 *
 *  <p>This consumes heap during writing: it allocates a <code>LongBitSet(numPoints)</code>,
 *  and for any nodes with fewer than <code>maxPointsSortInHeap</code>, it holds
 *  the points in memory as simple java arrays.
 *
 *  <p>
 *  <b>NOTE</b>: This can write at most Integer.MAX_VALUE * <code>maxPointsInLeafNode</code> total points.
 *
 *  @lucene.experimental */

class BKD3DTreeWriter {

  // x (int), y (int), z (int) + ord (long) + docID (int)
  static final int BYTES_PER_DOC = RamUsageEstimator.NUM_BYTES_LONG + 4 * RamUsageEstimator.NUM_BYTES_INT;

  //static final boolean DEBUG = false;

  public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 1024;

  /** This works out to max of ~10 MB peak heap tied up during writing: */
  public static final int DEFAULT_MAX_POINTS_SORT_IN_HEAP = 128*1024;

  private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
  private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);

  private final Directory tempDir;
  private final String tempFileNamePrefix;

  private OfflineSorter.ByteSequencesWriter offlineWriter;
  private GrowingHeapWriter heapWriter;

  private IndexOutput tempInput;
  private final int maxPointsInLeafNode;
  private final int maxPointsSortInHeap;

  private long pointCount;

  private final int[] scratchDocIDs;

  public BKD3DTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException {
    this(tempDir, tempFileNamePrefix, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP);
  }

  // TODO: instead of maxPointsSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
  public BKD3DTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
    verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
    this.tempDir = tempDir;
    this.tempFileNamePrefix = tempFileNamePrefix;
    this.maxPointsInLeafNode = maxPointsInLeafNode;
    this.maxPointsSortInHeap = maxPointsSortInHeap;
    scratchDocIDs = new int[maxPointsInLeafNode];

    // We write the first maxPointsSortInHeap points in heap, then cut over to offline for additional points:
    heapWriter = new GrowingHeapWriter(maxPointsSortInHeap);
  }

  public static void verifyParams(int maxPointsInLeafNode, int maxPointsSortInHeap) {
    if (maxPointsInLeafNode <= 0) {
      throw new IllegalArgumentException("maxPointsInLeafNode must be > 0; got " + maxPointsInLeafNode);
    }
    if (maxPointsInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) {
      throw new IllegalArgumentException("maxPointsInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsInLeafNode);
    }
    if (maxPointsSortInHeap < maxPointsInLeafNode) {
      throw new IllegalArgumentException("maxPointsSortInHeap must be >= maxPointsInLeafNode; got " + maxPointsSortInHeap + " vs maxPointsInLeafNode="+ maxPointsInLeafNode);
    }
    if (maxPointsSortInHeap > ArrayUtil.MAX_ARRAY_LENGTH) {
      throw new IllegalArgumentException("maxPointsSortInHeap must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsSortInHeap);
    }
  }

  /** If the current segment has too many points then we switch over to temp files / offline sort. */
  private void switchToOffline() throws IOException {

    // For each .add we just append to this input file, then in .finish we sort this input and recursively build the tree:
    tempInput = tempDir.createTempOutput(tempFileNamePrefix, "bkd3d", IOContext.DEFAULT);
    offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput);
    for(int i=0;i<pointCount;i++) {
      scratchBytesOutput.reset(scratchBytes);
      scratchBytesOutput.writeInt(heapWriter.xs[i]);
      scratchBytesOutput.writeInt(heapWriter.ys[i]);
      scratchBytesOutput.writeInt(heapWriter.zs[i]);
      scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
      scratchBytesOutput.writeVLong(i);
      // TODO: can/should OfflineSorter optimize the fixed-width case?
      offlineWriter.write(scratchBytes, 0, scratchBytes.length);
    }

    heapWriter = null;
  }

  public void add(int x, int y, int z, int docID) throws IOException {

    if (pointCount >= maxPointsSortInHeap) {
      if (offlineWriter == null) {
        switchToOffline();
      }
      scratchBytesOutput.reset(scratchBytes);
      scratchBytesOutput.writeInt(x);
      scratchBytesOutput.writeInt(y);
      scratchBytesOutput.writeInt(z);
      scratchBytesOutput.writeVInt(docID);
      scratchBytesOutput.writeVLong(pointCount);
      offlineWriter.write(scratchBytes, 0, scratchBytes.length);
    } else {
      // Not too many points added yet, continue using heap:
      heapWriter.append(x, y, z, pointCount, docID);
    }

    pointCount++;
  }

  /** Changes incoming {@link ByteSequencesWriter} file to a fixed-width-per-entry file, because we need to be able to slice
   *  as we recurse in {@link #build}. */
  private Writer convertToFixedWidth(String in) throws IOException {
    BytesRefBuilder scratch = new BytesRefBuilder();
    scratch.grow(BYTES_PER_DOC);
    BytesRef bytes = scratch.get();
    ByteArrayDataInput dataReader = new ByteArrayDataInput();

    OfflineSorter.ByteSequencesReader reader = null;
    Writer sortedWriter = null;
    boolean success = false;
    try {
      reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE));
      sortedWriter = getWriter(pointCount);
      for (long i=0;i<pointCount;i++) {
        boolean result = reader.read(scratch);
        assert result;
        dataReader.reset(bytes.bytes, bytes.offset, bytes.length);
        int x = dataReader.readInt();
        int y = dataReader.readInt();
        int z = dataReader.readInt();
        int docID = dataReader.readVInt();
        long ord = dataReader.readVLong();
        assert docID >= 0: "docID=" + docID;
        sortedWriter.append(x, y, z, ord, docID);
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(sortedWriter, reader);
      } else {
        IOUtils.closeWhileHandlingException(sortedWriter, reader);
        try {
          sortedWriter.destroy();
        } catch (Throwable t) {
          // Suppress to keep throwing original exc
        }
      }
    }

    return sortedWriter;
  }

  /** dim: 0=x, 1=y, 2=z */
  private Writer sort(int dim) throws IOException {
    if (heapWriter != null) {

      assert pointCount < Integer.MAX_VALUE;

      // All buffered points are still in heap
      new InPlaceMergeSorter() {
        @Override
        protected void swap(int i, int j) {
          int docID = heapWriter.docIDs[i];
          heapWriter.docIDs[i] = heapWriter.docIDs[j];
          heapWriter.docIDs[j] = docID;

          long ord = heapWriter.ords[i];
          heapWriter.ords[i] = heapWriter.ords[j];
          heapWriter.ords[j] = ord;

          int x = heapWriter.xs[i];
          heapWriter.xs[i] = heapWriter.xs[j];
          heapWriter.xs[j] = x;

          int y = heapWriter.ys[i];
          heapWriter.ys[i] = heapWriter.ys[j];
          heapWriter.ys[j] = y;

          int z = heapWriter.zs[i];
          heapWriter.zs[i] = heapWriter.zs[j];
          heapWriter.zs[j] = z;
        }

        @Override
        protected int compare(int i, int j) {
          int cmp;
          if (dim == 0) {
            cmp = Integer.compare(heapWriter.xs[i], heapWriter.xs[j]);
          } else if (dim == 1) {
            cmp = Integer.compare(heapWriter.ys[i], heapWriter.ys[j]);
          } else {
            cmp = Integer.compare(heapWriter.zs[i], heapWriter.zs[j]);
          }
          if (cmp != 0) {
            return cmp;
          }

          // Tie-break
          cmp = Integer.compare(heapWriter.docIDs[i], heapWriter.docIDs[j]);
          if (cmp != 0) {
            return cmp;
          }

          return Long.compare(heapWriter.ords[i], heapWriter.ords[j]);
        }
      }.sort(0, (int) pointCount);

      HeapWriter sorted = new HeapWriter((int) pointCount);
      //System.out.println("sorted dim=" + dim);
      for(int i=0;i<pointCount;i++) {
        /*
        System.out.println("  docID=" + heapWriter.docIDs[i] +
        " x=" + heapWriter.xs[i] +
        " y=" + heapWriter.ys[i] +
        " z=" + heapWriter.zs[i]);
        */
        sorted.append(heapWriter.xs[i],
                      heapWriter.ys[i],
                      heapWriter.zs[i],
                      heapWriter.ords[i],
                      heapWriter.docIDs[i]);
      }
      sorted.close();

      return sorted;
    } else {

      // Offline sort:
      assert tempInput != null;

      final ByteArrayDataInput reader = new ByteArrayDataInput();
      Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
        private final ByteArrayDataInput readerB = new ByteArrayDataInput();

        @Override
        public int compare(BytesRef a, BytesRef b) {
          reader.reset(a.bytes, a.offset, a.length);
          final int xa = reader.readInt();
          final int ya = reader.readInt();
          final int za = reader.readInt();
          final int docIDA = reader.readVInt();
          final long ordA = reader.readVLong();

          reader.reset(b.bytes, b.offset, b.length);
          final int xb = reader.readInt();
          final int yb = reader.readInt();
          final int zb = reader.readInt();
          final int docIDB = reader.readVInt();
          final long ordB = reader.readVLong();

          int cmp;
          if (dim == 0) {
            cmp = Integer.compare(xa, xb);
          } else if (dim == 1) {
            cmp = Integer.compare(ya, yb);
          } else {
            cmp = Integer.compare(za, zb);
          }
          if (cmp != 0) {
            return cmp;
          }

          // Tie-break
          cmp = Integer.compare(docIDA, docIDB);
          if (cmp != 0) {
            return cmp;
          }

          return Long.compare(ordA, ordB);
        }
      };

      boolean success = false;
      OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp);
      String sortedFileName = sorter.sort(tempInput.getName());
      try {
        Writer writer = convertToFixedWidth(sortedFileName);
        success = true;
        return writer;
      } finally {
        if (success) {
          tempDir.deleteFile(sortedFileName);
        } else {
          IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFileName);
        }
      }
    }
  }

  /** Writes the BKD tree to the provided {@link IndexOutput} and returns the file offset where the index was written. */
  public long finish(IndexOutput out) throws IOException {
    //System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapWriter + " maxPointsInLeafNode=" + maxPointsInLeafNode);

    if (offlineWriter != null) {
      offlineWriter.close();
    }

    LongBitSet bitSet = new LongBitSet(pointCount);

    long countPerLeaf = pointCount;
    long innerNodeCount = 1;

    while (countPerLeaf > maxPointsInLeafNode) {
      countPerLeaf = (countPerLeaf+1)/2;
      innerNodeCount *= 2;
    }

    //System.out.println("innerNodeCount=" + innerNodeCount + " countPerLeaf=" + countPerLeaf);

    if (1+2*innerNodeCount >= Integer.MAX_VALUE) {
      throw new IllegalStateException("too many nodes; increase maxPointsInLeafNode (currently " + maxPointsInLeafNode + ") and reindex");
    }

    innerNodeCount--;

    int numLeaves = (int) (innerNodeCount+1);
    //System.out.println("  numLeaves=" + numLeaves);

    // Indexed by nodeID, but first (root) nodeID is 1
    int[] splitValues = new int[numLeaves];

    // +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 7)
    long[] leafBlockFPs = new long[numLeaves];

    // Make sure the math above "worked":
    assert pointCount / splitValues.length <= maxPointsInLeafNode: "pointCount=" + pointCount + " splitValues.length=" + splitValues.length + " maxPointsInLeafNode=" + maxPointsInLeafNode;
    //System.out.println("  avg pointsPerLeaf=" + (pointCount/splitValues.length));

    // Sort all docs once by x, once by y, once by z:
    Writer xSortedWriter = null;
    Writer ySortedWriter = null;
    Writer zSortedWriter = null;

    boolean success = false;
    try {
      xSortedWriter = sort(0);
      ySortedWriter = sort(1);
      zSortedWriter = sort(2);
      heapWriter = null;

      build(1, numLeaves,
            new PathSlice(xSortedWriter, 0, pointCount),
            new PathSlice(ySortedWriter, 0, pointCount),
            new PathSlice(zSortedWriter, 0, pointCount),
            bitSet, out,
            Integer.MIN_VALUE, Integer.MAX_VALUE,
            Integer.MIN_VALUE, Integer.MAX_VALUE,
            Integer.MIN_VALUE, Integer.MAX_VALUE,
            splitValues,
            leafBlockFPs);
      success = true;
    } finally {
      if (success) {
        xSortedWriter.destroy();
        ySortedWriter.destroy();
        zSortedWriter.destroy();
        if (tempInput != null) {
          tempDir.deleteFile(tempInput.getName());
        }
      } else {
        try {
          xSortedWriter.destroy();
        } catch (Throwable t) {
          // Suppress to keep throwing original exc
        }
        try {
          ySortedWriter.destroy();
        } catch (Throwable t) {
          // Suppress to keep throwing original exc
        }
        try {
          zSortedWriter.destroy();
        } catch (Throwable t) {
          // Suppress to keep throwing original exc
        }
        if (tempInput != null) {
          IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName());
        }
      }
    }

    //System.out.println("Total nodes: " + innerNodeCount);

    // Write index:
    long indexFP = out.getFilePointer();
    //System.out.println("indexFP=" + indexFP);
    out.writeVInt(numLeaves);

    // NOTE: splitValues[0] is unused, because nodeID is 1-based:
    for (int i=0;i<splitValues.length;i++) {
      out.writeInt(splitValues[i]);
    }
    for (int i=0;i<leafBlockFPs.length;i++) {
      out.writeVLong(leafBlockFPs[i]);
    }

    return indexFP;
  }

  /** Sliced reference to points in an OfflineSorter.ByteSequencesWriter file. */
  private static final class PathSlice {
    final Writer writer;
    final long start;
    final long count;

    public PathSlice(Writer writer, long start, long count) {
      this.writer = writer;
      this.start = start;
      this.count = count;
    }

    @Override
    public String toString() {
      return "PathSlice(start=" + start + " count=" + count + " writer=" + writer + ")";
    }
  }

  /** Marks bits for the ords (points) that belong in the left sub tree. */
  private int markLeftTree(int splitDim, PathSlice source, LongBitSet bitSet,
                           int minX, int maxX,
                           int minY, int maxY,
                           int minZ, int maxZ) throws IOException {

    // This is the size of our left tree
    long leftCount = source.count / 2;

    // Read the split value:
    //if (DEBUG) System.out.println("  leftCount=" + leftCount + " vs " + source.count);
    Reader reader = source.writer.getReader(source.start + leftCount);
    boolean success = false;
    int splitValue;
    try {
      boolean result = reader.next();
      assert result;

      int x = reader.x();
      assert x >= minX && x <= maxX: "x=" + x + " minX=" + minX + " maxX=" + maxX;

      int y = reader.y();
      assert y >= minY && y <= maxY: "y=" + y + " minY=" + minY + " maxY=" + maxY;

      int z = reader.z();
      assert z >= minZ && z <= maxZ: "z=" + z + " minZ=" + minZ + " maxZ=" + maxZ;

      if (splitDim == 0) {
        splitValue = x;
      } else if (splitDim == 1) {
        splitValue = y;
      } else {
        splitValue = z;
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(reader);
      } else {
        IOUtils.closeWhileHandlingException(reader);
      }
    }

    // Mark ords that fall into the left half, and also handle the == boundary case:
    assert bitSet.cardinality() == 0: "cardinality=" + bitSet.cardinality();

    success = false;
    reader = source.writer.getReader(source.start);
    try {
      int lastValue = Integer.MIN_VALUE;
      for (int i=0;i<leftCount;i++) {
        boolean result = reader.next();
        assert result;
        int x = reader.x();
        int y = reader.y();
        int z = reader.z();

        int value;
        if (splitDim == 0) {
          value = x;
        } else if (splitDim == 1) {
          value = y;
        } else {
          value = z;
        }

        // Our input source is supposed to be sorted on the incoming dimension:
        assert value >= lastValue;
        lastValue = value;

        assert value <= splitValue: "i=" + i + " value=" + value + " vs splitValue=" + splitValue;
        long ord = reader.ord();
        int docID = reader.docID();
        assert docID >= 0: "docID=" + docID + " reader=" + reader;

        // We should never see dup ords:
        assert bitSet.get(ord) == false;
        bitSet.set(ord);
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(reader);
      } else {
        IOUtils.closeWhileHandlingException(reader);
      }
    }

    assert leftCount == bitSet.cardinality(): "leftCount=" + leftCount + " cardinality=" + bitSet.cardinality();

    return splitValue;
  }

  // Split on the dim with the largest range:
  static int getSplitDim(int minX, int maxX, int minY, int maxY, int minZ, int maxZ) {
    long xRange = (long) maxX - (long) minX;
    long yRange = (long) maxY - (long) minY;
    long zRange = (long) maxZ - (long) minZ;

    if (xRange > yRange) {
      if (xRange > zRange) {
        return 0;
      } else {
        return 2;
      }
    } else if (yRange > zRange) {
      return 1;
    } else {
      return 2;
    }
  }

  /** The incoming PathSlice for the dim we will split is already partitioned/sorted. */
  private void build(int nodeID, int leafNodeOffset,
                     PathSlice lastXSorted,
                     PathSlice lastYSorted,
                     PathSlice lastZSorted,
                     LongBitSet bitSet,
                     IndexOutput out,
                     int minX, int maxX,
                     int minY, int maxY,
                     int minZ, int maxZ,
                     int[] splitValues,
                     long[] leafBlockFPs) throws IOException {

    long count = lastXSorted.count;
    assert count > 0;
    assert count <= ArrayUtil.MAX_ARRAY_LENGTH;

    assert count == lastYSorted.count;
    assert count == lastZSorted.count;

    //if (DEBUG) System.out.println("\nBUILD: nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset + "\n  lastXSorted=" + lastXSorted + "\n  lastYSorted=" + lastYSorted + "\n  lastZSorted=" + lastZSorted + "\n  count=" + lastXSorted.count + " x=" + minX + " TO " + maxX + " y=" + minY + " TO " + maxY + " z=" + minZ + " TO " + maxZ);

    if (nodeID >= leafNodeOffset) {
      // Leaf node: write block
      //if (DEBUG) System.out.println("  leaf");
      assert maxX >= minX;
      assert maxY >= minY;
      assert maxZ >= minZ;

      //System.out.println("\nleaf:\n  lat range: " + ((long) maxLatEnc-minLatEnc));
      //System.out.println("  lon range: " + ((long) maxLonEnc-minLonEnc));

      // Sort by docID in the leaf so we get sequentiality at search time (may not matter?):
      Reader reader = lastXSorted.writer.getReader(lastXSorted.start);

      assert count <= scratchDocIDs.length: "count=" + count + " scratchDocIDs.length=" + scratchDocIDs.length;

      boolean success = false;
      try {
        for (int i=0;i<count;i++) {

          // NOTE: we discard ord at this point; we only needed it temporarily
          // during building to uniquely identify each point to properly handle
          // the multi-valued case (one docID having multiple values):

          // We also discard lat/lon, since at search time, we reside on the
          // wrapped doc values for this:

          boolean result = reader.next();
          assert result;
          scratchDocIDs[i] = reader.docID();
        }
        success = true;
      } finally {
        if (success) {
          IOUtils.close(reader);
        } else {
          IOUtils.closeWhileHandlingException(reader);
        }
      }

      Arrays.sort(scratchDocIDs, 0, (int) count);

      // Dedup docIDs: for the multi-valued case where more than one value for the doc
      // wound up in this leaf cell, we only need to store the docID once:
      int lastDocID = -1;
      int uniqueCount = 0;
      for(int i=0;i<count;i++) {
        int docID = scratchDocIDs[i];
        if (docID != lastDocID) {
          uniqueCount++;
          lastDocID = docID;
        }
      }
      assert uniqueCount <= count;

      long startFP = out.getFilePointer();
      out.writeVInt(uniqueCount);

      // Save the block file pointer:
      leafBlockFPs[nodeID - leafNodeOffset] = startFP;
      //System.out.println("  leafFP=" + startFP);

      lastDocID = -1;
      for (int i=0;i<count;i++) {
        // Absolute int encode; with "vInt of deltas" encoding, the .kdd size dropped from
        // 697 MB -> 539 MB, but query time for 225 queries went from 1.65 sec -> 2.64 sec.
        // I think if we also indexed prefix terms here we could do less costly compression
        // on those lists:
        int docID = scratchDocIDs[i];
        if (docID != lastDocID) {
          out.writeInt(docID);
          //System.out.println("  write docID=" + docID);
          lastDocID = docID;
        }
      }
      //long endFP = out.getFilePointer();
      //System.out.println("  bytes/doc: " + ((endFP - startFP) / count));
    } else {

      int splitDim = getSplitDim(minX, maxX, minY, maxY, minZ, maxZ);
      //System.out.println("  splitDim=" + splitDim);

      PathSlice source;

      if (splitDim == 0) {
        source = lastXSorted;
      } else if (splitDim == 1) {
        source = lastYSorted;
      } else {
        source = lastZSorted;
      }

      // We let ties go to either side, so we should never get down to count == 0, even
      // in adversarial case (all values are the same):
      assert count > 0;

      // Inner node: partition/recurse
      //if (DEBUG) System.out.println("  non-leaf");

      assert nodeID < splitValues.length: "nodeID=" + nodeID + " splitValues.length=" + splitValues.length;

      int splitValue = markLeftTree(splitDim, source, bitSet,
                                    minX, maxX,
                                    minY, maxY,
                                    minZ, maxZ);
      long leftCount = count/2;

      // TODO: we could save split value in here so we don't have to re-open file later:

      // Partition the other (not split) dims into sorted left and right sets, so we can recurse.
      // This is somewhat hairy: we partition the next X, Y set according to how we had just
      // partitioned the Z set, etc.

      Writer[] leftWriters = new Writer[3];
      Writer[] rightWriters = new Writer[3];

      for(int dim=0;dim<3;dim++) {
        if (dim == splitDim) {
          continue;
        }

        Writer leftWriter = null;
        Writer rightWriter = null;
        Reader reader = null;

        boolean success = false;

        int nextLeftCount = 0;

        PathSlice nextSource;
        if (dim == 0) {
          nextSource = lastXSorted;
        } else if (dim == 1) {
          nextSource = lastYSorted;
        } else {
          nextSource = lastZSorted;
        }

        try {
          leftWriter = getWriter(leftCount);
          rightWriter = getWriter(nextSource.count - leftCount);

          assert nextSource.count == count;
          reader = nextSource.writer.getReader(nextSource.start);

          // TODO: we could compute the split value here for each sub-tree and save an O(N) pass on recursion, but makes code hairier and only
          // changes the constant factor of building, not the big-oh:
          for (int i=0;i<count;i++) {
            boolean result = reader.next();
            assert result;
            int x = reader.x();
            int y = reader.y();
            int z = reader.z();
            long ord = reader.ord();
            int docID = reader.docID();
            assert docID >= 0: "docID=" + docID + " reader=" + reader;
            //System.out.println("  i=" + i + " x=" + x + " ord=" + ord + " docID=" + docID);
            if (bitSet.get(ord)) {
              if (splitDim == 0) {
                assert x <= splitValue: "x=" + x + " splitValue=" + splitValue;
              } else if (splitDim == 1) {
                assert y <= splitValue: "y=" + y + " splitValue=" + splitValue;
              } else {
                assert z <= splitValue: "z=" + z + " splitValue=" + splitValue;
              }
              leftWriter.append(x, y, z, ord, docID);
              nextLeftCount++;
            } else {
              if (splitDim == 0) {
                assert x >= splitValue: "x=" + x + " splitValue=" + splitValue;
              } else if (splitDim == 1) {
                assert y >= splitValue: "y=" + y + " splitValue=" + splitValue;
              } else {
                assert z >= splitValue: "z=" + z + " splitValue=" + splitValue;
              }
              rightWriter.append(x, y, z, ord, docID);
            }
          }
          success = true;
        } finally {
          if (success) {
            IOUtils.close(reader, leftWriter, rightWriter);
          } else {
            IOUtils.closeWhileHandlingException(reader, leftWriter, rightWriter);
          }
        }

        assert leftCount == nextLeftCount: "leftCount=" + leftCount + " nextLeftCount=" + nextLeftCount;
        leftWriters[dim] = leftWriter;
        rightWriters[dim] = rightWriter;
      }
      bitSet.clear(0, pointCount);

      long rightCount = count - leftCount;

      boolean success = false;
      try {
        if (splitDim == 0) {
          build(2*nodeID, leafNodeOffset,
                new PathSlice(source.writer, source.start, leftCount),
                new PathSlice(leftWriters[1], 0, leftCount),
                new PathSlice(leftWriters[2], 0, leftCount),
                bitSet,
                out,
                minX, splitValue,
                minY, maxY,
                minZ, maxZ,
                splitValues, leafBlockFPs);
          leftWriters[1].destroy();
          leftWriters[2].destroy();

          build(2*nodeID+1, leafNodeOffset,
                new PathSlice(source.writer, source.start+leftCount, rightCount),
                new PathSlice(rightWriters[1], 0, rightCount),
                new PathSlice(rightWriters[2], 0, rightCount),
                bitSet,
                out,
                splitValue, maxX,
                minY, maxY,
                minZ, maxZ,
                splitValues, leafBlockFPs);
          rightWriters[1].destroy();
          rightWriters[2].destroy();
        } else if (splitDim == 1) {
          build(2*nodeID, leafNodeOffset,
                new PathSlice(leftWriters[0], 0, leftCount),
                new PathSlice(source.writer, source.start, leftCount),
                new PathSlice(leftWriters[2], 0, leftCount),
                bitSet,
                out,
                minX, maxX,
                minY, splitValue,
                minZ, maxZ,
                splitValues, leafBlockFPs);
          leftWriters[0].destroy();
          leftWriters[2].destroy();

          build(2*nodeID+1, leafNodeOffset,
                new PathSlice(rightWriters[0], 0, rightCount),
                new PathSlice(source.writer, source.start+leftCount, rightCount),
                new PathSlice(rightWriters[2], 0, rightCount),
                bitSet,
                out,
                minX, maxX,
                splitValue, maxY,
                minZ, maxZ,
                splitValues, leafBlockFPs);
          rightWriters[0].destroy();
          rightWriters[2].destroy();
        } else {
          build(2*nodeID, leafNodeOffset,
                new PathSlice(leftWriters[0], 0, leftCount),
                new PathSlice(leftWriters[1], 0, leftCount),
                new PathSlice(source.writer, source.start, leftCount),
                bitSet,
                out,
                minX, maxX,
                minY, maxY,
                minZ, splitValue,
                splitValues, leafBlockFPs);
          leftWriters[0].destroy();
          leftWriters[1].destroy();

          build(2*nodeID+1, leafNodeOffset,
                new PathSlice(rightWriters[0], 0, rightCount),
                new PathSlice(rightWriters[1], 0, rightCount),
                new PathSlice(source.writer, source.start+leftCount, rightCount),
                bitSet,
                out,
                minX, maxX,
                minY, maxY,
                splitValue, maxZ,
                splitValues, leafBlockFPs);
          rightWriters[0].destroy();
          rightWriters[1].destroy();
        }
        success = true;
      } finally {
        if (success == false) {
          for(Writer writer : leftWriters) {
            if (writer != null) {
              try {
                writer.destroy();
              } catch (Throwable t) {
                // Suppress to keep throwing original exc
              }
            }
          }
          for(Writer writer : rightWriters) {
            if (writer != null) {
              try {
                writer.destroy();
              } catch (Throwable t) {
                // Suppress to keep throwing original exc
              }
            }
          }
        }
      }

      splitValues[nodeID] = splitValue;
    }
  }

  Writer getWriter(long count) throws IOException {
    if (count < maxPointsSortInHeap) {
      return new HeapWriter((int) count);
    } else {
      return new OfflineWriter(tempDir, tempFileNamePrefix, count);
    }
  }
}
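(For reference: the writer's full lifecycle, as the Geo3DDocValuesConsumer below drives
it, is add() for every encoded point, one finish() call, and persisting the returned
index file pointer. A minimal sketch, assuming the caller supplies the temp Directory,
the data IndexOutput and the already-encoded coordinates:

  BKD3DTreeWriter writer = new BKD3DTreeWriter(tempDir, "_0");   // defaults: 1024 points/leaf, 128K-point heap sort
  for (int docID = 0; docID < maxDoc; docID++) {
    writer.add(encodedX[docID], encodedY[docID], encodedZ[docID], docID);
  }
  long indexFP = writer.finish(out);   // the index (split values + leaf block FPs) starts here
  // indexFP must be recorded (e.g. in per-field metadata) so BKD3DTreeReader can
  // later seek to it when the segment is opened for searching.
)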
@ -1,42 +0,0 @@
package org.apache.lucene.bkdtree3d;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;

class Geo3DBinaryDocValues extends BinaryDocValues {
  final BKD3DTreeReader bkdTreeReader;
  final BinaryDocValues delegate;
  final double planetMax;

  public Geo3DBinaryDocValues(BKD3DTreeReader bkdTreeReader, BinaryDocValues delegate, double planetMax) {
    this.bkdTreeReader = bkdTreeReader;
    this.delegate = delegate;
    this.planetMax = planetMax;
  }

  public BKD3DTreeReader getBKD3DTreeReader() {
    return bkdTreeReader;
  }

  @Override
  public BytesRef get(int docID) {
    return delegate.get(docID);
  }
}
@ -1,145 +0,0 @@
package org.apache.lucene.bkdtree3d;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.geo3d.PlanetModel;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

class Geo3DDocValuesConsumer extends DocValuesConsumer implements Closeable {
  final DocValuesConsumer delegate;
  final int maxPointsInLeafNode;
  final int maxPointsSortInHeap;
  final IndexOutput out;
  final Map<Integer,Long> fieldIndexFPs = new HashMap<>();
  final SegmentWriteState state;
  final Directory tempDir;
  final String tempFileNamePrefix;

  public Geo3DDocValuesConsumer(Directory tempDir, String tempFileNamePrefix, PlanetModel planetModel, DocValuesConsumer delegate,
                                SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
    BKD3DTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
    this.tempDir = tempDir;
    this.tempFileNamePrefix = tempFileNamePrefix;
    this.delegate = delegate;
    this.maxPointsInLeafNode = maxPointsInLeafNode;
    this.maxPointsSortInHeap = maxPointsSortInHeap;
    this.state = state;
    String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Geo3DDocValuesFormat.DATA_EXTENSION);
    out = state.directory.createOutput(datFileName, state.context);
    CodecUtil.writeIndexHeader(out, Geo3DDocValuesFormat.DATA_CODEC_NAME, Geo3DDocValuesFormat.DATA_VERSION_CURRENT,
                               state.segmentInfo.getId(), state.segmentSuffix);

    // We write the max for this PlanetModel into the index so we know we are decoding correctly at search time, and so we can also do
    // a best-effort check that the search time PlanetModel "matches":
    out.writeLong(Double.doubleToLongBits(planetModel.getMaximumMagnitude()));
  }

  @Override
  public void close() throws IOException {
    boolean success = false;
    try {
      CodecUtil.writeFooter(out);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(delegate, out);
      } else {
        IOUtils.closeWhileHandlingException(delegate, out);
      }
    }

    String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Geo3DDocValuesFormat.META_EXTENSION);
    IndexOutput metaOut = state.directory.createOutput(metaFileName, state.context);
    success = false;
    try {
      CodecUtil.writeIndexHeader(metaOut, Geo3DDocValuesFormat.META_CODEC_NAME, Geo3DDocValuesFormat.META_VERSION_CURRENT,
                                 state.segmentInfo.getId(), state.segmentSuffix);
      metaOut.writeVInt(fieldIndexFPs.size());
      for(Map.Entry<Integer,Long> ent : fieldIndexFPs.entrySet()) {
        metaOut.writeVInt(ent.getKey());
        metaOut.writeVLong(ent.getValue());
      }
      CodecUtil.writeFooter(metaOut);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(metaOut);
      } else {
        IOUtils.closeWhileHandlingException(metaOut);
      }
    }
  }

  @Override
  public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
    delegate.addBinaryField(field, values);
    BKD3DTreeWriter writer = new BKD3DTreeWriter(tempDir, tempFileNamePrefix, maxPointsInLeafNode, maxPointsSortInHeap);
    Iterator<BytesRef> valuesIt = values.iterator();
    for (int docID=0;docID<state.segmentInfo.maxDoc();docID++) {
      assert valuesIt.hasNext();
      BytesRef value = valuesIt.next();
      // TODO: we should allow multi-valued here, just appended into the BDV
      // 3 ints packed into byte[]
      if (value != null) {
        assert value.length == 12;
        int x = Geo3DDocValuesFormat.readInt(value.bytes, value.offset);
        int y = Geo3DDocValuesFormat.readInt(value.bytes, value.offset+4);
        int z = Geo3DDocValuesFormat.readInt(value.bytes, value.offset+8);
        writer.add(x, y, z, docID);
      }
    }

    long indexStartFP = writer.finish(out);

    fieldIndexFPs.put(field.number, indexStartFP);
  }

  @Override
  public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) {
    throw new UnsupportedOperationException();
  }

  @Override
  public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) {
    throw new UnsupportedOperationException();
  }
}
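(For reference: the 12-byte values consumed by addBinaryField above are three 4-byte ints
appended back to back. A sketch of the matching encode side; the real writeInt/readInt
helpers live in Geo3DDocValuesFormat and are not shown in this hunk, so the big-endian
byte order here is an assumption:

  static BytesRef pack(int x, int y, int z) {
    byte[] bytes = new byte[12];
    writeInt(x, bytes, 0);
    writeInt(y, bytes, 4);
    writeInt(z, bytes, 8);
    return new BytesRef(bytes);
  }

  static void writeInt(int v, byte[] bytes, int offset) {
    // Big-endian (assumed): most significant byte first
    bytes[offset]   = (byte) (v >> 24);
    bytes[offset+1] = (byte) (v >> 16);
    bytes[offset+2] = (byte) (v >> 8);
    bytes[offset+3] = (byte) v;
  }
)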
@ -1,167 +0,0 @@
|
|||
package org.apache.lucene.bkdtree3d;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.codecs.lucene54.Lucene54DocValuesFormat;
|
||||
import org.apache.lucene.geo3d.PlanetModel;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A {@link DocValuesFormat} to efficiently index geo-spatial 3D x,y,z points
|
||||
* from {@link Geo3DPointField} for fast shape intersection queries using
|
||||
* ({@link PointInGeo3DShapeQuery})
|
||||
*
|
||||
* <p>This wraps {@link Lucene54DocValuesFormat}, but saves its own BKD tree
|
||||
* structures to disk for fast query-time intersection. See <a
|
||||
* href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a>
|
||||
* for details.
|
||||
*
|
||||
* <p>The BKD tree slices up 3D x,y,z space into smaller and
|
||||
* smaller 3D rectangles, until the smallest rectangles hold approximately
|
||||
* between X/2 and X points (X defaults to 1024), at which point
|
||||
* each such leaf cell is written as a block to disk, while the index tree
|
||||
* structure, which records how space was sub-divided, is loaded into
|
||||
* heap at search time. The tree is then recursed based on whether
|
||||
* the left or right child overlaps with the query shape, and once
|
||||
* a leaf block is reached, all documents in that leaf block are collected
|
||||
* if the cell is fully enclosed by the query shape, or filtered and then
|
||||
* collected, if not.
|
||||
*
|
||||
* <p>The index is also quite compact, because docs only appear once in
|
||||
* the tree (no "prefix terms").
|
||||
*
|
||||
* <p>In addition to the files written by {@link Lucene54DocValuesFormat}, this format writes:
|
||||
* <ol>
|
||||
* <li><tt>.g3dd</tt>: BKD leaf data and index</li>
|
||||
* <li><tt>.g3dm</tt>: BKD metadata</li>
|
||||
* </ol>
|
||||
*
|
||||
* <p>The disk format is experimental and free to change suddenly, and this code
|
||||
* likely has new and exciting bugs!
|
||||
*
|
||||
* @lucene.experimental */
|
||||
|
||||
public class Geo3DDocValuesFormat extends DocValuesFormat {
|
||||
|
||||
static final String DATA_CODEC_NAME = "Geo3DData";
|
||||
static final int DATA_VERSION_START = 0;
|
||||
static final int DATA_VERSION_CURRENT = DATA_VERSION_START;
|
||||
static final String DATA_EXTENSION = "g3dd";
|
||||
|
||||
static final String META_CODEC_NAME = "Geo3DMeta";
|
||||
static final int META_VERSION_START = 0;
|
||||
static final int META_VERSION_CURRENT = META_VERSION_START;
|
||||
static final String META_EXTENSION = "g3dm";
|
||||
|
||||
private final int maxPointsInLeafNode;
|
||||
private final int maxPointsSortInHeap;
|
||||
|
||||
private final DocValuesFormat delegate = new Lucene54DocValuesFormat();
|
||||
|
||||
private final PlanetModel planetModel;
|
||||
|
||||
/** Default constructor */
|
||||
public Geo3DDocValuesFormat() {
|
||||
this(PlanetModel.WGS84, BKD3DTreeWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKD3DTreeWriter.DEFAULT_MAX_POINTS_SORT_IN_HEAP);
|
||||
}
|
||||
|
||||
/** Creates this with custom configuration.
|
||||
*
|
||||
* @param planetModel the {@link PlanetModel} to use; this is only used when writing
|
||||
* @param maxPointsInLeafNode Maximum number of points in each leaf cell. Smaller values create a deeper tree with larger in-heap index and possibly
|
||||
* faster searching. The default is 1024.
|
||||
* @param maxPointsSortInHeap Maximum number of points where in-heap sort can be used. When the number of points exceeds this, a (slower)
|
||||
* offline sort is used. The default is 128 * 1024.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public Geo3DDocValuesFormat(PlanetModel planetModel, int maxPointsInLeafNode, int maxPointsSortInHeap) {
|
||||
super("BKD3DTree");
|
||||
BKD3DTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
this.maxPointsInLeafNode = maxPointsInLeafNode;
|
||||
this.maxPointsSortInHeap = maxPointsSortInHeap;
|
||||
this.planetModel = planetModel;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
|
||||
return new Geo3DDocValuesConsumer(state.directory, state.segmentInfo.name, planetModel, delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
|
||||
return new Geo3DDocValuesProducer(delegate.fieldsProducer(state), state);
|
||||
}
|
||||
|
||||
/** Clips the incoming value to the allowed min/max range before encoding, instead of throwing an exception. */
|
||||
static int encodeValueLenient(double planetMax, double x) {
|
||||
if (x > planetMax) {
|
||||
x = planetMax;
|
||||
} else if (x < -planetMax) {
|
||||
x = -planetMax;
|
||||
}
|
||||
return encodeValue(planetMax, x);
|
||||
}
|
||||
|
||||
static int encodeValue(double planetMax, double x) {
|
||||
if (x > planetMax) {
|
||||
throw new IllegalArgumentException("value=" + x + " is out-of-bounds (greater than planetMax=" + planetMax + ")");
|
||||
}
|
||||
if (x < -planetMax) {
|
||||
throw new IllegalArgumentException("value=" + x + " is out-of-bounds (less than than -planetMax=" + -planetMax + ")");
|
||||
}
|
||||
long y = Math.round(x * (Integer.MAX_VALUE / planetMax));
|
||||
assert y >= Integer.MIN_VALUE;
|
||||
assert y <= Integer.MAX_VALUE;
|
||||
|
||||
return (int) y;
|
||||
}
|
||||
|
||||
/** Center decode */
|
||||
static double decodeValueCenter(double planetMax, int x) {
|
||||
return x * (planetMax / Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
/** More negative decode, at bottom of cell */
|
||||
static double decodeValueMin(double planetMax, int x) {
|
||||
return (((double)x) - 0.5) * (planetMax / Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
/** More positive decode, at top of cell */
|
||||
static double decodeValueMax(double planetMax, int x) {
|
||||
return (((double)x) + 0.5) * (planetMax / Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
|
||||
static int readInt(byte[] bytes, int offset) {
|
||||
return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset+1] & 0xFF) << 16)
|
||||
| ((bytes[offset+2] & 0xFF) << 8) | (bytes[offset+3] & 0xFF);
|
||||
}
|
||||
|
||||
static void writeInt(int value, byte[] bytes, int offset) {
|
||||
bytes[offset] = (byte) ((value >> 24) & 0xff);
|
||||
bytes[offset+1] = (byte) ((value >> 16) & 0xff);
|
||||
bytes[offset+2] = (byte) ((value >> 8) & 0xff);
|
||||
bytes[offset+3] = (byte) (value & 0xff);
|
||||
}
|
||||
}
|
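For reference, the encode/decode trio above implements a simple uniform quantization: doubles in [-planetMax, planetMax] are scaled onto the full signed-int range, and the min/max decoders widen a quantized bound by half a quantum so that any double which rounded into a cell still falls inside the decoded cell bounds. A minimal standalone sketch of the round trip (illustrative values, not code from this commit; run with -ea for the assert):

public class QuantizationSketch {
  public static void main(String[] args) {
    double planetMax = 1.0011188539924791; // roughly WGS84's maximum magnitude
    double x = 0.73;
    int enc = (int) Math.round(x * (Integer.MAX_VALUE / planetMax));
    double scale = planetMax / Integer.MAX_VALUE;
    double center = enc * scale;       // decodeValueCenter
    double min = (enc - 0.5) * scale;  // decodeValueMin: bottom of cell
    double max = (enc + 0.5) * scale;  // decodeValueMax: top of cell
    // Any double that quantizes to enc must fall inside [min, max]:
    assert min <= x && x <= max;
    System.out.println("x=" + x + " enc=" + enc + " center=" + center);
  }
}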
|
@ -1,177 +0,0 @@
|
|||
package org.apache.lucene.bkdtree3d;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Accountables;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
class Geo3DDocValuesProducer extends DocValuesProducer {
|
||||
|
||||
private final Map<String,BKD3DTreeReader> treeReaders = new HashMap<>();
|
||||
private final Map<Integer,Long> fieldToIndexFPs = new HashMap<>();
|
||||
|
||||
private final IndexInput datIn;
|
||||
private final AtomicLong ramBytesUsed;
|
||||
private final int maxDoc;
|
||||
private final DocValuesProducer delegate;
|
||||
private final boolean merging;
|
||||
private final double planetMax;
|
||||
|
||||
public Geo3DDocValuesProducer(DocValuesProducer delegate, SegmentReadState state) throws IOException {
|
||||
String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Geo3DDocValuesFormat.META_EXTENSION);
|
||||
ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaFileName, state.context);
|
||||
CodecUtil.checkIndexHeader(metaIn, Geo3DDocValuesFormat.META_CODEC_NAME, Geo3DDocValuesFormat.META_VERSION_START, Geo3DDocValuesFormat.META_VERSION_CURRENT,
|
||||
state.segmentInfo.getId(), state.segmentSuffix);
|
||||
int fieldCount = metaIn.readVInt();
|
||||
for(int i=0;i<fieldCount;i++) {
|
||||
int fieldNumber = metaIn.readVInt();
|
||||
long indexFP = metaIn.readVLong();
|
||||
fieldToIndexFPs.put(fieldNumber, indexFP);
|
||||
}
|
||||
CodecUtil.checkFooter(metaIn);
|
||||
metaIn.close();
|
||||
|
||||
String datFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Geo3DDocValuesFormat.DATA_EXTENSION);
|
||||
datIn = state.directory.openInput(datFileName, state.context);
|
||||
CodecUtil.checkIndexHeader(datIn, Geo3DDocValuesFormat.DATA_CODEC_NAME, Geo3DDocValuesFormat.DATA_VERSION_START, Geo3DDocValuesFormat.DATA_VERSION_CURRENT,
|
||||
state.segmentInfo.getId(), state.segmentSuffix);
|
||||
planetMax = Double.longBitsToDouble(datIn.readLong());
|
||||
ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
|
||||
maxDoc = state.segmentInfo.maxDoc();
|
||||
this.delegate = delegate;
|
||||
merging = false;
|
||||
}
|
||||
|
||||
// clone for merge: we don't hang onto the Geo3Ds we load
|
||||
Geo3DDocValuesProducer(Geo3DDocValuesProducer orig) throws IOException {
|
||||
assert Thread.holdsLock(orig);
|
||||
datIn = orig.datIn.clone();
|
||||
ramBytesUsed = new AtomicLong(orig.ramBytesUsed.get());
|
||||
delegate = orig.delegate.getMergeInstance();
|
||||
fieldToIndexFPs.putAll(orig.fieldToIndexFPs);
|
||||
treeReaders.putAll(orig.treeReaders);
|
||||
merging = true;
|
||||
maxDoc = orig.maxDoc;
|
||||
planetMax = orig.planetMax;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
IOUtils.close(datIn, delegate);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
CodecUtil.checksumEntireFile(datIn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public NumericDocValues getNumeric(FieldInfo field) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized BinaryDocValues getBinary(FieldInfo field) throws IOException {
|
||||
BKD3DTreeReader treeReader = treeReaders.get(field.name);
|
||||
if (treeReader == null) {
|
||||
// Lazy load
|
||||
Long fp = fieldToIndexFPs.get(field.number);
|
||||
if (fp == null) {
|
||||
throw new IllegalArgumentException("this field was not indexed as a BKDPointField");
|
||||
}
|
||||
|
||||
// LUCENE-6697: never do real IOPs with the original IndexInput because search
|
||||
// threads can be concurrently cloning it:
|
||||
IndexInput clone = datIn.clone();
|
||||
clone.seek(fp);
|
||||
treeReader = new BKD3DTreeReader(clone, maxDoc);
|
||||
|
||||
// Only hang onto the reader when we are not merging:
|
||||
if (merging == false) {
|
||||
treeReaders.put(field.name, treeReader);
|
||||
ramBytesUsed.addAndGet(treeReader.ramBytesUsed());
|
||||
}
|
||||
}
|
||||
|
||||
return new Geo3DBinaryDocValues(treeReader, delegate.getBinary(field), planetMax);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedDocValues getSorted(FieldInfo field) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedSetDocValues getSortedSet(FieldInfo field) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits getDocsWithField(FieldInfo field) throws IOException {
|
||||
return delegate.getDocsWithField(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized Collection<Accountable> getChildResources() {
|
||||
List<Accountable> resources = new ArrayList<>();
|
||||
for(Map.Entry<String,BKD3DTreeReader> ent : treeReaders.entrySet()) {
|
||||
resources.add(Accountables.namedAccountable("field " + ent.getKey(), ent.getValue()));
|
||||
}
|
||||
resources.add(Accountables.namedAccountable("delegate", delegate));
|
||||
|
||||
return resources;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized DocValuesProducer getMergeInstance() throws IOException {
|
||||
return new Geo3DDocValuesProducer(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return ramBytesUsed.get() + delegate.ramBytesUsed();
|
||||
}
|
||||
}
|
|
@ -1,92 +0,0 @@
|
|||
package org.apache.lucene.bkdtree3d;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
final class GrowingHeapWriter implements Writer {
|
||||
int[] xs;
|
||||
int[] ys;
|
||||
int[] zs;
|
||||
int[] docIDs;
|
||||
long[] ords;
|
||||
private int nextWrite;
|
||||
final int maxSize;
|
||||
|
||||
public GrowingHeapWriter(int maxSize) {
|
||||
xs = new int[16];
|
||||
ys = new int[16];
|
||||
zs = new int[16];
|
||||
docIDs = new int[16];
|
||||
ords = new long[16];
|
||||
this.maxSize = maxSize;
|
||||
}
|
||||
|
||||
private int[] growExact(int[] arr, int size) {
|
||||
assert size > arr.length;
|
||||
int[] newArr = new int[size];
|
||||
System.arraycopy(arr, 0, newArr, 0, arr.length);
|
||||
return newArr;
|
||||
}
|
||||
|
||||
private long[] growExact(long[] arr, int size) {
|
||||
assert size > arr.length;
|
||||
long[] newArr = new long[size];
|
||||
System.arraycopy(arr, 0, newArr, 0, arr.length);
|
||||
return newArr;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void append(int x, int y, int z, long ord, int docID) {
|
||||
assert ord == nextWrite;
|
||||
if (xs.length == nextWrite) {
|
||||
int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, RamUsageEstimator.NUM_BYTES_INT));
|
||||
assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite;
|
||||
xs = growExact(xs, nextSize);
|
||||
ys = growExact(ys, nextSize);
|
||||
zs = growExact(zs, nextSize);
|
||||
ords = growExact(ords, nextSize);
|
||||
docIDs = growExact(docIDs, nextSize);
|
||||
}
|
||||
xs[nextWrite] = x;
|
||||
ys[nextWrite] = y;
|
||||
zs[nextWrite] = z;
|
||||
ords[nextWrite] = ord;
|
||||
docIDs[nextWrite] = docID;
|
||||
nextWrite++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Reader getReader(long start) {
|
||||
return new HeapReader(xs, ys, zs, ords, docIDs, (int) start, nextWrite);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "GrowingHeapWriter(count=" + nextWrite + " alloc=" + xs.length + ")";
|
||||
}
|
||||
}
|
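GrowingHeapWriter's append grows all five parallel arrays together, so a point's coordinates, ord and docID stay aligned at the same index. A standalone sketch of that pattern (not from this commit; the real code uses ArrayUtil.oversize and caps growth at maxSize):

public class GrowSketch {
  static int[] growExact(int[] arr, int size) {
    int[] newArr = new int[size];
    System.arraycopy(arr, 0, newArr, 0, arr.length);
    return newArr;
  }
  public static void main(String[] args) {
    int[] xs = new int[4];
    int[] ys = new int[4];
    int next = 0;
    for (int v = 0; v < 100; v++) {
      if (next == xs.length) {
        // Grow a bit past what is needed (ArrayUtil.oversize does this,
        // with extra care for alignment and a maxSize cap in the original):
        int newSize = (next + 1) + ((next + 1) >> 3);
        xs = growExact(xs, newSize);  // grow every array in lock-step
        ys = growExact(ys, newSize);
      }
      xs[next] = v;
      ys[next] = -v;
      next++;
    }
    System.out.println("count=" + next + " alloc=" + xs.length);
  }
}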
|
@ -1,73 +0,0 @@
|
|||
package org.apache.lucene.bkdtree3d;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
final class HeapReader implements Reader {
|
||||
private int curRead;
|
||||
final int[] xs;
|
||||
final int[] ys;
|
||||
final int[] zs;
|
||||
final long[] ords;
|
||||
final int[] docIDs;
|
||||
final int end;
|
||||
|
||||
HeapReader(int[] xs, int[] ys, int[] zs, long[] ords, int[] docIDs, int start, int end) {
|
||||
this.xs = xs;
|
||||
this.ys = ys;
|
||||
this.zs = zs;
|
||||
this.ords = ords;
|
||||
this.docIDs = docIDs;
|
||||
curRead = start-1;
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() {
|
||||
curRead++;
|
||||
return curRead < end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int x() {
|
||||
return xs[curRead];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int y() {
|
||||
return ys[curRead];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int z() {
|
||||
return zs[curRead];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return docIDs[curRead];
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ord() {
|
||||
return ords[curRead];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
}
|
||||
}
|
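Note HeapReader starts its cursor at start - 1, so the accessors are only valid after next() has returned true. A hypothetical consumption loop against the Reader/Writer contract (assumes an enclosing method that throws IOException and a Writer `writer` that was already filled and closed):

// Hypothetical usage of the Reader cursor contract: next() must return
// true before x()/y()/z()/ord()/docID() are valid, which is why
// HeapReader initializes curRead = start - 1.
try (Reader r = writer.getReader(0)) {
  while (r.next()) {
    System.out.println("x=" + r.x() + " y=" + r.y() + " z=" + r.z()
        + " ord=" + r.ord() + " docID=" + r.docID());
  }
}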
|
@ -1,69 +0,0 @@
|
|||
package org.apache.lucene.bkdtree3d;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
final class HeapWriter implements Writer {
|
||||
final int[] xs;
|
||||
final int[] ys;
|
||||
final int[] zs;
|
||||
final int[] docIDs;
|
||||
final long[] ords;
|
||||
private int nextWrite;
|
||||
private boolean closed;
|
||||
|
||||
public HeapWriter(int count) {
|
||||
xs = new int[count];
|
||||
ys = new int[count];
|
||||
zs = new int[count];
|
||||
docIDs = new int[count];
|
||||
ords = new long[count];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void append(int x, int y, int z, long ord, int docID) {
|
||||
xs[nextWrite] = x;
|
||||
ys[nextWrite] = y;
|
||||
zs[nextWrite] = z;
|
||||
ords[nextWrite] = ord;
|
||||
docIDs[nextWrite] = docID;
|
||||
nextWrite++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Reader getReader(long start) {
|
||||
assert closed;
|
||||
return new HeapReader(xs, ys, zs, ords, docIDs, (int) start, xs.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
closed = true;
|
||||
if (nextWrite != xs.length) {
|
||||
throw new IllegalStateException("only wrote " + nextWrite + " values, but expected " + xs.length);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "HeapWriter(count=" + xs.length + ")";
|
||||
}
|
||||
}
|
|
@ -1,84 +0,0 @@
|
|||
package org.apache.lucene.bkdtree3d;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
final class OfflineReader implements Reader {
|
||||
final IndexInput in;
|
||||
long countLeft;
|
||||
private int x;
|
||||
private int y;
|
||||
private int z;
|
||||
private long ord;
|
||||
private int docID;
|
||||
|
||||
OfflineReader(Directory tempDir, String tempFileName, long start, long count) throws IOException {
|
||||
in = tempDir.openInput(tempFileName, IOContext.READONCE);
|
||||
in.seek(start * BKD3DTreeWriter.BYTES_PER_DOC);
|
||||
this.countLeft = count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (countLeft == 0) {
|
||||
return false;
|
||||
}
|
||||
countLeft--;
|
||||
x = in.readInt();
|
||||
y = in.readInt();
|
||||
z = in.readInt();
|
||||
ord = in.readLong();
|
||||
docID = in.readInt();
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int x() {
|
||||
return x;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int y() {
|
||||
return y;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int z() {
|
||||
return z;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ord() {
|
||||
return ord;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return docID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
in.close();
|
||||
}
|
||||
}
|
|
@ -1,77 +0,0 @@
|
|||
package org.apache.lucene.bkdtree3d;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
||||
final class OfflineWriter implements Writer {
|
||||
|
||||
final Directory tempDir;
|
||||
final IndexOutput out;
|
||||
final byte[] scratchBytes = new byte[BKD3DTreeWriter.BYTES_PER_DOC];
|
||||
final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
|
||||
final long count;
|
||||
private long countWritten;
|
||||
private boolean closed;
|
||||
|
||||
public OfflineWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException {
|
||||
this.tempDir = tempDir;
|
||||
out = tempDir.createTempOutput(tempFileNamePrefix, "bkd3d", IOContext.DEFAULT);
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void append(int x, int y, int z, long ord, int docID) throws IOException {
|
||||
out.writeInt(x);
|
||||
out.writeInt(y);
|
||||
out.writeInt(z);
|
||||
out.writeLong(ord);
|
||||
out.writeInt(docID);
|
||||
countWritten++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Reader getReader(long start) throws IOException {
|
||||
assert closed;
|
||||
return new OfflineReader(tempDir, out.getName(), start, count-start);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
closed = true;
|
||||
out.close();
|
||||
if (count != countWritten) {
|
||||
throw new IllegalStateException("wrote " + countWritten + " values, but expected " + count);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() throws IOException {
|
||||
tempDir.deleteFile(out.getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "OfflineWriter(count=" + count + " tempFileName=" + out.getName() + ")";
|
||||
}
|
||||
}
|
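OfflineWriter and OfflineReader rely on a fixed-width record: three ints, a long, and an int per point, so (assuming BKD3DTreeWriter.BYTES_PER_DOC is that 24-byte total) record i starts at byte i * 24, and getReader(start) is a single seek. A standalone sketch of the layout, outside Lucene's Directory API:

import java.nio.ByteBuffer;

public class RecordLayoutSketch {
  // int x + int y + int z + long ord + int docID:
  static final int BYTES_PER_DOC = 4 + 4 + 4 + 8 + 4;
  public static void main(String[] args) {
    ByteBuffer buf = ByteBuffer.allocate(2 * BYTES_PER_DOC);
    for (int i = 0; i < 2; i++) {
      buf.putInt(i).putInt(-i).putInt(2 * i).putLong(i).putInt(100 + i);
    }
    buf.flip();
    buf.position(1 * BYTES_PER_DOC); // seek straight to record 1
    System.out.println("x=" + buf.getInt() + " y=" + buf.getInt()
        + " z=" + buf.getInt() + " ord=" + buf.getLong() + " docID=" + buf.getInt());
  }
}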
|
@ -1,222 +0,0 @@
|
|||
package org.apache.lucene.bkdtree3d;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.geo3d.GeoArea;
|
||||
import org.apache.lucene.geo3d.GeoAreaFactory;
|
||||
import org.apache.lucene.geo3d.GeoShape;
|
||||
import org.apache.lucene.geo3d.PlanetModel;
|
||||
import org.apache.lucene.geo3d.XYZBounds;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.ConstantScoreWeight;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/** Finds all previously indexed points that fall within the specified polygon.
|
||||
*
|
||||
* <p>The field must be indexed with {@link Geo3DDocValuesFormat}, and {@link Geo3DPointField} added per document.
|
||||
*
|
||||
* <p>Because this implementation cannot intersect each cell with the polygon, it will be costly, especially for large polygons, as every
|
||||
* possible point must be checked.
|
||||
*
|
||||
* <p><b>NOTE</b>: for fastest performance, this allocates FixedBitSet(maxDoc) for each segment. The score of each hit is the query boost.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
|
||||
public class PointInGeo3DShapeQuery extends Query {
|
||||
final String field;
|
||||
final PlanetModel planetModel;
|
||||
final GeoShape shape;
|
||||
|
||||
/** The lats/lons must be clockwise or counter-clockwise. */
|
||||
public PointInGeo3DShapeQuery(PlanetModel planetModel, String field, GeoShape shape) {
|
||||
this.field = field;
|
||||
this.planetModel = planetModel;
|
||||
this.shape = shape;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
|
||||
// I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
|
||||
// used in the first pass:
|
||||
|
||||
return new ConstantScoreWeight(this) {
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
LeafReader reader = context.reader();
|
||||
BinaryDocValues bdv = reader.getBinaryDocValues(field);
|
||||
if (bdv == null) {
|
||||
// No docs in this segment had this field
|
||||
return null;
|
||||
}
|
||||
|
||||
if (bdv instanceof Geo3DBinaryDocValues == false) {
|
||||
throw new IllegalStateException("field \"" + field + "\" was not indexed with Geo3DBinaryDocValuesFormat: got: " + bdv);
|
||||
}
|
||||
final Geo3DBinaryDocValues treeDV = (Geo3DBinaryDocValues) bdv;
|
||||
BKD3DTreeReader tree = treeDV.getBKD3DTreeReader();
|
||||
|
||||
XYZBounds bounds = new XYZBounds();
|
||||
shape.getBounds(bounds);
|
||||
|
||||
final double planetMax = planetModel.getMaximumMagnitude();
|
||||
if (planetMax != treeDV.planetMax) {
|
||||
throw new IllegalStateException(planetModel + " is not the same one used during indexing: planetMax=" + planetMax + " vs indexing planetMax=" + treeDV.planetMax);
|
||||
}
|
||||
|
||||
/*
|
||||
GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel,
|
||||
bounds.getMinimumX(),
|
||||
bounds.getMaximumX(),
|
||||
bounds.getMinimumY(),
|
||||
bounds.getMaximumY(),
|
||||
bounds.getMinimumZ(),
|
||||
bounds.getMaximumZ());
|
||||
|
||||
assert xyzSolid.getRelationship(shape) == GeoArea.WITHIN || xyzSolid.getRelationship(shape) == GeoArea.OVERLAPS: "expected WITHIN (1) or OVERLAPS (2) but got " + xyzSolid.getRelationship(shape) + "; shape="+shape+"; XYZSolid="+xyzSolid;
|
||||
*/
|
||||
|
||||
DocIdSet result = tree.intersect(Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMinimumX()),
|
||||
Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMaximumX()),
|
||||
Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMinimumY()),
|
||||
Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMaximumY()),
|
||||
Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMinimumZ()),
|
||||
Geo3DDocValuesFormat.encodeValueLenient(planetMax, bounds.getMaximumZ()),
|
||||
new BKD3DTreeReader.ValueFilter() {
|
||||
@Override
|
||||
public boolean accept(int docID) {
|
||||
//System.out.println(" accept? docID=" + docID);
|
||||
BytesRef bytes = treeDV.get(docID);
|
||||
if (bytes == null) {
|
||||
//System.out.println(" false (null)");
|
||||
return false;
|
||||
}
|
||||
|
||||
assert bytes.length == 12;
|
||||
double x = Geo3DDocValuesFormat.decodeValueCenter(treeDV.planetMax, Geo3DDocValuesFormat.readInt(bytes.bytes, bytes.offset));
|
||||
double y = Geo3DDocValuesFormat.decodeValueCenter(treeDV.planetMax, Geo3DDocValuesFormat.readInt(bytes.bytes, bytes.offset+4));
|
||||
double z = Geo3DDocValuesFormat.decodeValueCenter(treeDV.planetMax, Geo3DDocValuesFormat.readInt(bytes.bytes, bytes.offset+8));
|
||||
// System.out.println(" accept docID=" + docID + " point: x=" + x + " y=" + y + " z=" + z);
|
||||
|
||||
// True if x,y,z is within shape
|
||||
//System.out.println(" x=" + x + " y=" + y + " z=" + z);
|
||||
//System.out.println(" ret: " + shape.isWithin(x, y, z));
|
||||
|
||||
return shape.isWithin(x, y, z);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BKD3DTreeReader.Relation compare(int cellXMinEnc, int cellXMaxEnc, int cellYMinEnc, int cellYMaxEnc, int cellZMinEnc, int cellZMaxEnc) {
|
||||
assert cellXMinEnc <= cellXMaxEnc;
|
||||
assert cellYMinEnc <= cellYMaxEnc;
|
||||
assert cellZMinEnc <= cellZMaxEnc;
|
||||
|
||||
// Because the BKD tree operates in quantized (64 bit -> 32 bit) space, and the cell bounds
|
||||
// here are inclusive, we need to extend the bounds to the largest un-quantized values that
|
||||
// could quantize into these bounds. The encoding (Geo3DDocValuesFormat.encodeValue) does
|
||||
// a Math.round from double to long, so e.g. 1.4 -> 1, and -1.4 -> -1:
|
||||
double cellXMin = Geo3DDocValuesFormat.decodeValueMin(treeDV.planetMax, cellXMinEnc);
|
||||
double cellXMax = Geo3DDocValuesFormat.decodeValueMax(treeDV.planetMax, cellXMaxEnc);
|
||||
double cellYMin = Geo3DDocValuesFormat.decodeValueMin(treeDV.planetMax, cellYMinEnc);
|
||||
double cellYMax = Geo3DDocValuesFormat.decodeValueMax(treeDV.planetMax, cellYMaxEnc);
|
||||
double cellZMin = Geo3DDocValuesFormat.decodeValueMin(treeDV.planetMax, cellZMinEnc);
|
||||
double cellZMax = Geo3DDocValuesFormat.decodeValueMax(treeDV.planetMax, cellZMaxEnc);
|
||||
//System.out.println(" compare: x=" + cellXMin + "-" + cellXMax + " y=" + cellYMin + "-" + cellYMax + " z=" + cellZMin + "-" + cellZMax);
|
||||
|
||||
GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel, cellXMin, cellXMax, cellYMin, cellYMax, cellZMin, cellZMax);
|
||||
|
||||
switch(xyzSolid.getRelationship(shape)) {
|
||||
case GeoArea.CONTAINS:
|
||||
// Shape fully contains the cell
|
||||
//System.out.println(" inside");
|
||||
return BKD3DTreeReader.Relation.CELL_INSIDE_SHAPE;
|
||||
case GeoArea.OVERLAPS:
|
||||
// They do overlap but neither contains the other:
|
||||
//System.out.println(" crosses1");
|
||||
return BKD3DTreeReader.Relation.SHAPE_CROSSES_CELL;
|
||||
case GeoArea.WITHIN:
|
||||
// Cell fully contains the shape:
|
||||
//System.out.println(" crosses2");
|
||||
return BKD3DTreeReader.Relation.SHAPE_INSIDE_CELL;
|
||||
case GeoArea.DISJOINT:
|
||||
// They do not overlap at all
|
||||
//System.out.println(" outside");
|
||||
return BKD3DTreeReader.Relation.SHAPE_OUTSIDE_CELL;
|
||||
default:
|
||||
assert false;
|
||||
return BKD3DTreeReader.Relation.SHAPE_CROSSES_CELL;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
final DocIdSetIterator disi = result.iterator();
|
||||
|
||||
return new ConstantScoreScorer(this, score(), disi);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings({"unchecked","rawtypes"})
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
if (!super.equals(o)) return false;
|
||||
|
||||
PointInGeo3DShapeQuery that = (PointInGeo3DShapeQuery) o;
|
||||
|
||||
return planetModel.equals(that.planetModel) && shape.equals(that.shape);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int hashCode() {
|
||||
int result = super.hashCode();
|
||||
result = 31 * result + planetModel.hashCode();
|
||||
result = 31 * result + shape.hashCode();
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
sb.append(getClass().getSimpleName());
|
||||
sb.append(':');
|
||||
if (this.field.equals(field) == false) {
|
||||
sb.append(" field=");
|
||||
sb.append(this.field);
|
||||
sb.append(':');
|
||||
}
|
||||
sb.append("PlanetModel: ");
|
||||
sb.append(planetModel);
|
||||
sb.append(" Shape: ");
|
||||
sb.append(shape);
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
|
@ -1,31 +0,0 @@
|
|||
package org.apache.lucene.bkdtree3d;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
|
||||
/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
|
||||
interface Reader extends Closeable {
|
||||
boolean next() throws IOException;
|
||||
int x();
|
||||
int y();
|
||||
int z();
|
||||
long ord();
|
||||
int docID();
|
||||
}
|
|
@ -1,29 +0,0 @@
|
|||
package org.apache.lucene.bkdtree3d;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
|
||||
/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
|
||||
interface Writer extends Closeable {
|
||||
void append(int x, int y, int z, long ord, int docID) throws IOException;
|
||||
Reader getReader(long start) throws IOException;
|
||||
void destroy() throws IOException;
|
||||
}
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Fast "indexed point inside geo3d shape" query implementation.
|
||||
*/
|
||||
package org.apache.lucene.bkdtree3d;
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.bkdtree3d;
|
||||
package org.apache.lucene.geo3d;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -17,24 +17,23 @@ package org.apache.lucene.bkdtree3d;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.geo3d.PlanetModel;
|
||||
import org.apache.lucene.geo3d.GeoPoint;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.bkd.BKDUtil;
|
||||
|
||||
// TODO: allow multi-valued, packing all points into a single BytesRef
|
||||
|
||||
/** Add this to a document to index lat/lon point, but be sure to use {@link Geo3DDocValuesFormat} for the field.
|
||||
|
||||
/** Add this to a document to index a lat/lon or x/y/z point, indexed as a dimensional value.
|
||||
* Multiple values are allowed: just add multiple Geo3DPointField instances to the document with the
|
||||
* same field name.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public final class Geo3DPointField extends Field {
|
||||
|
||||
/** Indexing {@link FieldType}. */
|
||||
public static final FieldType TYPE = new FieldType();
|
||||
static {
|
||||
TYPE.setDocValuesType(DocValuesType.BINARY);
|
||||
TYPE.setDimensions(3, RamUsageEstimator.NUM_BYTES_INT);
|
||||
TYPE.freeze();
|
||||
}
|
||||
|
||||
|
@ -62,9 +61,9 @@ public final class Geo3DPointField extends Field {
|
|||
|
||||
private void fillFieldsData(double planetMax, double x, double y, double z) {
|
||||
byte[] bytes = new byte[12];
|
||||
Geo3DDocValuesFormat.writeInt(Geo3DDocValuesFormat.encodeValue(planetMax, x), bytes, 0);
|
||||
Geo3DDocValuesFormat.writeInt(Geo3DDocValuesFormat.encodeValue(planetMax, y), bytes, 4);
|
||||
Geo3DDocValuesFormat.writeInt(Geo3DDocValuesFormat.encodeValue(planetMax, z), bytes, 8);
|
||||
BKDUtil.intToBytes(Geo3DUtil.encodeValue(planetMax, x), bytes, 0);
|
||||
BKDUtil.intToBytes(Geo3DUtil.encodeValue(planetMax, y), bytes, 1);
|
||||
BKDUtil.intToBytes(Geo3DUtil.encodeValue(planetMax, z), bytes, 2);
|
||||
fieldsData = new BytesRef(bytes);
|
||||
}
|
||||
}
|
|
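Note that the third argument changes meaning in this hunk: Geo3DDocValuesFormat.writeInt took byte offsets (0, 4, 8), while BKDUtil.intToBytes takes a dimension index (0, 1, 2). A hypothetical stand-in showing the assumed indexing (the real BKDUtil may additionally remap ints so byte-wise comparison matches signed int order):

public class PackSketch {
  // Hypothetical stand-in for BKDUtil.intToBytes as used above: the last
  // argument is a dimension index, not a byte offset, so dimension d
  // lands at byte offset d * 4 -- hence 0, 1, 2 where the old code
  // passed byte offsets 0, 4, 8.
  static void intToBytes(int x, byte[] dest, int dim) {
    for (int i = 0; i < 4; i++) {
      dest[dim * 4 + i] = (byte) (x >> (24 - i * 8));
    }
  }
  public static void main(String[] args) {
    byte[] packed = new byte[12];
    intToBytes(1, packed, 0);
    intToBytes(2, packed, 1);
    intToBytes(3, packed, 2);
    System.out.println(java.util.Arrays.toString(packed));
  }
}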
@ -0,0 +1,60 @@
|
|||
package org.apache.lucene.geo3d;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
class Geo3DUtil {
|
||||
|
||||
/** Clips the incoming value to the allowed min/max range before encoding, instead of throwing an exception. */
|
||||
public static int encodeValueLenient(double planetMax, double x) {
|
||||
if (x > planetMax) {
|
||||
x = planetMax;
|
||||
} else if (x < -planetMax) {
|
||||
x = -planetMax;
|
||||
}
|
||||
return encodeValue(planetMax, x);
|
||||
}
|
||||
|
||||
public static int encodeValue(double planetMax, double x) {
|
||||
if (x > planetMax) {
|
||||
throw new IllegalArgumentException("value=" + x + " is out-of-bounds (greater than planetMax=" + planetMax + ")");
|
||||
}
|
||||
if (x < -planetMax) {
|
||||
throw new IllegalArgumentException("value=" + x + " is out-of-bounds (less than than -planetMax=" + -planetMax + ")");
|
||||
}
|
||||
long y = Math.round(x * (Integer.MAX_VALUE / planetMax));
|
||||
assert y >= Integer.MIN_VALUE;
|
||||
assert y <= Integer.MAX_VALUE;
|
||||
|
||||
return (int) y;
|
||||
}
|
||||
|
||||
/** Center decode */
|
||||
public static double decodeValueCenter(double planetMax, int x) {
|
||||
return x * (planetMax / Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
/** More negative decode, at bottom of cell */
|
||||
public static double decodeValueMin(double planetMax, int x) {
|
||||
return (((double)x) - 0.5) * (planetMax / Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
/** More positive decode, at top of cell */
|
||||
public static double decodeValueMax(double planetMax, int x) {
|
||||
return (((double)x) + 0.5) * (planetMax / Integer.MAX_VALUE);
|
||||
}
|
||||
}
|
|
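encodeValueLenient exists because shape bounds computed at query time can extend slightly past the planet's surface; clipping keeps such bounds encodable where encodeValue would throw. An illustrative fragment (values invented, not from this commit):

// Illustrative only: out-of-range query bounds are clipped, not rejected.
double planetMax = 1.0;
double boundMax = 1.0000001; // slightly outside the planet
// encodeValue(planetMax, boundMax) would throw IllegalArgumentException;
// encodeValueLenient clips to planetMax first, then encodes:
int enc = (int) Math.round(Math.min(boundMax, planetMax) * (Integer.MAX_VALUE / planetMax));
// enc == Integer.MAX_VALUE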
@ -0,0 +1,205 @@
|
|||
package org.apache.lucene.geo3d;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.DimensionalValues;
|
||||
import org.apache.lucene.index.DimensionalValues.IntersectVisitor;
|
||||
import org.apache.lucene.index.DimensionalValues.Relation;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.ConstantScoreWeight;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.DocIdSetBuilder;
|
||||
import org.apache.lucene.util.bkd.BKDUtil;
|
||||
|
||||
/** Finds all previously indexed points that fall within the specified polygon.
|
||||
*
|
||||
* <p>The field must be indexed using {@link Geo3DPointField}.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
|
||||
public class PointInGeo3DShapeQuery extends Query {
|
||||
final String field;
|
||||
final PlanetModel planetModel;
|
||||
final GeoShape shape;
|
||||
|
||||
/** The lats/lons must be clockwise or counter-clockwise. */
|
||||
public PointInGeo3DShapeQuery(PlanetModel planetModel, String field, GeoShape shape) {
|
||||
this.field = field;
|
||||
this.planetModel = planetModel;
|
||||
this.shape = shape;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
|
||||
// I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
|
||||
// used in the first pass:
|
||||
|
||||
return new ConstantScoreWeight(this) {
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
LeafReader reader = context.reader();
|
||||
DimensionalValues values = reader.getDimensionalValues();
|
||||
if (values == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/*
|
||||
XYZBounds bounds = new XYZBounds();
|
||||
shape.getBounds(bounds);
|
||||
|
||||
final double planetMax = planetModel.getMaximumMagnitude();
|
||||
if (planetMax != treeDV.planetMax) {
|
||||
throw new IllegalStateException(planetModel + " is not the same one used during indexing: planetMax=" + planetMax + " vs indexing planetMax=" + treeDV.planetMax);
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel,
|
||||
bounds.getMinimumX(),
|
||||
bounds.getMaximumX(),
|
||||
bounds.getMinimumY(),
|
||||
bounds.getMaximumY(),
|
||||
bounds.getMinimumZ(),
|
||||
bounds.getMaximumZ());
|
||||
|
||||
assert xyzSolid.getRelationship(shape) == GeoArea.WITHIN || xyzSolid.getRelationship(shape) == GeoArea.OVERLAPS: "expected WITHIN (1) or OVERLAPS (2) but got " + xyzSolid.getRelationship(shape) + "; shape="+shape+"; XYZSolid="+xyzSolid;
|
||||
*/
|
||||
|
||||
double planetMax = planetModel.getMaximumMagnitude();
|
||||
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
||||
|
||||
int[] hitCount = new int[1];
|
||||
values.intersect(field,
|
||||
new IntersectVisitor() {
|
||||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
result.add(docID);
|
||||
hitCount[0]++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
assert packedValue.length == 12;
|
||||
double x = Geo3DUtil.decodeValueCenter(planetMax, BKDUtil.bytesToInt(packedValue, 0));
|
||||
double y = Geo3DUtil.decodeValueCenter(planetMax, BKDUtil.bytesToInt(packedValue, 1));
|
||||
double z = Geo3DUtil.decodeValueCenter(planetMax, BKDUtil.bytesToInt(packedValue, 2));
|
||||
if (shape.isWithin(x, y, z)) {
|
||||
result.add(docID);
|
||||
hitCount[0]++;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
// Because the dimensional format operates in quantized (64 bit -> 32 bit) space, and the cell bounds
|
||||
// here are inclusive, we need to extend the bounds to the largest un-quantized values that
|
||||
// could quantize into these bounds. The encoding (Geo3DUtil.encodeValue) does
|
||||
// a Math.round from double to long, so e.g. 1.4 -> 1, and -1.4 -> -1:
|
||||
double xMin = Geo3DUtil.decodeValueMin(planetMax, BKDUtil.bytesToInt(minPackedValue, 0));
|
||||
double xMax = Geo3DUtil.decodeValueMax(planetMax, BKDUtil.bytesToInt(maxPackedValue, 0));
|
||||
double yMin = Geo3DUtil.decodeValueMin(planetMax, BKDUtil.bytesToInt(minPackedValue, 1));
|
||||
double yMax = Geo3DUtil.decodeValueMax(planetMax, BKDUtil.bytesToInt(maxPackedValue, 1));
|
||||
double zMin = Geo3DUtil.decodeValueMin(planetMax, BKDUtil.bytesToInt(minPackedValue, 2));
|
||||
double zMax = Geo3DUtil.decodeValueMax(planetMax, BKDUtil.bytesToInt(maxPackedValue, 2));
|
||||
|
||||
//System.out.println(" compare: x=" + cellXMin + "-" + cellXMax + " y=" + cellYMin + "-" + cellYMax + " z=" + cellZMin + "-" + cellZMax);
|
||||
assert xMin <= xMax;
|
||||
assert yMin <= yMax;
|
||||
assert zMin <= zMax;
|
||||
|
||||
GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel, xMin, xMax, yMin, yMax, zMin, zMax);
|
||||
|
||||
switch(xyzSolid.getRelationship(shape)) {
|
||||
case GeoArea.CONTAINS:
|
||||
// Shape fully contains the cell
|
||||
//System.out.println(" inside");
|
||||
return Relation.CELL_INSIDE_QUERY;
|
||||
case GeoArea.OVERLAPS:
|
||||
// They do overlap but neither contains the other:
|
||||
//System.out.println(" crosses1");
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
case GeoArea.WITHIN:
|
||||
// Cell fully contains the shape:
|
||||
//System.out.println(" crosses2");
|
||||
// return Relation.SHAPE_INSIDE_CELL;
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
case GeoArea.DISJOINT:
|
||||
// They do not overlap at all
|
||||
//System.out.println(" outside");
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
default:
|
||||
assert false;
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// NOTE: hitCount[0] will be an over-estimate in the multi-valued case
|
||||
return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings({"unchecked","rawtypes"})
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
if (!super.equals(o)) return false;
|
||||
|
||||
PointInGeo3DShapeQuery that = (PointInGeo3DShapeQuery) o;
|
||||
|
||||
return planetModel.equals(that.planetModel) && shape.equals(that.shape);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int hashCode() {
|
||||
int result = super.hashCode();
|
||||
result = 31 * result + planetModel.hashCode();
|
||||
result = 31 * result + shape.hashCode();
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
sb.append(getClass().getSimpleName());
|
||||
sb.append(':');
|
||||
if (this.field.equals(field) == false) {
|
||||
sb.append(" field=");
|
||||
sb.append(this.field);
|
||||
sb.append(':');
|
||||
}
|
||||
sb.append("PlanetModel: ");
|
||||
sb.append(planetModel);
|
||||
sb.append(" Shape: ");
|
||||
sb.append(shape);
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
|
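The visitor above is the general dimensional-values contract: compare() prunes or accepts whole cells, the one-argument visit() collects every doc in a cell fully inside the query, and the two-argument visit() re-checks individual points in crossing cells. A standalone 1D sketch of that contract (hypothetical, not the Lucene API):

import java.util.ArrayList;
import java.util.List;

public class IntersectSketch {
  enum Relation { INSIDE, OUTSIDE, CROSSES }

  static Relation compare(int cellMin, int cellMax, int qMin, int qMax) {
    if (cellMin >= qMin && cellMax <= qMax) return Relation.INSIDE;
    if (cellMax < qMin || cellMin > qMax) return Relation.OUTSIDE;
    return Relation.CROSSES;
  }

  public static void main(String[] args) {
    int[] points = {1, 4, 7, 9, 12};
    int qMin = 3, qMax = 10;
    List<Integer> hits = new ArrayList<>();
    // Two "cells": points[0..2] with bounds [1,7], points[3..4] with bounds [9,12]:
    int[][] cells = {{0, 2, 1, 7}, {3, 4, 9, 12}}; // {firstIdx, lastIdx, min, max}
    for (int[] cell : cells) {
      switch (compare(cell[2], cell[3], qMin, qMax)) {
        case OUTSIDE: // prune the whole cell
          break;
        case INSIDE:  // collect without per-point checks
          for (int i = cell[0]; i <= cell[1]; i++) hits.add(points[i]);
          break;
        case CROSSES: // per-point re-check
          for (int i = cell[0]; i <= cell[1]; i++) {
            if (points[i] >= qMin && points[i] <= qMax) hits.add(points[i]);
          }
          break;
      }
    }
    System.out.println(hits); // [4, 7, 9]
  }
}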
@ -1,17 +0,0 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.bkdtree3d;
|
||||
package org.apache.lucene.geo3d;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -28,21 +28,15 @@ import java.util.concurrent.CountDownLatch;
|
|||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene60.Lucene60Codec;
|
||||
import org.apache.lucene.codecs.DimensionalFormat;
|
||||
import org.apache.lucene.codecs.DimensionalReader;
|
||||
import org.apache.lucene.codecs.DimensionalWriter;
|
||||
import org.apache.lucene.codecs.FilterCodec;
|
||||
import org.apache.lucene.codecs.lucene60.Lucene60DimensionalReader;
|
||||
import org.apache.lucene.codecs.lucene60.Lucene60DimensionalWriter;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.geo3d.GeoArea;
|
||||
import org.apache.lucene.geo3d.GeoAreaFactory;
|
||||
import org.apache.lucene.geo3d.GeoBBoxFactory;
|
||||
import org.apache.lucene.geo3d.GeoCircleFactory;
|
||||
import org.apache.lucene.geo3d.GeoPath;
|
||||
import org.apache.lucene.geo3d.GeoPoint;
|
||||
import org.apache.lucene.geo3d.GeoPolygonFactory;
|
||||
import org.apache.lucene.geo3d.GeoShape;
|
||||
import org.apache.lucene.geo3d.PlanetModel;
|
||||
import org.apache.lucene.geo3d.XYZBounds;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
|
@@ -50,16 +44,13 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
@@ -69,12 +60,6 @@ import org.junit.BeforeClass;

import com.carrotsearch.randomizedtesting.generators.RandomInts;

import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueCenter;
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueMax;
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueMin;
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.encodeValue;
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.encodeValueLenient;

public class TestGeo3DPointField extends LuceneTestCase {

  private static boolean smallBBox;
@@ -87,12 +72,39 @@ public class TestGeo3DPointField extends LuceneTestCase {
    }
  }

  private static Codec getCodec() {
    if (Codec.getDefault().getName().equals("Lucene60")) {
      int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048);
      double maxMBSortInHeap = 0.1 + (3*random().nextDouble());
      if (VERBOSE) {
        System.out.println("TEST: using Lucene60DimensionalFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
      }

      return new FilterCodec("Lucene60", Codec.getDefault()) {
        @Override
        public DimensionalFormat dimensionalFormat() {
          return new DimensionalFormat() {
            @Override
            public DimensionalWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
              return new Lucene60DimensionalWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
            }

            @Override
            public DimensionalReader fieldsReader(SegmentReadState readState) throws IOException {
              return new Lucene60DimensionalReader(readState);
            }
          };
        }
      };
    } else {
      return Codec.getDefault();
    }
  }
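The FilterCodec above delegates every format except the dimensional one to the default Lucene60 codec. Keeping the name "Lucene60" means segments written this way can still be opened by the stock codec: the write-time knobs (leaf size, sort heap) only shape how the BKD tree is built, while the reader takes nothing but the read state, as the hunk shows. The same idiom as a named, reusable class; a hedged sketch, with illustrative parameter values:

    import java.io.IOException;

    import org.apache.lucene.codecs.Codec;
    import org.apache.lucene.codecs.DimensionalFormat;
    import org.apache.lucene.codecs.DimensionalReader;
    import org.apache.lucene.codecs.DimensionalWriter;
    import org.apache.lucene.codecs.FilterCodec;
    import org.apache.lucene.codecs.lucene60.Lucene60DimensionalReader;
    import org.apache.lucene.codecs.lucene60.Lucene60DimensionalWriter;
    import org.apache.lucene.index.SegmentReadState;
    import org.apache.lucene.index.SegmentWriteState;

    /** Delegates everything to the default codec except the dimensional format. */
    class CustomDimensionalCodec extends FilterCodec {
      private final int maxPointsInLeafNode;   // e.g. 1024
      private final double maxMBSortInHeap;    // e.g. 16.0

      CustomDimensionalCodec(int maxPointsInLeafNode, double maxMBSortInHeap) {
        super("Lucene60", Codec.getDefault());
        this.maxPointsInLeafNode = maxPointsInLeafNode;
        this.maxMBSortInHeap = maxMBSortInHeap;
      }

      @Override
      public DimensionalFormat dimensionalFormat() {
        return new DimensionalFormat() {
          @Override
          public DimensionalWriter fieldsWriter(SegmentWriteState state) throws IOException {
            return new Lucene60DimensionalWriter(state, maxPointsInLeafNode, maxMBSortInHeap);
          }

          @Override
          public DimensionalReader fieldsReader(SegmentReadState state) throws IOException {
            return new Lucene60DimensionalReader(state);
          }
        };
      }
    }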
  public void testBasic() throws Exception {
    Directory dir = getDirectory();
    int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
    int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Geo3DDocValuesFormat(PlanetModel.WGS84, maxPointsInLeaf, maxPointsSortInHeap)));
    iwc.setCodec(getCodec());
    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new Geo3DPointField("field", PlanetModel.WGS84, toRadians(50.7345267), toRadians(-97.5303555)));
@@ -108,126 +120,10 @@
    dir.close();
  }
  public void testPlanetModelChanged() throws Exception {
    Directory dir = getDirectory();
    int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
    int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Geo3DDocValuesFormat(PlanetModel.WGS84, maxPointsInLeaf, maxPointsSortInHeap)));
    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new Geo3DPointField("field", PlanetModel.WGS84, toRadians(50.7345267), toRadians(-97.5303555)));
    w.addDocument(doc);
    IndexReader r = DirectoryReader.open(w, true);
    IndexSearcher s = new IndexSearcher(r);
    try {
      s.search(new PointInGeo3DShapeQuery(PlanetModel.SPHERE,
                                          "field",
                                          GeoCircleFactory.makeGeoCircle(PlanetModel.WGS84, toRadians(50), toRadians(-97), Math.PI/180.)), 1);
      fail("did not hit exc");
    } catch (IllegalStateException ise) {
      // expected
    }
    w.close();
    r.close();
    dir.close();
  }
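testPlanetModelChanged pins down an invariant: a PointInGeo3DShapeQuery built against a different PlanetModel than the one used at indexing time must fail fast with IllegalStateException rather than silently return wrong results. A sketch of the assumed shape of that guard; the exact message and where the check lives are not shown in this diff:

    import org.apache.lucene.geo3d.PlanetModel;

    class PlanetModelGuardSketch {
      /** Hypothetical check: compare the query's model against the indexed one. */
      static void checkPlanetModel(PlanetModel indexed, PlanetModel query) {
        if (indexed.equals(query) == false) {
          throw new IllegalStateException("index was created with a different PlanetModel: " + indexed);
        }
      }
    }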
  private static double toRadians(double degrees) {
    // degrees -> radians
    return Math.PI*(degrees/180.0);
  }
  public void testBKDBasic() throws Exception {
    Directory dir = getDirectory();
    IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);

    BKD3DTreeWriter w = new BKD3DTreeWriter(dir, "bkd3d");

    w.add(0, 0, 0, 0);
    w.add(1, 1, 1, 1);
    w.add(-1, -1, -1, 2);

    long indexFP = w.finish(out);
    out.close();

    IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
    in.seek(indexFP);
    BKD3DTreeReader r = new BKD3DTreeReader(in, 3);

    DocIdSet hits = r.intersect(Integer.MIN_VALUE, Integer.MAX_VALUE,
                                Integer.MIN_VALUE, Integer.MAX_VALUE,
                                Integer.MIN_VALUE, Integer.MAX_VALUE,

                                new BKD3DTreeReader.ValueFilter() {

                                  @Override
                                  public boolean accept(int docID) {
                                    return true;
                                  }

                                  @Override
                                  public BKD3DTreeReader.Relation compare(int xMin, int xMax,
                                                                          int yMin, int yMax,
                                                                          int zMin, int zMax) {
                                    return BKD3DTreeReader.Relation.SHAPE_INSIDE_CELL;
                                  }

                                });
    DocIdSetIterator disi = hits.iterator();
    assertEquals(0, disi.nextDoc());
    assertEquals(1, disi.nextDoc());
    assertEquals(2, disi.nextDoc());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());
    in.close();
    dir.close();
  }
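testBKDBasic disappears with this change, along with the standalone BKD3DTreeWriter/BKD3DTreeReader pair it exercised. The traversal contract it drove is worth spelling out, since the idea carries over to the dimensional implementation: compare() classifies each cell so whole subtrees can be pruned (SHAPE_OUTSIDE_CELL) or emitted wholesale without per-document accept() calls (CELL_INSIDE_SHAPE); the boundary cases fall back to accept() per point. A schematic flattened to leaf cells; every type below is invented for illustration and none of it is Lucene API:

    import java.util.ArrayList;
    import java.util.List;

    class BkdPruningSketch {
      enum Relation { SHAPE_OUTSIDE_CELL, CELL_INSIDE_SHAPE, SHAPE_INSIDE_CELL, SHAPE_CROSSES_CELL }

      interface ValueFilter {
        boolean accept(int docID);
        Relation compare(int xMin, int xMax, int yMin, int yMax, int zMin, int zMax);
      }

      /** A leaf cell: its integer bounding box plus the docIDs stored in it. */
      static class LeafCell {
        final int xMin, xMax, yMin, yMax, zMin, zMax;
        final int[] docIDs;
        LeafCell(int xMin, int xMax, int yMin, int yMax, int zMin, int zMax, int[] docIDs) {
          this.xMin = xMin; this.xMax = xMax;
          this.yMin = yMin; this.yMax = yMax;
          this.zMin = zMin; this.zMax = zMax;
          this.docIDs = docIDs;
        }
      }

      static List<Integer> collect(List<LeafCell> leaves, ValueFilter filter) {
        List<Integer> hits = new ArrayList<>();
        for (LeafCell cell : leaves) {
          switch (filter.compare(cell.xMin, cell.xMax, cell.yMin, cell.yMax, cell.zMin, cell.zMax)) {
            case SHAPE_OUTSIDE_CELL:
              break;                        // prune: nothing in this cell can match
            case CELL_INSIDE_SHAPE:
              for (int docID : cell.docIDs) {
                hits.add(docID);            // whole cell matches: skip accept()
              }
              break;
            default:                        // SHAPE_INSIDE_CELL or SHAPE_CROSSES_CELL
              for (int docID : cell.docIDs) {
                if (filter.accept(docID)) { // boundary cell: test each point
                  hits.add(docID);
                }
              }
              break;
          }
        }
        return hits;
      }
    }

In testBKDBasic the accept-all filter plus a blanket SHAPE_INSIDE_CELL answer makes every indexed point come back, which is exactly what its assertions check.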
  static class Point {
    final double x;
    final double y;
    final double z;

    public Point(double x, double y, double z) {
      this.x = x;
      this.y = y;
      this.z = z;
    }

    @Override
    public String toString() {
      return "x=" + x + " y=" + y + " z=" + z;
    }
  }

  private static class Range {
    final double min;
    final double max;

    public Range(double min, double max) {
      this.min = min;
      this.max = max;
    }

    @Override
    public String toString() {
      return min + " TO " + max;
    }
  }

  private double randomCoord(PlanetModel planetModel) {
    return planetModel.getMaximumMagnitude() * 2*(random().nextDouble()-0.5);
  }

  private Range randomRange(PlanetModel planetModel) {
    double x = randomCoord(planetModel);
    double y = randomCoord(planetModel);
    if (x < y) {
      return new Range(x, y);
    } else {
      return new Range(y, x);
    }
  }
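randomCoord above maps a uniform u in [0, 1) onto a coordinate in [-planetMax, planetMax): the 2*(u-0.5) factor recenters and rescales. A quick endpoint check in plain Java, with planetMax fixed to 1.0 as a stand-in for planetModel.getMaximumMagnitude():

    class RandomCoordCheck {
      public static void main(String[] args) {
        double planetMax = 1.0;                               // stand-in value
        double atZero = planetMax * 2 * (0.0 - 0.5);          // u = 0  -> -planetMax
        double nearOne = planetMax * 2 * (0.999999999 - 0.5); // u -> 1 -> just under +planetMax
        System.out.println(atZero + " " + nearOne);           // -1.0, ~0.999999998
      }
    }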
  private static PlanetModel getPlanetModel() {
    if (random().nextBoolean()) {
      // Use one of the earth models:

@@ -243,161 +139,6 @@
    }
  }

  public void testBKDRandom() throws Exception {
    List<Point> points = new ArrayList<>();
    int numPoints = atLeast(10000);
    Directory dir = getDirectory();
    IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
    int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);

    int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);

    PlanetModel planetModel = getPlanetModel();
    final double planetMax = planetModel.getMaximumMagnitude();

    BKD3DTreeWriter w = new BKD3DTreeWriter(dir, "bkd3d", maxPointsInLeaf, maxPointsSortInHeap);
    for(int docID=0;docID<numPoints;docID++) {
      Point point;
      if (docID > 0 && random().nextInt(30) == 17) {
        // Dup point
        point = points.get(random().nextInt(points.size()));
      } else {
        point = new Point(randomCoord(planetModel),
                          randomCoord(planetModel),
                          randomCoord(planetModel));
      }

      if (VERBOSE) {
        System.err.println("  docID=" + docID + " point=" + point);
        System.err.println("    x=" + encodeValue(planetMax, point.x) +
                           " y=" + encodeValue(planetMax, point.y) +
                           " z=" + encodeValue(planetMax, point.z));
      }

      points.add(point);
      w.add(encodeValue(planetMax, point.x),
            encodeValue(planetMax, point.y),
            encodeValue(planetMax, point.z),
            docID);
    }

    long indexFP = w.finish(out);
    out.close();

    IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
    in.seek(indexFP);
    BKD3DTreeReader r = new BKD3DTreeReader(in, numPoints);

    int numIters = atLeast(100);
    for(int iter=0;iter<numIters;iter++) {
      // bbox
      Range x = randomRange(planetModel);
      Range y = randomRange(planetModel);
      Range z = randomRange(planetModel);

      int xMinEnc = encodeValue(planetMax, x.min);
      int xMaxEnc = encodeValue(planetMax, x.max);
      int yMinEnc = encodeValue(planetMax, y.min);
      int yMaxEnc = encodeValue(planetMax, y.max);
      int zMinEnc = encodeValue(planetMax, z.min);
      int zMaxEnc = encodeValue(planetMax, z.max);

      if (VERBOSE) {
        System.err.println("\nTEST: iter=" + iter + " bbox: x=" + x + " (" + xMinEnc + " TO " + xMaxEnc + ")" + " y=" + y + " (" + yMinEnc + " TO " + yMaxEnc + ")" + " z=" + z + " (" + zMinEnc + " TO " + zMaxEnc + ")");
      }

      DocIdSet hits = r.intersect(xMinEnc, xMaxEnc,
                                  yMinEnc, yMaxEnc,
                                  zMinEnc, zMaxEnc,

                                  new BKD3DTreeReader.ValueFilter() {

                                    @Override
                                    public boolean accept(int docID) {
                                      Point point = points.get(docID);
                                      //System.out.println("  accept docID=" + docID + " point=" + point + " (x=" + encodeValue(point.x) + " y=" + encodeValue(point.y) + " z=" + encodeValue(point.z) + ")");

                                      // System.out.println("  accept docID=" + docID + " point: x=" + point.x + " y=" + point.y + " z=" + point.z);
                                      int xEnc = encodeValue(planetMax, point.x);
                                      int yEnc = encodeValue(planetMax, point.y);
                                      int zEnc = encodeValue(planetMax, point.z);

                                      boolean accept = xEnc >= xMinEnc && xEnc <= xMaxEnc &&
                                                       yEnc >= yMinEnc && yEnc <= yMaxEnc &&
                                                       zEnc >= zMinEnc && zEnc <= zMaxEnc;
                                      //System.out.println("    " + accept);

                                      return accept;
                                    }

                                    @Override
                                    public BKD3DTreeReader.Relation compare(int cellXMin, int cellXMax,
                                                                            int cellYMin, int cellYMax,
                                                                            int cellZMin, int cellZMax) {
                                      if (cellXMin > xMaxEnc || cellXMax < xMinEnc) {
                                        return BKD3DTreeReader.Relation.SHAPE_OUTSIDE_CELL;
                                      }
                                      if (cellYMin > yMaxEnc || cellYMax < yMinEnc) {
                                        return BKD3DTreeReader.Relation.SHAPE_OUTSIDE_CELL;
                                      }
                                      if (cellZMin > zMaxEnc || cellZMax < zMinEnc) {
                                        return BKD3DTreeReader.Relation.SHAPE_OUTSIDE_CELL;
                                      }

                                      if (cellXMin >= xMinEnc && cellXMax <= xMaxEnc &&
                                          cellYMin >= yMinEnc && cellYMax <= yMaxEnc &&
                                          cellZMin >= zMinEnc && cellZMax <= zMaxEnc) {
                                        return BKD3DTreeReader.Relation.CELL_INSIDE_SHAPE;
                                      }

                                      if (xMinEnc >= cellXMin && xMaxEnc <= cellXMax &&
                                          yMinEnc >= cellYMin && yMaxEnc <= cellYMax &&
                                          zMinEnc >= cellZMin && zMaxEnc <= cellZMax) {
                                        return BKD3DTreeReader.Relation.SHAPE_INSIDE_CELL;
                                      }

                                      return BKD3DTreeReader.Relation.SHAPE_CROSSES_CELL;
                                    }
                                  });

      DocIdSetIterator disi = hits.iterator();
      FixedBitSet matches = new FixedBitSet(numPoints);
      while (true) {
        int nextHit = disi.nextDoc();
        if (nextHit == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }
        matches.set(nextHit);
      }
      if (VERBOSE) {
        System.err.println("  total hits: " + matches.cardinality());
      }

      for(int docID=0;docID<numPoints;docID++) {
        Point point = points.get(docID);
        boolean actual = matches.get(docID);

        // We must quantize exactly as BKD tree does else we'll get false failures
        int xEnc = encodeValue(planetMax, point.x);
        int yEnc = encodeValue(planetMax, point.y);
        int zEnc = encodeValue(planetMax, point.z);

        boolean expected = xEnc >= xMinEnc && xEnc <= xMaxEnc &&
                           yEnc >= yMinEnc && yEnc <= yMaxEnc &&
                           zEnc >= zMinEnc && zEnc <= zMaxEnc;

        if (expected != actual) {
          System.out.println("docID=" + docID + " is wrong: expected=" + expected + " actual=" + actual);
          System.out.println("  x=" + point.x + " (" + xEnc + ")" + " y=" + point.y + " (" + yEnc + ")" + " z=" + point.z + " (" + zEnc + ")");
          fail("wrong match");
        }
      }
    }

    in.close();
    dir.close();
  }
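The compare() implementation in testBKDRandom above is per-axis interval arithmetic: disjoint on any axis means the cell can be pruned, containment must hold on all three axes for either inside case, and anything left over crosses. The one-axis reduction, as a hypothetical helper (not part of any Lucene API):

    class IntervalRelationSketch {
      /** One-axis version of the cell-vs-query classification above. */
      static String relate(int cellMin, int cellMax, int queryMin, int queryMax) {
        if (cellMin > queryMax || cellMax < queryMin) {
          return "SHAPE_OUTSIDE_CELL";   // disjoint intervals
        }
        if (cellMin >= queryMin && cellMax <= queryMax) {
          return "CELL_INSIDE_SHAPE";    // cell contained in the query range
        }
        if (queryMin >= cellMin && queryMax <= cellMax) {
          return "SHAPE_INSIDE_CELL";    // query range contained in the cell
        }
        return "SHAPE_CROSSES_CELL";     // partial overlap
      }
    }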
  private static class Cell {
    static int nextCellID;

@@ -426,9 +167,9 @@

    /** Returns true if the quantized point lies within this cell, inclusive on all bounds. */
    public boolean contains(double planetMax, GeoPoint point) {
      int docX = encodeValue(planetMax, point.x);
      int docY = encodeValue(planetMax, point.y);
      int docZ = encodeValue(planetMax, point.z);
      int docX = Geo3DUtil.encodeValue(planetMax, point.x);
      int docY = Geo3DUtil.encodeValue(planetMax, point.y);
      int docZ = Geo3DUtil.encodeValue(planetMax, point.z);

      return docX >= xMinEnc && docX <= xMaxEnc &&
             docY >= yMinEnc && docY <= yMaxEnc &&
@@ -442,9 +183,9 @@
    }

  private static GeoPoint quantize(double planetMax, GeoPoint point) {
    return new GeoPoint(decodeValueCenter(planetMax, encodeValue(planetMax, point.x)),
                        decodeValueCenter(planetMax, encodeValue(planetMax, point.y)),
                        decodeValueCenter(planetMax, encodeValue(planetMax, point.z)));
    return new GeoPoint(Geo3DUtil.decodeValueCenter(planetMax, Geo3DUtil.encodeValue(planetMax, point.x)),
                        Geo3DUtil.decodeValueCenter(planetMax, Geo3DUtil.encodeValue(planetMax, point.y)),
                        Geo3DUtil.decodeValueCenter(planetMax, Geo3DUtil.encodeValue(planetMax, point.z)));
  }
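quantize() snaps a point onto the grid the tree actually stores, which is why the verification loop earlier re-encodes every point before comparing ("We must quantize exactly as BKD tree does else we'll get false failures"). A hypothetical encode/decode pair showing the intended relationship; the real constants and rounding live in Geo3DUtil and may differ:

    class QuantizeSketch {
      /** Assumed quantizer: scale [-planetMax, planetMax] onto the int range, rounding down. */
      static int encodeValue(double planetMax, double x) {
        return (int) Math.floor(x / planetMax * Integer.MAX_VALUE);
      }

      /** Assumed inverse: the center of the bucket that enc maps back into. */
      static double decodeValueCenter(double planetMax, int enc) {
        return (enc + 0.5) * planetMax / Integer.MAX_VALUE;
      }
    }

The property the test depends on is idempotence: with these definitions, encodeValue(planetMax, decodeValueCenter(planetMax, enc)) == enc, so re-quantizing an already-quantized point is a no-op.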
  /** Tests consistency of GeoArea.getRelationship vs GeoShape.isWithin */

@@ -488,12 +229,12 @@

    // Start with the root cell that fully contains the shape:
    Cell root = new Cell(null,
                         encodeValueLenient(planetMax, bounds.getMinimumX()),
                         encodeValueLenient(planetMax, bounds.getMaximumX()),
                         encodeValueLenient(planetMax, bounds.getMinimumY()),
                         encodeValueLenient(planetMax, bounds.getMaximumY()),
                         encodeValueLenient(planetMax, bounds.getMinimumZ()),
                         encodeValueLenient(planetMax, bounds.getMaximumZ()),
                         Geo3DUtil.encodeValueLenient(planetMax, bounds.getMinimumX()),
                         Geo3DUtil.encodeValueLenient(planetMax, bounds.getMaximumX()),
                         Geo3DUtil.encodeValueLenient(planetMax, bounds.getMinimumY()),
                         Geo3DUtil.encodeValueLenient(planetMax, bounds.getMaximumY()),
                         Geo3DUtil.encodeValueLenient(planetMax, bounds.getMinimumZ()),
                         Geo3DUtil.encodeValueLenient(planetMax, bounds.getMaximumZ()),
                         0);

    if (VERBOSE) {
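encodeValueLenient, by its name and its use on XYZBounds here, tolerates bounds that fall slightly outside [-planetMax, planetMax], since a bounding box around a shape can overshoot the planet surface. A hypothetical clamping variant, reusing the encodeValue sketch above; the real behavior is defined in Geo3DUtil:

    class LenientEncodeSketch {
      /** Assumed lenient encode: clamp out-of-range bounds instead of rejecting them. */
      static int encodeValueLenient(double planetMax, double x) {
        double clamped = Math.max(-planetMax, Math.min(planetMax, x));
        return QuantizeSketch.encodeValue(planetMax, clamped);
      }
    }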
@@ -534,14 +275,14 @@
      } else {

        GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel,
                                                      decodeValueMin(planetMax, cell.xMinEnc), decodeValueMax(planetMax, cell.xMaxEnc),
                                                      decodeValueMin(planetMax, cell.yMinEnc), decodeValueMax(planetMax, cell.yMaxEnc),
                                                      decodeValueMin(planetMax, cell.zMinEnc), decodeValueMax(planetMax, cell.zMaxEnc));
                                                      Geo3DUtil.decodeValueMin(planetMax, cell.xMinEnc), Geo3DUtil.decodeValueMax(planetMax, cell.xMaxEnc),
                                                      Geo3DUtil.decodeValueMin(planetMax, cell.yMinEnc), Geo3DUtil.decodeValueMax(planetMax, cell.yMaxEnc),
                                                      Geo3DUtil.decodeValueMin(planetMax, cell.zMinEnc), Geo3DUtil.decodeValueMax(planetMax, cell.zMaxEnc));

        if (VERBOSE) {
          log.println("  minx="+decodeValueMin(planetMax, cell.xMinEnc)+" maxx="+decodeValueMax(planetMax, cell.xMaxEnc)+
                      " miny="+decodeValueMin(planetMax, cell.yMinEnc)+" maxy="+decodeValueMax(planetMax, cell.yMaxEnc)+
                      " minz="+decodeValueMin(planetMax, cell.zMinEnc)+" maxz="+decodeValueMax(planetMax, cell.zMaxEnc));
          log.println("  minx="+Geo3DUtil.decodeValueMin(planetMax, cell.xMinEnc)+" maxx="+Geo3DUtil.decodeValueMax(planetMax, cell.xMaxEnc)+
                      " miny="+Geo3DUtil.decodeValueMin(planetMax, cell.yMinEnc)+" maxy="+Geo3DUtil.decodeValueMax(planetMax, cell.yMaxEnc)+
                      " minz="+Geo3DUtil.decodeValueMin(planetMax, cell.zMinEnc)+" maxz="+Geo3DUtil.decodeValueMax(planetMax, cell.zMaxEnc));
        }

        switch (xyzSolid.getRelationship(shape)) {
@@ -898,8 +639,6 @@
  }

  private static void verify(double[] lats, double[] lons) throws Exception {
    int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
    int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
    IndexWriterConfig iwc = newIndexWriterConfig();

    PlanetModel planetModel = getPlanetModel();
@@ -909,18 +648,7 @@
    if (mbd != -1 && mbd < lats.length/100) {
      iwc.setMaxBufferedDocs(lats.length/100);
    }
    final DocValuesFormat dvFormat = new Geo3DDocValuesFormat(planetModel, maxPointsInLeaf, maxPointsSortInHeap);
    Codec codec = new Lucene60Codec() {
      @Override
      public DocValuesFormat getDocValuesFormatForField(String field) {
        if (field.equals("point")) {
          return dvFormat;
        } else {
          return super.getDocValuesFormatForField(field);
        }
      }
    };
    iwc.setCodec(codec);
    iwc.setCodec(getCodec());
    Directory dir;
    if (lats.length > 100000) {
      dir = noVirusChecker(newFSDirectory(createTempDir("TestBKDTree")));