mirror of https://github.com/apache/lucene.git
add PointValues.size() stat, to return total number of points
This commit is contained in:
parent
b17c57f072
commit
3c02ab2187
|
@ -34,8 +34,8 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.BLOCK_VA
|
|||
class SimpleTextBKDReader extends BKDReader {
|
||||
|
||||
public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
|
||||
byte[] minPackedValue, byte[] maxPackedValue) throws IOException {
|
||||
super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue);
|
||||
byte[] minPackedValue, byte[] maxPackedValue, long pointCount) throws IOException {
|
||||
super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue, pointCount);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -47,6 +47,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MAX_LEAF
|
|||
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MAX_VALUE;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MIN_VALUE;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.NUM_DIMS;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.POINT_COUNT;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_COUNT;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_DIM;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_VALUE;
|
||||
|
@ -119,6 +120,10 @@ class SimpleTextPointReader extends PointReader {
|
|||
assert startsWith(MAX_VALUE);
|
||||
BytesRef maxValue = SimpleTextUtil.fromBytesRefString(stripPrefix(MAX_VALUE));
|
||||
assert maxValue.length == numDims*bytesPerDim;
|
||||
|
||||
readLine(dataIn);
|
||||
assert startsWith(POINT_COUNT);
|
||||
long pointCount = parseLong(POINT_COUNT);
|
||||
|
||||
long[] leafBlockFPs = new long[count];
|
||||
for(int i=0;i<count;i++) {
|
||||
|
@ -139,7 +144,7 @@ class SimpleTextPointReader extends PointReader {
|
|||
System.arraycopy(br.bytes, br.offset, splitPackedValues, (1 + bytesPerDim) * i + 1, bytesPerDim);
|
||||
}
|
||||
|
||||
return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes);
|
||||
return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes, pointCount);
|
||||
}
|
||||
|
||||
private void readLine(IndexInput in) throws IOException {
|
||||
|
@ -267,4 +272,15 @@ class SimpleTextPointReader extends PointReader {
|
|||
}
|
||||
return bkdReader.getBytesPerDimension();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size(String fieldName) {
|
||||
BKDReader bkdReader = getBKDReader(fieldName);
|
||||
if (bkdReader == null) {
|
||||
// Schema ghost corner case! This field did index points in the past, but
|
||||
// now all docs having this field were deleted in this segment:
|
||||
return 0;
|
||||
}
|
||||
return bkdReader.getPointCount();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -51,6 +51,7 @@ class SimpleTextPointWriter extends PointWriter {
|
|||
final static BytesRef FIELD_FP = new BytesRef(" field fp ");
|
||||
final static BytesRef MIN_VALUE = new BytesRef("min value ");
|
||||
final static BytesRef MAX_VALUE = new BytesRef("max value ");
|
||||
final static BytesRef POINT_COUNT = new BytesRef("point count ");
|
||||
|
||||
private IndexOutput dataOut;
|
||||
final BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
|
@ -102,6 +103,10 @@ class SimpleTextPointWriter extends PointWriter {
|
|||
write(out, br.toString());
|
||||
newline(out);
|
||||
|
||||
write(out, POINT_COUNT);
|
||||
writeLong(out, pointCount);
|
||||
newline(out);
|
||||
|
||||
for(int i=0;i<leafBlockFPs.length;i++) {
|
||||
write(out, BLOCK_FP);
|
||||
writeLong(out, leafBlockFPs[i]);
|
||||
|
|
|
@ -95,6 +95,11 @@ public abstract class PointFormat {
|
|||
public int getBytesPerDimension(String fieldName) {
|
||||
throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with points");
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size(String fieldName) {
|
||||
throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with points");
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
|
|
@ -115,6 +115,12 @@ public abstract class PointWriter implements Closeable {
|
|||
public int getBytesPerDimension(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -215,5 +215,16 @@ public class Lucene60PointReader extends PointReader implements Closeable {
|
|||
}
|
||||
return bkdReader.getBytesPerDimension();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size(String fieldName) {
|
||||
BKDReader bkdReader = getBKDReader(fieldName);
|
||||
if (bkdReader == null) {
|
||||
// Schema ghost corner case! This field did index points in the past, but
|
||||
// now all docs having this point field were deleted in this segment:
|
||||
return 0;
|
||||
}
|
||||
return bkdReader.getPointCount();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1698,15 +1698,31 @@ public final class CheckIndex implements Closeable {
|
|||
status.totalValueFields++;
|
||||
int dimCount = fieldInfo.getPointDimensionCount();
|
||||
int bytesPerDim = fieldInfo.getPointNumBytes();
|
||||
byte[] lastMinPackedValue = new byte[dimCount*bytesPerDim];
|
||||
BytesRef lastMinPacked = new BytesRef(lastMinPackedValue);
|
||||
byte[] lastMaxPackedValue = new byte[dimCount*bytesPerDim];
|
||||
BytesRef lastMaxPacked = new BytesRef(lastMaxPackedValue);
|
||||
int packedBytesCount = dimCount * bytesPerDim;
|
||||
byte[] lastMinPackedValue = new byte[packedBytesCount];
|
||||
byte[] lastMaxPackedValue = new byte[packedBytesCount];
|
||||
BytesRef scratch = new BytesRef();
|
||||
lastMaxPacked.length = bytesPerDim;
|
||||
lastMinPacked.length = bytesPerDim;
|
||||
scratch.length = bytesPerDim;
|
||||
byte[] lastPackedValue = new byte[dimCount*bytesPerDim];
|
||||
byte[] lastPackedValue = new byte[packedBytesCount];
|
||||
|
||||
long[] pointCountSeen = new long[1];
|
||||
|
||||
byte[] globalMinPackedValue = values.getMinPackedValue(fieldInfo.name);
|
||||
long size = values.size(fieldInfo.name);
|
||||
if (globalMinPackedValue == null && size != 0) {
|
||||
throw new RuntimeException("getMinPackedValue is null points for field \"" + fieldInfo.name + "\" yet size=" + size);
|
||||
}
|
||||
if (globalMinPackedValue.length != packedBytesCount) {
|
||||
throw new RuntimeException("getMinPackedValue for field \"" + fieldInfo.name + "\" return length=" + globalMinPackedValue.length + " array, but should be " + packedBytesCount);
|
||||
}
|
||||
byte[] globalMaxPackedValue = values.getMaxPackedValue(fieldInfo.name);
|
||||
if (globalMaxPackedValue == null && size != 0) {
|
||||
throw new RuntimeException("getMaxPackedValue is null points for field \"" + fieldInfo.name + "\" yet size=" + size);
|
||||
}
|
||||
if (globalMaxPackedValue.length != packedBytesCount) {
|
||||
throw new RuntimeException("getMaxPackedValue for field \"" + fieldInfo.name + "\" return length=" + globalMaxPackedValue.length + " array, but should be " + packedBytesCount);
|
||||
}
|
||||
|
||||
values.intersect(fieldInfo.name,
|
||||
new PointValues.IntersectVisitor() {
|
||||
|
||||
|
@ -1720,18 +1736,18 @@ public final class CheckIndex implements Closeable {
|
|||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
checkPackedValue("packed value", packedValue, docID);
|
||||
scratch.bytes = packedValue;
|
||||
pointCountSeen[0]++;
|
||||
|
||||
for(int dim=0;dim<dimCount;dim++) {
|
||||
lastMaxPacked.offset = bytesPerDim * dim;
|
||||
lastMinPacked.offset = bytesPerDim * dim;
|
||||
scratch.offset = bytesPerDim * dim;
|
||||
int offset = bytesPerDim * dim;
|
||||
|
||||
if (scratch.compareTo(lastMinPacked) < 0) {
|
||||
// Compare to last cell:
|
||||
if (StringHelper.compare(bytesPerDim, packedValue, offset, lastMinPackedValue, offset) < 0) {
|
||||
// This doc's point, in this dimension, is lower than the minimum value of the last cell checked:
|
||||
throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for docID=" + docID + " is out-of-bounds of the last cell min=" + Arrays.toString(lastMinPackedValue) + " max=" + Arrays.toString(lastMaxPackedValue) + " dim=" + dim);
|
||||
}
|
||||
if (scratch.compareTo(lastMaxPacked) > 0) {
|
||||
|
||||
if (StringHelper.compare(bytesPerDim, packedValue, offset, lastMaxPackedValue, offset) > 0) {
|
||||
// This doc's point, in this dimension, is greater than the maximum value of the last cell checked:
|
||||
throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for docID=" + docID + " is out-of-bounds of the last cell min=" + Arrays.toString(lastMinPackedValue) + " max=" + Arrays.toString(lastMaxPackedValue) + " dim=" + dim);
|
||||
}
|
||||
|
@ -1758,9 +1774,33 @@ public final class CheckIndex implements Closeable {
|
|||
@Override
|
||||
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
checkPackedValue("min packed value", minPackedValue, -1);
|
||||
System.arraycopy(minPackedValue, 0, lastMinPackedValue, 0, minPackedValue.length);
|
||||
System.arraycopy(minPackedValue, 0, lastMinPackedValue, 0, packedBytesCount);
|
||||
checkPackedValue("max packed value", maxPackedValue, -1);
|
||||
System.arraycopy(maxPackedValue, 0, lastMaxPackedValue, 0, maxPackedValue.length);
|
||||
System.arraycopy(maxPackedValue, 0, lastMaxPackedValue, 0, packedBytesCount);
|
||||
|
||||
for(int dim=0;dim<dimCount;dim++) {
|
||||
int offset = bytesPerDim * dim;
|
||||
|
||||
// Make sure this cell is not outside of the global min/max:
|
||||
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, globalMinPackedValue, offset) < 0) {
|
||||
throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) +
|
||||
" is out-of-bounds of the global minimum " + Arrays.toString(globalMinPackedValue) + " dim=" + dim);
|
||||
}
|
||||
|
||||
if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, globalMinPackedValue, offset) < 0) {
|
||||
throw new RuntimeException("packed points cell maxPackedValue " + Arrays.toString(minPackedValue) +
|
||||
" is out-of-bounds of the global minimum " + Arrays.toString(globalMinPackedValue) + " dim=" + dim);
|
||||
}
|
||||
|
||||
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, globalMaxPackedValue, offset) > 0) {
|
||||
throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) +
|
||||
" is out-of-bounds of the global maximum " + Arrays.toString(globalMaxPackedValue) + " dim=" + dim);
|
||||
}
|
||||
if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, globalMaxPackedValue, offset) > 0) {
|
||||
throw new RuntimeException("packed points cell maxPackedValue " + Arrays.toString(maxPackedValue) +
|
||||
" is out-of-bounds of the global maximum " + Arrays.toString(globalMaxPackedValue) + " dim=" + dim);
|
||||
}
|
||||
}
|
||||
|
||||
// We always pretend the query shape is so complex that it crosses every cell, so
|
||||
// that packedValue is passed for every document
|
||||
|
@ -1772,11 +1812,16 @@ public final class CheckIndex implements Closeable {
|
|||
throw new RuntimeException(desc + " is null for docID=" + docID);
|
||||
}
|
||||
|
||||
if (packedValue.length != dimCount * bytesPerDim) {
|
||||
throw new RuntimeException(desc + " has incorrect length=" + packedValue.length + " vs expected=" + (dimCount * bytesPerDim) + " for docID=" + docID);
|
||||
if (packedValue.length != packedBytesCount) {
|
||||
throw new RuntimeException(desc + " has incorrect length=" + packedValue.length + " vs expected=" + packedBytesCount + " for docID=" + docID);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if (pointCountSeen[0] != size) {
|
||||
System.out.println("HERE: " + values);
|
||||
throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have size=" + size + " points, but in fact has " + pointCountSeen[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,172 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.index;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
/** Merges multiple {@link PointValues} into a single one. */
|
||||
public class MultiPointValues extends PointValues {
|
||||
|
||||
private final List<PointValues> subs;
|
||||
private final List<Integer> docBases;
|
||||
|
||||
private MultiPointValues(List<PointValues> subs, List<Integer> docBases) {
|
||||
this.subs = subs;
|
||||
this.docBases = docBases;
|
||||
}
|
||||
|
||||
/** Returns a {@link PointValues} merging all point values from the provided reader. */
|
||||
public static PointValues get(IndexReader r) {
|
||||
final List<LeafReaderContext> leaves = r.leaves();
|
||||
final int size = leaves.size();
|
||||
if (size == 0) {
|
||||
return null;
|
||||
} else if (size == 1) {
|
||||
return leaves.get(0).reader().getPointValues();
|
||||
}
|
||||
|
||||
List<PointValues> values = new ArrayList<>();
|
||||
List<Integer> docBases = new ArrayList<>();
|
||||
for (int i = 0; i < size; i++) {
|
||||
LeafReaderContext context = leaves.get(i);
|
||||
PointValues v = context.reader().getPointValues();
|
||||
if (v != null) {
|
||||
values.add(v);
|
||||
docBases.add(context.docBase);
|
||||
}
|
||||
}
|
||||
|
||||
if (values.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return new MultiPointValues(values, docBases);
|
||||
}
|
||||
|
||||
/** Finds all documents and points matching the provided visitor */
|
||||
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
|
||||
for(int i=0;i<subs.size();i++) {
|
||||
int docBase = docBases.get(i);
|
||||
subs.get(i).intersect(fieldName,
|
||||
new IntersectVisitor() {
|
||||
@Override
|
||||
public void visit(int docID) throws IOException {
|
||||
visitor.visit(docBase+docID);
|
||||
}
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) throws IOException {
|
||||
visitor.visit(docBase+docID, packedValue);
|
||||
}
|
||||
@Override
|
||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
return visitor.compare(minPackedValue, maxPackedValue);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder b = new StringBuilder();
|
||||
b.append("MultiPointValues(");
|
||||
for(int i=0;i<subs.size();i++) {
|
||||
if (i > 0) {
|
||||
b.append(", ");
|
||||
}
|
||||
b.append("docBase=");
|
||||
b.append(docBases.get(i));
|
||||
b.append(" sub=" + subs.get(i));
|
||||
}
|
||||
b.append(')');
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMinPackedValue(String fieldName) throws IOException {
|
||||
byte[] result = null;
|
||||
for(int i=0;i<subs.size();i++) {
|
||||
byte[] minPackedValue = subs.get(i).getMinPackedValue(fieldName);
|
||||
if (result == null) {
|
||||
if (minPackedValue != null) {
|
||||
result = minPackedValue.clone();
|
||||
}
|
||||
} else {
|
||||
int numDims = subs.get(0).getNumDimensions(fieldName);
|
||||
int bytesPerDim = subs.get(0).getBytesPerDimension(fieldName);
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim*bytesPerDim;
|
||||
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, result, offset) < 0) {
|
||||
System.arraycopy(minPackedValue, offset, result, offset, bytesPerDim);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getMaxPackedValue(String fieldName) throws IOException {
|
||||
byte[] result = null;
|
||||
for(int i=0;i<subs.size();i++) {
|
||||
byte[] maxPackedValue = subs.get(i).getMaxPackedValue(fieldName);
|
||||
if (result == null) {
|
||||
if (maxPackedValue != null) {
|
||||
result = maxPackedValue.clone();
|
||||
}
|
||||
} else {
|
||||
int numDims = subs.get(0).getNumDimensions(fieldName);
|
||||
int bytesPerDim = subs.get(0).getBytesPerDimension(fieldName);
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim*bytesPerDim;
|
||||
if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, result, offset) > 0) {
|
||||
System.arraycopy(maxPackedValue, offset, result, offset, bytesPerDim);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDimensions(String fieldName) throws IOException {
|
||||
for(int i=0;i<subs.size();i++) {
|
||||
int result = subs.get(i).getNumDimensions(fieldName);
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension(String fieldName) throws IOException {
|
||||
for(int i=0;i<subs.size();i++) {
|
||||
int result = subs.get(i).getBytesPerDimension(fieldName);
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
|
@ -381,6 +381,19 @@ public class ParallelLeafReader extends LeafReader {
|
|||
}
|
||||
return dimValues.getBytesPerDimension(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size(String fieldName) {
|
||||
LeafReader reader = fieldToReader.get(fieldName);
|
||||
if (reader == null) {
|
||||
return 0;
|
||||
}
|
||||
PointValues dimValues = reader.getPointValues();
|
||||
if (dimValues == null) {
|
||||
return 0;
|
||||
}
|
||||
return dimValues.size(fieldName);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -80,10 +80,10 @@ public abstract class PointValues {
|
|||
* to test whether each document is deleted, if necessary. */
|
||||
public abstract void intersect(String fieldName, IntersectVisitor visitor) throws IOException;
|
||||
|
||||
/** Returns minimum value for each dimension, packed, or null if no points were indexed */
|
||||
/** Returns minimum value for each dimension, packed, or null if {@link #size} is <code>0</code> */
|
||||
public abstract byte[] getMinPackedValue(String fieldName) throws IOException;
|
||||
|
||||
/** Returns maximum value for each dimension, packed, or null if no points were indexed */
|
||||
/** Returns maximum value for each dimension, packed, or null if {@link #size} is <code>0</code> */
|
||||
public abstract byte[] getMaxPackedValue(String fieldName) throws IOException;
|
||||
|
||||
/** Returns how many dimensions were indexed */
|
||||
|
@ -91,4 +91,9 @@ public abstract class PointValues {
|
|||
|
||||
/** Returns the number of bytes per dimension */
|
||||
public abstract int getBytesPerDimension(String fieldName) throws IOException;
|
||||
|
||||
/** Returns the total number of indexed points across all documents in this field. */
|
||||
public abstract long size(String fieldName);
|
||||
|
||||
// nocommit make "delete all point docs then force merge" and then check stats test
|
||||
}
|
||||
|
|
|
@ -108,6 +108,11 @@ class PointValuesWriter {
|
|||
public int getBytesPerDimension(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size(String fieldName) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -172,6 +172,11 @@ public final class SlowCodecReaderWrapper {
|
|||
public int getBytesPerDimension(String fieldName) throws IOException {
|
||||
return values.getBytesPerDimension(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size(String fieldName) {
|
||||
return values.size(fieldName);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@ public class BKDReader implements Accountable {
|
|||
final int maxPointsInLeafNode;
|
||||
final byte[] minPackedValue;
|
||||
final byte[] maxPackedValue;
|
||||
final long pointCount;
|
||||
protected final int packedBytesLength;
|
||||
|
||||
/** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */
|
||||
|
@ -59,9 +60,12 @@ public class BKDReader implements Accountable {
|
|||
|
||||
minPackedValue = new byte[packedBytesLength];
|
||||
maxPackedValue = new byte[packedBytesLength];
|
||||
|
||||
in.readBytes(minPackedValue, 0, packedBytesLength);
|
||||
in.readBytes(maxPackedValue, 0, packedBytesLength);
|
||||
|
||||
pointCount = in.readVLong();
|
||||
|
||||
splitPackedValues = new byte[(1+bytesPerDim)*numLeaves];
|
||||
|
||||
// TODO: don't write split packed values[0]!
|
||||
|
@ -122,7 +126,7 @@ public class BKDReader implements Accountable {
|
|||
|
||||
/** Called by consumers that have their own on-disk format for the index (e.g. SimpleText) */
|
||||
protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
|
||||
byte[] minPackedValue, byte[] maxPackedValue) throws IOException {
|
||||
byte[] minPackedValue, byte[] maxPackedValue, long pointCount) throws IOException {
|
||||
this.in = in;
|
||||
this.numDims = numDims;
|
||||
this.maxPointsInLeafNode = maxPointsInLeafNode;
|
||||
|
@ -133,6 +137,7 @@ public class BKDReader implements Accountable {
|
|||
this.splitPackedValues = splitPackedValues;
|
||||
this.minPackedValue = minPackedValue;
|
||||
this.maxPackedValue = maxPackedValue;
|
||||
this.pointCount = pointCount;
|
||||
assert minPackedValue.length == packedBytesLength;
|
||||
assert maxPackedValue.length == packedBytesLength;
|
||||
}
|
||||
|
@ -275,10 +280,7 @@ public class BKDReader implements Accountable {
|
|||
packedBytesLength,
|
||||
maxPointsInLeafNode,
|
||||
visitor);
|
||||
byte[] rootMinPacked = new byte[packedBytesLength];
|
||||
byte[] rootMaxPacked = new byte[packedBytesLength];
|
||||
Arrays.fill(rootMaxPacked, (byte) 0xff);
|
||||
intersect(state, 1, rootMinPacked, rootMaxPacked);
|
||||
intersect(state, 1, minPackedValue, maxPackedValue);
|
||||
}
|
||||
|
||||
/** Fast path: this is called when the query box fully encompasses all cells under this node. */
|
||||
|
@ -430,4 +432,8 @@ public class BKDReader implements Accountable {
|
|||
public int getBytesPerDimension() {
|
||||
return bytesPerDim;
|
||||
}
|
||||
|
||||
public long getPointCount() {
|
||||
return pointCount;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -123,7 +123,7 @@ public class BKDWriter implements Closeable {
|
|||
/** Maximum per-dim values, packed */
|
||||
protected final byte[] maxPackedValue;
|
||||
|
||||
private long pointCount;
|
||||
protected long pointCount;
|
||||
|
||||
public BKDWriter(Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim) throws IOException {
|
||||
this(tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_MB_SORT_IN_HEAP);
|
||||
|
@ -428,7 +428,8 @@ public class BKDWriter implements Closeable {
|
|||
}
|
||||
System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, 0, packedBytesLength);
|
||||
|
||||
assert numDims > 1 || valueInOrder(valueCount++, lastPackedValue, reader.state.scratchPackedValue);
|
||||
assert numDims > 1 || valueInOrder(valueCount, lastPackedValue, reader.state.scratchPackedValue);
|
||||
valueCount++;
|
||||
|
||||
if (leafCount == 0) {
|
||||
if (leafBlockFPs.size() > 0) {
|
||||
|
@ -478,6 +479,8 @@ public class BKDWriter implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
pointCount = valueCount;
|
||||
|
||||
long indexFP = out.getFilePointer();
|
||||
|
||||
int numInnerNodes = leafBlockStartValues.size();
|
||||
|
@ -799,10 +802,6 @@ public class BKDWriter implements Closeable {
|
|||
// Sort all docs once by each dimension:
|
||||
PathSlice[] sortedPointWriters = new PathSlice[numDims];
|
||||
|
||||
byte[] minPacked = new byte[packedBytesLength];
|
||||
byte[] maxPacked = new byte[packedBytesLength];
|
||||
Arrays.fill(maxPacked, (byte) 0xff);
|
||||
|
||||
boolean success = false;
|
||||
try {
|
||||
//long t0 = System.nanoTime();
|
||||
|
@ -822,7 +821,7 @@ public class BKDWriter implements Closeable {
|
|||
|
||||
build(1, numLeaves, sortedPointWriters,
|
||||
ordBitSet, out,
|
||||
minPacked, maxPacked,
|
||||
minPackedValue, maxPackedValue,
|
||||
splitPackedValues,
|
||||
leafBlockFPs);
|
||||
|
||||
|
@ -862,6 +861,8 @@ public class BKDWriter implements Closeable {
|
|||
out.writeBytes(minPackedValue, 0, packedBytesLength);
|
||||
out.writeBytes(maxPackedValue, 0, packedBytesLength);
|
||||
|
||||
out.writeVLong(pointCount);
|
||||
|
||||
// TODO: for 1D case, don't waste the first byte of each split value (it's always 0)
|
||||
|
||||
// NOTE: splitPackedValues[0] is unused, because nodeID is 1-based:
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.index.PointValues;
|
|||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
|
@ -105,8 +106,8 @@ public final class AssertingPointFormat extends PointFormat {
|
|||
|
||||
// This doc's packed value should be contained in the last cell passed to compare:
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
assert StringHelper.compare(bytesPerDim, lastMinPackedValue, dim*bytesPerDim, packedValue, dim*bytesPerDim) <= 0: "dim=" + dim + " of " + numDims;
|
||||
assert StringHelper.compare(bytesPerDim, lastMaxPackedValue, dim*bytesPerDim, packedValue, dim*bytesPerDim) >= 0: "dim=" + dim + " of " + numDims;
|
||||
assert StringHelper.compare(bytesPerDim, lastMinPackedValue, dim*bytesPerDim, packedValue, dim*bytesPerDim) <= 0: "dim=" + dim + " of " + numDims + " value=" + new BytesRef(packedValue);
|
||||
assert StringHelper.compare(bytesPerDim, lastMaxPackedValue, dim*bytesPerDim, packedValue, dim*bytesPerDim) >= 0: "dim=" + dim + " of " + numDims + " value=" + new BytesRef(packedValue);
|
||||
}
|
||||
|
||||
// TODO: we should assert that this "matches" whatever relation the last call to compare had returned
|
||||
|
@ -214,6 +215,12 @@ public final class AssertingPointFormat extends PointFormat {
|
|||
public int getBytesPerDimension(String fieldName) throws IOException {
|
||||
return in.getBytesPerDimension(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size(String fieldName) {
|
||||
// TODO: what to assert?
|
||||
return in.size(fieldName);
|
||||
}
|
||||
}
|
||||
|
||||
static class AssertingPointWriter extends PointWriter {
|
||||
|
|
|
@ -171,5 +171,10 @@ class CrankyPointFormat extends PointFormat {
|
|||
public long ramBytesUsed() {
|
||||
return delegate.ramBytesUsed();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size(String fieldName) {
|
||||
return delegate.size(fieldName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue