add PointValues.size() stat, to return total number of points

This commit is contained in:
Mike McCandless 2016-03-02 17:53:28 -05:00
parent b17c57f072
commit 3c02ab2187
16 changed files with 171 additions and 208 deletions

View File

@ -34,8 +34,8 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.BLOCK_VA
class SimpleTextBKDReader extends BKDReader { class SimpleTextBKDReader extends BKDReader {
public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues, public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
byte[] minPackedValue, byte[] maxPackedValue) throws IOException { byte[] minPackedValue, byte[] maxPackedValue, long pointCount) throws IOException {
super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue); super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue, pointCount);
} }
@Override @Override

View File

@ -47,6 +47,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MAX_LEAF
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MAX_VALUE; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MAX_VALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MIN_VALUE; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MIN_VALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.NUM_DIMS; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.NUM_DIMS;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.POINT_COUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_COUNT; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_COUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_DIM; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_DIM;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_VALUE; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_VALUE;
@ -119,6 +120,10 @@ class SimpleTextPointReader extends PointReader {
assert startsWith(MAX_VALUE); assert startsWith(MAX_VALUE);
BytesRef maxValue = SimpleTextUtil.fromBytesRefString(stripPrefix(MAX_VALUE)); BytesRef maxValue = SimpleTextUtil.fromBytesRefString(stripPrefix(MAX_VALUE));
assert maxValue.length == numDims*bytesPerDim; assert maxValue.length == numDims*bytesPerDim;
readLine(dataIn);
assert startsWith(POINT_COUNT);
long pointCount = parseLong(POINT_COUNT);
long[] leafBlockFPs = new long[count]; long[] leafBlockFPs = new long[count];
for(int i=0;i<count;i++) { for(int i=0;i<count;i++) {
@ -139,7 +144,7 @@ class SimpleTextPointReader extends PointReader {
System.arraycopy(br.bytes, br.offset, splitPackedValues, (1 + bytesPerDim) * i + 1, bytesPerDim); System.arraycopy(br.bytes, br.offset, splitPackedValues, (1 + bytesPerDim) * i + 1, bytesPerDim);
} }
return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes); return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes, pointCount);
} }
private void readLine(IndexInput in) throws IOException { private void readLine(IndexInput in) throws IOException {
@ -267,4 +272,15 @@ class SimpleTextPointReader extends PointReader {
} }
return bkdReader.getBytesPerDimension(); return bkdReader.getBytesPerDimension();
} }
@Override
public long size(String fieldName) {
BKDReader bkdReader = getBKDReader(fieldName);
if (bkdReader == null) {
// Schema ghost corner case! This field did index points in the past, but
// now all docs having this field were deleted in this segment:
return 0;
}
return bkdReader.getPointCount();
}
} }

View File

@ -51,6 +51,7 @@ class SimpleTextPointWriter extends PointWriter {
final static BytesRef FIELD_FP = new BytesRef(" field fp "); final static BytesRef FIELD_FP = new BytesRef(" field fp ");
final static BytesRef MIN_VALUE = new BytesRef("min value "); final static BytesRef MIN_VALUE = new BytesRef("min value ");
final static BytesRef MAX_VALUE = new BytesRef("max value "); final static BytesRef MAX_VALUE = new BytesRef("max value ");
final static BytesRef POINT_COUNT = new BytesRef("point count ");
private IndexOutput dataOut; private IndexOutput dataOut;
final BytesRefBuilder scratch = new BytesRefBuilder(); final BytesRefBuilder scratch = new BytesRefBuilder();
@ -102,6 +103,10 @@ class SimpleTextPointWriter extends PointWriter {
write(out, br.toString()); write(out, br.toString());
newline(out); newline(out);
write(out, POINT_COUNT);
writeLong(out, pointCount);
newline(out);
for(int i=0;i<leafBlockFPs.length;i++) { for(int i=0;i<leafBlockFPs.length;i++) {
write(out, BLOCK_FP); write(out, BLOCK_FP);
writeLong(out, leafBlockFPs[i]); writeLong(out, leafBlockFPs[i]);

View File

@ -95,6 +95,11 @@ public abstract class PointFormat {
public int getBytesPerDimension(String fieldName) { public int getBytesPerDimension(String fieldName) {
throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with points"); throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with points");
} }
@Override
public long size(String fieldName) {
throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with points");
}
}; };
} }
}; };

View File

@ -115,6 +115,12 @@ public abstract class PointWriter implements Closeable {
public int getBytesPerDimension(String fieldName) { public int getBytesPerDimension(String fieldName) {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }
@Override
public long size(String fieldName) {
throw new UnsupportedOperationException();
}
}); });
} }

View File

@ -215,5 +215,16 @@ public class Lucene60PointReader extends PointReader implements Closeable {
} }
return bkdReader.getBytesPerDimension(); return bkdReader.getBytesPerDimension();
} }
@Override
public long size(String fieldName) {
BKDReader bkdReader = getBKDReader(fieldName);
if (bkdReader == null) {
// Schema ghost corner case! This field did index points in the past, but
// now all docs having this point field were deleted in this segment:
return 0;
}
return bkdReader.getPointCount();
}
} }

View File

@ -1698,15 +1698,31 @@ public final class CheckIndex implements Closeable {
status.totalValueFields++; status.totalValueFields++;
int dimCount = fieldInfo.getPointDimensionCount(); int dimCount = fieldInfo.getPointDimensionCount();
int bytesPerDim = fieldInfo.getPointNumBytes(); int bytesPerDim = fieldInfo.getPointNumBytes();
byte[] lastMinPackedValue = new byte[dimCount*bytesPerDim]; int packedBytesCount = dimCount * bytesPerDim;
BytesRef lastMinPacked = new BytesRef(lastMinPackedValue); byte[] lastMinPackedValue = new byte[packedBytesCount];
byte[] lastMaxPackedValue = new byte[dimCount*bytesPerDim]; byte[] lastMaxPackedValue = new byte[packedBytesCount];
BytesRef lastMaxPacked = new BytesRef(lastMaxPackedValue);
BytesRef scratch = new BytesRef(); BytesRef scratch = new BytesRef();
lastMaxPacked.length = bytesPerDim;
lastMinPacked.length = bytesPerDim;
scratch.length = bytesPerDim; scratch.length = bytesPerDim;
byte[] lastPackedValue = new byte[dimCount*bytesPerDim]; byte[] lastPackedValue = new byte[packedBytesCount];
long[] pointCountSeen = new long[1];
byte[] globalMinPackedValue = values.getMinPackedValue(fieldInfo.name);
long size = values.size(fieldInfo.name);
if (globalMinPackedValue == null && size != 0) {
throw new RuntimeException("getMinPackedValue is null points for field \"" + fieldInfo.name + "\" yet size=" + size);
}
if (globalMinPackedValue.length != packedBytesCount) {
throw new RuntimeException("getMinPackedValue for field \"" + fieldInfo.name + "\" return length=" + globalMinPackedValue.length + " array, but should be " + packedBytesCount);
}
byte[] globalMaxPackedValue = values.getMaxPackedValue(fieldInfo.name);
if (globalMaxPackedValue == null && size != 0) {
throw new RuntimeException("getMaxPackedValue is null points for field \"" + fieldInfo.name + "\" yet size=" + size);
}
if (globalMaxPackedValue.length != packedBytesCount) {
throw new RuntimeException("getMaxPackedValue for field \"" + fieldInfo.name + "\" return length=" + globalMaxPackedValue.length + " array, but should be " + packedBytesCount);
}
values.intersect(fieldInfo.name, values.intersect(fieldInfo.name,
new PointValues.IntersectVisitor() { new PointValues.IntersectVisitor() {
@ -1720,18 +1736,18 @@ public final class CheckIndex implements Closeable {
@Override @Override
public void visit(int docID, byte[] packedValue) { public void visit(int docID, byte[] packedValue) {
checkPackedValue("packed value", packedValue, docID); checkPackedValue("packed value", packedValue, docID);
scratch.bytes = packedValue; pointCountSeen[0]++;
for(int dim=0;dim<dimCount;dim++) { for(int dim=0;dim<dimCount;dim++) {
lastMaxPacked.offset = bytesPerDim * dim; int offset = bytesPerDim * dim;
lastMinPacked.offset = bytesPerDim * dim;
scratch.offset = bytesPerDim * dim;
if (scratch.compareTo(lastMinPacked) < 0) { // Compare to last cell:
if (StringHelper.compare(bytesPerDim, packedValue, offset, lastMinPackedValue, offset) < 0) {
// This doc's point, in this dimension, is lower than the minimum value of the last cell checked: // This doc's point, in this dimension, is lower than the minimum value of the last cell checked:
throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for docID=" + docID + " is out-of-bounds of the last cell min=" + Arrays.toString(lastMinPackedValue) + " max=" + Arrays.toString(lastMaxPackedValue) + " dim=" + dim); throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for docID=" + docID + " is out-of-bounds of the last cell min=" + Arrays.toString(lastMinPackedValue) + " max=" + Arrays.toString(lastMaxPackedValue) + " dim=" + dim);
} }
if (scratch.compareTo(lastMaxPacked) > 0) {
if (StringHelper.compare(bytesPerDim, packedValue, offset, lastMaxPackedValue, offset) > 0) {
// This doc's point, in this dimension, is greater than the maximum value of the last cell checked: // This doc's point, in this dimension, is greater than the maximum value of the last cell checked:
throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for docID=" + docID + " is out-of-bounds of the last cell min=" + Arrays.toString(lastMinPackedValue) + " max=" + Arrays.toString(lastMaxPackedValue) + " dim=" + dim); throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for docID=" + docID + " is out-of-bounds of the last cell min=" + Arrays.toString(lastMinPackedValue) + " max=" + Arrays.toString(lastMaxPackedValue) + " dim=" + dim);
} }
@ -1758,9 +1774,33 @@ public final class CheckIndex implements Closeable {
@Override @Override
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
checkPackedValue("min packed value", minPackedValue, -1); checkPackedValue("min packed value", minPackedValue, -1);
System.arraycopy(minPackedValue, 0, lastMinPackedValue, 0, minPackedValue.length); System.arraycopy(minPackedValue, 0, lastMinPackedValue, 0, packedBytesCount);
checkPackedValue("max packed value", maxPackedValue, -1); checkPackedValue("max packed value", maxPackedValue, -1);
System.arraycopy(maxPackedValue, 0, lastMaxPackedValue, 0, maxPackedValue.length); System.arraycopy(maxPackedValue, 0, lastMaxPackedValue, 0, packedBytesCount);
for(int dim=0;dim<dimCount;dim++) {
int offset = bytesPerDim * dim;
// Make sure this cell is not outside of the global min/max:
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, globalMinPackedValue, offset) < 0) {
throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) +
" is out-of-bounds of the global minimum " + Arrays.toString(globalMinPackedValue) + " dim=" + dim);
}
if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, globalMinPackedValue, offset) < 0) {
throw new RuntimeException("packed points cell maxPackedValue " + Arrays.toString(minPackedValue) +
" is out-of-bounds of the global minimum " + Arrays.toString(globalMinPackedValue) + " dim=" + dim);
}
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, globalMaxPackedValue, offset) > 0) {
throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) +
" is out-of-bounds of the global maximum " + Arrays.toString(globalMaxPackedValue) + " dim=" + dim);
}
if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, globalMaxPackedValue, offset) > 0) {
throw new RuntimeException("packed points cell maxPackedValue " + Arrays.toString(maxPackedValue) +
" is out-of-bounds of the global maximum " + Arrays.toString(globalMaxPackedValue) + " dim=" + dim);
}
}
// We always pretend the query shape is so complex that it crosses every cell, so // We always pretend the query shape is so complex that it crosses every cell, so
// that packedValue is passed for every document // that packedValue is passed for every document
@ -1772,11 +1812,16 @@ public final class CheckIndex implements Closeable {
throw new RuntimeException(desc + " is null for docID=" + docID); throw new RuntimeException(desc + " is null for docID=" + docID);
} }
if (packedValue.length != dimCount * bytesPerDim) { if (packedValue.length != packedBytesCount) {
throw new RuntimeException(desc + " has incorrect length=" + packedValue.length + " vs expected=" + (dimCount * bytesPerDim) + " for docID=" + docID); throw new RuntimeException(desc + " has incorrect length=" + packedValue.length + " vs expected=" + packedBytesCount + " for docID=" + docID);
} }
} }
}); });
if (pointCountSeen[0] != size) {
System.out.println("HERE: " + values);
throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have size=" + size + " points, but in fact has " + pointCountSeen[0]);
}
} }
} }
} }

View File

@ -1,172 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.StringHelper;
/** Merges multiple {@link PointValues} into a single one. */
public class MultiPointValues extends PointValues {
private final List<PointValues> subs;
private final List<Integer> docBases;
private MultiPointValues(List<PointValues> subs, List<Integer> docBases) {
this.subs = subs;
this.docBases = docBases;
}
/** Returns a {@link PointValues} merging all point values from the provided reader. */
public static PointValues get(IndexReader r) {
final List<LeafReaderContext> leaves = r.leaves();
final int size = leaves.size();
if (size == 0) {
return null;
} else if (size == 1) {
return leaves.get(0).reader().getPointValues();
}
List<PointValues> values = new ArrayList<>();
List<Integer> docBases = new ArrayList<>();
for (int i = 0; i < size; i++) {
LeafReaderContext context = leaves.get(i);
PointValues v = context.reader().getPointValues();
if (v != null) {
values.add(v);
docBases.add(context.docBase);
}
}
if (values.isEmpty()) {
return null;
}
return new MultiPointValues(values, docBases);
}
/** Finds all documents and points matching the provided visitor */
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
for(int i=0;i<subs.size();i++) {
int docBase = docBases.get(i);
subs.get(i).intersect(fieldName,
new IntersectVisitor() {
@Override
public void visit(int docID) throws IOException {
visitor.visit(docBase+docID);
}
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
visitor.visit(docBase+docID, packedValue);
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return visitor.compare(minPackedValue, maxPackedValue);
}
});
}
}
@Override
public String toString() {
StringBuilder b = new StringBuilder();
b.append("MultiPointValues(");
for(int i=0;i<subs.size();i++) {
if (i > 0) {
b.append(", ");
}
b.append("docBase=");
b.append(docBases.get(i));
b.append(" sub=" + subs.get(i));
}
b.append(')');
return b.toString();
}
@Override
public byte[] getMinPackedValue(String fieldName) throws IOException {
byte[] result = null;
for(int i=0;i<subs.size();i++) {
byte[] minPackedValue = subs.get(i).getMinPackedValue(fieldName);
if (result == null) {
if (minPackedValue != null) {
result = minPackedValue.clone();
}
} else {
int numDims = subs.get(0).getNumDimensions(fieldName);
int bytesPerDim = subs.get(0).getBytesPerDimension(fieldName);
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, result, offset) < 0) {
System.arraycopy(minPackedValue, offset, result, offset, bytesPerDim);
}
}
}
}
return result;
}
@Override
public byte[] getMaxPackedValue(String fieldName) throws IOException {
byte[] result = null;
for(int i=0;i<subs.size();i++) {
byte[] maxPackedValue = subs.get(i).getMaxPackedValue(fieldName);
if (result == null) {
if (maxPackedValue != null) {
result = maxPackedValue.clone();
}
} else {
int numDims = subs.get(0).getNumDimensions(fieldName);
int bytesPerDim = subs.get(0).getBytesPerDimension(fieldName);
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, result, offset) > 0) {
System.arraycopy(maxPackedValue, offset, result, offset, bytesPerDim);
}
}
}
}
return result;
}
@Override
public int getNumDimensions(String fieldName) throws IOException {
for(int i=0;i<subs.size();i++) {
int result = subs.get(i).getNumDimensions(fieldName);
if (result != 0) {
return result;
}
}
return 0;
}
@Override
public int getBytesPerDimension(String fieldName) throws IOException {
for(int i=0;i<subs.size();i++) {
int result = subs.get(i).getBytesPerDimension(fieldName);
if (result != 0) {
return result;
}
}
return 0;
}
}

View File

@ -381,6 +381,19 @@ public class ParallelLeafReader extends LeafReader {
} }
return dimValues.getBytesPerDimension(fieldName); return dimValues.getBytesPerDimension(fieldName);
} }
@Override
public long size(String fieldName) {
LeafReader reader = fieldToReader.get(fieldName);
if (reader == null) {
return 0;
}
PointValues dimValues = reader.getPointValues();
if (dimValues == null) {
return 0;
}
return dimValues.size(fieldName);
}
}; };
} }

View File

@ -80,10 +80,10 @@ public abstract class PointValues {
* to test whether each document is deleted, if necessary. */ * to test whether each document is deleted, if necessary. */
public abstract void intersect(String fieldName, IntersectVisitor visitor) throws IOException; public abstract void intersect(String fieldName, IntersectVisitor visitor) throws IOException;
/** Returns minimum value for each dimension, packed, or null if no points were indexed */ /** Returns minimum value for each dimension, packed, or null if {@link #size} is <code>0</code> */
public abstract byte[] getMinPackedValue(String fieldName) throws IOException; public abstract byte[] getMinPackedValue(String fieldName) throws IOException;
/** Returns maximum value for each dimension, packed, or null if no points were indexed */ /** Returns maximum value for each dimension, packed, or null if {@link #size} is <code>0</code> */
public abstract byte[] getMaxPackedValue(String fieldName) throws IOException; public abstract byte[] getMaxPackedValue(String fieldName) throws IOException;
/** Returns how many dimensions were indexed */ /** Returns how many dimensions were indexed */
@ -91,4 +91,9 @@ public abstract class PointValues {
/** Returns the number of bytes per dimension */ /** Returns the number of bytes per dimension */
public abstract int getBytesPerDimension(String fieldName) throws IOException; public abstract int getBytesPerDimension(String fieldName) throws IOException;
/** Returns the total number of indexed points across all documents in this field. */
public abstract long size(String fieldName);
// nocommit make "delete all point docs then force merge" and then check stats test
} }

View File

@ -108,6 +108,11 @@ class PointValuesWriter {
public int getBytesPerDimension(String fieldName) { public int getBytesPerDimension(String fieldName) {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }
@Override
public long size(String fieldName) {
throw new UnsupportedOperationException();
}
}); });
} }
} }

View File

@ -172,6 +172,11 @@ public final class SlowCodecReaderWrapper {
public int getBytesPerDimension(String fieldName) throws IOException { public int getBytesPerDimension(String fieldName) throws IOException {
return values.getBytesPerDimension(fieldName); return values.getBytesPerDimension(fieldName);
} }
@Override
public long size(String fieldName) {
return values.size(fieldName);
}
}; };
} }

View File

@ -42,6 +42,7 @@ public class BKDReader implements Accountable {
final int maxPointsInLeafNode; final int maxPointsInLeafNode;
final byte[] minPackedValue; final byte[] minPackedValue;
final byte[] maxPackedValue; final byte[] maxPackedValue;
final long pointCount;
protected final int packedBytesLength; protected final int packedBytesLength;
/** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */ /** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */
@ -59,9 +60,12 @@ public class BKDReader implements Accountable {
minPackedValue = new byte[packedBytesLength]; minPackedValue = new byte[packedBytesLength];
maxPackedValue = new byte[packedBytesLength]; maxPackedValue = new byte[packedBytesLength];
in.readBytes(minPackedValue, 0, packedBytesLength); in.readBytes(minPackedValue, 0, packedBytesLength);
in.readBytes(maxPackedValue, 0, packedBytesLength); in.readBytes(maxPackedValue, 0, packedBytesLength);
pointCount = in.readVLong();
splitPackedValues = new byte[(1+bytesPerDim)*numLeaves]; splitPackedValues = new byte[(1+bytesPerDim)*numLeaves];
// TODO: don't write split packed values[0]! // TODO: don't write split packed values[0]!
@ -122,7 +126,7 @@ public class BKDReader implements Accountable {
/** Called by consumers that have their own on-disk format for the index (e.g. SimpleText) */ /** Called by consumers that have their own on-disk format for the index (e.g. SimpleText) */
protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues, protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
byte[] minPackedValue, byte[] maxPackedValue) throws IOException { byte[] minPackedValue, byte[] maxPackedValue, long pointCount) throws IOException {
this.in = in; this.in = in;
this.numDims = numDims; this.numDims = numDims;
this.maxPointsInLeafNode = maxPointsInLeafNode; this.maxPointsInLeafNode = maxPointsInLeafNode;
@ -133,6 +137,7 @@ public class BKDReader implements Accountable {
this.splitPackedValues = splitPackedValues; this.splitPackedValues = splitPackedValues;
this.minPackedValue = minPackedValue; this.minPackedValue = minPackedValue;
this.maxPackedValue = maxPackedValue; this.maxPackedValue = maxPackedValue;
this.pointCount = pointCount;
assert minPackedValue.length == packedBytesLength; assert minPackedValue.length == packedBytesLength;
assert maxPackedValue.length == packedBytesLength; assert maxPackedValue.length == packedBytesLength;
} }
@ -275,10 +280,7 @@ public class BKDReader implements Accountable {
packedBytesLength, packedBytesLength,
maxPointsInLeafNode, maxPointsInLeafNode,
visitor); visitor);
byte[] rootMinPacked = new byte[packedBytesLength]; intersect(state, 1, minPackedValue, maxPackedValue);
byte[] rootMaxPacked = new byte[packedBytesLength];
Arrays.fill(rootMaxPacked, (byte) 0xff);
intersect(state, 1, rootMinPacked, rootMaxPacked);
} }
/** Fast path: this is called when the query box fully encompasses all cells under this node. */ /** Fast path: this is called when the query box fully encompasses all cells under this node. */
@ -430,4 +432,8 @@ public class BKDReader implements Accountable {
public int getBytesPerDimension() { public int getBytesPerDimension() {
return bytesPerDim; return bytesPerDim;
} }
public long getPointCount() {
return pointCount;
}
} }

View File

@ -123,7 +123,7 @@ public class BKDWriter implements Closeable {
/** Maximum per-dim values, packed */ /** Maximum per-dim values, packed */
protected final byte[] maxPackedValue; protected final byte[] maxPackedValue;
private long pointCount; protected long pointCount;
public BKDWriter(Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim) throws IOException { public BKDWriter(Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim) throws IOException {
this(tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_MB_SORT_IN_HEAP); this(tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_MB_SORT_IN_HEAP);
@ -428,7 +428,8 @@ public class BKDWriter implements Closeable {
} }
System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, 0, packedBytesLength); System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, 0, packedBytesLength);
assert numDims > 1 || valueInOrder(valueCount++, lastPackedValue, reader.state.scratchPackedValue); assert numDims > 1 || valueInOrder(valueCount, lastPackedValue, reader.state.scratchPackedValue);
valueCount++;
if (leafCount == 0) { if (leafCount == 0) {
if (leafBlockFPs.size() > 0) { if (leafBlockFPs.size() > 0) {
@ -478,6 +479,8 @@ public class BKDWriter implements Closeable {
} }
} }
pointCount = valueCount;
long indexFP = out.getFilePointer(); long indexFP = out.getFilePointer();
int numInnerNodes = leafBlockStartValues.size(); int numInnerNodes = leafBlockStartValues.size();
@ -799,10 +802,6 @@ public class BKDWriter implements Closeable {
// Sort all docs once by each dimension: // Sort all docs once by each dimension:
PathSlice[] sortedPointWriters = new PathSlice[numDims]; PathSlice[] sortedPointWriters = new PathSlice[numDims];
byte[] minPacked = new byte[packedBytesLength];
byte[] maxPacked = new byte[packedBytesLength];
Arrays.fill(maxPacked, (byte) 0xff);
boolean success = false; boolean success = false;
try { try {
//long t0 = System.nanoTime(); //long t0 = System.nanoTime();
@ -822,7 +821,7 @@ public class BKDWriter implements Closeable {
build(1, numLeaves, sortedPointWriters, build(1, numLeaves, sortedPointWriters,
ordBitSet, out, ordBitSet, out,
minPacked, maxPacked, minPackedValue, maxPackedValue,
splitPackedValues, splitPackedValues,
leafBlockFPs); leafBlockFPs);
@ -862,6 +861,8 @@ public class BKDWriter implements Closeable {
out.writeBytes(minPackedValue, 0, packedBytesLength); out.writeBytes(minPackedValue, 0, packedBytesLength);
out.writeBytes(maxPackedValue, 0, packedBytesLength); out.writeBytes(maxPackedValue, 0, packedBytesLength);
out.writeVLong(pointCount);
// TODO: for 1D case, don't waste the first byte of each split value (it's always 0) // TODO: for 1D case, don't waste the first byte of each split value (it's always 0)
// NOTE: splitPackedValues[0] is unused, because nodeID is 1-based: // NOTE: splitPackedValues[0] is unused, because nodeID is 1-based:

View File

@ -31,6 +31,7 @@ import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
@ -105,8 +106,8 @@ public final class AssertingPointFormat extends PointFormat {
// This doc's packed value should be contained in the last cell passed to compare: // This doc's packed value should be contained in the last cell passed to compare:
for(int dim=0;dim<numDims;dim++) { for(int dim=0;dim<numDims;dim++) {
assert StringHelper.compare(bytesPerDim, lastMinPackedValue, dim*bytesPerDim, packedValue, dim*bytesPerDim) <= 0: "dim=" + dim + " of " + numDims; assert StringHelper.compare(bytesPerDim, lastMinPackedValue, dim*bytesPerDim, packedValue, dim*bytesPerDim) <= 0: "dim=" + dim + " of " + numDims + " value=" + new BytesRef(packedValue);
assert StringHelper.compare(bytesPerDim, lastMaxPackedValue, dim*bytesPerDim, packedValue, dim*bytesPerDim) >= 0: "dim=" + dim + " of " + numDims; assert StringHelper.compare(bytesPerDim, lastMaxPackedValue, dim*bytesPerDim, packedValue, dim*bytesPerDim) >= 0: "dim=" + dim + " of " + numDims + " value=" + new BytesRef(packedValue);
} }
// TODO: we should assert that this "matches" whatever relation the last call to compare had returned // TODO: we should assert that this "matches" whatever relation the last call to compare had returned
@ -214,6 +215,12 @@ public final class AssertingPointFormat extends PointFormat {
public int getBytesPerDimension(String fieldName) throws IOException { public int getBytesPerDimension(String fieldName) throws IOException {
return in.getBytesPerDimension(fieldName); return in.getBytesPerDimension(fieldName);
} }
@Override
public long size(String fieldName) {
// TODO: what to assert?
return in.size(fieldName);
}
} }
static class AssertingPointWriter extends PointWriter { static class AssertingPointWriter extends PointWriter {

View File

@ -171,5 +171,10 @@ class CrankyPointFormat extends PointFormat {
public long ramBytesUsed() { public long ramBytesUsed() {
return delegate.ramBytesUsed(); return delegate.ramBytesUsed();
} }
@Override
public long size(String fieldName) {
return delegate.size(fieldName);
}
} }
} }