mirror of https://github.com/apache/lucene.git
add PointValues.size() stat, to return total number of points
This commit is contained in:
parent
b17c57f072
commit
3c02ab2187
|
@ -34,8 +34,8 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.BLOCK_VA
|
||||||
class SimpleTextBKDReader extends BKDReader {
|
class SimpleTextBKDReader extends BKDReader {
|
||||||
|
|
||||||
public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
|
public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
|
||||||
byte[] minPackedValue, byte[] maxPackedValue) throws IOException {
|
byte[] minPackedValue, byte[] maxPackedValue, long pointCount) throws IOException {
|
||||||
super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue);
|
super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue, pointCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -47,6 +47,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MAX_LEAF
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MAX_VALUE;
|
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MAX_VALUE;
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MIN_VALUE;
|
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MIN_VALUE;
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.NUM_DIMS;
|
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.NUM_DIMS;
|
||||||
|
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.POINT_COUNT;
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_COUNT;
|
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_COUNT;
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_DIM;
|
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_DIM;
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_VALUE;
|
import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_VALUE;
|
||||||
|
@ -119,6 +120,10 @@ class SimpleTextPointReader extends PointReader {
|
||||||
assert startsWith(MAX_VALUE);
|
assert startsWith(MAX_VALUE);
|
||||||
BytesRef maxValue = SimpleTextUtil.fromBytesRefString(stripPrefix(MAX_VALUE));
|
BytesRef maxValue = SimpleTextUtil.fromBytesRefString(stripPrefix(MAX_VALUE));
|
||||||
assert maxValue.length == numDims*bytesPerDim;
|
assert maxValue.length == numDims*bytesPerDim;
|
||||||
|
|
||||||
|
readLine(dataIn);
|
||||||
|
assert startsWith(POINT_COUNT);
|
||||||
|
long pointCount = parseLong(POINT_COUNT);
|
||||||
|
|
||||||
long[] leafBlockFPs = new long[count];
|
long[] leafBlockFPs = new long[count];
|
||||||
for(int i=0;i<count;i++) {
|
for(int i=0;i<count;i++) {
|
||||||
|
@ -139,7 +144,7 @@ class SimpleTextPointReader extends PointReader {
|
||||||
System.arraycopy(br.bytes, br.offset, splitPackedValues, (1 + bytesPerDim) * i + 1, bytesPerDim);
|
System.arraycopy(br.bytes, br.offset, splitPackedValues, (1 + bytesPerDim) * i + 1, bytesPerDim);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes);
|
return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes, pointCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void readLine(IndexInput in) throws IOException {
|
private void readLine(IndexInput in) throws IOException {
|
||||||
|
@ -267,4 +272,15 @@ class SimpleTextPointReader extends PointReader {
|
||||||
}
|
}
|
||||||
return bkdReader.getBytesPerDimension();
|
return bkdReader.getBytesPerDimension();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long size(String fieldName) {
|
||||||
|
BKDReader bkdReader = getBKDReader(fieldName);
|
||||||
|
if (bkdReader == null) {
|
||||||
|
// Schema ghost corner case! This field did index points in the past, but
|
||||||
|
// now all docs having this field were deleted in this segment:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return bkdReader.getPointCount();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,6 +51,7 @@ class SimpleTextPointWriter extends PointWriter {
|
||||||
final static BytesRef FIELD_FP = new BytesRef(" field fp ");
|
final static BytesRef FIELD_FP = new BytesRef(" field fp ");
|
||||||
final static BytesRef MIN_VALUE = new BytesRef("min value ");
|
final static BytesRef MIN_VALUE = new BytesRef("min value ");
|
||||||
final static BytesRef MAX_VALUE = new BytesRef("max value ");
|
final static BytesRef MAX_VALUE = new BytesRef("max value ");
|
||||||
|
final static BytesRef POINT_COUNT = new BytesRef("point count ");
|
||||||
|
|
||||||
private IndexOutput dataOut;
|
private IndexOutput dataOut;
|
||||||
final BytesRefBuilder scratch = new BytesRefBuilder();
|
final BytesRefBuilder scratch = new BytesRefBuilder();
|
||||||
|
@ -102,6 +103,10 @@ class SimpleTextPointWriter extends PointWriter {
|
||||||
write(out, br.toString());
|
write(out, br.toString());
|
||||||
newline(out);
|
newline(out);
|
||||||
|
|
||||||
|
write(out, POINT_COUNT);
|
||||||
|
writeLong(out, pointCount);
|
||||||
|
newline(out);
|
||||||
|
|
||||||
for(int i=0;i<leafBlockFPs.length;i++) {
|
for(int i=0;i<leafBlockFPs.length;i++) {
|
||||||
write(out, BLOCK_FP);
|
write(out, BLOCK_FP);
|
||||||
writeLong(out, leafBlockFPs[i]);
|
writeLong(out, leafBlockFPs[i]);
|
||||||
|
|
|
@ -95,6 +95,11 @@ public abstract class PointFormat {
|
||||||
public int getBytesPerDimension(String fieldName) {
|
public int getBytesPerDimension(String fieldName) {
|
||||||
throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with points");
|
throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with points");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Always throws: reports the field as not indexed with points, using the same message as the sibling getBytesPerDimension.
@Override
|
||||||
|
public long size(String fieldName) {
|
||||||
|
throw new IllegalArgumentException("field=\"" + fieldName + "\" was not indexed with points");
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -115,6 +115,12 @@ public abstract class PointWriter implements Closeable {
|
||||||
public int getBytesPerDimension(String fieldName) {
|
public int getBytesPerDimension(String fieldName) {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Not supported on this anonymous reader: always throws, like the sibling getBytesPerDimension.
@Override
|
||||||
|
public long size(String fieldName) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -215,5 +215,16 @@ public class Lucene60PointReader extends PointReader implements Closeable {
|
||||||
}
|
}
|
||||||
return bkdReader.getBytesPerDimension();
|
return bkdReader.getBytesPerDimension();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long size(String fieldName) {
|
||||||
|
BKDReader bkdReader = getBKDReader(fieldName);
|
||||||
|
if (bkdReader == null) {
|
||||||
|
// Schema ghost corner case! This field did index points in the past, but
|
||||||
|
// now all docs having this point field were deleted in this segment:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return bkdReader.getPointCount();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1698,15 +1698,31 @@ public final class CheckIndex implements Closeable {
|
||||||
status.totalValueFields++;
|
status.totalValueFields++;
|
||||||
int dimCount = fieldInfo.getPointDimensionCount();
|
int dimCount = fieldInfo.getPointDimensionCount();
|
||||||
int bytesPerDim = fieldInfo.getPointNumBytes();
|
int bytesPerDim = fieldInfo.getPointNumBytes();
|
||||||
byte[] lastMinPackedValue = new byte[dimCount*bytesPerDim];
|
int packedBytesCount = dimCount * bytesPerDim;
|
||||||
BytesRef lastMinPacked = new BytesRef(lastMinPackedValue);
|
byte[] lastMinPackedValue = new byte[packedBytesCount];
|
||||||
byte[] lastMaxPackedValue = new byte[dimCount*bytesPerDim];
|
byte[] lastMaxPackedValue = new byte[packedBytesCount];
|
||||||
BytesRef lastMaxPacked = new BytesRef(lastMaxPackedValue);
|
|
||||||
BytesRef scratch = new BytesRef();
|
BytesRef scratch = new BytesRef();
|
||||||
lastMaxPacked.length = bytesPerDim;
|
|
||||||
lastMinPacked.length = bytesPerDim;
|
|
||||||
scratch.length = bytesPerDim;
|
scratch.length = bytesPerDim;
|
||||||
byte[] lastPackedValue = new byte[dimCount*bytesPerDim];
|
byte[] lastPackedValue = new byte[packedBytesCount];
|
||||||
|
|
||||||
|
long[] pointCountSeen = new long[1];
|
||||||
|
|
||||||
|
// Validate the field-wide min/max packed values against the reported size.
// A null min/max is only legal when the field reports zero points, so the length
// check must be skipped in that case rather than dereferencing null.
byte[] globalMinPackedValue = values.getMinPackedValue(fieldInfo.name);
long size = values.size(fieldInfo.name);
if (globalMinPackedValue == null) {
  if (size != 0) {
    throw new RuntimeException("getMinPackedValue is null points for field \"" + fieldInfo.name + "\" yet size=" + size);
  }
} else if (globalMinPackedValue.length != packedBytesCount) {
  throw new RuntimeException("getMinPackedValue for field \"" + fieldInfo.name + "\" return length=" + globalMinPackedValue.length + " array, but should be " + packedBytesCount);
}

byte[] globalMaxPackedValue = values.getMaxPackedValue(fieldInfo.name);
if (globalMaxPackedValue == null) {
  if (size != 0) {
    throw new RuntimeException("getMaxPackedValue is null points for field \"" + fieldInfo.name + "\" yet size=" + size);
  }
} else if (globalMaxPackedValue.length != packedBytesCount) {
  throw new RuntimeException("getMaxPackedValue for field \"" + fieldInfo.name + "\" return length=" + globalMaxPackedValue.length + " array, but should be " + packedBytesCount);
}
|
||||||
|
|
||||||
values.intersect(fieldInfo.name,
|
values.intersect(fieldInfo.name,
|
||||||
new PointValues.IntersectVisitor() {
|
new PointValues.IntersectVisitor() {
|
||||||
|
|
||||||
|
@ -1720,18 +1736,18 @@ public final class CheckIndex implements Closeable {
|
||||||
@Override
|
@Override
|
||||||
public void visit(int docID, byte[] packedValue) {
|
public void visit(int docID, byte[] packedValue) {
|
||||||
checkPackedValue("packed value", packedValue, docID);
|
checkPackedValue("packed value", packedValue, docID);
|
||||||
scratch.bytes = packedValue;
|
pointCountSeen[0]++;
|
||||||
|
|
||||||
for(int dim=0;dim<dimCount;dim++) {
|
for(int dim=0;dim<dimCount;dim++) {
|
||||||
lastMaxPacked.offset = bytesPerDim * dim;
|
int offset = bytesPerDim * dim;
|
||||||
lastMinPacked.offset = bytesPerDim * dim;
|
|
||||||
scratch.offset = bytesPerDim * dim;
|
|
||||||
|
|
||||||
if (scratch.compareTo(lastMinPacked) < 0) {
|
// Compare to last cell:
|
||||||
|
if (StringHelper.compare(bytesPerDim, packedValue, offset, lastMinPackedValue, offset) < 0) {
|
||||||
// This doc's point, in this dimension, is lower than the minimum value of the last cell checked:
|
// This doc's point, in this dimension, is lower than the minimum value of the last cell checked:
|
||||||
throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for docID=" + docID + " is out-of-bounds of the last cell min=" + Arrays.toString(lastMinPackedValue) + " max=" + Arrays.toString(lastMaxPackedValue) + " dim=" + dim);
|
throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for docID=" + docID + " is out-of-bounds of the last cell min=" + Arrays.toString(lastMinPackedValue) + " max=" + Arrays.toString(lastMaxPackedValue) + " dim=" + dim);
|
||||||
}
|
}
|
||||||
if (scratch.compareTo(lastMaxPacked) > 0) {
|
|
||||||
|
if (StringHelper.compare(bytesPerDim, packedValue, offset, lastMaxPackedValue, offset) > 0) {
|
||||||
// This doc's point, in this dimension, is greater than the maximum value of the last cell checked:
|
// This doc's point, in this dimension, is greater than the maximum value of the last cell checked:
|
||||||
throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for docID=" + docID + " is out-of-bounds of the last cell min=" + Arrays.toString(lastMinPackedValue) + " max=" + Arrays.toString(lastMaxPackedValue) + " dim=" + dim);
|
throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for docID=" + docID + " is out-of-bounds of the last cell min=" + Arrays.toString(lastMinPackedValue) + " max=" + Arrays.toString(lastMaxPackedValue) + " dim=" + dim);
|
||||||
}
|
}
|
||||||
|
@ -1758,9 +1774,33 @@ public final class CheckIndex implements Closeable {
|
||||||
@Override
|
@Override
|
||||||
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||||
checkPackedValue("min packed value", minPackedValue, -1);
|
checkPackedValue("min packed value", minPackedValue, -1);
|
||||||
System.arraycopy(minPackedValue, 0, lastMinPackedValue, 0, minPackedValue.length);
|
System.arraycopy(minPackedValue, 0, lastMinPackedValue, 0, packedBytesCount);
|
||||||
checkPackedValue("max packed value", maxPackedValue, -1);
|
checkPackedValue("max packed value", maxPackedValue, -1);
|
||||||
System.arraycopy(maxPackedValue, 0, lastMaxPackedValue, 0, maxPackedValue.length);
|
System.arraycopy(maxPackedValue, 0, lastMaxPackedValue, 0, packedBytesCount);
|
||||||
|
|
||||||
|
for(int dim=0;dim<dimCount;dim++) {
|
||||||
|
int offset = bytesPerDim * dim;
|
||||||
|
|
||||||
|
// Make sure this cell is not outside of the global min/max:
|
||||||
|
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, globalMinPackedValue, offset) < 0) {
|
||||||
|
throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) +
|
||||||
|
" is out-of-bounds of the global minimum " + Arrays.toString(globalMinPackedValue) + " dim=" + dim);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The cell's upper bound must not fall below the global minimum either; report the
// value that was actually compared (maxPackedValue), not the cell's minimum.
if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, globalMinPackedValue, offset) < 0) {
  throw new RuntimeException("packed points cell maxPackedValue " + Arrays.toString(maxPackedValue) +
                             " is out-of-bounds of the global minimum " + Arrays.toString(globalMinPackedValue) + " dim=" + dim);
}
|
||||||
|
|
||||||
|
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, globalMaxPackedValue, offset) > 0) {
|
||||||
|
throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) +
|
||||||
|
" is out-of-bounds of the global maximum " + Arrays.toString(globalMaxPackedValue) + " dim=" + dim);
|
||||||
|
}
|
||||||
|
if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, globalMaxPackedValue, offset) > 0) {
|
||||||
|
throw new RuntimeException("packed points cell maxPackedValue " + Arrays.toString(maxPackedValue) +
|
||||||
|
" is out-of-bounds of the global maximum " + Arrays.toString(globalMaxPackedValue) + " dim=" + dim);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// We always pretend the query shape is so complex that it crosses every cell, so
|
// We always pretend the query shape is so complex that it crosses every cell, so
|
||||||
// that packedValue is passed for every document
|
// that packedValue is passed for every document
|
||||||
|
@ -1772,11 +1812,16 @@ public final class CheckIndex implements Closeable {
|
||||||
throw new RuntimeException(desc + " is null for docID=" + docID);
|
throw new RuntimeException(desc + " is null for docID=" + docID);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (packedValue.length != dimCount * bytesPerDim) {
|
if (packedValue.length != packedBytesCount) {
|
||||||
throw new RuntimeException(desc + " has incorrect length=" + packedValue.length + " vs expected=" + (dimCount * bytesPerDim) + " for docID=" + docID);
|
throw new RuntimeException(desc + " has incorrect length=" + packedValue.length + " vs expected=" + packedBytesCount + " for docID=" + docID);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// The number of points counted in pointCountSeen must match the size the field reports.
if (pointCountSeen[0] != size) {
  throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have size=" + size + " points, but in fact has " + pointCountSeen[0]);
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,172 +0,0 @@
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.lucene.index;
|
|
||||||
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.lucene.util.StringHelper;
|
|
||||||
|
|
||||||
/** Merges multiple {@link PointValues} into a single one. */
|
|
||||||
public class MultiPointValues extends PointValues {
|
|
||||||
|
|
||||||
private final List<PointValues> subs;
|
|
||||||
private final List<Integer> docBases;
|
|
||||||
|
|
||||||
private MultiPointValues(List<PointValues> subs, List<Integer> docBases) {
|
|
||||||
this.subs = subs;
|
|
||||||
this.docBases = docBases;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns a {@link PointValues} merging all point values from the provided reader. */
|
|
||||||
public static PointValues get(IndexReader r) {
|
|
||||||
final List<LeafReaderContext> leaves = r.leaves();
|
|
||||||
final int size = leaves.size();
|
|
||||||
if (size == 0) {
|
|
||||||
return null;
|
|
||||||
} else if (size == 1) {
|
|
||||||
return leaves.get(0).reader().getPointValues();
|
|
||||||
}
|
|
||||||
|
|
||||||
List<PointValues> values = new ArrayList<>();
|
|
||||||
List<Integer> docBases = new ArrayList<>();
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
LeafReaderContext context = leaves.get(i);
|
|
||||||
PointValues v = context.reader().getPointValues();
|
|
||||||
if (v != null) {
|
|
||||||
values.add(v);
|
|
||||||
docBases.add(context.docBase);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (values.isEmpty()) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
return new MultiPointValues(values, docBases);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Finds all documents and points matching the provided visitor */
|
|
||||||
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
|
|
||||||
for(int i=0;i<subs.size();i++) {
|
|
||||||
int docBase = docBases.get(i);
|
|
||||||
subs.get(i).intersect(fieldName,
|
|
||||||
new IntersectVisitor() {
|
|
||||||
@Override
|
|
||||||
public void visit(int docID) throws IOException {
|
|
||||||
visitor.visit(docBase+docID);
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public void visit(int docID, byte[] packedValue) throws IOException {
|
|
||||||
visitor.visit(docBase+docID, packedValue);
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
|
||||||
return visitor.compare(minPackedValue, maxPackedValue);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
StringBuilder b = new StringBuilder();
|
|
||||||
b.append("MultiPointValues(");
|
|
||||||
for(int i=0;i<subs.size();i++) {
|
|
||||||
if (i > 0) {
|
|
||||||
b.append(", ");
|
|
||||||
}
|
|
||||||
b.append("docBase=");
|
|
||||||
b.append(docBases.get(i));
|
|
||||||
b.append(" sub=" + subs.get(i));
|
|
||||||
}
|
|
||||||
b.append(')');
|
|
||||||
return b.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public byte[] getMinPackedValue(String fieldName) throws IOException {
|
|
||||||
byte[] result = null;
|
|
||||||
for(int i=0;i<subs.size();i++) {
|
|
||||||
byte[] minPackedValue = subs.get(i).getMinPackedValue(fieldName);
|
|
||||||
if (result == null) {
|
|
||||||
if (minPackedValue != null) {
|
|
||||||
result = minPackedValue.clone();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
int numDims = subs.get(0).getNumDimensions(fieldName);
|
|
||||||
int bytesPerDim = subs.get(0).getBytesPerDimension(fieldName);
|
|
||||||
for(int dim=0;dim<numDims;dim++) {
|
|
||||||
int offset = dim*bytesPerDim;
|
|
||||||
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, result, offset) < 0) {
|
|
||||||
System.arraycopy(minPackedValue, offset, result, offset, bytesPerDim);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public byte[] getMaxPackedValue(String fieldName) throws IOException {
|
|
||||||
byte[] result = null;
|
|
||||||
for(int i=0;i<subs.size();i++) {
|
|
||||||
byte[] maxPackedValue = subs.get(i).getMaxPackedValue(fieldName);
|
|
||||||
if (result == null) {
|
|
||||||
if (maxPackedValue != null) {
|
|
||||||
result = maxPackedValue.clone();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
int numDims = subs.get(0).getNumDimensions(fieldName);
|
|
||||||
int bytesPerDim = subs.get(0).getBytesPerDimension(fieldName);
|
|
||||||
for(int dim=0;dim<numDims;dim++) {
|
|
||||||
int offset = dim*bytesPerDim;
|
|
||||||
if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, result, offset) > 0) {
|
|
||||||
System.arraycopy(maxPackedValue, offset, result, offset, bytesPerDim);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getNumDimensions(String fieldName) throws IOException {
|
|
||||||
for(int i=0;i<subs.size();i++) {
|
|
||||||
int result = subs.get(i).getNumDimensions(fieldName);
|
|
||||||
if (result != 0) {
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getBytesPerDimension(String fieldName) throws IOException {
|
|
||||||
for(int i=0;i<subs.size();i++) {
|
|
||||||
int result = subs.get(i).getBytesPerDimension(fieldName);
|
|
||||||
if (result != 0) {
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -381,6 +381,19 @@ public class ParallelLeafReader extends LeafReader {
|
||||||
}
|
}
|
||||||
return dimValues.getBytesPerDimension(fieldName);
|
return dimValues.getBytesPerDimension(fieldName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long size(String fieldName) {
|
||||||
|
LeafReader reader = fieldToReader.get(fieldName);
|
||||||
|
if (reader == null) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
PointValues dimValues = reader.getPointValues();
|
||||||
|
if (dimValues == null) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return dimValues.size(fieldName);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -80,10 +80,10 @@ public abstract class PointValues {
|
||||||
* to test whether each document is deleted, if necessary. */
|
* to test whether each document is deleted, if necessary. */
|
||||||
public abstract void intersect(String fieldName, IntersectVisitor visitor) throws IOException;
|
public abstract void intersect(String fieldName, IntersectVisitor visitor) throws IOException;
|
||||||
|
|
||||||
/** Returns minimum value for each dimension, packed, or null if no points were indexed */
|
/** Returns minimum value for each dimension, packed, or null if {@link #size} is <code>0</code> */
|
||||||
public abstract byte[] getMinPackedValue(String fieldName) throws IOException;
|
public abstract byte[] getMinPackedValue(String fieldName) throws IOException;
|
||||||
|
|
||||||
/** Returns maximum value for each dimension, packed, or null if no points were indexed */
|
/** Returns maximum value for each dimension, packed, or null if {@link #size} is <code>0</code> */
|
||||||
public abstract byte[] getMaxPackedValue(String fieldName) throws IOException;
|
public abstract byte[] getMaxPackedValue(String fieldName) throws IOException;
|
||||||
|
|
||||||
/** Returns how many dimensions were indexed */
|
/** Returns how many dimensions were indexed */
|
||||||
|
@ -91,4 +91,9 @@ public abstract class PointValues {
|
||||||
|
|
||||||
/** Returns the number of bytes per dimension */
|
/** Returns the number of bytes per dimension */
|
||||||
public abstract int getBytesPerDimension(String fieldName) throws IOException;
|
public abstract int getBytesPerDimension(String fieldName) throws IOException;
|
||||||
|
|
||||||
|
/** Returns the total number of indexed points across all documents in this field. */
|
||||||
|
public abstract long size(String fieldName);
|
||||||
|
|
||||||
|
// nocommit: add a test that deletes all docs with points, force-merges, and then checks the stats
|
||||||
}
|
}
|
||||||
|
|
|
@ -108,6 +108,11 @@ class PointValuesWriter {
|
||||||
public int getBytesPerDimension(String fieldName) {
|
public int getBytesPerDimension(String fieldName) {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Not supported on this anonymous reader: always throws, like the sibling getBytesPerDimension.
@Override
|
||||||
|
public long size(String fieldName) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -172,6 +172,11 @@ public final class SlowCodecReaderWrapper {
|
||||||
public int getBytesPerDimension(String fieldName) throws IOException {
|
public int getBytesPerDimension(String fieldName) throws IOException {
|
||||||
return values.getBytesPerDimension(fieldName);
|
return values.getBytesPerDimension(fieldName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Pure delegation: forwards to the wrapped values' size for the same field.
@Override
|
||||||
|
public long size(String fieldName) {
|
||||||
|
return values.size(fieldName);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,7 @@ public class BKDReader implements Accountable {
|
||||||
final int maxPointsInLeafNode;
|
final int maxPointsInLeafNode;
|
||||||
final byte[] minPackedValue;
|
final byte[] minPackedValue;
|
||||||
final byte[] maxPackedValue;
|
final byte[] maxPackedValue;
|
||||||
|
final long pointCount;
|
||||||
protected final int packedBytesLength;
|
protected final int packedBytesLength;
|
||||||
|
|
||||||
/** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */
|
/** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */
|
||||||
|
@ -59,9 +60,12 @@ public class BKDReader implements Accountable {
|
||||||
|
|
||||||
minPackedValue = new byte[packedBytesLength];
|
minPackedValue = new byte[packedBytesLength];
|
||||||
maxPackedValue = new byte[packedBytesLength];
|
maxPackedValue = new byte[packedBytesLength];
|
||||||
|
|
||||||
in.readBytes(minPackedValue, 0, packedBytesLength);
|
in.readBytes(minPackedValue, 0, packedBytesLength);
|
||||||
in.readBytes(maxPackedValue, 0, packedBytesLength);
|
in.readBytes(maxPackedValue, 0, packedBytesLength);
|
||||||
|
|
||||||
|
pointCount = in.readVLong();
|
||||||
|
|
||||||
splitPackedValues = new byte[(1+bytesPerDim)*numLeaves];
|
splitPackedValues = new byte[(1+bytesPerDim)*numLeaves];
|
||||||
|
|
||||||
// TODO: don't write split packed values[0]!
|
// TODO: don't write split packed values[0]!
|
||||||
|
@ -122,7 +126,7 @@ public class BKDReader implements Accountable {
|
||||||
|
|
||||||
/** Called by consumers that have their own on-disk format for the index (e.g. SimpleText) */
|
/** Called by consumers that have their own on-disk format for the index (e.g. SimpleText) */
|
||||||
protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
|
protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
|
||||||
byte[] minPackedValue, byte[] maxPackedValue) throws IOException {
|
byte[] minPackedValue, byte[] maxPackedValue, long pointCount) throws IOException {
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.numDims = numDims;
|
this.numDims = numDims;
|
||||||
this.maxPointsInLeafNode = maxPointsInLeafNode;
|
this.maxPointsInLeafNode = maxPointsInLeafNode;
|
||||||
|
@ -133,6 +137,7 @@ public class BKDReader implements Accountable {
|
||||||
this.splitPackedValues = splitPackedValues;
|
this.splitPackedValues = splitPackedValues;
|
||||||
this.minPackedValue = minPackedValue;
|
this.minPackedValue = minPackedValue;
|
||||||
this.maxPackedValue = maxPackedValue;
|
this.maxPackedValue = maxPackedValue;
|
||||||
|
this.pointCount = pointCount;
|
||||||
assert minPackedValue.length == packedBytesLength;
|
assert minPackedValue.length == packedBytesLength;
|
||||||
assert maxPackedValue.length == packedBytesLength;
|
assert maxPackedValue.length == packedBytesLength;
|
||||||
}
|
}
|
||||||
|
@ -275,10 +280,7 @@ public class BKDReader implements Accountable {
|
||||||
packedBytesLength,
|
packedBytesLength,
|
||||||
maxPointsInLeafNode,
|
maxPointsInLeafNode,
|
||||||
visitor);
|
visitor);
|
||||||
byte[] rootMinPacked = new byte[packedBytesLength];
|
intersect(state, 1, minPackedValue, maxPackedValue);
|
||||||
byte[] rootMaxPacked = new byte[packedBytesLength];
|
|
||||||
Arrays.fill(rootMaxPacked, (byte) 0xff);
|
|
||||||
intersect(state, 1, rootMinPacked, rootMaxPacked);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Fast path: this is called when the query box fully encompasses all cells under this node. */
|
/** Fast path: this is called when the query box fully encompasses all cells under this node. */
|
||||||
|
@ -430,4 +432,8 @@ public class BKDReader implements Accountable {
|
||||||
public int getBytesPerDimension() {
|
public int getBytesPerDimension() {
|
||||||
return bytesPerDim;
|
return bytesPerDim;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the point count held by this reader (read from the index input, or supplied to the protected constructor). */
public long getPointCount() {
|
||||||
|
return pointCount;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -123,7 +123,7 @@ public class BKDWriter implements Closeable {
|
||||||
/** Maximum per-dim values, packed */
|
/** Maximum per-dim values, packed */
|
||||||
protected final byte[] maxPackedValue;
|
protected final byte[] maxPackedValue;
|
||||||
|
|
||||||
private long pointCount;
|
protected long pointCount;
|
||||||
|
|
||||||
public BKDWriter(Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim) throws IOException {
|
public BKDWriter(Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim) throws IOException {
|
||||||
this(tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_MB_SORT_IN_HEAP);
|
this(tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_MB_SORT_IN_HEAP);
|
||||||
|
@ -428,7 +428,8 @@ public class BKDWriter implements Closeable {
|
||||||
}
|
}
|
||||||
System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, 0, packedBytesLength);
|
System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, 0, packedBytesLength);
|
||||||
|
|
||||||
assert numDims > 1 || valueInOrder(valueCount++, lastPackedValue, reader.state.scratchPackedValue);
|
assert numDims > 1 || valueInOrder(valueCount, lastPackedValue, reader.state.scratchPackedValue);
|
||||||
|
valueCount++;
|
||||||
|
|
||||||
if (leafCount == 0) {
|
if (leafCount == 0) {
|
||||||
if (leafBlockFPs.size() > 0) {
|
if (leafBlockFPs.size() > 0) {
|
||||||
|
@ -478,6 +479,8 @@ public class BKDWriter implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pointCount = valueCount;
|
||||||
|
|
||||||
long indexFP = out.getFilePointer();
|
long indexFP = out.getFilePointer();
|
||||||
|
|
||||||
int numInnerNodes = leafBlockStartValues.size();
|
int numInnerNodes = leafBlockStartValues.size();
|
||||||
|
@ -799,10 +802,6 @@ public class BKDWriter implements Closeable {
|
||||||
// Sort all docs once by each dimension:
|
// Sort all docs once by each dimension:
|
||||||
PathSlice[] sortedPointWriters = new PathSlice[numDims];
|
PathSlice[] sortedPointWriters = new PathSlice[numDims];
|
||||||
|
|
||||||
byte[] minPacked = new byte[packedBytesLength];
|
|
||||||
byte[] maxPacked = new byte[packedBytesLength];
|
|
||||||
Arrays.fill(maxPacked, (byte) 0xff);
|
|
||||||
|
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
//long t0 = System.nanoTime();
|
//long t0 = System.nanoTime();
|
||||||
|
@ -822,7 +821,7 @@ public class BKDWriter implements Closeable {
|
||||||
|
|
||||||
build(1, numLeaves, sortedPointWriters,
|
build(1, numLeaves, sortedPointWriters,
|
||||||
ordBitSet, out,
|
ordBitSet, out,
|
||||||
minPacked, maxPacked,
|
minPackedValue, maxPackedValue,
|
||||||
splitPackedValues,
|
splitPackedValues,
|
||||||
leafBlockFPs);
|
leafBlockFPs);
|
||||||
|
|
||||||
|
@ -862,6 +861,8 @@ public class BKDWriter implements Closeable {
|
||||||
out.writeBytes(minPackedValue, 0, packedBytesLength);
|
out.writeBytes(minPackedValue, 0, packedBytesLength);
|
||||||
out.writeBytes(maxPackedValue, 0, packedBytesLength);
|
out.writeBytes(maxPackedValue, 0, packedBytesLength);
|
||||||
|
|
||||||
|
out.writeVLong(pointCount);
|
||||||
|
|
||||||
// TODO: for 1D case, don't waste the first byte of each split value (it's always 0)
|
// TODO: for 1D case, don't waste the first byte of each split value (it's always 0)
|
||||||
|
|
||||||
// NOTE: splitPackedValues[0] is unused, because nodeID is 1-based:
|
// NOTE: splitPackedValues[0] is unused, because nodeID is 1-based:
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.index.PointValues;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
import org.apache.lucene.util.Accountable;
|
import org.apache.lucene.util.Accountable;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.StringHelper;
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
|
@ -105,8 +106,8 @@ public final class AssertingPointFormat extends PointFormat {
|
||||||
|
|
||||||
// This doc's packed value should be contained in the last cell passed to compare:
|
// This doc's packed value should be contained in the last cell passed to compare:
|
||||||
for(int dim=0;dim<numDims;dim++) {
|
for(int dim=0;dim<numDims;dim++) {
|
||||||
assert StringHelper.compare(bytesPerDim, lastMinPackedValue, dim*bytesPerDim, packedValue, dim*bytesPerDim) <= 0: "dim=" + dim + " of " + numDims;
|
assert StringHelper.compare(bytesPerDim, lastMinPackedValue, dim*bytesPerDim, packedValue, dim*bytesPerDim) <= 0: "dim=" + dim + " of " + numDims + " value=" + new BytesRef(packedValue);
|
||||||
assert StringHelper.compare(bytesPerDim, lastMaxPackedValue, dim*bytesPerDim, packedValue, dim*bytesPerDim) >= 0: "dim=" + dim + " of " + numDims;
|
assert StringHelper.compare(bytesPerDim, lastMaxPackedValue, dim*bytesPerDim, packedValue, dim*bytesPerDim) >= 0: "dim=" + dim + " of " + numDims + " value=" + new BytesRef(packedValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: we should assert that this "matches" whatever relation the last call to compare had returned
|
// TODO: we should assert that this "matches" whatever relation the last call to compare had returned
|
||||||
|
@ -214,6 +215,12 @@ public final class AssertingPointFormat extends PointFormat {
|
||||||
public int getBytesPerDimension(String fieldName) throws IOException {
|
public int getBytesPerDimension(String fieldName) throws IOException {
|
||||||
return in.getBytesPerDimension(fieldName);
|
return in.getBytesPerDimension(fieldName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long size(String fieldName) {
|
||||||
|
// TODO: what to assert?
|
||||||
|
return in.size(fieldName);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class AssertingPointWriter extends PointWriter {
|
static class AssertingPointWriter extends PointWriter {
|
||||||
|
|
|
@ -171,5 +171,10 @@ class CrankyPointFormat extends PointFormat {
|
||||||
public long ramBytesUsed() {
|
public long ramBytesUsed() {
|
||||||
return delegate.ramBytesUsed();
|
return delegate.ramBytesUsed();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long size(String fieldName) {
|
||||||
|
return delegate.size(fieldName);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue