diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java index d0ab81e5626..6752393da73 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java @@ -34,8 +34,8 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.BLOCK_VA class SimpleTextBKDReader extends BKDReader { public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues, - byte[] minPackedValue, byte[] maxPackedValue) throws IOException { - super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue); + byte[] minPackedValue, byte[] maxPackedValue, long pointCount) throws IOException { + super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue, pointCount); } @Override diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointReader.java index 0ec2e0350c0..76c0431b0d0 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointReader.java @@ -47,6 +47,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MAX_LEAF import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MAX_VALUE; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.MIN_VALUE; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.NUM_DIMS; +import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.POINT_COUNT; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_COUNT; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_DIM; import static org.apache.lucene.codecs.simpletext.SimpleTextPointWriter.SPLIT_VALUE; @@ -119,6 +120,10 @@ class SimpleTextPointReader extends PointReader { assert startsWith(MAX_VALUE); BytesRef maxValue = SimpleTextUtil.fromBytesRefString(stripPrefix(MAX_VALUE)); assert maxValue.length == numDims*bytesPerDim; + + readLine(dataIn); + assert startsWith(POINT_COUNT); + long pointCount = parseLong(POINT_COUNT); long[] leafBlockFPs = new long[count]; for(int i=0;i subs; - private final List docBases; - - private MultiPointValues(List subs, List docBases) { - this.subs = subs; - this.docBases = docBases; - } - - /** Returns a {@link PointValues} merging all point values from the provided reader. */ - public static PointValues get(IndexReader r) { - final List leaves = r.leaves(); - final int size = leaves.size(); - if (size == 0) { - return null; - } else if (size == 1) { - return leaves.get(0).reader().getPointValues(); - } - - List values = new ArrayList<>(); - List docBases = new ArrayList<>(); - for (int i = 0; i < size; i++) { - LeafReaderContext context = leaves.get(i); - PointValues v = context.reader().getPointValues(); - if (v != null) { - values.add(v); - docBases.add(context.docBase); - } - } - - if (values.isEmpty()) { - return null; - } - - return new MultiPointValues(values, docBases); - } - - /** Finds all documents and points matching the provided visitor */ - public void intersect(String fieldName, IntersectVisitor visitor) throws IOException { - for(int i=0;i 0) { - b.append(", "); - } - b.append("docBase="); - b.append(docBases.get(i)); - b.append(" sub=" + subs.get(i)); - } - b.append(')'); - return b.toString(); - } - - @Override - public byte[] getMinPackedValue(String fieldName) throws IOException { - byte[] result = null; - for(int i=0;i 0) { - System.arraycopy(maxPackedValue, offset, result, offset, bytesPerDim); - } - } - } - } - - return result; - } - - @Override - public int getNumDimensions(String fieldName) throws IOException { - for(int i=0;i0 */ public abstract byte[] getMinPackedValue(String fieldName) throws IOException; - /** Returns maximum value for each dimension, packed, or null if no points were indexed */ + /** Returns maximum value for each dimension, packed, or null if {@link #size} is 0 */ public abstract byte[] getMaxPackedValue(String fieldName) throws IOException; /** Returns how many dimensions were indexed */ @@ -91,4 +91,9 @@ public abstract class PointValues { /** Returns the number of bytes per dimension */ public abstract int getBytesPerDimension(String fieldName) throws IOException; + + /** Returns the total number of indexed points across all documents in this field. */ + public abstract long size(String fieldName); + + // nocommit make "delete all point docs then force merge" and then check stats test } diff --git a/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java index 2fa8b4fa2b1..fe3aa14c2c8 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java @@ -108,6 +108,11 @@ class PointValuesWriter { public int getBytesPerDimension(String fieldName) { throw new UnsupportedOperationException(); } + + @Override + public long size(String fieldName) { + throw new UnsupportedOperationException(); + } }); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java index 50d9778ed8b..50f5ad7692c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java @@ -172,6 +172,11 @@ public final class SlowCodecReaderWrapper { public int getBytesPerDimension(String fieldName) throws IOException { return values.getBytesPerDimension(fieldName); } + + @Override + public long size(String fieldName) { + return values.size(fieldName); + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java index 9d7259969af..424b81cb36a 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java @@ -42,6 +42,7 @@ public class BKDReader implements Accountable { final int maxPointsInLeafNode; final byte[] minPackedValue; final byte[] maxPackedValue; + final long pointCount; protected final int packedBytesLength; /** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */ @@ -59,9 +60,12 @@ public class BKDReader implements Accountable { minPackedValue = new byte[packedBytesLength]; maxPackedValue = new byte[packedBytesLength]; + in.readBytes(minPackedValue, 0, packedBytesLength); in.readBytes(maxPackedValue, 0, packedBytesLength); + pointCount = in.readVLong(); + splitPackedValues = new byte[(1+bytesPerDim)*numLeaves]; // TODO: don't write split packed values[0]! @@ -122,7 +126,7 @@ public class BKDReader implements Accountable { /** Called by consumers that have their own on-disk format for the index (e.g. SimpleText) */ protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues, - byte[] minPackedValue, byte[] maxPackedValue) throws IOException { + byte[] minPackedValue, byte[] maxPackedValue, long pointCount) throws IOException { this.in = in; this.numDims = numDims; this.maxPointsInLeafNode = maxPointsInLeafNode; @@ -133,6 +137,7 @@ public class BKDReader implements Accountable { this.splitPackedValues = splitPackedValues; this.minPackedValue = minPackedValue; this.maxPackedValue = maxPackedValue; + this.pointCount = pointCount; assert minPackedValue.length == packedBytesLength; assert maxPackedValue.length == packedBytesLength; } @@ -275,10 +280,7 @@ public class BKDReader implements Accountable { packedBytesLength, maxPointsInLeafNode, visitor); - byte[] rootMinPacked = new byte[packedBytesLength]; - byte[] rootMaxPacked = new byte[packedBytesLength]; - Arrays.fill(rootMaxPacked, (byte) 0xff); - intersect(state, 1, rootMinPacked, rootMaxPacked); + intersect(state, 1, minPackedValue, maxPackedValue); } /** Fast path: this is called when the query box fully encompasses all cells under this node. */ @@ -430,4 +432,8 @@ public class BKDReader implements Accountable { public int getBytesPerDimension() { return bytesPerDim; } + + public long getPointCount() { + return pointCount; + } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index 9d6ad978878..0ccdf43d7e5 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java @@ -123,7 +123,7 @@ public class BKDWriter implements Closeable { /** Maximum per-dim values, packed */ protected final byte[] maxPackedValue; - private long pointCount; + protected long pointCount; public BKDWriter(Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim) throws IOException { this(tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_MB_SORT_IN_HEAP); @@ -428,7 +428,8 @@ public class BKDWriter implements Closeable { } System.arraycopy(reader.state.scratchPackedValue, 0, maxPackedValue, 0, packedBytesLength); - assert numDims > 1 || valueInOrder(valueCount++, lastPackedValue, reader.state.scratchPackedValue); + assert numDims > 1 || valueInOrder(valueCount, lastPackedValue, reader.state.scratchPackedValue); + valueCount++; if (leafCount == 0) { if (leafBlockFPs.size() > 0) { @@ -478,6 +479,8 @@ public class BKDWriter implements Closeable { } } + pointCount = valueCount; + long indexFP = out.getFilePointer(); int numInnerNodes = leafBlockStartValues.size(); @@ -799,10 +802,6 @@ public class BKDWriter implements Closeable { // Sort all docs once by each dimension: PathSlice[] sortedPointWriters = new PathSlice[numDims]; - byte[] minPacked = new byte[packedBytesLength]; - byte[] maxPacked = new byte[packedBytesLength]; - Arrays.fill(maxPacked, (byte) 0xff); - boolean success = false; try { //long t0 = System.nanoTime(); @@ -822,7 +821,7 @@ public class BKDWriter implements Closeable { build(1, numLeaves, sortedPointWriters, ordBitSet, out, - minPacked, maxPacked, + minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs); @@ -862,6 +861,8 @@ public class BKDWriter implements Closeable { out.writeBytes(minPackedValue, 0, packedBytesLength); out.writeBytes(maxPackedValue, 0, packedBytesLength); + out.writeVLong(pointCount); + // TODO: for 1D case, don't waste the first byte of each split value (it's always 0) // NOTE: splitPackedValues[0] is unused, because nodeID is 1-based: diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPointFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPointFormat.java index 15836de6ec0..71790fc4ba6 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPointFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPointFormat.java @@ -31,6 +31,7 @@ import org.apache.lucene.index.PointValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.TestUtil; @@ -105,8 +106,8 @@ public final class AssertingPointFormat extends PointFormat { // This doc's packed value should be contained in the last cell passed to compare: for(int dim=0;dim= 0: "dim=" + dim + " of " + numDims; + assert StringHelper.compare(bytesPerDim, lastMinPackedValue, dim*bytesPerDim, packedValue, dim*bytesPerDim) <= 0: "dim=" + dim + " of " + numDims + " value=" + new BytesRef(packedValue); + assert StringHelper.compare(bytesPerDim, lastMaxPackedValue, dim*bytesPerDim, packedValue, dim*bytesPerDim) >= 0: "dim=" + dim + " of " + numDims + " value=" + new BytesRef(packedValue); } // TODO: we should assert that this "matches" whatever relation the last call to compare had returned @@ -214,6 +215,12 @@ public final class AssertingPointFormat extends PointFormat { public int getBytesPerDimension(String fieldName) throws IOException { return in.getBytesPerDimension(fieldName); } + + @Override + public long size(String fieldName) { + // TODO: what to assert? + return in.size(fieldName); + } } static class AssertingPointWriter extends PointWriter { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPointFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPointFormat.java index 6b83b1e4fef..699fb3c7cbd 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPointFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPointFormat.java @@ -171,5 +171,10 @@ class CrankyPointFormat extends PointFormat { public long ramBytesUsed() { return delegate.ramBytesUsed(); } + + @Override + public long size(String fieldName) { + return delegate.size(fieldName); + } } }