diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java index 488547b4dea..bea7b62de23 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java @@ -21,7 +21,8 @@ import java.nio.charset.StandardCharsets; import org.apache.lucene.codecs.simpletext.SimpleTextUtil; import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.PointValues.IntersectVisitor; +import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.BytesRef; @@ -36,7 +37,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_V /** Forked from {@link BKDReader} and simplified/specialized for SimpleText's usage */ -final class SimpleTextBKDReader extends PointValues implements Accountable { +final class SimpleTextBKDReader implements Accountable { // Packed array of byte[] holding all split values in the full binary tree: final private byte[] splitPackedValues; final long[] leafBlockFPs; @@ -306,32 +307,26 @@ final class SimpleTextBKDReader extends PointValues implements Accountable { RamUsageEstimator.sizeOf(leafBlockFPs); } - @Override public byte[] getMinPackedValue() { return minPackedValue.clone(); } - @Override public byte[] getMaxPackedValue() { return maxPackedValue.clone(); } - @Override public int getNumDimensions() { return numDims; } - @Override public int getBytesPerDimension() { return bytesPerDim; } - @Override - public long size() { + public long getPointCount() { return pointCount; } - @Override public int getDocCount() { return docCount; } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java index d7674edf369..0dbc176c274 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java @@ -25,7 +25,7 @@ import java.util.List; import java.util.function.IntFunction; import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.MutablePointValues; +import org.apache.lucene.codecs.MutablePointsReader; import org.apache.lucene.index.MergeState; import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.Relation; @@ -427,12 +427,12 @@ final class SimpleTextBKDWriter implements Closeable { } } - /** Write a field from a {@link MutablePointValues}. This way of writing + /** Write a field from a {@link MutablePointsReader}. This way of writing * points is faster than regular writes with {@link BKDWriter#add} since * there is opportunity for reordering points before writing them to * disk. This method does not use transient disk in order to reorder points. */ - public long writeField(IndexOutput out, String fieldName, MutablePointValues reader) throws IOException { + public long writeField(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException { if (numDims == 1) { return writeField1Dim(out, fieldName, reader); } else { @@ -443,7 +443,7 @@ final class SimpleTextBKDWriter implements Closeable { /* In the 2+D case, we recursively pick the split dimension, compute the * median value and partition other values around it. */ - private long writeFieldNDims(IndexOutput out, String fieldName, MutablePointValues values) throws IOException { + private long writeFieldNDims(IndexOutput out, String fieldName, MutablePointsReader values) throws IOException { if (pointCount != 0) { throw new IllegalStateException("cannot mix add and writeField"); } @@ -456,7 +456,7 @@ final class SimpleTextBKDWriter implements Closeable { // Mark that we already finished: heapPointWriter = null; - long countPerLeaf = pointCount = values.size(); + long countPerLeaf = pointCount = values.size(fieldName); long innerNodeCount = 1; while (countPerLeaf > maxPointsInLeafNode) { @@ -501,12 +501,12 @@ final class SimpleTextBKDWriter implements Closeable { /* In the 1D case, we can simply sort points in ascending order and use the * same writing logic as we use at merge time. */ - private long writeField1Dim(IndexOutput out, String fieldName, MutablePointValues reader) throws IOException { - MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, Math.toIntExact(reader.size())); + private long writeField1Dim(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException { + MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, Math.toIntExact(reader.size(fieldName))); final OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out); - reader.intersect(new IntersectVisitor() { + reader.intersect(fieldName, new IntersectVisitor() { @Override public void visit(int docID, byte[] packedValue) throws IOException { @@ -1264,7 +1264,7 @@ final class SimpleTextBKDWriter implements Closeable { /* Recursively reorders the provided reader and writes the bkd-tree on the fly. */ private void build(int nodeID, int leafNodeOffset, - MutablePointValues reader, int from, int to, + MutablePointsReader reader, int from, int to, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java index 8ca16357e92..e6711e7d550 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java @@ -184,7 +184,7 @@ class SimpleTextPointsReader extends PointsReader { return new String(scratch.bytes(), prefix.length, scratch.length() - prefix.length, StandardCharsets.UTF_8); } - private BKDReader getBKDReader(String fieldName) { + private SimpleTextBKDReader getBKDReader(String fieldName) { FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(fieldName); if (fieldInfo == null) { throw new IllegalArgumentException("field=\"" + fieldName + "\" is unrecognized"); @@ -198,7 +198,7 @@ class SimpleTextPointsReader extends PointsReader { /** Finds all documents and points matching the provided visitor */ @Override public void intersect(String fieldName, IntersectVisitor visitor) throws IOException { - BKDReader bkdReader = getBKDReader(fieldName); + SimpleTextBKDReader bkdReader = getBKDReader(fieldName); if (bkdReader == null) { // Schema ghost corner case! This field did index points in the past, but // now all docs having this field were deleted in this segment: @@ -246,7 +246,7 @@ class SimpleTextPointsReader extends PointsReader { @Override public byte[] getMinPackedValue(String fieldName) { - BKDReader bkdReader = getBKDReader(fieldName); + SimpleTextBKDReader bkdReader = getBKDReader(fieldName); if (bkdReader == null) { // Schema ghost corner case! This field did index points in the past, but // now all docs having this field were deleted in this segment: @@ -257,7 +257,7 @@ class SimpleTextPointsReader extends PointsReader { @Override public byte[] getMaxPackedValue(String fieldName) { - BKDReader bkdReader = getBKDReader(fieldName); + SimpleTextBKDReader bkdReader = getBKDReader(fieldName); if (bkdReader == null) { // Schema ghost corner case! This field did index points in the past, but // now all docs having this field were deleted in this segment: @@ -268,7 +268,7 @@ class SimpleTextPointsReader extends PointsReader { @Override public int getNumDimensions(String fieldName) { - BKDReader bkdReader = getBKDReader(fieldName); + SimpleTextBKDReader bkdReader = getBKDReader(fieldName); if (bkdReader == null) { // Schema ghost corner case! This field did index points in the past, but // now all docs having this field were deleted in this segment: @@ -279,7 +279,7 @@ class SimpleTextPointsReader extends PointsReader { @Override public int getBytesPerDimension(String fieldName) { - BKDReader bkdReader = getBKDReader(fieldName); + SimpleTextBKDReader bkdReader = getBKDReader(fieldName); if (bkdReader == null) { // Schema ghost corner case! This field did index points in the past, but // now all docs having this field were deleted in this segment: @@ -290,7 +290,7 @@ class SimpleTextPointsReader extends PointsReader { @Override public long size(String fieldName) { - BKDReader bkdReader = getBKDReader(fieldName); + SimpleTextBKDReader bkdReader = getBKDReader(fieldName); if (bkdReader == null) { // Schema ghost corner case! This field did index points in the past, but // now all docs having this field were deleted in this segment: @@ -301,7 +301,7 @@ class SimpleTextPointsReader extends PointsReader { @Override public int getDocCount(String fieldName) { - BKDReader bkdReader = getBKDReader(fieldName); + SimpleTextBKDReader bkdReader = getBKDReader(fieldName); if (bkdReader == null) { // Schema ghost corner case! This field did index points in the past, but // now all docs having this field were deleted in this segment: diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java index e4b2c2cd4c7..0c62929b752 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java @@ -70,7 +70,7 @@ class SimpleTextPointsWriter extends PointsWriter { boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name); - // We use the normal BKDWriter, but subclass to customize how it writes the index and blocks to disk: + // We use our own fork of the BKDWriter to customize how it writes the index and blocks to disk: try (SimpleTextBKDWriter writer = new SimpleTextBKDWriter(writeState.segmentInfo.maxDoc(), writeState.directory, writeState.segmentInfo.name, @@ -78,7 +78,7 @@ class SimpleTextPointsWriter extends PointsWriter { fieldInfo.getPointNumBytes(), SimpleTextBKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, SimpleTextBKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, - values.size(), + values.size(fieldInfo.name), singleValuePerDoc)) { values.intersect(fieldInfo.name, new IntersectVisitor() { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java index a914001d9d2..f6f07832be9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java @@ -16,7 +16,7 @@ */ /** - * Components from the Lucene 6.0 index format. See {@link org.apache.lucene.codecs.lucene70} + * Components from the Lucene 6.0 index format. See {@link org.apache.lucene.codecs.lucene62} * for an overview of the current index format. */ package org.apache.lucene.codecs.lucene60; diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index bc22d86b5f5..21a28e5fcdb 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -1799,18 +1799,14 @@ public final class CheckIndex implements Closeable { } for (FieldInfo fieldInfo : fieldInfos) { if (fieldInfo.getPointDimensionCount() > 0) { - PointValues values = pointsReader.getValues(fieldInfo.name); - if (values == null) { - continue; - } status.totalValueFields++; - long size = values.size(); - int docCount = values.getDocCount(); + long size = values.size(fieldInfo.name); + int docCount = values.getDocCount(fieldInfo.name); VerifyPointsVisitor visitor = new VerifyPointsVisitor(fieldInfo.name, reader.maxDoc(), values); - values.intersect(visitor); + values.intersect(fieldInfo.name, visitor); if (visitor.getPointCountSeen() != size) { throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have size=" + size + " points, but in fact has " + visitor.getPointCountSeen()); @@ -1863,34 +1859,34 @@ public final class CheckIndex implements Closeable { public VerifyPointsVisitor(String fieldName, int maxDoc, PointValues values) throws IOException { this.maxDoc = maxDoc; this.fieldName = fieldName; - numDims = values.getNumDimensions(); - bytesPerDim = values.getBytesPerDimension(); + numDims = values.getNumDimensions(fieldName); + bytesPerDim = values.getBytesPerDimension(fieldName); packedBytesCount = numDims * bytesPerDim; - globalMinPackedValue = values.getMinPackedValue(); - globalMaxPackedValue = values.getMaxPackedValue(); + globalMinPackedValue = values.getMinPackedValue(fieldName); + globalMaxPackedValue = values.getMaxPackedValue(fieldName); docsSeen = new FixedBitSet(maxDoc); lastMinPackedValue = new byte[packedBytesCount]; lastMaxPackedValue = new byte[packedBytesCount]; lastPackedValue = new byte[packedBytesCount]; - if (values.getDocCount() > values.size()) { - throw new RuntimeException("point values for field \"" + fieldName + "\" claims to have size=" + values.size() + " points and inconsistent docCount=" + values.getDocCount()); + if (values.getDocCount(fieldName) > values.size(fieldName)) { + throw new RuntimeException("point values for field \"" + fieldName + "\" claims to have size=" + values.size(fieldName) + " points and inconsistent docCount=" + values.getDocCount(fieldName)); } - if (values.getDocCount() > maxDoc) { - throw new RuntimeException("point values for field \"" + fieldName + "\" claims to have docCount=" + values.getDocCount() + " but that's greater than maxDoc=" + maxDoc); + if (values.getDocCount(fieldName) > maxDoc) { + throw new RuntimeException("point values for field \"" + fieldName + "\" claims to have docCount=" + values.getDocCount(fieldName) + " but that's greater than maxDoc=" + maxDoc); } if (globalMinPackedValue == null) { - if (values.size() != 0) { - throw new RuntimeException("getMinPackedValue is null points for field \"" + fieldName + "\" yet size=" + values.size()); + if (values.size(fieldName) != 0) { + throw new RuntimeException("getMinPackedValue is null points for field \"" + fieldName + "\" yet size=" + values.size(fieldName)); } } else if (globalMinPackedValue.length != packedBytesCount) { throw new RuntimeException("getMinPackedValue for field \"" + fieldName + "\" return length=" + globalMinPackedValue.length + " array, but should be " + packedBytesCount); } if (globalMaxPackedValue == null) { - if (values.size() != 0) { - throw new RuntimeException("getMaxPackedValue is null points for field \"" + fieldName + "\" yet size=" + values.size()); + if (values.size(fieldName) != 0) { + throw new RuntimeException("getMaxPackedValue is null points for field \"" + fieldName + "\" yet size=" + values.size(fieldName)); } } else if (globalMaxPackedValue.length != packedBytesCount) { throw new RuntimeException("getMaxPackedValue for field \"" + fieldName + "\" return length=" + globalMaxPackedValue.length + " array, but should be " + packedBytesCount); diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java index ef9bccc3d1e..4f4228d5c70 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java @@ -20,7 +20,8 @@ import java.io.IOException; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.PointValues.IntersectVisitor; +import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Accountable; @@ -33,7 +34,7 @@ import org.apache.lucene.util.StringHelper; * * @lucene.experimental */ -public final class BKDReader extends PointValues implements Accountable { +public final class BKDReader implements Accountable { // Packed array of byte[] holding all split values in the full binary tree: final int leafNodeOffset; final int numDims; diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index 2567eef5344..b9fd37cb042 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java @@ -1451,7 +1451,7 @@ public class BKDWriter implements Closeable { /* Recursively reorders the provided reader and writes the bkd-tree on the fly. */ private void build(int nodeID, int leafNodeOffset, - MutablePointValues reader, int from, int to, + MutablePointsReader reader, int from, int to, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java b/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java index 9e84c8dde41..c7be5ba9133 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java @@ -33,9 +33,9 @@ public final class MutablePointsReaderUtils { MutablePointsReaderUtils() {} - /** Sort the given {@link MutablePointValues} based on its packed value then doc ID. */ + /** Sort the given {@link MutablePointsReader} based on its packed value then doc ID. */ public static void sort(int maxDoc, int packedBytesLength, - MutablePointValues reader, int from, int to) { + MutablePointsReader reader, int from, int to) { final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1); new MSBRadixSorter(packedBytesLength + (bitsPerDocId + 7) / 8) { @@ -92,7 +92,7 @@ public final class MutablePointsReaderUtils { /** Sort points on the given dimension. */ public static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths, - MutablePointValues reader, int from, int to, + MutablePointsReader reader, int from, int to, BytesRef scratch1, BytesRef scratch2) { // No need for a fancy radix sort here, this is called on the leaves only so @@ -131,7 +131,7 @@ public final class MutablePointsReaderUtils { * than or equal to it and all values on the right must be greater than or * equal to it. */ public static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen, - MutablePointValues reader, int from, int to, int mid, + MutablePointsReader reader, int from, int to, int mid, BytesRef scratch1, BytesRef scratch2) { final int offset = splitDim * bytesPerDim + commonPrefixLen; final int cmpBytes = bytesPerDim - commonPrefixLen; diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java b/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java index e30168c17e8..a89a1840ba0 100644 --- a/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java @@ -16,7 +16,10 @@ */ package org.apache.lucene.util.bkd; +import java.io.IOException; + import org.apache.lucene.index.CheckIndex; +import org.apache.lucene.index.PointValues; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.IOContext; @@ -65,9 +68,9 @@ public class Test2BBKDPoints extends LuceneTestCase { IndexInput in = dir.openInput("1d.bkd", IOContext.DEFAULT); in.seek(indexFP); BKDReader r = new BKDReader(in); - CheckIndex.VerifyPointsVisitor visitor = new CheckIndex.VerifyPointsVisitor("1d", numDocs, r); + CheckIndex.VerifyPointsVisitor visitor = new CheckIndex.VerifyPointsVisitor("1d", numDocs, new BKDReaderToPointValues("1d", r)); r.intersect(visitor); - assertEquals(r.size(), visitor.getPointCountSeen()); + assertEquals(r.getPointCount(), visitor.getPointCountSeen()); assertEquals(r.getDocCount(), visitor.getDocCountSeen()); in.close(); dir.close(); @@ -105,11 +108,70 @@ public class Test2BBKDPoints extends LuceneTestCase { IndexInput in = dir.openInput("2d.bkd", IOContext.DEFAULT); in.seek(indexFP); BKDReader r = new BKDReader(in); - CheckIndex.VerifyPointsVisitor visitor = new CheckIndex.VerifyPointsVisitor("2d", numDocs, r); + CheckIndex.VerifyPointsVisitor visitor = new CheckIndex.VerifyPointsVisitor("2d", numDocs, new BKDReaderToPointValues("2d", r)); r.intersect(visitor); - assertEquals(r.size(), visitor.getPointCountSeen()); + assertEquals(r.getPointCount(), visitor.getPointCountSeen()); assertEquals(r.getDocCount(), visitor.getDocCountSeen()); in.close(); dir.close(); } + + private class BKDReaderToPointValues extends PointValues { + + private final BKDReader bkdReader; + private final String fieldName; + + public BKDReaderToPointValues(String fieldName, BKDReader bkdReader) { + this.fieldName = fieldName; + this.bkdReader = bkdReader; + } + + @Override + public void intersect(String fieldNameIn, IntersectVisitor visitor) throws IOException { + verifyFieldName(fieldNameIn); + bkdReader.intersect(visitor); + } + + @Override + public byte[] getMinPackedValue(String fieldNameIn) throws IOException { + verifyFieldName(fieldNameIn); + return bkdReader.getMinPackedValue(); + } + + @Override + public byte[] getMaxPackedValue(String fieldNameIn) throws IOException { + verifyFieldName(fieldNameIn); + return bkdReader.getMaxPackedValue(); + } + + @Override + public int getNumDimensions(String fieldNameIn) throws IOException { + verifyFieldName(fieldNameIn); + return bkdReader.getNumDimensions(); + } + + @Override + public int getBytesPerDimension(String fieldNameIn) throws IOException { + verifyFieldName(fieldNameIn); + return bkdReader.getBytesPerDimension(); + } + + @Override + public long size(String fieldNameIn) { + verifyFieldName(fieldNameIn); + return bkdReader.getPointCount(); + } + + @Override + public int getDocCount(String fieldNameIn) { + verifyFieldName(fieldNameIn); + return bkdReader.getDocCount(); + } + + private void verifyFieldName(String fieldNameIn) { + if (fieldName.equals(fieldNameIn) == false) { + throw new IllegalArgumentException("expected fieldName=\"" + fieldName + "\" but got \"" + fieldNameIn + "\""); + } + } + } }