mirror of https://github.com/apache/lucene.git
LUCENE-9820: PointTree#size() should handle the case of balanced tree in pre-8.6 indexes (#462)
Properly handle the case where trees are fully balanced when the number of dimensions is > 1
This commit is contained in:
parent
8710252116
commit
800f002e44
|
@ -21,6 +21,7 @@ import java.util.ArrayList;
|
|||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.backward_codecs.lucene60.bkd.BKDWriter60;
|
||||
import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.MutablePointTree;
|
||||
|
@ -36,8 +37,6 @@ import org.apache.lucene.index.PointValues.Relation;
|
|||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.bkd.BKDConfig;
|
||||
import org.apache.lucene.util.bkd.BKDWriter;
|
||||
|
||||
/** Writes dimensional values */
|
||||
public class Lucene60PointsWriter extends PointsWriter {
|
||||
|
@ -91,8 +90,8 @@ public class Lucene60PointsWriter extends PointsWriter {
|
|||
public Lucene60PointsWriter(SegmentWriteState writeState) throws IOException {
|
||||
this(
|
||||
writeState,
|
||||
BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
|
||||
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
|
||||
BKDWriter60.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
|
||||
BKDWriter60.DEFAULT_MAX_MB_SORT_IN_HEAP);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -100,28 +99,22 @@ public class Lucene60PointsWriter extends PointsWriter {
|
|||
|
||||
PointValues.PointTree values = reader.getValues(fieldInfo.name).getPointTree();
|
||||
|
||||
BKDConfig config =
|
||||
new BKDConfig(
|
||||
fieldInfo.getPointDimensionCount(),
|
||||
fieldInfo.getPointIndexDimensionCount(),
|
||||
fieldInfo.getPointNumBytes(),
|
||||
maxPointsInLeafNode);
|
||||
|
||||
try (BKDWriter writer =
|
||||
new BKDWriter(
|
||||
try (BKDWriter60 writer =
|
||||
new BKDWriter60(
|
||||
writeState.segmentInfo.maxDoc(),
|
||||
writeState.directory,
|
||||
writeState.segmentInfo.name,
|
||||
config,
|
||||
fieldInfo.getPointDimensionCount(),
|
||||
fieldInfo.getPointIndexDimensionCount(),
|
||||
fieldInfo.getPointNumBytes(),
|
||||
maxPointsInLeafNode,
|
||||
maxMBSortInHeap,
|
||||
values.size())) {
|
||||
|
||||
if (values instanceof MutablePointTree) {
|
||||
Runnable finalizer =
|
||||
writer.writeField(dataOut, dataOut, dataOut, fieldInfo.name, (MutablePointTree) values);
|
||||
if (finalizer != null) {
|
||||
indexFPs.put(fieldInfo.name, dataOut.getFilePointer());
|
||||
finalizer.run();
|
||||
final long fp = writer.writeField(dataOut, fieldInfo.name, (MutablePointTree) values);
|
||||
if (fp != -1) {
|
||||
indexFPs.put(fieldInfo.name, fp);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -145,10 +138,8 @@ public class Lucene60PointsWriter extends PointsWriter {
|
|||
});
|
||||
|
||||
// We could have 0 points on merge since all docs with dimensional fields may be deleted:
|
||||
Runnable finalizer = writer.finish(dataOut, dataOut, dataOut);
|
||||
if (finalizer != null) {
|
||||
indexFPs.put(fieldInfo.name, dataOut.getFilePointer());
|
||||
finalizer.run();
|
||||
if (writer.getPointCount() > 0) {
|
||||
indexFPs.put(fieldInfo.name, writer.finish(dataOut));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -193,26 +184,22 @@ public class Lucene60PointsWriter extends PointsWriter {
|
|||
}
|
||||
}
|
||||
|
||||
BKDConfig config =
|
||||
new BKDConfig(
|
||||
fieldInfo.getPointDimensionCount(),
|
||||
fieldInfo.getPointIndexDimensionCount(),
|
||||
fieldInfo.getPointNumBytes(),
|
||||
maxPointsInLeafNode);
|
||||
|
||||
// System.out.println("MERGE: field=" + fieldInfo.name);
|
||||
// Optimize the 1D case to use BKDWriter.merge, which does a single merge sort of the
|
||||
// already sorted incoming segments, instead of trying to sort all points again as if
|
||||
// we were simply reindexing them:
|
||||
try (BKDWriter writer =
|
||||
new BKDWriter(
|
||||
try (BKDWriter60 writer =
|
||||
new BKDWriter60(
|
||||
writeState.segmentInfo.maxDoc(),
|
||||
writeState.directory,
|
||||
writeState.segmentInfo.name,
|
||||
config,
|
||||
fieldInfo.getPointDimensionCount(),
|
||||
fieldInfo.getPointIndexDimensionCount(),
|
||||
fieldInfo.getPointNumBytes(),
|
||||
maxPointsInLeafNode,
|
||||
maxMBSortInHeap,
|
||||
totMaxSize)) {
|
||||
List<PointValues> pointValues = new ArrayList<>();
|
||||
List<PointValues> bkdReaders = new ArrayList<>();
|
||||
List<MergeState.DocMap> docMaps = new ArrayList<>();
|
||||
for (int i = 0; i < mergeState.pointsReaders.length; i++) {
|
||||
PointsReader reader = mergeState.pointsReaders[i];
|
||||
|
@ -231,19 +218,18 @@ public class Lucene60PointsWriter extends PointsWriter {
|
|||
FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
|
||||
FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
|
||||
if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
|
||||
PointValues aPointValues = reader60.readers.get(readerFieldInfo.number);
|
||||
if (aPointValues != null) {
|
||||
pointValues.add(aPointValues);
|
||||
PointValues bkdReader = reader60.readers.get(readerFieldInfo.number);
|
||||
if (bkdReader != null) {
|
||||
bkdReaders.add(bkdReader);
|
||||
docMaps.add(mergeState.docMaps[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Runnable finalizer = writer.merge(dataOut, dataOut, dataOut, docMaps, pointValues);
|
||||
if (finalizer != null) {
|
||||
indexFPs.put(fieldInfo.name, dataOut.getFilePointer());
|
||||
finalizer.run();
|
||||
long fp = writer.merge(dataOut, docMaps, bkdReaders);
|
||||
if (fp != -1) {
|
||||
indexFPs.put(fieldInfo.name, fp);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.backward_codecs.lucene60;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import org.apache.lucene.backward_codecs.lucene60.bkd.BKDWriter60;
|
||||
import org.apache.lucene.backward_codecs.lucene84.Lucene84RWCodec;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.document.BinaryPoint;
|
||||
|
@ -35,7 +36,6 @@ import org.apache.lucene.index.PointValues.Relation;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase.Nightly;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.bkd.BKDConfig;
|
||||
|
||||
/** Tests Lucene60PointsFormat */
|
||||
@Nightly // N-2 formats are only tested on nightly runs
|
||||
|
@ -45,7 +45,7 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
|
|||
|
||||
public TestLucene60PointsFormat() {
|
||||
codec = new Lucene84RWCodec();
|
||||
maxPointsInLeafNode = BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE;
|
||||
maxPointsInLeafNode = BKDWriter60.DEFAULT_MAX_POINTS_IN_LEAF_NODE;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -280,16 +280,23 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
|
|||
};
|
||||
|
||||
final long pointCount = points.estimatePointCount(onePointMatchVisitor);
|
||||
final long lastNodePointCount = totalValues % maxPointsInLeafNode;
|
||||
// With >1 dims, the tree is balanced
|
||||
long actualMaxPointsInLeafNode = points.size();
|
||||
while (actualMaxPointsInLeafNode > maxPointsInLeafNode) {
|
||||
actualMaxPointsInLeafNode = (actualMaxPointsInLeafNode + 1) / 2;
|
||||
}
|
||||
final long countPerFullLeaf = (actualMaxPointsInLeafNode + 1) / 2;
|
||||
final long countPerNotFullLeaf = (actualMaxPointsInLeafNode) / 2;
|
||||
assertTrue(
|
||||
"" + pointCount,
|
||||
pointCount == (maxPointsInLeafNode + 1) / 2 // common case
|
||||
|| pointCount == (lastNodePointCount + 1) / 2 // not fully populated leaf
|
||||
|| pointCount == 2 * ((maxPointsInLeafNode + 1) / 2) // if the point is a split value
|
||||
|| pointCount == ((maxPointsInLeafNode + 1) / 2) + ((lastNodePointCount + 1) / 2)
|
||||
// in extreme cases, a point can be shared by 4 leaves
|
||||
|| pointCount == 4 * ((maxPointsInLeafNode + 1) / 2)
|
||||
|| pointCount == 3 * ((maxPointsInLeafNode + 1) / 2) + ((lastNodePointCount + 1) / 2));
|
||||
pointCount + " vs " + actualMaxPointsInLeafNode,
|
||||
// common case, point in one leaf.
|
||||
pointCount >= countPerNotFullLeaf && pointCount <= countPerFullLeaf
|
||||
||
|
||||
// one dimension is a split value
|
||||
pointCount >= 2 * countPerNotFullLeaf && pointCount <= 2 * countPerFullLeaf
|
||||
||
|
||||
// both dimensions are split values
|
||||
pointCount >= 4 * countPerNotFullLeaf && pointCount <= 4 * countPerFullLeaf);
|
||||
|
||||
final long docCount = points.estimateDocCount(onePointMatchVisitor);
|
||||
if (multiValues) {
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.backward_codecs.lucene60.bkd;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
|
||||
class DocIdsWriter {
|
||||
|
||||
private DocIdsWriter() {}
|
||||
|
||||
static void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOException {
|
||||
// docs can be sorted either when all docs in a block have the same value
|
||||
// or when a segment is sorted
|
||||
boolean sorted = true;
|
||||
for (int i = 1; i < count; ++i) {
|
||||
if (docIds[start + i - 1] > docIds[start + i]) {
|
||||
sorted = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (sorted) {
|
||||
out.writeByte((byte) 0);
|
||||
int previous = 0;
|
||||
for (int i = 0; i < count; ++i) {
|
||||
int doc = docIds[start + i];
|
||||
out.writeVInt(doc - previous);
|
||||
previous = doc;
|
||||
}
|
||||
} else {
|
||||
long max = 0;
|
||||
for (int i = 0; i < count; ++i) {
|
||||
max |= Integer.toUnsignedLong(docIds[start + i]);
|
||||
}
|
||||
if (max <= 0xffffff) {
|
||||
out.writeByte((byte) 24);
|
||||
// write them the same way we are reading them.
|
||||
int i;
|
||||
for (i = 0; i < count - 7; i += 8) {
|
||||
int doc1 = docIds[start + i];
|
||||
int doc2 = docIds[start + i + 1];
|
||||
int doc3 = docIds[start + i + 2];
|
||||
int doc4 = docIds[start + i + 3];
|
||||
int doc5 = docIds[start + i + 4];
|
||||
int doc6 = docIds[start + i + 5];
|
||||
int doc7 = docIds[start + i + 6];
|
||||
int doc8 = docIds[start + i + 7];
|
||||
long l1 = (doc1 & 0xffffffL) << 40 | (doc2 & 0xffffffL) << 16 | ((doc3 >>> 8) & 0xffffL);
|
||||
long l2 =
|
||||
(doc3 & 0xffL) << 56
|
||||
| (doc4 & 0xffffffL) << 32
|
||||
| (doc5 & 0xffffffL) << 8
|
||||
| ((doc6 >> 16) & 0xffL);
|
||||
long l3 = (doc6 & 0xffffL) << 48 | (doc7 & 0xffffffL) << 24 | (doc8 & 0xffffffL);
|
||||
out.writeLong(l1);
|
||||
out.writeLong(l2);
|
||||
out.writeLong(l3);
|
||||
}
|
||||
for (; i < count; ++i) {
|
||||
out.writeShort((short) (docIds[start + i] >>> 8));
|
||||
out.writeByte((byte) docIds[start + i]);
|
||||
}
|
||||
} else {
|
||||
out.writeByte((byte) 32);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
out.writeInt(docIds[start + i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -87,7 +87,6 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
int nodeID;
|
||||
int level;
|
||||
final int rootNode;
|
||||
final int lastLeafNodeCount;
|
||||
// holds the min / max value of the current node.
|
||||
private final byte[] minPackedValue, maxPackedValue;
|
||||
// holds the previous value of the split dimension
|
||||
|
@ -107,9 +106,6 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
int treeDepth = getTreeDepth(leafNodeOffset);
|
||||
splitDimValueStack = new byte[treeDepth + 1][];
|
||||
splitDims = new int[treeDepth + 1];
|
||||
int lastLeafNodeCount = Math.toIntExact(pointCount % config.maxPointsInLeafNode);
|
||||
this.lastLeafNodeCount =
|
||||
lastLeafNodeCount == 0 ? config.maxPointsInLeafNode : lastLeafNodeCount;
|
||||
}
|
||||
|
||||
private int getTreeDepth(int numLeaves) {
|
||||
|
@ -285,9 +281,39 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
numLeaves = rightMostLeafNode - leftMostLeafNode + 1 + leafNodeOffset;
|
||||
}
|
||||
assert numLeaves == getNumLeavesSlow(nodeID) : numLeaves + " " + getNumLeavesSlow(nodeID);
|
||||
return rightMostLeafNode == (1 << getTreeDepth(leafNodeOffset) - 1) - 1
|
||||
? (long) (numLeaves - 1) * config.maxPointsInLeafNode + lastLeafNodeCount
|
||||
: (long) numLeaves * config.maxPointsInLeafNode;
|
||||
return sizeFromBalancedTree(leftMostLeafNode, rightMostLeafNode);
|
||||
}
|
||||
|
||||
private long sizeFromBalancedTree(int leftMostLeafNode, int rightMostLeafNode) {
|
||||
// number of points that need to be distributed between leaves, one per leaf
|
||||
final int extraPoints =
|
||||
Math.toIntExact(((long) config.maxPointsInLeafNode * leafNodeOffset) - pointCount);
|
||||
assert extraPoints < leafNodeOffset : "point excess should be lower than leafNodeOffset";
|
||||
// offset where we stop adding one point to the leaves
|
||||
final int nodeOffset = leafNodeOffset - extraPoints;
|
||||
long count = 0;
|
||||
for (int node = leftMostLeafNode; node <= rightMostLeafNode; node++) {
|
||||
// offsetPosition provides which extra point will be added to this node
|
||||
if (balanceTreeNodePosition(0, leafNodeOffset, node - leafNodeOffset, 0, 0) < nodeOffset) {
|
||||
count += config.maxPointsInLeafNode;
|
||||
} else {
|
||||
count += config.maxPointsInLeafNode - 1;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
private int balanceTreeNodePosition(
|
||||
int minNode, int maxNode, int node, int position, int level) {
|
||||
if (maxNode - minNode == 1) {
|
||||
return position;
|
||||
}
|
||||
final int mid = (minNode + maxNode + 1) >>> 1;
|
||||
if (mid > node) {
|
||||
return balanceTreeNodePosition(minNode, mid, node, position, level + 1);
|
||||
} else {
|
||||
return balanceTreeNodePosition(mid, maxNode, node, position + (1 << level), level + 1);
|
||||
}
|
||||
}
|
||||
|
||||
private int getNumLeavesSlow(int node) {
|
||||
|
|
|
@ -349,7 +349,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
new int[config.maxPointsInLeafNode]);
|
||||
|
||||
long indexFP = out.getFilePointer();
|
||||
writeIndex(out, leafBlockFPs, splitPackedValues);
|
||||
writeIndex(out, leafBlockFPs, splitPackedValues, Math.toIntExact(countPerLeaf));
|
||||
return indexFP;
|
||||
}
|
||||
|
||||
|
@ -478,7 +478,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
for (int i = 0; i < leafBlockFPs.size(); i++) {
|
||||
arr[i] = leafBlockFPs.get(i);
|
||||
}
|
||||
writeIndex(out, arr, index);
|
||||
writeIndex(out, arr, index, config.maxPointsInLeafNode);
|
||||
return indexFP;
|
||||
}
|
||||
|
||||
|
@ -714,16 +714,15 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
// System.out.println("Total nodes: " + innerNodeCount);
|
||||
|
||||
// Write index:
|
||||
long indexFP = out.getFilePointer();
|
||||
writeIndex(out, leafBlockFPs, splitPackedValues);
|
||||
writeIndex(out, leafBlockFPs, splitPackedValues, Math.toIntExact(countPerLeaf));
|
||||
return indexFP;
|
||||
}
|
||||
|
||||
/** Subclass can change how it writes the index. */
|
||||
private void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues)
|
||||
private void writeIndex(
|
||||
IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues, int maxPointsInLeafNode)
|
||||
throws IOException {
|
||||
write(out, NUM_DATA_DIMS);
|
||||
writeInt(out, config.numDims);
|
||||
|
@ -738,7 +737,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
newline(out);
|
||||
|
||||
write(out, MAX_LEAF_POINTS);
|
||||
writeInt(out, config.maxPointsInLeafNode);
|
||||
writeInt(out, maxPointsInLeafNode);
|
||||
newline(out);
|
||||
|
||||
write(out, INDEX_COUNT);
|
||||
|
|
|
@ -154,6 +154,8 @@ public class BKDReader extends PointValues {
|
|||
private final int leafNodeOffset;
|
||||
// version of the index
|
||||
private final int version;
|
||||
// total number of points
|
||||
final long pointCount;
|
||||
// last node might not be fully populated
|
||||
private final int lastLeafNodePointCount;
|
||||
// right most leaf node ID
|
||||
|
@ -181,7 +183,7 @@ public class BKDReader extends PointValues {
|
|||
config,
|
||||
numLeaves,
|
||||
version,
|
||||
Math.toIntExact(pointCount % config.maxPointsInLeafNode),
|
||||
pointCount,
|
||||
1,
|
||||
1,
|
||||
minPackedValue,
|
||||
|
@ -201,7 +203,7 @@ public class BKDReader extends PointValues {
|
|||
BKDConfig config,
|
||||
int numLeaves,
|
||||
int version,
|
||||
int lastLeafNodePointCount,
|
||||
long pointCount,
|
||||
int nodeID,
|
||||
int level,
|
||||
byte[] minPackedValue,
|
||||
|
@ -231,7 +233,9 @@ public class BKDReader extends PointValues {
|
|||
splitDimsPos = new int[treeDepth];
|
||||
negativeDeltas = new boolean[config.numIndexDims * treeDepth];
|
||||
// information about the unbalance of the tree so we can report the exact size below a node
|
||||
this.pointCount = pointCount;
|
||||
rightMostLeafNode = (1 << treeDepth - 1) - 1;
|
||||
int lastLeafNodePointCount = Math.toIntExact(pointCount % config.maxPointsInLeafNode);
|
||||
this.lastLeafNodePointCount =
|
||||
lastLeafNodePointCount == 0 ? config.maxPointsInLeafNode : lastLeafNodePointCount;
|
||||
// scratch objects, reused between clones so NN search are not creating those objects
|
||||
|
@ -252,7 +256,7 @@ public class BKDReader extends PointValues {
|
|||
config,
|
||||
leafNodeOffset,
|
||||
version,
|
||||
lastLeafNodePointCount,
|
||||
pointCount,
|
||||
nodeID,
|
||||
level,
|
||||
minPackedValue,
|
||||
|
@ -437,11 +441,48 @@ public class BKDReader extends PointValues {
|
|||
numLeaves = rightMostLeafNode - leftMostLeafNode + 1 + leafNodeOffset;
|
||||
}
|
||||
assert numLeaves == getNumLeavesSlow(nodeID) : numLeaves + " " + getNumLeavesSlow(nodeID);
|
||||
if (version < BKDWriter.VERSION_META_FILE && config.numDims > 1) {
|
||||
// before lucene 8.6, high dimensional trees were constructed as fully balanced trees.
|
||||
return sizeFromBalancedTree(leftMostLeafNode, rightMostLeafNode);
|
||||
}
|
||||
// size for an unbalanced tree.
|
||||
return rightMostLeafNode == this.rightMostLeafNode
|
||||
? (long) (numLeaves - 1) * config.maxPointsInLeafNode + lastLeafNodePointCount
|
||||
: (long) numLeaves * config.maxPointsInLeafNode;
|
||||
}
|
||||
|
||||
private long sizeFromBalancedTree(int leftMostLeafNode, int rightMostLeafNode) {
|
||||
// number of points that need to be distributed between leaves, one per leaf
|
||||
final int extraPoints =
|
||||
Math.toIntExact(((long) config.maxPointsInLeafNode * this.leafNodeOffset) - pointCount);
|
||||
assert extraPoints < leafNodeOffset : "point excess should be lower than leafNodeOffset";
|
||||
// offset where we stop adding one point to the leaves
|
||||
final int nodeOffset = leafNodeOffset - extraPoints;
|
||||
long count = 0;
|
||||
for (int node = leftMostLeafNode; node <= rightMostLeafNode; node++) {
|
||||
// offsetPosition provides which extra point will be added to this node
|
||||
if (balanceTreeNodePosition(0, leafNodeOffset, node - leafNodeOffset, 0, 0) < nodeOffset) {
|
||||
count += config.maxPointsInLeafNode;
|
||||
} else {
|
||||
count += config.maxPointsInLeafNode - 1;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
private int balanceTreeNodePosition(
|
||||
int minNode, int maxNode, int node, int position, int level) {
|
||||
if (maxNode - minNode == 1) {
|
||||
return position;
|
||||
}
|
||||
final int mid = (minNode + maxNode + 1) >>> 1;
|
||||
if (mid > node) {
|
||||
return balanceTreeNodePosition(minNode, mid, node, position, level + 1);
|
||||
} else {
|
||||
return balanceTreeNodePosition(mid, maxNode, node, position + (1 << level), level + 1);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visitDocIDs(PointValues.IntersectVisitor visitor) throws IOException {
|
||||
addAll(visitor, false);
|
||||
|
|
|
@ -822,7 +822,7 @@ public abstract class BasePointsFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
if (dimValues == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assertSize(dimValues.getPointTree());
|
||||
byte[] leafMinValues = dimValues.getMinPackedValue();
|
||||
byte[] leafMaxValues = dimValues.getMaxPackedValue();
|
||||
for (int dim = 0; dim < numIndexDims; dim++) {
|
||||
|
@ -1063,6 +1063,36 @@ public abstract class BasePointsFormatTestCase extends BaseIndexFileFormatTestCa
|
|||
}
|
||||
}
|
||||
|
||||
private void assertSize(PointValues.PointTree tree) throws IOException {
|
||||
final PointValues.PointTree clone = tree.clone();
|
||||
assertEquals(clone.size(), tree.size());
|
||||
final long[] size = new long[] {0};
|
||||
clone.visitDocIDs(
|
||||
new IntersectVisitor() {
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
size[0]++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
});
|
||||
assertEquals(size[0], tree.size());
|
||||
if (tree.moveToChild()) {
|
||||
do {
|
||||
assertSize(tree);
|
||||
} while (tree.moveToSibling());
|
||||
tree.moveToParent();
|
||||
}
|
||||
}
|
||||
|
||||
public void testAddIndexes() throws IOException {
|
||||
Directory dir1 = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir1);
|
||||
|
|
Loading…
Reference in New Issue