LUCENE-10262: Lift up restrictions for navigating PointValues#PointTree (#476)

This change allows random navigation of a PointValues#PointTree.
This commit is contained in:
Ignacio Vera 2021-11-26 07:42:13 +01:00 committed by GitHub
parent d973e50c15
commit a590c6d2a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 98 additions and 28 deletions

View File

@ -58,6 +58,9 @@ Improvements
* LUCENE-9820: Extract BKD tree interface and move intersecting logic to the
PointValues abstract class. (Ignacio Vera, Adrien Grand)
* LUCENE-10262: Lift up restrictions for navigating PointValues#PointTree
added in LUCENE-9820 (Ignacio Vera)
Optimizations
---------------------

View File

@ -238,16 +238,12 @@ public abstract class PointValues {
*/
public interface PointTree extends Cloneable {
/**
* Clone, the current node becomes the root of the new tree. The method should not be called
* after a successful call to {@link #moveToParent()}
*/
/** Clone, the current node becomes the root of the new tree. */
PointTree clone();
/**
* Move to the first child node and return {@code true} upon success. Returns {@code false} for
* leaf nodes and {@code true} otherwise. The method should not be called after a successful
* call to {@link #moveToParent()}
* leaf nodes and {@code true} otherwise.
*/
boolean moveToChild() throws IOException;

View File

@ -132,6 +132,8 @@ public class BKDReader extends PointValues {
private final IndexInput leafNodes;
// holds the minimum (left most) leaf block file pointer for each level we've recursed to:
private final long[] leafBlockFPStack;
// holds the address, in the off-heap index, after reading the node data of each level:
private final int[] readNodeDataPositions;
// holds the address, in the off-heap index, of the right-node of each level:
private final int[] rightNodePositions;
// holds the splitDim position for each level:
@ -229,6 +231,7 @@ public class BKDReader extends PointValues {
splitValuesStack = new byte[treeDepth][];
splitValuesStack[0] = new byte[config.packedIndexBytesLength];
leafBlockFPStack = new long[treeDepth + 1];
readNodeDataPositions = new int[treeDepth + 1];
rightNodePositions = new int[treeDepth];
splitDimsPos = new int[treeDepth];
negativeDeltas = new boolean[config.numIndexDims * treeDepth];
@ -270,6 +273,7 @@ public class BKDReader extends PointValues {
if (isLeafNode() == false) {
// copy node data
index.rightNodePositions[index.level] = rightNodePositions[level];
index.readNodeDataPositions[index.level] = readNodeDataPositions[level];
index.splitValuesStack[index.level] = splitValuesStack[level].clone();
System.arraycopy(
negativeDeltas,
@ -297,11 +301,18 @@ public class BKDReader extends PointValues {
if (isLeafNode()) {
return false;
}
resetNodeDataPosition();
pushBoundsLeft();
pushLeft();
return true;
}
private void resetNodeDataPosition() throws IOException {
// move position of the inner nodes index to visit the first child
assert readNodeDataPositions[level] <= innerNodes.getFilePointer();
innerNodes.seek(readNodeDataPositions[level]);
}
private void pushBoundsLeft() {
final int splitDimPos = splitDimsPos[level];
if (splitDimValueStack[level] == null) {
@ -485,6 +496,7 @@ public class BKDReader extends PointValues {
@Override
public void visitDocIDs(PointValues.IntersectVisitor visitor) throws IOException {
resetNodeDataPosition();
addAll(visitor, false);
}
@ -515,15 +527,20 @@ public class BKDReader extends PointValues {
@Override
public void visitDocValues(PointValues.IntersectVisitor visitor) throws IOException {
resetNodeDataPosition();
visitLeavesOneByOne(visitor);
}
private void visitLeavesOneByOne(PointValues.IntersectVisitor visitor) throws IOException {
if (isLeafNode()) {
// Leaf node
visitDocValues(visitor, getLeafBlockFP());
} else {
pushLeft();
visitDocValues(visitor);
visitLeavesOneByOne(visitor);
pop();
pushRight();
visitDocValues(visitor);
visitLeavesOneByOne(visitor);
pop();
}
}
@ -636,6 +653,7 @@ public class BKDReader extends PointValues {
leftNumBytes = 0;
}
rightNodePositions[level] = Math.toIntExact(innerNodes.getFilePointer()) + leftNumBytes;
readNodeDataPositions[level] = Math.toIntExact(innerNodes.getFilePointer());
}
}

View File

@ -890,32 +890,62 @@ public class TestBKD extends LuceneTestCase {
private void assertSize(PointValues.PointTree tree) throws IOException {
final PointValues.PointTree clone = tree.clone();
assertEquals(clone.size(), tree.size());
final long[] size = new long[] {0};
clone.visitDocIDs(
// rarely continue with the clone tree
tree = rarely() ? clone : tree;
final long[] visitDocIDSize = new long[] {0};
final long[] visitDocValuesSize = new long[] {0};
final IntersectVisitor visitor =
new IntersectVisitor() {
@Override
public void visit(int docID) {
size[0]++;
visitDocIDSize[0]++;
}
@Override
public void visit(int docID, byte[] packedValue) {
throw new UnsupportedOperationException();
visitDocValuesSize[0]++;
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
throw new UnsupportedOperationException();
return Relation.CELL_CROSSES_QUERY;
}
});
assertEquals(size[0], tree.size());
};
if (random().nextBoolean()) {
tree.visitDocIDs(visitor);
tree.visitDocValues(visitor);
} else {
tree.visitDocValues(visitor);
tree.visitDocIDs(visitor);
}
assertEquals(visitDocIDSize[0], visitDocValuesSize[0]);
assertEquals(visitDocIDSize[0], tree.size());
if (tree.moveToChild()) {
do {
randomPointTreeNavigation(tree);
assertSize(tree);
} while (tree.moveToSibling());
tree.moveToParent();
}
}
private void randomPointTreeNavigation(PointValues.PointTree tree) throws IOException {
byte[] minPackedValue = tree.getMinPackedValue().clone();
byte[] maxPackedValue = tree.getMaxPackedValue().clone();
long size = tree.size();
if (random().nextBoolean() && tree.moveToChild()) {
randomPointTreeNavigation(tree);
if (random().nextBoolean() && tree.moveToSibling()) {
randomPointTreeNavigation(tree);
}
tree.moveToParent();
}
// we always finish on the same node we started
assertArrayEquals(minPackedValue, tree.getMinPackedValue());
assertArrayEquals(maxPackedValue, tree.getMaxPackedValue());
assertEquals(size, tree.size());
}
private void assertHits(BitSet hits, BitSet expected) {
int limit = Math.max(expected.length(), hits.length());
for (int docID = 0; docID < limit; docID++) {

View File

@ -1139,12 +1139,10 @@ public class AssertingLeafReader extends FilterLeafReader {
}
}
/** Validates that we don't call moveToChild() or clone() after having called moveToParent() */
static class AssertingPointTree implements PointValues.PointTree {
final PointValues pointValues;
final PointValues.PointTree in;
private boolean moveToParent;
AssertingPointTree(PointValues pointValues, PointValues.PointTree in) {
this.pointValues = pointValues;
@ -1153,25 +1151,21 @@ public class AssertingLeafReader extends FilterLeafReader {
@Override
public PointValues.PointTree clone() {
assert moveToParent == false : "calling clone() after calling moveToParent()";
return new AssertingPointTree(pointValues, in.clone());
}
@Override
public boolean moveToChild() throws IOException {
assert moveToParent == false : "calling moveToChild() after calling moveToParent()";
return in.moveToChild();
}
@Override
public boolean moveToSibling() throws IOException {
moveToParent = false;
return in.moveToSibling();
}
@Override
public boolean moveToParent() throws IOException {
moveToParent = true;
return in.moveToParent();
}

View File

@ -1066,33 +1066,62 @@ public abstract class BasePointsFormatTestCase extends BaseIndexFileFormatTestCa
private void assertSize(PointValues.PointTree tree) throws IOException {
final PointValues.PointTree clone = tree.clone();
assertEquals(clone.size(), tree.size());
final long[] size = new long[] {0};
clone.visitDocIDs(
// rarely continue with the clone tree
tree = rarely() ? clone : tree;
final long[] visitDocIDSize = new long[] {0};
final long[] visitDocValuesSize = new long[] {0};
final IntersectVisitor visitor =
new IntersectVisitor() {
@Override
public void visit(int docID) {
size[0]++;
visitDocIDSize[0]++;
}
@Override
public void visit(int docID, byte[] packedValue) {
throw new UnsupportedOperationException();
visitDocValuesSize[0]++;
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
throw new UnsupportedOperationException();
return Relation.CELL_CROSSES_QUERY;
}
});
assertEquals(size[0], tree.size());
};
if (random().nextBoolean()) {
tree.visitDocIDs(visitor);
tree.visitDocValues(visitor);
} else {
tree.visitDocValues(visitor);
tree.visitDocIDs(visitor);
}
assertEquals(visitDocIDSize[0], visitDocValuesSize[0]);
assertEquals(visitDocIDSize[0], tree.size());
if (tree.moveToChild()) {
do {
randomPointTreeNavigation(tree);
assertSize(tree);
} while (tree.moveToSibling());
tree.moveToParent();
}
}
private void randomPointTreeNavigation(PointValues.PointTree tree) throws IOException {
byte[] minPackedValue = tree.getMinPackedValue().clone();
byte[] maxPackedValue = tree.getMaxPackedValue().clone();
long size = tree.size();
if (random().nextBoolean() && tree.moveToChild()) {
randomPointTreeNavigation(tree);
if (random().nextBoolean() && tree.moveToSibling()) {
randomPointTreeNavigation(tree);
}
tree.moveToParent();
}
// we always finish on the same node we started
assertArrayEquals(minPackedValue, tree.getMinPackedValue());
assertArrayEquals(maxPackedValue, tree.getMaxPackedValue());
assertEquals(size, tree.size());
}
public void testAddIndexes() throws IOException {
Directory dir1 = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir1);