From f785d2a0341ab5192e963ff5a470fb09b6b2709b Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Fri, 25 Mar 2016 15:40:16 -0400 Subject: [PATCH] randomize how BKDWriter splits in RandomCodec so we exercise geo shape APIs with more exotic rectangles --- .../codecs/lucene60/Lucene60PointsWriter.java | 4 +- .../org/apache/lucene/util/bkd/BKDWriter.java | 3 +- .../org/apache/lucene/index/RandomCodec.java | 68 ++++++++++++++++++- 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java index 7bb1faf6803..9098cfbb01a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java @@ -42,8 +42,8 @@ import org.apache.lucene.util.bkd.BKDWriter; /** Writes dimensional values */ public class Lucene60PointsWriter extends PointsWriter implements Closeable { - final IndexOutput dataOut; - final Map<String,Long> indexFPs = new HashMap<>(); + protected final IndexOutput dataOut; + protected final Map<String,Long> indexFPs = new HashMap<>(); final SegmentWriteState writeState; final int maxPointsInLeafNode; final double maxMBSortInHeap; diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index dd2ec5df7f7..5002e50445b 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java @@ -1033,8 +1033,7 @@ public class BKDWriter implements Closeable { return true; } - // TODO: make this protected when we want to subclass to play with different splitting criteria - private int split(byte[] minPackedValue, byte[] maxPackedValue) { + protected int split(byte[] minPackedValue, byte[] maxPackedValue) { // Find which dim has the largest span so we can split on it: 
int splitDim = -1; for(int dim=0;dim 0) { + indexFPs.put(fieldInfo.name, writer.finish(dataOut)); + } + } + } + }; } @Override @@ -152,6 +197,7 @@ public class RandomCodec extends AssertingCodec { maxPointsInLeafNode = TestUtil.nextInt(random, 16, 2048); maxMBSortInHeap = 4.0 + (3*random.nextDouble()); + bkdSplitRandomSeed = random.nextInt(); add(avoidCodecs, TestUtil.getDefaultPostingsFormat(minItemsPerBlock, maxItemsPerBlock), @@ -221,4 +267,24 @@ public class RandomCodec extends AssertingCodec { ", maxPointsInLeafNode=" + maxPointsInLeafNode + ", maxMBSortInHeap=" + maxMBSortInHeap; } + + /** Just like {@link BKDWriter} except it evilly picks random ways to split cells on + * recursion to try to provoke geo APIs that get upset at fun rectangles. */ + private static class RandomlySplittingBKDWriter extends BKDWriter { + + final Random random; + + public RandomlySplittingBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, + int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap, + long totalPointCount, boolean singleValuePerDoc, int randomSeed) throws IOException { + super(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount, singleValuePerDoc); + this.random = new Random(randomSeed); + } + + @Override + protected int split(byte[] minPackedValue, byte[] maxPackedValue) { + // BKD normally defaults by the widest dimension, to try to make as squarish cells as possible, but we just pick a random one ;) + return random.nextInt(numDims); + } + } }