1.1 MB with 128 points
+// per leaf, and you can reduce that by putting more points per leaf
+// - we could use threads while building; the higher nodes are very parallelizable
+
+/** Recursively builds a block KD-tree to assign all incoming points in N-dim space to smaller
+ * and smaller N-dim rectangles (cells) until the number of points in a given
+ * rectangle is <= maxPointsInLeafNode
. The tree is
+ * fully balanced, which means the leaf nodes will have between 50% and 100% of
+ * the requested maxPointsInLeafNode
. Values that fall exactly
+ * on a cell boundary may be in either cell.
+ *
+ * The number of dimensions can be 1 to 255, but every byte[] value is fixed length.
+ *
+ *
+ * See this paper for details.
+ *
+ *
This consumes heap during writing: it allocates a LongBitSet(numPoints)
,
+ * and then uses up to the specified {@code maxMBSortInHeap} heap space for writing.
+ *
+ *
+ * NOTE: This can write at most Integer.MAX_VALUE * maxPointsInLeafNode
total points, and
+ *
+ * @lucene.experimental */
+
+public final class BKDWriter implements Closeable {
+
+ static final String CODEC_NAME = "BKD";
+ static final int VERSION_START = 0;
+ static final int VERSION_CURRENT = VERSION_START;
+
+ /** How many bytes each docs takes in the fixed-width offline format */
+ private final int bytesPerDoc;
+
+ public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 1024;
+
+ public static final float DEFAULT_MAX_MB_SORT_IN_HEAP = 16.0f;
+
+ /** Maximum number of dimensions */
+ public static final int MAX_DIMS = 15;
+
+ /** How many dimensions we are indexing */
+ final int numDims;
+
+ /** How many bytes each value in each dimension takes. */
+ final int bytesPerDim;
+
+ /** numDims * bytesPerDim */
+ final int packedBytesLength;
+
+ final TrackingDirectoryWrapper tempDir;
+ final String tempFileNamePrefix;
+
+ final byte[] scratchDiff;
+ final byte[] scratchPackedValue;
+ final byte[] scratch1;
+ final byte[] scratch2;
+
+ private OfflinePointWriter offlinePointWriter;
+ private HeapPointWriter heapPointWriter;
+
+ private IndexOutput tempInput;
+ private final int maxPointsInLeafNode;
+ private final int maxPointsSortInHeap;
+
+ private long pointCount;
+
+ public BKDWriter(Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim) throws IOException {
+ this(tempDir, tempFileNamePrefix, numDims, bytesPerDim, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_MB_SORT_IN_HEAP);
+ }
+
+ public BKDWriter(Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap) throws IOException {
+ verifyParams(numDims, maxPointsInLeafNode, maxMBSortInHeap);
+ // We use tracking dir to deal with removing files on exception, so each place that
+ // creates temp files doesn't need crazy try/finally/sucess logic:
+ this.tempDir = new TrackingDirectoryWrapper(tempDir);
+ this.tempFileNamePrefix = tempFileNamePrefix;
+ this.maxPointsInLeafNode = maxPointsInLeafNode;
+ this.numDims = numDims;
+ this.bytesPerDim = bytesPerDim;
+ packedBytesLength = numDims * bytesPerDim;
+
+ scratchDiff = new byte[bytesPerDim];
+ scratchPackedValue = new byte[packedBytesLength];
+ scratch1 = new byte[packedBytesLength];
+ scratch2 = new byte[packedBytesLength];
+
+ // dimensional values (numDims * bytesPerDim) + ord (long) + docID (int)
+ bytesPerDoc = packedBytesLength + RamUsageEstimator.NUM_BYTES_LONG + RamUsageEstimator.NUM_BYTES_INT;
+
+ // As we recurse, we compute temporary partitions of the data, halving the
+ // number of points at each recursion. Once there are few enough points,
+ // we can switch to sorting in heap instead of offline (on disk). At any
+ // time in the recursion, we hold the number of points at that level, plus
+ // all recursive halves (i.e. 16 + 8 + 4 + 2) so the memory usage is 2X
+ // what that level would consume, so we multiply by 0.5 to convert from
+ // bytes to points here. Each dimension has its own sorted partition, so
+ // we must divide by numDims as wel.
+
+ maxPointsSortInHeap = (int) (0.5 * (maxMBSortInHeap * 1024 * 1024) / (bytesPerDoc * numDims));
+
+ // Finally, we must be able to hold at least the leaf node in heap during build:
+ if (maxPointsSortInHeap < maxPointsInLeafNode) {
+ throw new IllegalArgumentException("maxMBSortInHeap=" + maxMBSortInHeap + " only allows for maxPointsSortInHeap=" + maxPointsSortInHeap + ", but this is less than maxPointsInLeafNode=" + maxPointsInLeafNode + "; either increase maxMBSortInHeap or decrease maxPointsInLeafNode");
+ }
+
+ // We write first maxPointsSortInHeap in heap, then cutover to offline for additional points:
+ heapPointWriter = new HeapPointWriter(16, maxPointsSortInHeap, packedBytesLength);
+ }
+
+ public static void verifyParams(int numDims, int maxPointsInLeafNode, double maxMBSortInHeap) {
+ // We encode dim in a single byte in the splitPackedValues, but we only expose 4 bits for it now, in case we want to use
+ // remaining 4 bits for another purpose later
+ if (numDims < 1 || numDims > MAX_DIMS) {
+ throw new IllegalArgumentException("numDims must be 1 .. " + MAX_DIMS + " (got: " + numDims + ")");
+ }
+ if (maxPointsInLeafNode <= 0) {
+ throw new IllegalArgumentException("maxPointsInLeafNode must be > 0; got " + maxPointsInLeafNode);
+ }
+ if (maxPointsInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) {
+ throw new IllegalArgumentException("maxPointsInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsInLeafNode);
+ }
+ if (maxMBSortInHeap < 0.0) {
+ throw new IllegalArgumentException("maxMBSortInHeap must be >= 0.0 (got: " + maxMBSortInHeap + ")");
+ }
+ }
+
+ /** If the current segment has too many points then we switchover to temp files / offline sort. */
+ private void switchToOffline() throws IOException {
+
+ // For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree:
+ offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength);
+ PointReader reader = heapPointWriter.getReader(0);
+ for(int i=0;i= maxPointsSortInHeap) {
+ if (offlinePointWriter == null) {
+ switchToOffline();
+ }
+ offlinePointWriter.append(packedValue, pointCount, docID);
+ } else {
+ // Not too many points added yet, continue using heap:
+ heapPointWriter.append(packedValue, pointCount, docID);
+ }
+
+ pointCount++;
+ }
+
+ // TODO: if we fixed each partition step to just record the file offset at the "split point", we could probably handle variable length
+ // encoding and not have our own ByteSequencesReader/Writer
+
+ /** If dim=-1 we sort by docID, else by that dim. */
+ private void sortHeapPointWriter(final HeapPointWriter writer, int start, int length, int dim) {
+
+ assert pointCount < Integer.MAX_VALUE;
+
+ // All buffered points are still in heap; just do in-place sort:
+ new InPlaceMergeSorter() {
+ @Override
+ protected void swap(int i, int j) {
+ int docID = writer.docIDs[i];
+ writer.docIDs[i] = writer.docIDs[j];
+ writer.docIDs[j] = docID;
+
+ long ord = writer.ords[i];
+ writer.ords[i] = writer.ords[j];
+ writer.ords[j] = ord;
+
+ // scratch1 = values[i]
+ writer.readPackedValue(i, scratch1);
+ // scratch2 = values[j]
+ writer.readPackedValue(j, scratch2);
+ // values[i] = scratch2
+ writer.writePackedValue(i, scratch2);
+ // values[j] = scratch1
+ writer.writePackedValue(j, scratch1);
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ if (dim != -1) {
+ writer.readPackedValue(i, scratch1);
+ writer.readPackedValue(j, scratch2);
+ int cmp = BKDUtil.compare(bytesPerDim, scratch1, dim, scratch2, dim);
+ if (cmp != 0) {
+ return cmp;
+ }
+ }
+
+ // Tie-break
+ int cmp = Integer.compare(writer.docIDs[i], writer.docIDs[j]);
+ if (cmp != 0) {
+ return cmp;
+ }
+
+ return Long.compare(writer.ords[i], writer.ords[j]);
+ }
+ }.sort(start, start+length);
+ }
+
+ private PointWriter sort(int dim) throws IOException {
+
+ if (heapPointWriter != null) {
+
+ assert tempInput == null;
+
+ // We never spilled the incoming points to disk, so now we sort in heap:
+ HeapPointWriter sorted;
+
+ if (dim == 0) {
+ // First dim can re-use the current heap writer
+ sorted = heapPointWriter;
+ } else {
+ // Subsequent dims need a private copy
+ sorted = new HeapPointWriter((int) pointCount, (int) pointCount, packedBytesLength);
+ sorted.copyFrom(heapPointWriter);
+ }
+
+ sortHeapPointWriter(sorted, 0, (int) pointCount, dim);
+
+ sorted.close();
+ return sorted;
+ } else {
+
+ // Offline sort:
+ assert tempInput != null;
+
+ final ByteArrayDataInput reader = new ByteArrayDataInput();
+ Comparator cmp = new Comparator() {
+ private final ByteArrayDataInput readerB = new ByteArrayDataInput();
+
+ @Override
+ public int compare(BytesRef a, BytesRef b) {
+ reader.reset(a.bytes, a.offset, a.length);
+ reader.readBytes(scratch1, 0, scratch1.length);
+ final int docIDA = reader.readVInt();
+ final long ordA = reader.readVLong();
+
+ reader.reset(b.bytes, b.offset, b.length);
+ reader.readBytes(scratch2, 0, scratch2.length);
+ final int docIDB = reader.readVInt();
+ final long ordB = reader.readVLong();
+
+ int cmp = BKDUtil.compare(bytesPerDim, scratch1, dim, scratch2, dim);
+
+ if (cmp != 0) {
+ return cmp;
+ }
+
+ // Tie-break
+ cmp = Integer.compare(docIDA, docIDB);
+ if (cmp != 0) {
+ return cmp;
+ }
+
+ return Long.compare(ordA, ordB);
+ }
+ };
+
+ // TODO: this is sort of sneaky way to get the final OfflinePointWriter from OfflineSorter:
+ IndexOutput[] lastWriter = new IndexOutput[1];
+
+ OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp) {
+
+ /** We write/read fixed-byte-width file that {@link OfflinePointReader} can read. */
+ @Override
+ protected ByteSequencesWriter getWriter(IndexOutput out) {
+ lastWriter[0] = out;
+ return new ByteSequencesWriter(out) {
+ @Override
+ public void write(byte[] bytes, int off, int len) throws IOException {
+ if (len != bytesPerDoc) {
+ throw new IllegalArgumentException("len=" + len + " bytesPerDoc=" + bytesPerDoc);
+ }
+ out.writeBytes(bytes, off, len);
+ }
+ };
+ }
+
+ /** We write/read fixed-byte-width file that {@link OfflinePointReader} can read. */
+ @Override
+ protected ByteSequencesReader getReader(IndexInput in) throws IOException {
+ return new ByteSequencesReader(in) {
+ @Override
+ public boolean read(BytesRefBuilder ref) throws IOException {
+ ref.grow(bytesPerDoc);
+ try {
+ in.readBytes(ref.bytes(), 0, bytesPerDoc);
+ } catch (EOFException eofe) {
+ return false;
+ }
+ ref.setLength(bytesPerDoc);
+ return true;
+ }
+ };
+ }
+ };
+
+ sorter.sort(tempInput.getName());
+
+ assert lastWriter[0] != null;
+
+ return new OfflinePointWriter(tempDir, lastWriter[0], packedBytesLength, pointCount);
+ }
+ }
+
+ /** Writes the BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */
+ public long finish(IndexOutput out) throws IOException {
+ //System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapWriter);
+
+ // TODO: specialize the 1D case? it's much faster at indexing time (no partitioning on recruse...)
+
+ if (offlinePointWriter != null) {
+ offlinePointWriter.close();
+ }
+
+ LongBitSet ordBitSet = new LongBitSet(pointCount);
+
+ long countPerLeaf = pointCount;
+ long innerNodeCount = 1;
+
+ while (countPerLeaf > maxPointsInLeafNode) {
+ countPerLeaf = (countPerLeaf+1)/2;
+ innerNodeCount *= 2;
+ }
+
+ //System.out.println("innerNodeCount=" + innerNodeCount);
+
+ if (1+2*innerNodeCount >= Integer.MAX_VALUE) {
+ throw new IllegalStateException("too many nodes; increase maxPointsInLeafNode (currently " + maxPointsInLeafNode + ") and reindex");
+ }
+
+ innerNodeCount--;
+
+ int numLeaves = (int) (innerNodeCount+1);
+
+ // NOTE: we could save the 1+ here, to use a bit less heap at search time, but then we'd need a somewhat costly check at each
+ // step of the recursion to recompute the split dim:
+
+ // Indexed by nodeID, but first (root) nodeID is 1. We do 1+ because the lead byte at each recursion says which dim we split on.
+ byte[] splitPackedValues = new byte[Math.toIntExact(numLeaves*(1+bytesPerDim))];
+
+ // +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 7)
+ long[] leafBlockFPs = new long[numLeaves];
+
+ // Make sure the math above "worked":
+ assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;
+
+ // Sort all docs once by each dimension:
+ PathSlice[] sortedPointWriters = new PathSlice[numDims];
+
+ byte[] minPacked = new byte[packedBytesLength];
+ byte[] maxPacked = new byte[packedBytesLength];
+ Arrays.fill(maxPacked, (byte) 0xff);
+
+ boolean success = false;
+ try {
+ for(int dim=0;dim= the splitValue). */
+ private byte[] markRightTree(long rightCount, int splitDim, PathSlice source, LongBitSet ordBitSet) throws IOException {
+
+ // Now we mark ords that fall into the right half, so we can partition on all other dims that are not the split dim:
+ assert ordBitSet.cardinality() == 0: "cardinality=" + ordBitSet.cardinality();
+
+ // Read the split value, then mark all ords in the right tree (larger than the split value):
+ try (PointReader reader = source.writer.getReader(source.start + source.count - rightCount)) {
+ boolean result = reader.next();
+ assert result;
+
+ System.arraycopy(reader.packedValue(), splitDim*bytesPerDim, scratch1, 0, bytesPerDim);
+
+ ordBitSet.set(reader.ord());
+
+ // Start at 1 because we already did the first value above (so we could keep the split value):
+ for(int i=1;i blocks;
+ final int valuesPerBlock;
+ final int packedBytesLength;
+ final long[] ords;
+ final int[] docIDs;
+ final int end;
+ final byte[] scratch;
+
+ HeapPointReader(List blocks, int valuesPerBlock, int packedBytesLength, long[] ords, int[] docIDs, int start, int end) {
+ this.blocks = blocks;
+ this.valuesPerBlock = valuesPerBlock;
+ this.ords = ords;
+ this.docIDs = docIDs;
+ curRead = start-1;
+ this.end = end;
+ this.packedBytesLength = packedBytesLength;
+ scratch = new byte[packedBytesLength];
+ }
+
+ void writePackedValue(int index, byte[] bytes) {
+ int block = index / valuesPerBlock;
+ int blockIndex = index % valuesPerBlock;
+ while (blocks.size() <= block) {
+ blocks.add(new byte[valuesPerBlock*packedBytesLength]);
+ }
+ System.arraycopy(bytes, 0, blocks.get(blockIndex), blockIndex * packedBytesLength, packedBytesLength);
+ }
+
+ void readPackedValue(int index, byte[] bytes) {
+ int block = index / valuesPerBlock;
+ int blockIndex = index % valuesPerBlock;
+ System.arraycopy(blocks.get(block), blockIndex * packedBytesLength, bytes, 0, packedBytesLength);
+ }
+
+ @Override
+ public boolean next() {
+ curRead++;
+ return curRead < end;
+ }
+
+ @Override
+ public byte[] packedValue() {
+ readPackedValue(curRead, scratch);
+ return scratch;
+ }
+
+ @Override
+ public int docID() {
+ return docIDs[curRead];
+ }
+
+ @Override
+ public long ord() {
+ return ords[curRead];
+ }
+
+ @Override
+ public void close() {
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
new file mode 100644
index 00000000000..bea8d981ae9
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
@@ -0,0 +1,126 @@
+package org.apache.lucene.util.bkd;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+
+final class HeapPointWriter implements PointWriter {
+ int[] docIDs;
+ long[] ords;
+ private int nextWrite;
+ private boolean closed;
+ final int maxSize;
+ final int valuesPerBlock;
+ final int packedBytesLength;
+ // NOTE: can't use ByteBlockPool because we need random-write access when sorting in heap
+ final List blocks = new ArrayList<>();
+
+ public HeapPointWriter(int initSize, int maxSize, int packedBytesLength) {
+ docIDs = new int[initSize];
+ ords = new long[initSize];
+ this.maxSize = maxSize;
+ this.packedBytesLength = packedBytesLength;
+ // 4K per page, unless each value is > 4K:
+ valuesPerBlock = Math.max(1, 4096/packedBytesLength);
+ }
+
+ public void copyFrom(HeapPointWriter other) {
+ if (docIDs.length < other.nextWrite) {
+ throw new IllegalStateException("docIDs.length=" + docIDs.length + " other.nextWrite=" + other.nextWrite);
+ }
+ System.arraycopy(other.docIDs, 0, docIDs, 0, other.nextWrite);
+ System.arraycopy(other.ords, 0, ords, 0, other.nextWrite);
+ for(byte[] block : other.blocks) {
+ blocks.add(block.clone());
+ }
+ nextWrite = other.nextWrite;
+ }
+
+ void readPackedValue(int index, byte[] bytes) {
+ assert bytes.length == packedBytesLength;
+ int block = index / valuesPerBlock;
+ int blockIndex = index % valuesPerBlock;
+ System.arraycopy(blocks.get(block), blockIndex * packedBytesLength, bytes, 0, packedBytesLength);
+ }
+
+ void writePackedValue(int index, byte[] bytes) {
+ assert bytes.length == packedBytesLength;
+ int block = index / valuesPerBlock;
+ int blockIndex = index % valuesPerBlock;
+ //System.out.println("writePackedValue: index=" + index + " bytes.length=" + bytes.length + " block=" + block + " blockIndex=" + blockIndex + " valuesPerBlock=" + valuesPerBlock);
+ while (blocks.size() <= block) {
+ // If this is the last block, only allocate as large as necessary for maxSize:
+ int valuesInBlock = Math.min(valuesPerBlock, maxSize - (blocks.size() * valuesPerBlock));
+ blocks.add(new byte[valuesInBlock*packedBytesLength]);
+ }
+ System.arraycopy(bytes, 0, blocks.get(block), blockIndex * packedBytesLength, packedBytesLength);
+ }
+
+ private int[] growExact(int[] arr, int size) {
+ assert size > arr.length;
+ int[] newArr = new int[size];
+ System.arraycopy(arr, 0, newArr, 0, arr.length);
+ return newArr;
+ }
+
+ private long[] growExact(long[] arr, int size) {
+ assert size > arr.length;
+ long[] newArr = new long[size];
+ System.arraycopy(arr, 0, newArr, 0, arr.length);
+ return newArr;
+ }
+
+ @Override
+ public void append(byte[] packedValue, long ord, int docID) {
+ assert closed == false;
+ assert packedValue.length == packedBytesLength;
+ if (ords.length == nextWrite) {
+ int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, RamUsageEstimator.NUM_BYTES_INT));
+ assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite;
+ ords = growExact(ords, nextSize);
+ docIDs = growExact(docIDs, nextSize);
+ }
+ writePackedValue(nextWrite, packedValue);
+ ords[nextWrite] = ord;
+ docIDs[nextWrite] = docID;
+ nextWrite++;
+ }
+
+ @Override
+ public PointReader getReader(long start) {
+ return new HeapPointReader(blocks, valuesPerBlock, packedBytesLength, ords, docIDs, (int) start, nextWrite);
+ }
+
+ @Override
+ public void close() {
+ closed = true;
+ }
+
+ @Override
+ public void destroy() {
+ }
+
+ @Override
+ public String toString() {
+ return "HeapPointWriter(count=" + nextWrite + " alloc=" + ords.length + ")";
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
new file mode 100644
index 00000000000..57b28a9dbc6
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
@@ -0,0 +1,90 @@
+package org.apache.lucene.util.bkd;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.EOFException;
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/** Reads points from disk in a fixed-with format, previously written with {@link OfflinePointWriter}. */
+final class OfflinePointReader implements PointReader {
+ long countLeft;
+ private final IndexInput in;
+ private final byte[] packedValue;
+ private long ord;
+ private int docID;
+ final int bytesPerDoc;
+
+ OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long start, long length) throws IOException {
+ this(tempDir.openInput(tempFileName, IOContext.READONCE), packedBytesLength, start, length);
+ }
+
+ private OfflinePointReader(IndexInput in, int packedBytesLength, long start, long length) throws IOException {
+ this.in = in;
+ bytesPerDoc = packedBytesLength + RamUsageEstimator.NUM_BYTES_LONG + RamUsageEstimator.NUM_BYTES_INT;
+ long seekFP = start * bytesPerDoc;
+ in.seek(seekFP);
+ this.countLeft = length;
+ packedValue = new byte[packedBytesLength];
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ if (countLeft >= 0) {
+ if (countLeft == 0) {
+ return false;
+ }
+ countLeft--;
+ }
+ try {
+ in.readBytes(packedValue, 0, packedValue.length);
+ } catch (EOFException eofe) {
+ assert countLeft == -1;
+ return false;
+ }
+ ord = in.readLong();
+ docID = in.readInt();
+ return true;
+ }
+
+ @Override
+ public byte[] packedValue() {
+ return packedValue;
+ }
+
+ @Override
+ public long ord() {
+ return ord;
+ }
+
+ @Override
+ public int docID() {
+ return docID;
+ }
+
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+}
+
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
new file mode 100644
index 00000000000..6cf10974da7
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
@@ -0,0 +1,86 @@
+package org.apache.lucene.util.bkd;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/** Writes points to disk in a fixed-with format. */
+final class OfflinePointWriter implements PointWriter {
+
+ final Directory tempDir;
+ final IndexOutput out;
+ final int packedBytesLength;
+ final int bytesPerDoc;
+ private long count;
+ private boolean closed;
+
+ public OfflinePointWriter(Directory tempDir, String tempFileNamePrefix, int packedBytesLength) throws IOException {
+ this.out = tempDir.createTempOutput(tempFileNamePrefix, "bkd", IOContext.DEFAULT);
+ this.tempDir = tempDir;
+ this.packedBytesLength = packedBytesLength;
+ bytesPerDoc = packedBytesLength + RamUsageEstimator.NUM_BYTES_LONG + RamUsageEstimator.NUM_BYTES_INT;
+ }
+
+ /** Initializes on an already written/closed file, just so consumers can use {@link #getReader} to read the file. */
+ public OfflinePointWriter(Directory tempDir, IndexOutput out, int packedBytesLength, long count) {
+ this.out = out;
+ this.tempDir = tempDir;
+ this.packedBytesLength = packedBytesLength;
+ bytesPerDoc = packedBytesLength + RamUsageEstimator.NUM_BYTES_LONG + RamUsageEstimator.NUM_BYTES_INT;
+ this.count = count;
+ closed = true;
+ }
+
+ @Override
+ public void append(byte[] packedValue, long ord, int docID) throws IOException {
+ assert packedValue.length == packedBytesLength;
+ out.writeBytes(packedValue, 0, packedValue.length);
+ out.writeLong(ord);
+ out.writeInt(docID);
+ count++;
+ }
+
+ @Override
+ public PointReader getReader(long start) throws IOException {
+ assert closed;
+ return new OfflinePointReader(tempDir, out.getName(), packedBytesLength, start, count-start);
+ }
+
+ @Override
+ public void close() throws IOException {
+ out.close();
+ closed = true;
+ }
+
+ @Override
+ public void destroy() throws IOException {
+ tempDir.deleteFile(out.getName());
+ }
+
+ @Override
+ public String toString() {
+ return "OfflinePointWriter(count=" + count + " tempFileName=" + out.getName() + ")";
+ }
+}
+
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
new file mode 100644
index 00000000000..2b7576f9ee8
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
@@ -0,0 +1,40 @@
+package org.apache.lucene.util.bkd;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/** One pass iterator through all points previously written with a
+ * {@link PointWriter}, abstracting away whether points a read
+ * from (offline) disk or simple arrays in heap. */
+interface PointReader extends Closeable {
+
+ /** Returns false once iteration is done, else true. */
+ boolean next() throws IOException;
+
+ /** Returns the packed byte[] value */
+ byte[] packedValue();
+
+ /** Point ordinal */
+ long ord();
+
+ /** DocID for this point */
+ int docID();
+}
+
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/PointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/PointWriter.java
new file mode 100644
index 00000000000..a732ee9264a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/PointWriter.java
@@ -0,0 +1,36 @@
+package org.apache.lucene.util.bkd;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/** Appends many points, and then at the end provides a {@link PointReader} to iterate
+ * those points. This abstracts away whether we write to disk, or use simple arrays
+ * in heap. */
+interface PointWriter extends Closeable {
+ /** Add a new point */
+ void append(byte[] packedValue, long ord, int docID) throws IOException;
+
+ /** Returns a {@link PointReader} iterator to step through all previously added points */
+ PointReader getReader(long startPoint) throws IOException;
+
+ /** Removes any temp files behind this writer */
+ void destroy() throws IOException;
+}
+
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/package-info.java b/lucene/core/src/java/org/apache/lucene/util/bkd/package-info.java
new file mode 100644
index 00000000000..85dec5a3d20
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/package-info.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Block KD-tree, implementing the generic spatial data structure described in
+ * this paper.
+ */
+
+package org.apache.lucene.util.bkd;
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
new file mode 100644
index 00000000000..e276d34efda
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
@@ -0,0 +1,705 @@
+package org.apache.lucene.util.bkd;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.List;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.RamUsageTester;
+import org.apache.lucene.util.TestUtil;
+
+@SuppressSysoutChecks(bugUrl = "Stuff gets printed.")
+public class TestBKD extends LuceneTestCase {
+
+ public void testBasicInts1D() throws Exception {
+ try (Directory dir = getDirectory(100)) {
+ BKDWriter w = new BKDWriter(dir, "tmp", 1, 4, 2, 1.0f);
+ byte[] scratch = new byte[4];
+ for(int docID=0;docID<100;docID++) {
+ BKDUtil.intToBytes(docID, scratch, 0);
+ w.add(scratch, docID);
+ }
+
+ long indexFP;
+ try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
+ indexFP = w.finish(out);
+ }
+
+ try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
+ in.seek(indexFP);
+ BKDReader r = new BKDReader(in);
+
+ // Simple 1D range query:
+ final int queryMin = 42;
+ final int queryMax = 87;
+
+ final BitSet hits = new BitSet();
+ r.intersect(new BKDReader.IntersectVisitor() {
+ @Override
+ public void visit(int docID) {
+ hits.set(docID);
+ if (VERBOSE) {
+ System.out.println("visit docID=" + docID);
+ }
+ }
+
+ @Override
+ public void visit(int docID, byte[] packedValue) {
+ int x = BKDUtil.bytesToInt(packedValue, 0);
+ if (VERBOSE) {
+ System.out.println("visit docID=" + docID + " x=" + x);
+ }
+ if (x >= queryMin && x <= queryMax) {
+ hits.set(docID);
+ }
+ }
+
+ @Override
+ public BKDReader.Relation compare(byte[] minPacked, byte[] maxPacked) {
+ int min = BKDUtil.bytesToInt(minPacked, 0);
+ int max = BKDUtil.bytesToInt(maxPacked, 0);
+ assert max >= min;
+ if (VERBOSE) {
+ System.out.println("compare: min=" + min + " max=" + max + " vs queryMin=" + queryMin + " queryMax=" + queryMax);
+ }
+
+ if (max < queryMin || min > queryMax) {
+ return BKDReader.Relation.QUERY_OUTSIDE_CELL;
+ } else if (min >= queryMin && max <= queryMax) {
+ return BKDReader.Relation.CELL_INSIDE_QUERY;
+ } else {
+ return BKDReader.Relation.QUERY_CROSSES_CELL;
+ }
+ }
+ });
+
+ for(int docID=0;docID<100;docID++) {
+ boolean expected = docID >= queryMin && docID <= queryMax;
+ boolean actual = hits.get(docID);
+ assertEquals("docID=" + docID, expected, actual);
+ }
+ }
+ }
+ }
+
+ public void testRandomIntsNDims() throws Exception {
+ int numDocs = atLeast(1000);
+ try (Directory dir = getDirectory(numDocs)) {
+ int numDims = TestUtil.nextInt(random(), 1, 5);
+ int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
+ float maxMB = (float) 0.1 + (3*random().nextFloat());
+ BKDWriter w = new BKDWriter(dir, "tmp", numDims, 4, maxPointsInLeafNode, maxMB);
+
+ if (VERBOSE) {
+ System.out.println("TEST: numDims=" + numDims + " numDocs=" + numDocs);
+ }
+ int[][] docs = new int[numDocs][];
+ byte[] scratch = new byte[4*numDims];
+ for(int docID=0;docID " + values[dim]);
+ }
+ }
+ docs[docID] = values;
+ w.add(scratch, docID);
+ }
+
+ long indexFP;
+ try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
+ indexFP = w.finish(out);
+ }
+
+ try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
+ in.seek(indexFP);
+ BKDReader r = new BKDReader(in);
+
+ int iters = atLeast(100);
+ for(int iter=0;iter= min;
+
+ if (max < queryMin[dim] || min > queryMax[dim]) {
+ return BKDReader.Relation.QUERY_OUTSIDE_CELL;
+ } else if (min < queryMin[dim] || max > queryMax[dim]) {
+ crosses = true;
+ }
+ }
+
+ if (crosses) {
+ return BKDReader.Relation.QUERY_CROSSES_CELL;
+ } else {
+ return BKDReader.Relation.CELL_INSIDE_QUERY;
+ }
+ }
+ });
+
+ for(int docID=0;docID queryMax[dim]) {
+ expected = false;
+ break;
+ }
+ }
+ boolean actual = hits.get(docID);
+ assertEquals("docID=" + docID, expected, actual);
+ }
+ }
+ }
+ }
+ }
+
+ // Tests on N-dimensional points where each dimension is a BigInteger
+ public void testBigIntNDims() throws Exception {
+
+ int numDocs = atLeast(1000);
+ try (Directory dir = getDirectory(numDocs)) {
+ int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
+ int numDims = TestUtil.nextInt(random(), 1, 5);
+ int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
+ float maxMB = (float) 0.1 + (3*random().nextFloat());
+ BKDWriter w = new BKDWriter(dir, "tmp", numDims, numBytesPerDim, maxPointsInLeafNode, maxMB);
+ BigInteger[][] docs = new BigInteger[numDocs][];
+
+ byte[] scratch = new byte[numBytesPerDim*numDims];
+ for(int docID=0;docID " + values[dim]);
+ }
+ }
+ docs[docID] = values;
+ w.add(scratch, docID);
+ }
+
+ long indexFP;
+ try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
+ indexFP = w.finish(out);
+ }
+
+ try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
+ in.seek(indexFP);
+ BKDReader r = new BKDReader(in);
+
+ int iters = atLeast(100);
+ for(int iter=0;iter= 0;
+
+ if (max.compareTo(queryMin[dim]) < 0 || min.compareTo(queryMax[dim]) > 0) {
+ return BKDReader.Relation.QUERY_OUTSIDE_CELL;
+ } else if (min.compareTo(queryMin[dim]) < 0 || max.compareTo(queryMax[dim]) > 0) {
+ crosses = true;
+ }
+ }
+
+ if (crosses) {
+ return BKDReader.Relation.QUERY_CROSSES_CELL;
+ } else {
+ return BKDReader.Relation.CELL_INSIDE_QUERY;
+ }
+ }
+ });
+
+ for(int docID=0;docID 0) {
+ expected = false;
+ break;
+ }
+ }
+ boolean actual = hits.get(docID);
+ assertEquals("docID=" + docID, expected, actual);
+ }
+ }
+ }
+ }
+ }
+
+ /** Make sure we close open files, delete temp files, etc., on exception */
+ public void testWithExceptions() throws Exception {
+ int numDocs = atLeast(10000);
+ int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
+ int numDims = TestUtil.nextInt(random(), 1, 5);
+
+ byte[][][] docValues = new byte[numDocs][][];
+
+ for(int docID=0;docID 0) {
+ docValues[docID][theEqualDim] = docValues[0][theEqualDim];
+ }
+ }
+
+ verify(docValues, null, numDims, numBytesPerDim);
+ }
+
+ public void testMultiValued() throws Exception {
+ int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
+ int numDims = TestUtil.nextInt(random(), 1, 5);
+
+ int numDocs = atLeast(1000);
+ List docValues = new ArrayList<>();
+ List docIDs = new ArrayList<>();
+
+ for(int docID=0;docID " + new BytesRef(docValues[ord][dim]));
+ }
+ System.arraycopy(docValues[ord][dim], 0, scratch, dim*numBytesPerDim, numBytesPerDim);
+ }
+ w.add(scratch, docID);
+ }
+
+ boolean success = false;
+ try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
+ indexFP = w.finish(out);
+ success = true;
+ } finally {
+ if (success == false) {
+ IOUtils.deleteFilesIgnoringExceptions(dir, "bkd");
+ }
+ }
+ }
+
+ try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
+ in.seek(indexFP);
+ BKDReader r = new BKDReader(in);
+
+ int iters = atLeast(100);
+ for(int iter=0;iter= 0;
+
+ if (BKDUtil.compare(numBytesPerDim, maxPacked, dim, queryMin[dim], 0) < 0 ||
+ BKDUtil.compare(numBytesPerDim, minPacked, dim, queryMax[dim], 0) > 0) {
+ return BKDReader.Relation.QUERY_OUTSIDE_CELL;
+ } else if (BKDUtil.compare(numBytesPerDim, minPacked, dim, queryMin[dim], 0) < 0 ||
+ BKDUtil.compare(numBytesPerDim, maxPacked, dim, queryMax[dim], 0) > 0) {
+ crosses = true;
+ }
+ }
+
+ if (crosses) {
+ return BKDReader.Relation.QUERY_CROSSES_CELL;
+ } else {
+ return BKDReader.Relation.CELL_INSIDE_QUERY;
+ }
+ }
+ });
+
+ BitSet expected = new BitSet();
+ for(int ord=0;ord 0) {
+ matches = false;
+ break;
+ }
+ }
+
+ if (matches) {
+ int docID;
+ if (docIDs == null) {
+ docID = ord;
+ } else {
+ docID = docIDs[ord];
+ }
+ expected.set(docID);
+ }
+ }
+
+ int limit = Math.max(expected.length(), hits.length());
+ for(int docID=0;docID 100000) {
+ dir = newFSDirectory(createTempDir("TestBKDTree"));
+ } else {
+ dir = newDirectory();
+ }
+ System.out.println("DIR: " + dir);
+ if (dir instanceof MockDirectoryWrapper) {
+ ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
+ }
+ return dir;
+ }
+}
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
index 53a39888895..e07a68c4d3e 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
@@ -24,6 +24,7 @@ import java.util.Comparator;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.OfflineSorter;
@@ -105,7 +106,7 @@ public class ExternalRefSorter implements BytesRefSorter, Closeable {
*/
class ByteSequenceIterator implements BytesRefIterator {
private final OfflineSorter.ByteSequencesReader reader;
- private BytesRef scratch = new BytesRef();
+ private BytesRefBuilder scratch = new BytesRefBuilder();
public ByteSequenceIterator(OfflineSorter.ByteSequencesReader reader) {
this.reader = reader;
@@ -118,17 +119,15 @@ public class ExternalRefSorter implements BytesRefSorter, Closeable {
}
boolean success = false;
try {
- byte[] next = reader.read();
- if (next != null) {
- scratch.bytes = next;
- scratch.length = next.length;
- scratch.offset = 0;
- } else {
+ if (reader.read(scratch) == false) {
IOUtils.close(reader);
scratch = null;
}
success = true;
- return scratch;
+ if (scratch == null) {
+ return null;
+ }
+ return scratch.get();
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(reader);
diff --git a/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
index 8018f1675fc..2f8891bdf04 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
@@ -440,7 +440,6 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
if (randomState.nextDouble() < randomIOExceptionRate) {
if (LuceneTestCase.VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": MockDirectoryWrapper: now throw random exception" + (message == null ? "" : " (" + message + ")"));
- new Throwable().printStackTrace(System.out);
}
throw new IOException("a random IOException" + (message == null ? "" : " (" + message + ")"));
}
@@ -567,9 +566,6 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
}
}
- if (crashed) {
- throw new IOException("cannot createOutput after crash");
- }
unSyncedFiles.add(name);
createdFiles.add(name);
@@ -607,6 +603,39 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
}
}
+ @Override
+ public synchronized IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
+ maybeThrowDeterministicException();
+ maybeThrowIOExceptionOnOpen("temp: prefix=" + prefix + " suffix=" + suffix);
+ maybeYield();
+ if (failOnCreateOutput) {
+ maybeThrowDeterministicException();
+ }
+ if (crashed) {
+ throw new IOException("cannot createTempOutput after crash");
+ }
+ init();
+
+ IndexOutput delegateOutput = in.createTempOutput(prefix, suffix, LuceneTestCase.newIOContext(randomState, context));
+ String name = delegateOutput.getName();
+ unSyncedFiles.add(name);
+ createdFiles.add(name);
+ final IndexOutput io = new MockIndexOutputWrapper(this, delegateOutput, name);
+ addFileHandle(io, name, Handle.Output);
+ openFilesForWrite.add(name);
+
+ // throttling REALLY slows down tests, so don't do it very often for SOMETIMES.
+ if (throttling == Throttling.ALWAYS ||
+ (throttling == Throttling.SOMETIMES && randomState.nextInt(200) == 0)) {
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println("MockDirectoryWrapper: throttling indexOutput (" + name + ")");
+ }
+ return throttledOutput.newFromDelegate(io);
+ } else {
+ return io;
+ }
+ }
+
private static enum Handle {
Input, Output, Slice
}