From 60975d2dfa0bf855220db6d9755b7e24c14a59bb Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 25 Jul 2016 17:36:01 +0200 Subject: [PATCH] LUCENE-7396: Speed flush of points. --- lucene/CHANGES.txt | 2 + .../lucene/codecs/MutablePointsReader.java | 39 ++ .../codecs/lucene60/Lucene60PointsWriter.java | 9 + .../lucene/index/PointValuesWriter.java | 141 +++-- .../org/apache/lucene/util/ArrayUtil.java | 83 +-- .../org/apache/lucene/util/ByteBlockPool.java | 8 + .../org/apache/lucene/util/IntroSelector.java | 126 +++++ .../org/apache/lucene/util/IntroSorter.java | 7 +- .../org/apache/lucene/util/RadixSelector.java | 202 +++++++ .../java/org/apache/lucene/util/Selector.java | 41 ++ .../org/apache/lucene/util/bkd/BKDWriter.java | 495 ++++++++++++++---- .../util/bkd/MutablePointsReaderUtils.java | 185 +++++++ .../apache/lucene/util/TestByteBlockPool.java | 8 +- .../apache/lucene/util/TestIntroSelector.java | 86 +++ .../apache/lucene/util/TestRadixSelector.java | 77 +++ .../bkd/TestMutablePointsReaderUtils.java | 251 +++++++++ 16 files changed, 1531 insertions(+), 229 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/MutablePointsReader.java create mode 100644 lucene/core/src/java/org/apache/lucene/util/IntroSelector.java create mode 100644 lucene/core/src/java/org/apache/lucene/util/RadixSelector.java create mode 100644 lucene/core/src/java/org/apache/lucene/util/Selector.java create mode 100644 lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java create mode 100644 lucene/core/src/test/org/apache/lucene/util/TestIntroSelector.java create mode 100644 lucene/core/src/test/org/apache/lucene/util/TestRadixSelector.java create mode 100644 lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index a685839a9f5..67e268336b7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -161,6 +161,8 @@ Optimizations * LUCENE-7311: Cached term queries do not seek the terms dictionary anymore. (Adrien Grand) +* LUCENE-7396: Faster flush of points. (Adrien Grand, Mike McCandless) + Other * LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MutablePointsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/MutablePointsReader.java new file mode 100644 index 00000000000..b6c328b0f5a --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/MutablePointsReader.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs; + +/** {@link PointsReader} whose order of points can be changed. + * This class is useful for codecs to optimize flush. 
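 *
 * <p>For illustration only (an editor's sketch, not part of the patch): the
 * shape of a minimal in-memory implementation of the four methods declared
 * below, modeled on the DummyPointsReader test helper at the end of this
 * patch. A real subclass would also implement the inherited
 * {@link PointsReader} methods, which are omitted here:
 * <pre>
 * class InMemoryPoints extends MutablePointsReader {
 *   private final byte[][] values; // packed value of each point
 *   private final int[] docIDs;    // doc ID of each point
 *
 *   InMemoryPoints(byte[][] values, int[] docIDs) {
 *     this.values = values;
 *     this.docIDs = docIDs;
 *   }
 *
 *   public void getValue(int i, byte[] packedValue) {
 *     System.arraycopy(values[i], 0, packedValue, 0, values[i].length);
 *   }
 *
 *   public byte getByteAt(int i, int k) {
 *     return values[i][k];
 *   }
 *
 *   public int getDocID(int i) {
 *     return docIDs[i];
 *   }
 *
 *   public void swap(int i, int j) {
 *     byte[] v = values[i]; values[i] = values[j]; values[j] = v;
 *     int d = docIDs[i];    docIDs[i] = docIDs[j]; docIDs[j] = d;
 *   }
 * }
 * </pre>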
+ * @lucene.internal */ +public abstract class MutablePointsReader extends PointsReader { + + /** Sole constructor. */ + protected MutablePointsReader() {} + + /** Fill {@code packedValue} with the packed bytes of the i-th value. */ + public abstract void getValue(int i, byte[] packedValue); + + /** Get the k-th byte of the i-th value. */ + public abstract byte getByteAt(int i, int k); + + /** Return the doc ID of the i-th value. */ + public abstract int getDocID(int i); + + /** Swap the i-th and j-th values. */ + public abstract void swap(int i, int j); + +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java index 3acfac3f8b0..5fedf64fd91 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Map; import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.MutablePointsReader; import org.apache.lucene.codecs.PointsReader; import org.apache.lucene.codecs.PointsWriter; import org.apache.lucene.index.FieldInfo; @@ -98,6 +99,14 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable { values.size(fieldInfo.name), singleValuePerDoc)) { + if (values instanceof MutablePointsReader) { + final long fp = writer.writeField(dataOut, fieldInfo.name, (MutablePointsReader) values); + if (fp != -1) { + indexFPs.put(fieldInfo.name, fp); + } + return; + } + values.intersect(fieldInfo.name, new IntersectVisitor() { @Override public void visit(int docID) { diff --git a/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java index 511635c14ed..b4decb67174 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java @@ -18,6 +18,7 @@ package org.apache.lucene.index; import java.io.IOException; +import org.apache.lucene.codecs.MutablePointsReader; import org.apache.lucene.codecs.PointsReader; import org.apache.lucene.codecs.PointsWriter; import org.apache.lucene.util.ArrayUtil; @@ -35,7 +36,7 @@ class PointValuesWriter { private int numPoints; private int numDocs; private int lastDocID = -1; - private final byte[] packedValue; + private final int packedBytesLength; private final LiveIndexWriterConfig indexWriterConfig; public PointValuesWriter(DocumentsWriterPerThread docWriter, FieldInfo fieldInfo) { @@ -44,7 +45,7 @@ class PointValuesWriter { this.bytes = new ByteBlockPool(docWriter.byteBlockAllocator); docIDs = new int[16]; iwBytesUsed.addAndGet(16 * Integer.BYTES); - packedValue = new byte[fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()]; + packedBytesLength = fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes(); indexWriterConfig = docWriter.indexWriterConfig; } @@ -70,64 +71,102 @@ class PointValuesWriter { } public void flush(SegmentWriteState state, PointsWriter writer) throws IOException { + PointsReader reader = new MutablePointsReader() { - writer.writeField(fieldInfo, - new PointsReader() { - @Override - public void intersect(String fieldName, IntersectVisitor visitor) throws IOException { - if (fieldName.equals(fieldInfo.name) == false) { - throw new IllegalArgumentException("fieldName must be the same"); - } - for(int i=0;i void select(T[] arr, int 
from, int to, int k, Comparator comparator) { - if (k < from) { - throw new IllegalArgumentException("k must be >= from"); - } - if (k >= to) { - throw new IllegalArgumentException("k must be < to"); - } - final int maxDepth = 2 * MathUtil.log(to - from, 2); - quickSelect(arr, from, to, k, comparator, maxDepth); + public static void select(T[] arr, int from, int to, int k, Comparator comparator) { + new IntroSelector() { + + T pivot; + + @Override + protected void swap(int i, int j) { + ArrayUtil.swap(arr, i, j); + } + + @Override + protected void setPivot(int i) { + pivot = arr[i]; + } + + @Override + protected int comparePivot(int j) { + return comparator.compare(pivot, arr[j]); + } + }.select(from, to, k); } - private static void quickSelect(T[] arr, int from, int to, int k, Comparator comparator, int maxDepth) { - assert from <= k; - assert k < to; - if (to - from == 1) { - return; - } - if (--maxDepth < 0) { - Arrays.sort(arr, from, to, comparator); - return; - } - - final int mid = (from + to) >>> 1; - // heuristic: we use the median of the values at from, to-1 and mid as a pivot - if (comparator.compare(arr[from], arr[to - 1]) > 0) { - swap(arr, from, to - 1); - } - if (comparator.compare(arr[to - 1], arr[mid]) > 0) { - swap(arr, to - 1, mid); - if (comparator.compare(arr[from], arr[to - 1]) > 0) { - swap(arr, from, to - 1); - } - } - - T pivot = arr[to - 1]; - - int left = from + 1; - int right = to - 2; - - for (;;) { - while (comparator.compare(pivot, arr[left]) > 0) { - ++left; - } - - while (left < right && comparator.compare(pivot, arr[right]) <= 0) { - --right; - } - - if (left < right) { - swap(arr, left, right); - --right; - } else { - break; - } - } - swap(arr, left, to - 1); - - if (left == k) { - return; - } else if (left < k) { - quickSelect(arr, left + 1, to, k, comparator, maxDepth); - } else { - quickSelect(arr, from, left, k, comparator, maxDepth); - } - } } diff --git a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java index 6bb12bdfea6..e202d639368 100644 --- a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java +++ b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java @@ -378,5 +378,13 @@ public final class ByteBlockPool { } } while (true); } + + /** Read a single byte at the given {@code offset}. */ + public byte readByte(long offset) { + int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT); + int pos = (int) (offset & BYTE_BLOCK_MASK); + byte[] buffer = buffers[bufferIndex]; + return buffer[pos]; + } } diff --git a/lucene/core/src/java/org/apache/lucene/util/IntroSelector.java b/lucene/core/src/java/org/apache/lucene/util/IntroSelector.java new file mode 100644 index 00000000000..78985352198 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/IntroSelector.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + +import java.util.Comparator; + +/** Implementation of the quick select algorithm. + *
It uses the median of the first, middle and last values as a pivot and
+ * falls back to a heap sort when the number of recursion levels exceeds
+ * {@code 2 lg(n)}; as a consequence it runs in linear time on average and in
+ * {@code n log(n)} time in the worst case.
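 *
 * <p>A minimal usage sketch (an editor's illustration, not part of the
 * patch), following the same subclassing pattern that the reworked
 * {@code ArrayUtil.select} uses elsewhere in this patch: select the k-th
 * smallest element of an int[] in place.
 * <pre>
 * static void selectKth(final int[] arr, int from, int to, int k) {
 *   new IntroSelector() {
 *     int pivot; // saved by setPivot, compared against by comparePivot
 *
 *     protected void swap(int i, int j) {
 *       int tmp = arr[i]; arr[i] = arr[j]; arr[j] = tmp;
 *     }
 *
 *     protected void setPivot(int i) {
 *       pivot = arr[i];
 *     }
 *
 *     protected int comparePivot(int j) {
 *       return Integer.compare(pivot, arr[j]);
 *     }
 *   }.select(from, to, k); // arr[k] now holds the k-th smallest value of [from, to)
 * }
 * </pre>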
+ * @lucene.internal */ +public abstract class IntroSelector extends Selector { + + @Override + public final void select(int from, int to, int k) { + checkArgs(from, to, k); + final int maxDepth = 2 * MathUtil.log(to - from, 2); + quickSelect(from, to, k, maxDepth); + } + + // heap sort + void slowSelect(int from, int to, int k) { + new Sorter() { + + @Override + protected void swap(int i, int j) { + IntroSelector.this.swap(i, j); + } + + @Override + protected int compare(int i, int j) { + return IntroSelector.this.compare(i, j); + } + + public void sort(int from, int to) { + heapSort(from, to); + } + }.sort(from, to); + } + + private void quickSelect(int from, int to, int k, int maxDepth) { + assert from <= k; + assert k < to; + if (to - from == 1) { + return; + } + if (--maxDepth < 0) { + slowSelect(from, to, k); + return; + } + + final int mid = (from + to) >>> 1; + // heuristic: we use the median of the values at from, to-1 and mid as a pivot + if (compare(from, to - 1) > 0) { + swap(from, to - 1); + } + if (compare(to - 1, mid) > 0) { + swap(to - 1, mid); + if (compare(from, to - 1) > 0) { + swap(from, to - 1); + } + } + + setPivot(to - 1); + + int left = from + 1; + int right = to - 2; + + for (;;) { + while (comparePivot(left) > 0) { + ++left; + } + + while (left < right && comparePivot(right) <= 0) { + --right; + } + + if (left < right) { + swap(left, right); + --right; + } else { + break; + } + } + swap(left, to - 1); + + if (left == k) { + return; + } else if (left < k) { + quickSelect(left + 1, to, k, maxDepth); + } else { + quickSelect(from, left, k, maxDepth); + } + } + + /** Compare entries found in slots i and j. + * The contract for the returned value is the same as + * {@link Comparator#compare(Object, Object)}. */ + protected int compare(int i, int j) { + setPivot(i); + return comparePivot(j); + } + + /** Save the value at slot i so that it can later be used as a + * pivot, see {@link #comparePivot(int)}. */ + protected abstract void setPivot(int i); + + /** Compare the pivot with the slot at j, similarly to + * {@link #compare(int, int) compare(i, j)}. */ + protected abstract int comparePivot(int j); +} diff --git a/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java b/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java index 26f7e37dc9f..83d118db56e 100644 --- a/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java +++ b/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.util; - /** * {@link Sorter} implementation based on a variant of the quicksort algorithm * called introsort: when @@ -91,4 +90,10 @@ public abstract class IntroSorter extends Sorter { /** Compare the pivot with the slot at j, similarly to * {@link #compare(int, int) compare(i, j)}. */ protected abstract int comparePivot(int j); + + @Override + protected int compare(int i, int j) { + setPivot(i); + return comparePivot(j); + } } diff --git a/lucene/core/src/java/org/apache/lucene/util/RadixSelector.java b/lucene/core/src/java/org/apache/lucene/util/RadixSelector.java new file mode 100644 index 00000000000..543204ae367 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/RadixSelector.java @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + +import java.util.Arrays; + +/** Radix selector. + *
This implementation works similarly to a MSB radix sort except that it + * only recurses into the sub partition that contains the desired value. + * @lucene.internal */ +public abstract class RadixSelector extends Selector { + + // after that many levels of recursion we fall back to introselect anyway + // this is used as a protection against the fact that radix sort performs + // worse when there are long common prefixes (probably because of cache + // locality) + private static final int LEVEL_THRESHOLD = 8; + // size of histograms: 256 + 1 to indicate that the string is finished + private static final int HISTOGRAM_SIZE = 257; + // buckets below this size will be sorted with introselect + private static final int LENGTH_THRESHOLD = 100; + + // we store one histogram per recursion level + private final int[] histogram = new int[HISTOGRAM_SIZE]; + + private final int maxLength; + + /** + * Sole constructor. + * @param maxLength the maximum length of keys, pass {@link Integer#MAX_VALUE} if unknown. + */ + protected RadixSelector(int maxLength) { + this.maxLength = maxLength; + } + + /** Return the k-th byte of the entry at index {@code i}, or {@code -1} if + * its length is less than or equal to {@code k}. This may only be called + * with a value of {@code i} between {@code 0} included and + * {@code maxLength} excluded. */ + protected abstract int byteAt(int i, int k); + + /** Get a fall-back selector which may assume that the first {@code d} bytes + * of all compared strings are equal. This fallback selector is used when + * the range becomes narrow or when the maximum level of recursion has + * been exceeded. */ + protected Selector getFallbackSelector(int d) { + return new IntroSelector() { + @Override + protected void swap(int i, int j) { + RadixSelector.this.swap(i, j); + } + + @Override + protected int compare(int i, int j) { + for (int o = d; o < maxLength; ++o) { + final int b1 = byteAt(i, o); + final int b2 = byteAt(j, o); + if (b1 != b2) { + return b1 - b2; + } else if (b1 == -1) { + break; + } + } + return 0; + } + + @Override + protected void setPivot(int i) { + pivot.setLength(0); + for (int o = d; o < maxLength; ++o) { + final int b = byteAt(i, o); + if (b == -1) { + break; + } + pivot.append((byte) b); + } + } + + @Override + protected int comparePivot(int j) { + for (int o = 0; o < pivot.length(); ++o) { + final int b1 = pivot.byteAt(o) & 0xff; + final int b2 = byteAt(j, d + o); + if (b1 != b2) { + return b1 - b2; + } + } + if (d + pivot.length() == maxLength) { + return 0; + } + return -1 - byteAt(j, d + pivot.length()); + } + + private final BytesRefBuilder pivot = new BytesRefBuilder(); + }; + } + + @Override + public void select(int from, int to, int k) { + checkArgs(from, to, k); + select(from, to, k, 0); + } + + private void select(int from, int to, int k, int d) { + if (to - from <= LENGTH_THRESHOLD || d >= LEVEL_THRESHOLD) { + getFallbackSelector(d).select(from, to, k); + } else { + radixSelect(from, to, k, d); + } + } + + private void radixSelect(int from, int to, int k, int d) { + final int[] histogram = this.histogram; + Arrays.fill(histogram, 0); + + buildHistogram(from, to, d, histogram); + + int bucketFrom = from; + for (int bucket = 0; bucket < HISTOGRAM_SIZE; ++bucket) { + final int bucketTo = bucketFrom + histogram[bucket]; + + if (bucketTo > k) { + partition(from, to, bucket, bucketFrom, bucketTo, d); + + if (bucket != 0 && d + 1 < maxLength) { + // all elements in bucket 0 are equal so we only need to recurse if bucket != 0 + select(bucketFrom, bucketTo, 
k, d + 1); + } + return; + } + bucketFrom = bucketTo; + } + throw new AssertionError("Unreachable code"); + } + + /** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */ + private int getBucket(int i, int k) { + return byteAt(i, k) + 1; + } + + /** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */ + private int[] buildHistogram(int from, int to, int k, int[] histogram) { + for (int i = from; i < to; ++i) { + histogram[getBucket(i, k)]++; + } + return histogram; + } + + /** Reorder elements so that all of them that fall into {@code bucket} are + * between offsets {@code bucketFrom} and {@code bucketTo}. */ + private void partition(int from, int to, int bucket, int bucketFrom, int bucketTo, int d) { + int left = from; + int right = to - 1; + + int slot = bucketFrom; + + for (;;) { + int leftBucket = getBucket(left, d); + int rightBucket = getBucket(right, d); + + while (leftBucket <= bucket && left < bucketFrom) { + if (leftBucket == bucket) { + swap(left, slot++); + } else { + ++left; + } + leftBucket = getBucket(left, d); + } + + while (rightBucket >= bucket && right >= bucketTo) { + if (rightBucket == bucket) { + swap(right, slot++); + } else { + --right; + } + rightBucket = getBucket(right, d); + } + + if (left < bucketFrom && right >= bucketTo) { + swap(left++, right--); + } else { + assert left == bucketFrom; + assert right == bucketTo - 1; + break; + } + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/util/Selector.java b/lucene/core/src/java/org/apache/lucene/util/Selector.java new file mode 100644 index 00000000000..8c17ce4a2f6 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/Selector.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + +/** An implementation of a selection algorithm, ie. computing the k-th greatest + * value from a collection. */ +public abstract class Selector { + + /** Reorder elements so that the element at position {@code k} is the same + * as if all elements were sorted and all other elements are partitioned + * around it: {@code [from, k)} only contains elements that are less than + * or equal to {@code k} and {@code (k, to)} only contains elements that + * are greater than or equal to {@code k}. */ + public abstract void select(int from, int to, int k); + + void checkArgs(int from, int to, int k) { + if (k < from) { + throw new IllegalArgumentException("k must be >= from"); + } + if (k >= to) { + throw new IllegalArgumentException("k must be < to"); + } + } + + /** Swap values at slots i and j. 
*/ + protected abstract void swap(int i, int j); +} diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index 97651e47e38..d0d7dca2eb8 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.function.IntFunction; import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.MutablePointsReader; import org.apache.lucene.index.MergeState; import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.Relation; @@ -67,7 +68,7 @@ import org.apache.lucene.util.StringHelper; *
 * <p>See <a href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a> for details.
 *
- *
This consumes heap during writing: it allocates a LongBitSet(numPoints), + *
This consumes heap during writing: it allocates a LongBitSet(numPoints), * and then uses up to the specified {@code maxMBSortInHeap} heap space for writing. * *
@@ -140,10 +141,10 @@ public class BKDWriter implements Closeable { /** True if every document has at most one value. We specialize this case by not bothering to store the ord since it's redundant with docID. */ protected final boolean singleValuePerDoc; - /** How much heap OfflineSorter is allowed to use */ + /** How much heap OfflineSorter is allowed to use */ protected final OfflineSorter.BufferSize offlineSorterBufferMB; - /** How much heap OfflineSorter is allowed to use */ + /** How much heap OfflineSorter is allowed to use */ protected final int offlineSorterMaxTempFiles; private final int maxDoc; @@ -381,7 +382,7 @@ public class BKDWriter implements Closeable { } else { mappedDocID = docMap.get(oldDocID); } - + if (mappedDocID != -1) { // Not deleted! docID = mappedDocID; @@ -416,15 +417,25 @@ public class BKDWriter implements Closeable { } } - /** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already - * sorted values and currently only works when numDims==1. This returns -1 if all documents containing - * dimensional values were deleted. */ - public long merge(IndexOutput out, List docMaps, List readers) throws IOException { - if (numDims != 1) { - throw new UnsupportedOperationException("numDims must be 1 but got " + numDims); + /** Write a field from a {@link MutablePointsReader}. This way of writing + * points is faster than regular writes with {@link BKDWriter#add} since + * there is opportunity for reordering points before writing them to + * disk. This method does not use transient disk in order to reorder points. + */ + public long writeField(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException { + if (numDims == 1) { + return writeField1Dim(out, fieldName, reader); + } else { + return writeFieldNDims(out, fieldName, reader); } + } + + + /* In the 2+D case, we recursively pick the split dimension, compute the + * median value and partition other values around it. 
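 *
 * A worked example with illustrative numbers (not taken from the patch): for
 * 10000 points and maxPointsInLeafNode=1024, the sizing loop below halves
 * countPerLeaf four times (10000 -> 5000 -> 2500 -> 1250 -> 625) while
 * doubling innerNodeCount to 16, so the tree gets 16 leaves of ~625 points
 * each, and build() partitions around the median of the chosen split
 * dimension at each of the four resulting levels.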
*/ + private long writeFieldNDims(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException { if (pointCount != 0) { - throw new IllegalStateException("cannot mix add and merge"); + throw new IllegalStateException("cannot mix add and writeField"); } // Catch user silliness: @@ -435,6 +446,81 @@ public class BKDWriter implements Closeable { // Mark that we already finished: heapPointWriter = null; + long countPerLeaf = pointCount = reader.size(fieldName); + long innerNodeCount = 1; + + while (countPerLeaf > maxPointsInLeafNode) { + countPerLeaf = (countPerLeaf+1)/2; + innerNodeCount *= 2; + } + + int numLeaves = Math.toIntExact(innerNodeCount); + + checkMaxLeafNodeCount(numLeaves); + + final byte[] splitPackedValues = new byte[numLeaves * (bytesPerDim + 1)]; + final long[] leafBlockFPs = new long[numLeaves]; + + // compute the min/max for this slice + Arrays.fill(minPackedValue, (byte) 0xff); + Arrays.fill(maxPackedValue, (byte) 0); + for (int i = 0; i < Math.toIntExact(pointCount); ++i) { + reader.getValue(i, scratch1); + for(int dim=0;dim 0) { + System.arraycopy(scratch1, offset, maxPackedValue, offset, bytesPerDim); + } + } + + docsSeen.set(reader.getDocID(i)); + } + + build(1, numLeaves, reader, 0, Math.toIntExact(pointCount), out, + minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, + new int[maxPointsInLeafNode]); + + long indexFP = out.getFilePointer(); + writeIndex(out, leafBlockFPs, splitPackedValues); + return indexFP; + } + + + /* In the 1D case, we can simply sort points in ascending order and use the + * same writing logic as we use at merge time. */ + private long writeField1Dim(IndexOutput out, String fieldName, MutablePointsReader reader) throws IOException { + MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, Math.toIntExact(reader.size(fieldName))); + + final OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out); + + reader.intersect(fieldName, new IntersectVisitor() { + + @Override + public void visit(int docID, byte[] packedValue) throws IOException { + oneDimWriter.add(packedValue, docID); + } + + @Override + public void visit(int docID) throws IOException { + throw new IllegalStateException(); + } + + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + return Relation.CELL_CROSSES_QUERY; + } + }); + + return oneDimWriter.finish(); + } + + /** More efficient bulk-add for incoming {@link BKDReader}s. This does a merge sort of the already + * sorted values and currently only works when numDims==1. This returns -1 if all documents containing + * dimensional values were deleted. 
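 *
 * <p>For context before the merge path below (condensed verbatim from the
 * Lucene60PointsWriter hunk earlier in this patch): at flush time the codec
 * prefers the new writeField entry point whenever the incoming reader is
 * mutable, and only falls back to the intersect-based copy otherwise.
 * <pre>
 * if (values instanceof MutablePointsReader) {
 *   final long fp = writer.writeField(dataOut, fieldInfo.name, (MutablePointsReader) values);
 *   if (fp != -1) {
 *     indexFPs.put(fieldInfo.name, fp);
 *   }
 *   return;
 * }
 * </pre>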
*/ + public long merge(IndexOutput out, List docMaps, List readers) throws IOException { assert docMaps == null || readers.size() == docMaps.size(); BKDMergeQueue queue = new BKDMergeQueue(bytesPerDim, readers.size()); @@ -453,72 +539,14 @@ public class BKDWriter implements Closeable { } } - if (queue.size() == 0) { - return -1; - } - - int leafCount = 0; - List leafBlockFPs = new ArrayList<>(); - List leafBlockStartValues = new ArrayList<>(); - - // Target halfway between min and max allowed for the leaf: - int pointsPerLeafBlock = (int) (0.75 * maxPointsInLeafNode); - //System.out.println("POINTS PER: " + pointsPerLeafBlock); - - byte[] lastPackedValue = new byte[bytesPerDim]; - byte[] firstPackedValue = new byte[bytesPerDim]; - long valueCount = 0; - - // Buffer up each leaf block's docs and values - int[] leafBlockDocIDs = new int[maxPointsInLeafNode]; - byte[][] leafBlockPackedValues = new byte[maxPointsInLeafNode][]; - for(int i=0;i 0) { - // Save the first (minimum) value in each leaf block except the first, to build the split value index in the end: - leafBlockStartValues.add(Arrays.copyOf(reader.state.scratchPackedValue, bytesPerDim)); - } - Arrays.fill(commonPrefixLengths, bytesPerDim); - System.arraycopy(reader.state.scratchPackedValue, 0, firstPackedValue, 0, bytesPerDim); - } else { - // Find per-dim common prefix: - for(int dim=0;dim packedValues = new IntFunction() { - final BytesRef scratch = new BytesRef(); + final IndexOutput out; + final int pointsPerLeafBlock = (int) (0.75 * maxPointsInLeafNode); + final List leafBlockFPs = new ArrayList<>(); + final List leafBlockStartValues = new ArrayList<>(); + final byte[] leafValues = new byte[pointsPerLeafBlock * packedBytesLength]; + final int[] leafDocs = new int[pointsPerLeafBlock]; + long valueCount; + int leafCount; - { - scratch.length = packedBytesLength; - scratch.offset = 0; - } + OneDimensionBKDWriter(IndexOutput out) { + if (numDims != 1) { + throw new UnsupportedOperationException("numDims must be 1 but got " + numDims); + } + if (pointCount != 0) { + throw new IllegalStateException("cannot mix add and merge"); + } - @Override - public BytesRef apply(int i) { - scratch.bytes = leafBlockPackedValues[i]; - return scratch; - } - }; - writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues); + // Catch user silliness: + if (heapPointWriter == null && tempInput == null) { + throw new IllegalStateException("already finished"); + } + // Mark that we already finished: + heapPointWriter = null; + + this.out = out; + + lastPackedValue = new byte[packedBytesLength]; + } + + // for asserts + final byte[] lastPackedValue; + int lastDocID; + + void add(byte[] packedValue, int docID) throws IOException { + assert valueInOrder(valueCount + leafCount, + 0, lastPackedValue, packedValue, 0, docID, lastDocID); + + System.arraycopy(packedValue, 0, leafValues, leafCount * packedBytesLength, packedBytesLength); + leafDocs[leafCount] = docID; + docsSeen.set(docID); + leafCount++; + + if (valueCount > totalPointCount) { + throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values"); + } + + if (leafCount == pointsPerLeafBlock) { + // We write a block once we hit exactly the max count ... 
this is different from + // when we flush a new segment, where we write between max/2 and max per leaf block, + // so merged segments will behave differently from newly flushed segments: + writeLeafBlock(); leafCount = 0; } + + assert (lastDocID = docID) >= 0; // only assign when asserts are enabled } - pointCount = valueCount; + public long finish() throws IOException { + if (leafCount > 0) { + writeLeafBlock(); + leafCount = 0; + } - long indexFP = out.getFilePointer(); + if (valueCount == 0) { + return -1; + } - int numInnerNodes = leafBlockStartValues.size(); + pointCount = valueCount; - //System.out.println("BKDW: now rotate numInnerNodes=" + numInnerNodes + " leafBlockStarts=" + leafBlockStartValues.size()); + long indexFP = out.getFilePointer(); - byte[] index = new byte[(1+numInnerNodes) * (1+bytesPerDim)]; - rotateToTree(1, 0, numInnerNodes, index, leafBlockStartValues); - long[] arr = new long[leafBlockFPs.size()]; - for(int i=0;i>> Math.max(0, shift)) & 0xff; + } + } + + @Override + protected org.apache.lucene.util.Sorter getFallbackSorter(int k) { + return new IntroSorter() { + + final byte[] pivot = new byte[packedBytesLength]; + final byte[] scratch = new byte[packedBytesLength]; + int pivotDoc; + + @Override + protected void swap(int i, int j) { + reader.swap(i, j); + } + + @Override + protected void setPivot(int i) { + reader.getValue(i, pivot); + pivotDoc = reader.getDocID(i); + } + + @Override + protected int comparePivot(int j) { + if (k < packedBytesLength) { + reader.getValue(j, scratch); + int cmp = StringHelper.compare(packedBytesLength - k, pivot, k, scratch, k); + if (cmp != 0) { + return cmp; + } + } + return pivotDoc - reader.getDocID(j); + } + }; + } + + }.sort(from, to); + } + + /** Sort points on the given dimension. */ + static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths, + MutablePointsReader reader, int from, int to, + byte[] scratch1, byte[] scratch2) { + + // No need for a fancy radix sort here, this is called on the leaves only so + // there are not many values to sort + final int offset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim]; + final int numBytesToCompare = bytesPerDim - commonPrefixLengths[sortedDim]; + new IntroSorter() { + + final byte[] pivot = scratch1; + int pivotDoc = -1; + + @Override + protected void swap(int i, int j) { + reader.swap(i, j); + } + + @Override + protected void setPivot(int i) { + reader.getValue(i, pivot); + pivotDoc = reader.getDocID(i); + } + + @Override + protected int comparePivot(int j) { + reader.getValue(j, scratch2); + int cmp = StringHelper.compare(numBytesToCompare, pivot, offset, scratch2, offset); + if (cmp == 0) { + cmp = pivotDoc - reader.getDocID(j); + } + return cmp; + } + }.sort(from, to); + } + + /** Partition points around {@code mid}. All values on the left must be less + * than or equal to it and all values on the right must be greater than or + * equal to it. 
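 *
 * <p>Usage sketch (an editor's illustration; the surrounding variables are
 * hypothetical, the call shape follows doTestPartition at the end of this
 * patch):
 * <pre>
 * byte[] scratch1 = new byte[numDims * bytesPerDim];
 * byte[] scratch2 = new byte[numDims * bytesPerDim];
 * MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
 *     reader, 0, numPoints, mid, scratch1, scratch2);
 * // on splitDim (ties broken by doc ID), values in [0, mid) are now &lt;= the
 * // value at mid, and values in (mid, numPoints) are &gt;= it
 * </pre>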
*/ + static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen, + MutablePointsReader reader, int from, int to, int mid, + byte[] scratch1, byte[] scratch2) { + final int offset = splitDim * bytesPerDim + commonPrefixLen; + final int cmpBytes = bytesPerDim - commonPrefixLen; + final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1); + new RadixSelector(cmpBytes + (bitsPerDocId + 7) / 8) { + + @Override + protected Selector getFallbackSelector(int k) { + return new IntroSelector() { + + final byte[] pivot = scratch1; + int pivotDoc; + + @Override + protected void swap(int i, int j) { + reader.swap(i, j); + } + + @Override + protected void setPivot(int i) { + reader.getValue(i, pivot); + pivotDoc = reader.getDocID(i); + } + + @Override + protected int comparePivot(int j) { + if (k < cmpBytes) { + reader.getValue(j, scratch2); + int cmp = StringHelper.compare(cmpBytes - k, pivot, offset + k, scratch2, offset + k); + if (cmp != 0) { + return cmp; + } + } + return pivotDoc - reader.getDocID(j); + } + }; + } + + @Override + protected void swap(int i, int j) { + reader.swap(i, j); + } + + @Override + protected int byteAt(int i, int k) { + if (k < cmpBytes) { + return Byte.toUnsignedInt(reader.getByteAt(i, offset + k)); + } else { + final int shift = bitsPerDocId - ((k - cmpBytes + 1) << 3); + return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff; + } + } + }.select(from, to, mid); + } +} diff --git a/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java b/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java index b425b761877..388a7890dd2 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java @@ -45,7 +45,13 @@ public class TestByteBlockPool extends LuceneTestCase { for (BytesRef expected : list) { ref.grow(expected.length); ref.setLength(expected.length); - pool.readBytes(position, ref.bytes(), 0, ref.length()); + if (random().nextBoolean()) { + pool.readBytes(position, ref.bytes(), 0, ref.length()); + } else { + for (int i = 0; i < ref.length(); ++i) { + ref.setByteAt(i, pool.readByte(position + i)); + } + } assertEquals(expected, ref.get()); position += ref.length(); } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestIntroSelector.java b/lucene/core/src/test/org/apache/lucene/util/TestIntroSelector.java new file mode 100644 index 00000000000..468b2f78809 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/TestIntroSelector.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.util; + +import java.util.Arrays; + +public class TestIntroSelector extends LuceneTestCase { + + public void testSelect() { + for (int iter = 0; iter < 100; ++iter) { + doTestSelect(false); + } + } + + public void testSlowSelect() { + for (int iter = 0; iter < 100; ++iter) { + doTestSelect(true); + } + } + + private void doTestSelect(boolean slow) { + final int from = random().nextInt(5); + final int to = from + TestUtil.nextInt(random(), 1, 10000); + final int max = random().nextBoolean() ? random().nextInt(100) : random().nextInt(100000); + Integer[] arr = new Integer[from + to + random().nextInt(5)]; + for (int i = 0; i < arr.length; ++i) { + arr[i] = TestUtil.nextInt(random(), 0, max); + } + final int k = TestUtil.nextInt(random(), from, to - 1); + + Integer[] expected = arr.clone(); + Arrays.sort(expected, from, to); + + Integer[] actual = arr.clone(); + IntroSelector selector = new IntroSelector() { + + Integer pivot; + + @Override + protected void swap(int i, int j) { + ArrayUtil.swap(actual, i, j); + } + + @Override + protected void setPivot(int i) { + pivot = actual[i]; + } + + @Override + protected int comparePivot(int j) { + return pivot.compareTo(actual[j]); + } + }; + if (slow) { + selector.slowSelect(from, to, k); + } else { + selector.select(from, to, k); + } + + assertEquals(expected[k], actual[k]); + for (int i = 0; i < actual.length; ++i) { + if (i < from || i >= to) { + assertSame(arr[i], actual[i]); + } else if (i <= k) { + assertTrue(actual[i].intValue() <= actual[k].intValue()); + } else { + assertTrue(actual[i].intValue() >= actual[k].intValue()); + } + } + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/util/TestRadixSelector.java b/lucene/core/src/test/org/apache/lucene/util/TestRadixSelector.java new file mode 100644 index 00000000000..e0d6bf95ecc --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/TestRadixSelector.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.util; + +import java.util.Arrays; + +public class TestRadixSelector extends LuceneTestCase { + + public void testSelect() { + for (int iter = 0; iter < 100; ++iter) { + doTestSelect(); + } + } + + private void doTestSelect() { + final int from = random().nextInt(5); + final int to = from + TestUtil.nextInt(random(), 1, 10000); + final int maxLen = TestUtil.nextInt(random(), 1, 12); + BytesRef[] arr = new BytesRef[from + to + random().nextInt(5)]; + for (int i = 0; i < arr.length; ++i) { + byte[] bytes = new byte[TestUtil.nextInt(random(), 0, maxLen)]; + random().nextBytes(bytes); + arr[i] = new BytesRef(bytes); + } + final int k = TestUtil.nextInt(random(), from, to - 1); + + BytesRef[] expected = arr.clone(); + Arrays.sort(expected, from, to); + + BytesRef[] actual = arr.clone(); + RadixSelector selector = new RadixSelector(random().nextBoolean() ? maxLen : Integer.MAX_VALUE) { + + @Override + protected void swap(int i, int j) { + ArrayUtil.swap(actual, i, j); + } + + @Override + protected int byteAt(int i, int k) { + BytesRef b = actual[i]; + if (k >= b.length) { + return -1; + } else { + return Byte.toUnsignedInt(b.bytes[b.offset + k]); + } + } + + }; + selector.select(from, to, k); + + assertEquals(expected[k], actual[k]); + for (int i = 0; i < actual.length; ++i) { + if (i < from || i >= to) { + assertSame(arr[i], actual[i]); + } else if (i <= k) { + assertTrue(actual[i].compareTo(actual[k]) <= 0); + } else { + assertTrue(actual[i].compareTo(actual[k]) >= 0); + } + } + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java new file mode 100644 index 00000000000..f1140d4a80f --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.util.bkd; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Comparator; + +import org.apache.lucene.codecs.MutablePointsReader; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.TestUtil; + +public class TestMutablePointsReaderUtils extends LuceneTestCase { + + public void testSort() { + for (int iter = 0; iter < 5; ++iter) { + doTestSort(); + } + } + + private void doTestSort() { + final int bytesPerDim = TestUtil.nextInt(random(), 1, 16); + final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30)); + Point[] points = createRandomPoints(1, bytesPerDim, maxDoc); + DummyPointsReader reader = new DummyPointsReader(points); + MutablePointsReaderUtils.sort(maxDoc, bytesPerDim, reader, 0, points.length); + Arrays.sort(points, new Comparator() { + @Override + public int compare(Point o1, Point o2) { + int cmp = StringHelper.compare(bytesPerDim, o1.packedValue, 0, o2.packedValue, 0); + if (cmp == 0) { + cmp = Integer.compare(o1.doc, o2.doc); + } + return cmp; + } + }); + assertNotSame(points, reader.points); + assertArrayEquals(points, reader.points); + } + + public void testSortByDim() { + for (int iter = 0; iter < 5; ++iter) { + doTestSortByDim(); + } + } + + private void doTestSortByDim() { + final int numDims = TestUtil.nextInt(random(), 1, 8); + final int bytesPerDim = TestUtil.nextInt(random(), 1, 16); + final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30)); + Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc); + int[] commonPrefixLengths = new int[numDims]; + for (int i = 0; i < commonPrefixLengths.length; ++i) { + commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim); + } + for (int i = 1; i < points.length; ++i) { + for (int dim = 0; dim < numDims; ++dim) { + int offset = dim * bytesPerDim; + System.arraycopy(points[0].packedValue, offset, points[i].packedValue, offset, commonPrefixLengths[dim]); + } + } + DummyPointsReader reader = new DummyPointsReader(points); + final int sortedDim = random().nextInt(numDims); + MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length, + new byte[numDims * bytesPerDim], new byte[numDims * bytesPerDim]); + for (int i = 1; i < points.length; ++i) { + final int offset = sortedDim * bytesPerDim; + int cmp = StringHelper.compare(bytesPerDim, reader.points[i-1].packedValue, offset, reader.points[i].packedValue, offset); + if (cmp == 0) { + cmp = reader.points[i - 1].doc - reader.points[i].doc; + } + assertTrue(cmp <= 0); + } + } + + public void testPartition() { + for (int iter = 0; iter < 5; ++iter) { + doTestPartition(); + } + } + + private void doTestPartition() { + final int numDims = TestUtil.nextInt(random(), 1, 8); + final int bytesPerDim = TestUtil.nextInt(random(), 1, 16); + final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30)); + Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc); + int commonPrefixLength = TestUtil.nextInt(random(), 0, bytesPerDim); + final int splitDim = random().nextInt(numDims); + for (int i = 1; i < points.length; ++i) { + int offset = splitDim * bytesPerDim; + System.arraycopy(points[0].packedValue, offset, points[i].packedValue, offset, commonPrefixLength); + } + DummyPointsReader reader = new DummyPointsReader(points); + final int pivot = 
TestUtil.nextInt(random(), 0, points.length - 1); + MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLength, reader, 0, points.length, pivot, + new byte[numDims * bytesPerDim], new byte[numDims * bytesPerDim]); + int offset = splitDim * bytesPerDim; + for (int i = 0; i < points.length; ++i) { + int cmp = StringHelper.compare(bytesPerDim, reader.points[i].packedValue, offset, reader.points[pivot].packedValue, offset); + if (cmp == 0) { + cmp = reader.points[i].doc - reader.points[pivot].doc; + } + if (i < pivot) { + assertTrue(cmp <= 0); + } else if (i > pivot) { + assertTrue(cmp >= 0); + } else { + assertEquals(0, cmp); + } + } + } + + private static Point[] createRandomPoints(int numDims, int bytesPerDim, int maxDoc) { + final int packedBytesLength = numDims * bytesPerDim; + final int numPoints = TestUtil.nextInt(random(), 1, 100000); + Point[] points = new Point[numPoints]; + for (int i = 0; i < numPoints; ++i) { + byte[] value = new byte[packedBytesLength]; + random().nextBytes(value); + points[i] = new Point(value, random().nextInt(maxDoc)); + } + return points; + } + + private static class Point { + final byte[] packedValue; + final int doc; + + Point(byte[] packedValue, int doc) { + this.packedValue = packedValue; + this.doc = doc; + } + + @Override + public boolean equals(Object obj) { + if (obj == null || obj instanceof Point == false) { + return false; + } + Point that = (Point) obj; + return Arrays.equals(packedValue, that.packedValue) && doc == that.doc; + } + + @Override + public int hashCode() { + return 31 * Arrays.hashCode(packedValue) + doc; + } + + @Override + public String toString() { + return "value=" + new BytesRef(packedValue) + " doc=" + doc; + } + } + + private static class DummyPointsReader extends MutablePointsReader { + + private final Point[] points; + + DummyPointsReader(Point[] points) { + this.points = points.clone(); + } + + @Override + public void close() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long ramBytesUsed() { + return 0; + } + + @Override + public void getValue(int i, byte[] packedValue) { + System.arraycopy(points[i].packedValue, 0, packedValue, 0, points[i].packedValue.length); + } + + @Override + public byte getByteAt(int i, int k) { + return points[i].packedValue[k]; + } + + @Override + public int getDocID(int i) { + return points[i].doc; + } + + @Override + public void swap(int i, int j) { + ArrayUtil.swap(points, i, j); + } + + @Override + public void checkIntegrity() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void intersect(String fieldName, IntersectVisitor visitor) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public byte[] getMinPackedValue(String fieldName) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public byte[] getMaxPackedValue(String fieldName) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int getNumDimensions(String fieldName) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int getBytesPerDimension(String fieldName) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long size(String fieldName) { + throw new UnsupportedOperationException(); + } + + @Override + public int getDocCount(String fieldName) { + throw new UnsupportedOperationException(); + } + + } + +}
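A closing sketch (an editor's addition, not part of the patch; the class and
method names are hypothetical): driving the new RadixSelector the same way
TestRadixSelector above does, to move the median of a BytesRef[] range into
its sorted position without fully sorting the range.

    import org.apache.lucene.util.ArrayUtil;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.RadixSelector;

    class RadixSelectorExample {

      /** Put the median of {@code arr[from..to)} at its sorted position. */
      static void median(final BytesRef[] arr, int from, int to, int maxLen) {
        final int mid = (from + to) >>> 1;
        new RadixSelector(maxLen) {
          @Override
          protected void swap(int i, int j) {
            ArrayUtil.swap(arr, i, j);
          }

          @Override
          protected int byteAt(int i, int k) {
            BytesRef b = arr[i];
            // -1 flags "key exhausted"; it maps to histogram bucket 0, so
            // shorter keys sort before longer keys with the same prefix
            return k >= b.length ? -1 : Byte.toUnsignedInt(b.bytes[b.offset + k]);
          }
        }.select(from, to, mid);
      }
    }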