LUCENE-4839: Sorter API: Use TimSort to sort doc IDs and postings lists.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1457272 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2013-03-16 16:33:30 +00:00
parent 77797c8c85
commit 8613df447f
8 changed files with 650 additions and 22 deletions

View File

@ -744,6 +744,46 @@ public final class ArrayUtil {
mergeSort(a, 0, a.length);
}
// timSorts:
/**
* Sorts the given array slice using the {@link Comparator}. This method uses the TimSort
* algorithm, but falls back to binary sort for small arrays.
* @param fromIndex start index (inclusive)
* @param toIndex end index (exclusive)
*/
public static <T> void timSort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) {
if (toIndex-fromIndex <= 1) return;
getSorter(a, comp).timSort(fromIndex, toIndex-1);
}
/**
* Sorts the given array using the {@link Comparator}. This method uses the TimSort
* algorithm, but falls back to binary sort for small arrays.
*/
public static <T> void timSort(T[] a, Comparator<? super T> comp) {
timSort(a, 0, a.length, comp);
}
/**
* Sorts the given array slice in natural order. This method uses the TimSort
* algorithm, but falls back to binary sort for small arrays.
* @param fromIndex start index (inclusive)
* @param toIndex end index (exclusive)
*/
public static <T extends Comparable<? super T>> void timSort(T[] a, int fromIndex, int toIndex) {
if (toIndex-fromIndex <= 1) return;
getSorter(a).timSort(fromIndex, toIndex-1);
}
/**
* Sorts the given array in natural order. This method uses the TimSort
* algorithm, but falls back to binary sort for small arrays.
*/
public static <T extends Comparable<? super T>> void timSort(T[] a) {
timSort(a, 0, a.length);
}
// insertionSorts:
/**
@ -784,4 +824,44 @@ public final class ArrayUtil {
insertionSort(a, 0, a.length);
}
// binarySorts:
/**
* Sorts the given array slice using the {@link Comparator}. This method uses the binary sort
* algorithm. It is only recommended to use this algorithm for small arrays!
* @param fromIndex start index (inclusive)
* @param toIndex end index (exclusive)
*/
public static <T> void binarySort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) {
if (toIndex-fromIndex <= 1) return;
getSorter(a, comp).binarySort(fromIndex, toIndex-1);
}
/**
* Sorts the given array using the {@link Comparator}. This method uses the binary sort
* algorithm. It is only recommended to use this algorithm for small arrays!
*/
public static <T> void binarySort(T[] a, Comparator<? super T> comp) {
binarySort(a, 0, a.length, comp);
}
/**
* Sorts the given array slice in natural order. This method uses the binary sort
* algorithm. It is only recommended to use this algorithm for small arrays!
* @param fromIndex start index (inclusive)
* @param toIndex end index (exclusive)
*/
public static <T extends Comparable<? super T>> void binarySort(T[] a, int fromIndex, int toIndex) {
if (toIndex-fromIndex <= 1) return;
getSorter(a).binarySort(fromIndex, toIndex-1);
}
/**
* Sorts the given array in natural order. This method uses the binary sort
* algorithm. It is only recommended to use this algorithm for small arrays!
*/
public static <T extends Comparable<? super T>> void binarySort(T[] a) {
binarySort(a, 0, a.length);
}
}

View File

@ -143,6 +143,32 @@ public final class CollectionUtil {
getSorter(list).mergeSort(0, size-1);
}
// timSorts:
/**
* Sorts the given random access {@link List} using the {@link Comparator}.
* The list must implement {@link RandomAccess}. This method uses the TimSort
* algorithm, but falls back to binary sort for small lists.
* @throws IllegalArgumentException if list is e.g. a linked list without random access.
*/
public static <T> void timSort(List<T> list, Comparator<? super T> comp) {
final int size = list.size();
if (size <= 1) return;
getSorter(list, comp).timSort(0, size-1);
}
/**
* Sorts the given random access {@link List} in natural order.
* The list must implement {@link RandomAccess}. This method uses the TimSort
* algorithm, but falls back to binary sort for small lists.
* @throws IllegalArgumentException if list is e.g. a linked list without random access.
*/
public static <T extends Comparable<? super T>> void timSort(List<T> list) {
final int size = list.size();
if (size <= 1) return;
getSorter(list).timSort(0, size-1);
}
// insertionSorts:
/**
@ -169,4 +195,29 @@ public final class CollectionUtil {
getSorter(list).insertionSort(0, size-1);
}
// binarySorts:
/**
* Sorts the given random access {@link List} using the {@link Comparator}.
* The list must implement {@link RandomAccess}. This method uses the binary sort
* algorithm. It is only recommended to use this algorithm for small lists!
* @throws IllegalArgumentException if list is e.g. a linked list without random access.
*/
public static <T> void binarySort(List<T> list, Comparator<? super T> comp) {
final int size = list.size();
if (size <= 1) return;
getSorter(list, comp).binarySort(0, size-1);
}
/**
* Sorts the given random access {@link List} in natural order.
* The list must implement {@link RandomAccess}. This method uses the insertion sort
* algorithm. It is only recommended to use this algorithm for small lists!
* @throws IllegalArgumentException if list is e.g. a linked list without random access.
*/
public static <T extends Comparable<? super T>> void binarySort(List<T> list) {
final int size = list.size();
if (size <= 1) return;
getSorter(list).binarySort(0, size-1);
}
}

View File

@ -1,5 +1,6 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -29,9 +30,27 @@ package org.apache.lucene.util;
*/
public abstract class SorterTemplate {
private static final int TIMSORT_MINRUN = 32;
private static final int TIMSORT_THRESHOLD = 64;
private static final int TIMSORT_STACKSIZE = 40; // change if you change TIMSORT_MINRUN
private static final int MERGESORT_THRESHOLD = 12;
private static final int QUICKSORT_THRESHOLD = 7;
static {
// check whether TIMSORT_STACKSIZE is large enough
// for a run length of TIMSORT_MINRUN and an array
// of 2B values when TimSort invariants are verified
final long[] lengths = new long[TIMSORT_STACKSIZE];
lengths[0] = TIMSORT_MINRUN;
lengths[1] = lengths[0] + 1;
for (int i = 2; i < TIMSORT_STACKSIZE; ++i) {
lengths[i] = lengths[i-2] + lengths[i-1] + 1;
}
if (lengths[TIMSORT_STACKSIZE - 1] < Integer.MAX_VALUE) {
throw new Error("TIMSORT_STACKSIZE is too small");
}
}
/** Implement this method, that swaps slots {@code i} and {@code j} in your data */
protected abstract void swap(int i, int j);
@ -46,7 +65,7 @@ public abstract class SorterTemplate {
* Should be implemented like <code>pivot.compareTo(<em>valueOf(j)</em>)</code> */
protected abstract int comparePivot(int j);
/** Sorts via stable in-place InsertionSort algorithm
/** Sorts via stable in-place InsertionSort algorithm (O(n<sup>2</sup>))
*(ideal for small collections which are mostly presorted). */
public final void insertionSort(int lo, int hi) {
for (int i = lo + 1 ; i <= hi; i++) {
@ -60,6 +79,28 @@ public abstract class SorterTemplate {
}
}
/** Sorts via stable in-place BinarySort algorithm (O(n<sup>2</sup>))
* (ideal for small collections which are in random order). */
public final void binarySort(int lo, int hi) {
for (int i = lo + 1; i <= hi; ++i) {
int l = lo;
int h = i - 1;
setPivot(i);
while (l <= h) {
final int mid = (l + h) >>> 1;
final int cmp = comparePivot(mid);
if (cmp < 0) {
h = mid - 1;
} else {
l = mid + 1;
}
}
for (int j = i; j > l; --j) {
swap(j - 1, j);
}
}
}
/** Sorts via in-place, but unstable, QuickSort algorithm.
* For small collections falls back to {@link #insertionSort(int,int)}. */
public final void quickSort(final int lo, final int hi) {
@ -118,6 +159,174 @@ public abstract class SorterTemplate {
quickSort(left + 1, hi, maxDepth);
}
/** TimSort implementation. The only difference with the spec is that this
* impl reuses {@link SorterTemplate#merge(int, int, int, int, int)} to
* merge runs (in place) instead of the original merging routine from
* TimSort (which requires extra memory but might be slightly faster). */
private class TimSort {
final int hi;
final int minRun;
final int[] runEnds;
int stackSize;
TimSort(int lo, int hi) {
assert hi > lo;
// +1 because the first slot is reserved and always lo
runEnds = new int[TIMSORT_STACKSIZE + 1];
runEnds[0] = lo;
stackSize = 0;
this.hi = hi;
minRun = minRun(hi - lo + 1);
}
/** Minimum run length for an array of length <code>length</code>. */
int minRun(int length) {
assert length >= TIMSORT_MINRUN;
int n = length;
int r = 0;
while (n >= 64) {
r |= n & 1;
n >>>= 1;
}
final int minRun = n + r;
assert minRun >= TIMSORT_MINRUN && minRun <= 64;
return minRun;
}
int runLen(int i) {
final int off = stackSize - i;
return runEnds[off] - runEnds[off - 1];
}
int runBase(int i) {
return runEnds[stackSize - i - 1];
}
int runEnd(int i) {
return runEnds[stackSize - i];
}
void setRunEnd(int i, int runEnd) {
runEnds[stackSize - i] = runEnd;
}
void pushRunLen(int len) {
runEnds[stackSize + 1] = runEnds[stackSize] + len;
++stackSize;
}
/** Merge run i with run i+1 */
void mergeAt(int i) {
assert stackSize > i + 1;
final int l = runBase(i+1);
final int pivot = runBase(i);
final int h = runEnd(i);
merge(l, pivot, h, pivot - l, h - pivot);
for (int j = 1; j <= i+1; ++j) {
setRunEnd(j, runEnd(j-1));
}
--stackSize;
}
/** Compute the length of the next run, make the run sorted and return its
* length. */
int nextRun() {
final int runBase = runEnd(0);
if (runBase == hi) {
return 1;
}
int l = 1; // length of the run
if (compare(runBase, runBase+1) > 0) {
// run must be strictly descending
while (runBase + l <= hi && compare(runBase + l - 1, runBase + l) > 0) {
++l;
}
if (l < minRun && runBase + l <= hi) {
l = Math.min(hi - runBase + 1, minRun);
binarySort(runBase, runBase + l - 1);
} else {
// revert
for (int i = 0, halfL = l >>> 1; i < halfL; ++i) {
swap(runBase + i, runBase + l - i - 1);
}
}
} else {
// run must be non-descending
while (runBase + l <= hi && compare(runBase + l - 1, runBase + l) <= 0) {
++l;
}
if (l < minRun && runBase + l <= hi) {
l = Math.min(hi - runBase + 1, minRun);
binarySort(runBase, runBase + l - 1);
} // else nothing to do, the run is already sorted
}
return l;
}
void ensureInvariants() {
while (stackSize > 1) {
final int runLen0 = runLen(0);
final int runLen1 = runLen(1);
if (stackSize > 2) {
final int runLen2 = runLen(2);
if (runLen2 <= runLen1 + runLen0) {
// merge the smaller of 0 and 2 with 1
if (runLen2 < runLen0) {
mergeAt(1);
} else {
mergeAt(0);
}
continue;
}
}
if (runLen1 <= runLen0) {
mergeAt(0);
continue;
}
break;
}
}
void exhaustStack() {
while (stackSize > 1) {
mergeAt(0);
}
}
void sort() {
do {
ensureInvariants();
// Push a new run onto the stack
pushRunLen(nextRun());
} while (runEnd(0) <= hi);
exhaustStack();
assert runEnd(0) == hi + 1;
}
}
/** Sorts using TimSort, see http://svn.python.org/projects/python/trunk/Objects/listsort.txt
* and http://svn.python.org/projects/python/trunk/Objects/listobject.c.
* TimSort is a stable sorting algorithm based on MergeSort but known to
* perform extremely well on partially-sorted inputs.
* For small collections, falls back to {@link #binarySort(int, int)}. */
public final void timSort(int lo, int hi) {
if (hi - lo <= TIMSORT_THRESHOLD) {
binarySort(lo, hi);
return;
}
new TimSort(lo, hi).sort();
}
/** Sorts via stable in-place MergeSort algorithm
* For small collections falls back to {@link #insertionSort(int,int)}. */
public final void mergeSort(int lo, int hi) {

View File

@ -188,6 +188,26 @@ public class TestArrayUtil extends LuceneTestCase {
}
}
public void testTimSort() {
int num = atLeast(65);
for (int i = 0; i < num; i++) {
Integer[] a1 = createRandomArray(1000), a2 = a1.clone();
ArrayUtil.timSort(a1);
Arrays.sort(a2);
assertArrayEquals(a2, a1);
a1 = createRandomArray(1000);
a2 = a1.clone();
ArrayUtil.timSort(a1, Collections.reverseOrder());
Arrays.sort(a2, Collections.reverseOrder());
assertArrayEquals(a2, a1);
// reverse back, so we can test that completely backwards sorted array (worst case) is working:
ArrayUtil.timSort(a1);
Arrays.sort(a2);
assertArrayEquals(a2, a1);
}
}
public void testInsertionSort() {
for (int i = 0, c = atLeast(500); i < c; i++) {
Integer[] a1 = createRandomArray(30), a2 = a1.clone();
@ -207,6 +227,25 @@ public class TestArrayUtil extends LuceneTestCase {
}
}
public void testBinarySort() {
for (int i = 0, c = atLeast(500); i < c; i++) {
Integer[] a1 = createRandomArray(30), a2 = a1.clone();
ArrayUtil.binarySort(a1);
Arrays.sort(a2);
assertArrayEquals(a2, a1);
a1 = createRandomArray(30);
a2 = a1.clone();
ArrayUtil.binarySort(a1, Collections.reverseOrder());
Arrays.sort(a2, Collections.reverseOrder());
assertArrayEquals(a2, a1);
// reverse back, so we can test that completely backwards sorted array (worst case) is working:
ArrayUtil.binarySort(a1);
Arrays.sort(a2);
assertArrayEquals(a2, a1);
}
}
static class Item implements Comparable<Item> {
final int val, order;
@ -255,15 +294,48 @@ public class TestArrayUtil extends LuceneTestCase {
}
}
public void testTimSortStability() {
final Random rnd = random();
Item[] items = new Item[100];
for (int i = 0; i < items.length; i++) {
// half of the items have value but same order. The value of this items is sorted,
// so they should always be in order after sorting.
// The other half has defined order, but no (-1) value (they should appear after
// all above, when sorted).
final boolean equal = rnd.nextBoolean();
items[i] = new Item(equal ? (i+1) : -1, equal ? 0 : (rnd.nextInt(1000)+1));
}
if (VERBOSE) System.out.println("Before: " + Arrays.toString(items));
// if you replace this with ArrayUtil.quickSort(), test should fail:
ArrayUtil.timSort(items);
if (VERBOSE) System.out.println("Sorted: " + Arrays.toString(items));
Item last = items[0];
for (int i = 1; i < items.length; i++) {
final Item act = items[i];
if (act.order == 0) {
// order of "equal" items should be not mixed up
assertTrue(act.val > last.val);
}
assertTrue(act.order >= last.order);
last = act;
}
}
// should produce no exceptions
public void testEmptyArraySort() {
Integer[] a = new Integer[0];
ArrayUtil.quickSort(a);
ArrayUtil.mergeSort(a);
ArrayUtil.insertionSort(a);
ArrayUtil.binarySort(a);
ArrayUtil.timSort(a);
ArrayUtil.quickSort(a, Collections.reverseOrder());
ArrayUtil.mergeSort(a, Collections.reverseOrder());
ArrayUtil.timSort(a, Collections.reverseOrder());
ArrayUtil.insertionSort(a, Collections.reverseOrder());
ArrayUtil.binarySort(a, Collections.reverseOrder());
}
}

View File

@ -73,6 +73,25 @@ public class TestCollectionUtil extends LuceneTestCase {
}
}
public void testTimSort() {
for (int i = 0, c = atLeast(500); i < c; i++) {
List<Integer> list1 = createRandomList(1000), list2 = new ArrayList<Integer>(list1);
CollectionUtil.timSort(list1);
Collections.sort(list2);
assertEquals(list2, list1);
list1 = createRandomList(1000);
list2 = new ArrayList<Integer>(list1);
CollectionUtil.timSort(list1, Collections.reverseOrder());
Collections.sort(list2, Collections.reverseOrder());
assertEquals(list2, list1);
// reverse back, so we can test that completely backwards sorted array (worst case) is working:
CollectionUtil.timSort(list1);
Collections.sort(list2);
assertEquals(list2, list1);
}
}
public void testInsertionSort() {
for (int i = 0, c = atLeast(500); i < c; i++) {
List<Integer> list1 = createRandomList(30), list2 = new ArrayList<Integer>(list1);
@ -92,24 +111,51 @@ public class TestCollectionUtil extends LuceneTestCase {
}
}
public void testBinarySort() {
for (int i = 0, c = atLeast(500); i < c; i++) {
List<Integer> list1 = createRandomList(30), list2 = new ArrayList<Integer>(list1);
CollectionUtil.binarySort(list1);
Collections.sort(list2);
assertEquals(list2, list1);
list1 = createRandomList(30);
list2 = new ArrayList<Integer>(list1);
CollectionUtil.binarySort(list1, Collections.reverseOrder());
Collections.sort(list2, Collections.reverseOrder());
assertEquals(list2, list1);
// reverse back, so we can test that completely backwards sorted array (worst case) is working:
CollectionUtil.binarySort(list1);
Collections.sort(list2);
assertEquals(list2, list1);
}
}
public void testEmptyListSort() {
// should produce no exceptions
List<Integer> list = Arrays.asList(new Integer[0]); // LUCENE-2989
CollectionUtil.quickSort(list);
CollectionUtil.mergeSort(list);
CollectionUtil.timSort(list);
CollectionUtil.insertionSort(list);
CollectionUtil.binarySort(list);
CollectionUtil.quickSort(list, Collections.reverseOrder());
CollectionUtil.mergeSort(list, Collections.reverseOrder());
CollectionUtil.timSort(list, Collections.reverseOrder());
CollectionUtil.insertionSort(list, Collections.reverseOrder());
CollectionUtil.binarySort(list, Collections.reverseOrder());
// check that empty non-random access lists pass sorting without ex (as sorting is not needed)
list = new LinkedList<Integer>();
CollectionUtil.quickSort(list);
CollectionUtil.mergeSort(list);
CollectionUtil.timSort(list);
CollectionUtil.insertionSort(list);
CollectionUtil.binarySort(list);
CollectionUtil.quickSort(list, Collections.reverseOrder());
CollectionUtil.mergeSort(list, Collections.reverseOrder());
CollectionUtil.timSort(list, Collections.reverseOrder());
CollectionUtil.insertionSort(list, Collections.reverseOrder());
CollectionUtil.binarySort(list, Collections.reverseOrder());
}
public void testOneElementListSort() {
@ -118,10 +164,14 @@ public class TestCollectionUtil extends LuceneTestCase {
list.add(1);
CollectionUtil.quickSort(list);
CollectionUtil.mergeSort(list);
CollectionUtil.timSort(list);
CollectionUtil.insertionSort(list);
CollectionUtil.binarySort(list);
CollectionUtil.quickSort(list, Collections.reverseOrder());
CollectionUtil.mergeSort(list, Collections.reverseOrder());
CollectionUtil.timSort(list, Collections.reverseOrder());
CollectionUtil.insertionSort(list, Collections.reverseOrder());
CollectionUtil.binarySort(list, Collections.reverseOrder());
}
}

View File

@ -0,0 +1,167 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Arrays;
public class TestSorterTemplate extends LuceneTestCase {
private static final int SLOW_SORT_THRESHOLD = 1000;
// A sorter template that compares only the last 32 bits
static class Last32BitsSorterTemplate extends SorterTemplate {
final long[] arr;
long pivot;
Last32BitsSorterTemplate(long[] arr) {
this.arr = arr;
}
@Override
protected void swap(int i, int j) {
final long tmp = arr[i];
arr[i] = arr[j];
arr[j] = tmp;
}
private int compareValues(long i, long j) {
// only compare the last 32 bits
final long a = i & 0xFFFFFFFFL;
final long b = j & 0xFFFFFFFFL;
return a < b ? -1 : a == b ? 0 : 1;
}
@Override
protected int compare(int i, int j) {
return compareValues(arr[i], arr[j]);
}
@Override
protected void setPivot(int i) {
pivot = arr[i];
}
@Override
protected int comparePivot(int j) {
return compareValues(pivot, arr[j]);
}
}
void testSort(int[] intArr) {
// we modify the array as a long[] and store the original ord in the first 32 bits
// to be able to check stability
final long[] arr = toLongsAndOrds(intArr);
// use MergeSort as a reference
// assertArrayEquals checks for sorting + stability
// assertArrayEquals(toInts) checks for sorting only
final long[] mergeSorted = Arrays.copyOf(arr, arr.length);
new Last32BitsSorterTemplate(mergeSorted).mergeSort(0, arr.length - 1);
if (arr.length < SLOW_SORT_THRESHOLD) {
final long[] insertionSorted = Arrays.copyOf(arr, arr.length);
new Last32BitsSorterTemplate(insertionSorted).insertionSort(0, arr.length - 1);
assertArrayEquals(mergeSorted, insertionSorted);
final long[] binarySorted = Arrays.copyOf(arr, arr.length);
new Last32BitsSorterTemplate(binarySorted).binarySort(0, arr.length - 1);
assertArrayEquals(mergeSorted, binarySorted);
}
final long[] quickSorted = Arrays.copyOf(arr, arr.length);
new Last32BitsSorterTemplate(quickSorted).quickSort(0, arr.length - 1);
assertArrayEquals(toInts(mergeSorted), toInts(quickSorted));
final long[] timSorted = Arrays.copyOf(arr, arr.length);
new Last32BitsSorterTemplate(timSorted).timSort(0, arr.length - 1);
assertArrayEquals(mergeSorted, timSorted);
}
private int[] toInts(long[] longArr) {
int[] arr = new int[longArr.length];
for (int i = 0; i < longArr.length; ++i) {
arr[i] = (int) longArr[i];
}
return arr;
}
private long[] toLongsAndOrds(int[] intArr) {
final long[] arr = new long[intArr.length];
for (int i = 0; i < intArr.length; ++i) {
arr[i] = (((long) i) << 32) | (intArr[i] & 0xFFFFFFFFL);
}
return arr;
}
int randomLength() {
return random().nextBoolean()
? random().nextInt(SLOW_SORT_THRESHOLD)
: random().nextInt(100000);
}
public void testAscending() {
final int length = randomLength();
final int[] arr = new int[length];
arr[0] = random().nextInt(10);
for (int i = 1; i < arr.length; ++i) {
arr[i] = arr[i-1] + _TestUtil.nextInt(random(), 0, 10);
}
testSort(arr);
}
public void testDescending() {
final int length = randomLength();
final int[] arr = new int[length];
arr[0] = random().nextInt(10);
for (int i = 1; i < arr.length; ++i) {
arr[i] = arr[i-1] - _TestUtil.nextInt(random(), 0, 10);
}
testSort(arr);
}
public void testStrictlyDescending() {
final int length = randomLength();
final int[] arr = new int[length];
arr[0] = random().nextInt(10);
for (int i = 1; i < arr.length; ++i) {
arr[i] = arr[i-1] - _TestUtil.nextInt(random(), 1, 10);
}
testSort(arr);
}
public void testRandom1() {
final int length = randomLength();
final int[] arr = new int[length];
for (int i = 1; i < arr.length; ++i) {
arr[i] = random().nextInt();
}
testSort(arr);
}
public void testRandom2() {
final int length = randomLength();
final int[] arr = new int[length];
for (int i = 1; i < arr.length; ++i) {
arr[i] = random().nextInt(10);
}
testSort(arr);
}
}

View File

@ -147,8 +147,9 @@ public abstract class Sorter {
}
SorterTemplate sorter = new DocValueSorterTemplate(docs, comparator);
// TODO: use a stable sort instead?
sorter.quickSort(0, docs.length - 1); // docs is now the newToOld mapping
// It can be common to sort a reader, add docs, sort it again, ... and in
// that case timSort can save a lot of time
sorter.timSort(0, docs.length - 1); // docs is now the newToOld mapping
// The reason why we use MonotonicAppendingLongBuffer here is that it
// wastes very little memory if the index is in random order but can save

View File

@ -307,11 +307,13 @@ public class SortingAtomicReader extends FilterAtomicReader {
docs[i] = docs[j];
docs[j] = tmpDoc;
if (freqs != null) {
int tmpFreq = freqs[i];
freqs[i] = freqs[j];
freqs[j] = tmpFreq;
}
}
}
private int[] docs = new int[64];
private int[] freqs;
@ -335,8 +337,6 @@ public class SortingAtomicReader extends FilterAtomicReader {
freqs[i] = in.freq();
++i;
}
SorterTemplate sorter = new DocFreqSorterTemplate(docs, freqs);
sorter.quickSort(0, i - 1);
} else {
freqs = null;
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){
@ -345,8 +345,10 @@ public class SortingAtomicReader extends FilterAtomicReader {
}
docs[i++] = docMap.oldToNew(doc);
}
Arrays.sort(docs, 0, i);
}
// TimSort can save much time compared to other sorts in case of
// reverse sorting, or when sorting a concatenation of sorted readers
new DocFreqSorterTemplate(docs, freqs).timSort(0, i - 1);
upto = i;
}
@ -451,12 +453,9 @@ public class SortingAtomicReader extends FilterAtomicReader {
int i = 0;
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (i == docs.length) {
docs = ArrayUtil.grow(docs, docs.length + 1);
// don't grow() offsets since growing pattern for long and int is not the same.
// since we want docs and offsets at the same length, just grow it manually.
long[] tmp = new long[docs.length];
System.arraycopy(offsets, 0, tmp, 0, offsets.length);
offsets = tmp;
final int newLength = ArrayUtil.oversize(i + 1, 4);
docs = Arrays.copyOf(docs, newLength);
offsets = Arrays.copyOf(offsets, newLength);
}
docs[i] = docMap.oldToNew(doc);
offsets[i] = out.getFilePointer();
@ -464,8 +463,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
i++;
}
upto = i;
SorterTemplate sorter = new DocOffsetSorterTemplate(docs, offsets);
sorter.quickSort(0, upto - 1);
new DocOffsetSorterTemplate(docs, offsets).timSort(0, upto - 1);
out.close();
this.postingInput = new RAMInputStream("", file);
}