mirror of https://github.com/apache/lucene.git
LUCENE-4839: Sorter API: Use TimSort to sort doc IDs and postings lists.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1457272 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
77797c8c85
commit
8613df447f
|
@ -744,6 +744,46 @@ public final class ArrayUtil {
|
|||
mergeSort(a, 0, a.length);
|
||||
}
|
||||
|
||||
// timSorts:
|
||||
|
||||
/**
|
||||
* Sorts the given array slice using the {@link Comparator}. This method uses the TimSort
|
||||
* algorithm, but falls back to binary sort for small arrays.
|
||||
* @param fromIndex start index (inclusive)
|
||||
* @param toIndex end index (exclusive)
|
||||
*/
|
||||
public static <T> void timSort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) {
|
||||
if (toIndex-fromIndex <= 1) return;
|
||||
getSorter(a, comp).timSort(fromIndex, toIndex-1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the given array using the {@link Comparator}. This method uses the TimSort
|
||||
* algorithm, but falls back to binary sort for small arrays.
|
||||
*/
|
||||
public static <T> void timSort(T[] a, Comparator<? super T> comp) {
|
||||
timSort(a, 0, a.length, comp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the given array slice in natural order. This method uses the TimSort
|
||||
* algorithm, but falls back to binary sort for small arrays.
|
||||
* @param fromIndex start index (inclusive)
|
||||
* @param toIndex end index (exclusive)
|
||||
*/
|
||||
public static <T extends Comparable<? super T>> void timSort(T[] a, int fromIndex, int toIndex) {
|
||||
if (toIndex-fromIndex <= 1) return;
|
||||
getSorter(a).timSort(fromIndex, toIndex-1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the given array in natural order. This method uses the TimSort
|
||||
* algorithm, but falls back to binary sort for small arrays.
|
||||
*/
|
||||
public static <T extends Comparable<? super T>> void timSort(T[] a) {
|
||||
timSort(a, 0, a.length);
|
||||
}
|
||||
|
||||
// insertionSorts:
|
||||
|
||||
/**
|
||||
|
@ -784,4 +824,44 @@ public final class ArrayUtil {
|
|||
insertionSort(a, 0, a.length);
|
||||
}
|
||||
|
||||
// binarySorts:
|
||||
|
||||
/**
|
||||
* Sorts the given array slice using the {@link Comparator}. This method uses the binary sort
|
||||
* algorithm. It is only recommended to use this algorithm for small arrays!
|
||||
* @param fromIndex start index (inclusive)
|
||||
* @param toIndex end index (exclusive)
|
||||
*/
|
||||
public static <T> void binarySort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) {
|
||||
if (toIndex-fromIndex <= 1) return;
|
||||
getSorter(a, comp).binarySort(fromIndex, toIndex-1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the given array using the {@link Comparator}. This method uses the binary sort
|
||||
* algorithm. It is only recommended to use this algorithm for small arrays!
|
||||
*/
|
||||
public static <T> void binarySort(T[] a, Comparator<? super T> comp) {
|
||||
binarySort(a, 0, a.length, comp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the given array slice in natural order. This method uses the binary sort
|
||||
* algorithm. It is only recommended to use this algorithm for small arrays!
|
||||
* @param fromIndex start index (inclusive)
|
||||
* @param toIndex end index (exclusive)
|
||||
*/
|
||||
public static <T extends Comparable<? super T>> void binarySort(T[] a, int fromIndex, int toIndex) {
|
||||
if (toIndex-fromIndex <= 1) return;
|
||||
getSorter(a).binarySort(fromIndex, toIndex-1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the given array in natural order. This method uses the binary sort
|
||||
* algorithm. It is only recommended to use this algorithm for small arrays!
|
||||
*/
|
||||
public static <T extends Comparable<? super T>> void binarySort(T[] a) {
|
||||
binarySort(a, 0, a.length);
|
||||
}
|
||||
|
||||
}
|
|
@ -143,6 +143,32 @@ public final class CollectionUtil {
|
|||
getSorter(list).mergeSort(0, size-1);
|
||||
}
|
||||
|
||||
// timSorts:
|
||||
|
||||
/**
|
||||
* Sorts the given random access {@link List} using the {@link Comparator}.
|
||||
* The list must implement {@link RandomAccess}. This method uses the TimSort
|
||||
* algorithm, but falls back to binary sort for small lists.
|
||||
* @throws IllegalArgumentException if list is e.g. a linked list without random access.
|
||||
*/
|
||||
public static <T> void timSort(List<T> list, Comparator<? super T> comp) {
|
||||
final int size = list.size();
|
||||
if (size <= 1) return;
|
||||
getSorter(list, comp).timSort(0, size-1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the given random access {@link List} in natural order.
|
||||
* The list must implement {@link RandomAccess}. This method uses the TimSort
|
||||
* algorithm, but falls back to binary sort for small lists.
|
||||
* @throws IllegalArgumentException if list is e.g. a linked list without random access.
|
||||
*/
|
||||
public static <T extends Comparable<? super T>> void timSort(List<T> list) {
|
||||
final int size = list.size();
|
||||
if (size <= 1) return;
|
||||
getSorter(list).timSort(0, size-1);
|
||||
}
|
||||
|
||||
// insertionSorts:
|
||||
|
||||
/**
|
||||
|
@ -168,5 +194,30 @@ public final class CollectionUtil {
|
|||
if (size <= 1) return;
|
||||
getSorter(list).insertionSort(0, size-1);
|
||||
}
|
||||
|
||||
// binarySorts:
|
||||
|
||||
/**
|
||||
* Sorts the given random access {@link List} using the {@link Comparator}.
|
||||
* The list must implement {@link RandomAccess}. This method uses the binary sort
|
||||
* algorithm. It is only recommended to use this algorithm for small lists!
|
||||
* @throws IllegalArgumentException if list is e.g. a linked list without random access.
|
||||
*/
|
||||
public static <T> void binarySort(List<T> list, Comparator<? super T> comp) {
|
||||
final int size = list.size();
|
||||
if (size <= 1) return;
|
||||
getSorter(list, comp).binarySort(0, size-1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts the given random access {@link List} in natural order.
|
||||
* The list must implement {@link RandomAccess}. This method uses the insertion sort
|
||||
* algorithm. It is only recommended to use this algorithm for small lists!
|
||||
* @throws IllegalArgumentException if list is e.g. a linked list without random access.
|
||||
*/
|
||||
public static <T extends Comparable<? super T>> void binarySort(List<T> list) {
|
||||
final int size = list.size();
|
||||
if (size <= 1) return;
|
||||
getSorter(list).binarySort(0, size-1);
|
||||
}
|
||||
}
|
|
@ -1,5 +1,6 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -29,9 +30,27 @@ package org.apache.lucene.util;
|
|||
*/
|
||||
public abstract class SorterTemplate {
|
||||
|
||||
private static final int TIMSORT_MINRUN = 32;
|
||||
private static final int TIMSORT_THRESHOLD = 64;
|
||||
private static final int TIMSORT_STACKSIZE = 40; // change if you change TIMSORT_MINRUN
|
||||
private static final int MERGESORT_THRESHOLD = 12;
|
||||
private static final int QUICKSORT_THRESHOLD = 7;
|
||||
|
||||
static {
|
||||
// check whether TIMSORT_STACKSIZE is large enough
|
||||
// for a run length of TIMSORT_MINRUN and an array
|
||||
// of 2B values when TimSort invariants are verified
|
||||
final long[] lengths = new long[TIMSORT_STACKSIZE];
|
||||
lengths[0] = TIMSORT_MINRUN;
|
||||
lengths[1] = lengths[0] + 1;
|
||||
for (int i = 2; i < TIMSORT_STACKSIZE; ++i) {
|
||||
lengths[i] = lengths[i-2] + lengths[i-1] + 1;
|
||||
}
|
||||
if (lengths[TIMSORT_STACKSIZE - 1] < Integer.MAX_VALUE) {
|
||||
throw new Error("TIMSORT_STACKSIZE is too small");
|
||||
}
|
||||
}
|
||||
|
||||
/** Implement this method, that swaps slots {@code i} and {@code j} in your data */
|
||||
protected abstract void swap(int i, int j);
|
||||
|
||||
|
@ -46,7 +65,7 @@ public abstract class SorterTemplate {
|
|||
* Should be implemented like <code>pivot.compareTo(<em>valueOf(j)</em>)</code> */
|
||||
protected abstract int comparePivot(int j);
|
||||
|
||||
/** Sorts via stable in-place InsertionSort algorithm
|
||||
/** Sorts via stable in-place InsertionSort algorithm (O(n<sup>2</sup>))
|
||||
*(ideal for small collections which are mostly presorted). */
|
||||
public final void insertionSort(int lo, int hi) {
|
||||
for (int i = lo + 1 ; i <= hi; i++) {
|
||||
|
@ -60,6 +79,28 @@ public abstract class SorterTemplate {
|
|||
}
|
||||
}
|
||||
|
||||
/** Sorts via stable in-place BinarySort algorithm (O(n<sup>2</sup>))
|
||||
* (ideal for small collections which are in random order). */
|
||||
public final void binarySort(int lo, int hi) {
|
||||
for (int i = lo + 1; i <= hi; ++i) {
|
||||
int l = lo;
|
||||
int h = i - 1;
|
||||
setPivot(i);
|
||||
while (l <= h) {
|
||||
final int mid = (l + h) >>> 1;
|
||||
final int cmp = comparePivot(mid);
|
||||
if (cmp < 0) {
|
||||
h = mid - 1;
|
||||
} else {
|
||||
l = mid + 1;
|
||||
}
|
||||
}
|
||||
for (int j = i; j > l; --j) {
|
||||
swap(j - 1, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Sorts via in-place, but unstable, QuickSort algorithm.
|
||||
* For small collections falls back to {@link #insertionSort(int,int)}. */
|
||||
public final void quickSort(final int lo, final int hi) {
|
||||
|
@ -117,7 +158,175 @@ public abstract class SorterTemplate {
|
|||
quickSort(lo, left, maxDepth);
|
||||
quickSort(left + 1, hi, maxDepth);
|
||||
}
|
||||
|
||||
|
||||
/** TimSort implementation. The only difference with the spec is that this
|
||||
* impl reuses {@link SorterTemplate#merge(int, int, int, int, int)} to
|
||||
* merge runs (in place) instead of the original merging routine from
|
||||
* TimSort (which requires extra memory but might be slightly faster). */
|
||||
private class TimSort {
|
||||
|
||||
final int hi;
|
||||
final int minRun;
|
||||
final int[] runEnds;
|
||||
int stackSize;
|
||||
|
||||
TimSort(int lo, int hi) {
|
||||
assert hi > lo;
|
||||
// +1 because the first slot is reserved and always lo
|
||||
runEnds = new int[TIMSORT_STACKSIZE + 1];
|
||||
runEnds[0] = lo;
|
||||
stackSize = 0;
|
||||
this.hi = hi;
|
||||
minRun = minRun(hi - lo + 1);
|
||||
}
|
||||
|
||||
/** Minimum run length for an array of length <code>length</code>. */
|
||||
int minRun(int length) {
|
||||
assert length >= TIMSORT_MINRUN;
|
||||
int n = length;
|
||||
int r = 0;
|
||||
while (n >= 64) {
|
||||
r |= n & 1;
|
||||
n >>>= 1;
|
||||
}
|
||||
final int minRun = n + r;
|
||||
assert minRun >= TIMSORT_MINRUN && minRun <= 64;
|
||||
return minRun;
|
||||
}
|
||||
|
||||
int runLen(int i) {
|
||||
final int off = stackSize - i;
|
||||
return runEnds[off] - runEnds[off - 1];
|
||||
}
|
||||
|
||||
int runBase(int i) {
|
||||
return runEnds[stackSize - i - 1];
|
||||
}
|
||||
|
||||
int runEnd(int i) {
|
||||
return runEnds[stackSize - i];
|
||||
}
|
||||
|
||||
void setRunEnd(int i, int runEnd) {
|
||||
runEnds[stackSize - i] = runEnd;
|
||||
}
|
||||
|
||||
void pushRunLen(int len) {
|
||||
runEnds[stackSize + 1] = runEnds[stackSize] + len;
|
||||
++stackSize;
|
||||
}
|
||||
|
||||
/** Merge run i with run i+1 */
|
||||
void mergeAt(int i) {
|
||||
assert stackSize > i + 1;
|
||||
final int l = runBase(i+1);
|
||||
final int pivot = runBase(i);
|
||||
final int h = runEnd(i);
|
||||
merge(l, pivot, h, pivot - l, h - pivot);
|
||||
for (int j = 1; j <= i+1; ++j) {
|
||||
setRunEnd(j, runEnd(j-1));
|
||||
}
|
||||
--stackSize;
|
||||
}
|
||||
|
||||
/** Compute the length of the next run, make the run sorted and return its
|
||||
* length. */
|
||||
int nextRun() {
|
||||
final int runBase = runEnd(0);
|
||||
if (runBase == hi) {
|
||||
return 1;
|
||||
}
|
||||
int l = 1; // length of the run
|
||||
if (compare(runBase, runBase+1) > 0) {
|
||||
// run must be strictly descending
|
||||
while (runBase + l <= hi && compare(runBase + l - 1, runBase + l) > 0) {
|
||||
++l;
|
||||
}
|
||||
if (l < minRun && runBase + l <= hi) {
|
||||
l = Math.min(hi - runBase + 1, minRun);
|
||||
binarySort(runBase, runBase + l - 1);
|
||||
} else {
|
||||
// revert
|
||||
for (int i = 0, halfL = l >>> 1; i < halfL; ++i) {
|
||||
swap(runBase + i, runBase + l - i - 1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// run must be non-descending
|
||||
while (runBase + l <= hi && compare(runBase + l - 1, runBase + l) <= 0) {
|
||||
++l;
|
||||
}
|
||||
if (l < minRun && runBase + l <= hi) {
|
||||
l = Math.min(hi - runBase + 1, minRun);
|
||||
binarySort(runBase, runBase + l - 1);
|
||||
} // else nothing to do, the run is already sorted
|
||||
}
|
||||
return l;
|
||||
}
|
||||
|
||||
void ensureInvariants() {
|
||||
while (stackSize > 1) {
|
||||
final int runLen0 = runLen(0);
|
||||
final int runLen1 = runLen(1);
|
||||
|
||||
if (stackSize > 2) {
|
||||
final int runLen2 = runLen(2);
|
||||
|
||||
if (runLen2 <= runLen1 + runLen0) {
|
||||
// merge the smaller of 0 and 2 with 1
|
||||
if (runLen2 < runLen0) {
|
||||
mergeAt(1);
|
||||
} else {
|
||||
mergeAt(0);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (runLen1 <= runLen0) {
|
||||
mergeAt(0);
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void exhaustStack() {
|
||||
while (stackSize > 1) {
|
||||
mergeAt(0);
|
||||
}
|
||||
}
|
||||
|
||||
void sort() {
|
||||
do {
|
||||
ensureInvariants();
|
||||
|
||||
// Push a new run onto the stack
|
||||
pushRunLen(nextRun());
|
||||
|
||||
} while (runEnd(0) <= hi);
|
||||
|
||||
exhaustStack();
|
||||
assert runEnd(0) == hi + 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Sorts using TimSort, see http://svn.python.org/projects/python/trunk/Objects/listsort.txt
|
||||
* and http://svn.python.org/projects/python/trunk/Objects/listobject.c.
|
||||
* TimSort is a stable sorting algorithm based on MergeSort but known to
|
||||
* perform extremely well on partially-sorted inputs.
|
||||
* For small collections, falls back to {@link #binarySort(int, int)}. */
|
||||
public final void timSort(int lo, int hi) {
|
||||
if (hi - lo <= TIMSORT_THRESHOLD) {
|
||||
binarySort(lo, hi);
|
||||
return;
|
||||
}
|
||||
|
||||
new TimSort(lo, hi).sort();
|
||||
}
|
||||
|
||||
/** Sorts via stable in-place MergeSort algorithm
|
||||
* For small collections falls back to {@link #insertionSort(int,int)}. */
|
||||
public final void mergeSort(int lo, int hi) {
|
||||
|
|
|
@ -187,7 +187,27 @@ public class TestArrayUtil extends LuceneTestCase {
|
|||
assertArrayEquals(a2, a1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void testTimSort() {
|
||||
int num = atLeast(65);
|
||||
for (int i = 0; i < num; i++) {
|
||||
Integer[] a1 = createRandomArray(1000), a2 = a1.clone();
|
||||
ArrayUtil.timSort(a1);
|
||||
Arrays.sort(a2);
|
||||
assertArrayEquals(a2, a1);
|
||||
|
||||
a1 = createRandomArray(1000);
|
||||
a2 = a1.clone();
|
||||
ArrayUtil.timSort(a1, Collections.reverseOrder());
|
||||
Arrays.sort(a2, Collections.reverseOrder());
|
||||
assertArrayEquals(a2, a1);
|
||||
// reverse back, so we can test that completely backwards sorted array (worst case) is working:
|
||||
ArrayUtil.timSort(a1);
|
||||
Arrays.sort(a2);
|
||||
assertArrayEquals(a2, a1);
|
||||
}
|
||||
}
|
||||
|
||||
public void testInsertionSort() {
|
||||
for (int i = 0, c = atLeast(500); i < c; i++) {
|
||||
Integer[] a1 = createRandomArray(30), a2 = a1.clone();
|
||||
|
@ -207,6 +227,25 @@ public class TestArrayUtil extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testBinarySort() {
|
||||
for (int i = 0, c = atLeast(500); i < c; i++) {
|
||||
Integer[] a1 = createRandomArray(30), a2 = a1.clone();
|
||||
ArrayUtil.binarySort(a1);
|
||||
Arrays.sort(a2);
|
||||
assertArrayEquals(a2, a1);
|
||||
|
||||
a1 = createRandomArray(30);
|
||||
a2 = a1.clone();
|
||||
ArrayUtil.binarySort(a1, Collections.reverseOrder());
|
||||
Arrays.sort(a2, Collections.reverseOrder());
|
||||
assertArrayEquals(a2, a1);
|
||||
// reverse back, so we can test that completely backwards sorted array (worst case) is working:
|
||||
ArrayUtil.binarySort(a1);
|
||||
Arrays.sort(a2);
|
||||
assertArrayEquals(a2, a1);
|
||||
}
|
||||
}
|
||||
|
||||
static class Item implements Comparable<Item> {
|
||||
final int val, order;
|
||||
|
||||
|
@ -254,16 +293,49 @@ public class TestArrayUtil extends LuceneTestCase {
|
|||
last = act;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void testTimSortStability() {
|
||||
final Random rnd = random();
|
||||
Item[] items = new Item[100];
|
||||
for (int i = 0; i < items.length; i++) {
|
||||
// half of the items have value but same order. The value of this items is sorted,
|
||||
// so they should always be in order after sorting.
|
||||
// The other half has defined order, but no (-1) value (they should appear after
|
||||
// all above, when sorted).
|
||||
final boolean equal = rnd.nextBoolean();
|
||||
items[i] = new Item(equal ? (i+1) : -1, equal ? 0 : (rnd.nextInt(1000)+1));
|
||||
}
|
||||
|
||||
if (VERBOSE) System.out.println("Before: " + Arrays.toString(items));
|
||||
// if you replace this with ArrayUtil.quickSort(), test should fail:
|
||||
ArrayUtil.timSort(items);
|
||||
if (VERBOSE) System.out.println("Sorted: " + Arrays.toString(items));
|
||||
|
||||
Item last = items[0];
|
||||
for (int i = 1; i < items.length; i++) {
|
||||
final Item act = items[i];
|
||||
if (act.order == 0) {
|
||||
// order of "equal" items should be not mixed up
|
||||
assertTrue(act.val > last.val);
|
||||
}
|
||||
assertTrue(act.order >= last.order);
|
||||
last = act;
|
||||
}
|
||||
}
|
||||
|
||||
// should produce no exceptions
|
||||
public void testEmptyArraySort() {
|
||||
Integer[] a = new Integer[0];
|
||||
ArrayUtil.quickSort(a);
|
||||
ArrayUtil.mergeSort(a);
|
||||
ArrayUtil.insertionSort(a);
|
||||
ArrayUtil.binarySort(a);
|
||||
ArrayUtil.timSort(a);
|
||||
ArrayUtil.quickSort(a, Collections.reverseOrder());
|
||||
ArrayUtil.mergeSort(a, Collections.reverseOrder());
|
||||
ArrayUtil.timSort(a, Collections.reverseOrder());
|
||||
ArrayUtil.insertionSort(a, Collections.reverseOrder());
|
||||
ArrayUtil.binarySort(a, Collections.reverseOrder());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -72,7 +72,26 @@ public class TestCollectionUtil extends LuceneTestCase {
|
|||
assertEquals(list2, list1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void testTimSort() {
|
||||
for (int i = 0, c = atLeast(500); i < c; i++) {
|
||||
List<Integer> list1 = createRandomList(1000), list2 = new ArrayList<Integer>(list1);
|
||||
CollectionUtil.timSort(list1);
|
||||
Collections.sort(list2);
|
||||
assertEquals(list2, list1);
|
||||
|
||||
list1 = createRandomList(1000);
|
||||
list2 = new ArrayList<Integer>(list1);
|
||||
CollectionUtil.timSort(list1, Collections.reverseOrder());
|
||||
Collections.sort(list2, Collections.reverseOrder());
|
||||
assertEquals(list2, list1);
|
||||
// reverse back, so we can test that completely backwards sorted array (worst case) is working:
|
||||
CollectionUtil.timSort(list1);
|
||||
Collections.sort(list2);
|
||||
assertEquals(list2, list1);
|
||||
}
|
||||
}
|
||||
|
||||
public void testInsertionSort() {
|
||||
for (int i = 0, c = atLeast(500); i < c; i++) {
|
||||
List<Integer> list1 = createRandomList(30), list2 = new ArrayList<Integer>(list1);
|
||||
|
@ -91,25 +110,52 @@ public class TestCollectionUtil extends LuceneTestCase {
|
|||
assertEquals(list2, list1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void testBinarySort() {
|
||||
for (int i = 0, c = atLeast(500); i < c; i++) {
|
||||
List<Integer> list1 = createRandomList(30), list2 = new ArrayList<Integer>(list1);
|
||||
CollectionUtil.binarySort(list1);
|
||||
Collections.sort(list2);
|
||||
assertEquals(list2, list1);
|
||||
|
||||
list1 = createRandomList(30);
|
||||
list2 = new ArrayList<Integer>(list1);
|
||||
CollectionUtil.binarySort(list1, Collections.reverseOrder());
|
||||
Collections.sort(list2, Collections.reverseOrder());
|
||||
assertEquals(list2, list1);
|
||||
// reverse back, so we can test that completely backwards sorted array (worst case) is working:
|
||||
CollectionUtil.binarySort(list1);
|
||||
Collections.sort(list2);
|
||||
assertEquals(list2, list1);
|
||||
}
|
||||
}
|
||||
|
||||
public void testEmptyListSort() {
|
||||
// should produce no exceptions
|
||||
List<Integer> list = Arrays.asList(new Integer[0]); // LUCENE-2989
|
||||
CollectionUtil.quickSort(list);
|
||||
CollectionUtil.mergeSort(list);
|
||||
CollectionUtil.timSort(list);
|
||||
CollectionUtil.insertionSort(list);
|
||||
CollectionUtil.binarySort(list);
|
||||
CollectionUtil.quickSort(list, Collections.reverseOrder());
|
||||
CollectionUtil.mergeSort(list, Collections.reverseOrder());
|
||||
CollectionUtil.timSort(list, Collections.reverseOrder());
|
||||
CollectionUtil.insertionSort(list, Collections.reverseOrder());
|
||||
CollectionUtil.binarySort(list, Collections.reverseOrder());
|
||||
|
||||
// check that empty non-random access lists pass sorting without ex (as sorting is not needed)
|
||||
list = new LinkedList<Integer>();
|
||||
CollectionUtil.quickSort(list);
|
||||
CollectionUtil.mergeSort(list);
|
||||
CollectionUtil.timSort(list);
|
||||
CollectionUtil.insertionSort(list);
|
||||
CollectionUtil.binarySort(list);
|
||||
CollectionUtil.quickSort(list, Collections.reverseOrder());
|
||||
CollectionUtil.mergeSort(list, Collections.reverseOrder());
|
||||
CollectionUtil.timSort(list, Collections.reverseOrder());
|
||||
CollectionUtil.insertionSort(list, Collections.reverseOrder());
|
||||
CollectionUtil.binarySort(list, Collections.reverseOrder());
|
||||
}
|
||||
|
||||
public void testOneElementListSort() {
|
||||
|
@ -118,10 +164,14 @@ public class TestCollectionUtil extends LuceneTestCase {
|
|||
list.add(1);
|
||||
CollectionUtil.quickSort(list);
|
||||
CollectionUtil.mergeSort(list);
|
||||
CollectionUtil.timSort(list);
|
||||
CollectionUtil.insertionSort(list);
|
||||
CollectionUtil.binarySort(list);
|
||||
CollectionUtil.quickSort(list, Collections.reverseOrder());
|
||||
CollectionUtil.mergeSort(list, Collections.reverseOrder());
|
||||
CollectionUtil.timSort(list, Collections.reverseOrder());
|
||||
CollectionUtil.insertionSort(list, Collections.reverseOrder());
|
||||
CollectionUtil.binarySort(list, Collections.reverseOrder());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,167 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class TestSorterTemplate extends LuceneTestCase {
|
||||
|
||||
private static final int SLOW_SORT_THRESHOLD = 1000;
|
||||
|
||||
// A sorter template that compares only the last 32 bits
|
||||
static class Last32BitsSorterTemplate extends SorterTemplate {
|
||||
|
||||
final long[] arr;
|
||||
long pivot;
|
||||
|
||||
Last32BitsSorterTemplate(long[] arr) {
|
||||
this.arr = arr;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
final long tmp = arr[i];
|
||||
arr[i] = arr[j];
|
||||
arr[j] = tmp;
|
||||
}
|
||||
|
||||
private int compareValues(long i, long j) {
|
||||
// only compare the last 32 bits
|
||||
final long a = i & 0xFFFFFFFFL;
|
||||
final long b = j & 0xFFFFFFFFL;
|
||||
return a < b ? -1 : a == b ? 0 : 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return compareValues(arr[i], arr[j]);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
pivot = arr[i];
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
return compareValues(pivot, arr[j]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void testSort(int[] intArr) {
|
||||
// we modify the array as a long[] and store the original ord in the first 32 bits
|
||||
// to be able to check stability
|
||||
final long[] arr = toLongsAndOrds(intArr);
|
||||
|
||||
// use MergeSort as a reference
|
||||
// assertArrayEquals checks for sorting + stability
|
||||
// assertArrayEquals(toInts) checks for sorting only
|
||||
final long[] mergeSorted = Arrays.copyOf(arr, arr.length);
|
||||
new Last32BitsSorterTemplate(mergeSorted).mergeSort(0, arr.length - 1);
|
||||
|
||||
if (arr.length < SLOW_SORT_THRESHOLD) {
|
||||
final long[] insertionSorted = Arrays.copyOf(arr, arr.length);
|
||||
new Last32BitsSorterTemplate(insertionSorted).insertionSort(0, arr.length - 1);
|
||||
assertArrayEquals(mergeSorted, insertionSorted);
|
||||
|
||||
final long[] binarySorted = Arrays.copyOf(arr, arr.length);
|
||||
new Last32BitsSorterTemplate(binarySorted).binarySort(0, arr.length - 1);
|
||||
assertArrayEquals(mergeSorted, binarySorted);
|
||||
}
|
||||
|
||||
final long[] quickSorted = Arrays.copyOf(arr, arr.length);
|
||||
new Last32BitsSorterTemplate(quickSorted).quickSort(0, arr.length - 1);
|
||||
assertArrayEquals(toInts(mergeSorted), toInts(quickSorted));
|
||||
|
||||
final long[] timSorted = Arrays.copyOf(arr, arr.length);
|
||||
new Last32BitsSorterTemplate(timSorted).timSort(0, arr.length - 1);
|
||||
assertArrayEquals(mergeSorted, timSorted);
|
||||
}
|
||||
|
||||
private int[] toInts(long[] longArr) {
|
||||
int[] arr = new int[longArr.length];
|
||||
for (int i = 0; i < longArr.length; ++i) {
|
||||
arr[i] = (int) longArr[i];
|
||||
}
|
||||
return arr;
|
||||
}
|
||||
|
||||
private long[] toLongsAndOrds(int[] intArr) {
|
||||
final long[] arr = new long[intArr.length];
|
||||
for (int i = 0; i < intArr.length; ++i) {
|
||||
arr[i] = (((long) i) << 32) | (intArr[i] & 0xFFFFFFFFL);
|
||||
}
|
||||
return arr;
|
||||
}
|
||||
|
||||
int randomLength() {
|
||||
return random().nextBoolean()
|
||||
? random().nextInt(SLOW_SORT_THRESHOLD)
|
||||
: random().nextInt(100000);
|
||||
}
|
||||
|
||||
public void testAscending() {
|
||||
final int length = randomLength();
|
||||
final int[] arr = new int[length];
|
||||
arr[0] = random().nextInt(10);
|
||||
for (int i = 1; i < arr.length; ++i) {
|
||||
arr[i] = arr[i-1] + _TestUtil.nextInt(random(), 0, 10);
|
||||
}
|
||||
testSort(arr);
|
||||
}
|
||||
|
||||
public void testDescending() {
|
||||
final int length = randomLength();
|
||||
final int[] arr = new int[length];
|
||||
arr[0] = random().nextInt(10);
|
||||
for (int i = 1; i < arr.length; ++i) {
|
||||
arr[i] = arr[i-1] - _TestUtil.nextInt(random(), 0, 10);
|
||||
}
|
||||
testSort(arr);
|
||||
}
|
||||
|
||||
public void testStrictlyDescending() {
|
||||
final int length = randomLength();
|
||||
final int[] arr = new int[length];
|
||||
arr[0] = random().nextInt(10);
|
||||
for (int i = 1; i < arr.length; ++i) {
|
||||
arr[i] = arr[i-1] - _TestUtil.nextInt(random(), 1, 10);
|
||||
}
|
||||
testSort(arr);
|
||||
}
|
||||
|
||||
public void testRandom1() {
|
||||
final int length = randomLength();
|
||||
final int[] arr = new int[length];
|
||||
for (int i = 1; i < arr.length; ++i) {
|
||||
arr[i] = random().nextInt();
|
||||
}
|
||||
testSort(arr);
|
||||
}
|
||||
|
||||
public void testRandom2() {
|
||||
final int length = randomLength();
|
||||
final int[] arr = new int[length];
|
||||
for (int i = 1; i < arr.length; ++i) {
|
||||
arr[i] = random().nextInt(10);
|
||||
}
|
||||
testSort(arr);
|
||||
}
|
||||
|
||||
}
|
|
@ -147,8 +147,9 @@ public abstract class Sorter {
|
|||
}
|
||||
|
||||
SorterTemplate sorter = new DocValueSorterTemplate(docs, comparator);
|
||||
// TODO: use a stable sort instead?
|
||||
sorter.quickSort(0, docs.length - 1); // docs is now the newToOld mapping
|
||||
// It can be common to sort a reader, add docs, sort it again, ... and in
|
||||
// that case timSort can save a lot of time
|
||||
sorter.timSort(0, docs.length - 1); // docs is now the newToOld mapping
|
||||
|
||||
// The reason why we use MonotonicAppendingLongBuffer here is that it
|
||||
// wastes very little memory if the index is in random order but can save
|
||||
|
|
|
@ -307,9 +307,11 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
docs[i] = docs[j];
|
||||
docs[j] = tmpDoc;
|
||||
|
||||
int tmpFreq = freqs[i];
|
||||
freqs[i] = freqs[j];
|
||||
freqs[j] = tmpFreq;
|
||||
if (freqs != null) {
|
||||
int tmpFreq = freqs[i];
|
||||
freqs[i] = freqs[j];
|
||||
freqs[j] = tmpFreq;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -335,8 +337,6 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
freqs[i] = in.freq();
|
||||
++i;
|
||||
}
|
||||
SorterTemplate sorter = new DocFreqSorterTemplate(docs, freqs);
|
||||
sorter.quickSort(0, i - 1);
|
||||
} else {
|
||||
freqs = null;
|
||||
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){
|
||||
|
@ -345,8 +345,10 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
}
|
||||
docs[i++] = docMap.oldToNew(doc);
|
||||
}
|
||||
Arrays.sort(docs, 0, i);
|
||||
}
|
||||
// TimSort can save much time compared to other sorts in case of
|
||||
// reverse sorting, or when sorting a concatenation of sorted readers
|
||||
new DocFreqSorterTemplate(docs, freqs).timSort(0, i - 1);
|
||||
upto = i;
|
||||
}
|
||||
|
||||
|
@ -451,12 +453,9 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
int i = 0;
|
||||
while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
if (i == docs.length) {
|
||||
docs = ArrayUtil.grow(docs, docs.length + 1);
|
||||
// don't grow() offsets since growing pattern for long and int is not the same.
|
||||
// since we want docs and offsets at the same length, just grow it manually.
|
||||
long[] tmp = new long[docs.length];
|
||||
System.arraycopy(offsets, 0, tmp, 0, offsets.length);
|
||||
offsets = tmp;
|
||||
final int newLength = ArrayUtil.oversize(i + 1, 4);
|
||||
docs = Arrays.copyOf(docs, newLength);
|
||||
offsets = Arrays.copyOf(offsets, newLength);
|
||||
}
|
||||
docs[i] = docMap.oldToNew(doc);
|
||||
offsets[i] = out.getFilePointer();
|
||||
|
@ -464,8 +463,7 @@ public class SortingAtomicReader extends FilterAtomicReader {
|
|||
i++;
|
||||
}
|
||||
upto = i;
|
||||
SorterTemplate sorter = new DocOffsetSorterTemplate(docs, offsets);
|
||||
sorter.quickSort(0, upto - 1);
|
||||
new DocOffsetSorterTemplate(docs, offsets).timSort(0, upto - 1);
|
||||
out.close();
|
||||
this.postingInput = new RAMInputStream("", file);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue