LUCENE-4867: Allow custom SorterTemplates to override merge.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1459851 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2013-03-22 15:47:14 +00:00
parent 3cc1919645
commit 7adf68d067
6 changed files with 292 additions and 106 deletions

View File

@ -136,6 +136,14 @@ New Features
to return BytesRef[] instead of Term[]. (Robert Muir, Mike
McCandless)
Optimizations
* LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace
the default implementation which merges in-place by a faster implementation
that could require fewer swaps at the expense of some extra memory.
ArrayUtil and CollectionUtil override it so that their mergeSort and timSort
methods are faster but only require up to 1% of extra memory. (Adrien Grand)
API Changes
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use

View File

@ -28,6 +28,11 @@ import java.util.Comparator;
public final class ArrayUtil {
// affordable memory overhead to merge sorted arrays
static final float MERGE_OVERHEAD_RATIO = 0.01f;
// arrays below this size will always be sorted in-place
static final int MERGE_EXTRA_MEMORY_THRESHOLD = (int) (15 / MERGE_OVERHEAD_RATIO);
private ArrayUtil() {} // no instance
/*
@ -605,9 +610,16 @@ public final class ArrayUtil {
return result;
}
/** SorterTemplate with custom {@link Comparator} */
private static <T> SorterTemplate getSorter(final T[] a, final Comparator<? super T> comp) {
return new SorterTemplate() {
private static abstract class ArraySorterTemplate<T> extends SorterTemplate {
protected final T[] a;
ArraySorterTemplate(T[] a) {
this.a = a;
}
protected abstract int compare(T a, T b);
@Override
protected void swap(int i, int j) {
final T o = a[i];
@ -617,7 +629,7 @@ public final class ArrayUtil {
@Override
protected int compare(int i, int j) {
return comp.compare(a[i], a[j]);
return compare(a[i], a[j]);
}
@Override
@ -627,42 +639,115 @@ public final class ArrayUtil {
@Override
protected int comparePivot(int j) {
return comp.compare(pivot, a[j]);
return compare(pivot, a[j]);
}
private T pivot;
}
// a template for merge-based sorts which uses extra memory to speed up merging
private static abstract class ArrayMergeSorterTemplate<T> extends ArraySorterTemplate<T> {
private final int threshold; // maximum length of a merge that can be made using extra memory
private final T[] tmp;
ArrayMergeSorterTemplate(T[] a, float overheadRatio) {
super(a);
this.threshold = (int) (a.length * overheadRatio);
@SuppressWarnings("unchecked")
final T[] tmpBuf = (T[]) new Object[threshold];
this.tmp = tmpBuf;
}
private void mergeWithExtraMemory(int lo, int pivot, int hi, int len1, int len2) {
System.arraycopy(a, lo, tmp, 0, len1);
int i = 0, j = pivot, dest = lo;
while (i < len1 && j < hi) {
if (compare(tmp[i], a[j]) <= 0) {
a[dest++] = tmp[i++];
} else {
a[dest++] = a[j++];
}
}
while (i < len1) {
a[dest++] = tmp[i++];
}
while (j < hi) {
a[dest++] = a[j++];
}
assert dest == hi;
}
@Override
protected void merge(int lo, int pivot, int hi, int len1, int len2) {
if (len1 <= threshold) {
mergeWithExtraMemory(lo, pivot, hi, len1, len2);
} else {
// since this method recurses to run merge on smaller arrays, it will
// end up using mergeWithExtraMemory
super.merge(lo, pivot, hi, len1, len2);
}
}
}
/** SorterTemplate with custom {@link Comparator} */
private static <T> SorterTemplate getSorter(final T[] a, final Comparator<? super T> comp) {
return new ArraySorterTemplate<T>(a) {
@Override
protected int compare(T a, T b) {
return comp.compare(a, b);
}
};
}
/** Natural SorterTemplate */
private static <T extends Comparable<? super T>> SorterTemplate getSorter(final T[] a) {
return new SorterTemplate() {
@Override
protected void swap(int i, int j) {
final T o = a[i];
a[i] = a[j];
a[j] = o;
}
return new ArraySorterTemplate<T>(a) {
@Override
protected int compare(int i, int j) {
return a[i].compareTo(a[j]);
protected int compare(T a, T b) {
return a.compareTo(b);
}
@Override
protected void setPivot(int i) {
pivot = a[i];
}
@Override
protected int comparePivot(int j) {
return pivot.compareTo(a[j]);
}
private T pivot;
};
}
/** SorterTemplate with custom {@link Comparator} for merge-based sorts. */
private static <T> SorterTemplate getMergeSorter(final T[] a, final Comparator<? super T> comp) {
if (a.length < MERGE_EXTRA_MEMORY_THRESHOLD) {
return getSorter(a, comp);
} else {
return new ArrayMergeSorterTemplate<T>(a, MERGE_OVERHEAD_RATIO) {
@Override
protected int compare(T a, T b) {
return comp.compare(a, b);
}
};
}
}
/** Natural SorterTemplate for merge-based sorts. */
private static <T extends Comparable<? super T>> SorterTemplate getMergeSorter(final T[] a) {
if (a.length < MERGE_EXTRA_MEMORY_THRESHOLD) {
return getSorter(a);
} else {
return new ArrayMergeSorterTemplate<T>(a, MERGE_OVERHEAD_RATIO) {
@Override
protected int compare(T a, T b) {
return a.compareTo(b);
}
};
}
}
// quickSorts (endindex is exclusive!):
/**
@ -714,7 +799,7 @@ public final class ArrayUtil {
public static <T> void mergeSort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) {
if (toIndex-fromIndex <= 1) return;
//System.out.println("SORT: " + (toIndex-fromIndex));
getSorter(a, comp).mergeSort(fromIndex, toIndex-1);
getMergeSorter(a, comp).mergeSort(fromIndex, toIndex-1);
}
/**
@ -733,7 +818,7 @@ public final class ArrayUtil {
*/
public static <T extends Comparable<? super T>> void mergeSort(T[] a, int fromIndex, int toIndex) {
if (toIndex-fromIndex <= 1) return;
getSorter(a).mergeSort(fromIndex, toIndex-1);
getMergeSorter(a).mergeSort(fromIndex, toIndex-1);
}
/**
@ -754,7 +839,7 @@ public final class ArrayUtil {
*/
public static <T> void timSort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) {
if (toIndex-fromIndex <= 1) return;
getSorter(a, comp).timSort(fromIndex, toIndex-1);
getMergeSorter(a, comp).timSort(fromIndex, toIndex-1);
}
/**
@ -773,7 +858,7 @@ public final class ArrayUtil {
*/
public static <T extends Comparable<? super T>> void timSort(T[] a, int fromIndex, int toIndex) {
if (toIndex-fromIndex <= 1) return;
getSorter(a).timSort(fromIndex, toIndex-1);
getMergeSorter(a).timSort(fromIndex, toIndex-1);
}
/**

View File

@ -17,8 +17,11 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import java.util.Comparator;
import static org.apache.lucene.util.ArrayUtil.MERGE_EXTRA_MEMORY_THRESHOLD;
import static org.apache.lucene.util.ArrayUtil.MERGE_OVERHEAD_RATIO;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.RandomAccess;
@ -35,11 +38,16 @@ public final class CollectionUtil {
private CollectionUtil() {} // no instance
/** SorterTemplate with custom {@link Comparator} */
private static <T> SorterTemplate getSorter(final List<T> list, final Comparator<? super T> comp) {
if (!(list instanceof RandomAccess))
throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place.");
return new SorterTemplate() {
private static abstract class ListSorterTemplate<T> extends SorterTemplate {
protected final List<T> list;
ListSorterTemplate(List<T> list) {
this.list = list;
}
protected abstract int compare(T a, T b);
@Override
protected void swap(int i, int j) {
Collections.swap(list, i, j);
@ -47,7 +55,7 @@ public final class CollectionUtil {
@Override
protected int compare(int i, int j) {
return comp.compare(list.get(i), list.get(j));
return compare(list.get(i), list.get(j));
}
@Override
@ -57,10 +65,72 @@ public final class CollectionUtil {
@Override
protected int comparePivot(int j) {
return comp.compare(pivot, list.get(j));
return compare(pivot, list.get(j));
}
private T pivot;
}
// a template for merge-based sorts which uses extra memory to speed up merging
private static abstract class ListMergeSorterTemplate<T> extends ListSorterTemplate<T> {
private final int threshold; // maximum length of a merge that can be made using extra memory
private final T[] tmp;
ListMergeSorterTemplate(List<T> list, float overheadRatio) {
super(list);
this.threshold = (int) (list.size() * overheadRatio);
@SuppressWarnings("unchecked")
final T[] tmpBuf = (T[]) new Object[threshold];
this.tmp = tmpBuf;
}
private void mergeWithExtraMemory(int lo, int pivot, int hi, int len1, int len2) {
for (int i = 0; i < len1; ++i) {
tmp[i] = list.get(lo + i);
}
int i = 0, j = pivot, dest = lo;
while (i < len1 && j < hi) {
if (compare(tmp[i], list.get(j)) <= 0) {
list.set(dest++, tmp[i++]);
} else {
list.set(dest++, list.get(j++));
}
}
while (i < len1) {
list.set(dest++, tmp[i++]);
}
while (j < hi) {
list.set(dest++, list.get(j++));
}
assert dest == hi;
}
@Override
protected void merge(int lo, int pivot, int hi, int len1, int len2) {
if (len1 <= threshold) {
mergeWithExtraMemory(lo, pivot, hi, len1, len2);
} else {
// since this method recurses to run merge on smaller arrays, it will
// end up using mergeWithExtraMemory
super.merge(lo, pivot, hi, len1, len2);
}
}
}
/** SorterTemplate with custom {@link Comparator} */
private static <T> SorterTemplate getSorter(final List<T> list, final Comparator<? super T> comp) {
if (!(list instanceof RandomAccess))
throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place.");
return new ListSorterTemplate<T>(list) {
@Override
protected int compare(T a, T b) {
return comp.compare(a, b);
}
};
}
@ -68,31 +138,52 @@ public final class CollectionUtil {
private static <T extends Comparable<? super T>> SorterTemplate getSorter(final List<T> list) {
if (!(list instanceof RandomAccess))
throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place.");
return new SorterTemplate() {
@Override
protected void swap(int i, int j) {
Collections.swap(list, i, j);
}
return new ListSorterTemplate<T>(list) {
@Override
protected int compare(int i, int j) {
return list.get(i).compareTo(list.get(j));
protected int compare(T a, T b) {
return a.compareTo(b);
}
@Override
protected void setPivot(int i) {
pivot = list.get(i);
}
@Override
protected int comparePivot(int j) {
return pivot.compareTo(list.get(j));
}
private T pivot;
};
}
/** SorterTemplate with custom {@link Comparator} for merge-based sorts. */
private static <T> SorterTemplate getMergeSorter(final List<T> list, final Comparator<? super T> comp) {
if (!(list instanceof RandomAccess))
throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place.");
if (list.size() < MERGE_EXTRA_MEMORY_THRESHOLD) {
return getSorter(list, comp);
} else {
return new ListMergeSorterTemplate<T>(list, MERGE_OVERHEAD_RATIO) {
@Override
protected int compare(T a, T b) {
return comp.compare(a, b);
}
};
}
}
/** Natural SorterTemplate for merge-based sorts. */
private static <T extends Comparable<? super T>> SorterTemplate getMergeSorter(final List<T> list) {
if (!(list instanceof RandomAccess))
throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place.");
if (list.size() < MERGE_EXTRA_MEMORY_THRESHOLD) {
return getSorter(list);
} else {
return new ListMergeSorterTemplate<T>(list, MERGE_OVERHEAD_RATIO) {
@Override
protected int compare(T a, T b) {
return a.compareTo(b);
}
};
}
}
/**
* Sorts the given random access {@link List} using the {@link Comparator}.
* The list must implement {@link RandomAccess}. This method uses the quick sort
@ -128,7 +219,7 @@ public final class CollectionUtil {
public static <T> void mergeSort(List<T> list, Comparator<? super T> comp) {
final int size = list.size();
if (size <= 1) return;
getSorter(list, comp).mergeSort(0, size-1);
getMergeSorter(list, comp).mergeSort(0, size-1);
}
/**
@ -140,7 +231,7 @@ public final class CollectionUtil {
public static <T extends Comparable<? super T>> void mergeSort(List<T> list) {
final int size = list.size();
if (size <= 1) return;
getSorter(list).mergeSort(0, size-1);
getMergeSorter(list).mergeSort(0, size-1);
}
// timSorts:
@ -154,7 +245,7 @@ public final class CollectionUtil {
public static <T> void timSort(List<T> list, Comparator<? super T> comp) {
final int size = list.size();
if (size <= 1) return;
getSorter(list, comp).timSort(0, size-1);
getMergeSorter(list, comp).timSort(0, size-1);
}
/**
@ -166,7 +257,7 @@ public final class CollectionUtil {
public static <T extends Comparable<? super T>> void timSort(List<T> list) {
final int size = list.size();
if (size <= 1) return;
getSorter(list).timSort(0, size-1);
getMergeSorter(list).timSort(0, size-1);
}
// insertionSorts:

View File

@ -343,8 +343,10 @@ public abstract class SorterTemplate {
merge(lo, mid, hi, mid - lo, hi - mid);
}
// pkg-protected for access from TimSort class
void merge(int lo, int pivot, int hi, int len1, int len2) {
/** Merge the slices [lo-pivot[ (of length len1) and [pivot-hi[ (of length
* len2) which are already sorted. This method merges in-place but can be
* extended to provide a faster implementation using extra memory. */
protected void merge(int lo, int pivot, int hi, int len1, int len2) {
if (len1 == 0 || len2 == 0) {
return;
}

View File

@ -131,12 +131,12 @@ public class TestArrayUtil extends LuceneTestCase {
public void testQuickSort() {
int num = atLeast(50);
for (int i = 0; i < num; i++) {
Integer[] a1 = createRandomArray(1000), a2 = a1.clone();
Integer[] a1 = createRandomArray(2000), a2 = a1.clone();
ArrayUtil.quickSort(a1);
Arrays.sort(a2);
assertArrayEquals(a2, a1);
a1 = createRandomArray(1000);
a1 = createRandomArray(2000);
a2 = a1.clone();
ArrayUtil.quickSort(a1, Collections.reverseOrder());
Arrays.sort(a2, Collections.reverseOrder());
@ -171,12 +171,12 @@ public class TestArrayUtil extends LuceneTestCase {
public void testMergeSort() {
int num = atLeast(50);
for (int i = 0; i < num; i++) {
Integer[] a1 = createRandomArray(1000), a2 = a1.clone();
Integer[] a1 = createRandomArray(2000), a2 = a1.clone();
ArrayUtil.mergeSort(a1);
Arrays.sort(a2);
assertArrayEquals(a2, a1);
a1 = createRandomArray(1000);
a1 = createRandomArray(2000);
a2 = a1.clone();
ArrayUtil.mergeSort(a1, Collections.reverseOrder());
Arrays.sort(a2, Collections.reverseOrder());
@ -191,12 +191,12 @@ public class TestArrayUtil extends LuceneTestCase {
public void testTimSort() {
int num = atLeast(65);
for (int i = 0; i < num; i++) {
Integer[] a1 = createRandomArray(1000), a2 = a1.clone();
Integer[] a1 = createRandomArray(2000), a2 = a1.clone();
ArrayUtil.timSort(a1);
Arrays.sort(a2);
assertArrayEquals(a2, a1);
a1 = createRandomArray(1000);
a1 = createRandomArray(2000);
a2 = a1.clone();
ArrayUtil.timSort(a1, Collections.reverseOrder());
Arrays.sort(a2, Collections.reverseOrder());

View File

@ -37,12 +37,12 @@ public class TestCollectionUtil extends LuceneTestCase {
public void testQuickSort() {
for (int i = 0, c = atLeast(500); i < c; i++) {
List<Integer> list1 = createRandomList(1000), list2 = new ArrayList<Integer>(list1);
List<Integer> list1 = createRandomList(2000), list2 = new ArrayList<Integer>(list1);
CollectionUtil.quickSort(list1);
Collections.sort(list2);
assertEquals(list2, list1);
list1 = createRandomList(1000);
list1 = createRandomList(2000);
list2 = new ArrayList<Integer>(list1);
CollectionUtil.quickSort(list1, Collections.reverseOrder());
Collections.sort(list2, Collections.reverseOrder());
@ -56,12 +56,12 @@ public class TestCollectionUtil extends LuceneTestCase {
public void testMergeSort() {
for (int i = 0, c = atLeast(500); i < c; i++) {
List<Integer> list1 = createRandomList(1000), list2 = new ArrayList<Integer>(list1);
List<Integer> list1 = createRandomList(2000), list2 = new ArrayList<Integer>(list1);
CollectionUtil.mergeSort(list1);
Collections.sort(list2);
assertEquals(list2, list1);
list1 = createRandomList(1000);
list1 = createRandomList(2000);
list2 = new ArrayList<Integer>(list1);
CollectionUtil.mergeSort(list1, Collections.reverseOrder());
Collections.sort(list2, Collections.reverseOrder());
@ -75,12 +75,12 @@ public class TestCollectionUtil extends LuceneTestCase {
public void testTimSort() {
for (int i = 0, c = atLeast(500); i < c; i++) {
List<Integer> list1 = createRandomList(1000), list2 = new ArrayList<Integer>(list1);
List<Integer> list1 = createRandomList(2000), list2 = new ArrayList<Integer>(list1);
CollectionUtil.timSort(list1);
Collections.sort(list2);
assertEquals(list2, list1);
list1 = createRandomList(1000);
list1 = createRandomList(2000);
list2 = new ArrayList<Integer>(list1);
CollectionUtil.timSort(list1, Collections.reverseOrder());
Collections.sort(list2, Collections.reverseOrder());