LUCENE-4867: Allow custom SorterTemplates to override merge.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1459851 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2013-03-22 15:47:14 +00:00
parent 3cc1919645
commit 7adf68d067
6 changed files with 292 additions and 106 deletions

View File

@ -136,6 +136,14 @@ New Features
to return BytesRef[] instead of Term[]. (Robert Muir, Mike to return BytesRef[] instead of Term[]. (Robert Muir, Mike
McCandless) McCandless)
Optimizations
* LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace
the default implementation which merges in-place by a faster implementation
that could require fewer swaps at the expense of some extra memory.
ArrayUtil and CollectionUtil override it so that their mergeSort and timSort
methods are faster but only require up to 1% of extra memory. (Adrien Grand)
API Changes API Changes
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use * LUCENE-4844: removed TaxonomyReader.getParent(), you should use

View File

@ -28,6 +28,11 @@ import java.util.Comparator;
public final class ArrayUtil { public final class ArrayUtil {
// affordable memory overhead to merge sorted arrays
static final float MERGE_OVERHEAD_RATIO = 0.01f;
// arrays below this size will always be sorted in-place
static final int MERGE_EXTRA_MEMORY_THRESHOLD = (int) (15 / MERGE_OVERHEAD_RATIO);
private ArrayUtil() {} // no instance private ArrayUtil() {} // no instance
/* /*
@ -604,65 +609,145 @@ public final class ArrayUtil {
return result; return result;
} }
private static abstract class ArraySorterTemplate<T> extends SorterTemplate {
protected final T[] a;
ArraySorterTemplate(T[] a) {
this.a = a;
}
protected abstract int compare(T a, T b);
@Override
protected void swap(int i, int j) {
final T o = a[i];
a[i] = a[j];
a[j] = o;
}
@Override
protected int compare(int i, int j) {
return compare(a[i], a[j]);
}
@Override
protected void setPivot(int i) {
pivot = a[i];
}
@Override
protected int comparePivot(int j) {
return compare(pivot, a[j]);
}
private T pivot;
}
// a template for merge-based sorts which uses extra memory to speed up merging
private static abstract class ArrayMergeSorterTemplate<T> extends ArraySorterTemplate<T> {
private final int threshold; // maximum length of a merge that can be made using extra memory
private final T[] tmp;
ArrayMergeSorterTemplate(T[] a, float overheadRatio) {
super(a);
this.threshold = (int) (a.length * overheadRatio);
@SuppressWarnings("unchecked")
final T[] tmpBuf = (T[]) new Object[threshold];
this.tmp = tmpBuf;
}
private void mergeWithExtraMemory(int lo, int pivot, int hi, int len1, int len2) {
System.arraycopy(a, lo, tmp, 0, len1);
int i = 0, j = pivot, dest = lo;
while (i < len1 && j < hi) {
if (compare(tmp[i], a[j]) <= 0) {
a[dest++] = tmp[i++];
} else {
a[dest++] = a[j++];
}
}
while (i < len1) {
a[dest++] = tmp[i++];
}
while (j < hi) {
a[dest++] = a[j++];
}
assert dest == hi;
}
@Override
protected void merge(int lo, int pivot, int hi, int len1, int len2) {
if (len1 <= threshold) {
mergeWithExtraMemory(lo, pivot, hi, len1, len2);
} else {
// since this method recurses to run merge on smaller arrays, it will
// end up using mergeWithExtraMemory
super.merge(lo, pivot, hi, len1, len2);
}
}
}
/** SorterTemplate with custom {@link Comparator} */ /** SorterTemplate with custom {@link Comparator} */
private static <T> SorterTemplate getSorter(final T[] a, final Comparator<? super T> comp) { private static <T> SorterTemplate getSorter(final T[] a, final Comparator<? super T> comp) {
return new SorterTemplate() { return new ArraySorterTemplate<T>(a) {
@Override
protected void swap(int i, int j) {
final T o = a[i];
a[i] = a[j];
a[j] = o;
}
@Override
protected int compare(int i, int j) {
return comp.compare(a[i], a[j]);
}
@Override @Override
protected void setPivot(int i) { protected int compare(T a, T b) {
pivot = a[i]; return comp.compare(a, b);
} }
@Override
protected int comparePivot(int j) {
return comp.compare(pivot, a[j]);
}
private T pivot;
}; };
} }
/** Natural SorterTemplate */ /** Natural SorterTemplate */
private static <T extends Comparable<? super T>> SorterTemplate getSorter(final T[] a) { private static <T extends Comparable<? super T>> SorterTemplate getSorter(final T[] a) {
return new SorterTemplate() { return new ArraySorterTemplate<T>(a) {
@Override
protected void swap(int i, int j) {
final T o = a[i];
a[i] = a[j];
a[j] = o;
}
@Override
protected int compare(int i, int j) {
return a[i].compareTo(a[j]);
}
@Override @Override
protected void setPivot(int i) { protected int compare(T a, T b) {
pivot = a[i]; return a.compareTo(b);
} }
@Override
protected int comparePivot(int j) {
return pivot.compareTo(a[j]);
}
private T pivot;
}; };
} }
/** SorterTemplate with custom {@link Comparator} for merge-based sorts. */
private static <T> SorterTemplate getMergeSorter(final T[] a, final Comparator<? super T> comp) {
if (a.length < MERGE_EXTRA_MEMORY_THRESHOLD) {
return getSorter(a, comp);
} else {
return new ArrayMergeSorterTemplate<T>(a, MERGE_OVERHEAD_RATIO) {
@Override
protected int compare(T a, T b) {
return comp.compare(a, b);
}
};
}
}
/** Natural SorterTemplate for merge-based sorts. */
private static <T extends Comparable<? super T>> SorterTemplate getMergeSorter(final T[] a) {
if (a.length < MERGE_EXTRA_MEMORY_THRESHOLD) {
return getSorter(a);
} else {
return new ArrayMergeSorterTemplate<T>(a, MERGE_OVERHEAD_RATIO) {
@Override
protected int compare(T a, T b) {
return a.compareTo(b);
}
};
}
}
// quickSorts (endindex is exclusive!): // quickSorts (endindex is exclusive!):
/** /**
@ -714,7 +799,7 @@ public final class ArrayUtil {
public static <T> void mergeSort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) { public static <T> void mergeSort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) {
if (toIndex-fromIndex <= 1) return; if (toIndex-fromIndex <= 1) return;
//System.out.println("SORT: " + (toIndex-fromIndex)); //System.out.println("SORT: " + (toIndex-fromIndex));
getSorter(a, comp).mergeSort(fromIndex, toIndex-1); getMergeSorter(a, comp).mergeSort(fromIndex, toIndex-1);
} }
/** /**
@ -733,7 +818,7 @@ public final class ArrayUtil {
*/ */
public static <T extends Comparable<? super T>> void mergeSort(T[] a, int fromIndex, int toIndex) { public static <T extends Comparable<? super T>> void mergeSort(T[] a, int fromIndex, int toIndex) {
if (toIndex-fromIndex <= 1) return; if (toIndex-fromIndex <= 1) return;
getSorter(a).mergeSort(fromIndex, toIndex-1); getMergeSorter(a).mergeSort(fromIndex, toIndex-1);
} }
/** /**
@ -754,7 +839,7 @@ public final class ArrayUtil {
*/ */
public static <T> void timSort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) { public static <T> void timSort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) {
if (toIndex-fromIndex <= 1) return; if (toIndex-fromIndex <= 1) return;
getSorter(a, comp).timSort(fromIndex, toIndex-1); getMergeSorter(a, comp).timSort(fromIndex, toIndex-1);
} }
/** /**
@ -773,7 +858,7 @@ public final class ArrayUtil {
*/ */
public static <T extends Comparable<? super T>> void timSort(T[] a, int fromIndex, int toIndex) { public static <T extends Comparable<? super T>> void timSort(T[] a, int fromIndex, int toIndex) {
if (toIndex-fromIndex <= 1) return; if (toIndex-fromIndex <= 1) return;
getSorter(a).timSort(fromIndex, toIndex-1); getMergeSorter(a).timSort(fromIndex, toIndex-1);
} }
/** /**

View File

@ -17,8 +17,11 @@ package org.apache.lucene.util;
* limitations under the License. * limitations under the License.
*/ */
import java.util.Comparator; import static org.apache.lucene.util.ArrayUtil.MERGE_EXTRA_MEMORY_THRESHOLD;
import static org.apache.lucene.util.ArrayUtil.MERGE_OVERHEAD_RATIO;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator;
import java.util.List; import java.util.List;
import java.util.RandomAccess; import java.util.RandomAccess;
@ -34,33 +37,100 @@ import java.util.RandomAccess;
public final class CollectionUtil { public final class CollectionUtil {
private CollectionUtil() {} // no instance private CollectionUtil() {} // no instance
private static abstract class ListSorterTemplate<T> extends SorterTemplate {
protected final List<T> list;
ListSorterTemplate(List<T> list) {
this.list = list;
}
protected abstract int compare(T a, T b);
@Override
protected void swap(int i, int j) {
Collections.swap(list, i, j);
}
@Override
protected int compare(int i, int j) {
return compare(list.get(i), list.get(j));
}
@Override
protected void setPivot(int i) {
pivot = list.get(i);
}
@Override
protected int comparePivot(int j) {
return compare(pivot, list.get(j));
}
private T pivot;
}
// a template for merge-based sorts which uses extra memory to speed up merging
private static abstract class ListMergeSorterTemplate<T> extends ListSorterTemplate<T> {
private final int threshold; // maximum length of a merge that can be made using extra memory
private final T[] tmp;
ListMergeSorterTemplate(List<T> list, float overheadRatio) {
super(list);
this.threshold = (int) (list.size() * overheadRatio);
@SuppressWarnings("unchecked")
final T[] tmpBuf = (T[]) new Object[threshold];
this.tmp = tmpBuf;
}
private void mergeWithExtraMemory(int lo, int pivot, int hi, int len1, int len2) {
for (int i = 0; i < len1; ++i) {
tmp[i] = list.get(lo + i);
}
int i = 0, j = pivot, dest = lo;
while (i < len1 && j < hi) {
if (compare(tmp[i], list.get(j)) <= 0) {
list.set(dest++, tmp[i++]);
} else {
list.set(dest++, list.get(j++));
}
}
while (i < len1) {
list.set(dest++, tmp[i++]);
}
while (j < hi) {
list.set(dest++, list.get(j++));
}
assert dest == hi;
}
@Override
protected void merge(int lo, int pivot, int hi, int len1, int len2) {
if (len1 <= threshold) {
mergeWithExtraMemory(lo, pivot, hi, len1, len2);
} else {
// since this method recurses to run merge on smaller arrays, it will
// end up using mergeWithExtraMemory
super.merge(lo, pivot, hi, len1, len2);
}
}
}
/** SorterTemplate with custom {@link Comparator} */ /** SorterTemplate with custom {@link Comparator} */
private static <T> SorterTemplate getSorter(final List<T> list, final Comparator<? super T> comp) { private static <T> SorterTemplate getSorter(final List<T> list, final Comparator<? super T> comp) {
if (!(list instanceof RandomAccess)) if (!(list instanceof RandomAccess))
throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place."); throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place.");
return new SorterTemplate() { return new ListSorterTemplate<T>(list) {
@Override
protected void swap(int i, int j) {
Collections.swap(list, i, j);
}
@Override
protected int compare(int i, int j) {
return comp.compare(list.get(i), list.get(j));
}
@Override @Override
protected void setPivot(int i) { protected int compare(T a, T b) {
pivot = list.get(i); return comp.compare(a, b);
} }
@Override
protected int comparePivot(int j) {
return comp.compare(pivot, list.get(j));
}
private T pivot;
}; };
} }
@ -68,31 +138,52 @@ public final class CollectionUtil {
private static <T extends Comparable<? super T>> SorterTemplate getSorter(final List<T> list) { private static <T extends Comparable<? super T>> SorterTemplate getSorter(final List<T> list) {
if (!(list instanceof RandomAccess)) if (!(list instanceof RandomAccess))
throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place."); throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place.");
return new SorterTemplate() { return new ListSorterTemplate<T>(list) {
@Override
protected void swap(int i, int j) {
Collections.swap(list, i, j);
}
@Override
protected int compare(int i, int j) {
return list.get(i).compareTo(list.get(j));
}
@Override @Override
protected void setPivot(int i) { protected int compare(T a, T b) {
pivot = list.get(i); return a.compareTo(b);
} }
@Override
protected int comparePivot(int j) {
return pivot.compareTo(list.get(j));
}
private T pivot;
}; };
} }
/** SorterTemplate with custom {@link Comparator} for merge-based sorts. */
private static <T> SorterTemplate getMergeSorter(final List<T> list, final Comparator<? super T> comp) {
if (!(list instanceof RandomAccess))
throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place.");
if (list.size() < MERGE_EXTRA_MEMORY_THRESHOLD) {
return getSorter(list, comp);
} else {
return new ListMergeSorterTemplate<T>(list, MERGE_OVERHEAD_RATIO) {
@Override
protected int compare(T a, T b) {
return comp.compare(a, b);
}
};
}
}
/** Natural SorterTemplate for merge-based sorts. */
private static <T extends Comparable<? super T>> SorterTemplate getMergeSorter(final List<T> list) {
if (!(list instanceof RandomAccess))
throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place.");
if (list.size() < MERGE_EXTRA_MEMORY_THRESHOLD) {
return getSorter(list);
} else {
return new ListMergeSorterTemplate<T>(list, MERGE_OVERHEAD_RATIO) {
@Override
protected int compare(T a, T b) {
return a.compareTo(b);
}
};
}
}
/** /**
* Sorts the given random access {@link List} using the {@link Comparator}. * Sorts the given random access {@link List} using the {@link Comparator}.
* The list must implement {@link RandomAccess}. This method uses the quick sort * The list must implement {@link RandomAccess}. This method uses the quick sort
@ -128,7 +219,7 @@ public final class CollectionUtil {
public static <T> void mergeSort(List<T> list, Comparator<? super T> comp) { public static <T> void mergeSort(List<T> list, Comparator<? super T> comp) {
final int size = list.size(); final int size = list.size();
if (size <= 1) return; if (size <= 1) return;
getSorter(list, comp).mergeSort(0, size-1); getMergeSorter(list, comp).mergeSort(0, size-1);
} }
/** /**
@ -140,7 +231,7 @@ public final class CollectionUtil {
public static <T extends Comparable<? super T>> void mergeSort(List<T> list) { public static <T extends Comparable<? super T>> void mergeSort(List<T> list) {
final int size = list.size(); final int size = list.size();
if (size <= 1) return; if (size <= 1) return;
getSorter(list).mergeSort(0, size-1); getMergeSorter(list).mergeSort(0, size-1);
} }
// timSorts: // timSorts:
@ -154,7 +245,7 @@ public final class CollectionUtil {
public static <T> void timSort(List<T> list, Comparator<? super T> comp) { public static <T> void timSort(List<T> list, Comparator<? super T> comp) {
final int size = list.size(); final int size = list.size();
if (size <= 1) return; if (size <= 1) return;
getSorter(list, comp).timSort(0, size-1); getMergeSorter(list, comp).timSort(0, size-1);
} }
/** /**
@ -166,7 +257,7 @@ public final class CollectionUtil {
public static <T extends Comparable<? super T>> void timSort(List<T> list) { public static <T extends Comparable<? super T>> void timSort(List<T> list) {
final int size = list.size(); final int size = list.size();
if (size <= 1) return; if (size <= 1) return;
getSorter(list).timSort(0, size-1); getMergeSorter(list).timSort(0, size-1);
} }
// insertionSorts: // insertionSorts:

View File

@ -343,8 +343,10 @@ public abstract class SorterTemplate {
merge(lo, mid, hi, mid - lo, hi - mid); merge(lo, mid, hi, mid - lo, hi - mid);
} }
// pkg-protected for access from TimSort class /** Merge the slices [lo-pivot[ (of length len1) and [pivot-hi[ (of length
void merge(int lo, int pivot, int hi, int len1, int len2) { * len2) which are already sorted. This method merges in-place but can be
* extended to provide a faster implementation using extra memory. */
protected void merge(int lo, int pivot, int hi, int len1, int len2) {
if (len1 == 0 || len2 == 0) { if (len1 == 0 || len2 == 0) {
return; return;
} }

View File

@ -131,12 +131,12 @@ public class TestArrayUtil extends LuceneTestCase {
public void testQuickSort() { public void testQuickSort() {
int num = atLeast(50); int num = atLeast(50);
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
Integer[] a1 = createRandomArray(1000), a2 = a1.clone(); Integer[] a1 = createRandomArray(2000), a2 = a1.clone();
ArrayUtil.quickSort(a1); ArrayUtil.quickSort(a1);
Arrays.sort(a2); Arrays.sort(a2);
assertArrayEquals(a2, a1); assertArrayEquals(a2, a1);
a1 = createRandomArray(1000); a1 = createRandomArray(2000);
a2 = a1.clone(); a2 = a1.clone();
ArrayUtil.quickSort(a1, Collections.reverseOrder()); ArrayUtil.quickSort(a1, Collections.reverseOrder());
Arrays.sort(a2, Collections.reverseOrder()); Arrays.sort(a2, Collections.reverseOrder());
@ -171,12 +171,12 @@ public class TestArrayUtil extends LuceneTestCase {
public void testMergeSort() { public void testMergeSort() {
int num = atLeast(50); int num = atLeast(50);
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
Integer[] a1 = createRandomArray(1000), a2 = a1.clone(); Integer[] a1 = createRandomArray(2000), a2 = a1.clone();
ArrayUtil.mergeSort(a1); ArrayUtil.mergeSort(a1);
Arrays.sort(a2); Arrays.sort(a2);
assertArrayEquals(a2, a1); assertArrayEquals(a2, a1);
a1 = createRandomArray(1000); a1 = createRandomArray(2000);
a2 = a1.clone(); a2 = a1.clone();
ArrayUtil.mergeSort(a1, Collections.reverseOrder()); ArrayUtil.mergeSort(a1, Collections.reverseOrder());
Arrays.sort(a2, Collections.reverseOrder()); Arrays.sort(a2, Collections.reverseOrder());
@ -191,12 +191,12 @@ public class TestArrayUtil extends LuceneTestCase {
public void testTimSort() { public void testTimSort() {
int num = atLeast(65); int num = atLeast(65);
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
Integer[] a1 = createRandomArray(1000), a2 = a1.clone(); Integer[] a1 = createRandomArray(2000), a2 = a1.clone();
ArrayUtil.timSort(a1); ArrayUtil.timSort(a1);
Arrays.sort(a2); Arrays.sort(a2);
assertArrayEquals(a2, a1); assertArrayEquals(a2, a1);
a1 = createRandomArray(1000); a1 = createRandomArray(2000);
a2 = a1.clone(); a2 = a1.clone();
ArrayUtil.timSort(a1, Collections.reverseOrder()); ArrayUtil.timSort(a1, Collections.reverseOrder());
Arrays.sort(a2, Collections.reverseOrder()); Arrays.sort(a2, Collections.reverseOrder());

View File

@ -37,12 +37,12 @@ public class TestCollectionUtil extends LuceneTestCase {
public void testQuickSort() { public void testQuickSort() {
for (int i = 0, c = atLeast(500); i < c; i++) { for (int i = 0, c = atLeast(500); i < c; i++) {
List<Integer> list1 = createRandomList(1000), list2 = new ArrayList<Integer>(list1); List<Integer> list1 = createRandomList(2000), list2 = new ArrayList<Integer>(list1);
CollectionUtil.quickSort(list1); CollectionUtil.quickSort(list1);
Collections.sort(list2); Collections.sort(list2);
assertEquals(list2, list1); assertEquals(list2, list1);
list1 = createRandomList(1000); list1 = createRandomList(2000);
list2 = new ArrayList<Integer>(list1); list2 = new ArrayList<Integer>(list1);
CollectionUtil.quickSort(list1, Collections.reverseOrder()); CollectionUtil.quickSort(list1, Collections.reverseOrder());
Collections.sort(list2, Collections.reverseOrder()); Collections.sort(list2, Collections.reverseOrder());
@ -56,12 +56,12 @@ public class TestCollectionUtil extends LuceneTestCase {
public void testMergeSort() { public void testMergeSort() {
for (int i = 0, c = atLeast(500); i < c; i++) { for (int i = 0, c = atLeast(500); i < c; i++) {
List<Integer> list1 = createRandomList(1000), list2 = new ArrayList<Integer>(list1); List<Integer> list1 = createRandomList(2000), list2 = new ArrayList<Integer>(list1);
CollectionUtil.mergeSort(list1); CollectionUtil.mergeSort(list1);
Collections.sort(list2); Collections.sort(list2);
assertEquals(list2, list1); assertEquals(list2, list1);
list1 = createRandomList(1000); list1 = createRandomList(2000);
list2 = new ArrayList<Integer>(list1); list2 = new ArrayList<Integer>(list1);
CollectionUtil.mergeSort(list1, Collections.reverseOrder()); CollectionUtil.mergeSort(list1, Collections.reverseOrder());
Collections.sort(list2, Collections.reverseOrder()); Collections.sort(list2, Collections.reverseOrder());
@ -75,12 +75,12 @@ public class TestCollectionUtil extends LuceneTestCase {
public void testTimSort() { public void testTimSort() {
for (int i = 0, c = atLeast(500); i < c; i++) { for (int i = 0, c = atLeast(500); i < c; i++) {
List<Integer> list1 = createRandomList(1000), list2 = new ArrayList<Integer>(list1); List<Integer> list1 = createRandomList(2000), list2 = new ArrayList<Integer>(list1);
CollectionUtil.timSort(list1); CollectionUtil.timSort(list1);
Collections.sort(list2); Collections.sort(list2);
assertEquals(list2, list1); assertEquals(list2, list1);
list1 = createRandomList(1000); list1 = createRandomList(2000);
list2 = new ArrayList<Integer>(list1); list2 = new ArrayList<Integer>(list1);
CollectionUtil.timSort(list1, Collections.reverseOrder()); CollectionUtil.timSort(list1, Collections.reverseOrder());
Collections.sort(list2, Collections.reverseOrder()); Collections.sort(list2, Collections.reverseOrder());