diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 9c96a6e99ff..f121058667c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -136,6 +136,14 @@ New Features to return BytesRef[] instead of Term[]. (Robert Muir, Mike McCandless) +Optimizations + +* LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace + the default implementation which merges in-place by a faster implementation + that could require fewer swaps at the expense of some extra memory. + ArrayUtil and CollectionUtil override it so that their mergeSort and timSort + methods are faster but only require up to 1% of extra memory. (Adrien Grand) + API Changes * LUCENE-4844: removed TaxonomyReader.getParent(), you should use diff --git a/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java b/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java index d31a2e43f33..f793e4ba14d 100644 --- a/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java @@ -28,6 +28,11 @@ import java.util.Comparator; public final class ArrayUtil { + // affordable memory overhead to merge sorted arrays + static final float MERGE_OVERHEAD_RATIO = 0.01f; + // arrays below this size will always be sorted in-place + static final int MERGE_EXTRA_MEMORY_THRESHOLD = (int) (15 / MERGE_OVERHEAD_RATIO); + private ArrayUtil() {} // no instance /* @@ -604,65 +609,145 @@ public final class ArrayUtil { return result; } - + + private static abstract class ArraySorterTemplate extends SorterTemplate { + + protected final T[] a; + + ArraySorterTemplate(T[] a) { + this.a = a; + } + + protected abstract int compare(T a, T b); + + @Override + protected void swap(int i, int j) { + final T o = a[i]; + a[i] = a[j]; + a[j] = o; + } + + @Override + protected int compare(int i, int j) { + return compare(a[i], a[j]); + } + + @Override + protected void setPivot(int i) { + pivot = a[i]; + } + + @Override + protected int comparePivot(int j) { + return compare(pivot, a[j]); + } + + private T pivot; + + } + + // a template for merge-based sorts which uses extra memory to speed up merging + private static abstract class ArrayMergeSorterTemplate extends ArraySorterTemplate { + + private final int threshold; // maximum length of a merge that can be made using extra memory + private final T[] tmp; + + ArrayMergeSorterTemplate(T[] a, float overheadRatio) { + super(a); + this.threshold = (int) (a.length * overheadRatio); + @SuppressWarnings("unchecked") + final T[] tmpBuf = (T[]) new Object[threshold]; + this.tmp = tmpBuf; + } + + private void mergeWithExtraMemory(int lo, int pivot, int hi, int len1, int len2) { + System.arraycopy(a, lo, tmp, 0, len1); + int i = 0, j = pivot, dest = lo; + while (i < len1 && j < hi) { + if (compare(tmp[i], a[j]) <= 0) { + a[dest++] = tmp[i++]; + } else { + a[dest++] = a[j++]; + } + } + while (i < len1) { + a[dest++] = tmp[i++]; + } + while (j < hi) { + a[dest++] = a[j++]; + } + assert dest == hi; + } + + @Override + protected void merge(int lo, int pivot, int hi, int len1, int len2) { + if (len1 <= threshold) { + mergeWithExtraMemory(lo, pivot, hi, len1, len2); + } else { + // since this method recurses to run merge on smaller arrays, it will + // end up using mergeWithExtraMemory + super.merge(lo, pivot, hi, len1, len2); + } + } + + } + /** SorterTemplate with custom {@link Comparator} */ private static SorterTemplate getSorter(final T[] a, final Comparator comp) { - return new SorterTemplate() { - @Override - protected void swap(int i, int j) { - final T o = a[i]; - a[i] = a[j]; - a[j] = o; - } - - @Override - protected int compare(int i, int j) { - return comp.compare(a[i], a[j]); - } + return new ArraySorterTemplate(a) { @Override - protected void setPivot(int i) { - pivot = a[i]; + protected int compare(T a, T b) { + return comp.compare(a, b); } - - @Override - protected int comparePivot(int j) { - return comp.compare(pivot, a[j]); - } - - private T pivot; + }; } - + /** Natural SorterTemplate */ private static > SorterTemplate getSorter(final T[] a) { - return new SorterTemplate() { - @Override - protected void swap(int i, int j) { - final T o = a[i]; - a[i] = a[j]; - a[j] = o; - } - - @Override - protected int compare(int i, int j) { - return a[i].compareTo(a[j]); - } + return new ArraySorterTemplate(a) { @Override - protected void setPivot(int i) { - pivot = a[i]; + protected int compare(T a, T b) { + return a.compareTo(b); } - - @Override - protected int comparePivot(int j) { - return pivot.compareTo(a[j]); - } - - private T pivot; + }; } + /** SorterTemplate with custom {@link Comparator} for merge-based sorts. */ + private static SorterTemplate getMergeSorter(final T[] a, final Comparator comp) { + if (a.length < MERGE_EXTRA_MEMORY_THRESHOLD) { + return getSorter(a, comp); + } else { + return new ArrayMergeSorterTemplate(a, MERGE_OVERHEAD_RATIO) { + + @Override + protected int compare(T a, T b) { + return comp.compare(a, b); + } + + }; + } + } + + /** Natural SorterTemplate for merge-based sorts. */ + private static > SorterTemplate getMergeSorter(final T[] a) { + if (a.length < MERGE_EXTRA_MEMORY_THRESHOLD) { + return getSorter(a); + } else { + return new ArrayMergeSorterTemplate(a, MERGE_OVERHEAD_RATIO) { + + @Override + protected int compare(T a, T b) { + return a.compareTo(b); + } + + }; + } + } + // quickSorts (endindex is exclusive!): /** @@ -714,7 +799,7 @@ public final class ArrayUtil { public static void mergeSort(T[] a, int fromIndex, int toIndex, Comparator comp) { if (toIndex-fromIndex <= 1) return; //System.out.println("SORT: " + (toIndex-fromIndex)); - getSorter(a, comp).mergeSort(fromIndex, toIndex-1); + getMergeSorter(a, comp).mergeSort(fromIndex, toIndex-1); } /** @@ -733,7 +818,7 @@ public final class ArrayUtil { */ public static > void mergeSort(T[] a, int fromIndex, int toIndex) { if (toIndex-fromIndex <= 1) return; - getSorter(a).mergeSort(fromIndex, toIndex-1); + getMergeSorter(a).mergeSort(fromIndex, toIndex-1); } /** @@ -754,7 +839,7 @@ public final class ArrayUtil { */ public static void timSort(T[] a, int fromIndex, int toIndex, Comparator comp) { if (toIndex-fromIndex <= 1) return; - getSorter(a, comp).timSort(fromIndex, toIndex-1); + getMergeSorter(a, comp).timSort(fromIndex, toIndex-1); } /** @@ -773,7 +858,7 @@ public final class ArrayUtil { */ public static > void timSort(T[] a, int fromIndex, int toIndex) { if (toIndex-fromIndex <= 1) return; - getSorter(a).timSort(fromIndex, toIndex-1); + getMergeSorter(a).timSort(fromIndex, toIndex-1); } /** diff --git a/lucene/core/src/java/org/apache/lucene/util/CollectionUtil.java b/lucene/core/src/java/org/apache/lucene/util/CollectionUtil.java index 486a5a365d5..36d4252be6d 100644 --- a/lucene/core/src/java/org/apache/lucene/util/CollectionUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/CollectionUtil.java @@ -17,8 +17,11 @@ package org.apache.lucene.util; * limitations under the License. */ -import java.util.Comparator; +import static org.apache.lucene.util.ArrayUtil.MERGE_EXTRA_MEMORY_THRESHOLD; +import static org.apache.lucene.util.ArrayUtil.MERGE_OVERHEAD_RATIO; + import java.util.Collections; +import java.util.Comparator; import java.util.List; import java.util.RandomAccess; @@ -34,33 +37,100 @@ import java.util.RandomAccess; public final class CollectionUtil { private CollectionUtil() {} // no instance - + + private static abstract class ListSorterTemplate extends SorterTemplate { + + protected final List list; + + ListSorterTemplate(List list) { + this.list = list; + } + + protected abstract int compare(T a, T b); + + @Override + protected void swap(int i, int j) { + Collections.swap(list, i, j); + } + + @Override + protected int compare(int i, int j) { + return compare(list.get(i), list.get(j)); + } + + @Override + protected void setPivot(int i) { + pivot = list.get(i); + } + + @Override + protected int comparePivot(int j) { + return compare(pivot, list.get(j)); + } + + private T pivot; + + } + + // a template for merge-based sorts which uses extra memory to speed up merging + private static abstract class ListMergeSorterTemplate extends ListSorterTemplate { + + private final int threshold; // maximum length of a merge that can be made using extra memory + private final T[] tmp; + + ListMergeSorterTemplate(List list, float overheadRatio) { + super(list); + this.threshold = (int) (list.size() * overheadRatio); + @SuppressWarnings("unchecked") + final T[] tmpBuf = (T[]) new Object[threshold]; + this.tmp = tmpBuf; + } + + private void mergeWithExtraMemory(int lo, int pivot, int hi, int len1, int len2) { + for (int i = 0; i < len1; ++i) { + tmp[i] = list.get(lo + i); + } + int i = 0, j = pivot, dest = lo; + while (i < len1 && j < hi) { + if (compare(tmp[i], list.get(j)) <= 0) { + list.set(dest++, tmp[i++]); + } else { + list.set(dest++, list.get(j++)); + } + } + while (i < len1) { + list.set(dest++, tmp[i++]); + } + while (j < hi) { + list.set(dest++, list.get(j++)); + } + assert dest == hi; + } + + @Override + protected void merge(int lo, int pivot, int hi, int len1, int len2) { + if (len1 <= threshold) { + mergeWithExtraMemory(lo, pivot, hi, len1, len2); + } else { + // since this method recurses to run merge on smaller arrays, it will + // end up using mergeWithExtraMemory + super.merge(lo, pivot, hi, len1, len2); + } + } + + } + /** SorterTemplate with custom {@link Comparator} */ private static SorterTemplate getSorter(final List list, final Comparator comp) { if (!(list instanceof RandomAccess)) throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place."); - return new SorterTemplate() { - @Override - protected void swap(int i, int j) { - Collections.swap(list, i, j); - } - - @Override - protected int compare(int i, int j) { - return comp.compare(list.get(i), list.get(j)); - } + return new ListSorterTemplate(list) { @Override - protected void setPivot(int i) { - pivot = list.get(i); + protected int compare(T a, T b) { + return comp.compare(a, b); } - - @Override - protected int comparePivot(int j) { - return comp.compare(pivot, list.get(j)); - } - - private T pivot; + }; } @@ -68,31 +138,52 @@ public final class CollectionUtil { private static > SorterTemplate getSorter(final List list) { if (!(list instanceof RandomAccess)) throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place."); - return new SorterTemplate() { - @Override - protected void swap(int i, int j) { - Collections.swap(list, i, j); - } - - @Override - protected int compare(int i, int j) { - return list.get(i).compareTo(list.get(j)); - } + return new ListSorterTemplate(list) { @Override - protected void setPivot(int i) { - pivot = list.get(i); + protected int compare(T a, T b) { + return a.compareTo(b); } - - @Override - protected int comparePivot(int j) { - return pivot.compareTo(list.get(j)); - } - - private T pivot; + }; } + /** SorterTemplate with custom {@link Comparator} for merge-based sorts. */ + private static SorterTemplate getMergeSorter(final List list, final Comparator comp) { + if (!(list instanceof RandomAccess)) + throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place."); + if (list.size() < MERGE_EXTRA_MEMORY_THRESHOLD) { + return getSorter(list, comp); + } else { + return new ListMergeSorterTemplate(list, MERGE_OVERHEAD_RATIO) { + + @Override + protected int compare(T a, T b) { + return comp.compare(a, b); + } + + }; + } + } + + /** Natural SorterTemplate for merge-based sorts. */ + private static > SorterTemplate getMergeSorter(final List list) { + if (!(list instanceof RandomAccess)) + throw new IllegalArgumentException("CollectionUtil can only sort random access lists in-place."); + if (list.size() < MERGE_EXTRA_MEMORY_THRESHOLD) { + return getSorter(list); + } else { + return new ListMergeSorterTemplate(list, MERGE_OVERHEAD_RATIO) { + + @Override + protected int compare(T a, T b) { + return a.compareTo(b); + } + + }; + } + } + /** * Sorts the given random access {@link List} using the {@link Comparator}. * The list must implement {@link RandomAccess}. This method uses the quick sort @@ -128,7 +219,7 @@ public final class CollectionUtil { public static void mergeSort(List list, Comparator comp) { final int size = list.size(); if (size <= 1) return; - getSorter(list, comp).mergeSort(0, size-1); + getMergeSorter(list, comp).mergeSort(0, size-1); } /** @@ -140,7 +231,7 @@ public final class CollectionUtil { public static > void mergeSort(List list) { final int size = list.size(); if (size <= 1) return; - getSorter(list).mergeSort(0, size-1); + getMergeSorter(list).mergeSort(0, size-1); } // timSorts: @@ -154,7 +245,7 @@ public final class CollectionUtil { public static void timSort(List list, Comparator comp) { final int size = list.size(); if (size <= 1) return; - getSorter(list, comp).timSort(0, size-1); + getMergeSorter(list, comp).timSort(0, size-1); } /** @@ -166,7 +257,7 @@ public final class CollectionUtil { public static > void timSort(List list) { final int size = list.size(); if (size <= 1) return; - getSorter(list).timSort(0, size-1); + getMergeSorter(list).timSort(0, size-1); } // insertionSorts: diff --git a/lucene/core/src/java/org/apache/lucene/util/SorterTemplate.java b/lucene/core/src/java/org/apache/lucene/util/SorterTemplate.java index 55024bb5977..b74342435ad 100644 --- a/lucene/core/src/java/org/apache/lucene/util/SorterTemplate.java +++ b/lucene/core/src/java/org/apache/lucene/util/SorterTemplate.java @@ -343,8 +343,10 @@ public abstract class SorterTemplate { merge(lo, mid, hi, mid - lo, hi - mid); } - // pkg-protected for access from TimSort class - void merge(int lo, int pivot, int hi, int len1, int len2) { + /** Merge the slices [lo-pivot[ (of length len1) and [pivot-hi[ (of length + * len2) which are already sorted. This method merges in-place but can be + * extended to provide a faster implementation using extra memory. */ + protected void merge(int lo, int pivot, int hi, int len1, int len2) { if (len1 == 0 || len2 == 0) { return; } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java b/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java index 6b9d0a476be..f59af0e06dc 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java @@ -131,12 +131,12 @@ public class TestArrayUtil extends LuceneTestCase { public void testQuickSort() { int num = atLeast(50); for (int i = 0; i < num; i++) { - Integer[] a1 = createRandomArray(1000), a2 = a1.clone(); + Integer[] a1 = createRandomArray(2000), a2 = a1.clone(); ArrayUtil.quickSort(a1); Arrays.sort(a2); assertArrayEquals(a2, a1); - a1 = createRandomArray(1000); + a1 = createRandomArray(2000); a2 = a1.clone(); ArrayUtil.quickSort(a1, Collections.reverseOrder()); Arrays.sort(a2, Collections.reverseOrder()); @@ -171,12 +171,12 @@ public class TestArrayUtil extends LuceneTestCase { public void testMergeSort() { int num = atLeast(50); for (int i = 0; i < num; i++) { - Integer[] a1 = createRandomArray(1000), a2 = a1.clone(); + Integer[] a1 = createRandomArray(2000), a2 = a1.clone(); ArrayUtil.mergeSort(a1); Arrays.sort(a2); assertArrayEquals(a2, a1); - a1 = createRandomArray(1000); + a1 = createRandomArray(2000); a2 = a1.clone(); ArrayUtil.mergeSort(a1, Collections.reverseOrder()); Arrays.sort(a2, Collections.reverseOrder()); @@ -191,12 +191,12 @@ public class TestArrayUtil extends LuceneTestCase { public void testTimSort() { int num = atLeast(65); for (int i = 0; i < num; i++) { - Integer[] a1 = createRandomArray(1000), a2 = a1.clone(); + Integer[] a1 = createRandomArray(2000), a2 = a1.clone(); ArrayUtil.timSort(a1); Arrays.sort(a2); assertArrayEquals(a2, a1); - a1 = createRandomArray(1000); + a1 = createRandomArray(2000); a2 = a1.clone(); ArrayUtil.timSort(a1, Collections.reverseOrder()); Arrays.sort(a2, Collections.reverseOrder()); diff --git a/lucene/core/src/test/org/apache/lucene/util/TestCollectionUtil.java b/lucene/core/src/test/org/apache/lucene/util/TestCollectionUtil.java index a3cdc3bebd2..c45130f5b4b 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestCollectionUtil.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestCollectionUtil.java @@ -37,12 +37,12 @@ public class TestCollectionUtil extends LuceneTestCase { public void testQuickSort() { for (int i = 0, c = atLeast(500); i < c; i++) { - List list1 = createRandomList(1000), list2 = new ArrayList(list1); + List list1 = createRandomList(2000), list2 = new ArrayList(list1); CollectionUtil.quickSort(list1); Collections.sort(list2); assertEquals(list2, list1); - list1 = createRandomList(1000); + list1 = createRandomList(2000); list2 = new ArrayList(list1); CollectionUtil.quickSort(list1, Collections.reverseOrder()); Collections.sort(list2, Collections.reverseOrder()); @@ -56,12 +56,12 @@ public class TestCollectionUtil extends LuceneTestCase { public void testMergeSort() { for (int i = 0, c = atLeast(500); i < c; i++) { - List list1 = createRandomList(1000), list2 = new ArrayList(list1); + List list1 = createRandomList(2000), list2 = new ArrayList(list1); CollectionUtil.mergeSort(list1); Collections.sort(list2); assertEquals(list2, list1); - list1 = createRandomList(1000); + list1 = createRandomList(2000); list2 = new ArrayList(list1); CollectionUtil.mergeSort(list1, Collections.reverseOrder()); Collections.sort(list2, Collections.reverseOrder()); @@ -75,12 +75,12 @@ public class TestCollectionUtil extends LuceneTestCase { public void testTimSort() { for (int i = 0, c = atLeast(500); i < c; i++) { - List list1 = createRandomList(1000), list2 = new ArrayList(list1); + List list1 = createRandomList(2000), list2 = new ArrayList(list1); CollectionUtil.timSort(list1); Collections.sort(list2); assertEquals(list2, list1); - list1 = createRandomList(1000); + list1 = createRandomList(2000); list2 = new ArrayList(list1); CollectionUtil.timSort(list1, Collections.reverseOrder()); Collections.sort(list2, Collections.reverseOrder());