diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index f1711a2cb7a..a11eee3622e 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -59,6 +59,8 @@ Bug Fixes * LUCENE-6242: Ram usage estimation was incorrect for SparseFixedBitSet when object alignment was different from 8. (Uwe Schindler, Adrien Grand) +* LUCENE-6293: Fixed TimSorter bug. (Adrien Grand) + Optimizations * LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields diff --git a/lucene/core/src/java/org/apache/lucene/util/TimSorter.java b/lucene/core/src/java/org/apache/lucene/util/TimSorter.java index 44267dc0ab7..dd0ddc32eee 100644 --- a/lucene/core/src/java/org/apache/lucene/util/TimSorter.java +++ b/lucene/core/src/java/org/apache/lucene/util/TimSorter.java @@ -40,7 +40,7 @@ public abstract class TimSorter extends Sorter { static final int MINRUN = 32; static final int THRESHOLD = 64; - static final int STACKSIZE = 40; // depends on MINRUN + static final int STACKSIZE = 49; // depends on MINRUN static final int MIN_GALLOP = 7; final int maxTempSlots; diff --git a/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java b/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java index 064f5d7ea13..556a5c84f98 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java @@ -17,6 +17,13 @@ package org.apache.lucene.util; * limitations under the License. */ +import java.util.LinkedList; +import java.util.List; + +import org.apache.lucene.util.packed.PackedInts; + +import com.carrotsearch.randomizedtesting.generators.RandomInts; + public class TestTimSorter extends BaseSortTestCase { public TestTimSorter() { @@ -28,4 +35,140 @@ public class TestTimSorter extends BaseSortTestCase { return new ArrayTimSorter<>(arr, ArrayUtil.naturalComparator(), TestUtil.nextInt(random(), 0, arr.length)); } + public void testWorstCaseStackSize() { + // we need large arrays to be able to reproduce this bug + final int length; + if (TEST_NIGHTLY) { + length = RandomInts.randomIntBetween(random(), 140000000, Integer.MAX_VALUE); + } else { + length = RandomInts.randomIntBetween(random(), 140000000, 200000000); + } + final PackedInts.Mutable arr = generateWorstCaseArray(length); + new TimSorter(0) { + + @Override + protected void swap(int i, int j) { + final long tmp = arr.get(i); + arr.set(i, arr.get(j)); + arr.set(j, tmp); + } + + @Override + protected int compare(int i, int j) { + return Long.compare(arr.get(i), arr.get(j)); + } + + @Override + protected void save(int i, int len) { + throw new UnsupportedOperationException(); + } + + @Override + protected void restore(int i, int j) { + throw new UnsupportedOperationException(); + } + + @Override + protected void copy(int src, int dest) { + arr.set(dest, arr.get(src)); + } + + @Override + protected int compareSaved(int i, int j) { + throw new UnsupportedOperationException(); + } + }.sort(0, length); + } + + /** Create an array for the given list of runs. */ + private static PackedInts.Mutable createArray(int length, List runs) { + PackedInts.Mutable array = PackedInts.getMutable(length, 1, 0); + int endRun = -1; + for (long len : runs) { + array.set(endRun += len, 1); + } + array.set(length - 1, 0); + return array; + } + + /** Create an array that triggers a worst-case sequence of run lens. */ + public static PackedInts.Mutable generateWorstCaseArray(int length) { + final int minRun = TimSorter.minRun(length); + final List runs = runsWorstCase(length, minRun); + return createArray(length, runs); + } + + // + // Code below is borrowed from https://github.com/abstools/java-timsort-bug/blob/master/TestTimSort.java + // + + /** + * Fills runs with a sequence of run lengths of the form
+ * Y_n x_{n,1} x_{n,2} ... x_{n,l_n}
+ * Y_{n-1} x_{n-1,1} x_{n-1,2} ... x_{n-1,l_{n-1}}
+ * ...
+ * Y_1 x_{1,1} x_{1,2} ... x_{1,l_1}
+ * The Y_i's are chosen to satisfy the invariant throughout execution, + * but the x_{i,j}'s are merged (by TimSort.mergeCollapse) + * into an X_i that violates the invariant. + */ + private static List runsWorstCase(int length, int minRun) { + List runs = new LinkedList<>(); + + int runningTotal = 0, Y=minRun+4, X=minRun; + + while((long) runningTotal+Y+X <= length) { + runningTotal += X + Y; + generateWrongElem(X, minRun, runs); + runs.add(0,Y); + + // X_{i+1} = Y_i + x_{i,1} + 1, since runs.get(1) = x_{i,1} + X = Y + runs.get(1) + 1; + + // Y_{i+1} = X_{i+1} + Y_i + 1 + Y += X + 1; + } + + if(runningTotal + X <= length) { + runningTotal += X; + generateWrongElem(X, minRun, runs); + } + + runs.add(length-runningTotal); + return runs; + } + + /** + * Adds a sequence x_1, ..., x_n of run lengths to runs such that:
+ * 1. X = x_1 + ... + x_n
+ * 2. x_j >= minRun for all j
+ * 3. x_1 + ... + x_{j-2} < x_j < x_1 + ... + x_{j-1} for all j
+ * These conditions guarantee that TimSort merges all x_j's one by one + * (resulting in X) using only merges on the second-to-last element. + * @param X The sum of the sequence that should be added to runs. + */ + private static void generateWrongElem(int X, int minRun, List runs) { + for(int newTotal; X >= 2*minRun+1; X = newTotal) { + //Default strategy + newTotal = X/2 + 1; + + //Specialized strategies + if(3*minRun+3 <= X && X <= 4*minRun+1) { + // add x_1=MIN+1, x_2=MIN, x_3=X-newTotal to runs + newTotal = 2*minRun+1; + } else if(5*minRun+5 <= X && X <= 6*minRun+5) { + // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=X-newTotal to runs + newTotal = 3*minRun+3; + } else if(8*minRun+9 <= X && X <= 10*minRun+9) { + // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=2MIN+2, x_5=X-newTotal to runs + newTotal = 5*minRun+5; + } else if(13*minRun+15 <= X && X <= 16*minRun+17) { + // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=2MIN+2, x_5=3MIN+4, x_6=X-newTotal to runs + newTotal = 8*minRun+9; + } + runs.add(0, X-newTotal); + } + runs.add(0, X); + } + }