LUCENE-6293: Fixed TimSorter bug.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1662410 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-02-26 10:28:33 +00:00
parent 7cf4eedf59
commit 3ec9440e94
3 changed files with 146 additions and 1 deletions

View File

@ -59,6 +59,8 @@ Bug Fixes
* LUCENE-6242: Ram usage estimation was incorrect for SparseFixedBitSet when * LUCENE-6242: Ram usage estimation was incorrect for SparseFixedBitSet when
object alignment was different from 8. (Uwe Schindler, Adrien Grand) object alignment was different from 8. (Uwe Schindler, Adrien Grand)
* LUCENE-6293: Fixed TimSorter bug. (Adrien Grand)
Optimizations Optimizations
* LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields * LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields

View File

@ -40,7 +40,7 @@ public abstract class TimSorter extends Sorter {
static final int MINRUN = 32; static final int MINRUN = 32;
static final int THRESHOLD = 64; static final int THRESHOLD = 64;
static final int STACKSIZE = 40; // depends on MINRUN static final int STACKSIZE = 49; // depends on MINRUN
static final int MIN_GALLOP = 7; static final int MIN_GALLOP = 7;
final int maxTempSlots; final int maxTempSlots;

View File

@ -17,6 +17,13 @@ package org.apache.lucene.util;
* limitations under the License. * limitations under the License.
*/ */
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.util.packed.PackedInts;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
public class TestTimSorter extends BaseSortTestCase { public class TestTimSorter extends BaseSortTestCase {
public TestTimSorter() { public TestTimSorter() {
@ -28,4 +35,140 @@ public class TestTimSorter extends BaseSortTestCase {
return new ArrayTimSorter<>(arr, ArrayUtil.<Entry>naturalComparator(), TestUtil.nextInt(random(), 0, arr.length)); return new ArrayTimSorter<>(arr, ArrayUtil.<Entry>naturalComparator(), TestUtil.nextInt(random(), 0, arr.length));
} }
public void testWorstCaseStackSize() {
// we need large arrays to be able to reproduce this bug
final int length;
if (TEST_NIGHTLY) {
length = RandomInts.randomIntBetween(random(), 140000000, Integer.MAX_VALUE);
} else {
length = RandomInts.randomIntBetween(random(), 140000000, 200000000);
}
final PackedInts.Mutable arr = generateWorstCaseArray(length);
new TimSorter(0) {
@Override
protected void swap(int i, int j) {
final long tmp = arr.get(i);
arr.set(i, arr.get(j));
arr.set(j, tmp);
}
@Override
protected int compare(int i, int j) {
return Long.compare(arr.get(i), arr.get(j));
}
@Override
protected void save(int i, int len) {
throw new UnsupportedOperationException();
}
@Override
protected void restore(int i, int j) {
throw new UnsupportedOperationException();
}
@Override
protected void copy(int src, int dest) {
arr.set(dest, arr.get(src));
}
@Override
protected int compareSaved(int i, int j) {
throw new UnsupportedOperationException();
}
}.sort(0, length);
}
/** Create an array for the given list of runs. */
private static PackedInts.Mutable createArray(int length, List<Integer> runs) {
PackedInts.Mutable array = PackedInts.getMutable(length, 1, 0);
int endRun = -1;
for (long len : runs) {
array.set(endRun += len, 1);
}
array.set(length - 1, 0);
return array;
}
/** Create an array that triggers a worst-case sequence of run lens. */
public static PackedInts.Mutable generateWorstCaseArray(int length) {
final int minRun = TimSorter.minRun(length);
final List<Integer> runs = runsWorstCase(length, minRun);
return createArray(length, runs);
}
//
// Code below is borrowed from https://github.com/abstools/java-timsort-bug/blob/master/TestTimSort.java
//
/**
* Fills <code>runs</code> with a sequence of run lengths of the form<br>
* Y_n x_{n,1} x_{n,2} ... x_{n,l_n} <br>
* Y_{n-1} x_{n-1,1} x_{n-1,2} ... x_{n-1,l_{n-1}} <br>
* ... <br>
* Y_1 x_{1,1} x_{1,2} ... x_{1,l_1}<br>
* The Y_i's are chosen to satisfy the invariant throughout execution,
* but the x_{i,j}'s are merged (by <code>TimSort.mergeCollapse</code>)
* into an X_i that violates the invariant.
*/
private static List<Integer> runsWorstCase(int length, int minRun) {
List<Integer> runs = new LinkedList<>();
int runningTotal = 0, Y=minRun+4, X=minRun;
while((long) runningTotal+Y+X <= length) {
runningTotal += X + Y;
generateWrongElem(X, minRun, runs);
runs.add(0,Y);
// X_{i+1} = Y_i + x_{i,1} + 1, since runs.get(1) = x_{i,1}
X = Y + runs.get(1) + 1;
// Y_{i+1} = X_{i+1} + Y_i + 1
Y += X + 1;
}
if(runningTotal + X <= length) {
runningTotal += X;
generateWrongElem(X, minRun, runs);
}
runs.add(length-runningTotal);
return runs;
}
/**
* Adds a sequence x_1, ..., x_n of run lengths to <code>runs</code> such that:<br>
* 1. X = x_1 + ... + x_n <br>
* 2. x_j >= minRun for all j <br>
* 3. x_1 + ... + x_{j-2} < x_j < x_1 + ... + x_{j-1} for all j <br>
* These conditions guarantee that TimSort merges all x_j's one by one
* (resulting in X) using only merges on the second-to-last element.
* @param X The sum of the sequence that should be added to runs.
*/
private static void generateWrongElem(int X, int minRun, List<Integer> runs) {
for(int newTotal; X >= 2*minRun+1; X = newTotal) {
//Default strategy
newTotal = X/2 + 1;
//Specialized strategies
if(3*minRun+3 <= X && X <= 4*minRun+1) {
// add x_1=MIN+1, x_2=MIN, x_3=X-newTotal to runs
newTotal = 2*minRun+1;
} else if(5*minRun+5 <= X && X <= 6*minRun+5) {
// add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=X-newTotal to runs
newTotal = 3*minRun+3;
} else if(8*minRun+9 <= X && X <= 10*minRun+9) {
// add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=2MIN+2, x_5=X-newTotal to runs
newTotal = 5*minRun+5;
} else if(13*minRun+15 <= X && X <= 16*minRun+17) {
// add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=2MIN+2, x_5=3MIN+4, x_6=X-newTotal to runs
newTotal = 8*minRun+9;
}
runs.add(0, X-newTotal);
}
runs.add(0, X);
}
} }