mirror of https://github.com/apache/lucene.git
LUCENE-9024: Optimize IntroSelector to use median of medians (#966)
This commit is contained in:
parent
e713811b17
commit
7d0dbdaf62
|
@ -35,6 +35,10 @@ Optimizations
|
|||
* LUCENE-8932: BKDReader's index is now stored off-heap when the IndexInput is
|
||||
an instance of ByteBufferIndexInput. (Jack Conradson via Adrien Grand)
|
||||
|
||||
* LUCENE-9024: IntroSelector now falls back to the median of medians algorithm
|
||||
instead of sorting when the maximum recursion level is exceeded, providing
|
||||
better worst-case runtime. (Paul Sanwald via Adrien Grand)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-9001: Fix race condition in SetOnce. (Przemko Robakowski)
|
||||
|
|
|
@ -20,9 +20,8 @@ import java.util.Comparator;
|
|||
|
||||
/** Implementation of the quick select algorithm.
|
||||
* <p>It uses the median of the first, middle and last values as a pivot and
|
||||
* falls back to a heap sort when the number of recursion levels exceeds
|
||||
* {@code 2 lg(n)}, as a consequence it runs in linear time on average and in
|
||||
* {@code n log(n)} time in the worst case.</p>
|
||||
* falls back to the median of medians algorithm when the number of recursion levels exceeds
|
||||
* {@code 2 lg(n)}; as a consequence, it runs in linear time on average.</p>
|
||||
* @lucene.internal */
|
||||
public abstract class IntroSelector extends Selector {
|
||||
|
||||
|
@ -33,26 +32,90 @@ public abstract class IntroSelector extends Selector {
|
|||
quickSelect(from, to, k, maxDepth);
|
||||
}
|
||||
|
||||
// heap sort
|
||||
// TODO: use median of median instead to have linear worst-case rather than
|
||||
// n*log(n)
|
||||
void slowSelect(int from, int to, int k) {
|
||||
new Sorter() {
|
||||
int slowSelect(int from, int to, int k) {
|
||||
return medianOfMediansSelect(from, to-1, k);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
IntroSelector.this.swap(i, j);
|
||||
int medianOfMediansSelect(int left, int right, int k) {
|
||||
do {
|
||||
// Defensive check, this is also checked in the calling
|
||||
// method. Including here so this method can be used
|
||||
// as a self contained quickSelect implementation.
|
||||
if (left == right) {
|
||||
return left;
|
||||
}
|
||||
int pivotIndex = pivot(left, right);
|
||||
pivotIndex = partition(left, right, k, pivotIndex);
|
||||
if (k == pivotIndex) {
|
||||
return k;
|
||||
} else if (k < pivotIndex) {
|
||||
right = pivotIndex-1;
|
||||
} else {
|
||||
left = pivotIndex+1;
|
||||
}
|
||||
} while (left != right);
|
||||
return left;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return IntroSelector.this.compare(i, j);
|
||||
private int partition(int left, int right, int k, int pivotIndex) {
|
||||
setPivot(pivotIndex);
|
||||
swap(pivotIndex, right);
|
||||
int storeIndex = left;
|
||||
for (int i = left; i < right; i++) {
|
||||
if (comparePivot(i) > 0) {
|
||||
swap(storeIndex, i);
|
||||
storeIndex++;
|
||||
}
|
||||
}
|
||||
int storeIndexEq = storeIndex;
|
||||
for (int i = storeIndex; i < right; i++) {
|
||||
if (comparePivot(i) == 0) {
|
||||
swap(storeIndexEq, i);
|
||||
storeIndexEq++;
|
||||
}
|
||||
}
|
||||
swap(right, storeIndexEq);
|
||||
if (k < storeIndex) {
|
||||
return storeIndex;
|
||||
} else if (k <= storeIndexEq) {
|
||||
return k;
|
||||
}
|
||||
return storeIndexEq;
|
||||
}
|
||||
|
||||
public void sort(int from, int to) {
|
||||
heapSort(from, to);
|
||||
// Chooses a pivot index for the inclusive range [left, right].
// Small ranges are delegated to partition5; larger ranges are processed in
// groups of up to five slots, each group's median is swapped to the front of
// the range, and medianOfMediansSelect is then called on that prefix.
private int pivot(int left, int right) {
|
||||
// Small range (right - left < 5): partition5 handles it directly.
if (right - left < 5) {
|
||||
int pivotIndex = partition5(left, right);
|
||||
return pivotIndex;
|
||||
}
|
||||
|
||||
// Walk the range in steps of five; the last group is clamped to 'right'.
for (int i = left; i <= right; i=i+5) {
|
||||
int subRight = i + 4;
|
||||
if (subRight > right) {
|
||||
subRight = right;
|
||||
}
|
||||
// NOTE(review): the next line looks like a removed-side diff line from the
// old Sorter-based implementation that the page extraction interleaved
// here; it is presumably not part of this method. Confirm against upstream.
}.sort(from, to);
|
||||
// partition5 returns the middle index of the group [i, subRight].
int median5 = partition5(i, subRight);
|
||||
// Move that group's median to slot left + (group number).
swap(median5, left + ((i-left)/5));
|
||||
}
|
||||
// 'to' is the last slot of the prefix holding the collected medians and
// 'mid' is the position requested within that prefix.
int mid = ((right - left) / 10) + left + 1;
|
||||
int to = left + ((right - left)/5);
|
||||
return medianOfMediansSelect(left, to, mid);
|
||||
}
|
||||
|
||||
// selects the median of a group of at most five elements,
|
||||
// implemented using insertion sort. Efficient due to
|
||||
// bounded nature of data set.
|
||||
private int partition5(int left, int right) {
|
||||
int i = left + 1;
|
||||
while( i <= right) {
|
||||
int j = i;
|
||||
while (j > left && compare(j-1,j)>0) {
|
||||
swap(j-1, j);
|
||||
j--;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return (left + right) >>> 1;
|
||||
}
|
||||
|
||||
private void quickSelect(int from, int to, int k, int maxDepth) {
|
||||
|
|
|
@ -36,7 +36,7 @@ public class TestIntroSelector extends LuceneTestCase {
|
|||
final int from = random().nextInt(5);
|
||||
final int to = from + TestUtil.nextInt(random(), 1, 10000);
|
||||
final int max = random().nextBoolean() ? random().nextInt(100) : random().nextInt(100000);
|
||||
Integer[] arr = new Integer[from + to + random().nextInt(5)];
|
||||
Integer[] arr = new Integer[to + random().nextInt(5)];
|
||||
for (int i = 0; i < arr.length; ++i) {
|
||||
arr[i] = TestUtil.nextInt(random(), 0, max);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue