mirror of https://github.com/apache/lucene.git
LUCENE-9024: Optimize IntroSelector to use median of medians (#966)
This commit is contained in:
parent
e713811b17
commit
7d0dbdaf62
|
@ -35,6 +35,10 @@ Optimizations
|
||||||
* LUCENE-8932: BKDReader's index is now stored off-heap when the IndexInput is
|
* LUCENE-8932: BKDReader's index is now stored off-heap when the IndexInput is
|
||||||
an instance of ByteBufferIndexInput. (Jack Conradson via Adrien Grand)
|
an instance of ByteBufferIndexInput. (Jack Conradson via Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-9024: IntroSelector now falls back to the median of medians algorithm
|
||||||
|
instead of sorting when the maximum recursion level is exceeded, providing
|
||||||
|
better worst-case runtime. (Paul Sanwald via Adrien Grand)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
* LUCENE-9001: Fix race condition in SetOnce. (Przemko Robakowski)
|
* LUCENE-9001: Fix race condition in SetOnce. (Przemko Robakowski)
|
||||||
|
|
|
@ -20,9 +20,8 @@ import java.util.Comparator;
|
||||||
|
|
||||||
/** Implementation of the quick select algorithm.
|
/** Implementation of the quick select algorithm.
|
||||||
* <p>It uses the median of the first, middle and last values as a pivot and
|
* <p>It uses the median of the first, middle and last values as a pivot and
|
||||||
* falls back to a heap sort when the number of recursion levels exceeds
|
* falls back to a median of medians when the number of recursion levels exceeds
|
||||||
* {@code 2 lg(n)}, as a consequence it runs in linear time on average and in
|
* {@code 2 lg(n)}; as a consequence, it runs in linear time on average.</p>
|
||||||
* {@code n log(n)} time in the worst case.</p>
|
|
||||||
* @lucene.internal */
|
* @lucene.internal */
|
||||||
public abstract class IntroSelector extends Selector {
|
public abstract class IntroSelector extends Selector {
|
||||||
|
|
||||||
|
@ -33,26 +32,90 @@ public abstract class IntroSelector extends Selector {
|
||||||
quickSelect(from, to, k, maxDepth);
|
quickSelect(from, to, k, maxDepth);
|
||||||
}
|
}
|
||||||
|
|
||||||
// heap sort
|
int slowSelect(int from, int to, int k) {
|
||||||
// TODO: use median of median instead to have linear worst-case rather than
|
return medianOfMediansSelect(from, to-1, k);
|
||||||
// n*log(n)
|
}
|
||||||
void slowSelect(int from, int to, int k) {
|
|
||||||
new Sorter() {
|
|
||||||
|
|
||||||
@Override
|
int medianOfMediansSelect(int left, int right, int k) {
|
||||||
protected void swap(int i, int j) {
|
do {
|
||||||
IntroSelector.this.swap(i, j);
|
// Defensive check; this is also checked in the calling
|
||||||
|
// method. Included here so this method can be used
|
||||||
|
// as a self-contained quickSelect implementation.
|
||||||
|
if (left == right) {
|
||||||
|
return left;
|
||||||
}
|
}
|
||||||
|
int pivotIndex = pivot(left, right);
|
||||||
|
pivotIndex = partition(left, right, k, pivotIndex);
|
||||||
|
if (k == pivotIndex) {
|
||||||
|
return k;
|
||||||
|
} else if (k < pivotIndex) {
|
||||||
|
right = pivotIndex-1;
|
||||||
|
} else {
|
||||||
|
left = pivotIndex+1;
|
||||||
|
}
|
||||||
|
} while (left != right);
|
||||||
|
return left;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
private int partition(int left, int right, int k, int pivotIndex) {
|
||||||
protected int compare(int i, int j) {
|
setPivot(pivotIndex);
|
||||||
return IntroSelector.this.compare(i, j);
|
swap(pivotIndex, right);
|
||||||
|
int storeIndex = left;
|
||||||
|
for (int i = left; i < right; i++) {
|
||||||
|
if (comparePivot(i) > 0) {
|
||||||
|
swap(storeIndex, i);
|
||||||
|
storeIndex++;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
int storeIndexEq = storeIndex;
|
||||||
|
for (int i = storeIndex; i < right; i++) {
|
||||||
|
if (comparePivot(i) == 0) {
|
||||||
|
swap(storeIndexEq, i);
|
||||||
|
storeIndexEq++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
swap(right, storeIndexEq);
|
||||||
|
if (k < storeIndex) {
|
||||||
|
return storeIndex;
|
||||||
|
} else if (k <= storeIndexEq) {
|
||||||
|
return k;
|
||||||
|
}
|
||||||
|
return storeIndexEq;
|
||||||
|
}
|
||||||
|
|
||||||
public void sort(int from, int to) {
|
private int pivot(int left, int right) {
|
||||||
heapSort(from, to);
|
if (right - left < 5) {
|
||||||
|
int pivotIndex = partition5(left, right);
|
||||||
|
return pivotIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = left; i <= right; i=i+5) {
|
||||||
|
int subRight = i + 4;
|
||||||
|
if (subRight > right) {
|
||||||
|
subRight = right;
|
||||||
}
|
}
|
||||||
}.sort(from, to);
|
int median5 = partition5(i, subRight);
|
||||||
|
swap(median5, left + ((i-left)/5));
|
||||||
|
}
|
||||||
|
int mid = ((right - left) / 10) + left + 1;
|
||||||
|
int to = left + ((right - left)/5);
|
||||||
|
return medianOfMediansSelect(left, to, mid);
|
||||||
|
}
|
||||||
|
|
||||||
|
// selects the median of a group of at most five elements,
|
||||||
|
// implemented using insertion sort. Efficient due to
|
||||||
|
// the bounded size of the data set.
|
||||||
|
private int partition5(int left, int right) {
|
||||||
|
int i = left + 1;
|
||||||
|
while( i <= right) {
|
||||||
|
int j = i;
|
||||||
|
while (j > left && compare(j-1,j)>0) {
|
||||||
|
swap(j-1, j);
|
||||||
|
j--;
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
return (left + right) >>> 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void quickSelect(int from, int to, int k, int maxDepth) {
|
private void quickSelect(int from, int to, int k, int maxDepth) {
|
||||||
|
|
|
@ -36,7 +36,7 @@ public class TestIntroSelector extends LuceneTestCase {
|
||||||
final int from = random().nextInt(5);
|
final int from = random().nextInt(5);
|
||||||
final int to = from + TestUtil.nextInt(random(), 1, 10000);
|
final int to = from + TestUtil.nextInt(random(), 1, 10000);
|
||||||
final int max = random().nextBoolean() ? random().nextInt(100) : random().nextInt(100000);
|
final int max = random().nextBoolean() ? random().nextInt(100) : random().nextInt(100000);
|
||||||
Integer[] arr = new Integer[from + to + random().nextInt(5)];
|
Integer[] arr = new Integer[to + random().nextInt(5)];
|
||||||
for (int i = 0; i < arr.length; ++i) {
|
for (int i = 0; i < arr.length; ++i) {
|
||||||
arr[i] = TestUtil.nextInt(random(), 0, max);
|
arr[i] = TestUtil.nextInt(random(), 0, max);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue