mirror of
https://github.com/apache/lucene.git
synced 2025-02-09 03:25:15 +00:00
LUCENE-7261: Speed up LSBRadixSorter.
This commit is contained in:
parent
3e6de6059f
commit
ef45d4b2e1
@ -78,6 +78,9 @@ Optimizations
|
|||||||
* LUCENE-7237: LRUQueryCache now prefers returning an uncached Scorer than
|
* LUCENE-7237: LRUQueryCache now prefers returning an uncached Scorer than
|
||||||
waiting on a lock. (Adrien Grand)
|
waiting on a lock. (Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-7261: Speed up LSBRadixSorter (which is used by TermsQuery, multi-term
|
||||||
|
queries and point queries). (Adrien Grand)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
* LUCENE-7127: Fix corner case bugs in GeoPointDistanceQuery. (Robert Muir)
|
* LUCENE-7127: Fix corner case bugs in GeoPointDistanceQuery. (Robert Muir)
|
||||||
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||||||
|
|
||||||
import org.apache.lucene.search.DocIdSet;
|
import org.apache.lucene.search.DocIdSet;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A builder of {@link DocIdSet}s. At first it uses a sparse structure to gather
|
* A builder of {@link DocIdSet}s. At first it uses a sparse structure to gather
|
||||||
@ -174,7 +175,7 @@ public final class DocIdSetBuilder {
|
|||||||
return new BitDocIdSet(bitSet);
|
return new BitDocIdSet(bitSet);
|
||||||
} else {
|
} else {
|
||||||
LSBRadixSorter sorter = new LSBRadixSorter();
|
LSBRadixSorter sorter = new LSBRadixSorter();
|
||||||
sorter.sort(buffer, 0, bufferSize);
|
sorter.sort(PackedInts.bitsRequired(maxDoc - 1), buffer, bufferSize);
|
||||||
final int l = dedup(buffer, bufferSize);
|
final int l = dedup(buffer, bufferSize);
|
||||||
assert l <= bufferSize;
|
assert l <= bufferSize;
|
||||||
buffer = ArrayUtil.grow(buffer, l + 1);
|
buffer = ArrayUtil.grow(buffer, l + 1);
|
||||||
|
@ -31,9 +31,9 @@ public final class LSBRadixSorter {
|
|||||||
private final int[] histogram = new int[HISTOGRAM_SIZE];
|
private final int[] histogram = new int[HISTOGRAM_SIZE];
|
||||||
private int[] buffer = new int[0];
|
private int[] buffer = new int[0];
|
||||||
|
|
||||||
private static void buildHistogram(int[] array, int off, int len, int[] histogram, int shift) {
|
private static void buildHistogram(int[] array, int len, int[] histogram, int shift) {
|
||||||
for (int i = 0; i < len; ++i) {
|
for (int i = 0; i < len; ++i) {
|
||||||
final int b = (array[off + i] >>> shift) & 0xFF;
|
final int b = (array[i] >>> shift) & 0xFF;
|
||||||
histogram[b] += 1;
|
histogram[b] += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -47,22 +47,22 @@ public final class LSBRadixSorter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void reorder(int[] array, int off, int len, int[] histogram, int shift, int[] dest, int destOff) {
|
private static void reorder(int[] array, int len, int[] histogram, int shift, int[] dest) {
|
||||||
for (int i = 0; i < len; ++i) {
|
for (int i = 0; i < len; ++i) {
|
||||||
final int v = array[off + i];
|
final int v = array[i];
|
||||||
final int b = (v >>> shift) & 0xFF;
|
final int b = (v >>> shift) & 0xFF;
|
||||||
dest[destOff + histogram[b]++] = v;
|
dest[histogram[b]++] = v;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean sort(int[] array, int off, int len, int[] histogram, int shift, int[] dest, int destOff) {
|
private static boolean sort(int[] array, int len, int[] histogram, int shift, int[] dest) {
|
||||||
Arrays.fill(histogram, 0);
|
Arrays.fill(histogram, 0);
|
||||||
buildHistogram(array, off, len, histogram, shift);
|
buildHistogram(array, len, histogram, shift);
|
||||||
if (histogram[0] == len) {
|
if (histogram[0] == len) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
sumHistogram(histogram);
|
sumHistogram(histogram);
|
||||||
reorder(array, off, len, histogram, shift, dest, destOff);
|
reorder(array, len, histogram, shift, dest);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,34 +80,32 @@ public final class LSBRadixSorter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void sort(final int[] array, int off, int len) {
|
/** Sort {@code array[0:len]} in place.
|
||||||
|
* @param numBits how many bits are required to store any of the values in
|
||||||
|
* {@code array[0:len]}. Pass {@code 32} if unknown. */
|
||||||
|
public void sort(int numBits, final int[] array, int len) {
|
||||||
if (len < INSERTION_SORT_THRESHOLD) {
|
if (len < INSERTION_SORT_THRESHOLD) {
|
||||||
insertionSort(array, off, len);
|
insertionSort(array, 0, len);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
buffer = ArrayUtil.grow(buffer, len);
|
buffer = ArrayUtil.grow(buffer, len);
|
||||||
|
|
||||||
int[] arr = array;
|
int[] arr = array;
|
||||||
int arrOff = off;
|
|
||||||
|
|
||||||
int[] buf = buffer;
|
int[] buf = buffer;
|
||||||
int bufOff = 0;
|
|
||||||
|
for (int shift = 0; shift < numBits; shift += 8) {
|
||||||
for (int shift = 0; shift <= 24; shift += 8) {
|
if (sort(arr, len, histogram, shift, buf)) {
|
||||||
if (sort(arr, arrOff, len, histogram, shift, buf, bufOff)) {
|
|
||||||
// swap arrays
|
// swap arrays
|
||||||
int[] tmp = arr;
|
int[] tmp = arr;
|
||||||
int tmpOff = arrOff;
|
|
||||||
arr = buf;
|
arr = buf;
|
||||||
arrOff = bufOff;
|
|
||||||
buf = tmp;
|
buf = tmp;
|
||||||
bufOff = tmpOff;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (array == buf) {
|
if (array == buf) {
|
||||||
System.arraycopy(arr, arrOff, array, off, len);
|
System.arraycopy(arr, 0, array, 0, len);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -19,28 +19,38 @@ package org.apache.lucene.util;
|
|||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
public class TestLSBRadixSorter extends LuceneTestCase {
|
public class TestLSBRadixSorter extends LuceneTestCase {
|
||||||
|
|
||||||
public void test(LSBRadixSorter sorter, int maxLen) {
|
public void test(LSBRadixSorter sorter, int maxLen) {
|
||||||
for (int iter = 0; iter < 10; ++iter) {
|
for (int iter = 0; iter < 10; ++iter) {
|
||||||
int off = random().nextInt(10);
|
|
||||||
final int len = TestUtil.nextInt(random(), 0, maxLen);
|
final int len = TestUtil.nextInt(random(), 0, maxLen);
|
||||||
int[] arr = new int[off + len + random().nextInt(10)];
|
int[] arr = new int[len + random().nextInt(10)];
|
||||||
final int numBits = random().nextInt(31);
|
final int numBits = random().nextInt(31);
|
||||||
final int maxValue = (1 << numBits) - 1;
|
final int maxValue = (1 << numBits) - 1;
|
||||||
for (int i = 0; i < arr.length; ++i) {
|
for (int i = 0; i < arr.length; ++i) {
|
||||||
arr[i] = TestUtil.nextInt(random(), 0, maxValue);
|
arr[i] = TestUtil.nextInt(random(), 0, maxValue);
|
||||||
}
|
}
|
||||||
test(sorter, arr, off, len);
|
test(sorter, arr, len);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void test(LSBRadixSorter sorter, int[] arr, int off, int len) {
|
public void test(LSBRadixSorter sorter, int[] arr, int len) {
|
||||||
final int[] expected = Arrays.copyOfRange(arr, off, off + len);
|
final int[] expected = Arrays.copyOf(arr, len);
|
||||||
Arrays.sort(expected);
|
Arrays.sort(expected);
|
||||||
|
|
||||||
sorter.sort(arr, off, len);
|
int numBits = 0;
|
||||||
final int[] actual = Arrays.copyOfRange(arr, off, off + len);
|
for (int i = 0; i < len; ++i) {
|
||||||
|
numBits = Math.max(numBits, PackedInts.bitsRequired(arr[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
numBits = TestUtil.nextInt(random(), numBits, 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
sorter.sort(numBits, arr, len);
|
||||||
|
final int[] actual = Arrays.copyOf(arr, len);
|
||||||
assertArrayEquals(expected, actual);
|
assertArrayEquals(expected, actual);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -73,9 +83,8 @@ public class TestLSBRadixSorter extends LuceneTestCase {
|
|||||||
a += random().nextInt(10);
|
a += random().nextInt(10);
|
||||||
arr[i] = a;
|
arr[i] = a;
|
||||||
}
|
}
|
||||||
final int off = random().nextInt(arr.length);
|
final int len = TestUtil.nextInt(random(), 0, arr.length);
|
||||||
final int len = TestUtil.nextInt(random(), 0, arr.length - off);
|
test(sorter, arr, len);
|
||||||
test(sorter, arr, off, len);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -21,11 +21,10 @@ import java.io.IOException;
|
|||||||
import org.apache.lucene.index.PostingsEnum;
|
import org.apache.lucene.index.PostingsEnum;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
import org.apache.lucene.util.LSBRadixSorter;
|
import org.apache.lucene.util.LSBRadixSorter;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adapted from DocIdSetBuilder to build DocSets
|
* Adapted from DocIdSetBuilder to build DocSets
|
||||||
@ -188,7 +187,7 @@ public final class DocSetBuilder {
|
|||||||
// TODO - if this set will be cached, should we make it smaller if it's below DocSetUtil.smallSetSize?
|
// TODO - if this set will be cached, should we make it smaller if it's below DocSetUtil.smallSetSize?
|
||||||
} else {
|
} else {
|
||||||
LSBRadixSorter sorter = new LSBRadixSorter();
|
LSBRadixSorter sorter = new LSBRadixSorter();
|
||||||
sorter.sort(buffer, 0, pos);
|
sorter.sort(PackedInts.bitsRequired(maxDoc - 1), buffer, pos);
|
||||||
final int l = dedup(buffer, pos, filter);
|
final int l = dedup(buffer, pos, filter);
|
||||||
assert l <= pos;
|
assert l <= pos;
|
||||||
return new SortedIntDocSet(buffer, l); // TODO: have option to not shrink in the future if it will be a temporary set
|
return new SortedIntDocSet(buffer, l); // TODO: have option to not shrink in the future if it will be a temporary set
|
||||||
|
Loading…
x
Reference in New Issue
Block a user