LUCENE-7261: Speed up LSBRadixSorter.

This commit is contained in:
Adrien Grand 2016-04-28 18:20:41 +02:00
parent 3e6de6059f
commit ef45d4b2e1
5 changed files with 43 additions and 33 deletions

View File

@ -78,6 +78,9 @@ Optimizations
* LUCENE-7237: LRUQueryCache now prefers returning an uncached Scorer rather than * LUCENE-7237: LRUQueryCache now prefers returning an uncached Scorer rather than
waiting on a lock. (Adrien Grand) waiting on a lock. (Adrien Grand)
* LUCENE-7261: Speed up LSBRadixSorter (which is used by TermsQuery, multi-term
queries and point queries). (Adrien Grand)
Bug Fixes Bug Fixes
* LUCENE-7127: Fix corner case bugs in GeoPointDistanceQuery. (Robert Muir) * LUCENE-7127: Fix corner case bugs in GeoPointDistanceQuery. (Robert Muir)

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.packed.PackedInts;
/** /**
* A builder of {@link DocIdSet}s. At first it uses a sparse structure to gather * A builder of {@link DocIdSet}s. At first it uses a sparse structure to gather
@ -174,7 +175,7 @@ public final class DocIdSetBuilder {
return new BitDocIdSet(bitSet); return new BitDocIdSet(bitSet);
} else { } else {
LSBRadixSorter sorter = new LSBRadixSorter(); LSBRadixSorter sorter = new LSBRadixSorter();
sorter.sort(buffer, 0, bufferSize); sorter.sort(PackedInts.bitsRequired(maxDoc - 1), buffer, bufferSize);
final int l = dedup(buffer, bufferSize); final int l = dedup(buffer, bufferSize);
assert l <= bufferSize; assert l <= bufferSize;
buffer = ArrayUtil.grow(buffer, l + 1); buffer = ArrayUtil.grow(buffer, l + 1);

View File

@ -31,9 +31,9 @@ public final class LSBRadixSorter {
private final int[] histogram = new int[HISTOGRAM_SIZE]; private final int[] histogram = new int[HISTOGRAM_SIZE];
private int[] buffer = new int[0]; private int[] buffer = new int[0];
private static void buildHistogram(int[] array, int off, int len, int[] histogram, int shift) { private static void buildHistogram(int[] array, int len, int[] histogram, int shift) {
for (int i = 0; i < len; ++i) { for (int i = 0; i < len; ++i) {
final int b = (array[off + i] >>> shift) & 0xFF; final int b = (array[i] >>> shift) & 0xFF;
histogram[b] += 1; histogram[b] += 1;
} }
} }
@ -47,22 +47,22 @@ public final class LSBRadixSorter {
} }
} }
private static void reorder(int[] array, int off, int len, int[] histogram, int shift, int[] dest, int destOff) { private static void reorder(int[] array, int len, int[] histogram, int shift, int[] dest) {
for (int i = 0; i < len; ++i) { for (int i = 0; i < len; ++i) {
final int v = array[off + i]; final int v = array[i];
final int b = (v >>> shift) & 0xFF; final int b = (v >>> shift) & 0xFF;
dest[destOff + histogram[b]++] = v; dest[histogram[b]++] = v;
} }
} }
private static boolean sort(int[] array, int off, int len, int[] histogram, int shift, int[] dest, int destOff) { private static boolean sort(int[] array, int len, int[] histogram, int shift, int[] dest) {
Arrays.fill(histogram, 0); Arrays.fill(histogram, 0);
buildHistogram(array, off, len, histogram, shift); buildHistogram(array, len, histogram, shift);
if (histogram[0] == len) { if (histogram[0] == len) {
return false; return false;
} }
sumHistogram(histogram); sumHistogram(histogram);
reorder(array, off, len, histogram, shift, dest, destOff); reorder(array, len, histogram, shift, dest);
return true; return true;
} }
@ -80,34 +80,32 @@ public final class LSBRadixSorter {
} }
} }
public void sort(final int[] array, int off, int len) { /** Sort {@code array[0:len]} in place.
* @param numBits how many bits are required to store any of the values in
* {@code array[0:len]}. Pass {@code 32} if unknown. */
public void sort(int numBits, final int[] array, int len) {
if (len < INSERTION_SORT_THRESHOLD) { if (len < INSERTION_SORT_THRESHOLD) {
insertionSort(array, off, len); insertionSort(array, 0, len);
return; return;
} }
buffer = ArrayUtil.grow(buffer, len); buffer = ArrayUtil.grow(buffer, len);
int[] arr = array; int[] arr = array;
int arrOff = off;
int[] buf = buffer; int[] buf = buffer;
int bufOff = 0;
for (int shift = 0; shift < numBits; shift += 8) {
for (int shift = 0; shift <= 24; shift += 8) { if (sort(arr, len, histogram, shift, buf)) {
if (sort(arr, arrOff, len, histogram, shift, buf, bufOff)) {
// swap arrays // swap arrays
int[] tmp = arr; int[] tmp = arr;
int tmpOff = arrOff;
arr = buf; arr = buf;
arrOff = bufOff;
buf = tmp; buf = tmp;
bufOff = tmpOff;
} }
} }
if (array == buf) { if (array == buf) {
System.arraycopy(arr, arrOff, array, off, len); System.arraycopy(arr, 0, array, 0, len);
} }
} }

View File

@ -19,28 +19,38 @@ package org.apache.lucene.util;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.util.packed.PackedInts;
public class TestLSBRadixSorter extends LuceneTestCase { public class TestLSBRadixSorter extends LuceneTestCase {
public void test(LSBRadixSorter sorter, int maxLen) { public void test(LSBRadixSorter sorter, int maxLen) {
for (int iter = 0; iter < 10; ++iter) { for (int iter = 0; iter < 10; ++iter) {
int off = random().nextInt(10);
final int len = TestUtil.nextInt(random(), 0, maxLen); final int len = TestUtil.nextInt(random(), 0, maxLen);
int[] arr = new int[off + len + random().nextInt(10)]; int[] arr = new int[len + random().nextInt(10)];
final int numBits = random().nextInt(31); final int numBits = random().nextInt(31);
final int maxValue = (1 << numBits) - 1; final int maxValue = (1 << numBits) - 1;
for (int i = 0; i < arr.length; ++i) { for (int i = 0; i < arr.length; ++i) {
arr[i] = TestUtil.nextInt(random(), 0, maxValue); arr[i] = TestUtil.nextInt(random(), 0, maxValue);
} }
test(sorter, arr, off, len); test(sorter, arr, len);
} }
} }
public void test(LSBRadixSorter sorter, int[] arr, int off, int len) { public void test(LSBRadixSorter sorter, int[] arr, int len) {
final int[] expected = Arrays.copyOfRange(arr, off, off + len); final int[] expected = Arrays.copyOf(arr, len);
Arrays.sort(expected); Arrays.sort(expected);
sorter.sort(arr, off, len); int numBits = 0;
final int[] actual = Arrays.copyOfRange(arr, off, off + len); for (int i = 0; i < len; ++i) {
numBits = Math.max(numBits, PackedInts.bitsRequired(arr[i]));
}
if (random().nextBoolean()) {
numBits = TestUtil.nextInt(random(), numBits, 32);
}
sorter.sort(numBits, arr, len);
final int[] actual = Arrays.copyOf(arr, len);
assertArrayEquals(expected, actual); assertArrayEquals(expected, actual);
} }
@ -73,9 +83,8 @@ public class TestLSBRadixSorter extends LuceneTestCase {
a += random().nextInt(10); a += random().nextInt(10);
arr[i] = a; arr[i] = a;
} }
final int off = random().nextInt(arr.length); final int len = TestUtil.nextInt(random(), 0, arr.length);
final int len = TestUtil.nextInt(random(), 0, arr.length - off); test(sorter, arr, len);
test(sorter, arr, off, len);
} }
} }
} }

View File

@ -21,11 +21,10 @@ import java.io.IOException;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LSBRadixSorter; import org.apache.lucene.util.LSBRadixSorter;
import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.PackedInts;
/** /**
* Adapted from DocIdSetBuilder to build DocSets * Adapted from DocIdSetBuilder to build DocSets
@ -188,7 +187,7 @@ public final class DocSetBuilder {
// TODO - if this set will be cached, should we make it smaller if it's below DocSetUtil.smallSetSize? // TODO - if this set will be cached, should we make it smaller if it's below DocSetUtil.smallSetSize?
} else { } else {
LSBRadixSorter sorter = new LSBRadixSorter(); LSBRadixSorter sorter = new LSBRadixSorter();
sorter.sort(buffer, 0, pos); sorter.sort(PackedInts.bitsRequired(maxDoc - 1), buffer, pos);
final int l = dedup(buffer, pos, filter); final int l = dedup(buffer, pos, filter);
assert l <= pos; assert l <= pos;
return new SortedIntDocSet(buffer, l); // TODO: have option to not shrink in the future if it will be a temporary set return new SortedIntDocSet(buffer, l); // TODO: have option to not shrink in the future if it will be a temporary set