mirror of https://github.com/apache/lucene.git
Use radix sort to speed up the sorting of terms in TermInSetQuery (#12587)
This commit is contained in:
parent
6dac2f7afc
commit
c6e76d3e01
|
@ -185,6 +185,8 @@ Optimizations
|
|||
|
||||
* GITHUB#12591: Use stable radix sort to speed up the sorting of update terms. (Guo Feng)
|
||||
|
||||
* GITHUB#12587: Use radix sort to speed up the sorting of terms in TermInSetQuery. (Guo Feng)
|
||||
|
||||
* GITHUB#12604: Estimate the block size of FST BytesStore in BlockTreeTermsWriter
|
||||
to reduce GC load during indexing. (Guo Feng)
|
||||
|
||||
|
|
|
@ -29,11 +29,12 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.BytesRefComparator;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.StringSorter;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
||||
|
@ -112,7 +113,23 @@ public class TermInSetQuery extends MultiTermQuery implements Accountable {
|
|||
boolean sorted =
|
||||
terms instanceof SortedSet && ((SortedSet<BytesRef>) terms).comparator() == null;
|
||||
if (sorted == false) {
|
||||
ArrayUtil.timSort(sortedTerms);
|
||||
new StringSorter(BytesRefComparator.NATURAL) {
|
||||
|
||||
@Override
|
||||
protected void get(BytesRefBuilder builder, BytesRef result, int i) {
|
||||
BytesRef term = sortedTerms[i];
|
||||
result.length = term.length;
|
||||
result.offset = term.offset;
|
||||
result.bytes = term.bytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
BytesRef b = sortedTerms[i];
|
||||
sortedTerms[i] = sortedTerms[j];
|
||||
sortedTerms[j] = b;
|
||||
}
|
||||
}.sort(0, sortedTerms.length);
|
||||
}
|
||||
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
|
||||
BytesRefBuilder previous = null;
|
||||
|
|
|
@ -20,8 +20,7 @@ import java.util.Arrays;
|
|||
import java.util.Comparator;
|
||||
|
||||
/**
|
||||
* Specialized {@link BytesRef} comparator that {@link
|
||||
* FixedLengthBytesRefArray#iterator(Comparator)} has optimizations for.
|
||||
* Specialized {@link BytesRef} comparator that {@link StringSorter} has optimizations for.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
|
|
|
@ -19,7 +19,13 @@ package org.apache.lucene.util;
|
|||
|
||||
import java.util.Comparator;
|
||||
|
||||
abstract class StringSorter extends Sorter {
|
||||
/**
|
||||
* A {@link BytesRef} sorter tries to use an efficient radix sorter if {@link StringSorter#cmp} is a
|
||||
* {@link BytesRefComparator}, otherwise falls back to {@link StringSorter#fallbackSorter}
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public abstract class StringSorter extends Sorter {
|
||||
|
||||
private final Comparator<BytesRef> cmp;
|
||||
protected final BytesRefBuilder scratch1 = new BytesRefBuilder();
|
||||
|
@ -29,7 +35,7 @@ abstract class StringSorter extends Sorter {
|
|||
protected final BytesRef scratchBytes2 = new BytesRef();
|
||||
protected final BytesRef pivot = new BytesRef();
|
||||
|
||||
StringSorter(Comparator<BytesRef> cmp) {
|
||||
protected StringSorter(Comparator<BytesRef> cmp) {
|
||||
this.cmp = cmp;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue