mirror of https://github.com/apache/lucene.git
Use radix sort to speed up the sorting of terms in TermInSetQuery (#12587)
This commit is contained in:
parent
6dac2f7afc
commit
c6e76d3e01
|
@ -185,6 +185,8 @@ Optimizations
|
||||||
|
|
||||||
* GITHUB#12591: Use stable radix sort to speed up the sorting of update terms. (Guo Feng)
|
* GITHUB#12591: Use stable radix sort to speed up the sorting of update terms. (Guo Feng)
|
||||||
|
|
||||||
|
* GITHUB#12587: Use radix sort to speed up the sorting of terms in TermInSetQuery. (Guo Feng)
|
||||||
|
|
||||||
* GITHUB#12604: Estimate the block size of FST BytesStore in BlockTreeTermsWriter
|
* GITHUB#12604: Estimate the block size of FST BytesStore in BlockTreeTermsWriter
|
||||||
to reduce GC load during indexing. (Guo Feng)
|
to reduce GC load during indexing. (Guo Feng)
|
||||||
|
|
||||||
|
|
|
@ -29,11 +29,12 @@ import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.util.Accountable;
|
import org.apache.lucene.util.Accountable;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.BytesRefBuilder;
|
import org.apache.lucene.util.BytesRefBuilder;
|
||||||
|
import org.apache.lucene.util.BytesRefComparator;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
import org.apache.lucene.util.StringSorter;
|
||||||
import org.apache.lucene.util.automaton.Automata;
|
import org.apache.lucene.util.automaton.Automata;
|
||||||
import org.apache.lucene.util.automaton.Automaton;
|
import org.apache.lucene.util.automaton.Automaton;
|
||||||
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
||||||
|
@ -112,7 +113,23 @@ public class TermInSetQuery extends MultiTermQuery implements Accountable {
|
||||||
boolean sorted =
|
boolean sorted =
|
||||||
terms instanceof SortedSet && ((SortedSet<BytesRef>) terms).comparator() == null;
|
terms instanceof SortedSet && ((SortedSet<BytesRef>) terms).comparator() == null;
|
||||||
if (sorted == false) {
|
if (sorted == false) {
|
||||||
ArrayUtil.timSort(sortedTerms);
|
new StringSorter(BytesRefComparator.NATURAL) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void get(BytesRefBuilder builder, BytesRef result, int i) {
|
||||||
|
BytesRef term = sortedTerms[i];
|
||||||
|
result.length = term.length;
|
||||||
|
result.offset = term.offset;
|
||||||
|
result.bytes = term.bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void swap(int i, int j) {
|
||||||
|
BytesRef b = sortedTerms[i];
|
||||||
|
sortedTerms[i] = sortedTerms[j];
|
||||||
|
sortedTerms[j] = b;
|
||||||
|
}
|
||||||
|
}.sort(0, sortedTerms.length);
|
||||||
}
|
}
|
||||||
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
|
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
|
||||||
BytesRefBuilder previous = null;
|
BytesRefBuilder previous = null;
|
||||||
|
|
|
@ -20,8 +20,7 @@ import java.util.Arrays;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Specialized {@link BytesRef} comparator that {@link
|
* Specialized {@link BytesRef} comparator that {@link StringSorter} has optimizations for.
|
||||||
* FixedLengthBytesRefArray#iterator(Comparator)} has optimizations for.
|
|
||||||
*
|
*
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -19,7 +19,13 @@ package org.apache.lucene.util;
|
||||||
|
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
|
||||||
abstract class StringSorter extends Sorter {
|
/**
|
||||||
|
* A {@link BytesRef} sorter tries to use an efficient radix sorter if {@link StringSorter#cmp} is a
|
||||||
|
* {@link BytesRefComparator}, otherwise falls back to {@link StringSorter#fallbackSorter}
|
||||||
|
*
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public abstract class StringSorter extends Sorter {
|
||||||
|
|
||||||
private final Comparator<BytesRef> cmp;
|
private final Comparator<BytesRef> cmp;
|
||||||
protected final BytesRefBuilder scratch1 = new BytesRefBuilder();
|
protected final BytesRefBuilder scratch1 = new BytesRefBuilder();
|
||||||
|
@ -29,7 +35,7 @@ abstract class StringSorter extends Sorter {
|
||||||
protected final BytesRef scratchBytes2 = new BytesRef();
|
protected final BytesRef scratchBytes2 = new BytesRef();
|
||||||
protected final BytesRef pivot = new BytesRef();
|
protected final BytesRef pivot = new BytesRef();
|
||||||
|
|
||||||
StringSorter(Comparator<BytesRef> cmp) {
|
protected StringSorter(Comparator<BytesRef> cmp) {
|
||||||
this.cmp = cmp;
|
this.cmp = cmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue