mirror of https://github.com/apache/lucene.git
LUCENE-6470: Improve efficiency of TermsQuery constructors.
This commit is contained in:
parent
0ff8d11367
commit
a3b2ad334c
|
@ -193,6 +193,8 @@ Optimizations
|
||||||
* LUCENE-6940: MUST_NOT clauses execute faster, especially when they are sparse.
|
* LUCENE-6940: MUST_NOT clauses execute faster, especially when they are sparse.
|
||||||
(Adrien Grand)
|
(Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-6470: Improve efficiency of TermsQuery constructors. (Robert Muir)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
* LUCENE-6976: BytesRefTermAttributeImpl.copyTo NPE'ed if BytesRef was null.
|
* LUCENE-6976: BytesRefTermAttributeImpl.copyTo NPE'ed if BytesRef was null.
|
||||||
|
|
|
@ -66,22 +66,27 @@ public class PrefixCodedTerms implements Accountable {
|
||||||
|
|
||||||
/** add a term */
|
/** add a term */
|
||||||
public void add(Term term) {
|
public void add(Term term) {
|
||||||
assert lastTerm.equals(new Term("")) || term.compareTo(lastTerm) > 0;
|
add(term.field(), term.bytes());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** add a term */
|
||||||
|
public void add(String field, BytesRef bytes) {
|
||||||
|
assert lastTerm.equals(new Term("")) || new Term(field, bytes).compareTo(lastTerm) > 0;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
int prefix = sharedPrefix(lastTerm.bytes, term.bytes);
|
int prefix = sharedPrefix(lastTerm.bytes, bytes);
|
||||||
int suffix = term.bytes.length - prefix;
|
int suffix = bytes.length - prefix;
|
||||||
if (term.field.equals(lastTerm.field)) {
|
if (field.equals(lastTerm.field)) {
|
||||||
output.writeVInt(prefix << 1);
|
output.writeVInt(prefix << 1);
|
||||||
} else {
|
} else {
|
||||||
output.writeVInt(prefix << 1 | 1);
|
output.writeVInt(prefix << 1 | 1);
|
||||||
output.writeString(term.field);
|
output.writeString(field);
|
||||||
}
|
}
|
||||||
output.writeVInt(suffix);
|
output.writeVInt(suffix);
|
||||||
output.writeBytes(term.bytes.bytes, term.bytes.offset + prefix, suffix);
|
output.writeBytes(bytes.bytes, bytes.offset + prefix, suffix);
|
||||||
lastTermBytes.copyBytes(term.bytes);
|
lastTermBytes.copyBytes(bytes);
|
||||||
lastTerm.bytes = lastTermBytes.get();
|
lastTerm.bytes = lastTermBytes.get();
|
||||||
lastTerm.field = term.field;
|
lastTerm.field = field;
|
||||||
size += 1;
|
size += 1;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
|
|
|
@ -25,6 +25,7 @@ import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.SortedSet;
|
||||||
|
|
||||||
import org.apache.lucene.index.Fields;
|
import org.apache.lucene.index.Fields;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
@ -55,6 +56,7 @@ import org.apache.lucene.util.Accountable;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.DocIdSetBuilder;
|
import org.apache.lucene.util.DocIdSetBuilder;
|
||||||
|
import org.apache.lucene.util.BytesRefBuilder;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -86,22 +88,17 @@ public class TermsQuery extends Query implements Accountable {
|
||||||
private final PrefixCodedTerms termData;
|
private final PrefixCodedTerms termData;
|
||||||
private final int termDataHashCode; // cached hashcode of termData
|
private final int termDataHashCode; // cached hashcode of termData
|
||||||
|
|
||||||
private static Term[] toTermArray(String field, List<BytesRef> termBytes) {
|
|
||||||
Term[] array = new Term[termBytes.size()];
|
|
||||||
int i = 0;
|
|
||||||
for (BytesRef t : termBytes) {
|
|
||||||
array[i++] = new Term(field, t);
|
|
||||||
}
|
|
||||||
return array;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new {@link TermsQuery} from the given list. The list
|
* Creates a new {@link TermsQuery} from the given collection. It
|
||||||
* can contain duplicate terms and multiple fields.
|
* can contain duplicate terms and multiple fields.
|
||||||
*/
|
*/
|
||||||
public TermsQuery(final List<Term> terms) {
|
public TermsQuery(Collection<Term> terms) {
|
||||||
Term[] sortedTerms = terms.toArray(new Term[terms.size()]);
|
Term[] sortedTerms = terms.toArray(new Term[terms.size()]);
|
||||||
ArrayUtil.timSort(sortedTerms);
|
// already sorted if we are a SortedSet with natural order
|
||||||
|
boolean sorted = terms instanceof SortedSet && ((SortedSet<Term>)terms).comparator() == null;
|
||||||
|
if (!sorted) {
|
||||||
|
ArrayUtil.timSort(sortedTerms);
|
||||||
|
}
|
||||||
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
|
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
|
||||||
Term previous = null;
|
Term previous = null;
|
||||||
for (Term term : sortedTerms) {
|
for (Term term : sortedTerms) {
|
||||||
|
@ -113,21 +110,38 @@ public class TermsQuery extends Query implements Accountable {
|
||||||
termData = builder.finish();
|
termData = builder.finish();
|
||||||
termDataHashCode = termData.hashCode();
|
termDataHashCode = termData.hashCode();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new {@link TermsQuery} from the given {@link BytesRef} list for
|
* Creates a new {@link TermsQuery} from the given collection for
|
||||||
* a single field.
|
* a single field. It can contain duplicate terms.
|
||||||
*/
|
*/
|
||||||
public TermsQuery(final String field, final List<BytesRef> terms) {
|
public TermsQuery(String field, Collection<BytesRef> terms) {
|
||||||
this(toTermArray(field, terms));
|
BytesRef[] sortedTerms = terms.toArray(new BytesRef[terms.size()]);
|
||||||
|
// already sorted if we are a SortedSet with natural order
|
||||||
|
boolean sorted = terms instanceof SortedSet && ((SortedSet<BytesRef>)terms).comparator() == null;
|
||||||
|
if (!sorted) {
|
||||||
|
ArrayUtil.timSort(sortedTerms);
|
||||||
|
}
|
||||||
|
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
|
||||||
|
BytesRefBuilder previous = null;
|
||||||
|
for (BytesRef term : sortedTerms) {
|
||||||
|
if (previous == null) {
|
||||||
|
previous = new BytesRefBuilder();
|
||||||
|
} else if (previous.get().equals(term)) {
|
||||||
|
continue; // deduplicate
|
||||||
|
}
|
||||||
|
builder.add(field, term);
|
||||||
|
previous.copyBytes(term);
|
||||||
|
}
|
||||||
|
termData = builder.finish();
|
||||||
|
termDataHashCode = termData.hashCode();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new {@link TermsQuery} from the given {@link BytesRef} array for
|
* Creates a new {@link TermsQuery} from the given {@link BytesRef} array for
|
||||||
* a single field.
|
* a single field.
|
||||||
*/
|
*/
|
||||||
public TermsQuery(final String field, final BytesRef...terms) {
|
public TermsQuery(String field, BytesRef...terms) {
|
||||||
// this ctor prevents unnecessary Term creations
|
|
||||||
this(field, Arrays.asList(terms));
|
this(field, Arrays.asList(terms));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue