LUCENE-6470: Improve efficiency of TermsQuery constructors.

This commit is contained in:
Robert Muir 2016-01-26 03:38:19 -05:00
parent 0ff8d11367
commit a3b2ad334c
3 changed files with 48 additions and 27 deletions

View File

@ -193,6 +193,8 @@ Optimizations
* LUCENE-6940: MUST_NOT clauses execute faster, especially when they are sparse. * LUCENE-6940: MUST_NOT clauses execute faster, especially when they are sparse.
(Adrien Grand) (Adrien Grand)
* LUCENE-6470: Improve efficiency of TermsQuery constructors. (Robert Muir)
Bug Fixes Bug Fixes
* LUCENE-6976: BytesRefTermAttributeImpl.copyTo NPE'ed if BytesRef was null. * LUCENE-6976: BytesRefTermAttributeImpl.copyTo NPE'ed if BytesRef was null.

View File

@ -66,22 +66,27 @@ public class PrefixCodedTerms implements Accountable {
/** add a term */ /** add a term */
public void add(Term term) { public void add(Term term) {
assert lastTerm.equals(new Term("")) || term.compareTo(lastTerm) > 0; add(term.field(), term.bytes());
}
/** add a term */
public void add(String field, BytesRef bytes) {
assert lastTerm.equals(new Term("")) || new Term(field, bytes).compareTo(lastTerm) > 0;
try { try {
int prefix = sharedPrefix(lastTerm.bytes, term.bytes); int prefix = sharedPrefix(lastTerm.bytes, bytes);
int suffix = term.bytes.length - prefix; int suffix = bytes.length - prefix;
if (term.field.equals(lastTerm.field)) { if (field.equals(lastTerm.field)) {
output.writeVInt(prefix << 1); output.writeVInt(prefix << 1);
} else { } else {
output.writeVInt(prefix << 1 | 1); output.writeVInt(prefix << 1 | 1);
output.writeString(term.field); output.writeString(field);
} }
output.writeVInt(suffix); output.writeVInt(suffix);
output.writeBytes(term.bytes.bytes, term.bytes.offset + prefix, suffix); output.writeBytes(bytes.bytes, bytes.offset + prefix, suffix);
lastTermBytes.copyBytes(term.bytes); lastTermBytes.copyBytes(bytes);
lastTerm.bytes = lastTermBytes.get(); lastTerm.bytes = lastTermBytes.get();
lastTerm.field = term.field; lastTerm.field = field;
size += 1; size += 1;
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);

View File

@ -25,6 +25,7 @@ import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
import java.util.SortedSet;
import org.apache.lucene.index.Fields; import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
@ -55,6 +56,7 @@ import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.RamUsageEstimator;
/** /**
@ -86,22 +88,17 @@ public class TermsQuery extends Query implements Accountable {
private final PrefixCodedTerms termData; private final PrefixCodedTerms termData;
private final int termDataHashCode; // cached hashcode of termData private final int termDataHashCode; // cached hashcode of termData
private static Term[] toTermArray(String field, List<BytesRef> termBytes) {
Term[] array = new Term[termBytes.size()];
int i = 0;
for (BytesRef t : termBytes) {
array[i++] = new Term(field, t);
}
return array;
}
/** /**
* Creates a new {@link TermsQuery} from the given list. The list * Creates a new {@link TermsQuery} from the given collection. It
* can contain duplicate terms and multiple fields. * can contain duplicate terms and multiple fields.
*/ */
public TermsQuery(final List<Term> terms) { public TermsQuery(Collection<Term> terms) {
Term[] sortedTerms = terms.toArray(new Term[terms.size()]); Term[] sortedTerms = terms.toArray(new Term[terms.size()]);
ArrayUtil.timSort(sortedTerms); // already sorted if we are a SortedSet with natural order
boolean sorted = terms instanceof SortedSet && ((SortedSet<Term>)terms).comparator() == null;
if (!sorted) {
ArrayUtil.timSort(sortedTerms);
}
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder(); PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
Term previous = null; Term previous = null;
for (Term term : sortedTerms) { for (Term term : sortedTerms) {
@ -113,21 +110,38 @@ public class TermsQuery extends Query implements Accountable {
termData = builder.finish(); termData = builder.finish();
termDataHashCode = termData.hashCode(); termDataHashCode = termData.hashCode();
} }
/** /**
* Creates a new {@link TermsQuery} from the given {@link BytesRef} list for * Creates a new {@link TermsQuery} from the given collection for
* a single field. * a single field. It can contain duplicate terms.
*/ */
public TermsQuery(final String field, final List<BytesRef> terms) { public TermsQuery(String field, Collection<BytesRef> terms) {
this(toTermArray(field, terms)); BytesRef[] sortedTerms = terms.toArray(new BytesRef[terms.size()]);
// already sorted if we are a SortedSet with natural order
boolean sorted = terms instanceof SortedSet && ((SortedSet<BytesRef>)terms).comparator() == null;
if (!sorted) {
ArrayUtil.timSort(sortedTerms);
}
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
BytesRefBuilder previous = null;
for (BytesRef term : sortedTerms) {
if (previous == null) {
previous = new BytesRefBuilder();
} else if (previous.get().equals(term)) {
continue; // deduplicate
}
builder.add(field, term);
previous.copyBytes(term);
}
termData = builder.finish();
termDataHashCode = termData.hashCode();
} }
/** /**
* Creates a new {@link TermsQuery} from the given {@link BytesRef} array for * Creates a new {@link TermsQuery} from the given {@link BytesRef} array for
* a single field. * a single field.
*/ */
public TermsQuery(final String field, final BytesRef...terms) { public TermsQuery(String field, BytesRef...terms) {
// this ctor prevents unnecessary Term creations
this(field, Arrays.asList(terms)); this(field, Arrays.asList(terms));
} }