mirror of https://github.com/apache/lucene.git
LUCENE-6470: Improve efficiency of TermsQuery constructors.
This commit is contained in:
parent
0ff8d11367
commit
a3b2ad334c
|
@ -193,6 +193,8 @@ Optimizations
|
|||
* LUCENE-6940: MUST_NOT clauses execute faster, especially when they are sparse.
|
||||
(Adrien Grand)
|
||||
|
||||
* LUCENE-6470: Improve efficiency of TermsQuery constructors. (Robert Muir)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-6976: BytesRefTermAttributeImpl.copyTo NPE'ed if BytesRef was null.
|
||||
|
|
|
@ -66,22 +66,27 @@ public class PrefixCodedTerms implements Accountable {
|
|||
|
||||
/** add a term */
|
||||
public void add(Term term) {
|
||||
assert lastTerm.equals(new Term("")) || term.compareTo(lastTerm) > 0;
|
||||
add(term.field(), term.bytes());
|
||||
}
|
||||
|
||||
/** add a term */
|
||||
public void add(String field, BytesRef bytes) {
|
||||
assert lastTerm.equals(new Term("")) || new Term(field, bytes).compareTo(lastTerm) > 0;
|
||||
|
||||
try {
|
||||
int prefix = sharedPrefix(lastTerm.bytes, term.bytes);
|
||||
int suffix = term.bytes.length - prefix;
|
||||
if (term.field.equals(lastTerm.field)) {
|
||||
int prefix = sharedPrefix(lastTerm.bytes, bytes);
|
||||
int suffix = bytes.length - prefix;
|
||||
if (field.equals(lastTerm.field)) {
|
||||
output.writeVInt(prefix << 1);
|
||||
} else {
|
||||
output.writeVInt(prefix << 1 | 1);
|
||||
output.writeString(term.field);
|
||||
output.writeString(field);
|
||||
}
|
||||
output.writeVInt(suffix);
|
||||
output.writeBytes(term.bytes.bytes, term.bytes.offset + prefix, suffix);
|
||||
lastTermBytes.copyBytes(term.bytes);
|
||||
output.writeBytes(bytes.bytes, bytes.offset + prefix, suffix);
|
||||
lastTermBytes.copyBytes(bytes);
|
||||
lastTerm.bytes = lastTermBytes.get();
|
||||
lastTerm.field = term.field;
|
||||
lastTerm.field = field;
|
||||
size += 1;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.util.Collections;
|
|||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -55,6 +56,7 @@ import org.apache.lucene.util.Accountable;
|
|||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.DocIdSetBuilder;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
|
@ -86,22 +88,17 @@ public class TermsQuery extends Query implements Accountable {
|
|||
private final PrefixCodedTerms termData;
|
||||
private final int termDataHashCode; // cached hashcode of termData
|
||||
|
||||
private static Term[] toTermArray(String field, List<BytesRef> termBytes) {
|
||||
Term[] array = new Term[termBytes.size()];
|
||||
int i = 0;
|
||||
for (BytesRef t : termBytes) {
|
||||
array[i++] = new Term(field, t);
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@link TermsQuery} from the given list. The list
|
||||
* Creates a new {@link TermsQuery} from the given collection. It
|
||||
* can contain duplicate terms and multiple fields.
|
||||
*/
|
||||
public TermsQuery(final List<Term> terms) {
|
||||
public TermsQuery(Collection<Term> terms) {
|
||||
Term[] sortedTerms = terms.toArray(new Term[terms.size()]);
|
||||
ArrayUtil.timSort(sortedTerms);
|
||||
// already sorted if we are a SortedSet with natural order
|
||||
boolean sorted = terms instanceof SortedSet && ((SortedSet<Term>)terms).comparator() == null;
|
||||
if (!sorted) {
|
||||
ArrayUtil.timSort(sortedTerms);
|
||||
}
|
||||
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
|
||||
Term previous = null;
|
||||
for (Term term : sortedTerms) {
|
||||
|
@ -113,21 +110,38 @@ public class TermsQuery extends Query implements Accountable {
|
|||
termData = builder.finish();
|
||||
termDataHashCode = termData.hashCode();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a new {@link TermsQuery} from the given {@link BytesRef} list for
|
||||
* a single field.
|
||||
* Creates a new {@link TermsQuery} from the given collection for
|
||||
* a single field. It can contain duplicate terms.
|
||||
*/
|
||||
public TermsQuery(final String field, final List<BytesRef> terms) {
|
||||
this(toTermArray(field, terms));
|
||||
public TermsQuery(String field, Collection<BytesRef> terms) {
|
||||
BytesRef[] sortedTerms = terms.toArray(new BytesRef[terms.size()]);
|
||||
// already sorted if we are a SortedSet with natural order
|
||||
boolean sorted = terms instanceof SortedSet && ((SortedSet<BytesRef>)terms).comparator() == null;
|
||||
if (!sorted) {
|
||||
ArrayUtil.timSort(sortedTerms);
|
||||
}
|
||||
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
|
||||
BytesRefBuilder previous = null;
|
||||
for (BytesRef term : sortedTerms) {
|
||||
if (previous == null) {
|
||||
previous = new BytesRefBuilder();
|
||||
} else if (previous.get().equals(term)) {
|
||||
continue; // deduplicate
|
||||
}
|
||||
builder.add(field, term);
|
||||
previous.copyBytes(term);
|
||||
}
|
||||
termData = builder.finish();
|
||||
termDataHashCode = termData.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@link TermsQuery} from the given {@link BytesRef} array for
|
||||
* a single field.
|
||||
*/
|
||||
public TermsQuery(final String field, final BytesRef...terms) {
|
||||
// this ctor prevents unnecessary Term creations
|
||||
public TermsQuery(String field, BytesRef...terms) {
|
||||
this(field, Arrays.asList(terms));
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue