lucene 4: cleanup terms/uid filter

parent 79368bb221
commit 0660e20c47
@@ -28,6 +28,7 @@ import org.elasticsearch.common.lucene.search.NotDeletedFilter;

 // So it can basically be cached safely even with a reader that changes deletions but remains with the same cache key
 // See more: https://issues.apache.org/jira/browse/LUCENE-2468
 // TODO Lucene 4.0 won't need this, since live docs are "and'ed" while scoring
+// LUCENE 4 UPGRADE: we probably don't need this anymore, because of acceptDocs
 public class DeletionAwareConstantScoreQuery extends ConstantScoreQuery {

     private final Filter actualFilter;
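The added upgrade note is the crux of the cleanup: in Lucene 4, every Filter receives the live (non-deleted) docs as the acceptDocs parameter of getDocIdSet, so a deletion-aware wrapper stops being necessary. A minimal sketch of that contract follows; the class name is illustrative and not part of this commit:

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

// Illustrative only: a filter that consults acceptDocs directly. Since
// acceptDocs already encodes deletions, no wrapper has to re-check them.
public class LiveDocsOnlyFilter extends Filter {
    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        int maxDoc = context.reader().maxDoc();
        FixedBitSet bits = new FixedBitSet(maxDoc); // FixedBitSet is itself a DocIdSet
        for (int doc = 0; doc < maxDoc; doc++) {
            if (acceptDocs == null || acceptDocs.get(doc)) { // null means "all docs live"
                bits.set(doc);
            }
        }
        return bits;
    }
}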
@@ -19,13 +19,7 @@

 package org.apache.lucene.search;

-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
 import org.apache.lucene.queries.TermsFilter;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -39,51 +33,52 @@ import java.util.Collection;

  * Similar to {@link TermsFilter} but stores the terms in an array for better memory usage
  * when cached, and also uses bulk read
  */
 // LUCENE MONITOR: Against TermsFilter - this is now identical to TermsFilter once 4.1 is released
 // LUCENE 4 UPGRADE: Make sure to sync this against latest 4.1
 // LUCENE 4.1: once it's out, we can use TermsFilter from it
 public class XTermsFilter extends Filter {

     private final Term[] filterTerms;
     private final boolean[] resetTermsEnum; // true if the enum must be reset when building the bitset
     private final int length;

     /**
      * Creates a new {@link XTermsFilter} from the given collection. The collection
      * can contain duplicate terms and multiple fields.
      */
     public XTermsFilter(Collection<Term> terms) {
         this(terms.toArray(new Term[terms.size()]));
     }

     /**
      * Creates a new {@link XTermsFilter} from the given array. The array can
      * contain duplicate terms and multiple fields.
      */
     public XTermsFilter(Term... terms) {
         if (terms == null || terms.length == 0) {
             throw new IllegalArgumentException("TermsFilter requires at least one term");
         }
         Arrays.sort(terms);
         this.filterTerms = new Term[terms.length];
         this.resetTermsEnum = new boolean[terms.length];
         int index = 0;
         for (int i = 0; i < terms.length; i++) {
             Term currentTerm = terms[i];
             boolean fieldChanged = true;
             if (index > 0) {
                 // deduplicate
                 if (filterTerms[index - 1].field().equals(currentTerm.field())) {
                     fieldChanged = false;
                     if (filterTerms[index - 1].bytes().bytesEquals(currentTerm.bytes())) {
                         continue;
                     }
                 }
             }
             this.filterTerms[index] = currentTerm;
             this.resetTermsEnum[index] = index == 0 || fieldChanged; // mark index 0 so we have a clear path in the iteration
             index++;
         }
         length = index;
     }

     public Term[] getTerms() {
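As a side note, the sort-plus-dedup in the varargs constructor means the stored terms end up grouped by field, with resetTermsEnum flagging each field boundary. A hypothetical usage sketch (not part of the commit) of how duplicates collapse:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.XTermsFilter;

// Hypothetical usage: duplicates are dropped and terms are sorted by
// field, then by value, so each field's terms are scanned contiguously.
public class XTermsFilterExample {
    public static void main(String[] args) {
        XTermsFilter filter = new XTermsFilter(
                new Term("user", "kimchy"),
                new Term("user", "kimchy"), // duplicate, silently dropped
                new Term("tag", "lucene"),
                new Term("user", "shay"));
        // toString() walks the deduplicated terms: "tag:lucene user:kimchy user:shay"
        System.out.println(filter);
    }
}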
@@ -92,100 +87,100 @@ public class XTermsFilter extends Filter {

     @Override
     public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
         AtomicReader reader = context.reader();
         FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time
         Fields fields = reader.fields();
         if (fields == null) {
             return result;
         }
         final BytesRef br = new BytesRef();
         Terms terms = null;
         TermsEnum termsEnum = null;
         DocsEnum docs = null;
         assert resetTermsEnum[0];
         for (int i = 0; i < length; i++) {
             Term term = this.filterTerms[i];
             if (resetTermsEnum[i]) {
                 terms = fields.terms(term.field());
                 if (terms == null) {
                     i = skipToNextField(i + 1, length); // skip to the next field since this field is not indexed
                     continue;
                 }
             }
             if ((termsEnum = terms.iterator(termsEnum)) != null) {
                 br.copyBytes(term.bytes());
                 assert termsEnum != null;
                 if (termsEnum.seekExact(br, true)) {
                     docs = termsEnum.docs(acceptDocs, docs, 0);
                     if (result == null) {
                         if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                             result = new FixedBitSet(reader.maxDoc());
                             // lazy init but don't do it in the hot loop since we could read many docs
                             result.set(docs.docID());
                         }
                     }
                     while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                         result.set(docs.docID());
                     }
                 }
             }
         }
         return result;
     }

     private final int skipToNextField(int index, int length) {
         for (int i = index; i < length; i++) {
             if (resetTermsEnum[i]) {
                 return i - 1;
             }
         }
         return length;
     }

     @Override
     public boolean equals(Object obj) {
         if (this == obj) {
             return true;
         }
         if ((obj == null) || (obj.getClass() != this.getClass())) {
             return false;
         }
         XTermsFilter test = (XTermsFilter) obj;
         if (filterTerms != test.filterTerms) {
             if (length == test.length) {
                 for (int i = 0; i < length; i++) {
                     // can not be null!
                     if (!filterTerms[i].equals(test.filterTerms[i])) {
                         return false;
                     }
                 }
             } else {
                 return false;
             }
         }
         return true;
     }

     @Override
     public int hashCode() {
         int hash = 9;
         for (int i = 0; i < length; i++) {
             hash = 31 * hash + filterTerms[i].hashCode();
         }
         return hash;
     }

     @Override
     public String toString() {
         StringBuilder builder = new StringBuilder();
         for (int i = 0; i < length; i++) {
             if (builder.length() > 0) {
                 builder.append(' ');
             }
             builder.append(filterTerms[i]);
         }
         return builder.toString();
     }
 }
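For context, a filter like this is evaluated segment by segment; passing each segment's live docs as acceptDocs and draining the returned DocIdSet looks roughly like the sketch below. This assumes an open DirectoryReader and is not code from the commit:

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.XTermsFilter;

public final class FilterDriver {
    // A null DocIdSet (returned above when a segment has no matches) means "no documents".
    static void collect(DirectoryReader reader, XTermsFilter filter) throws IOException {
        for (AtomicReaderContext context : reader.leaves()) {
            DocIdSet set = filter.getDocIdSet(context, context.reader().getLiveDocs());
            if (set == null) {
                continue;
            }
            DocIdSetIterator it = set.iterator();
            if (it == null) {
                continue; // DocIdSet.iterator() may also be null for empty sets
            }
            int doc;
            while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                System.out.println("matched global doc " + (context.docBase + doc));
            }
        }
    }
}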
@@ -166,7 +166,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern

         if (indexed() || context == null) {
             return super.fieldQuery(value, context);
         }
-        UidFilter filter = new UidFilter(context.queryTypes(), ImmutableList.of(value), context.indexCache().bloomCache());
+        UidFilter filter = new UidFilter(context.queryTypes(), ImmutableList.of(value));
         // no need for constant score filter, since we don't cache the filter, and it always takes deletes into account
         return new ConstantScoreQuery(filter);
     }
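Since the filter is built per request (never cached) and, as the comment notes, respects deletes via acceptDocs, wrapping it directly in ConstantScoreQuery is enough for an exact-id lookup. A hedged sketch of the resulting search path; searcher and types are assumed inputs, not from the commit:

import java.io.IOException;
import java.util.List;

import com.google.common.collect.ImmutableList;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.elasticsearch.index.search.UidFilter;

public final class IdLookupSketch {
    // Every hit gets the same constant score; id lookups don't need relevance ranking.
    static TopDocs lookupById(IndexSearcher searcher, List<String> types, String id) throws IOException {
        Query query = new ConstantScoreQuery(new UidFilter(types, ImmutableList.of(id)));
        return searcher.search(query, 10);
    }
}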
@@ -176,7 +176,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern

         if (indexed() || context == null) {
             return super.fieldFilter(value, context);
         }
-        return new UidFilter(context.queryTypes(), ImmutableList.of(value), context.indexCache().bloomCache());
+        return new UidFilter(context.queryTypes(), ImmutableList.of(value));
     }

     @Override
@@ -280,7 +280,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern

     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         // if all are defaults, no sense to write it at all
         if (fieldType.stored() == Defaults.ID_FIELD_TYPE.stored() &&
                 fieldType.indexed() == Defaults.ID_FIELD_TYPE.indexed() && path == Defaults.PATH) {
             return builder;
         }
         builder.startObject(CONTENT_TYPE);
@@ -19,11 +19,7 @@

 package org.elasticsearch.index.search;

-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.util.Bits;
@@ -36,9 +32,11 @@ import java.util.Arrays;

 import java.util.Collection;
 import java.util.List;

+// LUCENE 4 UPGRADE: we can potentially use TermsFilter here, specifically now that we don't do bloom filtering or batching, and with the optimization on single-field terms
 public class UidFilter extends Filter {

     final Term[] uids;

     public UidFilter(Collection<String> types, List<String> ids) {
         this.uids = new Term[types.size() * ids.size()];
         int i = 0;
@@ -60,7 +58,6 @@ public class UidFilter extends Filter {

     // - If we have a single id, we can create a SingleIdDocIdSet to save on mem
     // - We can use sorted int array DocIdSet to reserve memory compared to OpenBitSet in some cases
     @Override
     // LUCENE 4 UPGRADE: this filter does respect acceptDocs, maybe we need to change this
     public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException {
         FixedBitSet set = null;
         final AtomicReader reader = ctx.reader();
@@ -71,8 +68,6 @@ public class UidFilter extends Filter {

             docsEnum = termsEnum.docs(acceptedDocs, docsEnum, 0);
             int doc;
             while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
                 // no need for batching, it's on the UID, there will be only
                 // one doc
                 if (set == null) {
                     set = new FixedBitSet(reader.maxDoc());
                 }