lucene 4: cleanup terms/uid filter
parent 79368bb221
commit 0660e20c47
@@ -28,6 +28,7 @@ import org.elasticsearch.common.lucene.search.NotDeletedFilter;
 // So it can basically be cached safely even with a reader that changes deletions but remain with teh same cache key
 // See more: https://issues.apache.org/jira/browse/LUCENE-2468
 // TODO Lucene 4.0 won't need this, since live docs are "and'ed" while scoring
+// LUCENE 4 UPGRADE: we probably don't need this anymore, because of acceptDocs
 public class DeletionAwareConstantScoreQuery extends ConstantScoreQuery {
 
     private final Filter actualFilter;
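The added comment refers to the Lucene 4 API change behind this cleanup: getDocIdSet now receives the segment's live docs as acceptDocs, so a filter already skips deleted documents and a deletion-aware wrapper query becomes unnecessary. A minimal sketch of the plain wrapping that then suffices (the helper class below is illustrative, not part of this commit):

    import org.apache.lucene.search.ConstantScoreQuery;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.search.Query;

    // Illustrative only: with Lucene 4, deleted docs are excluded via the acceptDocs
    // passed to Filter.getDocIdSet, so a plain ConstantScoreQuery over the filter is enough.
    public final class FilterQueryExample {
        public static Query constantScore(Filter filter) {
            return new ConstantScoreQuery(filter);
        }
    }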
@@ -19,13 +19,7 @@
 
 package org.apache.lucene.search;
 
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
 import org.apache.lucene.queries.TermsFilter;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -39,51 +33,52 @@ import java.util.Collection;
  * Similar to {@link TermsFilter} but stores the terms in an array for better memory usage
  * when cached, and also uses bulk read
  */
-// LUCENE MONITOR: Against TermsFilter - this is now identical to TermsFilter once 4.1 is released
+// LUCENE 4 UPGRADE: Make sure to sync this against latest 4.1
+// LUCENE 4.1: once its out, we can use TermsFilter from it
 public class XTermsFilter extends Filter {
 
     private final Term[] filterTerms;
     private final boolean[] resetTermsEnum;// true if the enum must be reset when building the bitset
     private final int length;
 
     /**
      * Creates a new {@link XTermsFilter} from the given collection. The collection
      * can contain duplicate terms and multiple fields.
      */
     public XTermsFilter(Collection<Term> terms) {
         this(terms.toArray(new Term[terms.size()]));
     }
 
     /**
      * Creates a new {@link XTermsFilter} from the given array. The array can
      * contain duplicate terms and multiple fields.
      */
     public XTermsFilter(Term... terms) {
         if (terms == null || terms.length == 0) {
             throw new IllegalArgumentException("TermsFilter requires at least one term");
         }
         Arrays.sort(terms);
         this.filterTerms = new Term[terms.length];
         this.resetTermsEnum = new boolean[terms.length];
         int index = 0;
         for (int i = 0; i < terms.length; i++) {
             Term currentTerm = terms[i];
             boolean fieldChanged = true;
             if (index > 0) {
                 // deduplicate
-                if (filterTerms[index-1].field().equals(currentTerm.field())) {
+                if (filterTerms[index - 1].field().equals(currentTerm.field())) {
                     fieldChanged = false;
-                    if (filterTerms[index-1].bytes().bytesEquals(currentTerm.bytes())){
+                    if (filterTerms[index - 1].bytes().bytesEquals(currentTerm.bytes())) {
                         continue;
                     }
                 }
             }
             this.filterTerms[index] = currentTerm;
             this.resetTermsEnum[index] = index == 0 || fieldChanged; // mark index 0 so we have a clear path in the iteration
 
             index++;
         }
         length = index;
     }
 
     public Term[] getTerms() {
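Because the constructor sorts the input and drops duplicates, equal terms collapse and resetTermsEnum marks the first term of each field. A small usage sketch (class name and term values are made up for illustration):

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.XTermsFilter;

    public class XTermsFilterExample {
        public static void main(String[] args) {
            // duplicate terms are dropped; the rest end up sorted by field, then by term bytes
            XTermsFilter filter = new XTermsFilter(
                    new Term("user", "kimchy"),
                    new Term("user", "kimchy"),   // duplicate, removed by the constructor
                    new Term("_uid", "type1#1"));
            System.out.println(filter);           // toString lists the de-duplicated terms
        }
    }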
@@ -92,100 +87,100 @@ public class XTermsFilter extends Filter {
 
     @Override
     public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
         AtomicReader reader = context.reader();
         FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time
         Fields fields = reader.fields();
         if (fields == null) {
             return result;
         }
         final BytesRef br = new BytesRef();
         Terms terms = null;
         TermsEnum termsEnum = null;
         DocsEnum docs = null;
         assert resetTermsEnum[0];
         for (int i = 0; i < length; i++) {
             Term term = this.filterTerms[i];
             if (resetTermsEnum[i]) {
                 terms = fields.terms(term.field());
                 if (terms == null) {
-                    i = skipToNextField(i+1, length); // skip to the next field since this field is not indexed
+                    i = skipToNextField(i + 1, length); // skip to the next field since this field is not indexed
                     continue;
                 }
             }
             if ((termsEnum = terms.iterator(termsEnum)) != null) {
                 br.copyBytes(term.bytes());
                 assert termsEnum != null;
-                if (termsEnum.seekExact(br,true)) {
+                if (termsEnum.seekExact(br, true)) {
                     docs = termsEnum.docs(acceptDocs, docs, 0);
                     if (result == null) {
                         if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                             result = new FixedBitSet(reader.maxDoc());
                             // lazy init but don't do it in the hot loop since we could read many docs
                             result.set(docs.docID());
                         }
                     }
                     while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                         result.set(docs.docID());
                     }
                 }
             }
         }
         return result;
     }
 
     private final int skipToNextField(int index, int length) {
         for (int i = index; i < length; i++) {
             if (resetTermsEnum[i]) {
-                return i-1;
+                return i - 1;
             }
         }
         return length;
     }
 
     @Override
     public boolean equals(Object obj) {
         if (this == obj) {
             return true;
         }
         if ((obj == null) || (obj.getClass() != this.getClass())) {
             return false;
         }
         XTermsFilter test = (XTermsFilter) obj;
         if (filterTerms != test.filterTerms) {
             if (length == test.length) {
                 for (int i = 0; i < length; i++) {
                     // can not be null!
                     if (!filterTerms[i].equals(test.filterTerms[i])) {
                         return false;
                     }
                 }
             } else {
                 return false;
             }
         }
         return true;
     }
 
     @Override
     public int hashCode() {
         int hash = 9;
         for (int i = 0; i < length; i++) {
             hash = 31 * hash + filterTerms[i].hashCode();
         }
         return hash;
     }
 
     @Override
     public String toString() {
         StringBuilder builder = new StringBuilder();
         for (int i = 0; i < length; i++) {
             if (builder.length() > 0) {
                 builder.append(' ');
             }
             builder.append(filterTerms[i]);
         }
         return builder.toString();
     }
 }
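The reindented getDocIdSet keeps the same per-term pattern: seek each term exactly, then OR its postings into a lazily created FixedBitSet while honouring acceptDocs. A condensed, hypothetical helper showing that pattern for a single term (not part of the commit; Lucene 4.0 API assumed):

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.DocsEnum;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.FixedBitSet;

    // Hypothetical helper, not in this commit: collect the docs of one term into a bitset.
    final class SingleTermDocs {
        static FixedBitSet collect(AtomicReader reader, Term term, Bits acceptDocs) throws IOException {
            Terms terms = reader.terms(term.field());
            if (terms == null) {
                return null;                                   // field not indexed in this segment
            }
            TermsEnum termsEnum = terms.iterator(null);
            if (!termsEnum.seekExact(term.bytes(), true)) {
                return null;                                   // term not present
            }
            DocsEnum docs = termsEnum.docs(acceptDocs, null, 0);
            FixedBitSet result = null;
            int doc;
            while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                if (result == null) {
                    result = new FixedBitSet(reader.maxDoc()); // lazy init on the first hit
                }
                result.set(doc);
            }
            return result;
        }
    }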
@@ -166,7 +166,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
         if (indexed() || context == null) {
             return super.fieldQuery(value, context);
         }
-        UidFilter filter = new UidFilter(context.queryTypes(), ImmutableList.of(value), context.indexCache().bloomCache());
+        UidFilter filter = new UidFilter(context.queryTypes(), ImmutableList.of(value));
         // no need for constant score filter, since we don't cache the filter, and it always takes deletes into account
         return new ConstantScoreQuery(filter);
     }
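UidFilter now takes only the types and the ids; the bloom cache argument is gone. A hedged usage sketch of the updated call (class name, values, and import paths are assumptions, not code from this commit):

    import com.google.common.collect.ImmutableList;
    import org.apache.lucene.search.ConstantScoreQuery;
    import org.apache.lucene.search.Query;
    import org.elasticsearch.index.search.UidFilter;

    public class UidQueryExample {
        public static Query idQuery(String type, String id) {
            // the filter is built per request and never cached, so a plain
            // ConstantScoreQuery keeps deletions visible without extra wrapping
            UidFilter filter = new UidFilter(ImmutableList.of(type), ImmutableList.of(id));
            return new ConstantScoreQuery(filter);
        }
    }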
@@ -176,7 +176,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
         if (indexed() || context == null) {
             return super.fieldFilter(value, context);
         }
-        return new UidFilter(context.queryTypes(), ImmutableList.of(value), context.indexCache().bloomCache());
+        return new UidFilter(context.queryTypes(), ImmutableList.of(value));
     }
 
     @Override
@@ -280,7 +280,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         // if all are defaults, no sense to write it at all
         if (fieldType.stored() == Defaults.ID_FIELD_TYPE.stored() &&
                 fieldType.indexed() == Defaults.ID_FIELD_TYPE.indexed() && path == Defaults.PATH) {
             return builder;
         }
         builder.startObject(CONTENT_TYPE);
@@ -19,11 +19,7 @@
 
 package org.elasticsearch.index.search;
 
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.util.Bits;
@@ -36,9 +32,11 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
 
+// LUCENE 4 UPGRADE: we can potentially use TermsFilter here, specifically, now when we don't do bloom filter, batching, and with optimization on single field terms
 public class UidFilter extends Filter {
 
     final Term[] uids;
 
     public UidFilter(Collection<String> types, List<String> ids) {
         this.uids = new Term[types.size() * ids.size()];
         int i = 0;
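The new class comment suggests the whole filter could eventually be replaced by a plain terms filter over the _uid terms, now that bloom-filter lookups and batching are gone. A hedged sketch of that potential replacement, using the XTermsFilter from this same commit (the "_uid" term format and the helper class are assumptions, not code from this commit):

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.search.XTermsFilter;

    public class UidTermsFilterSketch {                          // illustrative only
        public static Filter build(List<String> types, List<String> ids) {
            List<Term> uids = new ArrayList<Term>();
            for (String type : types) {
                for (String id : ids) {
                    uids.add(new Term("_uid", type + "#" + id)); // "_uid" term format assumed
                }
            }
            return new XTermsFilter(uids);
        }
    }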
@@ -60,7 +58,6 @@ public class UidFilter extends Filter {
     // - If we have a single id, we can create a SingleIdDocIdSet to save on mem
     // - We can use sorted int array DocIdSet to reserve memory compared to OpenBitSet in some cases
     @Override
-    // LUCENE 4 UPGRADE: this filter does respect acceptDocs maybe we need to change this
     public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException {
        FixedBitSet set = null;
        final AtomicReader reader = ctx.reader();
@@ -71,8 +68,6 @@ public class UidFilter extends Filter {
                docsEnum = termsEnum.docs(acceptedDocs, docsEnum, 0);
                int doc;
                while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
-                    // no need for batching, its on the UID, there will be only
-                    // one doc
                    if (set == null) {
                        set = new FixedBitSet(reader.maxDoc());
                    }