Remove XTermsFilter and UidFilter in favour of Lucene 4.1 TermsFilter

This commit is contained in:
Florian Schilling 2013-02-06 17:20:53 +01:00 committed by Simon Willnauer
parent 6890c9fa62
commit a52e01f3e5
14 changed files with 90 additions and 513 deletions

View File

@ -37,4 +37,15 @@ public class BytesRefs {
}
return new BytesRef(value.toString());
}
/**
 * Converts an arbitrary value to a {@link BytesRef}, reusing {@code spare}
 * as scratch space where possible.
 *
 * @param value the value to convert; may be {@code null}
 * @param spare scratch buffer filled via {@code copyChars} when {@code value}
 *              has to be stringified; its previous content is overwritten
 * @return {@code null} if {@code value} is null; {@code value} itself if it
 *         already is a {@link BytesRef}; otherwise {@code spare} holding the
 *         UTF-8 bytes of {@code value.toString()}. Because the same spare may
 *         be returned, callers must not retain the result across calls.
 */
public static BytesRef toBytesRef(Object value, BytesRef spare) {
    if (value == null) {
        return null;
    }
    if (value instanceof BytesRef) {
        // already binary - hand it back untouched instead of copying into spare
        return (BytesRef) value;
    }
    spare.copyChars(value.toString());
    return spare;
}
}

View File

@ -1,342 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.*;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import java.io.IOException;
import java.util.*;
/**
 * Constructs a filter for docs matching any of the terms added to this class.
 * Unlike a RangeFilter this can be used for filtering on multiple terms that are not necessarily in
 * a sequence. An example might be a collection of primary keys from a database query result or perhaps
 * a choice of "category" labels picked by the end user. As a filter, this is much faster than the
 * equivalent query (a BooleanQuery with many "should" TermQueries)
 */
// LUCENE 4.1 UPGRADE: Just use TermsFilter once upgrading to 4.1, it's a copy
public final class XTermsFilter extends Filter {

    /*
     * this class is often used for large number of terms in a single field.
     * to optimize for this case and to be filter-cache friendly we
     * serialize all terms into a single byte array and store offsets
     * in a parallel array to keep the # of object constant and speed up
     * equals / hashcode.
     *
     * This adds quite a bit of complexity but allows large term filters to
     * be efficient for GC and cache-lookups
     */
    // offsets[i] = start of term i in termsBytes; offsets[i + 1] is its exclusive end
    private final int[] offsets;
    // all deduplicated term bytes, concatenated in sorted order
    private final byte[] termsBytes;
    // one entry per distinct field: the [start, end) term-index range belonging to it
    private final TermsAndField[] termsAndFields;
    private final int hashCode; // cached hashcode for fast cache lookups
    private static final int PRIME = 31;

    /**
     * Creates a new {@link XTermsFilter} from the given list. The list
     * can contain duplicate terms and multiple fields.
     */
    public XTermsFilter(final List<Term> terms) {
        this(new FieldAndTermEnum() {
            // we need to sort for deduplication and to have a common cache key
            final Iterator<Term> iter = sort(terms).iterator();

            @Override
            public BytesRef next() {
                if (iter.hasNext()) {
                    Term next = iter.next();
                    field = next.field();
                    return next.bytes();
                }
                return null;
            }
        }, terms.size());
    }

    /**
     * Creates a new {@link XTermsFilter} from the given {@link BytesRef} list for
     * a single field.
     */
    public XTermsFilter(final String field, final List<BytesRef> terms) {
        this(new FieldAndTermEnum(field) {
            // we need to sort for deduplication and to have a common cache key
            final Iterator<BytesRef> iter = sort(terms).iterator();

            @Override
            public BytesRef next() {
                if (iter.hasNext()) {
                    return iter.next();
                }
                return null;
            }
        }, terms.size());
    }

    /**
     * Creates a new {@link XTermsFilter} from the given {@link BytesRef} array for
     * a single field.
     */
    public XTermsFilter(final String field, final BytesRef... terms) {
        // this ctor prevents unnecessary Term creations
        this(field, Arrays.asList(terms));
    }

    /**
     * Creates a new {@link XTermsFilter} from the given array. The array can
     * contain duplicate terms and multiple fields.
     */
    public XTermsFilter(final Term... terms) {
        this(Arrays.asList(terms));
    }

    /**
     * Builds the packed representation: walks the (sorted) enum, skips exact
     * duplicates, copies each term's bytes into one contiguous array, records
     * per-field index ranges and accumulates the combined hash.
     *
     * @param iter   sorted source of terms; {@code iter.field()} must be non-null for every term
     * @param length upper bound on the number of terms (pre-deduplication)
     */
    private XTermsFilter(FieldAndTermEnum iter, int length) {
        // TODO: maybe use oal.index.PrefixCodedTerms instead?
        // If number of terms is more than a few hundred it
        // should be a win
        // TODO: we also pack terms in FieldCache/DocValues
        // ... maybe we can refactor to share that code
        // TODO: yet another option is to build the union of the terms in
        // an automaton and call intersect on the termsenum if the density is high
        int hash = 9;
        byte[] serializedTerms = new byte[0];
        this.offsets = new int[length + 1];
        int lastEndOffset = 0;
        int index = 0;
        ArrayList<TermsAndField> termsAndFields = new ArrayList<TermsAndField>();
        TermsAndField lastTermsAndField = null;
        BytesRef previousTerm = null;
        String previousField = null;
        BytesRef currentTerm;
        String currentField;
        while ((currentTerm = iter.next()) != null) {
            currentField = iter.field();
            if (currentField == null) {
                throw new IllegalArgumentException("Field must not be null");
            }
            if (previousField != null) {
                // deduplicate
                if (previousField.equals(currentField)) {
                    if (previousTerm.bytesEquals(currentTerm)) {
                        continue; // exact duplicate of the previous (sorted) term - skip it
                    }
                } else {
                    // field changed: close out the index range of the previous field
                    final int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
                    lastTermsAndField = new TermsAndField(start, index, previousField);
                    termsAndFields.add(lastTermsAndField);
                }
            }
            hash = PRIME * hash + currentField.hashCode();
            hash = PRIME * hash + currentTerm.hashCode();
            if (serializedTerms.length < lastEndOffset + currentTerm.length) {
                serializedTerms = ArrayUtil.grow(serializedTerms, lastEndOffset + currentTerm.length);
            }
            System.arraycopy(currentTerm.bytes, currentTerm.offset, serializedTerms, lastEndOffset, currentTerm.length);
            offsets[index] = lastEndOffset;
            lastEndOffset += currentTerm.length;
            index++;
            previousTerm = currentTerm;
            previousField = currentField;
        }
        offsets[index] = lastEndOffset;
        // flush the trailing field range (sort() rejects empty input, so previousField is set here)
        final int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
        lastTermsAndField = new TermsAndField(start, index, previousField);
        termsAndFields.add(lastTermsAndField);
        this.termsBytes = ArrayUtil.shrink(serializedTerms, lastEndOffset);
        this.termsAndFields = termsAndFields.toArray(new TermsAndField[termsAndFields.size()]);
        this.hashCode = hash;
    }

    /**
     * Seeks every packed term in every field and ORs the matching docs into a
     * lazily created {@link FixedBitSet}. Returns null when nothing matches.
     */
    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        final AtomicReader reader = context.reader();
        FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time
        final Fields fields = reader.fields();
        final BytesRef spare = new BytesRef(this.termsBytes);
        if (fields == null) {
            return result;
        }
        Terms terms = null;
        TermsEnum termsEnum = null;
        DocsEnum docs = null;
        for (TermsAndField termsAndField : this.termsAndFields) {
            if ((terms = fields.terms(termsAndField.field)) != null) {
                termsEnum = terms.iterator(termsEnum); // this won't return null
                for (int i = termsAndField.start; i < termsAndField.end; i++) {
                    // point spare at term i inside the shared byte array - no copying
                    spare.offset = offsets[i];
                    spare.length = offsets[i + 1] - offsets[i];
                    if (termsEnum.seekExact(spare, false)) { // don't use cache since we could pollute the cache here easily
                        docs = termsEnum.docs(acceptDocs, docs, 0); // no freq since we don't need them
                        if (result == null) {
                            if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                                result = new FixedBitSet(reader.maxDoc());
                                // lazy init but don't do it in the hot loop since we could read many docs
                                result.set(docs.docID());
                            }
                        }
                        while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                            result.set(docs.docID());
                        }
                    }
                }
            }
        }
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if ((obj == null) || (obj.getClass() != this.getClass())) {
            return false;
        }
        XTermsFilter test = (XTermsFilter) obj;
        if (test.hashCode == hashCode && this.termsAndFields.length == test.termsAndFields.length) {
            // first check the fields before even comparing the bytes
            for (int i = 0; i < termsAndFields.length; i++) {
                TermsAndField current = termsAndFields[i];
                if (!current.equals(test.termsAndFields[i])) {
                    return false;
                }
            }
            // straight byte comparison since we sort they must be identical
            // NOTE(review): offsets[] is indexed by term position, but termsAndFields.length is the
            // number of FIELDS - when a field contains more terms than there are fields this bound
            // compares only a prefix of termsBytes and otherwise relies on the hashCode check above.
            // Verify against the upstream Lucene 4.1 TermsFilter before relying on full equality.
            int end = offsets[termsAndFields.length];
            byte[] left = this.termsBytes;
            byte[] right = test.termsBytes;
            for (int i = 0; i < end; i++) {
                if (left[i] != right[i]) {
                    return false;
                }
            }
            return true;
        }
        return false;
    }

    @Override
    public int hashCode() {
        return hashCode; // precomputed in the constructor
    }

    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        BytesRef spare = new BytesRef(termsBytes);
        boolean first = true;
        for (int i = 0; i < termsAndFields.length; i++) {
            TermsAndField current = termsAndFields[i];
            for (int j = current.start; j < current.end; j++) {
                spare.offset = offsets[j];
                spare.length = offsets[j + 1] - offsets[j];
                if (!first) {
                    builder.append(' ');
                }
                first = false;
                builder.append(current.field).append(':');
                builder.append(spare.utf8ToString());
            }
        }
        return builder.toString();
    }

    /** Immutable (field, [start, end)) record: which term indexes belong to a field. */
    private static final class TermsAndField {
        final int start; // inclusive index of the field's first term
        final int end;   // exclusive index past the field's last term
        final String field;

        TermsAndField(int start, int end, String field) {
            super();
            this.start = start;
            this.end = end;
            this.field = field;
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + ((field == null) ? 0 : field.hashCode());
            result = prime * result + end;
            result = prime * result + start;
            return result;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) return true;
            if (obj == null) return false;
            if (getClass() != obj.getClass()) return false;
            TermsAndField other = (TermsAndField) obj;
            if (field == null) {
                if (other.field != null) return false;
            } else if (!field.equals(other.field)) return false;
            if (end != other.end) return false;
            if (start != other.start) return false;
            return true;
        }
    }

    /** Iterator-style source of term bytes that also tracks the current field name. */
    private static abstract class FieldAndTermEnum {
        protected String field;

        // returns the next term's bytes, or null when exhausted
        public abstract BytesRef next();

        public FieldAndTermEnum() {
        }

        public FieldAndTermEnum(String field) {
            this.field = field;
        }

        public String field() {
            return field;
        }
    }

    /*
     * simple utility that returns the in-place sorted list
     */
    private static <T extends Comparable<? super T>> List<T> sort(List<T> toSort) {
        if (toSort.isEmpty()) {
            throw new IllegalArgumentException("no terms provided");
        }
        Collections.sort(toSort);
        return toSort;
    }
}

View File

@ -5,13 +5,13 @@ import com.spatial4j.core.shape.jts.JtsGeometry;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.operation.buffer.BufferOp;
import com.vividsolutions.jts.operation.buffer.BufferParameters;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.geo.GeoShapeConstants;
import org.elasticsearch.common.geo.ShapeBuilder;
import org.elasticsearch.common.lucene.search.TermFilter;
import org.elasticsearch.common.lucene.search.XBooleanFilter;
import org.elasticsearch.common.lucene.search.XTermsFilter;
import org.elasticsearch.common.lucene.spatial.SpatialStrategy;
import org.elasticsearch.common.lucene.spatial.prefix.tree.Node;
import org.elasticsearch.common.lucene.spatial.prefix.tree.SpatialPrefixTree;
@ -53,7 +53,7 @@ public class TermQueryPrefixTreeStrategy extends SpatialStrategy {
for (int i = 0; i < nodes.size(); i++) {
nodeTerms[i] = new BytesRef(nodes.get(i).getTokenString());
}
return new XTermsFilter(getFieldName().indexName(), nodeTerms);
return new TermsFilter(getFieldName().indexName(), nodeTerms);
}
/**

View File

@ -28,6 +28,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.FilterClause;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
@ -37,7 +38,6 @@ import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.Streams;
import org.elasticsearch.common.lucene.search.TermFilter;
import org.elasticsearch.common.lucene.search.XBooleanFilter;
import org.elasticsearch.common.lucene.search.XTermsFilter;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
@ -453,7 +453,7 @@ public class MapperService extends AbstractIndexComponent implements Iterable<Do
for (int i = 0; i < typesBytes.length; i++) {
typesBytes[i] = new BytesRef(types[i]);
}
return new XTermsFilter(TypeFieldMapper.NAME, typesBytes);
return new TermsFilter(TypeFieldMapper.NAME, typesBytes);
} else {
XBooleanFilter bool = new XBooleanFilter();
for (String type : types) {

View File

@ -19,8 +19,13 @@
package org.elasticsearch.index.mapper;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.bytes.HashedBytesArray;
import org.elasticsearch.common.lucene.BytesRefs;
/**
*
@ -112,29 +117,41 @@ public final class Uid {
}
public static BytesRef createUidAsBytes(String type, String id) {
BytesRef ref = new BytesRef(type.length() + 1 + id.length());
ref.copyChars(type);
ref.append(DELIMITER_BYTES);
ref.append(new BytesRef(id));
return ref;
return createUidAsBytes(new BytesRef(type), new BytesRef(id));
}
public static BytesRef createUidAsBytes(String type, BytesRef id) {
BytesRef ref = new BytesRef(type.length() + 1 + id.length);
ref.copyChars(type);
ref.append(DELIMITER_BYTES);
ref.append(id);
return ref;
return createUidAsBytes(new BytesRef(type), id);
}
public static BytesRef createUidAsBytes(BytesRef type, BytesRef id) {
BytesRef ref = new BytesRef(type.length + 1 + id.length);
ref.append(type);
ref.append(DELIMITER_BYTES);
ref.append(id);
final BytesRef ref = new BytesRef(type.length + 1 + id.length);
System.arraycopy(type.bytes, type.offset, ref.bytes, 0, type.length);
ref.offset = type.length;
ref.bytes[ref.offset++] = DELIMITER_BYTE;
System.arraycopy(id.bytes, id.offset, ref.bytes, ref.offset, id.length);
ref.offset = 0;
ref.length = ref.bytes.length;
return ref;
}
/**
 * Builds the {@code _uid} terms for the cross product of {@code types} and the
 * single {@code ids} value.
 */
public static BytesRef[] createTypeUids(Collection<String> types, Object ids) {
    return createTypeUids(types, Collections.singletonList(ids));
}

/**
 * Builds the {@code _uid} terms for the cross product of {@code types} and {@code ids}.
 *
 * @param types document types; each is UTF-8 encoded once per outer iteration
 * @param ids   document ids (arbitrary objects, stringified via BytesRefs.toBytesRef)
 * @return one uid per (type, id) pair, in type-major order
 */
public static BytesRef[] createTypeUids(Collection<String> types, List<? extends Object> ids) {
    BytesRef[] uids = new BytesRef[types.size() * ids.size()];
    BytesRef typeBytes = new BytesRef();
    BytesRef idBytes = new BytesRef(); // scratch reused per id; createUidAsBytes copies into a fresh ref
    int index = 0; // write position across the whole cross product
    for (String type : types) {
        UnicodeUtil.UTF16toUTF8(type, 0, type.length(), typeBytes);
        // BUGFIX: the original inner loop ran i over uids.length (types * ids) for EVERY type,
        // overwriting earlier types' entries and throwing IndexOutOfBoundsException from
        // ids.get(i) whenever more than one type was given. Iterate over ids only.
        for (int i = 0; i < ids.size(); i++) {
            uids[index++] = Uid.createUidAsBytes(typeBytes, BytesRefs.toBytesRef(ids.get(i), idBytes));
        }
    }
    return uids;
}
public static String createUid(String type, String id) {
return createUid(new StringBuilder(), type, id);
}

View File

@ -25,6 +25,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
@ -33,7 +34,6 @@ import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.RegexpFilter;
import org.elasticsearch.common.lucene.search.TermFilter;
import org.elasticsearch.common.lucene.search.XTermsFilter;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
@ -451,7 +451,7 @@ public abstract class AbstractFieldMapper<T> implements FieldMapper<T>, Mapper {
for (int i = 0; i < bytesRefs.length; i++) {
bytesRefs[i] = indexedValueForSearch(values.get(i));
}
return new XTermsFilter(names.indexName(), bytesRefs);
return new TermsFilter(names.indexName(), bytesRefs);
}
@Override

View File

@ -19,14 +19,18 @@
package org.elasticsearch.index.mapper.internal;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import static org.elasticsearch.index.mapper.MapperBuilders.id;
import static org.elasticsearch.index.mapper.core.TypeParsers.parseField;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.lucene.BytesRefs;
@ -41,16 +45,8 @@ import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.core.AbstractFieldMapper;
import org.elasticsearch.index.query.QueryParseContext;
import org.elasticsearch.index.search.UidFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import static org.elasticsearch.index.mapper.MapperBuilders.id;
import static org.elasticsearch.index.mapper.core.TypeParsers.parseField;
import com.google.common.collect.Iterables;
/**
*
@ -176,7 +172,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
if (fieldType.indexed() || context == null) {
return super.termFilter(value, context);
}
return new UidFilter(context.queryTypes(), ImmutableList.of(BytesRefs.toBytesRef(value)));
return new TermsFilter(UidFieldMapper.NAME, Uid.createTypeUids(context.queryTypes(), value));
}
@Override
@ -184,11 +180,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
if (fieldType.indexed() || context == null) {
return super.termsFilter(values, context);
}
List<BytesRef> bytesRefs = new ArrayList<BytesRef>(values.size());
for (Object value : values) {
bytesRefs.add(BytesRefs.toBytesRef(value));
}
return new UidFilter(context.queryTypes(), bytesRefs);
return new TermsFilter(UidFieldMapper.NAME, Uid.createTypeUids(context.queryTypes(), values));
}
@Override

View File

@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
@ -30,7 +31,6 @@ import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.XTermsFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
@ -245,7 +245,7 @@ public class ParentFieldMapper extends AbstractFieldMapper<Uid> implements Inter
for (String type : context.mapperService().types()) {
typesValues[i++] = Uid.createUidAsBytes(type, bValue);
}
return new XTermsFilter(names.indexName(), typesValues);
return new TermsFilter(names.indexName(), typesValues);
}
@Override
@ -261,7 +261,7 @@ public class ParentFieldMapper extends AbstractFieldMapper<Uid> implements Inter
bValues.add(Uid.createUidAsBytes(type, bValue));
}
}
return new XTermsFilter(names.indexName(), bValues);
return new TermsFilter(names.indexName(), bValues);
}
/**

View File

@ -21,11 +21,13 @@ package org.elasticsearch.index.query;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.search.UidFilter;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import java.io.IOException;
import java.util.ArrayList;
@ -99,7 +101,7 @@ public class IdsFilterParser implements FilterParser {
types = parseContext.mapperService().types();
}
UidFilter filter = new UidFilter(types, ids);
TermsFilter filter = new TermsFilter(UidFieldMapper.NAME, Uid.createTypeUids(types, ids));
if (filterName != null) {
parseContext.addNamedFilter(filterName, filter);
}

View File

@ -19,19 +19,21 @@
package org.elasticsearch.index.query;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.search.UidFilter;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
/**
*
@ -103,7 +105,7 @@ public class IdsQueryParser implements QueryParser {
types = parseContext.mapperService().types();
}
UidFilter filter = new UidFilter(types, ids);
TermsFilter filter = new TermsFilter(UidFieldMapper.NAME, Uid.createTypeUids(types, ids));
// no need for constant score filter, since we don't cache the filter, and it always takes deletes into account
ConstantScoreQuery query = new ConstantScoreQuery(filter);
query.setBoost(boost);

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.query;
import com.google.common.collect.Lists;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
@ -122,7 +123,7 @@ public class TermsFilterParser implements FilterParser {
for (int i = 0; i < filterValues.length; i++) {
filterValues[i] = BytesRefs.toBytesRef(terms.get(i));
}
filter = new XTermsFilter(fieldName, filterValues);
filter = new TermsFilter(fieldName, filterValues);
}
// cache the whole filter by default, or if explicitly told to
if (cache == null || cache) {

View File

@ -1,107 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.search;
import org.apache.lucene.index.*;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
// LUCENE 4 UPGRADE: we can potentially use TermsFilter here, specifically, now when we don't do bloom filter, batching, and with optimization on single field terms
/**
 * A filter matching documents whose {@code _uid} term is any element of the
 * cross product of the given document types and ids.
 */
public class UidFilter extends Filter {

    // Sorted array of _uid terms (one per type/id combination) this filter matches.
    final Term[] uids;

    public UidFilter(Collection<String> types, List<BytesRef> ids) {
        this.uids = new Term[types.size() * ids.size()];
        int i = 0;
        for (String type : types) {
            for (BytesRef id : ids) {
                uids[i++] = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(type, id));
            }
        }
        if (this.uids.length > 1) {
            // sorted order lets the per-segment TermsEnum seeks below advance mostly forward
            Arrays.sort(this.uids);
        }
    }

    public Term[] getTerms() {
        return this.uids;
    }

    // TODO Optimizations
    // - If we have a single id, we can create a SingleIdDocIdSet to save on mem
    // - We can use sorted int array DocIdSet to reserve memory compared to OpenBitSet in some cases
    /**
     * Seeks each uid term in this segment and ORs matching docs into a lazily
     * created {@link FixedBitSet}; returns null when nothing matches.
     */
    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException {
        FixedBitSet set = null;
        final AtomicReader reader = ctx.reader();
        // BUGFIX: reader.terms(...) returns null for segments that have no postings for
        // the _uid field; the original dereferenced it unconditionally and would NPE.
        final Terms terms = reader.terms(UidFieldMapper.NAME);
        if (terms == null) {
            return null;
        }
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docsEnum = null;
        for (Term uid : uids) {
            if (termsEnum.seekExact(uid.bytes(), false)) { // don't pollute the terms cache
                docsEnum = termsEnum.docs(acceptedDocs, docsEnum, 0); // no freqs needed
                int doc;
                while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
                    if (set == null) {
                        set = new FixedBitSet(reader.maxDoc()); // lazy: most segments match nothing
                    }
                    set.set(doc);
                }
            }
        }
        return set;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        UidFilter uidFilter = (UidFilter) o;
        return Arrays.equals(uids, uidFilter.uids);
    }

    @Override
    public int hashCode() {
        return Arrays.hashCode(uids);
    }

    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        for (Term term : uids) {
            if (builder.length() > 0) {
                builder.append(' ');
            }
            builder.append(term);
        }
        return builder.toString();
    }
}

View File

@ -24,12 +24,12 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.*;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.TermFilter;
import org.elasticsearch.common.lucene.search.XTermsFilter;
import org.testng.annotations.Test;
import static org.hamcrest.MatcherAssert.assertThat;
@ -93,19 +93,19 @@ public class TermsFilterTests {
AtomicReader reader = new SlowCompositeReaderWrapper(DirectoryReader.open(w, true));
w.close();
XTermsFilter tf = new XTermsFilter(new Term[]{new Term(fieldName, "19")});
TermsFilter tf = new TermsFilter(new Term[]{new Term(fieldName, "19")});
FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs());
assertThat(bits, nullValue());
tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20")});
tf = new TermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20")});
bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs());
assertThat(bits.cardinality(), equalTo(1));
tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20"), new Term(fieldName, "10")});
tf = new TermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20"), new Term(fieldName, "10")});
bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs());
assertThat(bits.cardinality(), equalTo(2));
tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20"), new Term(fieldName, "10"), new Term(fieldName, "00")});
tf = new TermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20"), new Term(fieldName, "10"), new Term(fieldName, "00")});
bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs());
assertThat(bits.cardinality(), equalTo(2));

View File

@ -21,6 +21,7 @@ package org.elasticsearch.test.unit.index.query;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.BoostingQuery;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.*;
@ -1262,8 +1263,8 @@ public class SimpleIndexQueryParserTests {
Query parsedQuery = queryParser.parse(filteredQuery(termQuery("name.first", "shay"), termsFilter("name.last", "banon", "kimchy"))).query();
assertThat(parsedQuery, instanceOf(XFilteredQuery.class));
XFilteredQuery filteredQuery = (XFilteredQuery) parsedQuery;
assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class));
XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter();
assertThat(filteredQuery.getFilter(), instanceOf(TermsFilter.class));
TermsFilter termsFilter = (TermsFilter) filteredQuery.getFilter();
//assertThat(termsFilter.getTerms().length, equalTo(2));
//assertThat(termsFilter.getTerms()[0].text(), equalTo("banon"));
}
@ -1276,8 +1277,8 @@ public class SimpleIndexQueryParserTests {
Query parsedQuery = queryParser.parse(query).query();
assertThat(parsedQuery, instanceOf(XFilteredQuery.class));
XFilteredQuery filteredQuery = (XFilteredQuery) parsedQuery;
assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class));
XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter();
assertThat(filteredQuery.getFilter(), instanceOf(TermsFilter.class));
TermsFilter termsFilter = (TermsFilter) filteredQuery.getFilter();
//assertThat(termsFilter.getTerms().length, equalTo(2));
//assertThat(termsFilter.getTerms()[0].text(), equalTo("banon"));
}
@ -1290,8 +1291,8 @@ public class SimpleIndexQueryParserTests {
assertThat(parsedQuery.namedFilters().containsKey("test"), equalTo(true));
assertThat(parsedQuery.query(), instanceOf(XFilteredQuery.class));
XFilteredQuery filteredQuery = (XFilteredQuery) parsedQuery.query();
assertThat(filteredQuery.getFilter(), instanceOf(XTermsFilter.class));
XTermsFilter termsFilter = (XTermsFilter) filteredQuery.getFilter();
assertThat(filteredQuery.getFilter(), instanceOf(TermsFilter.class));
TermsFilter termsFilter = (TermsFilter) filteredQuery.getFilter();
//assertThat(termsFilter.getTerms().length, equalTo(2));
//assertThat(termsFilter.getTerms()[0].text(), equalTo("banon"));
}
@ -1968,7 +1969,7 @@ public class SimpleIndexQueryParserTests {
Query parsedQuery = queryParser.parse(query).query();
assertThat(parsedQuery, instanceOf(XConstantScoreQuery.class));
XConstantScoreQuery constantScoreQuery = (XConstantScoreQuery) parsedQuery;
XTermsFilter filter = (XTermsFilter) constantScoreQuery.getFilter();
TermsFilter filter = (TermsFilter) constantScoreQuery.getFilter();
//Term exampleTerm = filter.getTerms()[0];
//assertThat(exampleTerm.field(), equalTo("country"));
}