Add a field-data-based TermsFilter

Add a FieldDataTermsFilter that compares terms against
values in the fielddata cache. When filtering on a large
set of terms, this filter can be considerably faster
than a standard Lucene terms filter.

Add the "fielddata" execution mode to the
terms filter parser to enable the use of
the new FieldDataTermsFilter.

Add supporting tests and documentation.

Closes #4209
Matt Weber, 2013-11-15 15:12:25 -08:00 (committed by Martijn van Groningen)
parent dab841d4ec
commit a841a422f6
10 changed files with 738 additions and 41 deletions

File: terms filter documentation

@@ -35,6 +35,15 @@ The `execution` option now has the following options:
building a bit set matching it, and filtering. The total filter is
cached.
`fielddata`::
Generates a terms filter that uses the fielddata cache to
compare terms. This execution mode is useful when filtering
on a field that is already loaded into the fielddata cache
from faceting, sorting, or index warmers. When filtering on
a large number of terms, this execution mode can be considerably
faster than the other modes. The total filter is not cached unless
explicitly configured to do so (see the Java sketch after this excerpt).
`bool`::
Generates a term filter (which is cached) for each term, and
wraps those in a bool filter. The bool filter itself is not cached as it
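For context, a minimal sketch of the new execution mode from the Java API, mirroring the integration tests later in this commit; the client, the "test" index, and the "str" field are illustrative assumptions:

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;

import static org.elasticsearch.index.query.FilterBuilders.termsFilter;
import static org.elasticsearch.index.query.QueryBuilders.filteredQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;

public class FieldDataTermsExample {
    // Filter docs whose "str" field matches any of the given terms, comparing
    // against fielddata instead of the terms dictionary.
    public static SearchResponse search(Client client) {
        return client.prepareSearch("test")
                .setQuery(filteredQuery(matchAllQuery(),
                        termsFilter("str", "1", "4").execution("fielddata")))
                .execute().actionGet();
    }
}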

File: FieldMapper.java

@@ -30,6 +30,7 @@ import org.elasticsearch.common.Nullable;
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldDataService;
import org.elasticsearch.index.query.QueryParseContext;
import org.elasticsearch.index.similarity.SimilarityProvider;
@@ -174,6 +175,8 @@ public interface FieldMapper<T> extends Mapper {
Filter termsFilter(List values, @Nullable QueryParseContext context);
Filter termsFilter(IndexFieldDataService fieldData, List values, @Nullable QueryParseContext context);
Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context);
Filter rangeFilter(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context);

File: AbstractFieldMapper.java

@@ -19,6 +19,7 @@
package org.elasticsearch.index.mapper.core;
import com.carrotsearch.hppc.ObjectOpenHashSet;
import com.google.common.base.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Field;
@@ -44,8 +45,10 @@ import org.elasticsearch.index.codec.postingsformat.PostingFormats;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatService;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldDataService;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.query.QueryParseContext;
import org.elasticsearch.index.search.FieldDataTermsFilter;
import org.elasticsearch.index.similarity.SimilarityLookupService;
import org.elasticsearch.index.similarity.SimilarityProvider;
@@ -407,6 +410,21 @@ public abstract class AbstractFieldMapper<T> implements FieldMapper<T> {
return new TermsFilter(names.indexName(), bytesRefs);
}
/**
* A terms filter based on the field data cache
*/
@Override
public Filter termsFilter(IndexFieldDataService fieldDataService, List values, @Nullable QueryParseContext context) {
// create with initial size large enough to avoid rehashing
ObjectOpenHashSet<BytesRef> terms =
new ObjectOpenHashSet<BytesRef>((int) (values.size() * (1 + ObjectOpenHashSet.DEFAULT_LOAD_FACTOR)));
for (int i = 0, len = values.size(); i < len; i++) {
terms.add(indexedValueForSearch(values.get(i)));
}
return FieldDataTermsFilter.newBytes(fieldDataService.getForField(this), terms);
}
@Override
public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) {
return new TermRangeQuery(names.indexName(),
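A note on the presizing arithmetic in the termsFilter implementation above, as a hedged sketch (0.75 is hppc's documented default load factor; the class and variable names below are illustrative):

public class PresizeSketch {
    public static void main(String[] args) {
        // A hash set with load factor f rehashes once it holds more than
        // capacity * f elements, so holding n elements needs capacity >= n / f.
        int n = 1000;
        float f = 0.75f;                         // assumed default load factor
        int presized = (int) (n * (1 + f));      // 1750, the sizing used above
        int minimum = (int) Math.ceil(n / f);    // 1334, the bare minimum
        System.out.println(presized >= minimum); // true: no rehash needed
    }
}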

File: DoubleFieldMapper.java

@@ -160,22 +160,12 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
@Override
public BytesRef indexedValueForSearch(Object value) {
- long longValue = NumericUtils.doubleToSortableLong(parseValue(value));
+ long longValue = NumericUtils.doubleToSortableLong(parseDoubleValue(value));
BytesRef bytesRef = new BytesRef();
NumericUtils.longToPrefixCoded(longValue, 0, bytesRef); // 0 because of exact match
return bytesRef;
}
- private double parseValue(Object value) {
- if (value instanceof Number) {
- return ((Number) value).doubleValue();
- }
- if (value instanceof BytesRef) {
- return Double.parseDouble(((BytesRef) value).utf8ToString());
- }
- return Double.parseDouble(value.toString());
- }
@Override
public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) {
double iValue = Double.parseDouble(value);
@@ -188,7 +178,7 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
@Override
public Query termQuery(Object value, @Nullable QueryParseContext context) {
- double dValue = parseValue(value);
+ double dValue = parseDoubleValue(value);
return NumericRangeQuery.newDoubleRange(names.indexName(), precisionStep,
dValue, dValue, true, true);
}
@@ -196,14 +186,14 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
@Override
public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) {
return NumericRangeQuery.newDoubleRange(names.indexName(), precisionStep,
- lowerTerm == null ? null : parseValue(lowerTerm),
- upperTerm == null ? null : parseValue(upperTerm),
+ lowerTerm == null ? null : parseDoubleValue(lowerTerm),
+ upperTerm == null ? null : parseDoubleValue(upperTerm),
includeLower, includeUpper);
}
@Override
public Filter termFilter(Object value, @Nullable QueryParseContext context) {
- double dValue = parseValue(value);
+ double dValue = parseDoubleValue(value);
return NumericRangeFilter.newDoubleRange(names.indexName(), precisionStep,
dValue, dValue, true, true);
}
@@ -211,8 +201,8 @@
@Override
public Filter rangeFilter(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) {
return NumericRangeFilter.newDoubleRange(names.indexName(), precisionStep,
- lowerTerm == null ? null : parseValue(lowerTerm),
- upperTerm == null ? null : parseValue(upperTerm),
+ lowerTerm == null ? null : parseDoubleValue(lowerTerm),
+ upperTerm == null ? null : parseDoubleValue(upperTerm),
includeLower, includeUpper);
}
@@ -223,8 +213,8 @@
@Override
public Filter rangeFilter(IndexFieldDataService fieldData, Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) {
return NumericRangeFieldDataFilter.newDoubleRange((IndexNumericFieldData) fieldData.getForField(this),
- lowerTerm == null ? null : parseValue(lowerTerm),
- upperTerm == null ? null : parseValue(upperTerm),
+ lowerTerm == null ? null : parseDoubleValue(lowerTerm),
+ upperTerm == null ? null : parseDoubleValue(upperTerm),
includeLower, includeUpper);
}

File: LongFieldMapper.java

@@ -160,20 +160,10 @@ public class LongFieldMapper extends NumberFieldMapper<Long> {
@Override
public BytesRef indexedValueForSearch(Object value) {
BytesRef bytesRef = new BytesRef();
- NumericUtils.longToPrefixCoded(parseValue(value), 0, bytesRef); // 0 because of exact match
+ NumericUtils.longToPrefixCoded(parseLongValue(value), 0, bytesRef); // 0 because of exact match
return bytesRef;
}
- private long parseValue(Object value) {
- if (value instanceof Number) {
- return ((Number) value).longValue();
- }
- if (value instanceof BytesRef) {
- return Long.parseLong(((BytesRef) value).utf8ToString());
- }
- return Long.parseLong(value.toString());
- }
@Override
public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) {
long iValue = Long.parseLong(value);
@@ -191,14 +181,14 @@
@Override
public Query termQuery(Object value, @Nullable QueryParseContext context) {
- long iValue = parseValue(value);
+ long iValue = parseLongValue(value);
return NumericRangeQuery.newLongRange(names.indexName(), precisionStep,
iValue, iValue, true, true);
}
@Override
public Filter termFilter(Object value, @Nullable QueryParseContext context) {
- long iValue = parseValue(value);
+ long iValue = parseLongValue(value);
return NumericRangeFilter.newLongRange(names.indexName(), precisionStep,
iValue, iValue, true, true);
}
@@ -206,24 +196,24 @@
@Override
public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) {
return NumericRangeQuery.newLongRange(names.indexName(), precisionStep,
- lowerTerm == null ? null : parseValue(lowerTerm),
- upperTerm == null ? null : parseValue(upperTerm),
+ lowerTerm == null ? null : parseLongValue(lowerTerm),
+ upperTerm == null ? null : parseLongValue(upperTerm),
includeLower, includeUpper);
}
@Override
public Filter rangeFilter(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) {
return NumericRangeFilter.newLongRange(names.indexName(), precisionStep,
- lowerTerm == null ? null : parseValue(lowerTerm),
- upperTerm == null ? null : parseValue(upperTerm),
+ lowerTerm == null ? null : parseLongValue(lowerTerm),
+ upperTerm == null ? null : parseLongValue(upperTerm),
includeLower, includeUpper);
}
@Override
public Filter rangeFilter(IndexFieldDataService fieldData, Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) {
return NumericRangeFieldDataFilter.newLongRange((IndexNumericFieldData) fieldData.getForField(this),
- lowerTerm == null ? null : parseValue(lowerTerm),
- upperTerm == null ? null : parseValue(upperTerm),
+ lowerTerm == null ? null : parseLongValue(lowerTerm),
+ upperTerm == null ? null : parseLongValue(upperTerm),
includeLower, includeUpper);
}

File: NumberFieldMapper.java

@@ -19,6 +19,8 @@
package org.elasticsearch.index.mapper.core;
import com.carrotsearch.hppc.DoubleOpenHashSet;
import com.carrotsearch.hppc.LongOpenHashSet;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -36,9 +38,11 @@ import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.fielddata.IndexFieldDataService;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.internal.AllFieldMapper;
import org.elasticsearch.index.query.QueryParseContext;
import org.elasticsearch.index.search.FieldDataTermsFilter;
import org.elasticsearch.index.similarity.SimilarityProvider;
import java.io.IOException;
@@ -256,6 +260,63 @@ public abstract class NumberFieldMapper<T extends Number> extends AbstractFieldMapper<T>
*/
public abstract Filter rangeFilter(IndexFieldDataService fieldData, Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context);
/**
* A terms filter based on the field data cache for numeric fields.
*/
@Override
public Filter termsFilter(IndexFieldDataService fieldDataService, List values, @Nullable QueryParseContext context) {
IndexNumericFieldData fieldData = fieldDataService.getForField(this);
if (fieldData.getNumericType().isFloatingPoint()) {
// create with initial size large enough to avoid rehashing
DoubleOpenHashSet terms =
new DoubleOpenHashSet((int) (values.size() * (1 + DoubleOpenHashSet.DEFAULT_LOAD_FACTOR)));
for (int i = 0, len = values.size(); i < len; i++) {
terms.add(parseDoubleValue(values.get(i)));
}
return FieldDataTermsFilter.newDoubles(fieldData, terms);
} else {
// create with initial size large enough to avoid rehashing
LongOpenHashSet terms =
new LongOpenHashSet((int) (values.size() * (1 + LongOpenHashSet.DEFAULT_LOAD_FACTOR)));
for (int i = 0, len = values.size(); i < len; i++) {
terms.add(parseLongValue(values.get(i)));
}
return FieldDataTermsFilter.newLongs(fieldData, terms);
}
}
/**
* Converts an object value into a double
*/
public double parseDoubleValue(Object value) {
if (value instanceof Number) {
return ((Number) value).doubleValue();
}
if (value instanceof BytesRef) {
return Double.parseDouble(((BytesRef) value).utf8ToString());
}
return Double.parseDouble(value.toString());
}
/**
* Converts an object value into a long
*/
public long parseLongValue(Object value) {
if (value instanceof Number) {
return ((Number) value).longValue();
}
if (value instanceof BytesRef) {
return Long.parseLong(((BytesRef) value).utf8ToString());
}
return Long.parseLong(value.toString());
}
/**
* Override the default behavior (to return the string, and return the actual Number instance).
*
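As a hedged illustration of the conversion paths the parse helpers above handle, here is a standalone replica of parseLongValue (the real method lives on NumberFieldMapper; parseDoubleValue is analogous):

import org.apache.lucene.util.BytesRef;

public class ParseValueSketch {
    // Mirrors parseLongValue above: Number fast path, BytesRef decode, then
    // a toString() fallback for anything else the query parser hands over.
    static long parseLongValue(Object value) {
        if (value instanceof Number) {
            return ((Number) value).longValue();
        }
        if (value instanceof BytesRef) {
            return Long.parseLong(((BytesRef) value).utf8ToString());
        }
        return Long.parseLong(value.toString());
    }

    public static void main(String[] args) {
        System.out.println(parseLongValue(42L));               // Number path -> 42
        System.out.println(parseLongValue(new BytesRef("7"))); // BytesRef path -> 7
        System.out.println(parseLongValue("13"));              // String path -> 13
    }
}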

File: TermsFilterParser.java

@@ -50,7 +50,6 @@ import static org.elasticsearch.index.query.support.QueryParsers.wrapSmartNameFilter;
public class TermsFilterParser implements FilterParser {
public static final String NAME = "terms";
private IndicesTermsFilterCache termsFilterCache;
@Inject
@@ -208,6 +207,17 @@ public class TermsFilterParser implements FilterParser {
if (cache == null || cache) {
filter = parseContext.cacheFilter(filter, cacheKey);
}
} else if ("fielddata".equals(execution)) {
// if there are no mappings, then nothing has been indexed yet against this shard, so we can return
// no match (but not cached!), since the FieldDataTermsFilter relies on a mapping...
if (fieldMapper == null) {
return Queries.MATCH_NO_FILTER;
}
filter = fieldMapper.termsFilter(parseContext.fieldData(), terms, parseContext);
if (cache != null && cache) {
filter = parseContext.cacheFilter(filter, cacheKey);
}
} else if ("bool".equals(execution)) {
XBooleanFilter boolFiler = new XBooleanFilter();
if (fieldMapper != null) {
@@ -305,7 +315,7 @@ public class TermsFilterParser implements FilterParser {
filter = parseContext.cacheFilter(filter, cacheKey);
}
} else {
- throw new QueryParsingException(parseContext.index(), "bool filter execution value [" + execution + "] not supported");
+ throw new QueryParsingException(parseContext.index(), "terms filter execution value [" + execution + "] not supported");
}
filter = wrapSmartNameFilter(filter, smartNameFieldMappers, parseContext);
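Note the inverted caching default in the "fielddata" branch above: the filter is cached only when caching is requested explicitly, matching the documentation change earlier in this commit. A hedged sketch of opting in from the Java API (assuming the cache(boolean) setter that filter builders of this era expose):

import org.elasticsearch.index.query.FilterBuilder;

import static org.elasticsearch.index.query.FilterBuilders.termsFilter;

public class CachedFieldDataTerms {
    // Cached only because cache(true) is set explicitly; by default the
    // fielddata execution mode leaves the filter uncached.
    public static FilterBuilder cachedFilter() {
        return termsFilter("str", "1", "4")
                .execution("fielddata")
                .cache(true);
    }
}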

File: FieldDataTermsFilter.java (new)

@@ -0,0 +1,267 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.search;
import com.carrotsearch.hppc.DoubleOpenHashSet;
import com.carrotsearch.hppc.LongOpenHashSet;
import com.carrotsearch.hppc.ObjectOpenHashSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.docset.MatchDocIdSet;
import org.elasticsearch.index.fielddata.*;
import java.io.IOException;
/**
* Similar to a {@link org.apache.lucene.queries.TermsFilter} but pulls terms from the fielddata.
*/
public abstract class FieldDataTermsFilter extends Filter {
final IndexFieldData fieldData;
protected FieldDataTermsFilter(IndexFieldData fieldData) {
this.fieldData = fieldData;
}
/**
* Get a {@link FieldDataTermsFilter} that filters on non-numeric terms found in a hppc {@link ObjectOpenHashSet} of
* {@link BytesRef}.
*
* @param fieldData The fielddata for the field.
* @param terms An {@link ObjectOpenHashSet} of terms.
* @return the filter.
*/
public static FieldDataTermsFilter newBytes(IndexFieldData fieldData, ObjectOpenHashSet<BytesRef> terms) {
return new BytesFieldDataFilter(fieldData, terms);
}
/**
* Get a {@link FieldDataTermsFilter} that filters on non-floating point numeric terms found in a hppc
* {@link LongOpenHashSet}.
*
* @param fieldData The fielddata for the field.
* @param terms A {@link LongOpenHashSet} of terms.
* @return the filter.
*/
public static FieldDataTermsFilter newLongs(IndexNumericFieldData fieldData, LongOpenHashSet terms) {
return new LongsFieldDataFilter(fieldData, terms);
}
/**
* Get a {@link FieldDataTermsFilter} that filters on floating point numeric terms found in a hppc
* {@link DoubleOpenHashSet}.
*
* @param fieldData The fielddata for the field.
* @param terms A {@link DoubleOpenHashSet} of terms.
* @return the filter.
*/
public static FieldDataTermsFilter newDoubles(IndexNumericFieldData fieldData, DoubleOpenHashSet terms) {
return new DoublesFieldDataFilter(fieldData, terms);
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (obj == null || !(obj instanceof FieldDataTermsFilter)) return false;
FieldDataTermsFilter that = (FieldDataTermsFilter) obj;
if (!fieldData.getFieldNames().indexName().equals(that.fieldData.getFieldNames().indexName())) return false;
if (this.hashCode() != obj.hashCode()) return false;
return true;
}
@Override
public abstract int hashCode();
@Override
public abstract String toString();
/**
* Filters on non-numeric fields.
*/
protected static class BytesFieldDataFilter extends FieldDataTermsFilter {
final ObjectOpenHashSet<BytesRef> terms;
protected BytesFieldDataFilter(IndexFieldData fieldData, ObjectOpenHashSet<BytesRef> terms) {
super(fieldData);
this.terms = terms;
}
@Override
public int hashCode() {
int hashcode = fieldData.getFieldNames().indexName().hashCode();
hashcode += terms != null ? terms.hashCode() : 0;
return hashcode;
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("BytesFieldDataFilter:");
return sb
.append(fieldData.getFieldNames().indexName())
.append(":")
.append(terms != null ? terms.toString() : "")
.toString();
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
// make sure there are terms to filter on
if (terms == null || terms.isEmpty()) return null;
final BytesValues values = fieldData.load(context).getBytesValues(false); // load fielddata
return new MatchDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
final int numVals = values.setDocument(doc);
for (int i = 0; i < numVals; i++) {
if (terms.contains(values.nextValue())) {
return true;
}
}
return false;
}
};
}
}
/**
* Filters on non-floating point numeric fields.
*/
protected static class LongsFieldDataFilter extends FieldDataTermsFilter {
final LongOpenHashSet terms;
protected LongsFieldDataFilter(IndexNumericFieldData fieldData, LongOpenHashSet terms) {
super(fieldData);
this.terms = terms;
}
@Override
public int hashCode() {
int hashcode = fieldData.getFieldNames().indexName().hashCode();
hashcode += terms != null ? terms.hashCode() : 0;
return hashcode;
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("LongsFieldDataFilter:");
return sb
.append(fieldData.getFieldNames().indexName())
.append(":")
.append(terms != null ? terms.toString() : "")
.toString();
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
// make sure there are terms to filter on
if (terms == null || terms.isEmpty()) return null;
IndexNumericFieldData numericFieldData = (IndexNumericFieldData) fieldData;
if (!numericFieldData.getNumericType().isFloatingPoint()) {
final LongValues values = numericFieldData.load(context).getLongValues(); // load fielddata
return new MatchDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
final int numVals = values.setDocument(doc);
for (int i = 0; i < numVals; i++) {
if (terms.contains(values.nextValue())) {
return true;
}
}
return false;
}
};
}
// only get here if wrong fielddata type in which case
// no docs will match so we just return null.
return null;
}
}
/**
* Filters on floating point numeric fields.
*/
protected static class DoublesFieldDataFilter extends FieldDataTermsFilter {
final DoubleOpenHashSet terms;
protected DoublesFieldDataFilter(IndexNumericFieldData fieldData, DoubleOpenHashSet terms) {
super(fieldData);
this.terms = terms;
}
@Override
public int hashCode() {
int hashcode = fieldData.getFieldNames().indexName().hashCode();
hashcode += terms != null ? terms.hashCode() : 0;
return hashcode;
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("DoublesFieldDataFilter:");
return sb
.append(fieldData.getFieldNames().indexName())
.append(":")
.append(terms != null ? terms.toString() : "")
.toString();
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
// make sure there are terms to filter on
if (terms == null || terms.isEmpty()) return null;
// verify we have a floating point numeric fielddata
IndexNumericFieldData indexNumericFieldData = (IndexNumericFieldData) fieldData;
if (indexNumericFieldData.getNumericType().isFloatingPoint()) {
final DoubleValues values = indexNumericFieldData.load(context).getDoubleValues(); // load fielddata
return new MatchDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
final int numVals = values.setDocument(doc);
for (int i = 0; i < numVals; i++) {
if (terms.contains(values.nextValue())) {
return true;
}
}
return false;
}
};
}
// only get here if wrong fielddata type in which case
// no docs will match so we just return null.
return null;
}
}
}
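A hedged sketch of driving the three factories directly, condensed from the unit tests later in this commit; the fielddata arguments are assumed to come from IndexFieldDataService.getForField(mapper), as the tests do:

import com.carrotsearch.hppc.DoubleOpenHashSet;
import com.carrotsearch.hppc.LongOpenHashSet;
import com.carrotsearch.hppc.ObjectOpenHashSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.search.FieldDataTermsFilter;

public class FactorySketch {
    // Non-numeric terms are compared as BytesRefs against the bytes fielddata.
    static Filter bytesFilter(IndexFieldData stringFieldData) {
        ObjectOpenHashSet<BytesRef> terms =
                ObjectOpenHashSet.from(new BytesRef("str1"), new BytesRef("str5"));
        return FieldDataTermsFilter.newBytes(stringFieldData, terms);
    }

    // Whole-number fields use the long-valued fielddata view.
    static Filter longsFilter(IndexNumericFieldData longFieldData) {
        return FieldDataTermsFilter.newLongs(longFieldData, LongOpenHashSet.from(1L, 5L, 7L));
    }

    // Floating-point fields use the double-valued fielddata view.
    static Filter doublesFilter(IndexNumericFieldData doubleFieldData) {
        return FieldDataTermsFilter.newDoubles(doubleFieldData, DoubleOpenHashSet.from(1.0, 5.0, 7.0));
    }
}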

File: FieldDataTermsFilterTests.java (new)

@@ -0,0 +1,252 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.search;
import com.carrotsearch.hppc.DoubleOpenHashSet;
import com.carrotsearch.hppc.LongOpenHashSet;
import com.carrotsearch.hppc.ObjectOpenHashSet;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexFieldDataService;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.mapper.ContentPath;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.core.DoubleFieldMapper;
import org.elasticsearch.index.mapper.core.LongFieldMapper;
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.hamcrest.CoreMatchers.equalTo;
/**
*/
public class FieldDataTermsFilterTests extends ElasticsearchTestCase {
protected IndexFieldDataService ifdService;
protected IndexWriter writer;
protected AtomicReader reader;
protected StringFieldMapper strMapper;
protected LongFieldMapper lngMapper;
protected DoubleFieldMapper dblMapper;
@Before
public void setup() throws Exception {
super.setUp();
// setup field mappers
strMapper = new StringFieldMapper.Builder("str_value")
.build(new Mapper.BuilderContext(null, new ContentPath(1)));
lngMapper = new LongFieldMapper.Builder("lng_value")
.build(new Mapper.BuilderContext(null, new ContentPath(1)));
dblMapper = new DoubleFieldMapper.Builder("dbl_value")
.build(new Mapper.BuilderContext(null, new ContentPath(1)));
// create index and fielddata service
ifdService = new IndexFieldDataService(new Index("test"));
writer = new IndexWriter(new RAMDirectory(),
new IndexWriterConfig(Lucene.VERSION, new StandardAnalyzer(Lucene.VERSION)));
int numDocs = 10;
for (int i = 0; i < numDocs; i++) {
Document d = new Document();
d.add(new StringField(strMapper.names().indexName(), "str" + i, Field.Store.NO));
d.add(new LongField(lngMapper.names().indexName(), i, Field.Store.NO));
d.add(new DoubleField(dblMapper.names().indexName(), Double.valueOf(i), Field.Store.NO));
writer.addDocument(d);
}
reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(writer, true));
}
@After
public void tearDown() throws Exception {
super.tearDown();
reader.close();
writer.close();
ifdService.clear();
SearchContext.removeCurrent();
}
protected <IFD extends IndexFieldData> IFD getFieldData(FieldMapper fieldMapper) {
return ifdService.getForField(fieldMapper);
}
protected <IFD extends IndexNumericFieldData> IFD getFieldData(NumberFieldMapper fieldMapper) {
return ifdService.getForField(fieldMapper);
}
@Test
public void testBytes() throws Exception {
List<Integer> docs = Arrays.asList(1, 5, 7);
ObjectOpenHashSet<BytesRef> hTerms = new ObjectOpenHashSet<BytesRef>();
List<BytesRef> cTerms = new ArrayList<BytesRef>(docs.size());
for (int i = 0; i < docs.size(); i++) {
BytesRef term = new BytesRef("str" + docs.get(i));
hTerms.add(term);
cTerms.add(term);
}
FieldDataTermsFilter hFilter = FieldDataTermsFilter.newBytes(getFieldData(strMapper), hTerms);
int size = reader.maxDoc();
FixedBitSet result = new FixedBitSet(size);
result.clear(0, size);
assertThat(result.cardinality(), equalTo(0));
result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
assertThat(result.cardinality(), equalTo(docs.size()));
for (int i = 0; i < reader.maxDoc(); i++) {
assertThat(result.get(i), equalTo(docs.contains(i)));
}
// filter from mapper
result.clear(0, size);
assertThat(result.cardinality(), equalTo(0));
result.or(strMapper.termsFilter(ifdService, cTerms, null)
.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
assertThat(result.cardinality(), equalTo(docs.size()));
for (int i = 0; i < reader.maxDoc(); i++) {
assertThat(result.get(i), equalTo(docs.contains(i)));
}
result.clear(0, size);
assertThat(result.cardinality(), equalTo(0));
// filter on a numeric field using BytesRef terms
// should not match any docs
hFilter = FieldDataTermsFilter.newBytes(getFieldData(lngMapper), hTerms);
result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
assertThat(result.cardinality(), equalTo(0));
// filter on a numeric field using BytesRef terms
// should not match any docs
hFilter = FieldDataTermsFilter.newBytes(getFieldData(dblMapper), hTerms);
result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
assertThat(result.cardinality(), equalTo(0));
}
@Test
public void testLongs() throws Exception {
List<Integer> docs = Arrays.asList(1, 5, 7);
LongOpenHashSet hTerms = new LongOpenHashSet();
List<Long> cTerms = new ArrayList<Long>(docs.size());
for (int i = 0; i < docs.size(); i++) {
long term = docs.get(i).longValue();
hTerms.add(term);
cTerms.add(term);
}
FieldDataTermsFilter hFilter = FieldDataTermsFilter.newLongs(getFieldData(lngMapper), hTerms);
int size = reader.maxDoc();
FixedBitSet result = new FixedBitSet(size);
result.clear(0, size);
assertThat(result.cardinality(), equalTo(0));
result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
assertThat(result.cardinality(), equalTo(docs.size()));
for (int i = 0; i < reader.maxDoc(); i++) {
assertThat(result.get(i), equalTo(docs.contains(i)));
}
// filter from mapper
result.clear(0, size);
assertThat(result.cardinality(), equalTo(0));
result.or(lngMapper.termsFilter(ifdService, cTerms, null)
.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
assertThat(result.cardinality(), equalTo(docs.size()));
for (int i = 0; i < reader.maxDoc(); i++) {
assertThat(result.get(i), equalTo(docs.contains(i)));
}
hFilter = FieldDataTermsFilter.newLongs(getFieldData(dblMapper), hTerms);
assertNull(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
}
@Test
public void testDoubles() throws Exception {
List<Integer> docs = Arrays.asList(1, 5, 7);
DoubleOpenHashSet hTerms = new DoubleOpenHashSet();
List<Double> cTerms = new ArrayList<Double>(docs.size());
for (int i = 0; i < docs.size(); i++) {
double term = Double.valueOf(docs.get(i));
hTerms.add(term);
cTerms.add(term);
}
FieldDataTermsFilter hFilter = FieldDataTermsFilter.newDoubles(getFieldData(dblMapper), hTerms);
int size = reader.maxDoc();
FixedBitSet result = new FixedBitSet(size);
result.clear(0, size);
assertThat(result.cardinality(), equalTo(0));
result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
assertThat(result.cardinality(), equalTo(docs.size()));
for (int i = 0; i < reader.maxDoc(); i++) {
assertThat(result.get(i), equalTo(docs.contains(i)));
}
// filter from mapper
result.clear(0, size);
assertThat(result.cardinality(), equalTo(0));
result.or(dblMapper.termsFilter(ifdService, cTerms, null)
.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
assertThat(result.cardinality(), equalTo(docs.size()));
for (int i = 0; i < reader.maxDoc(); i++) {
assertThat(result.get(i), equalTo(docs.contains(i)));
}
hFilter = FieldDataTermsFilter.newDoubles(getFieldData(lngMapper), hTerms);
assertNull(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
}
@Test
public void testNoTerms() throws Exception {
FieldDataTermsFilter hFilterBytes = FieldDataTermsFilter.newBytes(getFieldData(strMapper), new ObjectOpenHashSet<BytesRef>());
FieldDataTermsFilter hFilterLongs = FieldDataTermsFilter.newLongs(getFieldData(lngMapper), new LongOpenHashSet());
FieldDataTermsFilter hFilterDoubles = FieldDataTermsFilter.newDoubles(getFieldData(dblMapper), new DoubleOpenHashSet());
assertNull(hFilterBytes.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
assertNull(hFilterLongs.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
assertNull(hFilterDoubles.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
}
}

File: SimpleQueryTests.java

@@ -195,7 +195,7 @@ public class SimpleQueryTests extends ElasticsearchIntegrationTest {
}
}
@Test // see #3521
public void testAllDocsQueryString() throws InterruptedException, ExecutionException {
client().admin().indices().prepareCreate("test")
.setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_replicas", 0)).execute().actionGet();
@@ -990,6 +990,103 @@
assertThat(searchResponse.getHits().getTotalHits(), equalTo(0l));
}
@Test
public void testFieldDataTermsFilter() throws Exception {
assertAcked(prepareCreate("test").addMapping("type",
jsonBuilder().startObject().startObject("type").startObject("properties")
.startObject("str").field("type", "string").endObject()
.startObject("lng").field("type", "long").endObject()
.startObject("dbl").field("type", "double").endObject()
.endObject().endObject().endObject()));
ensureGreen();
client().prepareIndex("test", "type", "1").setSource("str", "1", "lng", 1l, "dbl", 1.0d).execute().actionGet();
client().prepareIndex("test", "type", "2").setSource("str", "2", "lng", 2l, "dbl", 2.0d).execute().actionGet();
client().prepareIndex("test", "type", "3").setSource("str", "3", "lng", 3l, "dbl", 3.0d).execute().actionGet();
client().prepareIndex("test", "type", "4").setSource("str", "4", "lng", 4l, "dbl", 4.0d).execute().actionGet();
refresh();
SearchResponse searchResponse = client().prepareSearch("test")
.setQuery(filteredQuery(matchAllQuery(), termsFilter("str", "1", "4").execution("fielddata")))
.execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
assertThat(searchResponse.getHits().getAt(0).getId(), anyOf(equalTo("1"), equalTo("4")));
assertThat(searchResponse.getHits().getAt(1).getId(), anyOf(equalTo("1"), equalTo("4")));
searchResponse = client().prepareSearch("test")
.setQuery(filteredQuery(matchAllQuery(), termsFilter("lng", new long[] {2, 3}).execution("fielddata")))
.execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
assertThat(searchResponse.getHits().getAt(0).getId(), anyOf(equalTo("2"), equalTo("3")));
assertThat(searchResponse.getHits().getAt(1).getId(), anyOf(equalTo("2"), equalTo("3")));
searchResponse = client().prepareSearch("test")
.setQuery(filteredQuery(matchAllQuery(), termsFilter("dbl", new double[] {2, 3}).execution("fielddata")))
.execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
assertThat(searchResponse.getHits().getAt(0).getId(), anyOf(equalTo("2"), equalTo("3")));
assertThat(searchResponse.getHits().getAt(1).getId(), anyOf(equalTo("2"), equalTo("3")));
searchResponse = client().prepareSearch("test")
.setQuery(filteredQuery(matchAllQuery(), termsFilter("lng", new int[] {1, 3}).execution("fielddata")))
.execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
assertThat(searchResponse.getHits().getAt(0).getId(), anyOf(equalTo("1"), equalTo("3")));
assertThat(searchResponse.getHits().getAt(1).getId(), anyOf(equalTo("1"), equalTo("3")));
searchResponse = client().prepareSearch("test")
.setQuery(filteredQuery(matchAllQuery(), termsFilter("dbl", new float[] {2, 4}).execution("fielddata")))
.execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
assertThat(searchResponse.getHits().getAt(0).getId(), anyOf(equalTo("2"), equalTo("4")));
assertThat(searchResponse.getHits().getAt(1).getId(), anyOf(equalTo("2"), equalTo("4")));
// test partial matching
searchResponse = client().prepareSearch("test")
.setQuery(filteredQuery(matchAllQuery(), termsFilter("str", "2", "5").execution("fielddata")))
.execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1l));
assertThat(searchResponse.getHits().getAt(0).getId(), equalTo("2"));
searchResponse = client().prepareSearch("test")
.setQuery(filteredQuery(matchAllQuery(), termsFilter("dbl", new double[] {2, 5}).execution("fielddata")))
.execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1l));
assertThat(searchResponse.getHits().getAt(0).getId(), equalTo("2"));
searchResponse = client().prepareSearch("test")
.setQuery(filteredQuery(matchAllQuery(), termsFilter("lng", new long[] {2, 5}).execution("fielddata")))
.execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1l));
assertThat(searchResponse.getHits().getAt(0).getId(), equalTo("2"));
// test valid type, but no matching terms
searchResponse = client().prepareSearch("test")
.setQuery(filteredQuery(matchAllQuery(), termsFilter("str", "5", "6").execution("fielddata")))
.execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(0l));
searchResponse = client().prepareSearch("test")
.setQuery(filteredQuery(matchAllQuery(), termsFilter("dbl", new double[] {5, 6}).execution("fielddata")))
.execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(0l));
searchResponse = client().prepareSearch("test")
.setQuery(filteredQuery(matchAllQuery(), termsFilter("lng", new long[] {5, 6}).execution("fielddata")))
.execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(0l));
}
@Test
public void testTermsLookupFilter() throws Exception {
assertAcked(prepareCreate("lookup").addMapping("type",