diff --git a/core/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/core/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index c4f0c403a6d..58045c73052 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -309,23 +309,19 @@ public abstract class MappedFieldType extends FieldType { return value; } - /** Returns the indexed value used to construct search "values". */ - public BytesRef indexedValueForSearch(Object value) { + /** Returns the indexed value used to construct search "values". + * This method is used for the default implementations of most + * query factory methods such as {@link #termQuery}. */ + protected BytesRef indexedValueForSearch(Object value) { return BytesRefs.toBytesRef(value); } - /** - * Creates a term associated with the field of this mapper for the given - * value. Its important to use termQuery when building term queries because - * things like ParentFieldMapper override it to make more interesting - * queries. - */ - protected Term createTerm(Object value) { - return new Term(name(), indexedValueForSearch(value)); - } - + /** Generates a query that will only match documents that contain the given value. + * The default implementation returns a {@link TermQuery} over the value bytes, + * boosted by {@link #boost()}. + * @throws IllegalArgumentException if {@code value} cannot be converted to the expected data type */ public Query termQuery(Object value, @Nullable QueryShardContext context) { - TermQuery query = new TermQuery(createTerm(value)); + TermQuery query = new TermQuery(new Term(name(), indexedValueForSearch(value))); if (boost == 1f || (context != null && context.indexVersionCreated().before(Version.V_5_0_0_alpha1))) { return query; @@ -349,11 +345,12 @@ public abstract class MappedFieldType extends FieldType { } public Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { - return new FuzzyQuery(createTerm(value), fuzziness.asDistance(BytesRefs.toString(value)), prefixLength, maxExpansions, transpositions); + return new FuzzyQuery(new Term(name(), indexedValueForSearch(value)), + fuzziness.asDistance(BytesRefs.toString(value)), prefixLength, maxExpansions, transpositions); } public Query prefixQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryShardContext context) { - PrefixQuery query = new PrefixQuery(createTerm(value)); + PrefixQuery query = new PrefixQuery(new Term(name(), indexedValueForSearch(value))); if (method != null) { query.setRewriteMethod(method); } @@ -365,7 +362,7 @@ public abstract class MappedFieldType extends FieldType { throw new QueryShardException(context, "Cannot use regular expression to filter numeric field [" + name + "]"); } - RegexpQuery query = new RegexpQuery(createTerm(value), flags, maxDeterminizedStates); + RegexpQuery query = new RegexpQuery(new Term(name(), indexedValueForSearch(value)), flags, maxDeterminizedStates); if (method != null) { query.setRewriteMethod(method); } @@ -453,4 +450,19 @@ public abstract class MappedFieldType extends FieldType { return DocValueFormat.RAW; } + /** + * Extract a {@link Term} from a query created with {@link #termQuery} by + * recursively removing {@link BoostQuery} wrappers. + * @throws IllegalArgumentException if the wrapped query is not a {@link TermQuery} + */ + public static Term extractTerm(Query termQuery) { + while (termQuery instanceof BoostQuery) { + termQuery = ((BoostQuery) termQuery).getQuery(); + } + if (termQuery instanceof TermQuery == false) { + throw new IllegalArgumentException("Cannot extract a term from a query of type " + + termQuery.getClass() + ": " + termQuery); + } + return ((TermQuery) termQuery).getTerm(); + } } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java index b79aa94071e..c3c5a5cbccf 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.mapper.core; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.document.Field; +import org.apache.lucene.index.Term; import org.apache.lucene.search.suggest.document.Completion50PostingsFormat; import org.apache.lucene.search.suggest.document.CompletionAnalyzer; import org.apache.lucene.search.suggest.document.CompletionQuery; @@ -265,14 +266,14 @@ public class CompletionFieldMapper extends FieldMapper implements ArrayValueMapp * Completion prefix query */ public CompletionQuery prefixQuery(Object value) { - return new PrefixCompletionQuery(searchAnalyzer().analyzer(), createTerm(value)); + return new PrefixCompletionQuery(searchAnalyzer().analyzer(), new Term(name(), indexedValueForSearch(value))); } /** * Completion prefix regular expression query */ public CompletionQuery regexpQuery(Object value, int flags, int maxDeterminizedStates) { - return new RegexCompletionQuery(createTerm(value), flags, maxDeterminizedStates); + return new RegexCompletionQuery(new Term(name(), indexedValueForSearch(value)), flags, maxDeterminizedStates); } /** @@ -281,7 +282,7 @@ public class CompletionFieldMapper extends FieldMapper implements ArrayValueMapp public CompletionQuery fuzzyQuery(String value, Fuzziness fuzziness, int nonFuzzyPrefixLength, int minFuzzyPrefixLength, int maxExpansions, boolean transpositions, boolean unicodeAware) { - return new FuzzyCompletionQuery(searchAnalyzer().analyzer(), createTerm(value), null, + return new FuzzyCompletionQuery(searchAnalyzer().analyzer(), new Term(name(), indexedValueForSearch(value)), null, fuzziness.asDistance(), transpositions, nonFuzzyPrefixLength, minFuzzyPrefixLength, unicodeAware, maxExpansions); } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java index b46438134ad..369f43e524b 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java @@ -204,7 +204,7 @@ public class AllFieldMapper extends MetadataFieldMapper { @Override public Query termQuery(Object value, QueryShardContext context) { - return queryStringTermQuery(createTerm(value)); + return queryStringTermQuery(new Term(name(), indexedValueForSearch(value))); } } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java index f8d7f2cec80..e150a8cd10f 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java @@ -185,10 +185,6 @@ public class UidFieldMapper extends MetadataFieldMapper { } } - public Term term(String uid) { - return new Term(fieldType().name(), fieldType().indexedValueForSearch(uid)); - } - @Override protected String contentType() { return CONTENT_TYPE; diff --git a/core/src/main/java/org/elasticsearch/index/query/SpanTermQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/SpanTermQueryBuilder.java index a65be6eb12b..d22c54395ac 100644 --- a/core/src/main/java/org/elasticsearch/index/query/SpanTermQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/SpanTermQueryBuilder.java @@ -20,9 +20,9 @@ package org.elasticsearch.index.query; import org.apache.lucene.index.Term; +import org.apache.lucene.search.Query; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.lucene.BytesRefs; @@ -75,17 +75,15 @@ public class SpanTermQueryBuilder extends BaseTermQueryBuilder(name, entry.getValue())); } @@ -203,7 +208,7 @@ public class MultiMatchQuery extends MatchQuery { * we just pick the first field. It shouldn't matter because * fields are already grouped by their analyzers/types. */ - String representativeField = group.get(0).field; + String representativeField = group.get(0).fieldType.name(); Query q = parseGroup(type.matchQueryType(), representativeField, 1f, value, minimumShouldMatch); if (q != null) { queries.add(q); @@ -218,20 +223,7 @@ public class MultiMatchQuery extends MatchQuery { if (blendedFields == null) { return super.blendTerm(term, fieldType); } - final Term[] terms = new Term[blendedFields.length]; - float[] blendedBoost = new float[blendedFields.length]; - for (int i = 0; i < blendedFields.length; i++) { - terms[i] = blendedFields[i].newTerm(term.text()); - blendedBoost[i] = blendedFields[i].boost; - } - if (commonTermsCutoff != null) { - return BlendedTermQuery.commonTermsBlendedQuery(terms, blendedBoost, false, commonTermsCutoff); - } - - if (tieBreaker == 1.0f) { - return BlendedTermQuery.booleanBlendedQuery(terms, blendedBoost, false); - } - return BlendedTermQuery.dismaxBlendedQuery(terms, blendedBoost, tieBreaker); + return MultiMatchQuery.blendTerm(term.bytes(), commonTermsCutoff, tieBreaker, blendedFields); } @Override @@ -245,6 +237,64 @@ public class MultiMatchQuery extends MatchQuery { } } + static Query blendTerm(BytesRef value, Float commonTermsCutoff, float tieBreaker, FieldAndFieldType... blendedFields) { + List queries = new ArrayList<>(); + Term[] terms = new Term[blendedFields.length]; + float[] blendedBoost = new float[blendedFields.length]; + int i = 0; + for (FieldAndFieldType ft : blendedFields) { + Query query; + try { + query = ft.fieldType.termQuery(value, null); + } catch (IllegalArgumentException e) { + // the query expects a certain class of values such as numbers + // of ip addresses and the value can't be parsed, so ignore this + // field + continue; + } + float boost = ft.boost; + while (query instanceof BoostQuery) { + BoostQuery bq = (BoostQuery) query; + query = bq.getQuery(); + boost *= bq.getBoost(); + } + if (query.getClass() == TermQuery.class) { + terms[i] = ((TermQuery) query).getTerm(); + blendedBoost[i] = boost; + i++; + } else { + if (boost != 1f) { + query = new BoostQuery(query, boost); + } + queries.add(query); + } + } + if (i > 0) { + terms = Arrays.copyOf(terms, i); + blendedBoost = Arrays.copyOf(blendedBoost, i); + if (commonTermsCutoff != null) { + queries.add(BlendedTermQuery.commonTermsBlendedQuery(terms, blendedBoost, false, commonTermsCutoff)); + } else if (tieBreaker == 1.0f) { + queries.add(BlendedTermQuery.booleanBlendedQuery(terms, blendedBoost, false)); + } else { + queries.add(BlendedTermQuery.dismaxBlendedQuery(terms, blendedBoost, tieBreaker)); + } + } + if (queries.size() == 1) { + return queries.get(0); + } else { + // best effort: add clauses that are not term queries so that they have an opportunity to match + // however their score contribution will be different + // TODO: can we improve this? + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + bq.setDisableCoord(true); + for (Query query : queries) { + bq.add(query, Occur.SHOULD); + } + return bq.build(); + } + } + @Override protected Query blendTermQuery(Term term, MappedFieldType fieldType) { if (queryBuilder == null) { @@ -262,31 +312,13 @@ public class MultiMatchQuery extends MatchQuery { return queryBuilder.termQuery(fieldType, value); } - private static final class FieldAndFieldType { - final String field; + static final class FieldAndFieldType { final MappedFieldType fieldType; final float boost; - - private FieldAndFieldType(String field, MappedFieldType fieldType, float boost) { - this.field = field; - this.fieldType = fieldType; + FieldAndFieldType(MappedFieldType fieldType, float boost) { + this.fieldType = Objects.requireNonNull(fieldType); this.boost = boost; } - - public Term newTerm(String value) { - try { - /* - * Note that this ignore any overrides the fieldType might do - * for termQuery, meaning things like _parent won't work here. - */ - return new Term(fieldType.name(), fieldType.indexedValueForSearch(value)); - } catch (RuntimeException ex) { - // we can't parse it just use the incoming value -- it will - // just have a DF of 0 at the end of the day and will be ignored - // Note that this is like lenient = true always - } - return new Term(field, value); - } } } diff --git a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java index ca82dea56d3..11f04ae2731 100644 --- a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -24,6 +24,7 @@ import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy; import org.apache.lucene.index.SnapshotDeletionPolicy; import org.apache.lucene.index.Term; +import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryCachingPolicy; import org.apache.lucene.search.UsageTrackingQueryCachingPolicy; import org.apache.lucene.store.AlreadyClosedException; @@ -77,6 +78,7 @@ import org.elasticsearch.index.get.GetStats; import org.elasticsearch.index.get.ShardGetService; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentMapperForType; +import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.SourceToParse; @@ -473,7 +475,10 @@ public class IndexShard extends AbstractIndexShardComponent { if (docMapper.getMapping() != null) { doc.addDynamicMappingsUpdate(docMapper.getMapping()); } - return new Engine.Index(docMapper.getDocumentMapper().uidMapper().term(doc.uid().stringValue()), doc, version, versionType, origin, startTime); + MappedFieldType uidFieldType = docMapper.getDocumentMapper().uidMapper().fieldType(); + Query uidQuery = uidFieldType.termQuery(doc.uid().stringValue(), null); + Term uid = MappedFieldType.extractTerm(uidQuery); + return new Engine.Index(uid, doc, version, versionType, origin, startTime); } /** @@ -509,12 +514,18 @@ public class IndexShard extends AbstractIndexShardComponent { public Engine.Delete prepareDeleteOnPrimary(String type, String id, long version, VersionType versionType) { verifyPrimary(); final DocumentMapper documentMapper = docMapper(type).getDocumentMapper(); - return prepareDelete(type, id, documentMapper.uidMapper().term(Uid.createUid(type, id)), version, versionType, Engine.Operation.Origin.PRIMARY); + final MappedFieldType uidFieldType = documentMapper.uidMapper().fieldType(); + final Query uidQuery = uidFieldType.termQuery(Uid.createUid(type, id), null); + final Term uid = MappedFieldType.extractTerm(uidQuery); + return prepareDelete(type, id, uid, version, versionType, Engine.Operation.Origin.PRIMARY); } public Engine.Delete prepareDeleteOnReplica(String type, String id, long version, VersionType versionType) { final DocumentMapper documentMapper = docMapper(type).getDocumentMapper(); - return prepareDelete(type, id, documentMapper.uidMapper().term(Uid.createUid(type, id)), version, versionType, Engine.Operation.Origin.REPLICA); + final MappedFieldType uidFieldType = documentMapper.uidMapper().fieldType(); + final Query uidQuery = uidFieldType.termQuery(Uid.createUid(type, id), null); + final Term uid = MappedFieldType.extractTerm(uidQuery); + return prepareDelete(type, id, uid, version, versionType, Engine.Operation.Origin.REPLICA); } static Engine.Delete prepareDelete(String type, String id, Term uid, long version, VersionType versionType, Engine.Operation.Origin origin) { diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index a74f560ff4d..0f0a37d909a 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -21,6 +21,7 @@ package org.elasticsearch.search.aggregations.bucket.significant; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchException; @@ -145,8 +146,10 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac return result; } - public long getBackgroundFrequency(long term) { - BytesRef indexedVal = fieldType.indexedValueForSearch(term); + public long getBackgroundFrequency(long value) { + Query query = fieldType.termQuery(value, null); + Term term = MappedFieldType.extractTerm(query); + BytesRef indexedVal = term.bytes(); return getBackgroundFrequency(indexedVal); } @@ -256,7 +259,6 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic, SignificantTermsAggregatorFactory termsAggregatorFactory, List pipelineAggregators, Map metaData) throws IOException { - ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource; final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(); return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter, diff --git a/core/src/test/java/org/elasticsearch/index/query/SpanTermQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/SpanTermQueryBuilderTests.java index 8f005d0a231..03551f8c6ae 100644 --- a/core/src/test/java/org/elasticsearch/index/query/SpanTermQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/SpanTermQueryBuilderTests.java @@ -19,9 +19,10 @@ package org.elasticsearch.index.query; +import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.spans.SpanTermQuery; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.index.mapper.MappedFieldType; @@ -44,8 +45,8 @@ public class SpanTermQueryBuilderTests extends AbstractTermQueryTestCase