From ce30afcd01143946430bafcb568a65897101cb86 Mon Sep 17 00:00:00 2001 From: Marios Trivyzas Date: Thu, 30 May 2019 18:04:47 +0200 Subject: [PATCH] Deprecate CommonTermsQuery and cutoff_frequency (#42619) (#42691) Since the max_score optimization landed in Elasticsearch 7, the CommonTermsQuery is redundant and slower. Moreover the cutoff_frequency parameter for MatchQuery and MultiMatchQuery is redundant. Relates to #27096 (cherry picked from commit 04b74497314eeec076753a33b3b6cc11549646e8) --- .../query-dsl/common-terms-query.asciidoc | 7 ++++ docs/reference/query-dsl/match-query.asciidoc | 3 ++ .../search.query/50_queries_with_synonyms.yml | 27 +++++++++++++ .../lucene/queries/BlendedTermQuery.java | 5 +++ .../queries/ExtendedCommonTermsQuery.java | 4 ++ .../index/query/CommonTermsQueryBuilder.java | 11 +++++ .../index/query/MatchQueryBuilder.java | 15 ++++++- .../index/query/MultiMatchQueryBuilder.java | 15 ++++++- .../index/query/QueryBuilders.java | 3 ++ .../index/search/MatchQuery.java | 4 ++ .../elasticsearch/search/SearchModule.java | 7 +++- .../query/CommonTermsQueryBuilderTests.java | 40 +++++++++++++++++++ .../query/CommonTermsQueryParserTests.java | 4 +- .../index/query/MatchQueryBuilderTests.java | 6 +-- .../query/MultiMatchQueryBuilderTests.java | 3 -- .../search/SearchModuleTests.java | 6 +-- .../profile/query/RandomQueryGenerator.java | 33 +-------------- .../test/AbstractQueryTestCase.java | 2 +- 18 files changed, 145 insertions(+), 50 deletions(-) diff --git a/docs/reference/query-dsl/common-terms-query.asciidoc b/docs/reference/query-dsl/common-terms-query.asciidoc index 87288778246..f2d784eb0c4 100644 --- a/docs/reference/query-dsl/common-terms-query.asciidoc +++ b/docs/reference/query-dsl/common-terms-query.asciidoc @@ -1,6 +1,8 @@ [[query-dsl-common-terms-query]] === Common Terms Query +deprecated[7.3.0,"Use <> instead, which skips blocks of documents efficiently, without any configuration, provided that the total number of hits is not tracked."] + The `common` terms query is a modern alternative to stopwords which improves the precision and recall of search results (by taking stopwords into account), without sacrificing performance. @@ -83,6 +85,7 @@ GET /_search } -------------------------------------------------- // CONSOLE +// TEST[warning:Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]] The number of terms which should match can be controlled with the <> @@ -108,6 +111,7 @@ GET /_search } -------------------------------------------------- // CONSOLE +// TEST[warning:Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]] which is roughly equivalent to: @@ -154,6 +158,7 @@ GET /_search } -------------------------------------------------- // CONSOLE +// TEST[warning:Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]] which is roughly equivalent to: @@ -209,6 +214,7 @@ GET /_search } -------------------------------------------------- // CONSOLE +// TEST[warning:Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]] which is roughly equivalent to: @@ -270,6 +276,7 @@ GET /_search } -------------------------------------------------- // CONSOLE +// TEST[warning:Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]] which is roughly equivalent to: diff --git a/docs/reference/query-dsl/match-query.asciidoc b/docs/reference/query-dsl/match-query.asciidoc index 1f8bf6892ab..5e45d2b3212 100644 --- a/docs/reference/query-dsl/match-query.asciidoc +++ b/docs/reference/query-dsl/match-query.asciidoc @@ -103,6 +103,8 @@ GET /_search [[query-dsl-match-query-cutoff]] ===== Cutoff frequency +deprecated[7.3.0,"This option can be omitted as the <> can skip block of documents efficiently, without any configuration, provided that the total number of hits is not tracked."] + The match query supports a `cutoff_frequency` that allows specifying an absolute or relative document frequency where high frequency terms are moved into an optional subquery and are only scored @@ -139,6 +141,7 @@ GET /_search } -------------------------------------------------- // CONSOLE +// TEST[warning:Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]] IMPORTANT: The `cutoff_frequency` option operates on a per-shard-level. This means that when trying it out on test indexes with low document numbers you diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/50_queries_with_synonyms.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/50_queries_with_synonyms.yml index 784ffd9dd12..ce9cc749557 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/50_queries_with_synonyms.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/50_queries_with_synonyms.yml @@ -1,5 +1,8 @@ --- "Test common terms query with stacked tokens": + - skip: + features: "warnings" + - do: indices.create: index: test @@ -47,6 +50,8 @@ refresh: true - do: + warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' search: rest_total_hits_as_int: true body: @@ -62,6 +67,8 @@ - match: { hits.hits.2._id: "3" } - do: + warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' search: rest_total_hits_as_int: true body: @@ -76,6 +83,8 @@ - match: { hits.hits.1._id: "2" } - do: + warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' search: rest_total_hits_as_int: true body: @@ -90,6 +99,8 @@ - match: { hits.hits.2._id: "3" } - do: + warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' search: rest_total_hits_as_int: true body: @@ -103,6 +114,8 @@ - match: { hits.hits.0._id: "2" } - do: + warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' search: rest_total_hits_as_int: true body: @@ -118,6 +131,8 @@ - match: { hits.hits.1._id: "1" } - do: + warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' search: rest_total_hits_as_int: true body: @@ -132,6 +147,8 @@ - match: { hits.hits.0._id: "2" } - do: + warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' search: rest_total_hits_as_int: true body: @@ -144,6 +161,8 @@ - match: { hits.hits.0._id: "2" } - do: + warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' search: rest_total_hits_as_int: true body: @@ -158,6 +177,8 @@ - match: { hits.hits.2._id: "3" } - do: + warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' search: rest_total_hits_as_int: true body: @@ -172,6 +193,8 @@ - match: { hits.hits.1._id: "2" } - do: + warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' search: rest_total_hits_as_int: true body: @@ -187,6 +210,8 @@ - match: { hits.hits.2._id: "3" } - do: + warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' search: rest_total_hits_as_int: true body: @@ -201,6 +226,8 @@ - match: { hits.hits.1._id: "2" } - do: + warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [multi_match] query can skip block of documents efficiently if the total number of hits is not tracked]' search: rest_total_hits_as_int: true body: diff --git a/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java b/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java index c696d476bbb..f823f3a1426 100644 --- a/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java +++ b/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java @@ -278,6 +278,11 @@ public abstract class BlendedTermQuery extends Query { return Objects.hash(classHash(), Arrays.hashCode(equalsTerms())); } + /** + * @deprecated Since max_score optimization landed in 7.0, normal MultiMatchQuery + * will achieve the same result without any configuration. + */ + @Deprecated public static BlendedTermQuery commonTermsBlendedQuery(Term[] terms, final float[] boosts, final float maxTermFrequency) { return new BlendedTermQuery(terms, boosts) { @Override diff --git a/server/src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java b/server/src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java index 249b7fa83b5..2d70ed8b90a 100644 --- a/server/src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java +++ b/server/src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java @@ -26,7 +26,11 @@ import org.elasticsearch.common.lucene.search.Queries; * Extended version of {@link CommonTermsQuery} that allows to pass in a * {@code minimumNumberShouldMatch} specification that uses the actual num of high frequent terms * to calculate the minimum matching terms. + * + * @deprecated Since max_optimization optimization landed in 7.0, normal MatchQuery + * will achieve the same result without any configuration. */ +@Deprecated public class ExtendedCommonTermsQuery extends CommonTermsQuery { public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) { diff --git a/server/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java index 2fd4d710f96..3c2f80a25f7 100644 --- a/server/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java @@ -48,9 +48,16 @@ import java.util.Objects; * and high-frequency terms are added to an optional boolean clause. The * optional clause is only executed if the required "low-frequency' clause * matches. + * + * @deprecated Since max_optimization optimization landed in 7.0, normal MatchQuery + * will achieve the same result without any configuration. */ +@Deprecated public class CommonTermsQueryBuilder extends AbstractQueryBuilder { + public static final String COMMON_TERMS_QUERY_DEPRECATION_MSG = "[match] query which can efficiently " + + "skip blocks of documents if the total number of hits is not tracked"; + public static final String NAME = "common"; public static final float DEFAULT_CUTOFF_FREQ = 0.01f; @@ -87,7 +94,9 @@ public class CommonTermsQueryBuilder extends AbstractQueryBuilder { + + private static final String CUTOFF_FREQUENCY_DEPRECATION_MSG = "you can omit this option, " + + "the [match] query can skip block of documents efficiently if the total number of hits is not tracked"; + public static final ParseField ZERO_TERMS_QUERY_FIELD = new ParseField("zero_terms_query"); - public static final ParseField CUTOFF_FREQUENCY_FIELD = new ParseField("cutoff_frequency"); + /** + * @deprecated Since max_optimization optimization landed in 7.0, normal MatchQuery + * will achieve the same result without any configuration. + */ + @Deprecated + public static final ParseField CUTOFF_FREQUENCY_FIELD = + new ParseField("cutoff_frequency").withAllDeprecated(CUTOFF_FREQUENCY_DEPRECATION_MSG); public static final ParseField LENIENT_FIELD = new ParseField("lenient"); public static final ParseField FUZZY_TRANSPOSITIONS_FIELD = new ParseField("fuzzy_transpositions"); public static final ParseField FUZZY_REWRITE_FIELD = new ParseField("fuzzy_rewrite"); @@ -252,7 +262,10 @@ public class MatchQueryBuilder extends AbstractQueryBuilder { * Set a cutoff value in [0..1] (or absolute number >=1) representing the * maximum threshold of a terms document frequency to be considered a low * frequency term. + * + * @deprecated see {@link MatchQueryBuilder#CUTOFF_FREQUENCY_FIELD} for more details */ + @Deprecated public MatchQueryBuilder cutoffFrequency(float cutoff) { this.cutoffFrequency = cutoff; return this; diff --git a/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java index 8200a8068af..2a7c3729fe2 100644 --- a/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java @@ -51,6 +51,10 @@ import java.util.TreeMap; * Same as {@link MatchQueryBuilder} but supports multiple fields. */ public class MultiMatchQueryBuilder extends AbstractQueryBuilder { + + private static final String CUTOFF_FREQUENCY_DEPRECATION_MSG = "you can omit this option, " + + "the [multi_match] query can skip block of documents efficiently if the total number of hits is not tracked"; + public static final String NAME = "multi_match"; public static final MultiMatchQueryBuilder.Type DEFAULT_TYPE = MultiMatchQueryBuilder.Type.BEST_FIELDS; @@ -64,7 +68,8 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder(MoreLikeThisQueryBuilder.NAME, MoreLikeThisQueryBuilder::new, MoreLikeThisQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(WrapperQueryBuilder.NAME, WrapperQueryBuilder::new, WrapperQueryBuilder::fromXContent)); - registerQuery(new QuerySpec<>(CommonTermsQueryBuilder.NAME, CommonTermsQueryBuilder::new, CommonTermsQueryBuilder::fromXContent)); + registerQuery(new QuerySpec<>(new ParseField(CommonTermsQueryBuilder.NAME).withAllDeprecated(COMMON_TERMS_QUERY_DEPRECATION_MSG), + CommonTermsQueryBuilder::new, CommonTermsQueryBuilder::fromXContent)); registerQuery( new QuerySpec<>(SpanMultiTermQueryBuilder.NAME, SpanMultiTermQueryBuilder::new, SpanMultiTermQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(FunctionScoreQueryBuilder.NAME, FunctionScoreQueryBuilder::new, diff --git a/server/src/test/java/org/elasticsearch/index/query/CommonTermsQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/CommonTermsQueryBuilderTests.java index 5e443ec41ed..d02b60c52d5 100644 --- a/server/src/test/java/org/elasticsearch/index/query/CommonTermsQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/CommonTermsQueryBuilderTests.java @@ -111,6 +111,30 @@ public class CommonTermsQueryBuilderTests extends AbstractQueryTestCase new CommonTermsQueryBuilder(null, "text")); assertEquals("field name is null or empty", e.getMessage()); @@ -146,6 +170,8 @@ public class CommonTermsQueryBuilderTests extends AbstractQueryTestCase parseQuery(shortJson)); assertEquals("[common] query doesn't support multiple fields, found [message1] and [message2]", e.getMessage()); + + assertDeprecationWarning(); + } + + private void assertDeprecationWarning() { + assertWarnings("Deprecated field [common] used, replaced by [" + CommonTermsQueryBuilder.COMMON_TERMS_QUERY_DEPRECATION_MSG + "]"); } } diff --git a/server/src/test/java/org/elasticsearch/index/query/CommonTermsQueryParserTests.java b/server/src/test/java/org/elasticsearch/index/query/CommonTermsQueryParserTests.java index f4e737ea4b0..761520de039 100644 --- a/server/src/test/java/org/elasticsearch/index/query/CommonTermsQueryParserTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/CommonTermsQueryParserTests.java @@ -22,10 +22,8 @@ package org.elasticsearch.index.query; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.test.ESSingleNodeTestCase; -import java.io.IOException; - public class CommonTermsQueryParserTests extends ESSingleNodeTestCase { - public void testWhenParsedQueryIsNullNoNullPointerExceptionIsThrown() throws IOException { + public void testWhenParsedQueryIsNullNoNullPointerExceptionIsThrown() { final String index = "test-index"; final String type = "test-type"; client() diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java index a7aad3dbc3e..f79bbb86242 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java @@ -124,10 +124,6 @@ public class MatchQueryBuilderTests extends FullTextQueryTestCase query.parse(Type.PHRASE, STRING_FIELD_NAME, "")); } - + private static class MockGraphAnalyzer extends Analyzer { CannedBinaryTokenStream tokenStream; diff --git a/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java index 6590a560935..970a4c3a37e 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java @@ -134,9 +134,6 @@ public class MultiMatchQueryBuilderTests extends FullTextQueryTestCase allSupportedQueries = new ArrayList<>(); Collections.addAll(allSupportedQueries, NON_DEPRECATED_QUERIES); Collections.addAll(allSupportedQueries, DEPRECATED_QUERIES); @@ -254,6 +254,7 @@ public class SearchModuleTests extends ESTestCase { Set registeredNonDeprecated = module.getNamedXContents().stream() .filter(e -> e.categoryClass.equals(QueryBuilder.class)) + .filter(e -> e.name.getDeprecatedNames().length == 0) .map(e -> e.name.getPreferredName()) .collect(toSet()); Set registeredAll = module.getNamedXContents().stream() @@ -316,7 +317,6 @@ public class SearchModuleTests extends ESTestCase { private static final String[] NON_DEPRECATED_QUERIES = new String[] { "bool", "boosting", - "common", "constant_score", "dis_max", "exists", @@ -364,7 +364,7 @@ public class SearchModuleTests extends ESTestCase { }; //add here deprecated queries to make sure we log a deprecation warnings when they are used - private static final String[] DEPRECATED_QUERIES = new String[] {}; + private static final String[] DEPRECATED_QUERIES = new String[] {"common"}; /** * Dummy test {@link AggregationBuilder} used to test registering aggregation builders. diff --git a/server/src/test/java/org/elasticsearch/search/profile/query/RandomQueryGenerator.java b/server/src/test/java/org/elasticsearch/search/profile/query/RandomQueryGenerator.java index 00b859394c6..ea9ef964153 100644 --- a/server/src/test/java/org/elasticsearch/search/profile/query/RandomQueryGenerator.java +++ b/server/src/test/java/org/elasticsearch/search/profile/query/RandomQueryGenerator.java @@ -22,11 +22,9 @@ package org.elasticsearch.search.profile.query; import org.apache.lucene.util.English; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.CommonTermsQueryBuilder; import org.elasticsearch.index.query.DisMaxQueryBuilder; import org.elasticsearch.index.query.FuzzyQueryBuilder; import org.elasticsearch.index.query.IdsQueryBuilder; -import org.elasticsearch.index.query.Operator; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.RangeQueryBuilder; @@ -72,7 +70,7 @@ public class RandomQueryGenerator { } private static QueryBuilder randomTerminalQuery(List stringFields, List numericFields, int numDocs) { - switch (randomIntBetween(0,6)) { + switch (randomIntBetween(0,5)) { case 0: return randomTermQuery(stringFields, numDocs); case 1: @@ -82,10 +80,8 @@ public class RandomQueryGenerator { case 3: return QueryBuilders.matchAllQuery(); case 4: - return randomCommonTermsQuery(stringFields, numDocs); - case 5: return randomFuzzyQuery(stringFields); - case 6: + case 5: return randomIDsQuery(); default: return randomTermQuery(stringFields, numDocs); @@ -169,31 +165,6 @@ public class RandomQueryGenerator { return QueryBuilders.constantScoreQuery(randomQueryBuilder(stringFields, numericFields, numDocs, depth - 1)); } - private static QueryBuilder randomCommonTermsQuery(List fields, int numDocs) { - int numTerms = randomInt(numDocs); - - QueryBuilder q = QueryBuilders.commonTermsQuery(randomField(fields), randomQueryString(numTerms)); - if (randomBoolean()) { - ((CommonTermsQueryBuilder)q).boost(randomFloat()); - } - - if (randomBoolean()) { - ((CommonTermsQueryBuilder)q).cutoffFrequency(randomFloat()); - } - - if (randomBoolean()) { - ((CommonTermsQueryBuilder)q).highFreqMinimumShouldMatch(Integer.toString(randomInt(numTerms))) - .highFreqOperator(randomBoolean() ? Operator.AND : Operator.OR); - } - - if (randomBoolean()) { - ((CommonTermsQueryBuilder)q).lowFreqMinimumShouldMatch(Integer.toString(randomInt(numTerms))) - .lowFreqOperator(randomBoolean() ? Operator.AND : Operator.OR); - } - - return q; - } - private static QueryBuilder randomFuzzyQuery(List fields) { QueryBuilder q = QueryBuilders.fuzzyQuery(randomField(fields), randomQueryString(1)); diff --git a/test/framework/src/main/java/org/elasticsearch/test/AbstractQueryTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/AbstractQueryTestCase.java index 4abb39bf6a4..095db80e6fe 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/AbstractQueryTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/AbstractQueryTestCase.java @@ -164,7 +164,7 @@ public abstract class AbstractQueryTestCase> * parse exception. Some specific objects do not cause any exception as they can hold arbitrary content; they can be * declared by overriding {@link #getObjectsHoldingArbitraryContent()}. */ - public final void testUnknownObjectException() throws IOException { + public void testUnknownObjectException() throws IOException { Set candidates = new HashSet<>(); // Adds the valid query to the list of queries to modify and test candidates.add(createTestQueryBuilder().toString());