From 9c95e914715fc37393faec75b863d07c88406d63 Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Thu, 5 Oct 2017 10:01:09 +0300 Subject: [PATCH] Expose `fuzzy_transpositions` parameter in fuzzy queries (#26870) Add fuzzy_transpositions parameter to multi_match and query_string queries. Add fuzzy_transpositions, fuzzy_prefix_length and fuzzy_max_expansions parameters to simple_query_string query. --- .../index/query/MultiMatchQueryBuilder.java | 38 ++++++++++- .../index/query/QueryStringQueryBuilder.java | 39 ++++++++++- .../index/query/SimpleQueryStringBuilder.java | 68 +++++++++++++++++++ .../index/search/QueryStringQueryParser.java | 13 +++- .../search/SimpleQueryStringQueryParser.java | 46 +++++++++++-- .../query/MultiMatchQueryBuilderTests.java | 21 ++++++ .../query/QueryStringQueryBuilderTests.java | 18 +++++ .../query/SimpleQueryStringBuilderTests.java | 40 +++++++++++ docs/reference/query-dsl/fuzzy-query.asciidoc | 4 ++ .../query-dsl/multi-match-query.asciidoc | 3 +- .../query-dsl/query-string-query.asciidoc | 3 + .../simple-query-string-query.asciidoc | 9 +++ 12 files changed, 289 insertions(+), 13 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java index b439d53e692..4115caa85e7 100644 --- a/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java @@ -57,6 +57,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder + * The default metric used by fuzzy queries to determine a match is the Damerau-Levenshtein + * distance formula which supports transpositions. Setting transposition to false will + * switch to classic Levenshtein distance.
+ * If not set, Damerau-Levenshtein distance metric will be used. + */ + public MultiMatchQueryBuilder fuzzyTranspositions(boolean fuzzyTranspositions) { + this.fuzzyTranspositions = fuzzyTranspositions; + return this; + } + @Override public void doXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(NAME); @@ -573,6 +598,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder fieldsBoosts) throws IOException { @@ -755,6 +785,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder + * The default metric used by fuzzy queries to determine a match is the Damerau-Levenshtein + * distance formula which supports transpositions. Setting transposition to false will + * switch to classic Levenshtein distance.
+ * If not set, Damerau-Levenshtein distance metric will be used. + */ + public QueryStringQueryBuilder fuzzyTranspositions(boolean fuzzyTranspositions) { + this.fuzzyTranspositions = fuzzyTranspositions; + return this; + } + @Override protected void doXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(NAME); @@ -706,6 +732,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder fieldsAndWeights = null; boolean autoGenerateSynonymsPhraseQuery = true; + boolean fuzzyTranspositions = DEFAULT_FUZZY_TRANSPOSITIONS; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); @@ -813,6 +842,8 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder + * The default metric used by fuzzy queries to determine a match is the Damerau-Levenshtein + * distance formula which supports transpositions. Setting transposition to false will + * switch to classic Levenshtein distance.
+ * If not set, Damerau-Levenshtein distance metric will be used. + */ + public SimpleQueryStringBuilder fuzzyTranspositions(boolean fuzzyTranspositions) { + this.settings.fuzzyTranspositions(fuzzyTranspositions); + return this; + } @Override protected Query doToQuery(QueryShardContext context) throws IOException { @@ -460,6 +513,9 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder 0); + + MultiMatchQueryBuilder qb = new MultiMatchQueryBuilder("text").field(STRING_FIELD_NAME); + qb.fuzziness(Fuzziness.TWO); + qb.prefixLength(2); + qb.maxExpansions(5); + qb.fuzzyTranspositions(false); + + Query query = qb.toQuery(createShardContext()); + FuzzyQuery expected = new FuzzyQuery(new Term(STRING_FIELD_NAME, "text"), 2, 2, 5, false); + + assertEquals(expected, query); + } } diff --git a/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java index 9609c8415a7..c29172d88af 100644 --- a/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java @@ -162,6 +162,9 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase 0); + + Query query = new QueryStringQueryBuilder("text~2") + .field(STRING_FIELD_NAME) + .fuzzyPrefixLength(2) + .fuzzyMaxExpansions(5) + .fuzzyTranspositions(false) + .toQuery(createShardContext()); + FuzzyQuery expected = new FuzzyQuery(new Term(STRING_FIELD_NAME, "text"), 2, 2, 5, false); + assertEquals(expected, query); + } + private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) { Settings build = Settings.builder().put(oldIndexSettings) .put(indexSettings) diff --git a/core/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java index 
76f1bed5870..efacd3c1fab 100644 --- a/core/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java @@ -105,6 +105,15 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase 0); + + Query query = new SimpleQueryStringBuilder("text~2") + .field(STRING_FIELD_NAME) + .fuzzyPrefixLength(2) + .fuzzyMaxExpansions(5) + .fuzzyTranspositions(false) + .toQuery(createShardContext()); + FuzzyQuery expected = new FuzzyQuery(new Term(STRING_FIELD_NAME, "text"), 2, 2, 5, false); + assertEquals(expected, query); + } + private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) { Settings build = Settings.builder().put(oldIndexSettings) .put(indexSettings) diff --git a/docs/reference/query-dsl/fuzzy-query.asciidoc b/docs/reference/query-dsl/fuzzy-query.asciidoc index 70f7eb48ada..eb3fc917850 100644 --- a/docs/reference/query-dsl/fuzzy-query.asciidoc +++ b/docs/reference/query-dsl/fuzzy-query.asciidoc @@ -63,6 +63,10 @@ GET /_search The maximum number of terms that the `fuzzy` query will expand to. Defaults to `50`. +`transpositions`:: + + Whether fuzzy transpositions (`ab` -> `ba`) are supported. + Default is `false`. WARNING: This query can be very heavy if `prefix_length` is set to `0` and if `max_expansions` is set to a high number. 
It could result in every term in the diff --git a/docs/reference/query-dsl/multi-match-query.asciidoc b/docs/reference/query-dsl/multi-match-query.asciidoc index 48c3f77d3cb..6f678342248 100644 --- a/docs/reference/query-dsl/multi-match-query.asciidoc +++ b/docs/reference/query-dsl/multi-match-query.asciidoc @@ -137,7 +137,8 @@ follows: Also, accepts `analyzer`, `boost`, `operator`, `minimum_should_match`, `fuzziness`, `lenient`, `prefix_length`, `max_expansions`, `rewrite`, `zero_terms_query`, - `cutoff_frequency` and `auto_generate_synonyms_phrase_query`, as explained in <<query-dsl-match-query, match query>>. + `cutoff_frequency`, `auto_generate_synonyms_phrase_query` and `fuzzy_transpositions`, + as explained in <<query-dsl-match-query, match query>>. [IMPORTANT] [[operator-min]] diff --git a/docs/reference/query-dsl/query-string-query.asciidoc b/docs/reference/query-dsl/query-string-query.asciidoc index f42a3b09f9b..29fe70adb2e 100644 --- a/docs/reference/query-dsl/query-string-query.asciidoc +++ b/docs/reference/query-dsl/query-string-query.asciidoc @@ -83,6 +83,9 @@ to `AUTO`. See <<fuzziness>> for allowed settings. |`fuzzy_prefix_length` |Set the prefix length for fuzzy queries. Default is `0`. +|`fuzzy_transpositions` |Set to `false` to disable fuzzy transpositions (`ab` -> `ba`). +Default is `true`. + |`phrase_slop` |Sets the default slop for phrases. If zero, then exact phrase matches are required. Default value is `0`. diff --git a/docs/reference/query-dsl/simple-query-string-query.asciidoc b/docs/reference/query-dsl/simple-query-string-query.asciidoc index 1f887cf6311..99fbc131c1b 100644 --- a/docs/reference/query-dsl/simple-query-string-query.asciidoc +++ b/docs/reference/query-dsl/simple-query-string-query.asciidoc @@ -70,6 +70,15 @@ Defaults to `true`. |`all_fields` | deprecated[6.0.0, set `fields` to `*` instead] Perform the query on all fields detected in the mapping that can be queried. + +|`fuzzy_prefix_length` |Set the prefix length for fuzzy queries. Default +is `0`. 
+ +|`fuzzy_max_expansions` |Controls the number of terms fuzzy queries will +expand to. Defaults to `50`. + +|`fuzzy_transpositions` |Set to `false` to disable fuzzy transpositions (`ab` -> `ba`). +Default is `true`. |======================================================================= [float]