SQL: add "fuzziness" option to QUERY and MATCH function predicates (#40529)
* Remove unused "locale" and "lowercase_expanded_terms" options from QUERY. (cherry picked from commit c122fc6edddbb99c73ce25168d1152409c0b7892)
This commit is contained in:
parent
7f7d09af2e
commit
89d97905e5
|
@ -61,11 +61,11 @@ case of an `or` operator or all of the low frequency terms in the case of an `an
|
||||||
<<query-dsl-match-query-cutoff>> page.
|
<<query-dsl-match-query-cutoff>> page.
|
||||||
|
|
||||||
NOTE: The allowed optional parameters for a single-field `MATCH()` variant (for the `match` {es} query) are: `analyzer`, `auto_generate_synonyms_phrase_query`,
|
NOTE: The allowed optional parameters for a single-field `MATCH()` variant (for the `match` {es} query) are: `analyzer`, `auto_generate_synonyms_phrase_query`,
|
||||||
`cutoff_frequency`, `lenient`, `fuzzy_transpositions`, `fuzzy_rewrite`, `minimum_should_match`, `operator`,
|
`cutoff_frequency`, `lenient`, `fuzziness`, `fuzzy_transpositions`, `fuzzy_rewrite`, `minimum_should_match`, `operator`,
|
||||||
`max_expansions`, `prefix_length`.
|
`max_expansions`, `prefix_length`.
|
||||||
|
|
||||||
NOTE: The allowed optional parameters for a multi-field `MATCH()` variant (for the `multi_match` {es} query) are: `analyzer`, `auto_generate_synonyms_phrase_query`,
|
NOTE: The allowed optional parameters for a multi-field `MATCH()` variant (for the `multi_match` {es} query) are: `analyzer`, `auto_generate_synonyms_phrase_query`,
|
||||||
`cutoff_frequency`, `lenient`, `fuzzy_transpositions`, `fuzzy_rewrite`, `minimum_should_match`, `operator`,
|
`cutoff_frequency`, `lenient`, `fuzziness`, `fuzzy_transpositions`, `fuzzy_rewrite`, `minimum_should_match`, `operator`,
|
||||||
`max_expansions`, `prefix_length`, `slop`, `tie_breaker`, `type`.
|
`max_expansions`, `prefix_length`, `slop`, `tie_breaker`, `type`.
|
||||||
|
|
||||||
|
|
||||||
|
@ -115,9 +115,9 @@ include-tagged::{sql-specs}/docs/docs.csv-spec[optionalParameterQuery]
|
||||||
|
|
||||||
NOTE: The allowed optional parameters for `QUERY()` are: `allow_leading_wildcard`, `analyze_wildcard`, `analyzer`,
|
NOTE: The allowed optional parameters for `QUERY()` are: `allow_leading_wildcard`, `analyze_wildcard`, `analyzer`,
|
||||||
`auto_generate_synonyms_phrase_query`, `default_field`, `default_operator`, `enable_position_increments`,
|
`auto_generate_synonyms_phrase_query`, `default_field`, `default_operator`, `enable_position_increments`,
|
||||||
`escape`, `fuzzy_max_expansions`, `fuzzy_prefix_length`, `fuzzy_rewrite`, `fuzzy_transpositions`, `lenient`,
|
`escape`, `fuzziness`, `fuzzy_max_expansions`, `fuzzy_prefix_length`, `fuzzy_rewrite`, `fuzzy_transpositions`,
|
||||||
`locale`, `lowercase_expanded_terms`, `max_determinized_states`, `minimum_should_match`, `phrase_slop`, `rewrite`,
|
`lenient`, `max_determinized_states`, `minimum_should_match`, `phrase_slop`, `rewrite`, `quote_analyzer`,
|
||||||
`quote_analyzer`, `quote_field_suffix`, `tie_breaker`, `time_zone`, `type`.
|
`quote_field_suffix`, `tie_breaker`, `time_zone`, `type`.
|
||||||
|
|
||||||
|
|
||||||
[[sql-functions-search-score]]
|
[[sql-functions-search-score]]
|
||||||
|
|
|
@ -30,6 +30,60 @@ SELECT emp_no, first_name, gender, last_name FROM test_emp WHERE QUERY('Man*', '
|
||||||
10096 |Jayson |M |Mandell
|
10096 |Jayson |M |Mandell
|
||||||
;
|
;
|
||||||
|
|
||||||
|
matchWithFuzziness
|
||||||
|
SELECT first_name, SCORE() FROM test_emp WHERE MATCH(first_name, 'geo', 'fuzziness=6');
|
||||||
|
|
||||||
|
first_name:s | SCORE():f
|
||||||
|
----------------+---------------
|
||||||
|
Gino |1.3684646
|
||||||
|
Gao |2.7369292
|
||||||
|
;
|
||||||
|
|
||||||
|
matchWithFuzzinessAuto
|
||||||
|
SELECT first_name, SCORE() FROM test_emp WHERE MATCH(first_name, 'geo', 'fuzziness=AUTO:1,7;fuzzy_rewrite=scoring_boolean');
|
||||||
|
|
||||||
|
first_name:s | SCORE():f
|
||||||
|
----------------+---------------
|
||||||
|
Gao |2.7369292
|
||||||
|
;
|
||||||
|
|
||||||
|
multiMatchWithFuzzinessAuto
|
||||||
|
SELECT first_name, last_name, SCORE() FROM test_emp WHERE MATCH('first_name^3,last_name^5', 'geo hir', 'fuzziness=AUTO:1,5;operator=or') ORDER BY first_name;
|
||||||
|
|
||||||
|
first_name:s | last_name:s | SCORE():f
|
||||||
|
----------------+-----------------+---------------
|
||||||
|
Gao |Dolinsky |8.210788
|
||||||
|
Shir |McClurg |8.210788
|
||||||
|
;
|
||||||
|
|
||||||
|
multiMatchWithFuzziness
|
||||||
|
SELECT first_name, last_name, SCORE() FROM test_emp WHERE MATCH('first_name^3,last_name^5', 'geo hir', 'fuzziness=5;operator=or') ORDER BY first_name;
|
||||||
|
|
||||||
|
first_name:s | last_name:s | SCORE():f
|
||||||
|
----------------+-----------------+---------------
|
||||||
|
Gao |Dolinsky |8.210788
|
||||||
|
Gino |Leonhardt |4.105394
|
||||||
|
Shir |McClurg |8.210788
|
||||||
|
Uri |Lenart |4.105394
|
||||||
|
;
|
||||||
|
|
||||||
|
queryWithFuzziness
|
||||||
|
SELECT first_name, SCORE() FROM test_emp WHERE QUERY('geo~', 'fuzziness=5;default_field=first_name');
|
||||||
|
|
||||||
|
first_name:s | SCORE():f
|
||||||
|
----------------+---------------
|
||||||
|
Gino |1.3684646
|
||||||
|
Gao |2.7369292
|
||||||
|
;
|
||||||
|
|
||||||
|
queryWithFuzzinessAuto
|
||||||
|
SELECT first_name, SCORE() FROM test_emp WHERE QUERY('geo~', 'fuzziness=AUTO:1,5;default_field=first_name');
|
||||||
|
|
||||||
|
first_name:s | SCORE():f
|
||||||
|
----------------+---------------
|
||||||
|
Gao |2.7369292
|
||||||
|
;
|
||||||
|
|
||||||
matchQuery
|
matchQuery
|
||||||
SELECT emp_no, first_name, gender, last_name FROM test_emp WHERE MATCH(first_name, 'Erez');
|
SELECT emp_no, first_name, gender, last_name FROM test_emp WHERE MATCH(first_name, 'Erez');
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
package org.elasticsearch.xpack.sql.querydsl.query;
|
package org.elasticsearch.xpack.sql.querydsl.query;
|
||||||
|
|
||||||
import org.elasticsearch.common.Booleans;
|
import org.elasticsearch.common.Booleans;
|
||||||
|
import org.elasticsearch.common.unit.Fuzziness;
|
||||||
import org.elasticsearch.index.query.MatchQueryBuilder;
|
import org.elasticsearch.index.query.MatchQueryBuilder;
|
||||||
import org.elasticsearch.index.query.Operator;
|
import org.elasticsearch.index.query.Operator;
|
||||||
import org.elasticsearch.index.query.QueryBuilder;
|
import org.elasticsearch.index.query.QueryBuilder;
|
||||||
|
@ -28,16 +29,17 @@ public class MatchQuery extends LeafQuery {
|
||||||
// TODO: it'd be great if these could be constants instead of Strings, needs a core change to make the fields public first
|
// TODO: it'd be great if these could be constants instead of Strings, needs a core change to make the fields public first
|
||||||
// TODO: add zero terms query support, I'm not sure the best way to parse it yet...
|
// TODO: add zero terms query support, I'm not sure the best way to parse it yet...
|
||||||
// appliers.put("zero_terms_query", (qb, s) -> qb.zeroTermsQuery(s));
|
// appliers.put("zero_terms_query", (qb, s) -> qb.zeroTermsQuery(s));
|
||||||
appliers.put("cutoff_frequency", (qb, s) -> qb.cutoffFrequency(Float.valueOf(s)));
|
|
||||||
appliers.put("lenient", (qb, s) -> qb.lenient(Booleans.parseBoolean(s)));
|
|
||||||
appliers.put("fuzzy_transpositions", (qb, s) -> qb.fuzzyTranspositions(Booleans.parseBoolean(s)));
|
|
||||||
appliers.put("fuzzy_rewrite", (qb, s) -> qb.fuzzyRewrite(s));
|
|
||||||
appliers.put("minimum_should_match", (qb, s) -> qb.minimumShouldMatch(s));
|
|
||||||
appliers.put("operator", (qb, s) -> qb.operator(Operator.fromString(s)));
|
|
||||||
appliers.put("max_expansions", (qb, s) -> qb.maxExpansions(Integer.valueOf(s)));
|
|
||||||
appliers.put("prefix_length", (qb, s) -> qb.prefixLength(Integer.valueOf(s)));
|
|
||||||
appliers.put("analyzer", (qb, s) -> qb.analyzer(s));
|
appliers.put("analyzer", (qb, s) -> qb.analyzer(s));
|
||||||
appliers.put("auto_generate_synonyms_phrase_query", (qb, s) -> qb.autoGenerateSynonymsPhraseQuery(Booleans.parseBoolean(s)));
|
appliers.put("auto_generate_synonyms_phrase_query", (qb, s) -> qb.autoGenerateSynonymsPhraseQuery(Booleans.parseBoolean(s)));
|
||||||
|
appliers.put("cutoff_frequency", (qb, s) -> qb.cutoffFrequency(Float.valueOf(s)));
|
||||||
|
appliers.put("fuzziness", (qb, s) -> qb.fuzziness(Fuzziness.build(s)));
|
||||||
|
appliers.put("fuzzy_transpositions", (qb, s) -> qb.fuzzyTranspositions(Booleans.parseBoolean(s)));
|
||||||
|
appliers.put("fuzzy_rewrite", (qb, s) -> qb.fuzzyRewrite(s));
|
||||||
|
appliers.put("lenient", (qb, s) -> qb.lenient(Booleans.parseBoolean(s)));
|
||||||
|
appliers.put("max_expansions", (qb, s) -> qb.maxExpansions(Integer.valueOf(s)));
|
||||||
|
appliers.put("minimum_should_match", (qb, s) -> qb.minimumShouldMatch(s));
|
||||||
|
appliers.put("operator", (qb, s) -> qb.operator(Operator.fromString(s)));
|
||||||
|
appliers.put("prefix_length", (qb, s) -> qb.prefixLength(Integer.valueOf(s)));
|
||||||
BUILDER_APPLIERS = Collections.unmodifiableMap(appliers);
|
BUILDER_APPLIERS = Collections.unmodifiableMap(appliers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
package org.elasticsearch.xpack.sql.querydsl.query;
|
package org.elasticsearch.xpack.sql.querydsl.query;
|
||||||
|
|
||||||
import org.elasticsearch.common.Booleans;
|
import org.elasticsearch.common.Booleans;
|
||||||
|
import org.elasticsearch.common.unit.Fuzziness;
|
||||||
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
|
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
|
||||||
import org.elasticsearch.index.query.Operator;
|
import org.elasticsearch.index.query.Operator;
|
||||||
import org.elasticsearch.index.query.QueryBuilder;
|
import org.elasticsearch.index.query.QueryBuilder;
|
||||||
|
@ -29,18 +30,19 @@ public class MultiMatchQuery extends LeafQuery {
|
||||||
appliers.put("slop", (qb, s) -> qb.slop(Integer.valueOf(s)));
|
appliers.put("slop", (qb, s) -> qb.slop(Integer.valueOf(s)));
|
||||||
// TODO: add zero terms query support, I'm not sure the best way to parse it yet...
|
// TODO: add zero terms query support, I'm not sure the best way to parse it yet...
|
||||||
// appliers.put("zero_terms_query", (qb, s) -> qb.zeroTermsQuery(s));
|
// appliers.put("zero_terms_query", (qb, s) -> qb.zeroTermsQuery(s));
|
||||||
appliers.put("lenient", (qb, s) -> qb.lenient(Booleans.parseBoolean(s)));
|
appliers.put("analyzer", (qb, s) -> qb.analyzer(s));
|
||||||
|
appliers.put("auto_generate_synonyms_phrase_query", (qb, s) -> qb.autoGenerateSynonymsPhraseQuery(Booleans.parseBoolean(s)));
|
||||||
appliers.put("cutoff_frequency", (qb, s) -> qb.cutoffFrequency(Float.valueOf(s)));
|
appliers.put("cutoff_frequency", (qb, s) -> qb.cutoffFrequency(Float.valueOf(s)));
|
||||||
appliers.put("tie_breaker", (qb, s) -> qb.tieBreaker(Float.valueOf(s)));
|
appliers.put("fuzziness", (qb, s) -> qb.fuzziness(Fuzziness.build(s)));
|
||||||
appliers.put("fuzzy_rewrite", (qb, s) -> qb.fuzzyRewrite(s));
|
appliers.put("fuzzy_rewrite", (qb, s) -> qb.fuzzyRewrite(s));
|
||||||
|
appliers.put("fuzzy_transpositions", (qb, s) -> qb.fuzzyTranspositions(Booleans.parseBoolean(s)));
|
||||||
|
appliers.put("lenient", (qb, s) -> qb.lenient(Booleans.parseBoolean(s)));
|
||||||
|
appliers.put("max_expansions", (qb, s) -> qb.maxExpansions(Integer.valueOf(s)));
|
||||||
appliers.put("minimum_should_match", (qb, s) -> qb.minimumShouldMatch(s));
|
appliers.put("minimum_should_match", (qb, s) -> qb.minimumShouldMatch(s));
|
||||||
appliers.put("operator", (qb, s) -> qb.operator(Operator.fromString(s)));
|
appliers.put("operator", (qb, s) -> qb.operator(Operator.fromString(s)));
|
||||||
appliers.put("max_expansions", (qb, s) -> qb.maxExpansions(Integer.valueOf(s)));
|
|
||||||
appliers.put("prefix_length", (qb, s) -> qb.prefixLength(Integer.valueOf(s)));
|
appliers.put("prefix_length", (qb, s) -> qb.prefixLength(Integer.valueOf(s)));
|
||||||
appliers.put("analyzer", (qb, s) -> qb.analyzer(s));
|
appliers.put("tie_breaker", (qb, s) -> qb.tieBreaker(Float.valueOf(s)));
|
||||||
appliers.put("type", (qb, s) -> qb.type(s));
|
appliers.put("type", (qb, s) -> qb.type(s));
|
||||||
appliers.put("auto_generate_synonyms_phrase_query", (qb, s) -> qb.autoGenerateSynonymsPhraseQuery(Booleans.parseBoolean(s)));
|
|
||||||
appliers.put("fuzzy_transpositions", (qb, s) -> qb.fuzzyTranspositions(Booleans.parseBoolean(s)));
|
|
||||||
BUILDER_APPLIERS = Collections.unmodifiableMap(appliers);
|
BUILDER_APPLIERS = Collections.unmodifiableMap(appliers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
package org.elasticsearch.xpack.sql.querydsl.query;
|
package org.elasticsearch.xpack.sql.querydsl.query;
|
||||||
|
|
||||||
import org.elasticsearch.common.Booleans;
|
import org.elasticsearch.common.Booleans;
|
||||||
|
import org.elasticsearch.common.unit.Fuzziness;
|
||||||
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
|
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
|
||||||
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
|
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
|
||||||
import org.elasticsearch.index.query.Operator;
|
import org.elasticsearch.index.query.Operator;
|
||||||
|
@ -28,30 +29,29 @@ public class QueryStringQuery extends LeafQuery {
|
||||||
static {
|
static {
|
||||||
HashMap<String, BiConsumer<QueryStringQueryBuilder, String>> appliers = new HashMap<>(28);
|
HashMap<String, BiConsumer<QueryStringQueryBuilder, String>> appliers = new HashMap<>(28);
|
||||||
// TODO: it'd be great if these could be constants instead of Strings, needs a core change to make the fields public first
|
// TODO: it'd be great if these could be constants instead of Strings, needs a core change to make the fields public first
|
||||||
|
appliers.put("allow_leading_wildcard", (qb, s) -> qb.allowLeadingWildcard(Booleans.parseBoolean(s)));
|
||||||
|
appliers.put("analyze_wildcard", (qb, s) -> qb.analyzeWildcard(Booleans.parseBoolean(s)));
|
||||||
|
appliers.put("analyzer", (qb, s) -> qb.analyzer(s));
|
||||||
|
appliers.put("auto_generate_synonyms_phrase_query", (qb, s) -> qb.autoGenerateSynonymsPhraseQuery(Booleans.parseBoolean(s)));
|
||||||
appliers.put("default_field", (qb, s) -> qb.defaultField(s));
|
appliers.put("default_field", (qb, s) -> qb.defaultField(s));
|
||||||
appliers.put("default_operator", (qb, s) -> qb.defaultOperator(Operator.fromString(s)));
|
appliers.put("default_operator", (qb, s) -> qb.defaultOperator(Operator.fromString(s)));
|
||||||
appliers.put("analyzer", (qb, s) -> qb.analyzer(s));
|
|
||||||
appliers.put("quote_analyzer", (qb, s) -> qb.quoteAnalyzer(s));
|
|
||||||
appliers.put("allow_leading_wildcard", (qb, s) -> qb.allowLeadingWildcard(Booleans.parseBoolean(s)));
|
|
||||||
appliers.put("max_determinized_states", (qb, s) -> qb.maxDeterminizedStates(Integer.valueOf(s)));
|
|
||||||
appliers.put("lowercase_expanded_terms", (qb, s) -> {});
|
|
||||||
appliers.put("enable_position_increments", (qb, s) -> qb.enablePositionIncrements(Booleans.parseBoolean(s)));
|
appliers.put("enable_position_increments", (qb, s) -> qb.enablePositionIncrements(Booleans.parseBoolean(s)));
|
||||||
appliers.put("escape", (qb, s) -> qb.escape(Booleans.parseBoolean(s)));
|
appliers.put("escape", (qb, s) -> qb.escape(Booleans.parseBoolean(s)));
|
||||||
appliers.put("fuzzy_prefix_length", (qb, s) -> qb.fuzzyPrefixLength(Integer.valueOf(s)));
|
appliers.put("fuzziness", (qb, s) -> qb.fuzziness(Fuzziness.build(s)));
|
||||||
appliers.put("fuzzy_max_expansions", (qb, s) -> qb.fuzzyMaxExpansions(Integer.valueOf(s)));
|
appliers.put("fuzzy_max_expansions", (qb, s) -> qb.fuzzyMaxExpansions(Integer.valueOf(s)));
|
||||||
|
appliers.put("fuzzy_prefix_length", (qb, s) -> qb.fuzzyPrefixLength(Integer.valueOf(s)));
|
||||||
appliers.put("fuzzy_rewrite", (qb, s) -> qb.fuzzyRewrite(s));
|
appliers.put("fuzzy_rewrite", (qb, s) -> qb.fuzzyRewrite(s));
|
||||||
appliers.put("phrase_slop", (qb, s) -> qb.phraseSlop(Integer.valueOf(s)));
|
appliers.put("fuzzy_transpositions", (qb, s) -> qb.fuzzyTranspositions(Booleans.parseBoolean(s)));
|
||||||
appliers.put("tie_breaker", (qb, s) -> qb.tieBreaker(Float.valueOf(s)));
|
|
||||||
appliers.put("analyze_wildcard", (qb, s) -> qb.analyzeWildcard(Booleans.parseBoolean(s)));
|
|
||||||
appliers.put("rewrite", (qb, s) -> qb.rewrite(s));
|
|
||||||
appliers.put("minimum_should_match", (qb, s) -> qb.minimumShouldMatch(s));
|
|
||||||
appliers.put("quote_field_suffix", (qb, s) -> qb.quoteFieldSuffix(s));
|
|
||||||
appliers.put("lenient", (qb, s) -> qb.lenient(Booleans.parseBoolean(s)));
|
appliers.put("lenient", (qb, s) -> qb.lenient(Booleans.parseBoolean(s)));
|
||||||
appliers.put("locale", (qb, s) -> {});
|
appliers.put("max_determinized_states", (qb, s) -> qb.maxDeterminizedStates(Integer.valueOf(s)));
|
||||||
|
appliers.put("minimum_should_match", (qb, s) -> qb.minimumShouldMatch(s));
|
||||||
|
appliers.put("phrase_slop", (qb, s) -> qb.phraseSlop(Integer.valueOf(s)));
|
||||||
|
appliers.put("rewrite", (qb, s) -> qb.rewrite(s));
|
||||||
|
appliers.put("quote_analyzer", (qb, s) -> qb.quoteAnalyzer(s));
|
||||||
|
appliers.put("quote_field_suffix", (qb, s) -> qb.quoteFieldSuffix(s));
|
||||||
|
appliers.put("tie_breaker", (qb, s) -> qb.tieBreaker(Float.valueOf(s)));
|
||||||
appliers.put("time_zone", (qb, s) -> qb.timeZone(s));
|
appliers.put("time_zone", (qb, s) -> qb.timeZone(s));
|
||||||
appliers.put("type", (qb, s) -> qb.type(MultiMatchQueryBuilder.Type.parse(s, LoggingDeprecationHandler.INSTANCE)));
|
appliers.put("type", (qb, s) -> qb.type(MultiMatchQueryBuilder.Type.parse(s, LoggingDeprecationHandler.INSTANCE)));
|
||||||
appliers.put("auto_generate_synonyms_phrase_query", (qb, s) -> qb.autoGenerateSynonymsPhraseQuery(Booleans.parseBoolean(s)));
|
|
||||||
appliers.put("fuzzy_transpositions", (qb, s) -> qb.fuzzyTranspositions(Booleans.parseBoolean(s)));
|
|
||||||
BUILDER_APPLIERS = Collections.unmodifiableMap(appliers);
|
BUILDER_APPLIERS = Collections.unmodifiableMap(appliers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue