diff --git a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java index b6f0020fb5b..bbb27aa11db 100644 --- a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java +++ b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java @@ -104,6 +104,7 @@ public class MapperQueryParser extends QueryParser { setDefaultOperator(settings.defaultOperator()); setFuzzyPrefixLength(settings.fuzzyPrefixLength()); setLocale(settings.locale()); + setSplitOnWhitespace(settings.splitOnWhitespace()); } /** diff --git a/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java b/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java index c1fc2ae556e..a03c84d8980 100644 --- a/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java +++ b/core/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java @@ -79,6 +79,8 @@ public class QueryParserSettings { /** To limit effort spent determinizing regexp queries. */ private int maxDeterminizedStates; + private boolean splitOnWhitespace; + public QueryParserSettings(String queryString) { this.queryString = queryString; } @@ -290,4 +292,12 @@ public class QueryParserSettings { public Fuzziness fuzziness() { return fuzziness; } + + public void splitOnWhitespace(boolean value) { + this.splitOnWhitespace = value; + } + + public boolean splitOnWhitespace() { + return splitOnWhitespace; + } } diff --git a/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java index 807343237d2..d88484c98a6 100644 --- a/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java @@ -26,6 +26,7 @@ import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.automaton.Operations; +import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; @@ -59,6 +60,8 @@ import java.util.TreeMap; public class QueryStringQueryBuilder extends AbstractQueryBuilder { public static final String NAME = "query_string"; + public static final Version V_5_1_0_UNRELEASED = Version.fromId(5010099); + public static final boolean DEFAULT_AUTO_GENERATE_PHRASE_QUERIES = false; public static final int DEFAULT_MAX_DETERMINED_STATES = Operations.DEFAULT_MAX_DETERMINIZED_STATES; public static final boolean DEFAULT_LOWERCASE_EXPANDED_TERMS = true; @@ -72,6 +75,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder{@value #DEFAULT_SPLIT_ON_WHITESPACE}. + */ + public QueryStringQueryBuilder splitOnWhitespace(boolean value) { + this.splitOnWhitespace = value; + return this; + } + + public boolean splitOnWhitespace() { + return splitOnWhitespace; + } + @Override protected void doXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(NAME); @@ -626,6 +654,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder fieldsAndWeights = new HashMap<>(); while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { @@ -750,6 +780,8 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder 0); + // splitOnWhitespace=false + { + QueryStringQueryBuilder queryBuilder = + new QueryStringQueryBuilder("foo bar") + .field(STRING_FIELD_NAME).field(STRING_FIELD_NAME_2) + .splitOnWhitespace(false); + Query query = queryBuilder.toQuery(createShardContext()); + BooleanQuery bq1 = + new BooleanQuery.Builder() + .add(new BooleanClause(new TermQuery(new Term(STRING_FIELD_NAME, "foo")), BooleanClause.Occur.SHOULD)) + .add(new BooleanClause(new TermQuery(new Term(STRING_FIELD_NAME, "bar")), BooleanClause.Occur.SHOULD)) + .build(); + List disjuncts = new ArrayList<>(); + disjuncts.add(bq1); + disjuncts.add(new TermQuery(new Term(STRING_FIELD_NAME_2, "foo bar"))); + DisjunctionMaxQuery expectedQuery = new DisjunctionMaxQuery(disjuncts, 0.0f); + assertThat(query, equalTo(expectedQuery)); + } + + { + QueryStringQueryBuilder queryBuilder = + new QueryStringQueryBuilder("mapped_string:other foo bar") + .field(STRING_FIELD_NAME).field(STRING_FIELD_NAME_2) + .splitOnWhitespace(false); + Query query = queryBuilder.toQuery(createShardContext()); + BooleanQuery bq1 = + new BooleanQuery.Builder() + .add(new BooleanClause(new TermQuery(new Term(STRING_FIELD_NAME, "foo")), BooleanClause.Occur.SHOULD)) + .add(new BooleanClause(new TermQuery(new Term(STRING_FIELD_NAME, "bar")), BooleanClause.Occur.SHOULD)) + .build(); + List disjuncts = new ArrayList<>(); + disjuncts.add(bq1); + disjuncts.add(new TermQuery(new Term(STRING_FIELD_NAME_2, "foo bar"))); + DisjunctionMaxQuery disjunctionMaxQuery = new DisjunctionMaxQuery(disjuncts, 0.0f); + BooleanQuery expectedQuery = + new BooleanQuery.Builder() + .add(disjunctionMaxQuery, BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term(STRING_FIELD_NAME, "other")), BooleanClause.Occur.SHOULD) + .build(); + assertThat(query, equalTo(expectedQuery)); + } + + { + QueryStringQueryBuilder queryBuilder = + new QueryStringQueryBuilder("foo OR bar") + .field(STRING_FIELD_NAME).field(STRING_FIELD_NAME_2) + .splitOnWhitespace(false); + Query query = queryBuilder.toQuery(createShardContext()); + + List disjuncts1 = new ArrayList<>(); + disjuncts1.add(new TermQuery(new Term(STRING_FIELD_NAME, "foo"))); + disjuncts1.add(new TermQuery(new Term(STRING_FIELD_NAME_2, "foo"))); + DisjunctionMaxQuery maxQuery1 = new DisjunctionMaxQuery(disjuncts1, 0.0f); + + List disjuncts2 = new ArrayList<>(); + disjuncts2.add(new TermQuery(new Term(STRING_FIELD_NAME, "bar"))); + disjuncts2.add(new TermQuery(new Term(STRING_FIELD_NAME_2, "bar"))); + DisjunctionMaxQuery maxQuery2 = new DisjunctionMaxQuery(disjuncts2, 0.0f); + + BooleanQuery expectedQuery = + new BooleanQuery.Builder() + .add(new BooleanClause(maxQuery1, BooleanClause.Occur.SHOULD)) + .add(new BooleanClause(maxQuery2, BooleanClause.Occur.SHOULD)) + .build(); + assertThat(query, equalTo(expectedQuery)); + } + + // split_on_whitespace=false breaks range query with simple syntax + { + // throws an exception when lenient is set to false + QueryStringQueryBuilder queryBuilder = + new QueryStringQueryBuilder(">10 foo") + .field(INT_FIELD_NAME) + .splitOnWhitespace(false); + IllegalArgumentException exc = + expectThrows(IllegalArgumentException.class, () -> queryBuilder.toQuery(createShardContext())); + assertThat(exc.getMessage(), equalTo("For input string: \"10 foo\"")); + } + + { + // returns an empty boolean query when lenient is set to true + QueryStringQueryBuilder queryBuilder = + new QueryStringQueryBuilder(">10 foo") + .field(INT_FIELD_NAME) + .splitOnWhitespace(false) + .lenient(true); + Query query = queryBuilder.toQuery(createShardContext()); + BooleanQuery bq = new BooleanQuery.Builder().build(); + assertThat(bq, equalTo(query)); + } + + // splitOnWhitespace=true + { + QueryStringQueryBuilder queryBuilder = + new QueryStringQueryBuilder("foo bar") + .field(STRING_FIELD_NAME).field(STRING_FIELD_NAME_2) + .splitOnWhitespace(true); + Query query = queryBuilder.toQuery(createShardContext()); + + List disjuncts1 = new ArrayList<>(); + disjuncts1.add(new TermQuery(new Term(STRING_FIELD_NAME, "foo"))); + disjuncts1.add(new TermQuery(new Term(STRING_FIELD_NAME_2, "foo"))); + DisjunctionMaxQuery maxQuery1 = new DisjunctionMaxQuery(disjuncts1, 0.0f); + + List disjuncts2 = new ArrayList<>(); + disjuncts2.add(new TermQuery(new Term(STRING_FIELD_NAME, "bar"))); + disjuncts2.add(new TermQuery(new Term(STRING_FIELD_NAME_2, "bar"))); + DisjunctionMaxQuery maxQuery2 = new DisjunctionMaxQuery(disjuncts2, 0.0f); + + BooleanQuery expectedQuery = + new BooleanQuery.Builder() + .add(new BooleanClause(maxQuery1, BooleanClause.Occur.SHOULD)) + .add(new BooleanClause(maxQuery2, BooleanClause.Occur.SHOULD)) + .build(); + assertThat(query, equalTo(expectedQuery)); + } + + + } + public void testFromJson() throws IOException { String json = "{\n" + @@ -552,6 +676,7 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase> for a comprehensive example. + +|`split_on_whitespace` |Whether query text should be split on whitespace prior to analysis. + Instead the queryparser would parse around only real 'operators'. + Default to `false`. + |======================================================================= When a multi term query is being generated, one can control how it gets diff --git a/docs/reference/query-dsl/query-string-syntax.asciidoc b/docs/reference/query-dsl/query-string-syntax.asciidoc index 9e847102469..49a8b54c506 100644 --- a/docs/reference/query-dsl/query-string-syntax.asciidoc +++ b/docs/reference/query-dsl/query-string-syntax.asciidoc @@ -282,8 +282,8 @@ A space may also be a reserved character. For instance, if you have a synonym list which converts `"wi fi"` to `"wifi"`, a `query_string` search for `"wi fi"` would fail. The query string parser would interpret your query as a search for `"wi OR fi"`, while the token stored in your -index is actually `"wifi"`. Escaping the space will protect it from -being touched by the query string parser: `"wi\ fi"`. +index is actually `"wifi"`. The option `split_on_whitespace=false` will protect it from +being touched by the query string parser and will let the analysis run on the entire input (`"wi fi"`). **** ===== Empty Query