diff --git a/docs/reference/query-dsl/queries.asciidoc b/docs/reference/query-dsl/queries.asciidoc index caa94759d77..e4d09b4da8b 100644 --- a/docs/reference/query-dsl/queries.asciidoc +++ b/docs/reference/query-dsl/queries.asciidoc @@ -60,6 +60,8 @@ include::queries/prefix-query.asciidoc[] include::queries/query-string-query.asciidoc[] +include::queries/simple-query-string-query.asciidoc[] + include::queries/range-query.asciidoc[] include::queries/regexp-query.asciidoc[] diff --git a/docs/reference/query-dsl/queries/simple-query-string-query.asciidoc b/docs/reference/query-dsl/queries/simple-query-string-query.asciidoc new file mode 100644 index 00000000000..c7fd53cae9f --- /dev/null +++ b/docs/reference/query-dsl/queries/simple-query-string-query.asciidoc @@ -0,0 +1,78 @@ +[[query-dsl-simple-query-string-query]] +=== Simple Query String Query + +A query that uses the SimpleQueryParser to parse its context. Unlike the +regular `query_string` query, the `simple_query_string` query will never +throw an exception, and discards invalid parts of the query. Here is +an example: + +[source,js] +-------------------------------------------------- +{ + "simple_query_string" : { + "query": "\"fried eggs\" +(eggplant | potato) -frittata", + "analyzer": "snowball", + "fields": ["body^5","_all"], + "default_operator": "and" + } +} +-------------------------------------------------- + +The `simple_query_string` top level parameters include: + +[cols="<,<",options="header",] +|======================================================================= +|Parameter |Description +|`query` |The actual query to be parsed. See below for syntax. + +|`fields` |The fields to perform the parsed query against. Defaults to the +`index.query.default_field` index settings, which in turn defaults to `_all`. + +|`default_operator` |The default operator used if no explicit operator +is specified. 
For example, with a default operator of `OR`, the query +`capital of Hungary` is translated to `capital OR of OR Hungary`, and +with default operator of `AND`, the same query is translated to +`capital AND of AND Hungary`. The default value is `OR`. + +|`analyzer` |The analyzer used to analyze each term of the query when +creating composite queries. +|======================================================================= + +[float] +==== Simple Query String Syntax +The `simple_query_string` supports the following special characters: + +* `+` signifies AND operation +* `|` signifies OR operation +* `-` negates a single token +* `"` wraps a number of tokens to signify a phrase for searching +* `*` at the end of a term signifies a prefix query +* `(` and `)` signify precedence + +In order to search for any of these special characters, they will need to +be escaped with `\`. + +[float] +==== Default Field +When not explicitly specifying the field to search on in the query +string syntax, the `index.query.default_field` will be used to derive +which field to search on. It defaults to the `_all` field. + +So, if the `_all` field is disabled, it might make sense to set +a different default field. + +[float] +==== Multi Field +The fields parameter can also include pattern based field names, +allowing the query to automatically expand to the relevant fields (dynamically +introduced fields included). 
For example: + +[source,js] +-------------------------------------------------- +{ + "simple_query_string" : { + "fields" : ["content", "name.*^5"], + "query" : "foo bar baz" + } +} +-------------------------------------------------- diff --git a/src/main/java/org/apache/lucene/queryparser/XSimpleQueryParser.java b/src/main/java/org/apache/lucene/queryparser/XSimpleQueryParser.java new file mode 100644 index 00000000000..91bc21db0c2 --- /dev/null +++ b/src/main/java/org/apache/lucene/queryparser/XSimpleQueryParser.java @@ -0,0 +1,521 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.queryparser; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.*; +import org.apache.lucene.util.QueryBuilder; +import org.apache.lucene.util.Version; +import org.elasticsearch.common.lucene.Lucene; + +import java.util.Collections; +import java.util.Map; + +/** + * XSimpleQueryParser is used to parse human readable query syntax. + *

+ * The main idea behind this parser is that a person should be able to type + * whatever they want to represent a query, and this parser will do its best + * to interpret what to search for no matter how poorly composed the request + * may be. Tokens are considered to be any of a term, phrase, or subquery for the + * operations described below. Whitespace including ' ' '\n' '\r' and '\t' + * and certain operators may be used to delimit tokens ( ) + | " . + *

+ * Any errors in query syntax will be ignored and the parser will attempt + * to decipher what it can; however, this may mean odd or unexpected results. + *

Query Operators

+ * + *

+ * The {@link #setDefaultOperator default operator} is {@code OR} if no other operator is specified. + * For example, the following will {@code OR} {@code token1} and {@code token2} together: + * token1 token2 + *

+ * Normal operator precedence will be simple order from left to right. + * For example, the following will evaluate {@code token1 OR token2} first, + * then {@code AND} with {@code token3}: + *

token1 | token2 + token3
+ *

Escaping

+ *

+ * An individual term may contain any possible character with certain characters + * requiring escaping using a '{@code \}'. The following characters will need to be escaped in + * terms and phrases: + * {@code + | " ( ) ' \} + *

+ * The '{@code -}' operator is a special case. On individual terms (not phrases) the first + * character of a term that is {@code -} must be escaped; however, any '{@code -}' characters + * beyond the first character do not need to be escaped. + * For example: + *

+ *

+ * The '{@code *}' operator is a special case. On individual terms (not phrases) the last + * character of a term that is '{@code *}' must be escaped; however, any '{@code *}' characters + * before the last character do not need to be escaped: + *

+ *

+ * Note that above examples consider the terms before text processing. + */ +public class XSimpleQueryParser extends QueryBuilder { + + static { + assert Version.LUCENE_46.onOrAfter(Lucene.VERSION) : "Lucene 4.7 adds SimpleQueryParser, remove me!"; + } + + /** Map of fields to query against with their weights */ + protected final Map weights; + /** flags to the parser (to turn features on/off) */ + protected final int flags; + + /** Enables {@code AND} operator (+) */ + public static final int AND_OPERATOR = 1<<0; + /** Enables {@code NOT} operator (-) */ + public static final int NOT_OPERATOR = 1<<1; + /** Enables {@code OR} operator (|) */ + public static final int OR_OPERATOR = 1<<2; + /** Enables {@code PREFIX} operator (*) */ + public static final int PREFIX_OPERATOR = 1<<3; + /** Enables {@code PHRASE} operator (") */ + public static final int PHRASE_OPERATOR = 1<<4; + /** Enables {@code PRECEDENCE} operators: {@code (} and {@code )} */ + public static final int PRECEDENCE_OPERATORS = 1<<5; + /** Enables {@code ESCAPE} operator (\) */ + public static final int ESCAPE_OPERATOR = 1<<6; + /** Enables {@code WHITESPACE} operators: ' ' '\n' '\r' '\t' */ + public static final int WHITESPACE_OPERATOR = 1<<7; + + private BooleanClause.Occur defaultOperator = BooleanClause.Occur.SHOULD; + + /** Creates a new parser searching over a single field. */ + public XSimpleQueryParser(Analyzer analyzer, String field) { + this(analyzer, Collections.singletonMap(field, 1.0F)); + } + + /** Creates a new parser searching over multiple fields with different weights. */ + public XSimpleQueryParser(Analyzer analyzer, Map weights) { + this(analyzer, weights, -1); + } + + /** Creates a new parser with custom flags used to enable/disable certain features. 
*/ + public XSimpleQueryParser(Analyzer analyzer, Map weights, int flags) { + super(analyzer); + this.weights = weights; + this.flags = flags; + } + + /** Parses the query text and returns parsed query (or null if empty) */ + public Query parse(String queryText) { + char data[] = queryText.toCharArray(); + char buffer[] = new char[data.length]; + + State state = new State(data, buffer, 0, data.length); + parseSubQuery(state); + return state.top; + } + + private void parseSubQuery(State state) { + while (state.index < state.length) { + if (state.data[state.index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0) { + // the beginning of a subquery has been found + consumeSubQuery(state); + } else if (state.data[state.index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0) { + // this is an extraneous character so it is ignored + ++state.index; + } else if (state.data[state.index] == '"' && (flags & PHRASE_OPERATOR) != 0) { + // the beginning of a phrase has been found + consumePhrase(state); + } else if (state.data[state.index] == '+' && (flags & AND_OPERATOR) != 0) { + // an and operation has been explicitly set + // if an operation has already been set this one is ignored + // if a term (or phrase or subquery) has not been found yet the + // operation is also ignored since there is no previous + // term (or phrase or subquery) to and with + if (state.currentOperation == null && state.top != null) { + state.currentOperation = BooleanClause.Occur.MUST; + } + + ++state.index; + } else if (state.data[state.index] == '|' && (flags & OR_OPERATOR) != 0) { + // an or operation has been explicitly set + // if an operation has already been set this one is ignored + // if a term (or phrase or subquery) has not been found yet the + // operation is also ignored since there is no previous + // term (or phrase or subquery) to or with + if (state.currentOperation == null && state.top != null) { + state.currentOperation = BooleanClause.Occur.SHOULD; + } + + ++state.index; + } else if 
(state.data[state.index] == '-' && (flags & NOT_OPERATOR) != 0) { + // a not operator has been found, so increase the not count + // two not operators in a row negate each other + ++state.not; + ++state.index; + + // continue so the not operator is not reset + // before the next character is determined + continue; + } else if ((state.data[state.index] == ' ' + || state.data[state.index] == '\t' + || state.data[state.index] == '\n' + || state.data[state.index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0) { + // ignore any whitespace found as it may have already been + // used a delimiter across a term (or phrase or subquery) + // or is simply extraneous + ++state.index; + } else { + // the beginning of a token has been found + consumeToken(state); + } + + // reset the not operator as even whitespace is not allowed when + // specifying the not operation for a term (or phrase or subquery) + state.not = 0; + } + } + + private void consumeSubQuery(State state) { + assert (flags & PRECEDENCE_OPERATORS) != 0; + int start = ++state.index; + int precedence = 1; + boolean escaped = false; + + while (state.index < state.length) { + if (!escaped) { + if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) { + // an escape character has been found so + // whatever character is next will become + // part of the subquery unless the escape + // character is the last one in the data + escaped = true; + ++state.index; + + continue; + } else if (state.data[state.index] == '(') { + // increase the precedence as there is a + // subquery in the current subquery + ++precedence; + } else if (state.data[state.index] == ')') { + --precedence; + + if (precedence == 0) { + // this should be the end of the subquery + // all characters found will used for + // creating the subquery + break; + } + } + } + + escaped = false; + ++state.index; + } + + if (state.index == state.length) { + // a closing parenthesis was never found so the opening + // parenthesis is considered 
extraneous and will be ignored + state.index = start; + } else if (state.index == start) { + // a closing parenthesis was found immediately after the opening + // parenthesis so the current operation is reset since it would + // have been applied to this subquery + state.currentOperation = null; + + ++state.index; + } else { + // a complete subquery has been found and is recursively parsed by + // starting over with a new state object + State subState = new State(state.data, state.buffer, start, state.index); + parseSubQuery(subState); + buildQueryTree(state, subState.top); + + ++state.index; + } + } + + private void consumePhrase(State state) { + assert (flags & PHRASE_OPERATOR) != 0; + int start = ++state.index; + int copied = 0; + boolean escaped = false; + + while (state.index < state.length) { + if (!escaped) { + if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) { + // an escape character has been found so + // whatever character is next will become + // part of the phrase unless the escape + // character is the last one in the data + escaped = true; + ++state.index; + + continue; + } else if (state.data[state.index] == '"') { + // this should be the end of the phrase + // all characters found will used for + // creating the phrase query + break; + } + } + + escaped = false; + state.buffer[copied++] = state.data[state.index++]; + } + + if (state.index == state.length) { + // a closing double quote was never found so the opening + // double quote is considered extraneous and will be ignored + state.index = start; + } else if (state.index == start) { + // a closing double quote was found immediately after the opening + // double quote so the current operation is reset since it would + // have been applied to this phrase + state.currentOperation = null; + + ++state.index; + } else { + // a complete phrase has been found and is parsed through + // through the analyzer from the given field + String phrase = new String(state.buffer, 0, copied); 
+ Query branch = newPhraseQuery(phrase); + buildQueryTree(state, branch); + + ++state.index; + } + } + + private void consumeToken(State state) { + int copied = 0; + boolean escaped = false; + boolean prefix = false; + + while (state.index < state.length) { + if (!escaped) { + if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) { + // an escape character has been found so + // whatever character is next will become + // part of the term unless the escape + // character is the last one in the data + escaped = true; + prefix = false; + ++state.index; + + continue; + } else if ((state.data[state.index] == '"' && (flags & PHRASE_OPERATOR) != 0) + || (state.data[state.index] == '|' && (flags & OR_OPERATOR) != 0) + || (state.data[state.index] == '+' && (flags & AND_OPERATOR) != 0) + || (state.data[state.index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0) + || (state.data[state.index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0) + || ((state.data[state.index] == ' ' + || state.data[state.index] == '\t' + || state.data[state.index] == '\n' + || state.data[state.index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0)) { + // this should be the end of the term + // all characters found will used for + // creating the term query + break; + } + + // wildcard tracks whether or not the last character + // was a '*' operator that hasn't been escaped + // there must be at least one valid character before + // searching for a prefixed set of terms + prefix = copied > 0 && state.data[state.index] == '*' && (flags & PREFIX_OPERATOR) != 0; + } + + escaped = false; + state.buffer[copied++] = state.data[state.index++]; + } + + if (copied > 0) { + final Query branch; + + if (prefix) { + // if a term is found with a closing '*' it is considered to be a prefix query + // and will have prefix added as an option + String token = new String(state.buffer, 0, copied - 1); + branch = newPrefixQuery(token); + } else { + // a standard term has been found so it will be run 
through + // the entire analysis chain from the specified schema field + String token = new String(state.buffer, 0, copied); + branch = newDefaultQuery(token); + } + + buildQueryTree(state, branch); + } + } + + // buildQueryTree should be called after a term, phrase, or subquery + // is consumed to be added to our existing query tree + // this method will only add to the existing tree if the branch contained in state is not null + private void buildQueryTree(State state, Query branch) { + if (branch != null) { + // modify our branch to a BooleanQuery wrapper for not + // this is necessary any time a term, phrase, or subquery is negated + if (state.not % 2 == 1) { + BooleanQuery nq = new BooleanQuery(); + nq.add(branch, BooleanClause.Occur.MUST_NOT); + nq.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); + branch = nq; + } + + // first term (or phrase or subquery) found and will begin our query tree + if (state.top == null) { + state.top = branch; + } else { + // more than one term (or phrase or subquery) found + // set currentOperation to the default if no other operation is explicitly set + if (state.currentOperation == null) { + state.currentOperation = defaultOperator; + } + + // operational change requiring a new parent node + // this occurs if the previous operation is not the same as current operation + // because the previous operation must be evaluated separately to preserve + // the proper precedence and the current operation will take over as the top of the tree + if (state.previousOperation != state.currentOperation) { + BooleanQuery bq = new BooleanQuery(); + bq.add(state.top, state.currentOperation); + state.top = bq; + } + + // reset all of the state for reuse + ((BooleanQuery)state.top).add(branch, state.currentOperation); + state.previousOperation = state.currentOperation; + } + + // reset the current operation as it was intended to be applied to + // the incoming term (or phrase or subquery) even if branch was null + // due to other 
possible errors + state.currentOperation = null; + } + } + + /** + * Factory method to generate a standard query (no phrase or prefix operators). + */ + protected Query newDefaultQuery(String text) { + BooleanQuery bq = new BooleanQuery(true); + for (Map.Entry entry : weights.entrySet()) { + Query q = createBooleanQuery(entry.getKey(), text, defaultOperator); + if (q != null) { + q.setBoost(entry.getValue()); + bq.add(q, BooleanClause.Occur.SHOULD); + } + } + return simplify(bq); + } + + /** + * Factory method to generate a phrase query. + */ + protected Query newPhraseQuery(String text) { + BooleanQuery bq = new BooleanQuery(true); + for (Map.Entry entry : weights.entrySet()) { + Query q = createPhraseQuery(entry.getKey(), text); + if (q != null) { + q.setBoost(entry.getValue()); + bq.add(q, BooleanClause.Occur.SHOULD); + } + } + return simplify(bq); + } + + /** + * Factory method to generate a prefix query. + */ + protected Query newPrefixQuery(String text) { + BooleanQuery bq = new BooleanQuery(true); + for (Map.Entry entry : weights.entrySet()) { + PrefixQuery prefix = new PrefixQuery(new Term(entry.getKey(), text)); + prefix.setBoost(entry.getValue()); + bq.add(prefix, BooleanClause.Occur.SHOULD); + } + return simplify(bq); + } + + /** + * Helper to simplify boolean queries with 0 or 1 clause + */ + protected Query simplify(BooleanQuery bq) { + if (bq.clauses().isEmpty()) { + return null; + } else if (bq.clauses().size() == 1) { + return bq.clauses().get(0).getQuery(); + } else { + return bq; + } + } + + /** + * Returns the implicit operator setting, which will be + * either {@code SHOULD} or {@code MUST}. + */ + public BooleanClause.Occur getDefaultOperator() { + return defaultOperator; + } + + /** + * Sets the implicit operator setting, which must be + * either {@code SHOULD} or {@code MUST}. 
+ */ + public void setDefaultOperator(BooleanClause.Occur operator) { + if (operator != BooleanClause.Occur.SHOULD && operator != BooleanClause.Occur.MUST) { + throw new IllegalArgumentException("invalid operator: only SHOULD or MUST are allowed"); + } + this.defaultOperator = operator; + } + + static class State { + final char[] data; // the characters in the query string + final char[] buffer; // a temporary buffer used to reduce necessary allocations + int index; + int length; + + BooleanClause.Occur currentOperation; + BooleanClause.Occur previousOperation; + int not; + + Query top; + + State(char[] data, char[] buffer, int index, int length) { + this.data = data; + this.buffer = buffer; + this.index = index; + this.length = length; + } + } +} + diff --git a/src/main/java/org/elasticsearch/index/query/QueryBuilders.java b/src/main/java/org/elasticsearch/index/query/QueryBuilders.java index 0af755d51f9..b97f68d2084 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryBuilders.java +++ b/src/main/java/org/elasticsearch/index/query/QueryBuilders.java @@ -403,6 +403,16 @@ public abstract class QueryBuilders { return new QueryStringQueryBuilder(queryString); } + /** + * A query that acts similar to a query_string query, but won't throw + * exceptions for any weird string syntax. See + * {@link org.apache.lucene.queryparser.XSimpleQueryParser} for the full + * supported syntax. + */ + public static SimpleQueryStringBuilder simpleQueryString(String queryString) { + return new SimpleQueryStringBuilder(queryString); + } + /** * The BoostingQuery class can be used to effectively demote results that match a given query. 
* Unlike the "NOT" clause, this still selects documents that contain undesirable terms, diff --git a/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java new file mode 100644 index 00000000000..03c60f8f4d6 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java @@ -0,0 +1,117 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.query; + +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +/** + * SimpleQuery is a query parser that acts similar to a query_string + * query, but won't throw exceptions for any weird string syntax. 
+ */ +public class SimpleQueryStringBuilder extends BaseQueryBuilder { + private Map fields = new HashMap(); + private String analyzer; + private Operator operator; + private final String queryText; + + /** + * Operators for the default_operator + */ + public static enum Operator { + AND, + OR + } + + /** + * Construct a new simple query with the given text + */ + public SimpleQueryStringBuilder(String text) { + this.queryText = text; + } + + /** + * Add a field to run the query against + */ + public SimpleQueryStringBuilder field(String field) { + this.fields.put(field, null); + return this; + } + + /** + * Add a field to run the query against with a specific boost + */ + public SimpleQueryStringBuilder field(String field, float boost) { + this.fields.put(field, boost); + return this; + } + + /** + * Specify an analyzer to use for the query + */ + public SimpleQueryStringBuilder analyzer(String analyzer) { + this.analyzer = analyzer; + return this; + } + + /** + * Specify the default operator for the query. 
Defaults to "OR" if no + * operator is specified + */ + public SimpleQueryStringBuilder defaultOperator(Operator defaultOperator) { + this.operator = defaultOperator; + return this; + } + + @Override + public void doXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(SimpleQueryStringParser.NAME); + + builder.field("query", queryText); + + if (fields.size() > 0) { + builder.startArray("fields"); + for (Map.Entry entry : fields.entrySet()) { + String field = entry.getKey(); + Float boost = entry.getValue(); + if (boost != null) { + builder.value(field + "^" + boost); + } else { + builder.value(field); + } + } + builder.endArray(); + } + + if (analyzer != null) { + builder.field("analyzer", analyzer); + } + + if (operator != null) { + builder.field("default_operator", operator.name().toLowerCase(Locale.ROOT)); + } + + builder.endObject(); + } +} diff --git a/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java new file mode 100644 index 00000000000..146318fe3af --- /dev/null +++ b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java @@ -0,0 +1,189 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.queryparser.XSimpleQueryParser; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.Query; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.analysis.NamedAnalyzer; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * SimpleQueryStringParser is a query parser that acts similar to a query_string + * query, but won't throw exceptions for any weird string syntax. It supports + * the following: + * + *

+ * + * See: {@link XSimpleQueryParser} for more information. + * + * This query supports these options: + * + * Required: + * {@code query} - query text to be converted into other queries + * + * Optional: + * {@code analyzer} - anaylzer to be used for analyzing tokens to determine + * which kind of query they should be converted into, defaults to "standard" + * {@code default_operator} - default operator for boolean queries, defaults + * to OR + * {@code fields} - fields to search, defaults to _all if not set, allows + * boosting a field with ^n + */ +public class SimpleQueryStringParser implements QueryParser { + + public static final String NAME = "simple_query_string"; + + @Inject + public SimpleQueryStringParser(Settings settings) { + + } + + @Override + public String[] names() { + return new String[]{NAME, Strings.toCamelCase(NAME)}; + } + + @Override + public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException { + XContentParser parser = parseContext.parser(); + + String currentFieldName = null; + String queryBody = null; + String field = null; + Map fieldsAndWeights = null; + BooleanClause.Occur defaultOperator = null; + NamedAnalyzer analyzer = null; + XContentParser.Token token = null; + + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.START_ARRAY) { + if ("fields".equals(currentFieldName)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + String fField = null; + float fBoost = 1; + char[] text = parser.textCharacters(); + int end = parser.textOffset() + parser.textLength(); + for (int i = parser.textOffset(); i < end; i++) { + if (text[i] == '^') { + int relativeLocation = i - parser.textOffset(); + fField = new String(text, parser.textOffset(), relativeLocation); + fBoost = Float.parseFloat(new String(text, i + 1, 
parser.textLength() - relativeLocation - 1)); + break; + } + } + if (fField == null) { + fField = parser.text(); + } + + if (fieldsAndWeights == null) { + fieldsAndWeights = new HashMap(); + } + + if (Regex.isSimpleMatchPattern(fField)) { + for (String fieldName : parseContext.mapperService().simpleMatchToIndexNames(fField)) { + fieldsAndWeights.put(fieldName, fBoost); + } + } else { + fieldsAndWeights.put(fField, fBoost); + } + } + } else { + throw new QueryParsingException(parseContext.index(), + "[" + NAME + "] query does not support [" + currentFieldName + "]"); + } + } else if (token.isValue()) { + if ("query".equals(currentFieldName)) { + queryBody = parser.text(); + } else if ("analyzer".equals(currentFieldName)) { + analyzer = parseContext.analysisService().analyzer(parser.text()); + if (analyzer == null) { + throw new QueryParsingException(parseContext.index(), + "[" + NAME + "] analyzer [" + parser.text() + "] not found"); + } + } else if ("field".equals(currentFieldName)) { + field = parser.text(); + } else if ("default_operator".equals(currentFieldName)) { + String op = parser.text(); + if ("or".equalsIgnoreCase(op)) { + defaultOperator = BooleanClause.Occur.SHOULD; + } else if ("and".equalsIgnoreCase(op)) { + defaultOperator = BooleanClause.Occur.MUST; + } else { + throw new QueryParsingException(parseContext.index(), + "[" + NAME + "] default operator [" + op + "] is not allowed"); + } + } else { + throw new QueryParsingException(parseContext.index(), "[" + NAME + "] unsupported field [" + parser.currentName() + "]"); + } + } + } + + // Query text is required + if (queryBody == null) { + throw new QueryParsingException(parseContext.index(), "[" + NAME + "] query text missing"); + } + + // Support specifying only a field instead of a map + if (field == null) { + field = currentFieldName; + } + + // Use the default field (_all) if no fields specified + if (queryBody != null && fieldsAndWeights == null) { + field = parseContext.defaultField(); + } + + // 
Use standard analyzer by default + if (analyzer == null) { + analyzer = parseContext.analysisService().analyzer("standard"); + } + + XSimpleQueryParser sqp; + if (fieldsAndWeights != null) { + sqp = new XSimpleQueryParser(analyzer, fieldsAndWeights); + } else { + sqp = new XSimpleQueryParser(analyzer, field); + } + + if (defaultOperator != null) { + sqp.setDefaultOperator(defaultOperator); + } + + return sqp.parse(queryBody); + } +} diff --git a/src/main/java/org/elasticsearch/indices/query/IndicesQueriesModule.java b/src/main/java/org/elasticsearch/indices/query/IndicesQueriesModule.java index 7fd0736dae5..e25577fdd4d 100644 --- a/src/main/java/org/elasticsearch/indices/query/IndicesQueriesModule.java +++ b/src/main/java/org/elasticsearch/indices/query/IndicesQueriesModule.java @@ -106,6 +106,7 @@ public class IndicesQueriesModule extends AbstractModule { qpBinders.addBinding().to(CommonTermsQueryParser.class).asEagerSingleton(); qpBinders.addBinding().to(SpanMultiTermQueryParser.class).asEagerSingleton(); qpBinders.addBinding().to(FunctionScoreQueryParser.class).asEagerSingleton(); + qpBinders.addBinding().to(SimpleQueryStringParser.class).asEagerSingleton(); if (ShapesAvailability.JTS_AVAILABLE) { qpBinders.addBinding().to(GeoShapeQueryParser.class).asEagerSingleton(); diff --git a/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java b/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java index 2d401d19fb7..680402eac83 100644 --- a/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java +++ b/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java @@ -2300,4 +2300,12 @@ public class SimpleIndexQueryParserTests extends ElasticsearchTestCase { Query parsedQuery = queryParser.parse(query).query(); assertThat(parsedQuery, instanceOf(BooleanQuery.class)); } + + /** Parses the simple-query-string.json classpath fixture with the index query parser service and checks that the parsed query is a BooleanQuery. */ @Test + public void testSimpleQueryString() throws Exception { + IndexQueryParserService queryParser = queryParser(); + 
String query = copyToStringFromClasspath("/org/elasticsearch/index/query/simple-query-string.json"); + Query parsedQuery = queryParser.parse(query).query(); + assertThat(parsedQuery, instanceOf(BooleanQuery.class)); + } } diff --git a/src/test/java/org/elasticsearch/index/query/simple-query-string.json b/src/test/java/org/elasticsearch/index/query/simple-query-string.json new file mode 100644 index 00000000000..9208e8876fe --- /dev/null +++ b/src/test/java/org/elasticsearch/index/query/simple-query-string.json @@ -0,0 +1,8 @@ +{ + "simple_query_string": { + "query": "foo bar", + "analyzer": "keyword", + "fields": ["body^5","_all"], + "default_operator": "and" + } +} diff --git a/src/test/java/org/elasticsearch/search/query/SimpleQueryTests.java b/src/test/java/org/elasticsearch/search/query/SimpleQueryTests.java index 2be094f69c4..c972565ea70 100644 --- a/src/test/java/org/elasticsearch/search/query/SimpleQueryTests.java +++ b/src/test/java/org/elasticsearch/search/query/SimpleQueryTests.java @@ -1888,4 +1888,40 @@ public class SimpleQueryTests extends ElasticsearchIntegrationTest { return FilterBuilders.numericRangeFilter(field).from(from).to(to); } } + + /** End-to-end check of the simple_query_string query: indexes six small documents into a single-shard index named "test", then verifies the hits returned for the default operator, an explicit AND operator, phrase and group syntax, a non-default analyzer, and per-field boosts. */ @Test + public void testSimpleQueryString() { + assertAcked(client().admin().indices().prepareCreate("test").setSettings(SETTING_NUMBER_OF_SHARDS, 1)); + client().prepareIndex("test", "type1", "1").setSource("body", "foo").get(); + client().prepareIndex("test", "type1", "2").setSource("body", "bar").get(); + client().prepareIndex("test", "type1", "3").setSource("body", "foo bar").get(); + client().prepareIndex("test", "type1", "4").setSource("body", "quux baz eggplant").get(); + client().prepareIndex("test", "type1", "5").setSource("body", "quux baz spaghetti").get(); + /* doc 6 is the only one that uses the "otherbody" field instead of "body" */ client().prepareIndex("test", "type1", "6").setSource("otherbody", "spaghetti").get(); + refresh(); + + /* no operator given: documents containing either term are expected (ids 1, 2 and 3) */ SearchResponse searchResponse = client().prepareSearch().setQuery(simpleQueryString("foo bar")).get(); + assertHitCount(searchResponse, 3l); + 
assertSearchHits(searchResponse, "1", "2", "3"); + + /* explicit AND operator: only doc 3, whose body is "foo bar", is expected */ searchResponse = client().prepareSearch().setQuery( + simpleQueryString("foo bar").defaultOperator(SimpleQueryStringBuilder.Operator.AND)).get(); + assertHitCount(searchResponse, 1l); + assertFirstHit(searchResponse, hasId("3")); + + /* quoted phrase combined with a parenthesised OR group: docs 4 and 5 are expected */ searchResponse = client().prepareSearch().setQuery(simpleQueryString("\"quux baz\" +(eggplant | spaghetti)")).get(); + assertHitCount(searchResponse, 2l); + assertSearchHits(searchResponse, "4", "5"); + + /* plural query term analyzed with "snowball": doc 4, whose body contains "eggplant", is the single expected hit */ searchResponse = client().prepareSearch().setQuery( + simpleQueryString("eggplants").analyzer("snowball")).get(); + assertHitCount(searchResponse, 1l); + assertFirstHit(searchResponse, hasId("4")); + + /* two fields with different boosts (body 10.0, otherbody 2.0): docs 5 and 6 are expected, with doc 5 as the first hit */ searchResponse = client().prepareSearch().setQuery( + simpleQueryString("spaghetti").field("body", 10.0f).field("otherbody", 2.0f)).get(); + assertHitCount(searchResponse, 2l); + assertFirstHit(searchResponse, hasId("5")); + assertSearchHits(searchResponse, "5", "6"); + } }