From 465036655f5f983754a0fa173b7fc5bffc05dc8d Mon Sep 17 00:00:00 2001 From: kimchy Date: Sun, 8 May 2011 21:42:25 +0300 Subject: [PATCH] Query DSL: Text Queries (boolean, phrase, and phrase_prefix), closes #917. --- .../lucene/search/MatchNoDocsQuery.java | 122 ++++++ .../lucene/search/MultiPhrasePrefixQuery.java | 258 +++++++++++++ .../index/query/IndexQueryParserModule.java | 1 + .../index/query/xcontent/QueryBuilders.java | 30 ++ .../query/xcontent/TextQueryBuilder.java | 158 ++++++++ .../index/query/xcontent/TextQueryParser.java | 138 +++++++ .../index/search/TextQueryParser.java | 353 ++++++++++++++++++ .../search/MultiPhrasePrefixQueryTests.java | 67 ++++ .../xcontent/SimpleIndexQueryParserTests.java | 52 +++ .../index/query/xcontent/text1.json | 5 + .../index/query/xcontent/text2.json | 8 + .../index/query/xcontent/text3.json | 8 + .../index/query/xcontent/text4.json | 8 + .../index/query/xcontent/text4_2.json | 7 + 14 files changed, 1215 insertions(+) create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/search/MatchNoDocsQuery.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/TextQueryBuilder.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/TextQueryParser.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/index/search/TextQueryParser.java create mode 100644 modules/elasticsearch/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java create mode 100644 modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text1.json create mode 100644 modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text2.json create mode 100644 modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text3.json create 
mode 100644 modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text4.json
 create mode 100644 modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text4_2.json

diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/search/MatchNoDocsQuery.java b/modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/search/MatchNoDocsQuery.java
new file mode 100644
index 00000000000..e29d5ee01c2
--- /dev/null
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/search/MatchNoDocsQuery.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.common.lucene.search;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.*;
+
+import java.io.IOException;
+import java.util.Set;
+
+/**
+ * Query that matches no documents.
+ */
+public final class MatchNoDocsQuery extends Query {
+
+    /**
+     * Shared instance for callers that do not need a query of their own.
+     */
+    public static final MatchNoDocsQuery INSTANCE = new MatchNoDocsQuery();
+
+    /**
+     * Since all instances of this class are equal to each other,
+     * we have a constant hash code.
+     */
+    private static final int HASH_CODE = 12345;
+
+    /**
+     * Weight implementation that matches no documents.
+     */
+    private class MatchNoDocsWeight extends Weight {
+        /**
+         * The similarity implementation.
+         */
+        private final Similarity similarity;
+
+
+        /**
+         * Creates a new weight that matches nothing.
+         *
+         * @param searcher the search to match for
+         */
+        public MatchNoDocsWeight(final Searcher searcher) {
+            this.similarity = searcher.getSimilarity();
+        }
+
+        @Override
+        public String toString() {
+            return "weight(" + MatchNoDocsQuery.this + ")";
+        }
+
+        @Override
+        public Query getQuery() {
+            return MatchNoDocsQuery.this;
+        }
+
+        @Override
+        public float getValue() {
+            return 0;
+        }
+
+        @Override
+        public float sumOfSquaredWeights() {
+            return 0;
+        }
+
+        @Override
+        public void normalize(final float queryNorm) {
+        }
+
+        @Override
+        public Scorer scorer(final IndexReader reader,
+                             final boolean scoreDocsInOrder,
+                             final boolean topScorer) throws IOException {
+            // No scorer at all: there is never a document to score.
+            return null;
+        }
+
+        @Override
+        public Explanation explain(final IndexReader reader,
+                                   final int doc) {
+            return new ComplexExplanation(false, 0, "MatchNoDocs matches nothing");
+        }
+    }
+
+    @Override
+    public Weight createWeight(final Searcher searcher) {
+        return new MatchNoDocsWeight(searcher);
+    }
+
+    @Override
+    public void extractTerms(final Set<Term> terms) {
+    }
+
+    @Override
+    public String toString(final String field) {
+        return "MatchNoDocsQuery";
+    }
+
+    /**
+     * All instances are interchangeable, so any other {@code MatchNoDocsQuery} is equal to this one.
+     */
+    @Override
+    public boolean equals(final Object o) {
+        return o instanceof MatchNoDocsQuery;
+    }
+
+    @Override
+    public int hashCode() {
+        return HASH_CODE;
+    }
+}
\ No newline at end of file
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java b/modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java
new file mode 100644
index 00000000000..cc7cde02ac5
--- /dev/null
+++
b/modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java
@@ -0,0 +1,283 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.common.lucene.search;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.ToStringUtils;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * A phrase query whose last position is a prefix: {@link #rewrite(IndexReader)} expands the terms
+ * at the last position against the index and runs the result as a {@link MultiPhraseQuery}.
+ */
+public class MultiPhrasePrefixQuery extends Query {
+
+    private String field;
+    private ArrayList<Term[]> termArrays = new ArrayList<Term[]>();
+    private ArrayList<Integer> positions = new ArrayList<Integer>();
+    private int maxExpansions = Integer.MAX_VALUE;
+
+    private int slop = 0;
+
+    /**
+     * Sets the phrase slop for this query.
+     *
+     * @see org.apache.lucene.search.PhraseQuery#setSlop(int)
+     */
+    public void setSlop(int s) {
+        slop = s;
+    }
+
+    /**
+     * Sets the upper bound on the number of index terms the trailing prefix expands to.
+     * Defaults to {@link Integer#MAX_VALUE}.
+     */
+    public void setMaxExpansions(int maxExpansions) {
+        this.maxExpansions = maxExpansions;
+    }
+
+    /**
+     * Gets the phrase slop for this query.
+     *
+     * @see org.apache.lucene.search.PhraseQuery#getSlop()
+     */
+    public int getSlop() {
+        return slop;
+    }
+
+    /**
+     * Add a single term at the next position in the phrase.
+     *
+     * @see org.apache.lucene.search.PhraseQuery#add(Term)
+     */
+    public void add(Term term) {
+        add(new Term[]{term});
+    }
+
+    /**
+     * Add multiple terms at the next position in the phrase.  Any of the terms
+     * may match.
+     *
+     * @see org.apache.lucene.search.PhraseQuery#add(Term)
+     */
+    public void add(Term[] terms) {
+        int position = 0;
+        if (positions.size() > 0)
+            position = positions.get(positions.size() - 1).intValue() + 1;
+
+        add(terms, position);
+    }
+
+    /**
+     * Allows to specify the relative position of terms within the phrase.
+     *
+     * @param terms    the alternatives accepted at this position; all must share the query's field
+     * @param position the relative position of these terms within the phrase
+     * @throws IllegalArgumentException if a term belongs to a different field than earlier terms
+     * @see org.apache.lucene.search.PhraseQuery#add(Term, int)
+     */
+    public void add(Term[] terms, int position) {
+        if (termArrays.size() == 0)
+            field = terms[0].field();
+
+        for (int i = 0; i < terms.length; i++) {
+            // compare by value: identity only works while every field name is interned
+            if (!terms[i].field().equals(field)) {
+                throw new IllegalArgumentException(
+                        "All phrase terms must be in the same field (" + field + "): "
+                                + terms[i]);
+            }
+        }
+
+        termArrays.add(terms);
+        positions.add(Integer.valueOf(position));
+    }
+
+    /**
+     * Returns a List of the terms in the multiphrase.
+     * Do not modify the List or its contents.
+     */
+    public List<Term[]> getTermArrays() {
+        return Collections.unmodifiableList(termArrays);
+    }
+
+    /**
+     * Returns the relative positions of terms in this phrase.
+     */
+    public int[] getPositions() {
+        int[] result = new int[positions.size()];
+        for (int i = 0; i < positions.size(); i++)
+            result[i] = positions.get(i).intValue();
+        return result;
+    }
+
+    /**
+     * Expands the terms at the last position into the index terms they prefix and rewrites the
+     * resulting {@link MultiPhraseQuery}. Returns {@link MatchNoDocsQuery#INSTANCE} when no terms
+     * were added or when the expansion finds no index term.
+     */
+    @Override public Query rewrite(IndexReader reader) throws IOException {
+        if (termArrays.isEmpty()) {
+            // nothing was added, so there is no last position to expand
+            return MatchNoDocsQuery.INSTANCE;
+        }
+        MultiPhraseQuery query = new MultiPhraseQuery();
+        query.setSlop(slop);
+        int sizeMinus1 = termArrays.size() - 1;
+        for (int i = 0; i < sizeMinus1; i++) {
+            query.add(termArrays.get(i), positions.get(i));
+        }
+        Term[] suffixTerms = termArrays.get(sizeMinus1);
+        int position = positions.get(sizeMinus1);
+        List<Term> terms = new ArrayList<Term>();
+        for (Term term : suffixTerms) {
+            getPrefixTerms(terms, term, reader);
+        }
+        if (terms.isEmpty()) {
+            return MatchNoDocsQuery.INSTANCE;
+        }
+        query.add(terms.toArray(new Term[terms.size()]), position);
+        return query.rewrite(reader);
+    }
+
+    /**
+     * Appends to {@code terms} the index terms of this query's field that start with the text of
+     * {@code prefix}, stopping once {@code terms} holds at least {@code maxExpansions} entries.
+     */
+    private void getPrefixTerms(List<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
+        TermEnum enumerator = reader.terms(prefix);
+        try {
+            do {
+                Term term = enumerator.term();
+                if (term != null
+                        && term.text().startsWith(prefix.text())
+                        && term.field().equals(field)) {
+                    terms.add(term);
+                } else {
+                    break;
+                }
+                // ">=" keeps the collected terms at maxExpansions; ">" let one extra term through
+                if (terms.size() >= maxExpansions) {
+                    break;
+                }
+            } while (enumerator.next());
+        } finally {
+            enumerator.close();
+        }
+    }
+
+    @Override
+    public final String toString(String f) {
+        StringBuilder buffer = new StringBuilder();
+        if (field == null || !field.equals(f)) {
+            buffer.append(field);
+            buffer.append(":");
+        }
+
+        buffer.append("\"");
+        Iterator<Term[]> i = termArrays.iterator();
+        while (i.hasNext()) {
+            Term[] terms = i.next();
+            if (terms.length > 1) {
+                buffer.append("(");
+                for (int j = 0; j < terms.length; j++) {
+                    buffer.append(terms[j].text());
+                    if (j < terms.length - 1)
+                        buffer.append(" ");
+                }
+                buffer.append(")");
+            } else {
+                buffer.append(terms[0].text());
+            }
+            if (i.hasNext())
+                buffer.append(" ");
+        }
+        buffer.append("\"");
+
+        if (slop != 0) {
+            buffer.append("~");
+            buffer.append(slop);
+        }
+
+        buffer.append(ToStringUtils.boost(getBoost()));
+
+        return buffer.toString();
+    }
+
+
+    /**
+     * Returns true if {@code o} is equal to this.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (!(o instanceof MultiPhrasePrefixQuery)) return false;
+        MultiPhrasePrefixQuery other = (MultiPhrasePrefixQuery) o;
+        return this.getBoost() == other.getBoost()
+                && this.slop == other.slop
+                && termArraysEquals(this.termArrays, other.termArrays)
+                && this.positions.equals(other.positions);
+    }
+
+    /**
+     * Returns a hash code value for this object.
+     */
+    @Override
+    public int hashCode() {
+        return Float.floatToIntBits(getBoost())
+                ^ slop
+                ^ termArraysHashCode()
+                ^ positions.hashCode()
+                ^ 0x4AC65113;
+    }
+
+    // Breakout calculation of the termArrays hashcode
+    private int termArraysHashCode() {
+        int hashCode = 1;
+        for (final Term[] termArray : termArrays) {
+            hashCode = 31 * hashCode
+                    + (termArray == null ? 0 : Arrays.hashCode(termArray));
+        }
+        return hashCode;
+    }
+
+    // Breakout calculation of the termArrays equals
+    private boolean termArraysEquals(List<Term[]> termArrays1, List<Term[]> termArrays2) {
+        if (termArrays1.size() != termArrays2.size()) {
+            return false;
+        }
+        ListIterator<Term[]> iterator1 = termArrays1.listIterator();
+        ListIterator<Term[]> iterator2 = termArrays2.listIterator();
+        while (iterator1.hasNext()) {
+            Term[] termArray1 = iterator1.next();
+            Term[] termArray2 = iterator2.next();
+            if (!(termArray1 == null ? termArray2 == null : Arrays.equals(termArray1,
+                    termArray2))) {
+                return false;
+            }
+        }
+        return true;
+    }
+}
\ No newline at end of file
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/IndexQueryParserModule.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/IndexQueryParserModule.java
index 9d8352c07ce..600e1a2d244 100644
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/IndexQueryParserModule.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/IndexQueryParserModule.java
@@ -220,6 +220,7 @@ public class IndexQueryParserModule extends AbstractModule {
 
     private static class DefaultQueryProcessors extends QueryParsersProcessor {
         @Override public void processXContentQueryParsers(XContentQueryParsersBindings bindings) {
+            bindings.processXContentQueryParser(TextQueryParser.NAME, TextQueryParser.class);
             bindings.processXContentQueryParser(HasChildQueryParser.NAME, HasChildQueryParser.class);
             bindings.processXContentQueryParser(TopChildrenQueryParser.NAME, TopChildrenQueryParser.class);
             bindings.processXContentQueryParser(DisMaxQueryParser.NAME, DisMaxQueryParser.class);
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/QueryBuilders.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/QueryBuilders.java
index 019c5eae2e2..94852d2f870 100644
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/QueryBuilders.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/QueryBuilders.java
@@ -33,6 +33,36 @@ public abstract class QueryBuilders {
         return new MatchAllQueryBuilder();
     }
 
+    /**
+     * Creates a text query with type "BOOLEAN" for the provided field name and text.
+     *
+     * @param name The field name.
+     * @param text The query text (to be analyzed).
+     */
+    public static TextQueryBuilder text(String name, Object text) {
+        return new TextQueryBuilder(name, text).type(TextQueryBuilder.Type.BOOLEAN);
+    }
+
+    /**
+     * Creates a text query with type "PHRASE" for the provided field name and text.
+     *
+     * @param name The field name.
+     * @param text The query text (to be analyzed).
+     */
+    public static TextQueryBuilder textPhrase(String name, Object text) {
+        return new TextQueryBuilder(name, text).type(TextQueryBuilder.Type.PHRASE);
+    }
+
+    /**
+     * Creates a text query with type "PHRASE_PREFIX" for the provided field name and text.
+     *
+     * @param name The field name.
+     * @param text The query text (to be analyzed).
+     */
+    public static TextQueryBuilder textPhrasePrefix(String name, Object text) {
+        return new TextQueryBuilder(name, text).type(TextQueryBuilder.Type.PHRASE_PREFIX);
+    }
+
     /**
      * A query that generates the union of documents produced by its sub-queries, and that scores each document
      * with the maximum score for that document as produced by any sub-query, plus a tie breaking increment for any
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/TextQueryBuilder.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/TextQueryBuilder.java
new file mode 100644
index 00000000000..cd438fd7a88
--- /dev/null
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/TextQueryBuilder.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query.xcontent;
+
+import org.elasticsearch.common.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.Locale;
+
+/**
+ * Text query is a query that analyzes the text and constructs a query as the result of the analysis. It
+ * can construct different queries based on the type provided.
+ */
+public class TextQueryBuilder extends BaseQueryBuilder {
+
+    public static enum Operator {
+        OR,
+        AND
+    }
+
+    public static enum Type {
+        /**
+         * The text is analyzed and terms are added to a boolean query.
+         */
+        BOOLEAN,
+        /**
+         * The text is analyzed and used as a phrase query.
+         */
+        PHRASE,
+        /**
+         * The text is analyzed and used in a phrase query, with the last term acting as a prefix.
+         */
+        PHRASE_PREFIX
+    }
+
+    private final String name;
+
+    private final Object text;
+
+    private Type type;
+
+    private Operator operator;
+
+    private String analyzer;
+
+    private Integer slop;
+
+    private String fuzziness;
+
+    private Integer prefixLength;
+
+    private Integer maxExpansions;
+
+    /**
+     * Constructs a new text query.
+     */
+    public TextQueryBuilder(String name, Object text) {
+        this.name = name;
+        this.text = text;
+    }
+
+    /**
+     * Sets the type of the text query.
+     */
+    public TextQueryBuilder type(Type type) {
+        this.type = type;
+        return this;
+    }
+
+    /**
+     * Sets the operator to use when using a boolean query. Defaults to OR.
+     */
+    public TextQueryBuilder operator(Operator operator) {
+        this.operator = operator;
+        return this;
+    }
+
+    /**
+     * Explicitly set the analyzer to use. Defaults to use explicit mapping config for the field, or, if not
+     * set, the default search analyzer.
+     */
+    public TextQueryBuilder analyzer(String analyzer) {
+        this.analyzer = analyzer;
+        return this;
+    }
+
+    /**
+     * Set the phrase slop if evaluated to a phrase query type.
+     */
+    public TextQueryBuilder slop(int slop) {
+        this.slop = slop;
+        return this;
+    }
+
+    /**
+     * Sets the minimum similarity used when evaluated to a fuzzy query type. Defaults to "0.5".
+     */
+    public TextQueryBuilder fuzziness(Object fuzziness) {
+        this.fuzziness = fuzziness.toString();
+        return this;
+    }
+
+    /**
+     * Sets the value sent as "prefix_length"; the parser hands it to the fuzzy prefix length setting.
+     */
+    public TextQueryBuilder prefixLength(int prefixLength) {
+        this.prefixLength = prefixLength;
+        return this;
+    }
+
+    /**
+     * When using fuzzy or prefix type query, the number of term expansions to use. Defaults to unbounded
+     * so its recommended to set it to a reasonable value for faster execution.
+     */
+    public TextQueryBuilder maxExpansions(int maxExpansions) {
+        this.maxExpansions = maxExpansions;
+        return this;
+    }
+
+    @Override public void doXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject(TextQueryParser.NAME);
+        builder.startObject(name);
+
+        // the parser reads the text to analyze from the "query" field
+        builder.field("query", text);
+        if (type != null) {
+            // fixed locale, so the emitted type name does not depend on the JVM default locale
+            builder.field("type", type.toString().toLowerCase(Locale.ENGLISH));
+        }
+        if (operator != null) {
+            builder.field("operator", operator.toString());
+        }
+        if (analyzer != null) {
+            builder.field("analyzer", analyzer);
+        }
+        if (slop != null) {
+            builder.field("slop", slop);
+        }
+        if (fuzziness != null) {
+            builder.field("fuzziness", fuzziness);
+        }
+        if (prefixLength != null) {
+            builder.field("prefix_length", prefixLength);
+        }
+        if (maxExpansions != null) {
+            builder.field("max_expansions", maxExpansions);
+        }
+
+        builder.endObject();
+        builder.endObject();
+    }
+}
\ No newline at end of file
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/TextQueryParser.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/TextQueryParser.java
new file mode 100644
index 00000000000..e8bed74885b
--- /dev/null
+++
b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/TextQueryParser.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query.xcontent;
+
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.Query;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.AbstractIndexComponent;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.query.QueryParsingException;
+import org.elasticsearch.index.settings.IndexSettings;
+
+import java.io.IOException;
+
+/**
+ * Parses the "text" family of queries: reads the field name, the text and the optional settings,
+ * then delegates query construction to {@link org.elasticsearch.index.search.TextQueryParser}.
+ *
+ * @author kimchy (shay.banon)
+ */
+public class TextQueryParser extends AbstractIndexComponent implements XContentQueryParser {
+
+    public static final String NAME = "text";
+
+    @Inject public TextQueryParser(Index index, @IndexSettings Settings settings) {
+        super(index, settings);
+    }
+
+    @Override public String[] names() {
+        return new String[]{NAME, "text_phrase", "textPhrase", "text_phrase_prefix", "textPhrasePrefix", "fuzzyText", "fuzzy_text"};
+    }
+
+    @Override public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
+        XContentParser parser = parseContext.parser();
+
+        // default type comes from the current field name (one of names()); an explicit "type" overrides it
+        org.elasticsearch.index.search.TextQueryParser.Type type = org.elasticsearch.index.search.TextQueryParser.Type.BOOLEAN;
+        if ("text_phrase".equals(parser.currentName()) || "textPhrase".equals(parser.currentName())) {
+            type = org.elasticsearch.index.search.TextQueryParser.Type.PHRASE;
+        } else if ("text_phrase_prefix".equals(parser.currentName()) || "textPhrasePrefix".equals(parser.currentName())) {
+            type = org.elasticsearch.index.search.TextQueryParser.Type.PHRASE_PREFIX;
+        }
+
+        XContentParser.Token token = parser.nextToken();
+        assert token == XContentParser.Token.FIELD_NAME;
+        String fieldName = parser.currentName();
+
+        String text = null;
+        float boost = 1.0f;
+        int phraseSlop = 0;
+        String analyzer = null;
+        String fuzziness = null;
+        int prefixLength = FuzzyQuery.defaultPrefixLength;
+        int maxExpansions = FuzzyQuery.defaultMaxExpansions;
+        BooleanClause.Occur occur = BooleanClause.Occur.SHOULD;
+
+        token = parser.nextToken();
+        if (token == XContentParser.Token.START_OBJECT) {
+            String currentFieldName = null;
+            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+                if (token == XContentParser.Token.FIELD_NAME) {
+                    currentFieldName = parser.currentName();
+                } else if (token.isValue()) {
+                    if ("query".equals(currentFieldName)) {
+                        text = parser.text();
+                    } else if ("type".equals(currentFieldName)) {
+                        String tStr = parser.text();
+                        if ("boolean".equals(tStr)) {
+                            type = org.elasticsearch.index.search.TextQueryParser.Type.BOOLEAN;
+                        } else if ("phrase".equals(tStr)) {
+                            type = org.elasticsearch.index.search.TextQueryParser.Type.PHRASE;
+                        } else if ("phrase_prefix".equals(tStr) || "phrasePrefix".equals(tStr)) {
+                            type = org.elasticsearch.index.search.TextQueryParser.Type.PHRASE_PREFIX;
+                        } else {
+                            throw new QueryParsingException(index, "text query does not support type [" + tStr + "]");
+                        }
+                    } else if ("analyzer".equals(currentFieldName)) {
+                        analyzer = parser.textOrNull();
+                    } else if ("boost".equals(currentFieldName)) {
+                        boost = parser.floatValue();
+                    } else if ("slop".equals(currentFieldName) || "phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) {
+                        phraseSlop = parser.intValue();
+                    } else if ("fuzziness".equals(currentFieldName)) {
+                        fuzziness = parser.textOrNull();
+                    } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) {
+                        prefixLength = parser.intValue();
+                    } else if ("max_expansions".equals(currentFieldName) || "maxExpansions".equals(currentFieldName)) {
+                        maxExpansions = parser.intValue();
+                    } else if ("operator".equals(currentFieldName)) {
+                        String op = parser.text();
+                        if ("or".equalsIgnoreCase(op)) {
+                            occur = BooleanClause.Occur.SHOULD;
+                        } else if ("and".equalsIgnoreCase(op)) {
+                            occur = BooleanClause.Occur.MUST;
+                        } else {
+                            throw new QueryParsingException(index, "text query requires operator to be either 'and' or 'or', not [" + op + "]");
+                        }
+                    }
+                }
+            }
+            parser.nextToken();
+        } else {
+            text = parser.text();
+            // move to the next token
+            parser.nextToken();
+        }
+
+        if (text == null) {
+            throw new QueryParsingException(index, "No text specified for text query");
+        }
+
+        org.elasticsearch.index.search.TextQueryParser tQP = new org.elasticsearch.index.search.TextQueryParser(parseContext, fieldName, text);
+        tQP.setPhraseSlop(phraseSlop);
+        tQP.setAnalyzer(analyzer);
+        tQP.setFuzziness(fuzziness);
+        tQP.setFuzzyPrefixLength(prefixLength);
+        tQP.setMaxExpansions(maxExpansions);
+        tQP.setOccur(occur);
+
+        Query query = tQP.parse(type);
+        query.setBoost(boost);
+        return query;
+    }
+}
\ No newline at end of file
diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/search/TextQueryParser.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/search/TextQueryParser.java
new file mode 100644
index 00000000000..611f7d7ac5d
--- /dev/null
+++
b/modules/elasticsearch/src/main/java/org/elasticsearch/index/search/TextQueryParser.java @@ -0,0 +1,353 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.search; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.*; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.ElasticSearchIllegalStateException; +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.io.FastStringReader; +import org.elasticsearch.common.lucene.search.MatchNoDocsQuery; +import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.query.xcontent.QueryParseContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static 
org.elasticsearch.index.query.support.QueryParsers.*; + +public class TextQueryParser { + + public static enum Type { + BOOLEAN, + PHRASE, + PHRASE_PREFIX + } + + private final QueryParseContext parseContext; + + private final String fieldName; + + private final String text; + + private String analyzer; + + private BooleanClause.Occur occur = BooleanClause.Occur.SHOULD; + + private boolean enablePositionIncrements = true; + + private int phraseSlop = 0; + + private String fuzziness = null; + private int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; + private int maxExpansions = FuzzyQuery.defaultMaxExpansions; + + public TextQueryParser(QueryParseContext parseContext, String fieldName, String text) { + this.parseContext = parseContext; + this.fieldName = fieldName; + this.text = text; + } + + public void setAnalyzer(String analyzer) { + this.analyzer = analyzer; + } + + public void setOccur(BooleanClause.Occur occur) { + this.occur = occur; + } + + public void setEnablePositionIncrements(boolean enablePositionIncrements) { + this.enablePositionIncrements = enablePositionIncrements; + } + + public void setPhraseSlop(int phraseSlop) { + this.phraseSlop = phraseSlop; + } + + public void setFuzziness(String fuzziness) { + this.fuzziness = fuzziness; + } + + public void setFuzzyPrefixLength(int fuzzyPrefixLength) { + this.fuzzyPrefixLength = fuzzyPrefixLength; + } + + public void setMaxExpansions(int maxExpansions) { + this.maxExpansions = maxExpansions; + } + + public Query parse(Type type) { + FieldMapper mapper = null; + String field = fieldName; + MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName); + if (smartNameFieldMappers != null) { + if (smartNameFieldMappers.hasMapper()) { + mapper = smartNameFieldMappers.mapper(); + if (mapper != null) { + field = mapper.names().indexName(); + } + } + } + + if (mapper != null && mapper.useFieldQueryWithQueryString()) { + return wrapSmartNameQuery(mapper.fieldQuery(text, 
parseContext), smartNameFieldMappers, parseContext); + } + + Analyzer analyzer = null; + if (this.analyzer == null) { + if (mapper != null) { + analyzer = mapper.searchAnalyzer(); + } + if (analyzer == null) { + analyzer = parseContext.mapperService().searchAnalyzer(); + } + } else { + analyzer = parseContext.mapperService().analysisService().analyzer(this.analyzer); + if (analyzer == null) { + throw new ElasticSearchIllegalArgumentException("No analyzer found for [" + this.analyzer + "]"); + } + } + + // Logic similar to QueryParser#getFieldQuery + + TokenStream source; + try { + source = analyzer.reusableTokenStream(field, new FastStringReader(text)); + source.reset(); + } catch (IOException e) { + source = analyzer.tokenStream(field, new FastStringReader(text)); + } + CachingTokenFilter buffer = new CachingTokenFilter(source); + CharTermAttribute termAtt = null; + PositionIncrementAttribute posIncrAtt = null; + int numTokens = 0; + + boolean success = false; + try { + buffer.reset(); + success = true; + } catch (IOException e) { + // success==false if we hit an exception + } + if (success) { + if (buffer.hasAttribute(CharTermAttribute.class)) { + termAtt = buffer.getAttribute(CharTermAttribute.class); + } + if (buffer.hasAttribute(PositionIncrementAttribute.class)) { + posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); + } + } + + int positionCount = 0; + boolean severalTokensAtSamePosition = false; + + boolean hasMoreTokens = false; + if (termAtt != null) { + try { + hasMoreTokens = buffer.incrementToken(); + while (hasMoreTokens) { + numTokens++; + int positionIncrement = (posIncrAtt != null) ? 
posIncrAtt.getPositionIncrement() : 1; + if (positionIncrement != 0) { + positionCount += positionIncrement; + } else { + severalTokensAtSamePosition = true; + } + hasMoreTokens = buffer.incrementToken(); + } + } catch (IOException e) { + // ignore + } + } + try { + // rewind the buffer stream + buffer.reset(); + + // close original stream - all tokens buffered + source.close(); + } catch (IOException e) { + // ignore + } + + Term termFactory = new Term(field); + if (numTokens == 0) { + return MatchNoDocsQuery.INSTANCE; + } else if (type == Type.BOOLEAN) { + if (numTokens == 1) { + String term = null; + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.toString(); + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + Query q = newTermQuery(mapper, termFactory.createTerm(term)); + return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext); + } + BooleanQuery q = new BooleanQuery(positionCount == 1); + for (int i = 0; i < numTokens; i++) { + String term = null; + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.toString(); + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + + Query currentQuery = newTermQuery(mapper, termFactory.createTerm(term)); + q.add(currentQuery, occur); + } + return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext); + } else if (type == Type.PHRASE) { + if (severalTokensAtSamePosition) { + MultiPhraseQuery mpq = new MultiPhraseQuery(); + mpq.setSlop(phraseSlop); + List multiTerms = new ArrayList(); + int position = -1; + for (int i = 0; i < numTokens; i++) { + String term = null; + int positionIncrement = 1; + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.toString(); + if (posIncrAtt != null) { + positionIncrement = posIncrAtt.getPositionIncrement(); + } + } catch (IOException e) { + // safe to ignore, because we 
know the number of tokens + } + + if (positionIncrement > 0 && multiTerms.size() > 0) { + if (enablePositionIncrements) { + mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position); + } else { + mpq.add(multiTerms.toArray(new Term[multiTerms.size()])); + } + multiTerms.clear(); + } + position += positionIncrement; + multiTerms.add(termFactory.createTerm(term)); + } + if (enablePositionIncrements) { + mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position); + } else { + mpq.add(multiTerms.toArray(new Term[multiTerms.size()])); + } + return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext); + } else { + PhraseQuery pq = new PhraseQuery(); + pq.setSlop(phraseSlop); + int position = -1; + + + for (int i = 0; i < numTokens; i++) { + String term = null; + int positionIncrement = 1; + + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.toString(); + if (posIncrAtt != null) { + positionIncrement = posIncrAtt.getPositionIncrement(); + } + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + + if (enablePositionIncrements) { + position += positionIncrement; + pq.add(termFactory.createTerm(term), position); + } else { + pq.add(termFactory.createTerm(term)); + } + } + return wrapSmartNameQuery(pq, smartNameFieldMappers, parseContext); + } + } else if (type == Type.PHRASE_PREFIX) { + MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery(); + mpq.setSlop(phraseSlop); + mpq.setMaxExpansions(maxExpansions); + List multiTerms = new ArrayList(); + int position = -1; + for (int i = 0; i < numTokens; i++) { + String term = null; + int positionIncrement = 1; + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.toString(); + if (posIncrAtt != null) { + positionIncrement = posIncrAtt.getPositionIncrement(); + } + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + + if (positionIncrement > 0 && 
multiTerms.size() > 0) { + if (enablePositionIncrements) { + mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position); + } else { + mpq.add(multiTerms.toArray(new Term[multiTerms.size()])); + } + multiTerms.clear(); + } + position += positionIncrement; + multiTerms.add(termFactory.createTerm(term)); + } + if (enablePositionIncrements) { + mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position); + } else { + mpq.add(multiTerms.toArray(new Term[multiTerms.size()])); + } + return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext); + } + + throw new ElasticSearchIllegalStateException("No type found for [" + type + "]"); + } + + /** Builds the query for a single analyzed term. When {@code fuzziness} is set, returns the mapper's {@code fuzzyQuery(...)} if a mapper is given, otherwise a {@code FuzzyQuery} whose similarity argument is {@code fuzziness} parsed as a float. When {@code fuzziness} is null, returns the mapper's {@code queryStringTermQuery(term)} unless there is no mapper or that call returns null, in which case a plain {@code TermQuery} is returned. */ private Query newTermQuery(@Nullable FieldMapper mapper, Term term) { + if (fuzziness != null) { + if (mapper != null) { + return mapper.fuzzyQuery(term.text(), fuzziness, fuzzyPrefixLength, maxExpansions); + } + return new FuzzyQuery(term, Float.parseFloat(fuzziness), fuzzyPrefixLength, maxExpansions); + } + if (mapper != null) { + Query termQuery = mapper.queryStringTermQuery(term); + if (termQuery != null) { + return termQuery; + } + } + /* no mapper, or the mapper had no specialised query for this term */ return new TermQuery(term); + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java b/modules/elasticsearch/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java new file mode 100644 index 00000000000..e30595deed9 --- /dev/null +++ b/modules/elasticsearch/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java @@ -0,0 +1,67 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.lucene.search; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.store.RAMDirectory; +import org.elasticsearch.common.lucene.Lucene; +import org.testng.annotations.Test; + +import static org.hamcrest.MatcherAssert.*; +import static org.hamcrest.Matchers.*; + +/** Exercises {@code MultiPhrasePrefixQuery} against a {@code RAMDirectory} index holding one document whose analyzed "field" is "aaa bbb ccc ddd". */ @Test +public class MultiPhrasePrefixQueryTests { + + /** Expects a count of 1 for the queries [aa], [aaa, bb] and, with slop 1, [aaa, cc]; expects a count of 0 for [xxx] with slop 1. NOTE(review): writer, reader and searcher are never closed here. */ @Test public void simpleTests() throws Exception { + IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); + Document doc = new Document(); + doc.add(new Field("field", "aaa bbb ccc ddd", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + IndexReader reader = IndexReader.open(writer, true); + IndexSearcher searcher = new IndexSearcher(reader); + + MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery(); + query.add(new Term("field", "aa")); + assertThat(Lucene.count(searcher, query, 0), equalTo(1l)); + + query = new MultiPhrasePrefixQuery(); + query.add(new Term("field", "aaa")); + query.add(new Term("field", "bb")); + assertThat(Lucene.count(searcher, query, 0), equalTo(1l)); + + query = new MultiPhrasePrefixQuery(); + query.setSlop(1); + query.add(new Term("field", "aaa")); + query.add(new Term("field", "cc")); + 
assertThat(Lucene.count(searcher, query, 0), equalTo(1l)); + + query = new MultiPhrasePrefixQuery(); + query.setSlop(1); + query.add(new Term("field", "xxx")); + assertThat(Lucene.count(searcher, query, 0), equalTo(0l)); + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/SimpleIndexQueryParserTests.java b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/SimpleIndexQueryParserTests.java index 0ad0751f070..de6c72dd10c 100644 --- a/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/SimpleIndexQueryParserTests.java +++ b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/SimpleIndexQueryParserTests.java @@ -390,6 +390,58 @@ public class SimpleIndexQueryParserTests { assertThat(fieldQuery.includesMin(), equalTo(true)); } + /** Parses text1.json (a "text" query with the plain string "aaa bbb" on name.first) and expects a BooleanQuery with boost 1.0 whose first two clauses are TermQuery instances for name.first:aaa and name.first:bbb. */ @Test public void testTextQuery1() throws IOException { + IndexQueryParser queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/index/query/xcontent/text1.json"); + Query parsedQuery = queryParser.parse(query).query(); + assertThat(parsedQuery, instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) parsedQuery; + assertThat((double) booleanQuery.getBoost(), closeTo(1.0d, 0.00001d)); + assertThat(((TermQuery) booleanQuery.getClauses()[0].getQuery()).getTerm(), equalTo(new Term("name.first", "aaa"))); + assertThat(((TermQuery) booleanQuery.getClauses()[1].getQuery()).getTerm(), equalTo(new Term("name.first", "bbb"))); + } + + /** Parses text2.json (a "text" query in object form with "boost" 1.5) and expects a BooleanQuery with boost 1.5 whose first two clauses are TermQuery instances for name.first:aaa and name.first:bbb. */ @Test public void testTextQuery2() throws IOException { + IndexQueryParser queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/index/query/xcontent/text2.json"); + Query parsedQuery = queryParser.parse(query).query(); + assertThat(parsedQuery, instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) parsedQuery; + assertThat((double) booleanQuery.getBoost(), 
closeTo(1.5d, 0.00001d)); + assertThat(((TermQuery) booleanQuery.getClauses()[0].getQuery()).getTerm(), equalTo(new Term("name.first", "aaa"))); + assertThat(((TermQuery) booleanQuery.getClauses()[1].getQuery()).getTerm(), equalTo(new Term("name.first", "bbb"))); + } + + /** Parses text3.json (a "text" query with "type" set to "phrase") and expects a PhraseQuery whose first two terms are name.first:aaa and name.first:bbb. */ @Test public void testTextQuery3() throws IOException { + IndexQueryParser queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/index/query/xcontent/text3.json"); + Query parsedQuery = queryParser.parse(query).query(); + assertThat(parsedQuery, instanceOf(PhraseQuery.class)); + PhraseQuery phraseQuery = (PhraseQuery) parsedQuery; + assertThat(phraseQuery.getTerms()[0], equalTo(new Term("name.first", "aaa"))); + assertThat(phraseQuery.getTerms()[1], equalTo(new Term("name.first", "bbb"))); + } + + /** Parses text4.json (a "text" query with "type" set to "phrase_prefix") and expects a MultiPhrasePrefixQuery whose first two term arrays begin with name.first:aaa and name.first:bbb. */ @Test public void testTextQuery4() throws IOException { + IndexQueryParser queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/index/query/xcontent/text4.json"); + Query parsedQuery = queryParser.parse(query).query(); + assertThat(parsedQuery, instanceOf(MultiPhrasePrefixQuery.class)); + MultiPhrasePrefixQuery phraseQuery = (MultiPhrasePrefixQuery) parsedQuery; + assertThat(phraseQuery.getTermArrays().get(0)[0], equalTo(new Term("name.first", "aaa"))); + assertThat(phraseQuery.getTermArrays().get(1)[0], equalTo(new Term("name.first", "bbb"))); + } + + /** Parses text4_2.json (the same text expressed through the "text_phrase_prefix" query name, with no explicit "type") and expects a MultiPhrasePrefixQuery whose first two term arrays begin with name.first:aaa and name.first:bbb. */ @Test public void testTextQuery4_2() throws IOException { + IndexQueryParser queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/index/query/xcontent/text4_2.json"); + Query parsedQuery = queryParser.parse(query).query(); + assertThat(parsedQuery, instanceOf(MultiPhrasePrefixQuery.class)); + MultiPhrasePrefixQuery phraseQuery = (MultiPhrasePrefixQuery) parsedQuery; + assertThat(phraseQuery.getTermArrays().get(0)[0], equalTo(new Term("name.first", "aaa"))); + assertThat(phraseQuery.getTermArrays().get(1)[0], equalTo(new Term("name.first", 
"bbb"))); + } + @Test public void testTermWithBoostQueryBuilder() throws IOException { IndexQueryParser queryParser = queryParser(); Query parsedQuery = queryParser.parse(termQuery("age", 34).boost(2.0f)).query(); diff --git a/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text1.json b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text1.json new file mode 100644 index 00000000000..92902998f29 --- /dev/null +++ b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text1.json @@ -0,0 +1,5 @@ +{ + "text" : { + "name.first" : "aaa bbb" + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text2.json b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text2.json new file mode 100644 index 00000000000..641b6665324 --- /dev/null +++ b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text2.json @@ -0,0 +1,8 @@ +{ + "text" : { + "name.first" : { + "query" : "aaa bbb", + "boost" : 1.5 + } + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text3.json b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text3.json new file mode 100644 index 00000000000..cd40e0cf078 --- /dev/null +++ b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text3.json @@ -0,0 +1,8 @@ +{ + "text" : { + "name.first" : { + "query" : "aaa bbb", + "type" : "phrase" + } + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text4.json b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text4.json new file mode 100644 index 00000000000..76309f37f85 --- /dev/null +++ b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text4.json @@ -0,0 +1,8 @@ +{ + "text" : { + "name.first" : 
{ + "query" : "aaa bbb", + "type" : "phrase_prefix" + } + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text4_2.json b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text4_2.json new file mode 100644 index 00000000000..56ed5ee37ec --- /dev/null +++ b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/xcontent/text4_2.json @@ -0,0 +1,7 @@ +{ + "text_phrase_prefix" : { + "name.first" : { + "query" : "aaa bbb" + } + } +} \ No newline at end of file