Query DSL: Text Queries (boolean, phrase, and phrase_prefix), closes #917.

This commit is contained in:
kimchy 2011-05-08 21:42:25 +03:00
parent e66c78ad64
commit 465036655f
14 changed files with 1215 additions and 0 deletions

View File

@ -0,0 +1,122 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import java.io.IOException;
import java.util.Set;
/**
* Query that matches no documents.
*/
/**
 * Query that matches no documents: its weight produces a {@code null} scorer,
 * so no document is ever collected.
 */
public final class MatchNoDocsQuery extends Query {

    /** Shared instance; all instances of this class are equal, so one suffices. */
    public static final MatchNoDocsQuery INSTANCE = new MatchNoDocsQuery();

    /**
     * Since all instances of this class are equal to each other,
     * we have a constant hash code.
     */
    private static final int HASH_CODE = 12345;

    /**
     * Weight implementation that matches no documents.
     */
    private class MatchNoDocsWeight extends Weight {

        /**
         * Creates a new weight that matches nothing.
         *
         * @param searcher the searcher to match for (unused; kept for API symmetry)
         */
        public MatchNoDocsWeight(final Searcher searcher) {
            // the previously stored searcher similarity was never read, so it is not retained
        }

        @Override
        public String toString() {
            return "weight(" + MatchNoDocsQuery.this + ")";
        }

        @Override
        public Query getQuery() {
            return MatchNoDocsQuery.this;
        }

        @Override
        public float getValue() {
            return 0;
        }

        @Override
        public float sumOfSquaredWeights() {
            return 0;
        }

        @Override
        public void normalize(final float queryNorm) {
            // nothing to normalize: the weight is constant zero
        }

        @Override
        public Scorer scorer(final IndexReader reader,
                             final boolean scoreDocsInOrder,
                             final boolean topScorer) throws IOException {
            // a null scorer signals "no matching documents" for this reader
            return null;
        }

        @Override
        public Explanation explain(final IndexReader reader,
                                   final int doc) {
            return new ComplexExplanation(false, 0, "MatchNoDocs matches nothing");
        }
    }

    @Override
    public Weight createWeight(final Searcher searcher) {
        return new MatchNoDocsWeight(searcher);
    }

    @Override
    public void extractTerms(final Set<Term> terms) {
        // no terms to extract
    }

    @Override
    public String toString(final String field) {
        return "MatchNoDocsQuery";
    }

    /**
     * All instances of this query are equal to each other.
     * (Fixes a bug where equality was tested against {@code MatchAllDocsQuery},
     * making a match-none query equal to a match-all query.)
     */
    @Override
    public boolean equals(final Object o) {
        return o instanceof MatchNoDocsQuery;
    }

    @Override
    public int hashCode() {
        return HASH_CODE;
    }
}

View File

@ -0,0 +1,258 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.*;
/**
 * A phrase query in which the terms at the last position act as prefixes: at
 * rewrite time each prefix is expanded against the index (bounded by
 * {@link #setMaxExpansions(int)}) and the result is delegated to a
 * {@link MultiPhraseQuery}. If no indexed term matches any prefix, the query
 * rewrites to a query matching nothing.
 */
public class MultiPhrasePrefixQuery extends Query {

    private String field;
    private ArrayList<Term[]> termArrays = new ArrayList<Term[]>();
    private ArrayList<Integer> positions = new ArrayList<Integer>();

    // upper bound on the number of terms each prefix may expand to at rewrite time
    private int maxExpansions = Integer.MAX_VALUE;

    private int slop = 0;

    /**
     * Sets the phrase slop for this query.
     *
     * @see org.apache.lucene.search.PhraseQuery#setSlop(int)
     */
    public void setSlop(int s) {
        slop = s;
    }

    /**
     * Sets the maximum number of terms each prefix may expand to at rewrite time.
     * Defaults to unbounded.
     */
    public void setMaxExpansions(int maxExpansions) {
        this.maxExpansions = maxExpansions;
    }

    /**
     * Gets the phrase slop for this query.
     *
     * @see org.apache.lucene.search.PhraseQuery#getSlop()
     */
    public int getSlop() {
        return slop;
    }

    /**
     * Add a single term at the next position in the phrase.
     *
     * @see org.apache.lucene.search.PhraseQuery#add(Term)
     */
    public void add(Term term) {
        add(new Term[]{term});
    }

    /**
     * Add multiple terms at the next position in the phrase. Any of the terms
     * may match.
     *
     * @see org.apache.lucene.search.PhraseQuery#add(Term)
     */
    public void add(Term[] terms) {
        int position = 0;
        if (positions.size() > 0)
            position = positions.get(positions.size() - 1).intValue() + 1;
        add(terms, position);
    }

    /**
     * Allows to specify the relative position of terms within the phrase.
     *
     * @param terms    the terms that may match at {@code position}
     * @param position the relative position within the phrase
     * @see org.apache.lucene.search.PhraseQuery#add(Term, int)
     */
    public void add(Term[] terms, int position) {
        if (termArrays.size() == 0)
            field = terms[0].field();

        for (int i = 0; i < terms.length; i++) {
            // compare with equals() rather than ==; relying on interned field
            // strings is fragile
            if (!terms[i].field().equals(field)) {
                throw new IllegalArgumentException(
                        "All phrase terms must be in the same field (" + field + "): "
                                + terms[i]);
            }
        }

        termArrays.add(terms);
        positions.add(Integer.valueOf(position));
    }

    /**
     * Returns a List of the terms in the multiphrase.
     * Do not modify the List or its contents.
     */
    public List<Term[]> getTermArrays() {
        return Collections.unmodifiableList(termArrays);
    }

    /**
     * Returns the relative positions of terms in this phrase.
     */
    public int[] getPositions() {
        int[] result = new int[positions.size()];
        for (int i = 0; i < positions.size(); i++)
            result[i] = positions.get(i).intValue();
        return result;
    }

    @Override public Query rewrite(IndexReader reader) throws IOException {
        if (termArrays.isEmpty()) {
            // nothing was added: previously termArrays.get(-1) below threw
            // IndexOutOfBoundsException; an empty phrase matches nothing
            return MatchNoDocsQuery.INSTANCE;
        }
        MultiPhraseQuery query = new MultiPhraseQuery();
        query.setSlop(slop);
        int sizeMinus1 = termArrays.size() - 1;
        for (int i = 0; i < sizeMinus1; i++) {
            query.add(termArrays.get(i), positions.get(i));
        }

        // expand the prefixes at the last position into concrete index terms
        Term[] suffixTerms = termArrays.get(sizeMinus1);
        int position = positions.get(sizeMinus1);
        List<Term> terms = new ArrayList<Term>();
        for (Term term : suffixTerms) {
            getPrefixTerms(terms, term, reader);
        }
        if (terms.isEmpty()) {
            // no indexed term starts with any of the prefixes
            return MatchNoDocsQuery.INSTANCE;
        }
        query.add(terms.toArray(new Term[terms.size()]), position);
        // carry the boost over to the rewritten query
        query.setBoost(getBoost());
        return query.rewrite(reader);
    }

    /**
     * Collects into {@code terms} every indexed term in {@link #field} that
     * starts with {@code prefix}, stopping once {@link #maxExpansions} terms
     * have been collected.
     */
    private void getPrefixTerms(List<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
        TermEnum enumerator = reader.terms(prefix);
        try {
            do {
                Term term = enumerator.term();
                if (term != null
                        && term.text().startsWith(prefix.text())
                        && term.field().equals(field)) {
                    terms.add(term);
                } else {
                    // term enumeration is ordered, so the first non-matching
                    // term ends the prefix run
                    break;
                }
                // was '>' which admitted one term beyond the configured limit
                if (terms.size() >= maxExpansions) {
                    break;
                }
            } while (enumerator.next());
        } finally {
            enumerator.close();
        }
    }

    @Override
    public final String toString(String f) {
        StringBuilder buffer = new StringBuilder();
        if (field == null || !field.equals(f)) {
            buffer.append(field);
            buffer.append(":");
        }

        buffer.append("\"");
        Iterator<Term[]> i = termArrays.iterator();
        while (i.hasNext()) {
            Term[] terms = i.next();
            if (terms.length > 1) {
                buffer.append("(");
                for (int j = 0; j < terms.length; j++) {
                    buffer.append(terms[j].text());
                    if (j < terms.length - 1)
                        buffer.append(" ");
                }
                buffer.append(")");
            } else {
                buffer.append(terms[0].text());
            }
            if (i.hasNext())
                buffer.append(" ");
        }
        buffer.append("\"");

        if (slop != 0) {
            buffer.append("~");
            buffer.append(slop);
        }

        buffer.append(ToStringUtils.boost(getBoost()));

        return buffer.toString();
    }

    /**
     * Returns true if <code>o</code> is equal to this.
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof MultiPhrasePrefixQuery)) return false;
        MultiPhrasePrefixQuery other = (MultiPhrasePrefixQuery) o;
        return this.getBoost() == other.getBoost()
                && this.slop == other.slop
                && termArraysEquals(this.termArrays, other.termArrays)
                && this.positions.equals(other.positions);
    }

    /**
     * Returns a hash code value for this object.
     */
    @Override
    public int hashCode() {
        return Float.floatToIntBits(getBoost())
                ^ slop
                ^ termArraysHashCode()
                ^ positions.hashCode()
                ^ 0x4AC65113;
    }

    // Breakout calculation of the termArrays hashcode
    private int termArraysHashCode() {
        int hashCode = 1;
        for (final Term[] termArray : termArrays) {
            hashCode = 31 * hashCode
                    + (termArray == null ? 0 : Arrays.hashCode(termArray));
        }
        return hashCode;
    }

    // Breakout calculation of the termArrays equals
    private boolean termArraysEquals(List<Term[]> termArrays1, List<Term[]> termArrays2) {
        if (termArrays1.size() != termArrays2.size()) {
            return false;
        }
        ListIterator<Term[]> iterator1 = termArrays1.listIterator();
        ListIterator<Term[]> iterator2 = termArrays2.listIterator();
        while (iterator1.hasNext()) {
            Term[] termArray1 = iterator1.next();
            Term[] termArray2 = iterator2.next();
            if (!(termArray1 == null ? termArray2 == null : Arrays.equals(termArray1,
                    termArray2))) {
                return false;
            }
        }
        return true;
    }
}

View File

@ -220,6 +220,7 @@ public class IndexQueryParserModule extends AbstractModule {
private static class DefaultQueryProcessors extends QueryParsersProcessor {
@Override public void processXContentQueryParsers(XContentQueryParsersBindings bindings) {
bindings.processXContentQueryParser(TextQueryParser.NAME, TextQueryParser.class);
bindings.processXContentQueryParser(HasChildQueryParser.NAME, HasChildQueryParser.class);
bindings.processXContentQueryParser(TopChildrenQueryParser.NAME, TopChildrenQueryParser.class);
bindings.processXContentQueryParser(DisMaxQueryParser.NAME, DisMaxQueryParser.class);

View File

@ -33,6 +33,36 @@ public abstract class QueryBuilders {
return new MatchAllQueryBuilder();
}
/**
 * Creates a text query with type "BOOLEAN" for the provided field name and text.
 *
 * @param name The field name.
 * @param text The query text (to be analyzed).
 */
public static TextQueryBuilder text(String name, Object text) {
    TextQueryBuilder builder = new TextQueryBuilder(name, text);
    return builder.type(TextQueryBuilder.Type.BOOLEAN);
}
/**
 * Creates a text query with type "PHRASE" for the provided field name and text.
 *
 * @param name The field name.
 * @param text The query text (to be analyzed).
 */
public static TextQueryBuilder textPhrase(String name, Object text) {
    TextQueryBuilder builder = new TextQueryBuilder(name, text);
    return builder.type(TextQueryBuilder.Type.PHRASE);
}
/**
 * Creates a text query with type "PHRASE_PREFIX" for the provided field name and text.
 *
 * @param name The field name.
 * @param text The query text (to be analyzed).
 */
public static TextQueryBuilder textPhrasePrefix(String name, Object text) {
    TextQueryBuilder builder = new TextQueryBuilder(name, text);
    return builder.type(TextQueryBuilder.Type.PHRASE_PREFIX);
}
/**
* A query that generates the union of documents produced by its sub-queries, and that scores each document
* with the maximum score for that document as produced by any sub-query, plus a tie breaking increment for any

View File

@ -0,0 +1,158 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query.xcontent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.Locale;
/**
* Text query is a query that analyzes the text and constructs a query as the result of the analysis. It
* can construct different queries based on the type provided.
*/
/**
 * Text query is a query that analyzes the text and constructs a query as the result of the analysis. It
 * can construct different queries based on the type provided.
 */
public class TextQueryBuilder extends BaseQueryBuilder {

    /** The boolean operator applied between analyzed terms when {@link Type#BOOLEAN} is used. */
    public static enum Operator {
        OR,
        AND
    }

    public static enum Type {
        /**
         * The text is analyzed and terms are added to a boolean query.
         */
        BOOLEAN,
        /**
         * The text is analyzed and used as a phrase query.
         */
        PHRASE,
        /**
         * The text is analyzed and used in a phrase query, with the last term acting as a prefix.
         */
        PHRASE_PREFIX
    }

    // field name the query runs against
    private final String name;

    // the raw query text, analyzed at search time
    private final Object text;

    private Type type;

    private Operator operator;

    private String analyzer;

    private Integer slop;

    private String fuzziness;

    private Integer prefixLength;

    private Integer maxExpansions;

    /**
     * Constructs a new text query.
     *
     * @param name the field name
     * @param text the query text (to be analyzed)
     */
    public TextQueryBuilder(String name, Object text) {
        this.name = name;
        this.text = text;
    }

    /**
     * Sets the type of the text query.
     */
    public TextQueryBuilder type(Type type) {
        this.type = type;
        return this;
    }

    /**
     * Sets the operator to use when using a boolean query. Defaults to <tt>OR</tt>.
     */
    public TextQueryBuilder operator(Operator operator) {
        this.operator = operator;
        return this;
    }

    /**
     * Explicitly set the analyzer to use. Defaults to use explicit mapping config for the field, or, if not
     * set, the default search analyzer.
     */
    public TextQueryBuilder analyzer(String analyzer) {
        this.analyzer = analyzer;
        return this;
    }

    /**
     * Set the phrase slop if evaluated to a phrase query type.
     */
    public TextQueryBuilder slop(int slop) {
        this.slop = slop;
        return this;
    }

    /**
     * Sets the minimum similarity used when evaluated to a fuzzy query type. Defaults to "0.5".
     */
    public TextQueryBuilder fuzziness(Object fuzziness) {
        this.fuzziness = fuzziness.toString();
        return this;
    }

    /**
     * Sets the length of the common (non-fuzzy) prefix when evaluated to a fuzzy query type.
     * (Without this setter the {@code prefix_length} field could never be populated,
     * even though {@link #doXContent} serializes it.)
     */
    public TextQueryBuilder prefixLength(int prefixLength) {
        this.prefixLength = prefixLength;
        return this;
    }

    /**
     * When using fuzzy or prefix type query, the number of term expansions to use. Defaults to unbounded
     * so its recommended to set it to a reasonable value for faster execution.
     */
    public TextQueryBuilder maxExpansions(int maxExpansions) {
        this.maxExpansions = maxExpansions;
        return this;
    }

    @Override public void doXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject(TextQueryParser.NAME);
        builder.startObject(name);

        builder.field("text", text);
        if (type != null) {
            // Locale.ROOT keeps the serialized value stable regardless of the
            // JVM default locale (e.g. Turkish dotless-i lowercasing)
            builder.field("type", type.toString().toLowerCase(Locale.ROOT));
        }
        if (operator != null) {
            builder.field("operator", operator.toString());
        }
        if (analyzer != null) {
            builder.field("analyzer", analyzer);
        }
        if (slop != null) {
            builder.field("slop", slop);
        }
        if (fuzziness != null) {
            builder.field("fuzziness", fuzziness);
        }
        if (prefixLength != null) {
            builder.field("prefix_length", prefixLength);
        }
        if (maxExpansions != null) {
            builder.field("max_expansions", maxExpansions);
        }

        builder.endObject();
        builder.endObject();
    }
}

View File

@ -0,0 +1,138 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query.xcontent;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.query.QueryParsingException;
import org.elasticsearch.index.settings.IndexSettings;
import java.io.IOException;
/**
* @author kimchy (shay.banon)
*/
public class TextQueryParser extends AbstractIndexComponent implements XContentQueryParser {
public static final String NAME = "text";
@Inject public TextQueryParser(Index index, @IndexSettings Settings settings) {
super(index, settings);
}
@Override public String[] names() {
return new String[]{NAME, "text_phrase", "textPhrase", "text_phrase_prefix", "textPhrasePrefix", "fuzzyText", "fuzzy_text"};
}
@Override public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
XContentParser parser = parseContext.parser();
org.elasticsearch.index.search.TextQueryParser.Type type = org.elasticsearch.index.search.TextQueryParser.Type.BOOLEAN;
if ("text_phrase".equals(parser.currentName()) || "textPhrase".equals(parser.currentName())) {
type = org.elasticsearch.index.search.TextQueryParser.Type.PHRASE;
} else if ("text_phrase_prefix".equals(parser.currentName()) || "textPhrasePrefix".equals(parser.currentName())) {
type = org.elasticsearch.index.search.TextQueryParser.Type.PHRASE_PREFIX;
}
XContentParser.Token token = parser.nextToken();
assert token == XContentParser.Token.FIELD_NAME;
String fieldName = parser.currentName();
String text = null;
float boost = 1.0f;
int phraseSlop = 0;
String analyzer = null;
String fuzziness = null;
int prefixLength = FuzzyQuery.defaultPrefixLength;
int maxExpansions = FuzzyQuery.defaultMaxExpansions;
BooleanClause.Occur occur = BooleanClause.Occur.SHOULD;
token = parser.nextToken();
if (token == XContentParser.Token.START_OBJECT) {
String currentFieldName = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token.isValue()) {
if ("query".equals(currentFieldName)) {
text = parser.text();
} else if ("type".equals(currentFieldName)) {
String tStr = parser.text();
if ("boolean".equals(tStr)) {
type = org.elasticsearch.index.search.TextQueryParser.Type.BOOLEAN;
} else if ("phrase".equals(tStr)) {
type = org.elasticsearch.index.search.TextQueryParser.Type.PHRASE;
} else if ("phrase_prefix".equals(tStr) || "phrasePrefix".equals(currentFieldName)) {
type = org.elasticsearch.index.search.TextQueryParser.Type.PHRASE_PREFIX;
}
} else if ("analyzer".equals(currentFieldName)) {
analyzer = parser.textOrNull();
} else if ("boost".equals(currentFieldName)) {
boost = parser.floatValue();
} else if ("slop".equals(currentFieldName) || "phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) {
phraseSlop = parser.intValue();
} else if ("fuzziness".equals(currentFieldName)) {
fuzziness = parser.textOrNull();
} else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) {
prefixLength = parser.intValue();
} else if ("max_expansions".equals(currentFieldName) || "maxExpansions".equals(currentFieldName)) {
maxExpansions = parser.intValue();
} else if ("operator".equals(currentFieldName)) {
String op = parser.text();
if ("or".equalsIgnoreCase(op)) {
occur = BooleanClause.Occur.SHOULD;
} else if ("and".equalsIgnoreCase(op)) {
occur = BooleanClause.Occur.MUST;
} else {
throw new QueryParsingException(index, "text query requires operator to be either 'and' or 'or', not [" + op + "]");
}
}
}
}
parser.nextToken();
} else {
text = parser.text();
// move to the next token
parser.nextToken();
}
if (text == null) {
throw new QueryParsingException(index, "No text specified for text query");
}
org.elasticsearch.index.search.TextQueryParser tQP = new org.elasticsearch.index.search.TextQueryParser(parseContext, fieldName, text);
tQP.setPhraseSlop(phraseSlop);
tQP.setAnalyzer(analyzer);
tQP.setFuzziness(fuzziness);
tQP.setFuzzyPrefixLength(prefixLength);
tQP.setMaxExpansions(maxExpansions);
tQP.setOccur(occur);
Query query = tQP.parse(type);
query.setBoost(boost);
return query;
}
}

View File

@ -0,0 +1,353 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.search;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.io.FastStringReader;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.query.xcontent.QueryParseContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import static org.elasticsearch.index.query.support.QueryParsers.*;
// Analyzes the given text with the resolved analyzer and builds a query of the
// requested Type (boolean, phrase, or phrase prefix) from the resulting token
// stream. The token-stream handling mirrors Lucene's QueryParser#getFieldQuery.
public class TextQueryParser {
// The shape of query to build from the analyzed tokens.
public static enum Type {
BOOLEAN,
PHRASE,
PHRASE_PREFIX
}
private final QueryParseContext parseContext;
// The field name as provided by the request; may be remapped to the mapper's index name.
private final String fieldName;
// The raw query text; analyzed before the query is built.
private final String text;
// Explicit analyzer name, or null to fall back to the field's search analyzer.
private String analyzer;
// Occur applied to each clause when building a BOOLEAN query (SHOULD = "or", MUST = "and").
private BooleanClause.Occur occur = BooleanClause.Occur.SHOULD;
private boolean enablePositionIncrements = true;
private int phraseSlop = 0;
// When non-null, BOOLEAN term clauses become fuzzy queries with this minimum similarity.
private String fuzziness = null;
private int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
private int maxExpansions = FuzzyQuery.defaultMaxExpansions;
public TextQueryParser(QueryParseContext parseContext, String fieldName, String text) {
this.parseContext = parseContext;
this.fieldName = fieldName;
this.text = text;
}
public void setAnalyzer(String analyzer) {
this.analyzer = analyzer;
}
public void setOccur(BooleanClause.Occur occur) {
this.occur = occur;
}
public void setEnablePositionIncrements(boolean enablePositionIncrements) {
this.enablePositionIncrements = enablePositionIncrements;
}
public void setPhraseSlop(int phraseSlop) {
this.phraseSlop = phraseSlop;
}
public void setFuzziness(String fuzziness) {
this.fuzziness = fuzziness;
}
public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
this.fuzzyPrefixLength = fuzzyPrefixLength;
}
public void setMaxExpansions(int maxExpansions) {
this.maxExpansions = maxExpansions;
}
// Builds the query. Never returns null: if the analyzer produces no tokens
// (e.g. the text was all stop words) a MatchNoDocsQuery is returned.
public Query parse(Type type) {
FieldMapper mapper = null;
// resolve the concrete field name (index name) and mapper, when one exists
String field = fieldName;
MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
if (smartNameFieldMappers != null) {
if (smartNameFieldMappers.hasMapper()) {
mapper = smartNameFieldMappers.mapper();
if (mapper != null) {
field = mapper.names().indexName();
}
}
}
// some mappers build the term query from the text themselves, skipping analysis
if (mapper != null && mapper.useFieldQueryWithQueryString()) {
return wrapSmartNameQuery(mapper.fieldQuery(text, parseContext), smartNameFieldMappers, parseContext);
}
// pick the analyzer: explicit one if set, else the field's search analyzer,
// else the default search analyzer
Analyzer analyzer = null;
if (this.analyzer == null) {
if (mapper != null) {
analyzer = mapper.searchAnalyzer();
}
if (analyzer == null) {
analyzer = parseContext.mapperService().searchAnalyzer();
}
} else {
analyzer = parseContext.mapperService().analysisService().analyzer(this.analyzer);
if (analyzer == null) {
throw new ElasticSearchIllegalArgumentException("No analyzer found for [" + this.analyzer + "]");
}
}
// Logic similar to QueryParser#getFieldQuery
TokenStream source;
try {
source = analyzer.reusableTokenStream(field, new FastStringReader(text));
source.reset();
} catch (IOException e) {
// fall back to a fresh stream when the reusable one cannot be reset
source = analyzer.tokenStream(field, new FastStringReader(text));
}
// buffer all tokens so we can do a counting pass and then a building pass
CachingTokenFilter buffer = new CachingTokenFilter(source);
CharTermAttribute termAtt = null;
PositionIncrementAttribute posIncrAtt = null;
int numTokens = 0;
boolean success = false;
try {
buffer.reset();
success = true;
} catch (IOException e) {
// success==false if we hit an exception
}
if (success) {
if (buffer.hasAttribute(CharTermAttribute.class)) {
termAtt = buffer.getAttribute(CharTermAttribute.class);
}
if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
}
}
// first pass: count tokens/positions and detect stacked tokens (position
// increment 0, e.g. synonyms emitted at the same position)
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
boolean hasMoreTokens = false;
if (termAtt != null) {
try {
hasMoreTokens = buffer.incrementToken();
while (hasMoreTokens) {
numTokens++;
int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
if (positionIncrement != 0) {
positionCount += positionIncrement;
} else {
severalTokensAtSamePosition = true;
}
hasMoreTokens = buffer.incrementToken();
}
} catch (IOException e) {
// ignore
}
}
try {
// rewind the buffer stream
buffer.reset();
// close original stream - all tokens buffered
source.close();
} catch (IOException e) {
// ignore
}
// template Term carrying the field; per-token terms are derived via createTerm
Term termFactory = new Term(field);
if (numTokens == 0) {
// the analyzer removed everything: match nothing
return MatchNoDocsQuery.INSTANCE;
} else if (type == Type.BOOLEAN) {
if (numTokens == 1) {
// single token: no boolean wrapper needed
String term = null;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
Query q = newTermQuery(mapper, termFactory.createTerm(term));
return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
}
// one clause per token; the boolean flag presumably disables coord scoring
// when all tokens share one position, as in Lucene's QueryParser -- TODO confirm
BooleanQuery q = new BooleanQuery(positionCount == 1);
for (int i = 0; i < numTokens; i++) {
String term = null;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
Query currentQuery = newTermQuery(mapper, termFactory.createTerm(term));
q.add(currentQuery, occur);
}
return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
} else if (type == Type.PHRASE) {
if (severalTokensAtSamePosition) {
// stacked tokens: MultiPhraseQuery ORs the alternatives at each position
MultiPhraseQuery mpq = new MultiPhraseQuery();
mpq.setSlop(phraseSlop);
List<Term> multiTerms = new ArrayList<Term>();
int position = -1;
for (int i = 0; i < numTokens; i++) {
String term = null;
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
// a positive increment closes the group of terms stacked at the
// previous position; flush it before starting the next group
if (positionIncrement > 0 && multiTerms.size() > 0) {
if (enablePositionIncrements) {
mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
} else {
mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
}
multiTerms.clear();
}
position += positionIncrement;
multiTerms.add(termFactory.createTerm(term));
}
// flush the final group of stacked terms
if (enablePositionIncrements) {
mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
} else {
mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
}
return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext);
} else {
// simple case: exactly one term per position
PhraseQuery pq = new PhraseQuery();
pq.setSlop(phraseSlop);
int position = -1;
for (int i = 0; i < numTokens; i++) {
String term = null;
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
if (enablePositionIncrements) {
position += positionIncrement;
pq.add(termFactory.createTerm(term), position);
} else {
pq.add(termFactory.createTerm(term));
}
}
return wrapSmartNameQuery(pq, smartNameFieldMappers, parseContext);
}
} else if (type == Type.PHRASE_PREFIX) {
// same grouping logic as the multi-phrase case above, but the terms at the
// last position are treated as prefixes and expanded at rewrite time
MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery();
mpq.setSlop(phraseSlop);
mpq.setMaxExpansions(maxExpansions);
List<Term> multiTerms = new ArrayList<Term>();
int position = -1;
for (int i = 0; i < numTokens; i++) {
String term = null;
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
if (positionIncrement > 0 && multiTerms.size() > 0) {
if (enablePositionIncrements) {
mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
} else {
mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
}
multiTerms.clear();
}
position += positionIncrement;
multiTerms.add(termFactory.createTerm(term));
}
if (enablePositionIncrements) {
mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
} else {
mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
}
return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext);
}
throw new ElasticSearchIllegalStateException("No type found for [" + type + "]");
}
// Builds the query for a single term: fuzzy when fuzziness is set, otherwise
// the mapper's term query if it provides one, otherwise a plain TermQuery.
private Query newTermQuery(@Nullable FieldMapper mapper, Term term) {
if (fuzziness != null) {
if (mapper != null) {
return mapper.fuzzyQuery(term.text(), fuzziness, fuzzyPrefixLength, maxExpansions);
}
return new FuzzyQuery(term, Float.parseFloat(fuzziness), fuzzyPrefixLength, maxExpansions);
}
if (mapper != null) {
Query termQuery = mapper.queryStringTermQuery(term);
if (termQuery != null) {
return termQuery;
}
}
return new TermQuery(term);
}
}

View File

@ -0,0 +1,67 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.RAMDirectory;
import org.elasticsearch.common.lucene.Lucene;
import org.testng.annotations.Test;
import static org.hamcrest.MatcherAssert.*;
import static org.hamcrest.Matchers.*;
@Test
public class MultiPhrasePrefixQueryTests {

    /**
     * Indexes a single document and verifies prefix expansion for single-term,
     * multi-term, and sloppy phrase-prefix queries, plus the no-match case.
     */
    @Test public void simpleTests() throws Exception {
        IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));
        try {
            Document doc = new Document();
            doc.add(new Field("field", "aaa bbb ccc ddd", Field.Store.NO, Field.Index.ANALYZED));
            writer.addDocument(doc);
            IndexReader reader = IndexReader.open(writer, true);
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                // a single prefix matching "aaa"
                MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery();
                query.add(new Term("field", "aa"));
                assertThat(Lucene.count(searcher, query, 0), equalTo(1l));

                // exact term followed by a prefix
                query = new MultiPhrasePrefixQuery();
                query.add(new Term("field", "aaa"));
                query.add(new Term("field", "bb"));
                assertThat(Lucene.count(searcher, query, 0), equalTo(1l));

                // slop 1 lets "aaa ... cc*" skip the intervening "bbb"
                query = new MultiPhrasePrefixQuery();
                query.setSlop(1);
                query.add(new Term("field", "aaa"));
                query.add(new Term("field", "cc"));
                assertThat(Lucene.count(searcher, query, 0), equalTo(1l));

                // a prefix matching nothing must yield zero hits
                query = new MultiPhrasePrefixQuery();
                query.setSlop(1);
                query.add(new Term("field", "xxx"));
                assertThat(Lucene.count(searcher, query, 0), equalTo(0l));
            } finally {
                // release the searcher/reader before the writer
                searcher.close();
                reader.close();
            }
        } finally {
            writer.close();
        }
    }
}

View File

@ -390,6 +390,58 @@ public class SimpleIndexQueryParserTests {
assertThat(fieldQuery.includesMin(), equalTo(true));
}
@Test public void testTextQuery1() throws IOException {
    // A bare "text" query string parses to a boolean query over the analyzed terms.
    IndexQueryParser queryParser = queryParser();
    String source = copyToStringFromClasspath("/org/elasticsearch/index/query/xcontent/text1.json");
    Query parsedQuery = queryParser.parse(source).query();
    assertThat(parsedQuery, instanceOf(BooleanQuery.class));
    BooleanQuery bq = (BooleanQuery) parsedQuery;
    // Default boost of 1.0 and one term clause per analyzed token.
    assertThat((double) bq.getBoost(), closeTo(1.0d, 0.00001d));
    assertThat(((TermQuery) bq.getClauses()[0].getQuery()).getTerm(), equalTo(new Term("name.first", "aaa")));
    assertThat(((TermQuery) bq.getClauses()[1].getQuery()).getTerm(), equalTo(new Term("name.first", "bbb")));
}
@Test public void testTextQuery2() throws IOException {
    // A "text" query with an explicit boost carries it onto the boolean query.
    IndexQueryParser queryParser = queryParser();
    String source = copyToStringFromClasspath("/org/elasticsearch/index/query/xcontent/text2.json");
    Query parsedQuery = queryParser.parse(source).query();
    assertThat(parsedQuery, instanceOf(BooleanQuery.class));
    BooleanQuery bq = (BooleanQuery) parsedQuery;
    // Boost of 1.5 comes from the JSON fixture.
    assertThat((double) bq.getBoost(), closeTo(1.5d, 0.00001d));
    assertThat(((TermQuery) bq.getClauses()[0].getQuery()).getTerm(), equalTo(new Term("name.first", "aaa")));
    assertThat(((TermQuery) bq.getClauses()[1].getQuery()).getTerm(), equalTo(new Term("name.first", "bbb")));
}
@Test public void testTextQuery3() throws IOException {
    // "type" : "phrase" in the fixture yields a Lucene PhraseQuery.
    IndexQueryParser queryParser = queryParser();
    String source = copyToStringFromClasspath("/org/elasticsearch/index/query/xcontent/text3.json");
    Query parsedQuery = queryParser.parse(source).query();
    assertThat(parsedQuery, instanceOf(PhraseQuery.class));
    PhraseQuery pq = (PhraseQuery) parsedQuery;
    assertThat(pq.getTerms()[0], equalTo(new Term("name.first", "aaa")));
    assertThat(pq.getTerms()[1], equalTo(new Term("name.first", "bbb")));
}
@Test public void testTextQuery4() throws IOException {
    // "type" : "phrase_prefix" in the fixture yields a MultiPhrasePrefixQuery.
    IndexQueryParser queryParser = queryParser();
    String source = copyToStringFromClasspath("/org/elasticsearch/index/query/xcontent/text4.json");
    Query parsedQuery = queryParser.parse(source).query();
    assertThat(parsedQuery, instanceOf(MultiPhrasePrefixQuery.class));
    MultiPhrasePrefixQuery mppq = (MultiPhrasePrefixQuery) parsedQuery;
    assertThat(mppq.getTermArrays().get(0)[0], equalTo(new Term("name.first", "aaa")));
    assertThat(mppq.getTermArrays().get(1)[0], equalTo(new Term("name.first", "bbb")));
}
@Test public void testTextQuery4_2() throws IOException {
    // The "text_phrase_prefix" shorthand parses the same as type=phrase_prefix.
    IndexQueryParser queryParser = queryParser();
    String source = copyToStringFromClasspath("/org/elasticsearch/index/query/xcontent/text4_2.json");
    Query parsedQuery = queryParser.parse(source).query();
    assertThat(parsedQuery, instanceOf(MultiPhrasePrefixQuery.class));
    MultiPhrasePrefixQuery mppq = (MultiPhrasePrefixQuery) parsedQuery;
    assertThat(mppq.getTermArrays().get(0)[0], equalTo(new Term("name.first", "aaa")));
    assertThat(mppq.getTermArrays().get(1)[0], equalTo(new Term("name.first", "bbb")));
}
@Test public void testTermWithBoostQueryBuilder() throws IOException {
IndexQueryParser queryParser = queryParser();
Query parsedQuery = queryParser.parse(termQuery("age", 34).boost(2.0f)).query();

View File

@ -0,0 +1,5 @@
{
"text" : {
"name.first" : "aaa bbb"
}
}

View File

@ -0,0 +1,8 @@
{
"text" : {
"name.first" : {
"query" : "aaa bbb",
"boost" : 1.5
}
}
}

View File

@ -0,0 +1,8 @@
{
"text" : {
"name.first" : {
"query" : "aaa bbb",
"type" : "phrase"
}
}
}

View File

@ -0,0 +1,8 @@
{
"text" : {
"name.first" : {
"query" : "aaa bbb",
"type" : "phrase_prefix"
}
}
}

View File

@ -0,0 +1,7 @@
{
"text_phrase_prefix" : {
"name.first" : {
"query" : "aaa bbb"
}
}
}