Merge pull request #17776 from jimferenczi/simple_query_string_wildcard

Apply the default operator on analyzed wildcard in simple_query_string builder
This commit is contained in:
Jim Ferenczi 2016-04-15 22:22:55 +02:00
commit 4575d79522
7 changed files with 348 additions and 111 deletions

View File

@ -502,7 +502,6 @@
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]query[/\\]QueryBuilders.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]query[/\\]QueryShardContext.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]query[/\\]QueryValidationException.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]query[/\\]SimpleQueryParser.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]query[/\\]support[/\\]InnerHitsQueryParserHelper.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]query[/\\]support[/\\]QueryParsers.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]search[/\\]MatchQuery.java" checks="LineLength" />

View File

@ -530,42 +530,45 @@ public class MapperQueryParser extends QueryParser {
}
}
if (tlist.size() == 0) {
return null;
}
if (tlist.size() == 1 && tlist.get(0).size() == 1) {
return super.getPrefixQuery(field, tlist.get(0).get(0));
} else {
// build a boolean query with prefix on the last position only.
List<BooleanClause> clauses = new ArrayList<>();
for (int pos = 0; pos < tlist.size(); pos++) {
List<String> plist = tlist.get(pos);
boolean isLastPos = (pos == tlist.size()-1);
Query posQuery;
if (plist.size() == 1) {
if (isLastPos) {
posQuery = getPrefixQuery(field, plist.get(0));
} else {
posQuery = newTermQuery(new Term(field, plist.get(0)));
}
} else if (isLastPos == false) {
// build a synonym query for terms in the same position.
Term[] terms = new Term[plist.size()];
for (int i = 0; i < plist.size(); i++) {
terms[i] = new Term(field, plist.get(i));
}
posQuery = new SynonymQuery(terms);
} else {
List<BooleanClause> innerClauses = new ArrayList<>();
for (String token : plist) {
innerClauses.add(new BooleanClause(getPrefixQuery(field, token),
BooleanClause.Occur.SHOULD));
}
posQuery = getBooleanQueryCoordDisabled(innerClauses);
}
clauses.add(new BooleanClause(posQuery,
getDefaultOperator() == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD));
}
return getBooleanQuery(clauses);
}
// build a boolean query with prefix on the last position only.
List<BooleanClause> clauses = new ArrayList<>();
for (int pos = 0; pos < tlist.size(); pos++) {
List<String> plist = tlist.get(pos);
boolean isLastPos = (pos == tlist.size() - 1);
Query posQuery;
if (plist.size() == 1) {
if (isLastPos) {
posQuery = super.getPrefixQuery(field, plist.get(0));
} else {
posQuery = newTermQuery(new Term(field, plist.get(0)));
}
} else if (isLastPos == false) {
// build a synonym query for terms in the same position.
Term[] terms = new Term[plist.size()];
for (int i = 0; i < plist.size(); i++) {
terms[i] = new Term(field, plist.get(i));
}
posQuery = new SynonymQuery(terms);
} else {
List<BooleanClause> innerClauses = new ArrayList<>();
for (String token : plist) {
innerClauses.add(new BooleanClause(super.getPrefixQuery(field, token),
BooleanClause.Occur.SHOULD));
}
posQuery = getBooleanQueryCoordDisabled(innerClauses);
}
clauses.add(new BooleanClause(posQuery,
getDefaultOperator() == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD));
}
return getBooleanQuery(clauses);
}
@Override

View File

@ -19,9 +19,9 @@
package org.elasticsearch.index.query;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@ -29,13 +29,15 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.search.SynonymQuery;
import org.elasticsearch.index.mapper.MappedFieldType;
import java.io.IOException;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.List;
import java.util.ArrayList;
/**
* Wrapper class for Lucene's SimpleQueryParser that allows us to redefine
@ -47,7 +49,8 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp
private QueryShardContext context;
/** Creates a new parser with custom flags used to enable/disable certain features. */
public SimpleQueryParser(Analyzer analyzer, Map<String, Float> weights, int flags, Settings settings, QueryShardContext context) {
public SimpleQueryParser(Analyzer analyzer, Map<String, Float> weights, int flags,
Settings settings, QueryShardContext context) {
super(analyzer, weights, flags);
this.settings = settings;
this.context = context;
@ -167,62 +170,79 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp
/**
* Analyze the given string using its analyzer, constructing either a
* {@code PrefixQuery} or a {@code BooleanQuery} made up
* of {@code PrefixQuery}s
* of {@code TermQuery}s and {@code PrefixQuery}s
*/
private Query newPossiblyAnalyzedQuery(String field, String termStr) {
List<List<String>> tlist = new ArrayList<> ();
// get Analyzer from superclass and tokenize the term
try (TokenStream source = getAnalyzer().tokenStream(field, termStr)) {
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
CachingTokenFilter buffer = new CachingTokenFilter(source);
buffer.reset();
source.reset();
List<String> currentPos = new ArrayList<>();
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posAtt = source.addAttribute(PositionIncrementAttribute.class);
TermToBytesRefAttribute termAtt = null;
int numTokens = 0;
boolean hasMoreTokens = false;
termAtt = buffer.getAttribute(TermToBytesRefAttribute.class);
if (termAtt != null) {
try {
hasMoreTokens = buffer.incrementToken();
while (hasMoreTokens) {
numTokens++;
hasMoreTokens = buffer.incrementToken();
try {
boolean hasMoreTokens = source.incrementToken();
while (hasMoreTokens) {
if (currentPos.isEmpty() == false && posAtt.getPositionIncrement() > 0) {
tlist.add(currentPos);
currentPos = new ArrayList<>();
}
} catch (IOException e) {
// ignore
currentPos.add(termAtt.toString());
hasMoreTokens = source.incrementToken();
}
}
// rewind buffer
buffer.reset();
if (numTokens == 0) {
return null;
} else if (numTokens == 1) {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
if (currentPos.isEmpty() == false) {
tlist.add(currentPos);
}
return new PrefixQuery(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef())));
} else {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
for (int i = 0; i < numTokens; i++) {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
bq.add(new BooleanClause(new PrefixQuery(new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef()))), BooleanClause.Occur.SHOULD));
}
return bq.build();
} catch (IOException e) {
// ignore
// TODO: we should not ignore the exception and return a prefix query with the original term ?
}
} catch (IOException e) {
// Bail on any exceptions, going with a regular prefix query
return new PrefixQuery(new Term(field, termStr));
}
if (tlist.size() == 0) {
return null;
}
if (tlist.size() == 1 && tlist.get(0).size() == 1) {
return new PrefixQuery(new Term(field, tlist.get(0).get(0)));
}
// build a boolean query with prefix on the last position only.
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (int pos = 0; pos < tlist.size(); pos++) {
List<String> plist = tlist.get(pos);
boolean isLastPos = (pos == tlist.size()-1);
Query posQuery;
if (plist.size() == 1) {
if (isLastPos) {
posQuery = new PrefixQuery(new Term(field, plist.get(0)));
} else {
posQuery = newTermQuery(new Term(field, plist.get(0)));
}
} else if (isLastPos == false) {
// build a synonym query for terms in the same position.
Term[] terms = new Term[plist.size()];
for (int i = 0; i < plist.size(); i++) {
terms[i] = new Term(field, plist.get(i));
}
posQuery = new SynonymQuery(terms);
} else {
BooleanQuery.Builder innerBuilder = new BooleanQuery.Builder();
for (String token : plist) {
innerBuilder.add(new BooleanClause(new PrefixQuery(new Term(field, token)),
BooleanClause.Occur.SHOULD));
}
posQuery = innerBuilder.setDisableCoord(true).build();
}
builder.add(new BooleanClause(posQuery, getDefaultOperator()));
}
return builder.build();
}
/**
* Class encapsulating the settings for the SimpleQueryString query, with
* their default values

View File

@ -645,13 +645,6 @@ public abstract class AbstractQueryTestCase<QB extends AbstractQueryBuilder<QB>>
assertThat(termQuery.getTerm().text().toLowerCase(Locale.ROOT), equalTo(value.toLowerCase(Locale.ROOT)));
}
protected static void assertPrefixQuery(Query query, String field, String value) {
assertThat(query, instanceOf(PrefixQuery.class));
PrefixQuery prefixQuery = (PrefixQuery) query;
assertThat(prefixQuery.getPrefix().field(), equalTo(field));
assertThat(prefixQuery.getPrefix().text(), equalTo(value));
}
/**
* Test serialization and deserialization of the test query.
*/

View File

@ -0,0 +1,68 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import java.io.IOException;
public class MockRepeatAnalyzer extends Analyzer {
private static class MockRepeatFilter extends TokenFilter {
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
String repeat;
public MockRepeatFilter(TokenStream input) {
super(input);
}
@Override
public final boolean incrementToken() throws IOException {
if (repeat != null) {
// add repeat token
clearAttributes();
termAtt.setEmpty().append(repeat);
posIncAtt.setPositionIncrement(0);
repeat = null;
return true;
}
if (input.incrementToken()) {
repeat = termAtt.toString();
return true;
} else {
return false;
}
}
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new StandardTokenizer();
TokenStream repeatFilter = new MockRepeatFilter(tokenizer);
return new TokenStreamComponents(tokenizer, repeatFilter);
}
}

View File

@ -21,6 +21,8 @@ package org.elasticsearch.index.query;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MapperQueryParser;
import org.apache.lucene.queryparser.classic.QueryParserSettings;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
@ -32,13 +34,18 @@ import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
import org.elasticsearch.Version;
import org.elasticsearch.common.lucene.all.AllTermQuery;
import org.elasticsearch.common.unit.Fuzziness;
import org.hamcrest.Matchers;
import org.joda.time.DateTimeZone;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
@ -298,32 +305,70 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
assertTermOrBoostQuery(disjuncts.get(1), STRING_FIELD_NAME_2, "test", 1.0f);
}
public void testToQueryPrefixQuery() throws Exception {
public void testToQueryWildcarQuery() throws Exception {
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
for (Operator op : Operator.values()) {
Query query = queryStringQuery("foo-bar-foobar*")
.defaultField(STRING_FIELD_NAME)
.analyzeWildcard(true)
.analyzer("standard")
.defaultOperator(op)
.toQuery(createShardContext());
assertThat(query, instanceOf(BooleanQuery.class));
BooleanQuery bq = (BooleanQuery) query;
assertThat(bq.clauses().size(), equalTo(3));
String[] expectedTerms = new String[]{"foo", "bar", "foobar"};
for (int i = 0; i < bq.clauses().size(); i++) {
BooleanClause clause = bq.clauses().get(i);
if (i != bq.clauses().size() - 1) {
assertTermQuery(clause.getQuery(), STRING_FIELD_NAME, expectedTerms[i]);
} else {
assertPrefixQuery(clause.getQuery(), STRING_FIELD_NAME, expectedTerms[i]);
}
if (op == Operator.AND) {
assertThat(clause.getOccur(), equalTo(BooleanClause.Occur.MUST));
} else {
assertThat(clause.getOccur(), equalTo(BooleanClause.Occur.SHOULD));
}
}
BooleanClause.Occur defaultOp = op.toBooleanClauseOccur();
MapperQueryParser queryParser = new MapperQueryParser(createShardContext());
QueryParserSettings settings = new QueryParserSettings("first foo-bar-foobar* last");
settings.defaultField(STRING_FIELD_NAME);
settings.fieldsAndWeights(Collections.emptyMap());
settings.analyzeWildcard(true);
settings.fuzziness(Fuzziness.AUTO);
settings.rewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
settings.defaultOperator(op.toQueryParserOperator());
queryParser.reset(settings);
Query query = queryParser.parse("first foo-bar-foobar* last");
Query expectedQuery =
new BooleanQuery.Builder()
.add(new BooleanClause(new TermQuery(new Term(STRING_FIELD_NAME, "first")), defaultOp))
.add(new BooleanQuery.Builder()
.add(new BooleanClause(new TermQuery(new Term(STRING_FIELD_NAME, "foo")), defaultOp))
.add(new BooleanClause(new TermQuery(new Term(STRING_FIELD_NAME, "bar")), defaultOp))
.add(new BooleanClause(new PrefixQuery(new Term(STRING_FIELD_NAME, "foobar")), defaultOp))
.build(), defaultOp)
.add(new BooleanClause(new TermQuery(new Term(STRING_FIELD_NAME, "last")), defaultOp))
.build();
assertThat(query, Matchers.equalTo(expectedQuery));
}
}
public void testToQueryWilcardQueryWithSynonyms() throws Exception {
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
for (Operator op : Operator.values()) {
BooleanClause.Occur defaultOp = op.toBooleanClauseOccur();
MapperQueryParser queryParser = new MapperQueryParser(createShardContext());
QueryParserSettings settings = new QueryParserSettings("first foo-bar-foobar* last");
settings.defaultField(STRING_FIELD_NAME);
settings.fieldsAndWeights(Collections.emptyMap());
settings.analyzeWildcard(true);
settings.fuzziness(Fuzziness.AUTO);
settings.rewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
settings.defaultOperator(op.toQueryParserOperator());
settings.forceAnalyzer(new MockRepeatAnalyzer());
queryParser.reset(settings);
Query query = queryParser.parse("first foo-bar-foobar* last");
Query expectedQuery = new BooleanQuery.Builder()
.add(new BooleanClause(new SynonymQuery(new Term(STRING_FIELD_NAME, "first"),
new Term(STRING_FIELD_NAME, "first")), defaultOp))
.add(new BooleanQuery.Builder()
.add(new BooleanClause(new SynonymQuery(new Term(STRING_FIELD_NAME, "foo"),
new Term(STRING_FIELD_NAME, "foo")), defaultOp))
.add(new BooleanClause(new SynonymQuery(new Term(STRING_FIELD_NAME, "bar"),
new Term(STRING_FIELD_NAME, "bar")), defaultOp))
.add(new BooleanQuery.Builder()
.add(new BooleanClause(new PrefixQuery(new Term(STRING_FIELD_NAME, "foobar")),
BooleanClause.Occur.SHOULD))
.add(new BooleanClause(new PrefixQuery(new Term(STRING_FIELD_NAME, "foobar")),
BooleanClause.Occur.SHOULD))
.setDisableCoord(true)
.build(), defaultOp)
.build(), defaultOp)
.add(new BooleanClause(new SynonymQuery(new Term(STRING_FIELD_NAME, "last"),
new Term(STRING_FIELD_NAME, "last")), defaultOp))
.build();
assertThat(query, Matchers.equalTo(expectedQuery));
}
}

View File

@ -0,0 +1,109 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause;
import org.elasticsearch.test.ESTestCase;
import java.util.HashMap;
import java.util.Map;
import static org.hamcrest.Matchers.equalTo;
public class SimpleQueryParserTests extends ESTestCase {
private static class MockSimpleQueryParser extends SimpleQueryParser {
public MockSimpleQueryParser(Analyzer analyzer, Map<String, Float> weights, int flags, Settings settings) {
super(analyzer, weights, flags, settings, null);
}
@Override
protected Query newTermQuery(Term term) {
return new TermQuery(term);
}
}
public void testAnalyzeWildcard() {
SimpleQueryParser.Settings settings = new SimpleQueryParser.Settings();
settings.analyzeWildcard(true);
Map<String, Float> weights = new HashMap<>();
weights.put("field1", 1.0f);
SimpleQueryParser parser = new MockSimpleQueryParser(new StandardAnalyzer(), weights, -1, settings);
for (Operator op : Operator.values()) {
BooleanClause.Occur defaultOp = op.toBooleanClauseOccur();
parser.setDefaultOperator(defaultOp);
Query query = parser.parse("first foo-bar-foobar* last");
Query expectedQuery =
new BooleanQuery.Builder()
.add(new BooleanClause(new TermQuery(new Term("field1", "first")), defaultOp))
.add(new BooleanQuery.Builder()
.add(new BooleanClause(new TermQuery(new Term("field1", "foo")), defaultOp))
.add(new BooleanClause(new TermQuery(new Term("field1", "bar")), defaultOp))
.add(new BooleanClause(new PrefixQuery(new Term("field1", "foobar")), defaultOp))
.build(), defaultOp)
.add(new BooleanClause(new TermQuery(new Term("field1", "last")), defaultOp))
.build();
assertThat(query, equalTo(expectedQuery));
}
}
public void testAnalyzerWildcardWithSynonyms() {
SimpleQueryParser.Settings settings = new SimpleQueryParser.Settings();
settings.analyzeWildcard(true);
Map<String, Float> weights = new HashMap<>();
weights.put("field1", 1.0f);
SimpleQueryParser parser = new MockSimpleQueryParser(new MockRepeatAnalyzer(), weights, -1, settings);
for (Operator op : Operator.values()) {
BooleanClause.Occur defaultOp = op.toBooleanClauseOccur();
parser.setDefaultOperator(defaultOp);
Query query = parser.parse("first foo-bar-foobar* last");
Query expectedQuery = new BooleanQuery.Builder()
.add(new BooleanClause(new SynonymQuery(new Term("field1", "first"),
new Term("field1", "first")), defaultOp))
.add(new BooleanQuery.Builder()
.add(new BooleanClause(new SynonymQuery(new Term("field1", "foo"),
new Term("field1", "foo")), defaultOp))
.add(new BooleanClause(new SynonymQuery(new Term("field1", "bar"),
new Term("field1", "bar")), defaultOp))
.add(new BooleanQuery.Builder()
.add(new BooleanClause(new PrefixQuery(new Term("field1", "foobar")),
BooleanClause.Occur.SHOULD))
.add(new BooleanClause(new PrefixQuery(new Term("field1", "foobar")),
BooleanClause.Occur.SHOULD))
.setDisableCoord(true)
.build(), defaultOp)
.build(), defaultOp)
.add(new BooleanClause(new SynonymQuery(new Term("field1", "last"),
new Term("field1", "last")), defaultOp))
.build();
assertThat(query, equalTo(expectedQuery));
}
}
}