`query_string` should use indexed prefixes (#36895)

The QueryStringQueryBuilder does not currently delegate to the field mapper's prefixQuery
method, so does not use indexed prefixes. This commit corrects this.

It also fixes a bug where a query a* would not match the word a if indexed prefixes were used with
a minchar setting of 2.
This commit is contained in:
Alan Woodward 2019-01-02 20:12:24 +00:00 committed by GitHub
parent 265fdce312
commit 7a0047744d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 68 additions and 17 deletions

View File

@ -13,15 +13,15 @@ setup:
text:
type: text
index_prefixes:
min_chars: 1
max_chars: 10
min_chars: 2
max_chars: 5
- do:
index:
index: test
type: test
id: 1
body: { text: some short words and a stupendously long one }
body: { text: some short words with a stupendously long one }
- do:
indices.refresh:
@ -63,10 +63,11 @@ setup:
rest_total_hits_as_int: true
index: test
body:
explain: true
query:
query_string:
default_field: text
query: s*
query: a*
boost: 2
- match: {hits.total: 1}

View File

@ -33,6 +33,8 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
@ -156,7 +158,7 @@ public class TextFieldMapper extends FieldMapper {
if (maxChars >= 20) {
throw new IllegalArgumentException("max_chars [" + maxChars + "] must be less than 20");
}
this.prefixFieldType = new PrefixFieldType(name() + "._index_prefix", minChars, maxChars);
this.prefixFieldType = new PrefixFieldType(name(), name() + "._index_prefix", minChars, maxChars);
fieldType().setPrefixFieldType(this.prefixFieldType);
return this;
}
@ -347,14 +349,16 @@ public class TextFieldMapper extends FieldMapper {
final int minChars;
final int maxChars;
final String parentField;
PrefixFieldType(String name, int minChars, int maxChars) {
PrefixFieldType(String parentField, String name, int minChars, int maxChars) {
setTokenized(true);
setOmitNorms(true);
setIndexOptions(IndexOptions.DOCS);
setName(name);
this.minChars = minChars;
this.maxChars = maxChars;
this.parentField = parentField;
}
PrefixFieldType setAnalyzer(NamedAnalyzer delegate) {
@ -387,12 +391,15 @@ public class TextFieldMapper extends FieldMapper {
Automaton automaton = Operations.concatenate(automata);
AutomatonQuery query = new AutomatonQuery(new Term(name(), value + "*"), automaton);
query.setRewriteMethod(method);
return query;
return new BooleanQuery.Builder()
.add(query, BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term(parentField, value)), BooleanClause.Occur.SHOULD)
.build();
}
@Override
public PrefixFieldType clone() {
return new PrefixFieldType(name(), minChars, maxChars);
return new PrefixFieldType(parentField, name(), minChars, maxChars);
}
@Override

View File

@ -42,15 +42,14 @@ import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.StringFieldType;
import org.elasticsearch.index.query.ExistsQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.QueryShardContext;
@ -507,7 +506,7 @@ public class QueryStringQueryParser extends XQueryParser {
}
setAnalyzer(forceAnalyzer == null ? queryBuilder.context.getSearchAnalyzer(currentFieldType) : forceAnalyzer);
Query query = null;
if (currentFieldType instanceof StringFieldType == false) {
if (currentFieldType.tokenized() == false) {
query = currentFieldType.prefixQuery(termStr, getMultiTermRewriteMethod(), context);
} else {
query = getPossiblyAnalyzedPrefixQuery(currentFieldType.name(), termStr);
@ -525,7 +524,8 @@ public class QueryStringQueryParser extends XQueryParser {
private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr) throws ParseException {
if (analyzeWildcard == false) {
return super.getPrefixQuery(field, termStr);
return currentFieldType.prefixQuery(getAnalyzer().normalize(field, termStr).utf8ToString(),
getMultiTermRewriteMethod(), context);
}
List<List<String> > tlist;
// get Analyzer from superclass and tokenize the term
@ -568,7 +568,7 @@ public class QueryStringQueryParser extends XQueryParser {
}
if (tlist.size() == 1 && tlist.get(0).size() == 1) {
return super.getPrefixQuery(field, tlist.get(0).get(0));
return currentFieldType.prefixQuery(tlist.get(0).get(0), getMultiTermRewriteMethod(), context);
}
// build a boolean query with prefix on the last position only.
@ -579,7 +579,7 @@ public class QueryStringQueryParser extends XQueryParser {
Query posQuery;
if (plist.size() == 1) {
if (isLastPos) {
posQuery = super.getPrefixQuery(field, plist.get(0));
posQuery = currentFieldType.prefixQuery(plist.get(0), getMultiTermRewriteMethod(), context);
} else {
posQuery = newTermQuery(new Term(field, plist.get(0)));
}

View File

@ -21,6 +21,8 @@ package org.elasticsearch.index.mapper;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.PrefixQuery;
@ -40,6 +42,7 @@ import java.util.Arrays;
import java.util.List;
import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE;
import static org.hamcrest.Matchers.equalTo;
public class TextFieldTypeTests extends FieldTypeTestCase {
@Override
@ -90,7 +93,7 @@ public class TextFieldTypeTests extends FieldTypeTestCase {
TextFieldMapper.TextFieldType tft = (TextFieldMapper.TextFieldType)ft;
TextFieldMapper.PrefixFieldType pft = tft.getPrefixFieldType();
if (pft == null) {
tft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType(ft.name(), 3, 3));
tft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType(ft.name(), ft.name() + "._index_prefix", 3, 3));
}
else {
tft.setPrefixFieldType(null);
@ -156,7 +159,7 @@ public class TextFieldTypeTests extends FieldTypeTestCase {
public void testIndexPrefixes() {
TextFieldMapper.TextFieldType ft = new TextFieldMapper.TextFieldType();
ft.setName("field");
ft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType("field._index_prefix", 2, 10));
ft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType("field", "field._index_prefix", 2, 10));
Query q = ft.prefixQuery("goin", CONSTANT_SCORE_REWRITE, null);
assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field._index_prefix", "goin"))), q);
@ -167,6 +170,12 @@ public class TextFieldTypeTests extends FieldTypeTestCase {
q = ft.prefixQuery("g", CONSTANT_SCORE_REWRITE, null);
Automaton automaton
= Operations.concatenate(Arrays.asList(Automata.makeChar('g'), Automata.makeAnyChar()));
assertEquals(new ConstantScoreQuery(new AutomatonQuery(new Term("field._index_prefix", "g*"), automaton)), q);
Query expected = new ConstantScoreQuery(new BooleanQuery.Builder()
.add(new AutomatonQuery(new Term("field._index_prefix", "g*"), automaton), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("field", "g")), BooleanClause.Occur.SHOULD)
.build());
assertThat(q, equalTo(expected));
}
}

View File

@ -22,6 +22,7 @@ package org.elasticsearch.index.query;
import org.apache.lucene.analysis.MockSynonymAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
@ -45,6 +46,9 @@ import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
import org.elasticsearch.Version;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
@ -69,6 +73,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.AbstractQueryBuilder.parseInnerQueryBuilder;
import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertBooleanSubQuery;
@ -78,6 +83,20 @@ import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.instanceOf;
public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStringQueryBuilder> {
@Override
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
XContentBuilder mapping = jsonBuilder().startObject().startObject("_doc").startObject("properties")
.startObject("prefix_field")
.field("type", "text")
.startObject("index_prefixes").endObject()
.endObject()
.endObject().endObject().endObject();
mapperService.merge("_doc",
new CompressedXContent(Strings.toString(mapping)), MapperService.MergeReason.MAPPING_UPDATE);
}
@Override
protected QueryStringQueryBuilder doCreateTestQueryBuilder() {
int numTerms = randomIntBetween(0, 5);
@ -535,6 +554,21 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
}
}
public void testToQueryWildcardWithIndexedPrefixes() throws Exception {
QueryStringQueryParser queryParser = new QueryStringQueryParser(createShardContext(), "prefix_field");
Query query = queryParser.parse("foo*");
Query expectedQuery = new ConstantScoreQuery(new TermQuery(new Term("prefix_field._index_prefix", "foo")));
assertThat(query, equalTo(expectedQuery));
query = queryParser.parse("g*");
Automaton a = Operations.concatenate(Arrays.asList(Automata.makeChar('g'), Automata.makeAnyChar()));
expectedQuery = new ConstantScoreQuery(new BooleanQuery.Builder()
.add(new AutomatonQuery(new Term("prefix_field._index_prefix", "g*"), a), Occur.SHOULD)
.add(new TermQuery(new Term("prefix_field", "g")), Occur.SHOULD)
.build());
assertThat(query, equalTo(expectedQuery));
}
public void testToQueryWilcardQueryWithSynonyms() throws Exception {
for (Operator op : Operator.values()) {
BooleanClause.Occur defaultOp = op.toBooleanClauseOccur();