Fix (simple)_query_string to ignore removed terms (#28871)
This change ensures that we ignore terms removed by the analysis rather than returning a match_no_docs query for the part of the query that contains the stop word. For instance, a query like "the AND fox" should ignore "the" if it is considered a stop word, instead of adding a match_no_docs query. This change also fixes the analysis of prefix terms that start with a stop word (e.g. `the*`). Previously, in such a case, if `analyze_wildcard` was true and `the` was considered a stop word, this part of the query was rewritten into a match_no_docs query. Since it is a prefix query, this change forces the prefix query on `the` even if the term is removed by the analysis. Fixes #28855 Fixes #28856
This commit is contained in:
parent
5689dc1182
commit
c26bd6046b
|
@ -102,7 +102,10 @@ public class MatchQuery {
|
||||||
|
|
||||||
public enum ZeroTermsQuery implements Writeable {
|
public enum ZeroTermsQuery implements Writeable {
|
||||||
NONE(0),
|
NONE(0),
|
||||||
ALL(1);
|
ALL(1),
|
||||||
|
// this is used internally to make sure that query_string and simple_query_string
|
||||||
|
// ignore any query part whose analysis removes all tokens.
|
||||||
|
NULL(2);
|
||||||
|
|
||||||
private final int ordinal;
|
private final int ordinal;
|
||||||
|
|
||||||
|
@ -312,10 +315,16 @@ public class MatchQuery {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Query zeroTermsQuery() {
|
protected Query zeroTermsQuery() {
|
||||||
if (zeroTermsQuery == DEFAULT_ZERO_TERMS_QUERY) {
|
switch (zeroTermsQuery) {
|
||||||
return Queries.newMatchNoDocsQuery("Matching no documents because no terms present.");
|
case NULL:
|
||||||
|
return null;
|
||||||
|
case NONE:
|
||||||
|
return Queries.newMatchNoDocsQuery("Matching no documents because no terms present");
|
||||||
|
case ALL:
|
||||||
|
return Queries.newMatchAllQuery();
|
||||||
|
default:
|
||||||
|
throw new IllegalStateException("unknown zeroTermsQuery " + zeroTermsQuery);
|
||||||
}
|
}
|
||||||
return Queries.newMatchAllQuery();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private class MatchQueryBuilder extends QueryBuilder {
|
private class MatchQueryBuilder extends QueryBuilder {
|
||||||
|
|
|
@ -147,6 +147,7 @@ public class QueryStringQueryParser extends XQueryParser {
|
||||||
this.context = context;
|
this.context = context;
|
||||||
this.fieldsAndWeights = Collections.unmodifiableMap(fieldsAndWeights);
|
this.fieldsAndWeights = Collections.unmodifiableMap(fieldsAndWeights);
|
||||||
this.queryBuilder = new MultiMatchQuery(context);
|
this.queryBuilder = new MultiMatchQuery(context);
|
||||||
|
queryBuilder.setZeroTermsQuery(MatchQuery.ZeroTermsQuery.NULL);
|
||||||
queryBuilder.setLenient(lenient);
|
queryBuilder.setLenient(lenient);
|
||||||
this.lenient = lenient;
|
this.lenient = lenient;
|
||||||
}
|
}
|
||||||
|
@ -343,7 +344,6 @@ public class QueryStringQueryParser extends XQueryParser {
|
||||||
if (fields.isEmpty()) {
|
if (fields.isEmpty()) {
|
||||||
return newUnmappedFieldQuery(field);
|
return newUnmappedFieldQuery(field);
|
||||||
}
|
}
|
||||||
final Query query;
|
|
||||||
Analyzer oldAnalyzer = queryBuilder.analyzer;
|
Analyzer oldAnalyzer = queryBuilder.analyzer;
|
||||||
int oldSlop = queryBuilder.phraseSlop;
|
int oldSlop = queryBuilder.phraseSlop;
|
||||||
try {
|
try {
|
||||||
|
@ -353,7 +353,7 @@ public class QueryStringQueryParser extends XQueryParser {
|
||||||
queryBuilder.setAnalyzer(forceAnalyzer);
|
queryBuilder.setAnalyzer(forceAnalyzer);
|
||||||
}
|
}
|
||||||
queryBuilder.setPhraseSlop(slop);
|
queryBuilder.setPhraseSlop(slop);
|
||||||
query = queryBuilder.parse(MultiMatchQueryBuilder.Type.PHRASE, fields, queryText, null);
|
Query query = queryBuilder.parse(MultiMatchQueryBuilder.Type.PHRASE, fields, queryText, null);
|
||||||
return applySlop(query, slop);
|
return applySlop(query, slop);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new ParseException(e.getMessage());
|
throw new ParseException(e.getMessage());
|
||||||
|
@ -555,7 +555,7 @@ public class QueryStringQueryParser extends XQueryParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tlist.size() == 0) {
|
if (tlist.size() == 0) {
|
||||||
return new MatchNoDocsQuery("analysis was empty for " + field + ":" + termStr);
|
return super.getPrefixQuery(field, termStr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tlist.size() == 1 && tlist.get(0).size() == 1) {
|
if (tlist.size() == 1 && tlist.get(0).size() == 1) {
|
||||||
|
@ -763,7 +763,7 @@ public class QueryStringQueryParser extends XQueryParser {
|
||||||
@Override
|
@Override
|
||||||
public Query parse(String query) throws ParseException {
|
public Query parse(String query) throws ParseException {
|
||||||
if (query.trim().isEmpty()) {
|
if (query.trim().isEmpty()) {
|
||||||
return queryBuilder.zeroTermsQuery();
|
return Queries.newMatchNoDocsQuery("Matching no documents because no terms present");
|
||||||
}
|
}
|
||||||
return super.parse(query);
|
return super.parse(query);
|
||||||
}
|
}
|
||||||
|
|
|
@ -74,6 +74,7 @@ public class SimpleQueryStringQueryParser extends SimpleQueryParser {
|
||||||
this.queryBuilder = new MultiMatchQuery(context);
|
this.queryBuilder = new MultiMatchQuery(context);
|
||||||
this.queryBuilder.setAutoGenerateSynonymsPhraseQuery(settings.autoGenerateSynonymsPhraseQuery());
|
this.queryBuilder.setAutoGenerateSynonymsPhraseQuery(settings.autoGenerateSynonymsPhraseQuery());
|
||||||
this.queryBuilder.setLenient(settings.lenient());
|
this.queryBuilder.setLenient(settings.lenient());
|
||||||
|
this.queryBuilder.setZeroTermsQuery(MatchQuery.ZeroTermsQuery.NULL);
|
||||||
if (analyzer != null) {
|
if (analyzer != null) {
|
||||||
this.queryBuilder.setAnalyzer(analyzer);
|
this.queryBuilder.setAnalyzer(analyzer);
|
||||||
}
|
}
|
||||||
|
|
|
@ -111,7 +111,7 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
|
||||||
}
|
}
|
||||||
|
|
||||||
if (randomBoolean()) {
|
if (randomBoolean()) {
|
||||||
matchQuery.zeroTermsQuery(randomFrom(MatchQuery.ZeroTermsQuery.values()));
|
matchQuery.zeroTermsQuery(randomFrom(ZeroTermsQuery.ALL, ZeroTermsQuery.NONE));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (randomBoolean()) {
|
if (randomBoolean()) {
|
||||||
|
|
|
@ -129,7 +129,7 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
|
||||||
query.cutoffFrequency((float) 10 / randomIntBetween(1, 100));
|
query.cutoffFrequency((float) 10 / randomIntBetween(1, 100));
|
||||||
}
|
}
|
||||||
if (randomBoolean()) {
|
if (randomBoolean()) {
|
||||||
query.zeroTermsQuery(randomFrom(MatchQuery.ZeroTermsQuery.values()));
|
query.zeroTermsQuery(randomFrom(MatchQuery.ZeroTermsQuery.NONE, MatchQuery.ZeroTermsQuery.ALL));
|
||||||
}
|
}
|
||||||
if (randomBoolean()) {
|
if (randomBoolean()) {
|
||||||
query.autoGenerateSynonymsPhraseQuery(randomBoolean());
|
query.autoGenerateSynonymsPhraseQuery(randomBoolean());
|
||||||
|
|
|
@ -1052,6 +1052,33 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
|
||||||
assertEquals(expected, query);
|
assertEquals(expected, query);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testWithStopWords() throws Exception {
|
||||||
|
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
|
||||||
|
Query query = new QueryStringQueryBuilder("the quick fox")
|
||||||
|
.field(STRING_FIELD_NAME)
|
||||||
|
.analyzer("english")
|
||||||
|
.toQuery(createShardContext());
|
||||||
|
BooleanQuery expected = new BooleanQuery.Builder()
|
||||||
|
.add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), Occur.SHOULD)
|
||||||
|
.add(new TermQuery(new Term(STRING_FIELD_NAME, "fox")), Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
assertEquals(expected, query);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testWithPrefixStopWords() throws Exception {
|
||||||
|
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
|
||||||
|
Query query = new QueryStringQueryBuilder("the* quick fox")
|
||||||
|
.field(STRING_FIELD_NAME)
|
||||||
|
.analyzer("english")
|
||||||
|
.toQuery(createShardContext());
|
||||||
|
BooleanQuery expected = new BooleanQuery.Builder()
|
||||||
|
.add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), Occur.SHOULD)
|
||||||
|
.add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), Occur.SHOULD)
|
||||||
|
.add(new TermQuery(new Term(STRING_FIELD_NAME, "fox")), Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
assertEquals(expected, query);
|
||||||
|
}
|
||||||
|
|
||||||
private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) {
|
private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) {
|
||||||
Settings build = Settings.builder().put(oldIndexSettings)
|
Settings build = Settings.builder().put(oldIndexSettings)
|
||||||
.put(indexSettings)
|
.put(indexSettings)
|
||||||
|
|
|
@ -625,6 +625,33 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase<SimpleQ
|
||||||
assertEquals(expected, query);
|
assertEquals(expected, query);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testWithStopWords() throws Exception {
|
||||||
|
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
|
||||||
|
Query query = new SimpleQueryStringBuilder("the quick fox")
|
||||||
|
.field(STRING_FIELD_NAME)
|
||||||
|
.analyzer("english")
|
||||||
|
.toQuery(createShardContext());
|
||||||
|
BooleanQuery expected = new BooleanQuery.Builder()
|
||||||
|
.add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), BooleanClause.Occur.SHOULD)
|
||||||
|
.add(new TermQuery(new Term(STRING_FIELD_NAME, "fox")), BooleanClause.Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
assertEquals(expected, query);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testWithPrefixStopWords() throws Exception {
|
||||||
|
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
|
||||||
|
Query query = new SimpleQueryStringBuilder("the* quick fox")
|
||||||
|
.field(STRING_FIELD_NAME)
|
||||||
|
.analyzer("english")
|
||||||
|
.toQuery(createShardContext());
|
||||||
|
BooleanQuery expected = new BooleanQuery.Builder()
|
||||||
|
.add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), BooleanClause.Occur.SHOULD)
|
||||||
|
.add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), BooleanClause.Occur.SHOULD)
|
||||||
|
.add(new TermQuery(new Term(STRING_FIELD_NAME, "fox")), BooleanClause.Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
assertEquals(expected, query);
|
||||||
|
}
|
||||||
|
|
||||||
private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) {
|
private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) {
|
||||||
Settings build = Settings.builder().put(oldIndexSettings)
|
Settings build = Settings.builder().put(oldIndexSettings)
|
||||||
.put(indexSettings)
|
.put(indexSettings)
|
||||||
|
|
Loading…
Reference in New Issue