Don't use index_phrases on graph queries (#44340)
Due to https://issues.apache.org/jira/browse/LUCENE-8916, when you try to use a synonym filter with the index_phrases option on a text field, you can end up with null values in a PhraseQuery, leading to weird exceptions further down the querying chain. As a workaround, this commit disables the index_phrases optimization for queries that produce token graphs.

Fixes #43976
commit b6a0f098e6
parent ddd740162e
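
As a quick orientation before the diff: a minimal sketch of the guard this change introduces in TextFieldMapper.phraseQuery. The helper class and method names below are illustrative only, not the mapper's real API; the "._index_phrase" suffix is taken from the tests further down.

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.shingle.FixedShingleFilter;
import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;

// Illustrative helper, not part of the patch itself.
final class FastPhraseGuard {

    // Suffix of the shingled sub-field, as seen in the tests ("synfield._index_phrase").
    static final String FAST_PHRASE_SUFFIX = "._index_phrase";

    // The fast phrase path is only safe with no slop, no gaps, and a stream that is
    // not the output of a token graph. Graph output carries a BytesTermAttribute as a
    // side effect of LUCENE-8916, which is what the commit checks for.
    static boolean canUseFastPhrase(boolean indexPhrases, int slop, boolean hasGaps, TokenStream stream) {
        return indexPhrases
            && slop == 0
            && hasGaps == false
            && stream.hasAttribute(BytesTermAttribute.class) == false;
    }

    // On the fast path, terms are shingled into word pairs that match what was
    // indexed into the "<field>._index_phrase" sub-field at index time.
    static TokenStream maybeShingle(TokenStream stream, boolean useFastPhrase) {
        return useFastPhrase ? new FixedShingleFilter(stream, 2) : stream;
    }
}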
@@ -27,6 +27,9 @@ setup:
             properties:
               field:
                 type: text
+              phrase_field:
+                type: text
+                index_phrases: true

   - do:
       index:
@@ -204,3 +207,26 @@ setup:
   - match: { hits.hits.2._id: "1" }
   - match: { hits.hits.3._id: "8" }
   - match: { hits.hits.4._id: "2" }
+
+---
+"index_phrases":
+
+  - do:
+      index:
+        index: test
+        id: 9
+        body:
+          phrase_field: "bar baz"
+        refresh: true
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query:
+            match:
+              phrase_field:
+                query: bar baz
+                analyzer: lower_graph_syns
+  - match: { hits.total: 1 }
+
@@ -26,12 +26,15 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
 import org.apache.lucene.analysis.shingle.FixedShingleFilter;
+import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.intervals.Intervals;
+import org.apache.lucene.queries.intervals.IntervalsSource;
 import org.apache.lucene.search.AutomatonQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
@@ -44,8 +47,6 @@ import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SynonymQuery;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.queries.intervals.Intervals;
-import org.apache.lucene.queries.intervals.IntervalsSource;
 import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
 import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
 import org.apache.lucene.search.spans.SpanNearQuery;
@@ -688,7 +689,10 @@ public class TextFieldMapper extends FieldMapper {
         @Override
         public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements) throws IOException {
             String field = name();
-            if (indexPhrases && slop == 0 && hasGaps(stream) == false) {
+            // we can't use the index_phrases shortcut with slop, if there are gaps in the stream,
+            // or if the incoming token stream is the output of a token graph due to
+            // https://issues.apache.org/jira/browse/LUCENE-8916
+            if (indexPhrases && slop == 0 && hasGaps(stream) == false && stream.hasAttribute(BytesTermAttribute.class) == false) {
                 stream = new FixedShingleFilter(stream, 2);
                 field = field + FAST_PHRASE_SUFFIX;
             }
@@ -701,6 +705,9 @@ public class TextFieldMapper extends FieldMapper {

             stream.reset();
             while (stream.incrementToken()) {
+                if (termAtt.getBytesRef() == null) {
+                    throw new IllegalStateException("Null term while building phrase query");
+                }
                 if (enablePosIncrements) {
                     position += posIncrAtt.getPositionIncrement();
                 }
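
The hunk above shows only the new null check. For readers without the surrounding file, here is a self-contained sketch of roughly what the patched loop does, assuming a simple single-position stream (the real mapper also handles multi-phrase and graph cases); the class and method names are illustrative:

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;

// Illustrative only: consume an analyzed stream and build a PhraseQuery,
// failing fast instead of silently adding a null term.
final class PhraseFromStream {
    static Query build(String field, TokenStream stream, boolean enablePosIncrements) throws IOException {
        PhraseQuery.Builder builder = new PhraseQuery.Builder();
        TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
        int position = -1;
        stream.reset();
        while (stream.incrementToken()) {
            if (termAtt.getBytesRef() == null) {
                // Mirrors the new guard: a graph-produced stream can yield null terms (LUCENE-8916).
                throw new IllegalStateException("Null term while building phrase query");
            }
            if (enablePosIncrements) {
                position += posIncrAtt.getPositionIncrement();
            } else {
                position += 1;
            }
            builder.add(new Term(field, termAtt.getBytesRef()), position);
        }
        stream.end();
        stream.close();
        return builder.build();
    }
}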
@@ -19,7 +19,10 @@

 package org.elasticsearch.index.mapper;

+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockSynonymAnalyzer;
+import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.DocValuesType;
@@ -30,6 +33,8 @@ import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.MultiPhraseQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
@@ -831,6 +836,28 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
                         new Term("synfield._index_phrase", "motor dog")})
                 .build()));

+        // https://github.com/elastic/elasticsearch/issues/43976
+        CannedTokenStream cts = new CannedTokenStream(
+            new Token("foo", 1, 0, 2, 2),
+            new Token("bar", 0, 0, 2),
+            new Token("baz", 1, 0, 2)
+        );
+        Analyzer synonymAnalyzer = new Analyzer() {
+            @Override
+            protected TokenStreamComponents createComponents(String fieldName) {
+                return new TokenStreamComponents(reader -> {}, cts);
+            }
+        };
+        matchQuery.setAnalyzer(synonymAnalyzer);
+        Query q7 = matchQuery.parse(MatchQuery.Type.BOOLEAN, "synfield", "foo");
+        assertThat(q7, is(new BooleanQuery.Builder().add(new BooleanQuery.Builder()
+            .add(new TermQuery(new Term("synfield", "foo")), BooleanClause.Occur.SHOULD)
+            .add(new PhraseQuery.Builder()
+                .add(new Term("synfield", "bar"))
+                .add(new Term("synfield", "baz"))
+                .build(), BooleanClause.Occur.SHOULD)
+            .build(), BooleanClause.Occur.SHOULD).build()));
+
         ParsedDocument doc = mapper.parse(new SourceToParse("test", "type", "1", BytesReference
             .bytes(XContentFactory.jsonBuilder()
                 .startObject()
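
The CannedTokenStream above hand-builds the graph that a multi-word synonym would produce ("foo" stacked on top of "bar baz" via its position length of 2). For context, a hedged sketch of how the same shape arises from Lucene's SynonymGraphFilter; the synonym mapping here is hypothetical and only meant to show where such graphs come from:

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

// Illustrative only; the test above models this graph directly with CannedTokenStream.
final class GraphSynonymAnalyzerSketch {

    static Analyzer build() throws IOException {
        SynonymMap.Builder builder = new SynonymMap.Builder(true);
        // Map "foo" to the multi-word synonym "bar baz"; multi-word outputs are what
        // turn the query-time token stream into a graph.
        CharsRef input = SynonymMap.Builder.join(new String[] { "foo" }, new CharsRefBuilder());
        CharsRef output = SynonymMap.Builder.join(new String[] { "bar", "baz" }, new CharsRefBuilder());
        builder.add(input, output, true);
        SynonymMap synonyms = builder.build();

        return new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
                TokenStream graph = new SynonymGraphFilter(tokenizer, synonyms, true);
                return new TokenStreamComponents(tokenizer, graph);
            }
        };
    }
}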
|
Loading…
x
Reference in New Issue
Block a user