LUCENE-7827: AnalyzingInfixSuggester omits textgrams when

minPrefixChars=0
This commit is contained in:
Mikhail Khludnev 2017-08-23 19:55:32 +03:00
parent a3bcf77705
commit 7760b35645
2 changed files with 14 additions and 3 deletions

View File

@ -39,6 +39,9 @@ Optimizations
* LUCENE-7939: MinShouldMatchSumScorer now leverages two-phase iteration in * LUCENE-7939: MinShouldMatchSumScorer now leverages two-phase iteration in
order to be faster when used in conjunctions. (Adrien Grand) order to be faster when used in conjunctions. (Adrien Grand)
* LUCENE-7827: AnalyzingInfixSuggester doesn't create "textgrams"
when minPrefixChars=0 (Mikhail Khludnev)
Bug Fixes Bug Fixes
* LUCENE-7916: Prevent ArrayIndexOutOfBoundsException if ICUTokenizer is used * LUCENE-7916: Prevent ArrayIndexOutOfBoundsException if ICUTokenizer is used

View File

@ -107,6 +107,10 @@ import org.apache.lucene.util.RamUsageEstimator;
public class AnalyzingInfixSuggester extends Lookup implements Closeable { public class AnalyzingInfixSuggester extends Lookup implements Closeable {
/** Field name used for the indexed edge n-grams, which allow tokens shorter than
* {@linkplain #minPrefixChars} to be matched with a TermQuery instead of a PrefixQuery. */
protected final static String TEXTGRAMS_FIELD_NAME = "textgrams";
/** Field name used for the indexed text. */ /** Field name used for the indexed text. */
protected final static String TEXT_FIELD_NAME = "text"; protected final static String TEXT_FIELD_NAME = "text";
@ -353,7 +357,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
@Override @Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
if (fieldName.equals("textgrams") && minPrefixChars > 0) { assert !(fieldName.equals(TEXTGRAMS_FIELD_NAME) && minPrefixChars == 0)
: "no need \"textgrams\" when minPrefixChars="+minPrefixChars;
if (fieldName.equals(TEXTGRAMS_FIELD_NAME) && minPrefixChars > 0) {
// TODO: should use an EdgeNGramTokenFilterFactory here // TODO: should use an EdgeNGramTokenFilterFactory here
TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), 1, minPrefixChars); TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), 1, minPrefixChars);
return new TokenStreamComponents(components.getTokenizer(), filter); return new TokenStreamComponents(components.getTokenizer(), filter);
@ -410,7 +416,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
Document doc = new Document(); Document doc = new Document();
FieldType ft = getTextFieldType(); FieldType ft = getTextFieldType();
doc.add(new Field(TEXT_FIELD_NAME, textString, ft)); doc.add(new Field(TEXT_FIELD_NAME, textString, ft));
doc.add(new Field("textgrams", textString, ft)); if (minPrefixChars>0) {
doc.add(new Field(TEXTGRAMS_FIELD_NAME, textString, ft));
}
doc.add(new StringField(EXACT_TEXT_FIELD_NAME, textString, Field.Store.NO)); doc.add(new StringField(EXACT_TEXT_FIELD_NAME, textString, Field.Store.NO));
doc.add(new BinaryDocValuesField(TEXT_FIELD_NAME, text)); doc.add(new BinaryDocValuesField(TEXT_FIELD_NAME, text));
doc.add(new NumericDocValuesField("weight", weight)); doc.add(new NumericDocValuesField("weight", weight));
@ -474,7 +482,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
protected Query getLastTokenQuery(String token) throws IOException { protected Query getLastTokenQuery(String token) throws IOException {
if (token.length() < minPrefixChars) { if (token.length() < minPrefixChars) {
// The leading ngram was directly indexed: // The leading ngram was directly indexed:
return new TermQuery(new Term("textgrams", token)); return new TermQuery(new Term(TEXTGRAMS_FIELD_NAME, token));
} }
return new PrefixQuery(new Term(TEXT_FIELD_NAME, token)); return new PrefixQuery(new Term(TEXT_FIELD_NAME, token));