AsciiFoldingFilter's multi-term component should never preserve the original token. (#21982)
This ports the fix of https://issues.apache.org/jira/browse/LUCENE-7536 to Elasticsearch's ASCIIFoldingTokenFilterFactory.
This commit is contained in:
parent
c8f241f284
commit
26cbda41ea
|
@@ -47,6 +47,20 @@ public class ASCIIFoldingTokenFilterFactory extends AbstractTokenFilterFactory i
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object getMultiTermComponent() {
|
public Object getMultiTermComponent() {
|
||||||
return this;
|
if (preserveOriginal == false) {
|
||||||
|
return this;
|
||||||
|
} else {
|
||||||
|
// See https://issues.apache.org/jira/browse/LUCENE-7536 for the reasoning
|
||||||
|
return new TokenFilterFactory() {
|
||||||
|
@Override
|
||||||
|
public String name() {
|
||||||
|
return ASCIIFoldingTokenFilterFactory.this.name();
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
|
return new ASCIIFoldingFilter(tokenStream, false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@@ -55,5 +55,12 @@ public class ASCIIFoldingTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
||||||
Tokenizer tokenizer = new WhitespaceTokenizer();
|
Tokenizer tokenizer = new WhitespaceTokenizer();
|
||||||
tokenizer.setReader(new StringReader(source));
|
tokenizer.setReader(new StringReader(source));
|
||||||
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
|
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
|
||||||
|
|
||||||
|
// but the multi-term aware component still emits a single token
|
||||||
|
tokenFilter = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilter).getMultiTermComponent();
|
||||||
|
tokenizer = new WhitespaceTokenizer();
|
||||||
|
tokenizer.setReader(new StringReader(source));
|
||||||
|
expected = new String[]{"Anspruche"};
|
||||||
|
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue