add javadocs explaining SynonymGraphFilter's ignoreCase

This commit is contained in:
Mike McCandless 2017-02-23 07:22:57 -05:00
parent 8ed8ecfc7e
commit 3ad6e41910
2 changed files with 42 additions and 0 deletions

View File

@ -160,6 +160,14 @@ public final class SynonymGraphFilter extends TokenFilter {
}
}
/**
* Apply previously built synonyms to incoming tokens.
* @param input input tokenstream
* @param synonyms synonym map
* @param ignoreCase if true, case-folds each incoming token with {@link Character#toLowerCase(int)}
* before matching. Note: if you set this to true, it is your responsibility to lowercase
* the input entries when you create the {@link SynonymMap}.
*/
public SynonymGraphFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) {
super(input);
this.synonyms = synonyms;

View File

@ -23,6 +23,7 @@ import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
@ -1922,4 +1923,37 @@ public class TestSynonymGraphFilter extends BaseTokenStreamTestCase {
new int[]{1, 1, 0, 1, 1});
a.close();
}
/**
 * Checks that the same synonym rule is applied whether the analyzed text is
 * lowercase or has been uppercased with {@link Locale#ROOT}.
 */
public void testUpperCase() throws IOException {
  final String lowerCased = "word";
  final String upperCased = lowerCased.toUpperCase(Locale.ROOT);
  final String expectedSynonym = "synonym";

  assertMapping(lowerCased, expectedSynonym);
  assertMapping(upperCased, expectedSynonym);
}
/**
 * Builds a one-rule synonym map whose input side is the lowercased form of
 * {@code inputString} and whose output side is {@code outputString}, analyzes
 * {@code inputString} exactly as given, and asserts that the resulting stream
 * contains {@code outputString} followed by {@code inputString}.
 *
 * @param inputString text to analyze; its lowercased form is used as the rule's input
 * @param outputString synonym expected to be emitted for the input
 * @throws IOException if building the map or consuming the token stream fails
 */
private void assertMapping(String inputString, String outputString) throws IOException {
  SynonymMap.Builder builder = new SynonymMap.Builder(false);
  // the rules must be lowercased up front; the incoming tokens are then matched case-insensitively:
  CharsRef input = SynonymMap.Builder.join(inputString.toLowerCase(Locale.ROOT).split(" "), new CharsRefBuilder());
  CharsRef output = SynonymMap.Builder.join(outputString.split(" "), new CharsRefBuilder());
  builder.add(input, output, true);
  // Analyzer is Closeable; release it once the assertion has run instead of leaking it.
  try (Analyzer analyzer = new CustomAnalyzer(builder.build())) {
    TokenStream tokenStream = analyzer.tokenStream("field", inputString);
    assertTokenStreamContents(tokenStream, new String[] {outputString, inputString});
  }
}
/**
 * Test analyzer that wraps a {@link MockTokenizer} in a {@link SynonymGraphFilter}
 * created with {@code ignoreCase} set to {@code true}, using the synonym map
 * supplied at construction time.
 */
static class CustomAnalyzer extends Analyzer {
  // Assigned once in the constructor and never changed afterwards.
  private final SynonymMap synonymMap;

  /**
   * @param synonymMap synonym rules the filter should apply
   */
  CustomAnalyzer(SynonymMap synonymMap) {
    this.synonymMap = synonymMap;
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    // The tokenizer is built with 'false' as its second argument (lowerCase per the
    // MockTokenizer API), so case handling is left to the filter below.
    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    // Third argument is ignoreCase: incoming tokens are case-folded before matching.
    TokenStream tokenStream = new SynonymGraphFilter(tokenizer, synonymMap, true);
    return new TokenStreamComponents(tokenizer, tokenStream);
  }
}
}