From 3ad6e41910158a46025ff78330d78a31a7081887 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Thu, 23 Feb 2017 07:22:57 -0500 Subject: [PATCH] add javadocs explaining SynonymGraphFilter's ignoreCase --- .../analysis/synonym/SynonymGraphFilter.java | 8 +++++ .../synonym/TestSynonymGraphFilter.java | 34 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java index 788db0a15a9..e59e61bf723 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java @@ -160,6 +160,14 @@ public final class SynonymGraphFilter extends TokenFilter { } } + /** + * Applies a previously built {@link SynonymMap} to the incoming tokens. + * @param input the incoming token stream whose tokens are matched against the synonyms + * @param synonyms the previously built synonym map to apply + * @param ignoreCase if true, case-folds input for matching with {@link Character#toLowerCase(int)}. 
+ * Note, if you set this to true, it's your responsibility to lowercase + * the input side of each entry yourself when you create the {@link SynonymMap}. + */ public SynonymGraphFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) { super(input); this.synonyms = synonyms; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java index e00a1654c2a..730d00ac0af 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymGraphFilter.java @@ -23,6 +23,7 @@ import java.text.ParseException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Set; import org.apache.lucene.analysis.Analyzer; @@ -1922,4 +1923,37 @@ public class TestSynonymGraphFilter extends BaseTokenStreamTestCase { new int[]{1, 1, 0, 1, 1}); a.close(); } + + public void testUpperCase() throws IOException { + assertMapping("word", "synonym"); + assertMapping("word".toUpperCase(Locale.ROOT), "synonym"); + } + + private void assertMapping(String inputString, String outputString) throws IOException { + SynonymMap.Builder builder = new SynonymMap.Builder(false); + // the input side of each rule must be lowercased up front; incoming tokens are then matched case-insensitively: + CharsRef input = SynonymMap.Builder.join(inputString.toLowerCase(Locale.ROOT).split(" "), new CharsRefBuilder()); + CharsRef output = SynonymMap.Builder.join(outputString.split(" "), new CharsRefBuilder()); + builder.add(input, output, true); + Analyzer analyzer = new CustomAnalyzer(builder.build()); // NOTE(review): never closed in this helper, while the preceding test ends with a.close() -- consider closing it + TokenStream tokenStream = analyzer.tokenStream("field", inputString); + assertTokenStreamContents(tokenStream, new String[]{ + outputString, inputString + }); + } + + static class CustomAnalyzer extends Analyzer { + private 
SynonymMap synonymMap; + + CustomAnalyzer(SynonymMap synonymMap) { + this.synonymMap = synonymMap; + } + + @Override + protected TokenStreamComponents createComponents(String s) { + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); + TokenStream tokenStream = new SynonymGraphFilter(tokenizer, synonymMap, true); // ignoreCase=true: incoming tokens are case-folded before matching + return new TokenStreamComponents(tokenizer, tokenStream); + } + } }