mirror of https://github.com/apache/lucene.git
add javadocs explaining SynonymGraphFilter's ignoreCase
This commit is contained in:
parent
8ed8ecfc7e
commit
3ad6e41910
|
@ -160,6 +160,14 @@ public final class SynonymGraphFilter extends TokenFilter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Apply previously built synonyms to incoming tokens.
|
||||||
|
* @param input input tokenstream
|
||||||
|
* @param synonyms synonym map
|
||||||
|
* @param ignoreCase if true, case-folds input tokens using {@link Character#toLowerCase(int)} before matching.
|
||||||
|
* Note, if you set this to true, it's your responsibility to lowercase
|
||||||
|
* the input entries when you create the {@link SynonymMap}
|
||||||
|
*/
|
||||||
public SynonymGraphFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) {
|
public SynonymGraphFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) {
|
||||||
super(input);
|
super(input);
|
||||||
this.synonyms = synonyms;
|
this.synonyms = synonyms;
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.text.ParseException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
@ -1922,4 +1923,37 @@ public class TestSynonymGraphFilter extends BaseTokenStreamTestCase {
|
||||||
new int[]{1, 1, 0, 1, 1});
|
new int[]{1, 1, 0, 1, 1});
|
||||||
a.close();
|
a.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Runs {@code assertMapping} twice for the rule "word" -> "synonym": once with the input as
 * written and once with the input upper-cased via {@link Locale#ROOT}, so both casings of the
 * incoming text are exercised against the same expected synonym.
 */
public void testUpperCase() throws IOException {
|
||||||

assertMapping("word", "synonym");
|
||||||

assertMapping("word".toUpperCase(Locale.ROOT), "synonym");
|
||||||

}
|
||||||
|
|
||||||
|
/**
 * Builds a one-rule synonym map from the lowercased {@code inputString} to {@code outputString},
 * analyzes the original (not lowercased) {@code inputString} with {@code CustomAnalyzer}, and
 * asserts that the resulting tokens are {@code outputString} followed by {@code inputString}.
 *
 * @param inputString text to analyze; its lowercased, space-split form is the rule's input side
 * @param outputString expected synonym; its space-split form is the rule's output side
 * @throws IOException declared by the method; presumably propagated from building the map or
 *     consuming the token stream (confirm against the called APIs)
 */
private void assertMapping(String inputString, String outputString) throws IOException {
|
||||||

SynonymMap.Builder builder = new SynonymMap.Builder(false);
|
||||||

// The rule's input side is lowercased here, while the text that is analyzed below keeps its original case:
|
||||||

CharsRef input = SynonymMap.Builder.join(inputString.toLowerCase(Locale.ROOT).split(" "), new CharsRefBuilder());
|
||||||

CharsRef output = SynonymMap.Builder.join(outputString.split(" "), new CharsRefBuilder());
|
||||||

// NOTE(review): the third argument is presumably includeOrig (keep the original token) - confirm against SynonymMap.Builder#add
builder.add(input, output, true);
|
||||||

Analyzer analyzer = new CustomAnalyzer(builder.build());
|
||||||

TokenStream tokenStream = analyzer.tokenStream("field", inputString);
|
||||||

assertTokenStreamContents(tokenStream, new String[]{
|
||||||

outputString, inputString
|
||||||

});
|
||||||

}
|
||||||
|
|
||||||
|
/**
 * Test analyzer that tokenizes with a {@code MockTokenizer} in {@code WHITESPACE} mode and wraps
 * it in a {@code SynonymGraphFilter} constructed with {@code ignoreCase} set to true, using the
 * synonym map supplied at construction time.
 */
static class CustomAnalyzer extends Analyzer {
|
||||||

// Synonym rules handed to the SynonymGraphFilter in createComponents.
private SynonymMap synonymMap;
|
||||||

|
||||||

CustomAnalyzer(SynonymMap synonymMap) {
|
||||||

this.synonymMap = synonymMap;
|
||||||

}
|
||||||

|
||||||

@Override
|
||||||

protected TokenStreamComponents createComponents(String s) {
|
||||||

// NOTE(review): the second argument is presumably MockTokenizer's lowercase flag (false = keep token case) - confirm
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||||

TokenStream tokenStream = new SynonymGraphFilter(tokenizer, synonymMap, true); // third argument is ignoreCase = true
|
||||||

return new TokenStreamComponents(tokenizer, tokenStream);
|
||||||

}
|
||||||

}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue