mirror of https://github.com/apache/lucene.git
[LUCENE-3744] - applied patch for whiteList usage in TypeTokenFilter
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1240034 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8a70316f46
commit
6d3bb736f3
|
@ -31,17 +31,24 @@ public final class TypeTokenFilter extends FilteringTokenFilter {
|
|||
|
||||
private final Set<String> stopTypes;
|
||||
private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
|
||||
private final boolean useWhiteList;
|
||||
|
||||
public TypeTokenFilter(boolean enablePositionIncrements, TokenStream input, Set<String> stopTypes) {
|
||||
/**
 * Creates a filter that keeps or drops tokens according to their type.
 *
 * @param enablePositionIncrements forwarded unchanged to the superclass constructor
 * @param input the token stream to filter; forwarded unchanged to the superclass constructor
 * @param stopTypes the set of token type names that accept() checks each token's type against;
 *        the reference is stored as given (no copy is made here)
 * @param useWhiteList if true, accept() keeps only tokens whose type is contained in stopTypes;
 *        if false, accept() keeps only tokens whose type is not contained in stopTypes
 */
public TypeTokenFilter(boolean enablePositionIncrements, TokenStream input, Set<String> stopTypes, boolean useWhiteList) {
|
||||
super(enablePositionIncrements, input);
|
||||
this.stopTypes = stopTypes;
|
||||
this.useWhiteList = useWhiteList;
|
||||
}
|
||||
|
||||
/**
 * Convenience constructor that delegates to the four-argument constructor with
 * useWhiteList set to false, so stopTypes is treated as a list of types to drop.
 *
 * @param enablePositionIncrements passed through to the four-argument constructor
 * @param input the token stream to filter
 * @param stopTypes the set of token type names to filter out
 */
public TypeTokenFilter(boolean enablePositionIncrements, TokenStream input, Set<String> stopTypes) {
|
||||
this(enablePositionIncrements, input, stopTypes, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next input Token whose typeAttribute.type() is not a stop type.
|
||||
* By default accept the token if its type is not a stop type.
|
||||
* When the useWhiteList parameter is set to true, accept the token only if its type is contained in stopTypes.
|
||||
*
* NOTE(review): the first sentence above and the first return statement below look like
* pre-patch lines of the diff this text was taken from -- confirm against the repository.
*
* @return true when useWhiteList equals stopTypes.contains(typeAttribute.type()), false otherwise
*/
|
||||
@Override
|
||||
protected boolean accept() throws IOException {
|
||||
// NOTE(review): presumably the statement this patch replaced; it ignores useWhiteList.
return !stopTypes.contains(typeAttribute.type());
|
||||
// Accept when the flag and the set membership agree: in whitelist mode the type must be
// in stopTypes, otherwise the type must be absent from stopTypes.
return useWhiteList == stopTypes.contains(typeAttribute.type());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.util.English;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
|
@ -83,6 +84,13 @@ public class TestTypeTokenFilter extends BaseTokenStreamTestCase {
|
|||
stpf.close();
|
||||
}
|
||||
|
||||
// Exercises whitelist mode: the filter is built with useWhiteList=true and stopTypes
// containing only "<NUM>", and the test expects exactly the tokens "121" and "123"
// to come out of the stream for the sample sentence.
public void testTypeFilterWhitelist() throws IOException {
|
||||
StringReader reader = new StringReader("121 is palindrome, while 123 is not");
|
||||
Set<String> stopTypes = Collections.singleton("<NUM>");
|
||||
TokenStream stream = new TypeTokenFilter(true, new StandardTokenizer(TEST_VERSION_CURRENT, reader), stopTypes, true);
|
||||
assertTokenStreamContents(stream, new String[]{"121", "123"});
|
||||
}
|
||||
|
||||
// print debug info depending on VERBOSE
|
||||
private static void log(String s) {
|
||||
if (VERBOSE) {
|
||||
|
|
Loading…
Reference in New Issue