[LUCENE-3744] - applied patch for whiteList usage in TypeTokenFilter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1240034 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tommaso Teofili 2012-02-03 09:13:17 +00:00
parent 8a70316f46
commit 6d3bb736f3
2 changed files with 18 additions and 3 deletions

View File

@ -31,17 +31,24 @@ public final class TypeTokenFilter extends FilteringTokenFilter {
// Token types this filter matches against: dropped by default, or the only ones kept when useWhiteList is true.
private final Set<String> stopTypes;
// Type attribute of the current token; its type() is what accept() looks up in stopTypes.
private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
// When true, stopTypes is interpreted as a whitelist rather than as a set of types to remove.
private final boolean useWhiteList;
/**
 * Creates a filter that keeps or drops tokens according to their type.
 *
 * @param enablePositionIncrements forwarded unchanged to the superclass constructor
 * @param input the token stream to filter; forwarded to the superclass constructor
 * @param stopTypes the set of token types to match against
 * @param useWhiteList if true, only tokens whose type is in stopTypes are accepted;
 *                     if false, tokens whose type is in stopTypes are rejected
 */
public TypeTokenFilter(boolean enablePositionIncrements, TokenStream input, Set<String> stopTypes) {
public TypeTokenFilter(boolean enablePositionIncrements, TokenStream input, Set<String> stopTypes, boolean useWhiteList) {
super(enablePositionIncrements, input);
this.stopTypes = stopTypes;
this.useWhiteList = useWhiteList;
}
/**
 * Creates a filter that rejects tokens whose type is in stopTypes.
 * Delegates to the four-argument constructor with useWhiteList set to false.
 *
 * @param enablePositionIncrements forwarded unchanged to the four-argument constructor
 * @param input the token stream to filter
 * @param stopTypes the set of token types to remove from the stream
 */
public TypeTokenFilter(boolean enablePositionIncrements, TokenStream input, Set<String> stopTypes) {
this(enablePositionIncrements, input, stopTypes, false);
}
/**
 * Decides whether the current token is accepted, based on typeAttribute.type().
 * By default (useWhiteList is false) the token is accepted if its type is not contained in stopTypes.
 * When useWhiteList is true the token is accepted only if its type is contained in stopTypes.
 *
 * @return true if the current token is accepted, false otherwise
 */
@Override
protected boolean accept() throws IOException {
return !stopTypes.contains(typeAttribute.type());
return useWhiteList == stopTypes.contains(typeAttribute.type());
}
}

View File

@ -27,6 +27,7 @@ import org.apache.lucene.util.English;
import java.io.IOException;
import java.io.StringReader;
import java.util.Collections;
import java.util.Set;
@ -83,6 +84,13 @@ public class TestTypeTokenFilter extends BaseTokenStreamTestCase {
stpf.close();
}
/**
 * With useWhiteList enabled and "&lt;NUM&gt;" as the only listed type,
 * the filter is expected to emit just the two numeric tokens of the input.
 */
public void testTypeFilterWhitelist() throws IOException {
final Set<String> allowedTypes = Collections.singleton("<NUM>");
final StandardTokenizer tokenizer =
new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader("121 is palindrome, while 123 is not"));
// The final 'true' switches the filter into whitelist mode.
final TokenStream filtered = new TypeTokenFilter(true, tokenizer, allowedTypes, true);
final String[] expectedTerms = {"121", "123"};
assertTokenStreamContents(filtered, expectedTerms);
}
// print debug info depending on VERBOSE
private static void log(String s) {
if (VERBOSE) {