mirror of https://github.com/apache/lucene.git
LUCENE-10353: add random null injection to TestRandomChains (#586)
Co-authored-by: Uwe Schindler <uschindler@apache.org>, Robert Muir <rmuir@apache.org>
This commit is contained in:
parent
603a43f668
commit
f2e00bb9e0
|
@ -165,6 +165,9 @@ Bug Fixes
|
||||||
* LUCENE-10352: Fixed ctor argument checks: JapaneseKatakanaStemFilter,
|
* LUCENE-10352: Fixed ctor argument checks: JapaneseKatakanaStemFilter,
|
||||||
DoubleMetaphoneFilter (Uwe Schindler, Robert Muir)
|
DoubleMetaphoneFilter (Uwe Schindler, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-10353: Add random null injection to TestRandomChains. (Robert Muir,
|
||||||
|
Uwe Schindler)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,6 @@ module org.apache.lucene.analysis.tests {
|
||||||
requires org.apache.lucene.analysis.smartcn;
|
requires org.apache.lucene.analysis.smartcn;
|
||||||
requires org.apache.lucene.analysis.stempel;
|
requires org.apache.lucene.analysis.stempel;
|
||||||
requires org.apache.lucene.test_framework;
|
requires org.apache.lucene.test_framework;
|
||||||
requires junit;
|
|
||||||
|
|
||||||
exports org.apache.lucene.analysis.tests;
|
exports org.apache.lucene.analysis.tests;
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,6 +36,7 @@ import java.util.HashSet;
|
||||||
import java.util.IdentityHashMap;
|
import java.util.IdentityHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
|
@ -646,6 +647,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
static <T> T newRandomArg(Random random, Class<T> paramType) {
|
static <T> T newRandomArg(Random random, Class<T> paramType) {
|
||||||
|
// if the argument type is not a primitive, return 1/10th of all cases null:
|
||||||
|
if (!paramType.isPrimitive() && random.nextInt(10) == 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
final Function<Random, Object> producer = argProducers.get(paramType);
|
final Function<Random, Object> producer = argProducers.get(paramType);
|
||||||
assertNotNull("No producer for arguments of type " + paramType.getName() + " found", producer);
|
assertNotNull("No producer for arguments of type " + paramType.getName() + " found", producer);
|
||||||
return (T) producer.apply(random);
|
return (T) producer.apply(random);
|
||||||
|
@ -754,10 +759,11 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
||||||
} catch (InvocationTargetException ite) {
|
} catch (InvocationTargetException ite) {
|
||||||
final Throwable cause = ite.getCause();
|
final Throwable cause = ite.getCause();
|
||||||
if (cause instanceof IllegalArgumentException
|
if (cause instanceof IllegalArgumentException
|
||||||
|
|| (cause instanceof NullPointerException && Stream.of(args).anyMatch(Objects::isNull))
|
||||||
|| cause instanceof UnsupportedOperationException) {
|
|| cause instanceof UnsupportedOperationException) {
|
||||||
// thats ok, ignore
|
// thats ok, ignore
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.err.println("Ignoring IAE/UOE from ctor:");
|
System.err.println("Ignoring IAE/UOE/NPE from ctor:");
|
||||||
cause.printStackTrace(System.err);
|
cause.printStackTrace(System.err);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.lucene.analysis.compound;
|
package org.apache.lucene.analysis.compound;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
|
import org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
|
||||||
|
@ -32,7 +33,7 @@ import org.xml.sax.InputSource;
|
||||||
* this.
|
* this.
|
||||||
*/
|
*/
|
||||||
public class HyphenationCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
|
public class HyphenationCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
|
||||||
private HyphenationTree hyphenator;
|
private final HyphenationTree hyphenator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
|
* Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
|
||||||
|
@ -74,7 +75,7 @@ public class HyphenationCompoundWordTokenFilter extends CompoundWordTokenFilterB
|
||||||
boolean onlyLongestMatch) {
|
boolean onlyLongestMatch) {
|
||||||
super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
|
super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
|
||||||
|
|
||||||
this.hyphenator = hyphenator;
|
this.hyphenator = Objects.requireNonNull(hyphenator, "hyphenator");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.analysis.core;
|
package org.apache.lucene.analysis.core;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import org.apache.lucene.analysis.FilteringTokenFilter;
|
import org.apache.lucene.analysis.FilteringTokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -38,7 +39,7 @@ public final class TypeTokenFilter extends FilteringTokenFilter {
|
||||||
*/
|
*/
|
||||||
public TypeTokenFilter(TokenStream input, Set<String> stopTypes, boolean useWhiteList) {
|
public TypeTokenFilter(TokenStream input, Set<String> stopTypes, boolean useWhiteList) {
|
||||||
super(input);
|
super(input);
|
||||||
this.stopTypes = stopTypes;
|
this.stopTypes = Objects.requireNonNull(stopTypes, "stopTypes");
|
||||||
this.useWhiteList = useWhiteList;
|
this.useWhiteList = useWhiteList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.analysis.miscellaneous;
|
package org.apache.lucene.analysis.miscellaneous;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.FilteringTokenFilter;
|
import org.apache.lucene.analysis.FilteringTokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -42,7 +43,7 @@ public final class KeepWordFilter extends FilteringTokenFilter {
|
||||||
*/
|
*/
|
||||||
public KeepWordFilter(TokenStream in, CharArraySet words) {
|
public KeepWordFilter(TokenStream in, CharArraySet words) {
|
||||||
super(in);
|
super(in);
|
||||||
this.words = words;
|
this.words = Objects.requireNonNull(words, "words");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.analysis.miscellaneous;
|
package org.apache.lucene.analysis.miscellaneous;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
@ -39,7 +40,7 @@ public final class SetKeywordMarkerFilter extends KeywordMarkerFilter {
|
||||||
*/
|
*/
|
||||||
public SetKeywordMarkerFilter(final TokenStream in, final CharArraySet keywordSet) {
|
public SetKeywordMarkerFilter(final TokenStream in, final CharArraySet keywordSet) {
|
||||||
super(in);
|
super(in);
|
||||||
this.keywordSet = keywordSet;
|
this.keywordSet = Objects.requireNonNull(keywordSet, "keywordSet");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.analysis.pattern;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import org.apache.lucene.analysis.charfilter.BaseCharFilter;
|
import org.apache.lucene.analysis.charfilter.BaseCharFilter;
|
||||||
|
@ -46,8 +47,8 @@ public class PatternReplaceCharFilter extends BaseCharFilter {
|
||||||
|
|
||||||
public PatternReplaceCharFilter(Pattern pattern, String replacement, Reader in) {
|
public PatternReplaceCharFilter(Pattern pattern, String replacement, Reader in) {
|
||||||
super(in);
|
super(in);
|
||||||
this.pattern = pattern;
|
this.pattern = Objects.requireNonNull(pattern, "pattern");
|
||||||
this.replacement = replacement;
|
this.replacement = Objects.requireNonNull(replacement, "replacement");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -18,8 +18,10 @@
|
||||||
package org.apache.lucene.analysis.pattern;
|
package org.apache.lucene.analysis.pattern;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.stream.Stream;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
@ -46,6 +48,10 @@ public class PatternTypingFilter extends TokenFilter {
|
||||||
|
|
||||||
public PatternTypingFilter(TokenStream input, PatternTypingRule... replacementAndFlagByPattern) {
|
public PatternTypingFilter(TokenStream input, PatternTypingRule... replacementAndFlagByPattern) {
|
||||||
super(input);
|
super(input);
|
||||||
|
if (replacementAndFlagByPattern == null
|
||||||
|
|| Stream.of(replacementAndFlagByPattern).anyMatch(Objects::isNull)) {
|
||||||
|
throw new NullPointerException("replacementAndFlagByPattern");
|
||||||
|
}
|
||||||
this.replacementAndFlagByPattern = replacementAndFlagByPattern;
|
this.replacementAndFlagByPattern = replacementAndFlagByPattern;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.lucene.analysis.payloads;
|
package org.apache.lucene.analysis.payloads;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
@ -45,7 +46,7 @@ public final class DelimitedPayloadTokenFilter extends TokenFilter {
|
||||||
public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) {
|
public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) {
|
||||||
super(input);
|
super(input);
|
||||||
this.delimiter = delimiter;
|
this.delimiter = delimiter;
|
||||||
this.encoder = encoder;
|
this.encoder = Objects.requireNonNull(encoder, "encoder");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.lucene.analysis.payloads;
|
package org.apache.lucene.analysis.payloads;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
|
@ -29,20 +30,17 @@ import org.apache.lucene.util.BytesRef;
|
||||||
*/
|
*/
|
||||||
public class NumericPayloadTokenFilter extends TokenFilter {
|
public class NumericPayloadTokenFilter extends TokenFilter {
|
||||||
|
|
||||||
private String typeMatch;
|
private final String typeMatch;
|
||||||
private BytesRef thePayload;
|
private final BytesRef thePayload;
|
||||||
|
|
||||||
private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
|
private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
|
||||||
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
|
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
|
||||||
|
|
||||||
public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
|
public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
|
||||||
super(input);
|
super(input);
|
||||||
if (typeMatch == null) {
|
this.typeMatch = Objects.requireNonNull(typeMatch, "typeMatch");
|
||||||
throw new IllegalArgumentException("typeMatch must not be null");
|
|
||||||
}
|
|
||||||
// Need to encode the payload
|
// Need to encode the payload
|
||||||
thePayload = new BytesRef(PayloadHelper.encodeFloat(payload));
|
this.thePayload = new BytesRef(PayloadHelper.encodeFloat(payload));
|
||||||
this.typeMatch = typeMatch;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.lucene.analysis.snowball;
|
package org.apache.lucene.analysis.snowball;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -53,7 +54,7 @@ public final class SnowballFilter extends TokenFilter {
|
||||||
|
|
||||||
public SnowballFilter(TokenStream input, SnowballStemmer stemmer) {
|
public SnowballFilter(TokenStream input, SnowballStemmer stemmer) {
|
||||||
super(input);
|
super(input);
|
||||||
this.stemmer = stemmer;
|
this.stemmer = Objects.requireNonNull(stemmer, "stemmer");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -68,6 +69,7 @@ public final class SnowballFilter extends TokenFilter {
|
||||||
*/
|
*/
|
||||||
public SnowballFilter(TokenStream in, String name) {
|
public SnowballFilter(TokenStream in, String name) {
|
||||||
super(in);
|
super(in);
|
||||||
|
Objects.requireNonNull(name, "name");
|
||||||
// Class.forName is frowned upon in place of the ResourceLoader but in this case,
|
// Class.forName is frowned upon in place of the ResourceLoader but in this case,
|
||||||
// the factory will use the other constructor so that the program is already loaded.
|
// the factory will use the other constructor so that the program is already loaded.
|
||||||
try {
|
try {
|
||||||
|
@ -75,7 +77,7 @@ public final class SnowballFilter extends TokenFilter {
|
||||||
Class.forName("org.tartarus.snowball.ext." + name + "Stemmer")
|
Class.forName("org.tartarus.snowball.ext." + name + "Stemmer")
|
||||||
.asSubclass(SnowballStemmer.class);
|
.asSubclass(SnowballStemmer.class);
|
||||||
stemmer = stemClass.getConstructor().newInstance();
|
stemmer = stemClass.getConstructor().newInstance();
|
||||||
} catch (Exception e) {
|
} catch (ReflectiveOperationException e) {
|
||||||
throw new IllegalArgumentException("Invalid stemmer class specified: " + name, e);
|
throw new IllegalArgumentException("Invalid stemmer class specified: " + name, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.lucene.analysis.util;
|
package org.apache.lucene.analysis.util;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -40,7 +41,7 @@ public final class ElisionFilter extends TokenFilter {
|
||||||
*/
|
*/
|
||||||
public ElisionFilter(TokenStream input, CharArraySet articles) {
|
public ElisionFilter(TokenStream input, CharArraySet articles) {
|
||||||
super(input);
|
super(input);
|
||||||
this.articles = articles;
|
this.articles = Objects.requireNonNull(articles, "articles");
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Increments the {@link TokenStream} with a {@link CharTermAttribute} without elisioned start */
|
/** Increments the {@link TokenStream} with a {@link CharTermAttribute} without elisioned start */
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.analysis.icu;
|
||||||
import com.ibm.icu.text.Normalizer;
|
import com.ibm.icu.text.Normalizer;
|
||||||
import com.ibm.icu.text.Normalizer2;
|
import com.ibm.icu.text.Normalizer2;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
@ -71,7 +72,7 @@ public class ICUNormalizer2Filter extends TokenFilter {
|
||||||
*/
|
*/
|
||||||
public ICUNormalizer2Filter(TokenStream input, Normalizer2 normalizer) {
|
public ICUNormalizer2Filter(TokenStream input, Normalizer2 normalizer) {
|
||||||
super(input);
|
super(input);
|
||||||
this.normalizer = normalizer;
|
this.normalizer = Objects.requireNonNull(normalizer, "normalizer");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.analysis.ja;
|
package org.apache.lucene.analysis.ja;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import org.apache.lucene.analysis.FilteringTokenFilter;
|
import org.apache.lucene.analysis.FilteringTokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -34,7 +35,7 @@ public final class JapanesePartOfSpeechStopFilter extends FilteringTokenFilter {
|
||||||
*/
|
*/
|
||||||
public JapanesePartOfSpeechStopFilter(TokenStream input, Set<String> stopTags) {
|
public JapanesePartOfSpeechStopFilter(TokenStream input, Set<String> stopTags) {
|
||||||
super(input);
|
super(input);
|
||||||
this.stopTags = stopTags;
|
this.stopTags = Objects.requireNonNull(stopTags, "stopTags");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.lucene.analysis.stempel;
|
package org.apache.lucene.analysis.stempel;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
@ -58,7 +59,10 @@ public final class StempelFilter extends TokenFilter {
|
||||||
*/
|
*/
|
||||||
public StempelFilter(TokenStream in, StempelStemmer stemmer, int minLength) {
|
public StempelFilter(TokenStream in, StempelStemmer stemmer, int minLength) {
|
||||||
super(in);
|
super(in);
|
||||||
this.stemmer = stemmer;
|
this.stemmer = Objects.requireNonNull(stemmer, "stemmer");
|
||||||
|
if (minLength < 1) {
|
||||||
|
throw new IllegalArgumentException("minLength must be >=1");
|
||||||
|
}
|
||||||
this.minLength = minLength;
|
this.minLength = minLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,7 +70,7 @@ public final class StempelFilter extends TokenFilter {
|
||||||
@Override
|
@Override
|
||||||
public boolean incrementToken() throws IOException {
|
public boolean incrementToken() throws IOException {
|
||||||
if (input.incrementToken()) {
|
if (input.incrementToken()) {
|
||||||
if (!keywordAtt.isKeyword() && termAtt.length() > minLength) {
|
if (!keywordAtt.isKeyword() && termAtt.length() >= minLength) {
|
||||||
StringBuilder sb = stemmer.stem(termAtt);
|
StringBuilder sb = stemmer.stem(termAtt);
|
||||||
if (sb != null) // if we can't stem it, return unchanged
|
if (sb != null) // if we can't stem it, return unchanged
|
||||||
termAtt.setEmpty().append(sb);
|
termAtt.setEmpty().append(sb);
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
|
||||||
/** Removes stop words from a token stream. */
|
/** Removes stop words from a token stream. */
|
||||||
|
@ -35,7 +36,7 @@ public class StopFilter extends FilteringTokenFilter {
|
||||||
*/
|
*/
|
||||||
public StopFilter(TokenStream in, CharArraySet stopWords) {
|
public StopFilter(TokenStream in, CharArraySet stopWords) {
|
||||||
super(in);
|
super(in);
|
||||||
this.stopWords = stopWords;
|
this.stopWords = Objects.requireNonNull(stopWords, "stopWords");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -72,9 +73,7 @@ public class StopFilter extends FilteringTokenFilter {
|
||||||
* @return a Set containing the words
|
* @return a Set containing the words
|
||||||
*/
|
*/
|
||||||
public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) {
|
public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) {
|
||||||
CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase);
|
return makeStopSet(Arrays.asList(Objects.requireNonNull(stopWords, "stopWords")), ignoreCase);
|
||||||
stopSet.addAll(Arrays.asList(stopWords));
|
|
||||||
return stopSet;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -86,6 +85,7 @@ public class StopFilter extends FilteringTokenFilter {
|
||||||
* @return A Set ({@link CharArraySet}) containing the words
|
* @return A Set ({@link CharArraySet}) containing the words
|
||||||
*/
|
*/
|
||||||
public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase) {
|
public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase) {
|
||||||
|
Objects.requireNonNull(stopWords, "stopWords");
|
||||||
CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase);
|
CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase);
|
||||||
stopSet.addAll(stopWords);
|
stopSet.addAll(stopWords);
|
||||||
return stopSet;
|
return stopSet;
|
||||||
|
|
Loading…
Reference in New Issue