LUCENE-10353: add random null injection to TestRandomChains (#586)

Co-authored-by: Uwe Schindler <uschindler@apache.org>, Robert Muir <rmuir@apache.org>
This commit is contained in:
Robert Muir 2022-01-06 10:56:49 -05:00 committed by GitHub
parent 603a43f668
commit f2e00bb9e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 55 additions and 28 deletions

View File

@ -165,6 +165,9 @@ Bug Fixes
* LUCENE-10352: Fixed ctor argument checks: JapaneseKatakanaStemFilter,
DoubleMetaphoneFilter (Uwe Schindler, Robert Muir)
* LUCENE-10353: Add random null injection to TestRandomChains. (Robert Muir,
  Uwe Schindler)
Other
---------------------

View File

@ -33,7 +33,6 @@ module org.apache.lucene.analysis.tests {
requires org.apache.lucene.analysis.smartcn;
requires org.apache.lucene.analysis.stempel;
requires org.apache.lucene.test_framework;
requires junit;
exports org.apache.lucene.analysis.tests;
}

View File

@ -36,6 +36,7 @@ import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Random;
import java.util.Set;
import java.util.function.Function;
@ -646,6 +647,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
@SuppressWarnings("unchecked")
static <T> T newRandomArg(Random random, Class<T> paramType) {
// if the argument type is not a primitive, return 1/10th of all cases null:
if (!paramType.isPrimitive() && random.nextInt(10) == 0) {
return null;
}
final Function<Random, Object> producer = argProducers.get(paramType);
assertNotNull("No producer for arguments of type " + paramType.getName() + " found", producer);
return (T) producer.apply(random);
@ -754,10 +759,11 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
} catch (InvocationTargetException ite) {
final Throwable cause = ite.getCause();
if (cause instanceof IllegalArgumentException
|| (cause instanceof NullPointerException && Stream.of(args).anyMatch(Objects::isNull))
|| cause instanceof UnsupportedOperationException) {
// thats ok, ignore
if (VERBOSE) {
System.err.println("Ignoring IAE/UOE from ctor:");
System.err.println("Ignoring IAE/UOE/NPE from ctor:");
cause.printStackTrace(System.err);
}
} else {

View File

@ -17,6 +17,7 @@
package org.apache.lucene.analysis.compound;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
@ -32,7 +33,7 @@ import org.xml.sax.InputSource;
* this.
*/
public class HyphenationCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
private HyphenationTree hyphenator;
private final HyphenationTree hyphenator;
/**
* Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
@ -74,7 +75,7 @@ public class HyphenationCompoundWordTokenFilter extends CompoundWordTokenFilterB
boolean onlyLongestMatch) {
super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
this.hyphenator = hyphenator;
this.hyphenator = Objects.requireNonNull(hyphenator, "hyphenator");
}
/**

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.analysis.core;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
@ -38,7 +39,7 @@ public final class TypeTokenFilter extends FilteringTokenFilter {
*/
public TypeTokenFilter(TokenStream input, Set<String> stopTypes, boolean useWhiteList) {
super(input);
this.stopTypes = stopTypes;
this.stopTypes = Objects.requireNonNull(stopTypes, "stopTypes");
this.useWhiteList = useWhiteList;
}

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.analysis.miscellaneous;
import java.util.Objects;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
@ -42,7 +43,7 @@ public final class KeepWordFilter extends FilteringTokenFilter {
*/
public KeepWordFilter(TokenStream in, CharArraySet words) {
super(in);
this.words = words;
this.words = Objects.requireNonNull(words, "words");
}
@Override

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.analysis.miscellaneous;
import java.util.Objects;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -39,7 +40,7 @@ public final class SetKeywordMarkerFilter extends KeywordMarkerFilter {
*/
public SetKeywordMarkerFilter(final TokenStream in, final CharArraySet keywordSet) {
super(in);
this.keywordSet = keywordSet;
this.keywordSet = Objects.requireNonNull(keywordSet, "keywordSet");
}
@Override

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.pattern;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.charfilter.BaseCharFilter;
@ -46,8 +47,8 @@ public class PatternReplaceCharFilter extends BaseCharFilter {
public PatternReplaceCharFilter(Pattern pattern, String replacement, Reader in) {
super(in);
this.pattern = pattern;
this.replacement = replacement;
this.pattern = Objects.requireNonNull(pattern, "pattern");
this.replacement = Objects.requireNonNull(replacement, "replacement");
}
@Override

View File

@ -18,8 +18,10 @@
package org.apache.lucene.analysis.pattern;
import java.io.IOException;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -46,6 +48,10 @@ public class PatternTypingFilter extends TokenFilter {
public PatternTypingFilter(TokenStream input, PatternTypingRule... replacementAndFlagByPattern) {
super(input);
if (replacementAndFlagByPattern == null
|| Stream.of(replacementAndFlagByPattern).anyMatch(Objects::isNull)) {
throw new NullPointerException("replacementAndFlagByPattern");
}
this.replacementAndFlagByPattern = replacementAndFlagByPattern;
}

View File

@ -17,6 +17,7 @@
package org.apache.lucene.analysis.payloads;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -45,7 +46,7 @@ public final class DelimitedPayloadTokenFilter extends TokenFilter {
public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) {
super(input);
this.delimiter = delimiter;
this.encoder = encoder;
this.encoder = Objects.requireNonNull(encoder, "encoder");
}
@Override

View File

@ -17,6 +17,7 @@
package org.apache.lucene.analysis.payloads;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
@ -29,20 +30,17 @@ import org.apache.lucene.util.BytesRef;
*/
public class NumericPayloadTokenFilter extends TokenFilter {
private String typeMatch;
private BytesRef thePayload;
private final String typeMatch;
private final BytesRef thePayload;
private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
super(input);
if (typeMatch == null) {
throw new IllegalArgumentException("typeMatch must not be null");
}
this.typeMatch = Objects.requireNonNull(typeMatch, "typeMatch");
// Need to encode the payload
thePayload = new BytesRef(PayloadHelper.encodeFloat(payload));
this.typeMatch = typeMatch;
this.thePayload = new BytesRef(PayloadHelper.encodeFloat(payload));
}
@Override

View File

@ -17,6 +17,7 @@
package org.apache.lucene.analysis.snowball;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@ -53,7 +54,7 @@ public final class SnowballFilter extends TokenFilter {
public SnowballFilter(TokenStream input, SnowballStemmer stemmer) {
super(input);
this.stemmer = stemmer;
this.stemmer = Objects.requireNonNull(stemmer, "stemmer");
}
/**
@ -68,6 +69,7 @@ public final class SnowballFilter extends TokenFilter {
*/
public SnowballFilter(TokenStream in, String name) {
super(in);
Objects.requireNonNull(name, "name");
// Class.forName is frowned upon in place of the ResourceLoader but in this case,
// the factory will use the other constructor so that the program is already loaded.
try {
@ -75,7 +77,7 @@ public final class SnowballFilter extends TokenFilter {
Class.forName("org.tartarus.snowball.ext." + name + "Stemmer")
.asSubclass(SnowballStemmer.class);
stemmer = stemClass.getConstructor().newInstance();
} catch (Exception e) {
} catch (ReflectiveOperationException e) {
throw new IllegalArgumentException("Invalid stemmer class specified: " + name, e);
}
}

View File

@ -17,6 +17,7 @@
package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@ -40,7 +41,7 @@ public final class ElisionFilter extends TokenFilter {
*/
public ElisionFilter(TokenStream input, CharArraySet articles) {
super(input);
this.articles = articles;
this.articles = Objects.requireNonNull(articles, "articles");
}
/** Increments the {@link TokenStream} with a {@link CharTermAttribute} without elisioned start */

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.icu;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.Normalizer2;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -71,7 +72,7 @@ public class ICUNormalizer2Filter extends TokenFilter {
*/
public ICUNormalizer2Filter(TokenStream input, Normalizer2 normalizer) {
super(input);
this.normalizer = normalizer;
this.normalizer = Objects.requireNonNull(normalizer, "normalizer");
}
@Override

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.analysis.ja;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
@ -34,7 +35,7 @@ public final class JapanesePartOfSpeechStopFilter extends FilteringTokenFilter {
*/
public JapanesePartOfSpeechStopFilter(TokenStream input, Set<String> stopTags) {
super(input);
this.stopTags = stopTags;
this.stopTags = Objects.requireNonNull(stopTags, "stopTags");
}
@Override

View File

@ -17,6 +17,7 @@
package org.apache.lucene.analysis.stempel;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -58,7 +59,10 @@ public final class StempelFilter extends TokenFilter {
*/
public StempelFilter(TokenStream in, StempelStemmer stemmer, int minLength) {
super(in);
this.stemmer = stemmer;
this.stemmer = Objects.requireNonNull(stemmer, "stemmer");
if (minLength < 1) {
throw new IllegalArgumentException("minLength must be >=1");
}
this.minLength = minLength;
}
@ -66,7 +70,7 @@ public final class StempelFilter extends TokenFilter {
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAtt.isKeyword() && termAtt.length() > minLength) {
if (!keywordAtt.isKeyword() && termAtt.length() >= minLength) {
StringBuilder sb = stemmer.stem(termAtt);
if (sb != null) // if we can't stem it, return unchanged
termAtt.setEmpty().append(sb);

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/** Removes stop words from a token stream. */
@ -35,7 +36,7 @@ public class StopFilter extends FilteringTokenFilter {
*/
public StopFilter(TokenStream in, CharArraySet stopWords) {
super(in);
this.stopWords = stopWords;
this.stopWords = Objects.requireNonNull(stopWords, "stopWords");
}
/**
@ -72,9 +73,7 @@ public class StopFilter extends FilteringTokenFilter {
* @return a Set containing the words
*/
public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) {
CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase);
stopSet.addAll(Arrays.asList(stopWords));
return stopSet;
return makeStopSet(Arrays.asList(Objects.requireNonNull(stopWords, "stopWords")), ignoreCase);
}
/**
@ -86,6 +85,7 @@ public class StopFilter extends FilteringTokenFilter {
* @return A Set ({@link CharArraySet}) containing the words
*/
public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase) {
Objects.requireNonNull(stopWords, "stopWords");
CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase);
stopSet.addAll(stopWords);
return stopSet;