LUCENE-10353: add random null injection to TestRandomChains (#586)

Co-authored-by: Uwe Schindler <uschindler@apache.org>, Robert Muir <rmuir@apache.org>
This commit is contained in:
Robert Muir 2022-01-06 10:56:49 -05:00 committed by GitHub
parent 603a43f668
commit f2e00bb9e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 55 additions and 28 deletions

View File

@ -165,6 +165,9 @@ Bug Fixes
* LUCENE-10352: Fixed ctor argument checks: JapaneseKatakanaStemFilter, * LUCENE-10352: Fixed ctor argument checks: JapaneseKatakanaStemFilter,
DoubleMetaphoneFilter (Uwe Schindler, Robert Muir) DoubleMetaphoneFilter (Uwe Schindler, Robert Muir)
* LUCENE-10353: Add random null injection to TestRandomChains. (Robert Muir,
Uwe Schindler)
Other Other
--------------------- ---------------------

View File

@ -33,7 +33,6 @@ module org.apache.lucene.analysis.tests {
requires org.apache.lucene.analysis.smartcn; requires org.apache.lucene.analysis.smartcn;
requires org.apache.lucene.analysis.stempel; requires org.apache.lucene.analysis.stempel;
requires org.apache.lucene.test_framework; requires org.apache.lucene.test_framework;
requires junit;
exports org.apache.lucene.analysis.tests; exports org.apache.lucene.analysis.tests;
} }

View File

@ -36,6 +36,7 @@ import java.util.HashSet;
import java.util.IdentityHashMap; import java.util.IdentityHashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import java.util.Random; import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.function.Function; import java.util.function.Function;
@ -646,6 +647,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
static <T> T newRandomArg(Random random, Class<T> paramType) { static <T> T newRandomArg(Random random, Class<T> paramType) {
// if the argument type is not a primitive, return 1/10th of all cases null:
if (!paramType.isPrimitive() && random.nextInt(10) == 0) {
return null;
}
final Function<Random, Object> producer = argProducers.get(paramType); final Function<Random, Object> producer = argProducers.get(paramType);
assertNotNull("No producer for arguments of type " + paramType.getName() + " found", producer); assertNotNull("No producer for arguments of type " + paramType.getName() + " found", producer);
return (T) producer.apply(random); return (T) producer.apply(random);
@ -754,10 +759,11 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
} catch (InvocationTargetException ite) { } catch (InvocationTargetException ite) {
final Throwable cause = ite.getCause(); final Throwable cause = ite.getCause();
if (cause instanceof IllegalArgumentException if (cause instanceof IllegalArgumentException
|| (cause instanceof NullPointerException && Stream.of(args).anyMatch(Objects::isNull))
|| cause instanceof UnsupportedOperationException) { || cause instanceof UnsupportedOperationException) {
// thats ok, ignore // thats ok, ignore
if (VERBOSE) { if (VERBOSE) {
System.err.println("Ignoring IAE/UOE from ctor:"); System.err.println("Ignoring IAE/UOE/NPE from ctor:");
cause.printStackTrace(System.err); cause.printStackTrace(System.err);
} }
} else { } else {

View File

@ -17,6 +17,7 @@
package org.apache.lucene.analysis.compound; package org.apache.lucene.analysis.compound;
import java.io.IOException; import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.hyphenation.Hyphenation; import org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
@ -32,7 +33,7 @@ import org.xml.sax.InputSource;
* this. * this.
*/ */
public class HyphenationCompoundWordTokenFilter extends CompoundWordTokenFilterBase { public class HyphenationCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
private HyphenationTree hyphenator; private final HyphenationTree hyphenator;
/** /**
* Creates a new {@link HyphenationCompoundWordTokenFilter} instance. * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
@ -74,7 +75,7 @@ public class HyphenationCompoundWordTokenFilter extends CompoundWordTokenFilterB
boolean onlyLongestMatch) { boolean onlyLongestMatch) {
super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch); super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
this.hyphenator = hyphenator; this.hyphenator = Objects.requireNonNull(hyphenator, "hyphenator");
} }
/** /**

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.lucene.analysis.core; package org.apache.lucene.analysis.core;
import java.util.Objects;
import java.util.Set; import java.util.Set;
import org.apache.lucene.analysis.FilteringTokenFilter; import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -38,7 +39,7 @@ public final class TypeTokenFilter extends FilteringTokenFilter {
*/ */
public TypeTokenFilter(TokenStream input, Set<String> stopTypes, boolean useWhiteList) { public TypeTokenFilter(TokenStream input, Set<String> stopTypes, boolean useWhiteList) {
super(input); super(input);
this.stopTypes = stopTypes; this.stopTypes = Objects.requireNonNull(stopTypes, "stopTypes");
this.useWhiteList = useWhiteList; this.useWhiteList = useWhiteList;
} }

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.lucene.analysis.miscellaneous; package org.apache.lucene.analysis.miscellaneous;
import java.util.Objects;
import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.FilteringTokenFilter; import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -42,7 +43,7 @@ public final class KeepWordFilter extends FilteringTokenFilter {
*/ */
public KeepWordFilter(TokenStream in, CharArraySet words) { public KeepWordFilter(TokenStream in, CharArraySet words) {
super(in); super(in);
this.words = words; this.words = Objects.requireNonNull(words, "words");
} }
@Override @Override

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.lucene.analysis.miscellaneous; package org.apache.lucene.analysis.miscellaneous;
import java.util.Objects;
import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -39,7 +40,7 @@ public final class SetKeywordMarkerFilter extends KeywordMarkerFilter {
*/ */
public SetKeywordMarkerFilter(final TokenStream in, final CharArraySet keywordSet) { public SetKeywordMarkerFilter(final TokenStream in, final CharArraySet keywordSet) {
super(in); super(in);
this.keywordSet = keywordSet; this.keywordSet = Objects.requireNonNull(keywordSet, "keywordSet");
} }
@Override @Override

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.pattern;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.io.StringReader; import java.io.StringReader;
import java.util.Objects;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.lucene.analysis.charfilter.BaseCharFilter; import org.apache.lucene.analysis.charfilter.BaseCharFilter;
@ -46,8 +47,8 @@ public class PatternReplaceCharFilter extends BaseCharFilter {
public PatternReplaceCharFilter(Pattern pattern, String replacement, Reader in) { public PatternReplaceCharFilter(Pattern pattern, String replacement, Reader in) {
super(in); super(in);
this.pattern = pattern; this.pattern = Objects.requireNonNull(pattern, "pattern");
this.replacement = replacement; this.replacement = Objects.requireNonNull(replacement, "replacement");
} }
@Override @Override

View File

@ -18,8 +18,10 @@
package org.apache.lucene.analysis.pattern; package org.apache.lucene.analysis.pattern;
import java.io.IOException; import java.io.IOException;
import java.util.Objects;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -46,6 +48,10 @@ public class PatternTypingFilter extends TokenFilter {
public PatternTypingFilter(TokenStream input, PatternTypingRule... replacementAndFlagByPattern) { public PatternTypingFilter(TokenStream input, PatternTypingRule... replacementAndFlagByPattern) {
super(input); super(input);
if (replacementAndFlagByPattern == null
|| Stream.of(replacementAndFlagByPattern).anyMatch(Objects::isNull)) {
throw new NullPointerException("replacementAndFlagByPattern");
}
this.replacementAndFlagByPattern = replacementAndFlagByPattern; this.replacementAndFlagByPattern = replacementAndFlagByPattern;
} }

View File

@ -17,6 +17,7 @@
package org.apache.lucene.analysis.payloads; package org.apache.lucene.analysis.payloads;
import java.io.IOException; import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -45,7 +46,7 @@ public final class DelimitedPayloadTokenFilter extends TokenFilter {
public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) { public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) {
super(input); super(input);
this.delimiter = delimiter; this.delimiter = delimiter;
this.encoder = encoder; this.encoder = Objects.requireNonNull(encoder, "encoder");
} }
@Override @Override

View File

@ -17,6 +17,7 @@
package org.apache.lucene.analysis.payloads; package org.apache.lucene.analysis.payloads;
import java.io.IOException; import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
@ -29,20 +30,17 @@ import org.apache.lucene.util.BytesRef;
*/ */
public class NumericPayloadTokenFilter extends TokenFilter { public class NumericPayloadTokenFilter extends TokenFilter {
private String typeMatch; private final String typeMatch;
private BytesRef thePayload; private final BytesRef thePayload;
private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) { public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
super(input); super(input);
if (typeMatch == null) { this.typeMatch = Objects.requireNonNull(typeMatch, "typeMatch");
throw new IllegalArgumentException("typeMatch must not be null");
}
// Need to encode the payload // Need to encode the payload
thePayload = new BytesRef(PayloadHelper.encodeFloat(payload)); this.thePayload = new BytesRef(PayloadHelper.encodeFloat(payload));
this.typeMatch = typeMatch;
} }
@Override @Override

View File

@ -17,6 +17,7 @@
package org.apache.lucene.analysis.snowball; package org.apache.lucene.analysis.snowball;
import java.io.IOException; import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -53,7 +54,7 @@ public final class SnowballFilter extends TokenFilter {
public SnowballFilter(TokenStream input, SnowballStemmer stemmer) { public SnowballFilter(TokenStream input, SnowballStemmer stemmer) {
super(input); super(input);
this.stemmer = stemmer; this.stemmer = Objects.requireNonNull(stemmer, "stemmer");
} }
/** /**
@ -68,6 +69,7 @@ public final class SnowballFilter extends TokenFilter {
*/ */
public SnowballFilter(TokenStream in, String name) { public SnowballFilter(TokenStream in, String name) {
super(in); super(in);
Objects.requireNonNull(name, "name");
// Class.forName is frowned upon in place of the ResourceLoader but in this case, // Class.forName is frowned upon in place of the ResourceLoader but in this case,
// the factory will use the other constructor so that the program is already loaded. // the factory will use the other constructor so that the program is already loaded.
try { try {
@ -75,7 +77,7 @@ public final class SnowballFilter extends TokenFilter {
Class.forName("org.tartarus.snowball.ext." + name + "Stemmer") Class.forName("org.tartarus.snowball.ext." + name + "Stemmer")
.asSubclass(SnowballStemmer.class); .asSubclass(SnowballStemmer.class);
stemmer = stemClass.getConstructor().newInstance(); stemmer = stemClass.getConstructor().newInstance();
} catch (Exception e) { } catch (ReflectiveOperationException e) {
throw new IllegalArgumentException("Invalid stemmer class specified: " + name, e); throw new IllegalArgumentException("Invalid stemmer class specified: " + name, e);
} }
} }

View File

@ -17,6 +17,7 @@
package org.apache.lucene.analysis.util; package org.apache.lucene.analysis.util;
import java.io.IOException; import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -40,7 +41,7 @@ public final class ElisionFilter extends TokenFilter {
*/ */
public ElisionFilter(TokenStream input, CharArraySet articles) { public ElisionFilter(TokenStream input, CharArraySet articles) {
super(input); super(input);
this.articles = articles; this.articles = Objects.requireNonNull(articles, "articles");
} }
/** Increments the {@link TokenStream} with a {@link CharTermAttribute} without elisioned start */ /** Increments the {@link TokenStream} with a {@link CharTermAttribute} without elisioned start */

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.icu;
import com.ibm.icu.text.Normalizer; import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.Normalizer2; import com.ibm.icu.text.Normalizer2;
import java.io.IOException; import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -71,7 +72,7 @@ public class ICUNormalizer2Filter extends TokenFilter {
*/ */
public ICUNormalizer2Filter(TokenStream input, Normalizer2 normalizer) { public ICUNormalizer2Filter(TokenStream input, Normalizer2 normalizer) {
super(input); super(input);
this.normalizer = normalizer; this.normalizer = Objects.requireNonNull(normalizer, "normalizer");
} }
@Override @Override

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.lucene.analysis.ja; package org.apache.lucene.analysis.ja;
import java.util.Objects;
import java.util.Set; import java.util.Set;
import org.apache.lucene.analysis.FilteringTokenFilter; import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -34,7 +35,7 @@ public final class JapanesePartOfSpeechStopFilter extends FilteringTokenFilter {
*/ */
public JapanesePartOfSpeechStopFilter(TokenStream input, Set<String> stopTags) { public JapanesePartOfSpeechStopFilter(TokenStream input, Set<String> stopTags) {
super(input); super(input);
this.stopTags = stopTags; this.stopTags = Objects.requireNonNull(stopTags, "stopTags");
} }
@Override @Override

View File

@ -17,6 +17,7 @@
package org.apache.lucene.analysis.stempel; package org.apache.lucene.analysis.stempel;
import java.io.IOException; import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -58,7 +59,10 @@ public final class StempelFilter extends TokenFilter {
*/ */
public StempelFilter(TokenStream in, StempelStemmer stemmer, int minLength) { public StempelFilter(TokenStream in, StempelStemmer stemmer, int minLength) {
super(in); super(in);
this.stemmer = stemmer; this.stemmer = Objects.requireNonNull(stemmer, "stemmer");
if (minLength < 1) {
throw new IllegalArgumentException("minLength must be >=1");
}
this.minLength = minLength; this.minLength = minLength;
} }
@ -66,7 +70,7 @@ public final class StempelFilter extends TokenFilter {
@Override @Override
public boolean incrementToken() throws IOException { public boolean incrementToken() throws IOException {
if (input.incrementToken()) { if (input.incrementToken()) {
if (!keywordAtt.isKeyword() && termAtt.length() > minLength) { if (!keywordAtt.isKeyword() && termAtt.length() >= minLength) {
StringBuilder sb = stemmer.stem(termAtt); StringBuilder sb = stemmer.stem(termAtt);
if (sb != null) // if we can't stem it, return unchanged if (sb != null) // if we can't stem it, return unchanged
termAtt.setEmpty().append(sb); termAtt.setEmpty().append(sb);

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Objects;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/** Removes stop words from a token stream. */ /** Removes stop words from a token stream. */
@ -35,7 +36,7 @@ public class StopFilter extends FilteringTokenFilter {
*/ */
public StopFilter(TokenStream in, CharArraySet stopWords) { public StopFilter(TokenStream in, CharArraySet stopWords) {
super(in); super(in);
this.stopWords = stopWords; this.stopWords = Objects.requireNonNull(stopWords, "stopWords");
} }
/** /**
@ -72,9 +73,7 @@ public class StopFilter extends FilteringTokenFilter {
* @return a Set containing the words * @return a Set containing the words
*/ */
public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) { public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) {
CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase); return makeStopSet(Arrays.asList(Objects.requireNonNull(stopWords, "stopWords")), ignoreCase);
stopSet.addAll(Arrays.asList(stopWords));
return stopSet;
} }
/** /**
@ -86,6 +85,7 @@ public class StopFilter extends FilteringTokenFilter {
* @return A Set ({@link CharArraySet}) containing the words * @return A Set ({@link CharArraySet}) containing the words
*/ */
public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase) { public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase) {
Objects.requireNonNull(stopWords, "stopWords");
CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase); CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase);
stopSet.addAll(stopWords); stopSet.addAll(stopWords);
return stopSet; return stopSet;