diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 11221ad83a6..9e84b7a636b 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -165,6 +165,9 @@ Bug Fixes * LUCENE-10352: Fixed ctor argument checks: JapaneseKatakanaStemFilter, DoubleMetaphoneFilter (Uwe Schindler, Robert Muir) +* LUCENE-10353: Add random null injection to TestRandomChains. (Robert Muir, + Uwe Schindler) + Other --------------------- diff --git a/lucene/analysis.tests/src/test/module-info.java b/lucene/analysis.tests/src/test/module-info.java index 502611624a0..3a67c75febb 100644 --- a/lucene/analysis.tests/src/test/module-info.java +++ b/lucene/analysis.tests/src/test/module-info.java @@ -33,7 +33,6 @@ module org.apache.lucene.analysis.tests { requires org.apache.lucene.analysis.smartcn; requires org.apache.lucene.analysis.stempel; requires org.apache.lucene.test_framework; - requires junit; exports org.apache.lucene.analysis.tests; } diff --git a/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/TestRandomChains.java b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/TestRandomChains.java index 208c882532c..8c245e7058c 100644 --- a/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/TestRandomChains.java +++ b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/TestRandomChains.java @@ -36,6 +36,7 @@ import java.util.HashSet; import java.util.IdentityHashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Random; import java.util.Set; import java.util.function.Function; @@ -646,6 +647,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase { @SuppressWarnings("unchecked") static T newRandomArg(Random random, Class paramType) { + // if the argument type is not a primitive, return 1/10th of all cases null: + if (!paramType.isPrimitive() && random.nextInt(10) == 0) { + return null; + } final Function producer = argProducers.get(paramType); assertNotNull("No producer for
arguments of type " + paramType.getName() + " found", producer); return (T) producer.apply(random); @@ -754,10 +759,11 @@ public class TestRandomChains extends BaseTokenStreamTestCase { } catch (InvocationTargetException ite) { final Throwable cause = ite.getCause(); if (cause instanceof IllegalArgumentException + || (cause instanceof NullPointerException && Stream.of(args).anyMatch(Objects::isNull)) || cause instanceof UnsupportedOperationException) { // thats ok, ignore if (VERBOSE) { - System.err.println("Ignoring IAE/UOE from ctor:"); + System.err.println("Ignoring IAE/UOE/NPE from ctor:"); cause.printStackTrace(System.err); } } else { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java index 3acb5c297f5..20378288052 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java @@ -17,6 +17,7 @@ package org.apache.lucene.analysis.compound; import java.io.IOException; +import java.util.Objects; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.compound.hyphenation.Hyphenation; @@ -32,7 +33,7 @@ import org.xml.sax.InputSource; * this. */ public class HyphenationCompoundWordTokenFilter extends CompoundWordTokenFilterBase { - private HyphenationTree hyphenator; + private final HyphenationTree hyphenator; /** * Creates a new {@link HyphenationCompoundWordTokenFilter} instance. 
@@ -74,7 +75,7 @@ public class HyphenationCompoundWordTokenFilter extends CompoundWordTokenFilterB boolean onlyLongestMatch) { super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch); - this.hyphenator = hyphenator; + this.hyphenator = Objects.requireNonNull(hyphenator, "hyphenator"); } /** diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java index 707192f87e4..3088fc92ebb 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.analysis.core; +import java.util.Objects; import java.util.Set; import org.apache.lucene.analysis.FilteringTokenFilter; import org.apache.lucene.analysis.TokenStream; @@ -38,7 +39,7 @@ public final class TypeTokenFilter extends FilteringTokenFilter { */ public TypeTokenFilter(TokenStream input, Set stopTypes, boolean useWhiteList) { super(input); - this.stopTypes = stopTypes; + this.stopTypes = Objects.requireNonNull(stopTypes, "stopTypes"); this.useWhiteList = useWhiteList; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java index 5d2b91dfcd7..7f29ecc6ca4 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.analysis.miscellaneous; +import java.util.Objects; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.FilteringTokenFilter; import org.apache.lucene.analysis.TokenStream; @@ -42,7 +43,7 @@ public final class KeepWordFilter 
extends FilteringTokenFilter { */ public KeepWordFilter(TokenStream in, CharArraySet words) { super(in); - this.words = words; + this.words = Objects.requireNonNull(words, "words"); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java index e2e2f3aa73f..b28d95f9f1c 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.analysis.miscellaneous; +import java.util.Objects; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -39,7 +40,7 @@ public final class SetKeywordMarkerFilter extends KeywordMarkerFilter { */ public SetKeywordMarkerFilter(final TokenStream in, final CharArraySet keywordSet) { super(in); - this.keywordSet = keywordSet; + this.keywordSet = Objects.requireNonNull(keywordSet, "keywordSet"); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java index c7c7a3e3ac2..930b0cf23a7 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.pattern; import java.io.IOException; import java.io.Reader; import java.io.StringReader; +import java.util.Objects; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.lucene.analysis.charfilter.BaseCharFilter; @@ -46,8 +47,8 @@ public 
class PatternReplaceCharFilter extends BaseCharFilter { public PatternReplaceCharFilter(Pattern pattern, String replacement, Reader in) { super(in); - this.pattern = pattern; - this.replacement = replacement; + this.pattern = Objects.requireNonNull(pattern, "pattern"); + this.replacement = Objects.requireNonNull(replacement, "replacement"); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilter.java index c622bdef456..c0288ef7c89 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilter.java @@ -18,8 +18,10 @@ package org.apache.lucene.analysis.pattern; import java.io.IOException; +import java.util.Objects; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Stream; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -46,6 +48,10 @@ public class PatternTypingFilter extends TokenFilter { public PatternTypingFilter(TokenStream input, PatternTypingRule... 
replacementAndFlagByPattern) { super(input); + if (replacementAndFlagByPattern == null + || Stream.of(replacementAndFlagByPattern).anyMatch(Objects::isNull)) { + throw new NullPointerException("replacementAndFlagByPattern"); + } this.replacementAndFlagByPattern = replacementAndFlagByPattern; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java index a8ef3bf58e1..817e0bab26a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java @@ -17,6 +17,7 @@ package org.apache.lucene.analysis.payloads; import java.io.IOException; +import java.util.Objects; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -45,7 +46,7 @@ public final class DelimitedPayloadTokenFilter extends TokenFilter { public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) { super(input); this.delimiter = delimiter; - this.encoder = encoder; + this.encoder = Objects.requireNonNull(encoder, "encoder"); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java index bf7a9655674..d472d239b11 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java @@ -17,6 +17,7 @@ package org.apache.lucene.analysis.payloads; import java.io.IOException; +import java.util.Objects; import org.apache.lucene.analysis.TokenFilter; 
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; @@ -29,20 +30,17 @@ import org.apache.lucene.util.BytesRef; */ public class NumericPayloadTokenFilter extends TokenFilter { - private String typeMatch; - private BytesRef thePayload; + private final String typeMatch; + private final BytesRef thePayload; private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) { super(input); - if (typeMatch == null) { - throw new IllegalArgumentException("typeMatch must not be null"); - } + this.typeMatch = Objects.requireNonNull(typeMatch, "typeMatch"); // Need to encode the payload - thePayload = new BytesRef(PayloadHelper.encodeFloat(payload)); - this.typeMatch = typeMatch; + this.thePayload = new BytesRef(PayloadHelper.encodeFloat(payload)); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java index 6ee8aae5d39..4d3dfc4f42f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java @@ -17,6 +17,7 @@ package org.apache.lucene.analysis.snowball; import java.io.IOException; +import java.util.Objects; import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; @@ -53,7 +54,7 @@ public final class SnowballFilter extends TokenFilter { public SnowballFilter(TokenStream input, SnowballStemmer stemmer) { super(input); - this.stemmer = stemmer; + this.stemmer = Objects.requireNonNull(stemmer, "stemmer"); } /** @@ -68,6 +69,7 @@ public final class SnowballFilter extends TokenFilter 
{ */ public SnowballFilter(TokenStream in, String name) { super(in); + Objects.requireNonNull(name, "name"); // Class.forName is frowned upon in place of the ResourceLoader but in this case, // the factory will use the other constructor so that the program is already loaded. try { @@ -75,7 +77,7 @@ public final class SnowballFilter extends TokenFilter { Class.forName("org.tartarus.snowball.ext." + name + "Stemmer") .asSubclass(SnowballStemmer.class); stemmer = stemClass.getConstructor().newInstance(); - } catch (Exception e) { + } catch (ReflectiveOperationException e) { throw new IllegalArgumentException("Invalid stemmer class specified: " + name, e); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilter.java index 63898a967a1..797df993bae 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilter.java @@ -17,6 +17,7 @@ package org.apache.lucene.analysis.util; import java.io.IOException; +import java.util.Objects; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; @@ -40,7 +41,7 @@ public final class ElisionFilter extends TokenFilter { */ public ElisionFilter(TokenStream input, CharArraySet articles) { super(input); - this.articles = articles; + this.articles = Objects.requireNonNull(articles, "articles"); } /** Increments the {@link TokenStream} with a {@link CharTermAttribute} without elisioned start */ diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2Filter.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2Filter.java index 6c50011ba04..0635edf015c 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2Filter.java +++ 
b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2Filter.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.icu; import com.ibm.icu.text.Normalizer; import com.ibm.icu.text.Normalizer2; import java.io.IOException; +import java.util.Objects; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -71,7 +72,7 @@ public class ICUNormalizer2Filter extends TokenFilter { */ public ICUNormalizer2Filter(TokenStream input, Normalizer2 normalizer) { super(input); - this.normalizer = normalizer; + this.normalizer = Objects.requireNonNull(normalizer, "normalizer"); } @Override diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java index 89fe3d49181..fa4a0896763 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.analysis.ja; +import java.util.Objects; import java.util.Set; import org.apache.lucene.analysis.FilteringTokenFilter; import org.apache.lucene.analysis.TokenStream; @@ -34,7 +35,7 @@ public final class JapanesePartOfSpeechStopFilter extends FilteringTokenFilter { */ public JapanesePartOfSpeechStopFilter(TokenStream input, Set stopTags) { super(input); - this.stopTags = stopTags; + this.stopTags = Objects.requireNonNull(stopTags, "stopTags"); } @Override diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelFilter.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelFilter.java index ce334746943..bb59bf7fce4 100644 --- 
a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelFilter.java +++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelFilter.java @@ -17,6 +17,7 @@ package org.apache.lucene.analysis.stempel; import java.io.IOException; +import java.util.Objects; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -58,7 +59,10 @@ public final class StempelFilter extends TokenFilter { */ public StempelFilter(TokenStream in, StempelStemmer stemmer, int minLength) { super(in); - this.stemmer = stemmer; + this.stemmer = Objects.requireNonNull(stemmer, "stemmer"); + if (minLength < 1) { + throw new IllegalArgumentException("minLength must be >=1"); + } this.minLength = minLength; } @@ -66,7 +70,7 @@ public final class StempelFilter extends TokenFilter { @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - if (!keywordAtt.isKeyword() && termAtt.length() > minLength) { + if (!keywordAtt.isKeyword() && termAtt.length() >= minLength) { StringBuilder sb = stemmer.stem(termAtt); if (sb != null) // if we can't stem it, return unchanged termAtt.setEmpty().append(sb); diff --git a/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java b/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java index 403c0041033..2fa5524497d 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java @@ -18,6 +18,7 @@ package org.apache.lucene.analysis; import java.util.Arrays; import java.util.List; +import java.util.Objects; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** Removes stop words from a token stream. 
*/ @@ -35,7 +36,7 @@ public class StopFilter extends FilteringTokenFilter { */ public StopFilter(TokenStream in, CharArraySet stopWords) { super(in); - this.stopWords = stopWords; + this.stopWords = Objects.requireNonNull(stopWords, "stopWords"); } /** @@ -72,9 +73,7 @@ public class StopFilter extends FilteringTokenFilter { * @return a Set containing the words */ public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) { - CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase); - stopSet.addAll(Arrays.asList(stopWords)); - return stopSet; + return makeStopSet(Arrays.asList(Objects.requireNonNull(stopWords, "stopWords")), ignoreCase); } /** @@ -86,6 +85,7 @@ public class StopFilter extends FilteringTokenFilter { * @return A Set ({@link CharArraySet}) containing the words */ public static CharArraySet makeStopSet(List stopWords, boolean ignoreCase) { + Objects.requireNonNull(stopWords, "stopWords"); CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase); stopSet.addAll(stopWords); return stopSet;