diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
index 3b3fae9ca76..909ef5ef1a2 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
@@ -82,8 +82,17 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
     super(input);
     this.tokens=new LinkedList();
+    if (minWordSize < 0) {
+      throw new IllegalArgumentException("minWordSize cannot be negative");
+    }
     this.minWordSize=minWordSize;
+    if (minSubwordSize < 0) {
+      throw new IllegalArgumentException("minSubwordSize cannot be negative");
+    }
     this.minSubwordSize=minSubwordSize;
+    if (maxSubwordSize < 0) {
+      throw new IllegalArgumentException("maxSubwordSize cannot be negative");
+    }
     this.maxSubwordSize=maxSubwordSize;
     this.onlyLongestMatch=onlyLongestMatch;
     this.dictionary = dictionary;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
index 935c607c3de..a71352db1f7 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
@@ -191,6 +191,8 @@ public class HyphenationCompoundWordTokenFilter extends
       // we only put subwords to the token stream
       // that are longer than minPartSize
       if (partLength < this.minSubwordSize) {
+        // nocommit/BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
+        // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
        continue;
      }
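
For reviewers, a small standalone Java sketch (not part of the patch; the class and argument names below are illustrative only) of the fail-fast pattern this change introduces: the size arguments are validated in the constructor, so the later "partLength < this.minSubwordSize" check no longer has to rely on a non-negative minSubwordSize to filter out the bogus negative parts flagged in the second hunk.

// Standalone sketch, not part of the patch: illustrates the fail-fast
// validation the patch adds to CompoundWordTokenFilterBase. Class and
// argument names are illustrative only.
public class SizeValidationSketch {
  final int minWordSize, minSubwordSize, maxSubwordSize;

  public SizeValidationSketch(int minWordSize, int minSubwordSize, int maxSubwordSize) {
    if (minWordSize < 0) {
      throw new IllegalArgumentException("minWordSize cannot be negative");
    }
    this.minWordSize = minWordSize;
    if (minSubwordSize < 0) {
      throw new IllegalArgumentException("minSubwordSize cannot be negative");
    }
    this.minSubwordSize = minSubwordSize;
    if (maxSubwordSize < 0) {
      throw new IllegalArgumentException("maxSubwordSize cannot be negative");
    }
    this.maxSubwordSize = maxSubwordSize;
  }

  public static void main(String[] args) {
    try {
      // A negative minSubwordSize is rejected at construction time instead of
      // being relied upon downstream to filter out negative part lengths.
      new SizeValidationSketch(5, -1, 15);
    } catch (IllegalArgumentException expected) {
      System.out.println("rejected: " + expected.getMessage());
    }
  }
}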