mirror of https://github.com/apache/lucene.git
LUCENE-3969: don't allow negative subword params; Hyphenation relies upon this to filter out what appear to be bogus hyphenation points
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1311257 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
24f8a9e627
commit
ac393486e0
|
@ -82,8 +82,17 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
|
|||
super(input);
|
||||
|
||||
this.tokens=new LinkedList<CompoundToken>();
|
||||
if (minWordSize < 0) {
|
||||
throw new IllegalArgumentException("minWordSize cannot be negative");
|
||||
}
|
||||
this.minWordSize=minWordSize;
|
||||
if (minSubwordSize < 0) {
|
||||
throw new IllegalArgumentException("minSubwordSize cannot be negative");
|
||||
}
|
||||
this.minSubwordSize=minSubwordSize;
|
||||
if (maxSubwordSize < 0) {
|
||||
throw new IllegalArgumentException("maxSubwordSize cannot be negative");
|
||||
}
|
||||
this.maxSubwordSize=maxSubwordSize;
|
||||
this.onlyLongestMatch=onlyLongestMatch;
|
||||
this.dictionary = dictionary;
|
||||
|
|
|
@ -191,6 +191,8 @@ public class HyphenationCompoundWordTokenFilter extends
|
|||
// we only put subwords to the token stream
|
||||
// that are longer than minPartSize
|
||||
if (partLength < this.minSubwordSize) {
|
||||
// nocommit/BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
|
||||
// calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue