mirror of https://github.com/apache/lucene.git
LUCENE-3969: don't allow negative minWordSize/minSubwordSize/maxSubwordSize params; HyphenationCompoundWordTokenFilter relies upon minSubwordSize being >= 0 to filter out what appear to be bogus (negative-length) hyphenation points
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1311257 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
24f8a9e627
commit
ac393486e0
|
@ -82,8 +82,17 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
|
||||||
super(input);
|
super(input);
|
||||||
|
|
||||||
this.tokens=new LinkedList<CompoundToken>();
|
this.tokens=new LinkedList<CompoundToken>();
|
||||||
|
if (minWordSize < 0) {
|
||||||
|
throw new IllegalArgumentException("minWordSize cannot be negative");
|
||||||
|
}
|
||||||
this.minWordSize=minWordSize;
|
this.minWordSize=minWordSize;
|
||||||
|
if (minSubwordSize < 0) {
|
||||||
|
throw new IllegalArgumentException("minSubwordSize cannot be negative");
|
||||||
|
}
|
||||||
this.minSubwordSize=minSubwordSize;
|
this.minSubwordSize=minSubwordSize;
|
||||||
|
if (maxSubwordSize < 0) {
|
||||||
|
throw new IllegalArgumentException("maxSubwordSize cannot be negative");
|
||||||
|
}
|
||||||
this.maxSubwordSize=maxSubwordSize;
|
this.maxSubwordSize=maxSubwordSize;
|
||||||
this.onlyLongestMatch=onlyLongestMatch;
|
this.onlyLongestMatch=onlyLongestMatch;
|
||||||
this.dictionary = dictionary;
|
this.dictionary = dictionary;
|
||||||
|
|
|
@ -191,6 +191,8 @@ public class HyphenationCompoundWordTokenFilter extends
|
||||||
// we only put subwords to the token stream
|
// we only put subwords to the token stream
|
||||||
// that are longer than minPartSize
|
// that are longer than minPartSize
|
||||||
if (partLength < this.minSubwordSize) {
|
if (partLength < this.minSubwordSize) {
|
||||||
|
// nocommit/BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
|
||||||
|
// calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue