From ac393486e0e6d5a74b88cd6f98881dac15146db2 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 9 Apr 2012 14:31:25 +0000 Subject: [PATCH] LUCENE-3969: don't allow negative subword params, Hyphenation relies upon this to filter out what appear to be bogus hyphenation points git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1311257 13f79535-47bb-0310-9956-ffa450edef68 --- .../analysis/compound/CompoundWordTokenFilterBase.java | 9 +++++++++ .../compound/HyphenationCompoundWordTokenFilter.java | 2 ++ 2 files changed, 11 insertions(+) diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java index 3b3fae9ca76..909ef5ef1a2 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java @@ -82,8 +82,17 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter { super(input); this.tokens=new LinkedList(); + if (minWordSize < 0) { + throw new IllegalArgumentException("minWordSize cannot be negative"); + } this.minWordSize=minWordSize; + if (minSubwordSize < 0) { + throw new IllegalArgumentException("minSubwordSize cannot be negative"); + } this.minSubwordSize=minSubwordSize; + if (maxSubwordSize < 0) { + throw new IllegalArgumentException("maxSubwordSize cannot be negative"); + } this.maxSubwordSize=maxSubwordSize; this.onlyLongestMatch=onlyLongestMatch; this.dictionary = dictionary; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java index 935c607c3de..a71352db1f7 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java @@ -191,6 +191,8 @@ public class HyphenationCompoundWordTokenFilter extends // we only put subwords to the token stream // that are longer than minPartSize if (partLength < this.minSubwordSize) { + // nocommit/BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the + // calculation above, and we rely upon minSubwordSize being >=0 to filter them out... continue; }