LUCENE-3043: GermanStemmer threw IOOBE on zero-length tokens

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1096194 13f79535-47bb-0310-9956-ffa450edef68
2011-04-23 17:48:17 +00:00 · 2011-04-23 17:48:17 +00:00 · 7db98455e7
parent c0c695053c
commit 7db98455e7
3 changed files with 11 additions and 3 deletions
--- a/lucene/contrib/CHANGES.txt
+++ b/lucene/contrib/CHANGES.txt
@ -55,6 +55,9 @@ Bug Fixes
   when using CachingTokenStream. This can be a significant performance bug for
   large documents. (Mark Miller)
 * LUCENE-3043: GermanStemmer threw IndexOutOfBoundsException if it encountered
   a zero-length token.  (Robert Muir)
 New Features
 * LUCENE-3016: Add analyzer for Latvian.  (Robert Muir)
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
@ -132,7 +132,8 @@ public class GermanStemmer
        strip( buffer );
      }
      // Additional step for irregular plural nouns like "Matrizen -> Matrix".
-      if ( buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) {
+      // NOTE: this length constraint is probably not a great value, its just to prevent AIOOBE on empty terms
      if ( buffer.length() > 0 && buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) {
        buffer.setCharAt( buffer.length() - 1, 'x' );
      }
    }
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
@ -26,7 +26,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import org.junit.Ignore;
 import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@ -53,8 +52,13 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
    vocOut.close();
  }
  // LUCENE-3043: we use keywordtokenizer in this test,
  // so ensure the stemmer does not crash on zero-length strings.
  public void testEmpty() throws Exception {
    assertAnalyzesTo(analyzer, "", new String[] { "" });
  }
  /** blast some random strings through the analyzer */
  @Ignore("bugs!")
  public void testRandomStrings() throws Exception {
    checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
  }