From 7961e8ef26d9bfd0aa5ec16d897c93e1dd17e6c4 Mon Sep 17 00:00:00 2001
From: Robert Muir <rmuir@apache.org>
Date: Fri, 25 Apr 2014 01:54:19 +0000
Subject: [PATCH] LUCENE-5559: additional argument validation for
 CapitalizationFilter and CodepointCountFilter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1589919 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/CHANGES.txt                            |  3 +++
 .../miscellaneous/CapitalizationFilter.java   |  9 +++++++
 .../miscellaneous/CodepointCountFilter.java   |  6 +++++
 .../TestCapitalizationFilter.java             | 18 +++++++++++++-
 .../TestCapitalizationFilterFactory.java      | 24 ++++++++++++++++++-
 .../TestCodepointCountFilter.java             | 15 +++++++++++-
 .../TestCodepointCountFilterFactory.java      | 15 ++++++++++++
 7 files changed, 87 insertions(+), 3 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 415293f4713..9f97dd2d5ef 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -106,6 +106,9 @@ Bug fixes
 * LUCENE-5600: HttpClientBase did not properly consume a connection if a server
   error occurred. (Christoph Kaser via Shai Erera)
 
+* LUCENE-5559: Add additional argument validation for CapitalizationFilter
+  and CodepointCountFilter. (Ahmet Arslan via Robert Muir)
+
 ======================= Lucene 4.8.0 =======================
 
 System Requirements
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
index c3d0a96f78e..f762cc43b66 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
@@ -78,6 +78,15 @@ public final class CapitalizationFilter extends TokenFilter {
     this.keep = keep;
     this.forceFirstLetter = forceFirstLetter;
     this.okPrefix = okPrefix;
+    if (minWordLength < 0) {
+      throw new IllegalArgumentException("minWordLength must be greater than or equal to zero");
+    }
+    if (maxWordCount < 1) {
+      throw new IllegalArgumentException("maxWordCount must be greater than zero");
+    }
+    if (maxTokenLength < 1) {
+      throw new IllegalArgumentException("maxTokenLength must be greater than zero");
+    }
     this.minWordLength = minWordLength;
     this.maxWordCount = maxWordCount;
     this.maxTokenLength = maxTokenLength;
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
index 066f295b5ad..5f501e06cb2 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
@@ -46,6 +46,12 @@ public final class CodepointCountFilter extends FilteringTokenFilter {
    */
   public CodepointCountFilter(Version version, TokenStream in, int min, int max) {
     super(version, in);
+    if (min < 0) { // the lower bound must be non-negative
+      throw new IllegalArgumentException("minimum length must be greater than or equal to zero");
+    }
+    if (min > max) { // the bounds are inverted: min exceeds max
+      throw new IllegalArgumentException("maximum length must not be greater than minimum length");
+    } // NOTE(review): the message above is worded backwards for this check; TestCodepointCountFilterFactory asserts its text, so change both together
     this.min = min;
     this.max = max;
   }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
index 00ef72e1a63..96998d5d046 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
@@ -18,7 +18,6 @@
 package org.apache.lucene.analysis.miscellaneous;
 
 import java.io.IOException;
-import java.io.Reader;
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -31,6 +30,7 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.junit.Test;
 
 import static org.apache.lucene.analysis.miscellaneous.CapitalizationFilter.*;
 
@@ -149,4 +149,20 @@ public class TestCapitalizationFilter extends BaseTokenStreamTestCase {
     };
     checkOneTerm(a, "", "");
   }
+
+  /**
+   * The constructor must reject the -1 passed just before the max word count (presumably minWordLength).
+   */
+  @Test(expected = IllegalArgumentException.class)
+  public void testIllegalArguments() throws Exception {
+    new CapitalizationFilter(whitespaceMockTokenizer("accept only valid arguments"),true, null, true, null, -1 , DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
+  }
+  @Test(expected = IllegalArgumentException.class)
+  public void testIllegalArguments1() throws Exception { // -10 takes the place of DEFAULT_MAX_WORD_COUNT; the constructor must reject it
+    new CapitalizationFilter(whitespaceMockTokenizer("accept only valid arguments"),true, null, true, null, 0 , -10, DEFAULT_MAX_TOKEN_LENGTH);
+  }
+  @Test(expected = IllegalArgumentException.class)
+  public void testIllegalArguments2() throws Exception { // -50 takes the place of DEFAULT_MAX_TOKEN_LENGTH; the constructor must reject it
+    new CapitalizationFilter(whitespaceMockTokenizer("accept only valid arguments"),true, null, true, null, 0 , DEFAULT_MAX_WORD_COUNT, -50);
+  }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java
index 26722adf7ff..9c19becf8c3 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous;
 import java.io.Reader;
 import java.io.StringReader;
 
-import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 
@@ -272,4 +271,27 @@ public class TestCapitalizationFilterFactory extends BaseTokenStreamFactoryTestC
       assertTrue(expected.getMessage().contains("Unknown parameters"));
     }
   }
+
+  /**
+   * Test that setting each length/count argument to -3, one at a time, results in an IllegalArgumentException
+   */
+  public void testInvalidArguments() throws Exception {
+    for (final String arg : new String[]{"minWordLength", "maxTokenLength", "maxWordCount"}) {
+      try {
+        Reader reader = new StringReader("foo foobar super-duper-trooper");
+        TokenStream stream = whitespaceMockTokenizer(reader);
+
+        tokenFilterFactory("Capitalization",
+            "keep", "and the it BIG",
+            "onlyFirstWord", "false",
+            arg, "-3", // the single argument under test, given a negative value
+            "okPrefix", "McK",
+            "forceFirstLetter", "true").create(stream);
+        fail(); // reached only when no exception was thrown
+      } catch (IllegalArgumentException expected) { // two wordings accepted: the filter says ">= zero" for minWordLength and "> zero" for the other two
+        assertTrue(expected.getMessage().contains(arg + " must be greater than or equal to zero")
+            || expected.getMessage().contains(arg + " must be greater than zero"));
+      }
+    }
+  }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java
index 8c58f5e6da3..2a158ec2367 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java
@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.TestUtil;
+import org.junit.Test;
 
 public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
   public void testFilterWithPosIncr() throws Exception {
@@ -55,6 +55,11 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
       int min = TestUtil.nextInt(random(), 0, 100);
       int max = TestUtil.nextInt(random(), 0, 100);
       int count = text.codePointCount(0, text.length());
+      if(min>max){
+        int temp = min;
+        min = max;
+        max = temp;
+      }
       boolean expected = count >= min && count <= max;
       TokenStream stream = new KeywordTokenizer();
       ((Tokenizer)stream).setReader(new StringReader(text));
@@ -65,4 +70,12 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
       stream.close();
     }
   }
+
+  /**
+   * The constructor must reject a minimum (4) that is greater than the maximum (1).
+   */
+  @Test(expected = IllegalArgumentException.class)
+  public void testIllegalArguments() throws Exception {
+    new CodepointCountFilter(TEST_VERSION_CURRENT, whitespaceMockTokenizer("accept only valid arguments"), 4, 1);
+  }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilterFactory.java
index bd22090b2f7..1eab1de9b47 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilterFactory.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilterFactory.java
@@ -49,4 +49,19 @@ public class TestCodepointCountFilterFactory extends BaseTokenStreamFactoryTestC
       assertTrue(expected.getMessage().contains("Unknown parameters"));
     }
   }
+
+  /** Test that a minimum (5) larger than the maximum (4) results in an IllegalArgumentException */
+  public void testInvalidArguments() throws Exception {
+    try {
+      Reader reader = new StringReader("foo foobar super-duper-trooper");
+      TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+      ((Tokenizer)stream).setReader(reader);
+      tokenFilterFactory("CodepointCount",
+          CodepointCountFilterFactory.MIN_KEY, "5",
+          CodepointCountFilterFactory.MAX_KEY, "4").create(stream);
+      fail(); // reached only when no exception was thrown
+    } catch (IllegalArgumentException expected) { // NOTE(review): the asserted text copies the filter's message verbatim, although it reads inverted for a min > max failure
+      assertTrue(expected.getMessage().contains("maximum length must not be greater than minimum length"));
+    }
+  }
 }
\ No newline at end of file