From 7961e8ef26d9bfd0aa5ec16d897c93e1dd17e6c4 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 25 Apr 2014 01:54:19 +0000 Subject: [PATCH] LUCENE-5559: additional argument validation for CapitalizationFilter and CodepointCountFilter git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1589919 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 +++ .../miscellaneous/CapitalizationFilter.java | 9 +++++++ .../miscellaneous/CodepointCountFilter.java | 6 +++++ .../TestCapitalizationFilter.java | 18 +++++++++++++- .../TestCapitalizationFilterFactory.java | 24 ++++++++++++++++++- .../TestCodepointCountFilter.java | 15 +++++++++++- .../TestCodepointCountFilterFactory.java | 15 ++++++++++++ 7 files changed, 87 insertions(+), 3 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 415293f4713..9f97dd2d5ef 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -106,6 +106,9 @@ Bug fixes * LUCENE-5600: HttpClientBase did not properly consume a connection if a server error occurred. (Christoph Kaser via Shai Erera) +* LUCENE-5559: Add additional argument validation for CapitalizationFilter + and CodepointCountFilter. (Ahmet Arslan via Robert Muir) + ======================= Lucene 4.8.0 ======================= System Requirements diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java index c3d0a96f78e..f762cc43b66 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java @@ -78,6 +78,15 @@ public final class CapitalizationFilter extends TokenFilter { this.keep = keep; this.forceFirstLetter = forceFirstLetter; this.okPrefix = okPrefix; + if (minWordLength < 0) { + throw new IllegalArgumentException("minWordLength must be greater than or equal to zero"); + } + if (maxWordCount < 1) { + throw new IllegalArgumentException("maxWordCount must be greater than zero"); + } + if (maxTokenLength < 1) { + throw new IllegalArgumentException("maxTokenLength must be greater than zero"); + } this.minWordLength = minWordLength; this.maxWordCount = maxWordCount; this.maxTokenLength = maxTokenLength; diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java index 066f295b5ad..5f501e06cb2 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java @@ -46,6 +46,12 @@ public final class CodepointCountFilter extends FilteringTokenFilter { */ public CodepointCountFilter(Version version, TokenStream in, int min, int max) { super(version, in); + if (min < 0) { + throw new IllegalArgumentException("minimum length must be greater than or equal to zero"); + } + if (min > max) { + throw new IllegalArgumentException("maximum length must not be greater than minimum length"); + } this.min = min; this.max = max; } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java index 00ef72e1a63..96998d5d046 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java @@ -18,7 +18,6 @@ package org.apache.lucene.analysis.miscellaneous; import java.io.IOException; -import java.io.Reader; import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; @@ -31,6 +30,7 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.analysis.util.CharArraySet; +import org.junit.Test; import static org.apache.lucene.analysis.miscellaneous.CapitalizationFilter.*; @@ -149,4 +149,20 @@ public class TestCapitalizationFilter extends BaseTokenStreamTestCase { }; checkOneTerm(a, "", ""); } + + /** + * checking the validity of constructor arguments + */ + @Test(expected = IllegalArgumentException.class) + public void testIllegalArguments() throws Exception { + new CapitalizationFilter(whitespaceMockTokenizer("accept only valid arguments"),true, null, true, null, -1 , DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH); + } + @Test(expected = IllegalArgumentException.class) + public void testIllegalArguments1() throws Exception { + new CapitalizationFilter(whitespaceMockTokenizer("accept only valid arguments"),true, null, true, null, 0 , -10, DEFAULT_MAX_TOKEN_LENGTH); + } + @Test(expected = IllegalArgumentException.class) + public void testIllegalArguments2() throws Exception { + new CapitalizationFilter(whitespaceMockTokenizer("accept only valid arguments"),true, null, true, null, 0 , DEFAULT_MAX_WORD_COUNT, -50); + } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java index 26722adf7ff..9c19becf8c3 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilterFactory.java @@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous; import java.io.Reader; import java.io.StringReader; -import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase; @@ -272,4 +271,27 @@ public class TestCapitalizationFilterFactory extends BaseTokenStreamFactoryTestC assertTrue(expected.getMessage().contains("Unknown parameters")); } } + + /** + * Test that invalid arguments result in exception + */ + public void testInvalidArguments() throws Exception { + for (final String arg : new String[]{"minWordLength", "maxTokenLength", "maxWordCount"}) { + try { + Reader reader = new StringReader("foo foobar super-duper-trooper"); + TokenStream stream = whitespaceMockTokenizer(reader); + + tokenFilterFactory("Capitalization", + "keep", "and the it BIG", + "onlyFirstWord", "false", + arg, "-3", + "okPrefix", "McK", + "forceFirstLetter", "true").create(stream); + fail(); + } catch (IllegalArgumentException expected) { + assertTrue(expected.getMessage().contains(arg + " must be greater than or equal to zero") + || expected.getMessage().contains(arg + " must be greater than zero")); + } + } + } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java index 8c58f5e6da3..2a158ec2367 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilter.java @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.util.TestUtil; -import org.apache.lucene.util.TestUtil; +import org.junit.Test; public class TestCodepointCountFilter extends BaseTokenStreamTestCase { public void testFilterWithPosIncr() throws Exception { @@ -55,6 +55,11 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase { int min = TestUtil.nextInt(random(), 0, 100); int max = TestUtil.nextInt(random(), 0, 100); int count = text.codePointCount(0, text.length()); + if(min>max){ + int temp = min; + min = max; + max = temp; + } boolean expected = count >= min && count <= max; TokenStream stream = new KeywordTokenizer(); ((Tokenizer)stream).setReader(new StringReader(text)); @@ -65,4 +70,12 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase { stream.close(); } } + + /** + * checking the validity of constructor arguments + */ + @Test(expected = IllegalArgumentException.class) + public void testIllegalArguments() throws Exception { + new CodepointCountFilter(TEST_VERSION_CURRENT, whitespaceMockTokenizer("accept only valid arguments"), 4, 1); + } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilterFactory.java index bd22090b2f7..1eab1de9b47 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilterFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCodepointCountFilterFactory.java @@ -49,4 +49,19 @@ public class TestCodepointCountFilterFactory extends BaseTokenStreamFactoryTestC assertTrue(expected.getMessage().contains("Unknown parameters")); } } + + /** Test that invalid arguments result in exception */ + public void testInvalidArguments() throws Exception { + try { + Reader reader = new StringReader("foo foobar super-duper-trooper"); + TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false); + ((Tokenizer)stream).setReader(reader); + tokenFilterFactory("CodepointCount", + CodepointCountFilterFactory.MIN_KEY, "5", + CodepointCountFilterFactory.MAX_KEY, "4").create(stream); + fail(); + } catch (IllegalArgumentException expected) { + assertTrue(expected.getMessage().contains("maximum length must not be greater than minimum length")); + } + } } \ No newline at end of file