LUCENE-5559: additional argument validation for CapitalizationFilter and CodepointCountFilter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1589919 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-04-25 01:54:19 +00:00
parent 0bb3e820ca
commit 7961e8ef26
7 changed files with 87 additions and 3 deletions

View File

@ -106,6 +106,9 @@ Bug fixes
* LUCENE-5600: HttpClientBase did not properly consume a connection if a server
error occurred. (Christoph Kaser via Shai Erera)
* LUCENE-5559: Add additional argument validation for CapitalizationFilter
and CodepointCountFilter. (Ahmet Arslan via Robert Muir)
======================= Lucene 4.8.0 =======================
System Requirements

View File

@ -78,6 +78,15 @@ public final class CapitalizationFilter extends TokenFilter {
this.keep = keep;
this.forceFirstLetter = forceFirstLetter;
this.okPrefix = okPrefix;
if (minWordLength < 0) {
throw new IllegalArgumentException("minWordLength must be greater than or equal to zero");
}
if (maxWordCount < 1) {
throw new IllegalArgumentException("maxWordCount must be greater than zero");
}
if (maxTokenLength < 1) {
throw new IllegalArgumentException("maxTokenLength must be greater than zero");
}
this.minWordLength = minWordLength;
this.maxWordCount = maxWordCount;
this.maxTokenLength = maxTokenLength;

View File

@ -46,6 +46,12 @@ public final class CodepointCountFilter extends FilteringTokenFilter {
*/
public CodepointCountFilter(Version version, TokenStream in, int min, int max) {
super(version, in);
if (min < 0) {
throw new IllegalArgumentException("minimum length must be greater than or equal to zero");
}
if (min > max) {
throw new IllegalArgumentException("maximum length must not be greater than minimum length");
}
this.min = min;
this.max = max;
}

View File

@ -18,7 +18,6 @@
package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
@ -31,6 +30,7 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.junit.Test;
import static org.apache.lucene.analysis.miscellaneous.CapitalizationFilter.*;
@ -149,4 +149,20 @@ public class TestCapitalizationFilter extends BaseTokenStreamTestCase {
};
checkOneTerm(a, "", "");
}
/**
* checking the validity of constructor arguments
*/
@Test(expected = IllegalArgumentException.class)
public void testIllegalArguments() throws Exception {
new CapitalizationFilter(whitespaceMockTokenizer("accept only valid arguments"),true, null, true, null, -1 , DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
}
@Test(expected = IllegalArgumentException.class)
public void testIllegalArguments1() throws Exception {
new CapitalizationFilter(whitespaceMockTokenizer("accept only valid arguments"),true, null, true, null, 0 , -10, DEFAULT_MAX_TOKEN_LENGTH);
}
@Test(expected = IllegalArgumentException.class)
public void testIllegalArguments2() throws Exception {
new CapitalizationFilter(whitespaceMockTokenizer("accept only valid arguments"),true, null, true, null, 0 , DEFAULT_MAX_WORD_COUNT, -50);
}
}

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous;
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
@ -272,4 +271,27 @@ public class TestCapitalizationFilterFactory extends BaseTokenStreamFactoryTestC
assertTrue(expected.getMessage().contains("Unknown parameters"));
}
}
/**
* Test that invalid arguments result in exception
*/
public void testInvalidArguments() throws Exception {
for (final String arg : new String[]{"minWordLength", "maxTokenLength", "maxWordCount"}) {
try {
Reader reader = new StringReader("foo foobar super-duper-trooper");
TokenStream stream = whitespaceMockTokenizer(reader);
tokenFilterFactory("Capitalization",
"keep", "and the it BIG",
"onlyFirstWord", "false",
arg, "-3",
"okPrefix", "McK",
"forceFirstLetter", "true").create(stream);
fail();
} catch (IllegalArgumentException expected) {
assertTrue(expected.getMessage().contains(arg + " must be greater than or equal to zero")
|| expected.getMessage().contains(arg + " must be greater than zero"));
}
}
}
}

View File

@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
import org.junit.Test;
public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
public void testFilterWithPosIncr() throws Exception {
@ -55,6 +55,11 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
int min = TestUtil.nextInt(random(), 0, 100);
int max = TestUtil.nextInt(random(), 0, 100);
int count = text.codePointCount(0, text.length());
if(min>max){
int temp = min;
min = max;
max = temp;
}
boolean expected = count >= min && count <= max;
TokenStream stream = new KeywordTokenizer();
((Tokenizer)stream).setReader(new StringReader(text));
@ -65,4 +70,12 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
stream.close();
}
}
/**
* checking the validity of constructor arguments
*/
@Test(expected = IllegalArgumentException.class)
public void testIllegalArguments() throws Exception {
new CodepointCountFilter(TEST_VERSION_CURRENT, whitespaceMockTokenizer("accept only valid arguments"), 4, 1);
}
}

View File

@ -49,4 +49,19 @@ public class TestCodepointCountFilterFactory extends BaseTokenStreamFactoryTestC
assertTrue(expected.getMessage().contains("Unknown parameters"));
}
}
/** Test that invalid arguments result in exception */
public void testInvalidArguments() throws Exception {
try {
Reader reader = new StringReader("foo foobar super-duper-trooper");
TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
((Tokenizer)stream).setReader(reader);
tokenFilterFactory("CodepointCount",
CodepointCountFilterFactory.MIN_KEY, "5",
CodepointCountFilterFactory.MAX_KEY, "4").create(stream);
fail();
} catch (IllegalArgumentException expected) {
assertTrue(expected.getMessage().contains("maximum length must not be greater than minimum length"));
}
}
}