mirror of https://github.com/apache/lucene.git
LUCENE-5559: additional argument validation for CapitalizationFilter and CodepointCountFilter
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1589919 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0bb3e820ca
commit
7961e8ef26
|
@ -106,6 +106,9 @@ Bug fixes
|
|||
* LUCENE-5600: HttpClientBase did not properly consume a connection if a server
|
||||
error occurred. (Christoph Kaser via Shai Erera)
|
||||
|
||||
* LUCENE-5559: Add additional argument validation for CapitalizationFilter
|
||||
and CodepointCountFilter. (Ahmet Arslan via Robert Muir)
|
||||
|
||||
======================= Lucene 4.8.0 =======================
|
||||
|
||||
System Requirements
|
||||
|
|
|
@ -78,6 +78,15 @@ public final class CapitalizationFilter extends TokenFilter {
|
|||
this.keep = keep;
|
||||
this.forceFirstLetter = forceFirstLetter;
|
||||
this.okPrefix = okPrefix;
|
||||
if (minWordLength < 0) {
|
||||
throw new IllegalArgumentException("minWordLength must be greater than or equal to zero");
|
||||
}
|
||||
if (maxWordCount < 1) {
|
||||
throw new IllegalArgumentException("maxWordCount must be greater than zero");
|
||||
}
|
||||
if (maxTokenLength < 1) {
|
||||
throw new IllegalArgumentException("maxTokenLength must be greater than zero");
|
||||
}
|
||||
this.minWordLength = minWordLength;
|
||||
this.maxWordCount = maxWordCount;
|
||||
this.maxTokenLength = maxTokenLength;
|
||||
|
|
|
@ -46,6 +46,12 @@ public final class CodepointCountFilter extends FilteringTokenFilter {
|
|||
*/
|
||||
public CodepointCountFilter(Version version, TokenStream in, int min, int max) {
|
||||
super(version, in);
|
||||
if (min < 0) {
|
||||
throw new IllegalArgumentException("minimum length must be greater than or equal to zero");
|
||||
}
|
||||
if (min > max) {
|
||||
throw new IllegalArgumentException("maximum length must not be greater than minimum length");
|
||||
}
|
||||
this.min = min;
|
||||
this.max = max;
|
||||
}
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
@ -31,6 +30,7 @@ import org.apache.lucene.analysis.MockTokenizer;
|
|||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.apache.lucene.analysis.miscellaneous.CapitalizationFilter.*;
|
||||
|
||||
|
@ -149,4 +149,20 @@ public class TestCapitalizationFilter extends BaseTokenStreamTestCase {
|
|||
};
|
||||
checkOneTerm(a, "", "");
|
||||
}
|
||||
|
||||
/**
|
||||
* checking the validity of constructor arguments
|
||||
*/
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testIllegalArguments() throws Exception {
|
||||
new CapitalizationFilter(whitespaceMockTokenizer("accept only valid arguments"),true, null, true, null, -1 , DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH);
|
||||
}
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testIllegalArguments1() throws Exception {
|
||||
new CapitalizationFilter(whitespaceMockTokenizer("accept only valid arguments"),true, null, true, null, 0 , -10, DEFAULT_MAX_TOKEN_LENGTH);
|
||||
}
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testIllegalArguments2() throws Exception {
|
||||
new CapitalizationFilter(whitespaceMockTokenizer("accept only valid arguments"),true, null, true, null, 0 , DEFAULT_MAX_WORD_COUNT, -50);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
|
||||
|
||||
|
@ -272,4 +271,27 @@ public class TestCapitalizationFilterFactory extends BaseTokenStreamFactoryTestC
|
|||
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that invalid arguments result in exception
|
||||
*/
|
||||
public void testInvalidArguments() throws Exception {
|
||||
for (final String arg : new String[]{"minWordLength", "maxTokenLength", "maxWordCount"}) {
|
||||
try {
|
||||
Reader reader = new StringReader("foo foobar super-duper-trooper");
|
||||
TokenStream stream = whitespaceMockTokenizer(reader);
|
||||
|
||||
tokenFilterFactory("Capitalization",
|
||||
"keep", "and the it BIG",
|
||||
"onlyFirstWord", "false",
|
||||
arg, "-3",
|
||||
"okPrefix", "McK",
|
||||
"forceFirstLetter", "true").create(stream);
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
assertTrue(expected.getMessage().contains(arg + " must be greater than or equal to zero")
|
||||
|| expected.getMessage().contains(arg + " must be greater than zero"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
|
||||
public void testFilterWithPosIncr() throws Exception {
|
||||
|
@ -55,6 +55,11 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
|
|||
int min = TestUtil.nextInt(random(), 0, 100);
|
||||
int max = TestUtil.nextInt(random(), 0, 100);
|
||||
int count = text.codePointCount(0, text.length());
|
||||
if(min>max){
|
||||
int temp = min;
|
||||
min = max;
|
||||
max = temp;
|
||||
}
|
||||
boolean expected = count >= min && count <= max;
|
||||
TokenStream stream = new KeywordTokenizer();
|
||||
((Tokenizer)stream).setReader(new StringReader(text));
|
||||
|
@ -65,4 +70,12 @@ public class TestCodepointCountFilter extends BaseTokenStreamTestCase {
|
|||
stream.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checking the validity of constructor arguments
|
||||
*/
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testIllegalArguments() throws Exception {
|
||||
new CodepointCountFilter(TEST_VERSION_CURRENT, whitespaceMockTokenizer("accept only valid arguments"), 4, 1);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -49,4 +49,19 @@ public class TestCodepointCountFilterFactory extends BaseTokenStreamFactoryTestC
|
|||
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
||||
}
|
||||
}
|
||||
|
||||
/** Test that invalid arguments result in exception */
|
||||
public void testInvalidArguments() throws Exception {
|
||||
try {
|
||||
Reader reader = new StringReader("foo foobar super-duper-trooper");
|
||||
TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
((Tokenizer)stream).setReader(reader);
|
||||
tokenFilterFactory("CodepointCount",
|
||||
CodepointCountFilterFactory.MIN_KEY, "5",
|
||||
CodepointCountFilterFactory.MAX_KEY, "4").create(stream);
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
assertTrue(expected.getMessage().contains("maximum length must not be greater than minimum length"));
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue