LUCENE-2510: add more tests, fix bugs found by test where misconfigured factories wouldnt error out until runtime

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene2510@1365414 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-07-25 02:54:31 +00:00
parent ef25010554
commit 07f84ee57f
14 changed files with 326 additions and 15 deletions

View File

@ -25,7 +25,6 @@ import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
import org.apache.lucene.analysis.util.*;
@ -75,11 +74,18 @@ public class MappingCharFilterFactory extends CharFilterFactory implements
final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
parseRules( wlist, builder );
normMap = builder.build();
if (normMap.map == null) {
// if the inner FST is null, it means it accepts nothing (e.g. the file is empty)
// so just set the whole map to null
normMap = null;
}
}
}
public CharFilter create(Reader input) {
return new MappingCharFilter(normMap,input);
public Reader create(Reader input) {
// if the map is null, it means there's actually no mappings... just return the original stream
// as there is nothing to do here.
return normMap == null ? input : new MappingCharFilter(normMap,input);
}
// "source" => "target"

View File

@ -57,6 +57,9 @@ public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBa
*/
public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary) {
super(matchVersion, input, dictionary);
if (dictionary == null) {
throw new IllegalArgumentException("dictionary cannot be null");
}
}
/**
@ -83,6 +86,9 @@ public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBa
public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary,
int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
if (dictionary == null) {
throw new IllegalArgumentException("dictionary cannot be null");
}
}
@Override

View File

@ -63,8 +63,9 @@ public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory
throw new InitializationException("IOException thrown while loading dictionary", e);
}
}
public DictionaryCompoundWordTokenFilter create(TokenStream input) {
return new DictionaryCompoundWordTokenFilter(luceneMatchVersion,input,dictionary,minWordSize,minSubwordSize,maxSubwordSize,onlyLongestMatch);
public TokenStream create(TokenStream input) {
// if the dictionary is null, it means it was empty
return dictionary == null ? input : new DictionaryCompoundWordTokenFilter(luceneMatchVersion,input,dictionary,minWordSize,minSubwordSize,maxSubwordSize,onlyLongestMatch);
}
}

View File

@ -68,6 +68,10 @@ public class HunspellStemFilterFactory extends TokenFilterFactory implements Res
*/
public void inform(ResourceLoader loader) {
assureMatchVersion();
String dictionaryArg = args.get(PARAM_DICTIONARY);
if (dictionaryArg == null) {
throw new InitializationException("Parameter " + PARAM_DICTIONARY + " is mandatory.");
}
String dictionaryFiles[] = args.get(PARAM_DICTIONARY).split(",");
String affixFile = args.get(PARAM_AFFIX);
String pic = args.get(PARAM_IGNORE_CASE);

View File

@ -89,7 +89,8 @@ public class KeepWordFilterFactory extends TokenFilterFactory implements Resourc
return words;
}
public KeepWordFilter create(TokenStream input) {
return new KeepWordFilter(enablePositionIncrements, input, words);
public TokenStream create(TokenStream input) {
// if the set is null, it means it was empty
return words == null ? input : new KeepWordFilter(enablePositionIncrements, input, words);
}
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.analysis.util.InitializationException;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import java.util.Map;
@ -43,8 +44,13 @@ public class LengthFilterFactory extends TokenFilterFactory {
@Override
public void init(Map<String, String> args) {
super.init(args);
min=Integer.parseInt(args.get(MIN_KEY));
max=Integer.parseInt(args.get(MAX_KEY));
String minKey = args.get(MIN_KEY);
String maxKey = args.get(MAX_KEY);
if (minKey == null || maxKey == null) {
throw new InitializationException("Both " + MIN_KEY + " and " + MAX_KEY + " are mandatory");
}
min=Integer.parseInt(minKey);
max=Integer.parseInt(maxKey);
enablePositionIncrements = getBoolean("enablePositionIncrements",false);
}

View File

@ -21,6 +21,7 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
import org.apache.lucene.analysis.util.InitializationException;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
@ -41,12 +42,16 @@ public class LimitTokenCountFilterFactory extends TokenFilterFactory {
@Override
public void init(Map<String, String> args) {
super.init( args );
maxTokenCount = Integer.parseInt( args.get( "maxTokenCount" ) );
String maxTokenCountArg = args.get("maxTokenCount");
if (maxTokenCountArg == null) {
throw new InitializationException("maxTokenCount is mandatory.");
}
maxTokenCount = Integer.parseInt(args.get(maxTokenCountArg));
}
@Override
public TokenStream create(TokenStream input) {
return new LimitTokenCountFilter( input, maxTokenCount );
return new LimitTokenCountFilter(input, maxTokenCount);
}
}

View File

@ -61,6 +61,9 @@ public class DelimitedPayloadTokenFilterFactory extends TokenFilterFactory imple
public void inform(ResourceLoader loader) {
String encoderClass = args.get(ENCODER_ATTR);
if (encoderClass == null) {
throw new InitializationException("Parameter " + ENCODER_ATTR + " is mandatory");
}
if (encoderClass.equals("float")){
encoder = new FloatEncoder();
} else if (encoderClass.equals("integer")){

View File

@ -40,6 +40,9 @@ public class NumericPayloadTokenFilter extends TokenFilter {
public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
super(input);
if (typeMatch == null) {
throw new IllegalArgumentException("typeMatch cannot be null");
}
//Need to encode the payload
thePayload = new BytesRef(PayloadHelper.encodeFloat(payload));
this.typeMatch = typeMatch;

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.payloads;
import org.apache.lucene.analysis.payloads.NumericPayloadTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.InitializationException;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import java.util.Map;
@ -39,8 +40,12 @@ public class NumericPayloadTokenFilterFactory extends TokenFilterFactory {
@Override
public void init(Map<String, String> args) {
super.init(args);
payload = Float.parseFloat(args.get("payload"));
String payloadArg = args.get("payload");
typeMatch = args.get("typeMatch");
if (payloadArg == null || typeMatch == null) {
throw new InitializationException("Both payload and typeMatch are required");
}
payload = Float.parseFloat(payloadArg);
}
public NumericPayloadTokenFilter create(TokenStream input) {
return new NumericPayloadTokenFilter(input,payload,typeMatch);

View File

@ -55,5 +55,5 @@ public abstract class CharFilterFactory extends AbstractAnalysisFactory {
return loader.availableServices();
}
public abstract CharFilter create(Reader input);
public abstract Reader create(Reader input);
}

View File

@ -97,13 +97,12 @@ public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
for (final Class<?> c : analysisClasses) {
final int modifiers = c.getModifiers();
if (
// don't waste time with abstract classes or deprecated known-buggy ones
// don't waste time with abstract classes
Modifier.isAbstract(modifiers) || !Modifier.isPublic(modifiers)
|| c.isSynthetic() || c.isAnonymousClass() || c.isMemberClass() || c.isInterface()
|| testComponents.contains(c)
|| crazyComponents.contains(c)
|| oddlyNamedComponents.contains(c)
|| c.isAnnotationPresent(Deprecated.class)
|| !(Tokenizer.class.isAssignableFrom(c) || TokenFilter.class.isAssignableFrom(c) || CharFilter.class.isAssignableFrom(c))
) {
continue;

View File

@ -0,0 +1,176 @@
package org.apache.lucene.analysis.core;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import java.util.Collections;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.InitializationException;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.StringMockResourceLoader;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
/**
* Sanity check some things about all factories,
* we do our best to see if we can sanely initialize it with
* no parameters and smoke test it, etc.
*/
public class TestFactories extends BaseTokenStreamTestCase {
public void test() throws IOException {
for (String tokenizer : TokenizerFactory.availableTokenizers()) {
doTestTokenizer(tokenizer);
}
for (String tokenFilter : TokenFilterFactory.availableTokenFilters()) {
doTestTokenFilter(tokenFilter);
}
for (String charFilter : CharFilterFactory.availableCharFilters()) {
doTestCharFilter(charFilter);
}
}
private void doTestTokenizer(String tokenizer) throws IOException {
TokenizerFactory factory = TokenizerFactory.forName(tokenizer);
if (initialize(factory)) {
// we managed to fully create an instance. check a few more things:
// if it implements MultiTermAware, sanity check its impl
if (factory instanceof MultiTermAwareComponent) {
AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
assertNotNull(mtc);
// its not ok to return e.g. a charfilter here: but a tokenizer could wrap a filter around it
assertFalse(mtc instanceof CharFilterFactory);
}
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false);
}
}
private void doTestTokenFilter(String tokenfilter) throws IOException {
TokenFilterFactory factory = TokenFilterFactory.forName(tokenfilter);
if (initialize(factory)) {
// we managed to fully create an instance. check a few more things:
// if it implements MultiTermAware, sanity check its impl
if (factory instanceof MultiTermAwareComponent) {
AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
assertNotNull(mtc);
// its not ok to return a charfilter or tokenizer here, this makes no sense
assertTrue(mtc instanceof TokenFilterFactory);
}
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 100, 20, false, false);
}
}
private void doTestCharFilter(String charfilter) throws IOException {
CharFilterFactory factory = CharFilterFactory.forName(charfilter);
if (initialize(factory)) {
// we managed to fully create an instance. check a few more things:
// if it implements MultiTermAware, sanity check its impl
if (factory instanceof MultiTermAwareComponent) {
AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
assertNotNull(mtc);
// its not ok to return a tokenizer or tokenfilter here, this makes no sense
assertTrue(mtc instanceof CharFilterFactory);
}
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 100, 20, false, false);
}
}
/** tries to initialize a factory with no arguments */
private boolean initialize(AbstractAnalysisFactory factory) {
boolean success = false;
try {
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
factory.init(Collections.<String,String>emptyMap());
success = true;
} catch (InitializationException ignored) {
// its ok if we dont provide the right parameters to throw this
}
if (factory instanceof ResourceLoaderAware) {
success = false;
try {
((ResourceLoaderAware) factory).inform(new StringMockResourceLoader(""));
success = true;
} catch (InitializationException ignored) {
// its ok if the right files arent available or whatever to throw this
}
}
return success;
}
// some silly classes just so we can use checkRandomData
private TokenizerFactory assertingTokenizer = new TokenizerFactory() {
@Override
public Tokenizer create(Reader input) {
return new MockTokenizer(input);
}
};
private static class FactoryAnalyzer extends Analyzer {
final TokenizerFactory tokenizer;
final CharFilterFactory charFilter;
final TokenFilterFactory tokenfilter;
FactoryAnalyzer(TokenizerFactory tokenizer, TokenFilterFactory tokenfilter, CharFilterFactory charFilter) {
assert tokenizer != null;
this.tokenizer = tokenizer;
this.charFilter = charFilter;
this.tokenfilter = tokenfilter;
}
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tf = tokenizer.create(reader);
if (tokenfilter != null) {
return new TokenStreamComponents(tf, tokenfilter.create(tf));
} else {
return new TokenStreamComponents(tf);
}
}
@Override
protected Reader initReader(String fieldName, Reader reader) {
if (charFilter != null) {
return charFilter.create(reader);
} else {
return reader;
}
}
}
}

View File

@ -30,6 +30,22 @@ public class TestAnalysisSPILoader extends LuceneTestCase {
assertEquals(WhitespaceTokenizerFactory.class, TokenizerFactory.forName("WHITESPACE").getClass());
assertEquals(WhitespaceTokenizerFactory.class, TokenizerFactory.forName("whitespace").getClass());
}
public void testBogusLookupTokenizer() {
try {
TokenizerFactory.forName("sdfsdfsdfdsfsdfsdf");
fail();
} catch (IllegalArgumentException expected) {
//
}
try {
TokenizerFactory.forName("!(**#$U*#$*");
fail();
} catch (IllegalArgumentException expected) {
//
}
}
public void testLookupTokenizerClass() {
assertEquals(WhitespaceTokenizerFactory.class, TokenizerFactory.lookupClass("Whitespace"));
@ -37,6 +53,22 @@ public class TestAnalysisSPILoader extends LuceneTestCase {
assertEquals(WhitespaceTokenizerFactory.class, TokenizerFactory.lookupClass("whitespace"));
}
public void testBogusLookupTokenizerClass() {
try {
TokenizerFactory.lookupClass("sdfsdfsdfdsfsdfsdf");
fail();
} catch (IllegalArgumentException expected) {
//
}
try {
TokenizerFactory.lookupClass("!(**#$U*#$*");
fail();
} catch (IllegalArgumentException expected) {
//
}
}
public void testAvailableTokenizers() {
assertTrue(TokenizerFactory.availableTokenizers().contains("whitespace"));
}
@ -50,6 +82,22 @@ public class TestAnalysisSPILoader extends LuceneTestCase {
assertEquals(RemoveDuplicatesTokenFilterFactory.class, TokenFilterFactory.forName("REMOVEDUPLICATES").getClass());
assertEquals(RemoveDuplicatesTokenFilterFactory.class, TokenFilterFactory.forName("removeduplicates").getClass());
}
public void testBogusLookupTokenFilter() {
try {
TokenFilterFactory.forName("sdfsdfsdfdsfsdfsdf");
fail();
} catch (IllegalArgumentException expected) {
//
}
try {
TokenFilterFactory.forName("!(**#$U*#$*");
fail();
} catch (IllegalArgumentException expected) {
//
}
}
public void testLookupTokenFilterClass() {
assertEquals(LowerCaseFilterFactory.class, TokenFilterFactory.lookupClass("Lowercase"));
@ -61,6 +109,22 @@ public class TestAnalysisSPILoader extends LuceneTestCase {
assertEquals(RemoveDuplicatesTokenFilterFactory.class, TokenFilterFactory.lookupClass("removeduplicates"));
}
public void testBogusLookupTokenFilterClass() {
try {
TokenFilterFactory.lookupClass("sdfsdfsdfdsfsdfsdf");
fail();
} catch (IllegalArgumentException expected) {
//
}
try {
TokenFilterFactory.lookupClass("!(**#$U*#$*");
fail();
} catch (IllegalArgumentException expected) {
//
}
}
public void testAvailableTokenFilters() {
assertTrue(TokenFilterFactory.availableTokenFilters().contains("lowercase"));
assertTrue(TokenFilterFactory.availableTokenFilters().contains("removeduplicates"));
@ -71,6 +135,22 @@ public class TestAnalysisSPILoader extends LuceneTestCase {
assertEquals(HTMLStripCharFilterFactory.class, CharFilterFactory.forName("HTMLSTRIP").getClass());
assertEquals(HTMLStripCharFilterFactory.class, CharFilterFactory.forName("htmlstrip").getClass());
}
public void testBogusLookupCharFilter() {
try {
CharFilterFactory.forName("sdfsdfsdfdsfsdfsdf");
fail();
} catch (IllegalArgumentException expected) {
//
}
try {
CharFilterFactory.forName("!(**#$U*#$*");
fail();
} catch (IllegalArgumentException expected) {
//
}
}
public void testLookupCharFilterClass() {
assertEquals(HTMLStripCharFilterFactory.class, CharFilterFactory.lookupClass("HTMLStrip"));
@ -78,6 +158,22 @@ public class TestAnalysisSPILoader extends LuceneTestCase {
assertEquals(HTMLStripCharFilterFactory.class, CharFilterFactory.lookupClass("htmlstrip"));
}
public void testBogusLookupCharFilterClass() {
try {
CharFilterFactory.lookupClass("sdfsdfsdfdsfsdfsdf");
fail();
} catch (IllegalArgumentException expected) {
//
}
try {
CharFilterFactory.lookupClass("!(**#$U*#$*");
fail();
} catch (IllegalArgumentException expected) {
//
}
}
public void testAvailableCharFilters() {
assertTrue(CharFilterFactory.availableCharFilters().contains("htmlstrip"));
}