SOLR-3402: Analysis Factory Lucene Version is now parsed outside of the Factories

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1331220 13f79535-47bb-0310-9956-ffa450edef68
Christopher John Male 2012-04-27 05:18:38 +00:00
parent 09e3948d71
commit 9dcae1a12f
45 changed files with 246 additions and 122 deletions

View File

@ -18,6 +18,8 @@ package org.apache.lucene.util;
*/
import java.util.Locale;
/**
* Used by certain classes to match version compatibility
* across releases of Lucene.
@ -111,4 +113,9 @@ public enum Version {
public boolean onOrAfter(Version other) {
return compareTo(other) >= 0;
}
public static Version parseLeniently(String version) {
String parsedMatchVersion = version.toUpperCase(Locale.ENGLISH);
return Version.valueOf(parsedMatchVersion.replaceFirst("^(\\d)\\.(\\d)$", "LUCENE_$1$2"));
}
}
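
For reference, a minimal sketch of what the lenient parsing above accepts (imports elided, as elsewhere in this diff; the failure case is an assumption based on Version.valueOf semantics):

Version v1 = Version.parseLeniently("4.0");            // numeric form maps to LUCENE_40
Version v2 = Version.parseLeniently("lucene_40");      // enum-constant form, case-insensitive
Version v3 = Version.parseLeniently("LUCENE_CURRENT"); // the special constant also resolves
// Anything else (e.g. "4.0.1") fails the regex, falls through to
// Version.valueOf(String), and throws IllegalArgumentException.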

View File

@ -28,4 +28,9 @@ public class TestVersion extends LuceneTestCase {
assertFalse(Version.LUCENE_30.onOrAfter(Version.LUCENE_31));
}
public void testParseLeniently() {
assertEquals(Version.LUCENE_40, Version.parseLeniently("4.0"));
assertEquals(Version.LUCENE_40, Version.parseLeniently("LUCENE_40"));
assertEquals(Version.LUCENE_CURRENT, Version.parseLeniently("LUCENE_CURRENT"));
}
}

View File

@ -279,6 +279,11 @@ New Features
(all date range queries and date faceting are affected). The default TZ
is still UTC. (David Schlotfeldt, hossman)
* SOLR-3402: Analysis Factories are now configured with their Lucene Version
through setLuceneMatchVersion, rather than through the Map passed to init.
Parsing and simple error checking for the Version are now done inside
the code that creates the Analysis Factories. (Chris Male)
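
A minimal sketch of the wiring change described above, using factory and constant names that appear in the tests later in this commit (imports elided):

// Old style: the Lucene version travelled inside the init args, and each
// factory parsed it itself during init():
Map<String,String> oldArgs = new HashMap<String,String>();
oldArgs.put("luceneMatchVersion", "LUCENE_40");
WhitespaceTokenizerFactory oldFactory = new WhitespaceTokenizerFactory();
oldFactory.init(oldArgs);

// New style: the creating code parses the version once and injects it
// before init(); the args map no longer carries the version:
WhitespaceTokenizerFactory newFactory = new WhitespaceTokenizerFactory();
newFactory.setLuceneMatchVersion(Version.parseLeniently("4.0"));
newFactory.init(Collections.<String,String>emptyMap());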
Optimizations
----------------------

View File

@ -31,7 +31,7 @@ public class TestICUFoldingFilterFactory extends BaseTokenTestCase {
public void test() throws Exception {
Reader reader = new StringReader("Résumé");
ICUFoldingFilterFactory factory = new ICUFoldingFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "resume" });

View File

@ -31,7 +31,8 @@ public class TestICUNormalizer2FilterFactory extends BaseTokenTestCase {
public void testDefaults() throws Exception {
Reader reader = new StringReader("This is a ");
ICUNormalizer2FilterFactory factory = new ICUNormalizer2FilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "this", "is", "a", "test" });

View File

@ -32,11 +32,10 @@ public class TestMorfologikFilterFactory extends BaseTokenTestCase {
public void testCreateDictionary() throws Exception {
StringReader reader = new StringReader("rowery bilety");
Map<String,String> initParams = new HashMap<String,String>();
initParams.put(IndexSchema.LUCENE_MATCH_VERSION_PARAM,
DEFAULT_VERSION.toString());
initParams.put(MorfologikFilterFactory.DICTIONARY_SCHEMA_ATTRIBUTE,
"morfologik");
MorfologikFilterFactory factory = new MorfologikFilterFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(initParams);
TokenStream ts = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION,
reader));

View File

@ -19,6 +19,7 @@ package org.apache.solr.analysis;
import java.util.Map;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -31,6 +32,8 @@ public abstract class BaseCharFilterFactory implements CharFilterFactory {
public static final Logger log = LoggerFactory.getLogger(BaseCharFilterFactory.class);
protected Version luceneMatchVersion;
/** The init args */
protected Map<String,String> args;
@ -42,6 +45,10 @@ public abstract class BaseCharFilterFactory implements CharFilterFactory {
this.args = args;
}
public void setLuceneMatchVersion(Version luceneMatchVersion) {
this.luceneMatchVersion = luceneMatchVersion;
}
protected int getInt(String name) {
return getInt(name,-1,false);
}

View File

@ -58,10 +58,6 @@ abstract class BaseTokenStreamFactory {
public void init(Map<String,String> args) {
this.args=args;
String matchVersion = args.get(IndexSchema.LUCENE_MATCH_VERSION_PARAM);
if (matchVersion != null) {
luceneMatchVersion = Config.parseLuceneVersionString(matchVersion);
}
}
public Map<String,String> getArgs() {
@ -75,16 +71,16 @@ abstract class BaseTokenStreamFactory {
if (luceneMatchVersion == null) {
throw new InitializationException("Configuration Error: Factory '" + this.getClass().getName() +
"' needs a 'luceneMatchVersion' parameter");
} else if (!luceneMatchVersion.onOrAfter(Version.LUCENE_40)) {
log.warn(getClass().getSimpleName() + " is using deprecated " + luceneMatchVersion +
" emulation. You should at some point declare and reindex to at least 4.0, because " +
"3.x emulation is deprecated and will be removed in 5.0");
}
}
protected final void warnDeprecated(String message) {
log.warn(getClass().getSimpleName() + " is deprecated. " + message);
}
public void setLuceneMatchVersion(Version luceneMatchVersion) {
this.luceneMatchVersion = luceneMatchVersion;
}
// TODO: move these somewhere that tokenizers and others
// can also use them...

View File

@ -20,6 +20,7 @@ package org.apache.solr.analysis;
import java.util.Map;
import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.util.Version;
/**
*
@ -27,7 +28,12 @@ import org.apache.lucene.analysis.CharStream;
*
*/
public interface CharFilterFactory {
public void init(Map<String,String> args);
public Map<String,String> getArgs();
public void setLuceneMatchVersion(Version luceneMatchVersion);
public CharStream create(CharStream input);
}

View File

@ -66,7 +66,7 @@ final class FSTSynonymFilterFactory extends BaseTokenFilterFactory implements Re
String tf = args.get("tokenizerFactory");
final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf, args);
final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf);
Analyzer analyzer = new Analyzer() {
@Override
@ -153,8 +153,9 @@ final class FSTSynonymFilterFactory extends BaseTokenFilterFactory implements Re
return parser.build();
}
private static TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname, Map<String,String> args){
private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname){
TokenizerFactory tokFactory = loader.newInstance(cname, TokenizerFactory.class);
tokFactory.setLuceneMatchVersion(luceneMatchVersion);
tokFactory.init(args);
if (tokFactory instanceof ResourceLoaderAware) {
((ResourceLoaderAware) tokFactory).inform(loader);

View File

@ -47,6 +47,7 @@ public class LowerCaseTokenizerFactory extends BaseTokenizerFactory implements M
@Override
public Object getMultiTermComponent() {
LowerCaseFilterFactory filt = new LowerCaseFilterFactory();
filt.setLuceneMatchVersion(luceneMatchVersion);
filt.init(args);
return filt;
}

View File

@ -56,7 +56,7 @@ final class SlowSynonymFilterFactory extends BaseTokenFilterFactory implements R
String tf = args.get("tokenizerFactory");
TokenizerFactory tokFactory = null;
if( tf != null ){
tokFactory = loadTokenizerFactory( loader, tf, args );
tokFactory = loadTokenizerFactory(loader, tf);
}
Iterable<String> wlist=loadRules( synonyms, loader );
@ -167,8 +167,9 @@ final class SlowSynonymFilterFactory extends BaseTokenFilterFactory implements R
return tokList;
}
private static TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname, Map<String,String> args){
private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) {
TokenizerFactory tokFactory = loader.newInstance(cname, TokenizerFactory.class);
tokFactory.setLuceneMatchVersion(luceneMatchVersion);
tokFactory.init( args );
if (tokFactory instanceof ResourceLoaderAware) {
((ResourceLoaderAware) tokFactory).inform(loader);

View File

@ -18,6 +18,8 @@
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;
import java.util.Map;
/**
@ -54,6 +56,8 @@ public interface TokenFilterFactory {
* schema.xml
*/
public void init(Map<String,String> args);
public void setLuceneMatchVersion(Version luceneMatchVersion);
/**
* Accessor method for reporting the args used to initialize this factory.

View File

@ -20,6 +20,7 @@ package org.apache.solr.analysis;
import java.io.*;
import java.util.Map;
import org.apache.lucene.analysis.*;
import org.apache.lucene.util.Version;
/**
@ -53,6 +54,8 @@ public interface TokenizerFactory {
* schema.xml
*/
public void init(Map<String,String> args);
public void setLuceneMatchVersion(Version luceneMatchVersion);
/**
* Accessor method for reporting the args used to initialize this factory.

View File

@ -314,14 +314,9 @@ public class Config {
private static final AtomicBoolean versionWarningAlreadyLogged = new AtomicBoolean(false);
public static final Version parseLuceneVersionString(final String matchVersion) {
String parsedMatchVersion = matchVersion.toUpperCase(Locale.ENGLISH);
// be lenient with the supplied version parameter
parsedMatchVersion = parsedMatchVersion.replaceFirst("^(\\d)\\.(\\d)$", "LUCENE_$1$2");
final Version version;
try {
version = Version.valueOf(parsedMatchVersion);
version = Version.parseLeniently(matchVersion);
} catch (IllegalArgumentException iae) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Invalid luceneMatchVersion '" + matchVersion +

View File

@ -19,7 +19,6 @@ package org.apache.solr.schema;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Version;
import org.apache.solr.analysis.*;
import org.apache.solr.common.ResourceLoader;
@ -270,10 +269,10 @@ public final class FieldTypePluginLoader
protected void init(CharFilterFactory plugin, Node node) throws Exception {
if( plugin != null ) {
final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
// copy the luceneMatchVersion from config, if not set
if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
params.put(LUCENE_MATCH_VERSION_PARAM,
schema.getDefaultLuceneMatchVersion().toString());
String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion, plugin.getClass().getSimpleName()));
plugin.init( params );
charFilters.add( plugin );
}
@ -306,10 +305,9 @@ public final class FieldTypePluginLoader
}
final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
// copy the luceneMatchVersion from config, if not set
if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
params.put(LUCENE_MATCH_VERSION_PARAM,
schema.getDefaultLuceneMatchVersion().toString());
String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion, plugin.getClass().getSimpleName()));
plugin.init( params );
tokenizers.add( plugin );
}
@ -340,10 +338,10 @@ public final class FieldTypePluginLoader
protected void init(TokenFilterFactory plugin, Node node) throws Exception {
if( plugin != null ) {
final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
// copy the luceneMatchVersion from config, if not set
if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
params.put(LUCENE_MATCH_VERSION_PARAM,
schema.getDefaultLuceneMatchVersion().toString());
String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion, plugin.getClass().getSimpleName()));
plugin.init( params );
filters.add( plugin );
}
@ -359,5 +357,17 @@ public final class FieldTypePluginLoader
return new TokenizerChain(charFilters.toArray(new CharFilterFactory[charFilters.size()]),
tokenizers.get(0), filters.toArray(new TokenFilterFactory[filters.size()]));
}
private Version parseConfiguredVersion(String configuredVersion, String pluginClassName) {
Version version = (configuredVersion != null) ?
Config.parseLuceneVersionString(configuredVersion) : schema.getDefaultLuceneMatchVersion();
if (!version.onOrAfter(Version.LUCENE_40)) {
log.warn(pluginClassName + " is using deprecated " + version +
" emulation. You should at some point declare and reindex to at least 4.0, because " +
"3.x emulation is deprecated and will be removed in 5.0");
}
return version;
}
}

View File

@ -39,9 +39,10 @@ public class CommonGramsFilterFactoryTest extends BaseTokenTestCase {
ResourceLoader loader = new SolrResourceLoader(null, null);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
Map<String, String> args = new HashMap<String, String>();
args.put("words", "stop-1.txt");
args.put("ignoreCase", "true");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
CharArraySet words = factory.getCommonWords();
@ -53,6 +54,7 @@ public class CommonGramsFilterFactoryTest extends BaseTokenTestCase {
factory = new CommonGramsFilterFactory();
args.put("words", "stop-1.txt, stop-2.txt");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
words = factory.getCommonWords();
@ -65,6 +67,7 @@ public class CommonGramsFilterFactoryTest extends BaseTokenTestCase {
factory = new CommonGramsFilterFactory();
args.put("words", "stop-snowball.txt");
args.put("format", "snowball");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
words = factory.getCommonWords();
@ -86,8 +89,8 @@ public class CommonGramsFilterFactoryTest extends BaseTokenTestCase {
ResourceLoader loader = new SolrResourceLoader(null, null);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
factory.init(args);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
factory.inform(loader);
CharArraySet words = factory.getCommonWords();
assertTrue("words is null and it shouldn't be", words != null);

View File

@ -38,9 +38,10 @@ public class CommonGramsQueryFilterFactoryTest extends BaseTokenTestCase {
ResourceLoader loader = new SolrResourceLoader(null, null);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
Map<String, String> args = new HashMap<String, String>();
args.put("words", "stop-1.txt");
args.put("ignoreCase", "true");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
CharArraySet words = factory.getCommonWords();
@ -52,6 +53,7 @@ public class CommonGramsQueryFilterFactoryTest extends BaseTokenTestCase {
factory = new CommonGramsQueryFilterFactory();
args.put("words", "stop-1.txt, stop-2.txt");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
words = factory.getCommonWords();
@ -62,6 +64,7 @@ public class CommonGramsQueryFilterFactoryTest extends BaseTokenTestCase {
.isIgnoreCase() == true);
factory = new CommonGramsQueryFilterFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
args.put("words", "stop-snowball.txt");
args.put("format", "snowball");
factory.init(args);
@ -85,8 +88,8 @@ public class CommonGramsQueryFilterFactoryTest extends BaseTokenTestCase {
ResourceLoader loader = new SolrResourceLoader(null, null);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
factory.init(args);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
factory.inform(loader);
CharArraySet words = factory.getCommonWords();
assertTrue("words is null and it shouldn't be", words != null);

View File

@ -47,9 +47,10 @@ public class SnowballPorterFilterFactoryTest extends BaseTokenTestCase {
}
SnowballPorterFilterFactory factory = new SnowballPorterFilterFactory();
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
Map<String, String> args = new HashMap<String, String>();
args.put("language", "English");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(new LinesMockSolrResourceLoader(new ArrayList<String>()));
Tokenizer tokenizer = new MockTokenizer(
@ -84,9 +85,10 @@ public class SnowballPorterFilterFactoryTest extends BaseTokenTestCase {
public void testProtected() throws Exception {
SnowballPorterFilterFactory factory = new SnowballPorterFilterFactory();
ResourceLoader loader = new SolrResourceLoader(null, null);
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String,String>();
args.put("protected", "protwords.txt");
args.put("language", "English");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
Reader reader = new StringReader("ridding of some stemming");

View File

@ -36,7 +36,8 @@ public class TestArabicFilters extends BaseTokenTestCase {
public void testTokenizer() throws Exception {
Reader reader = new StringReader("الذين مَلكت أيمانكم");
ArabicLetterTokenizerFactory factory = new ArabicLetterTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream, new String[] {"الذين", "مَلكت", "أيمانكم"});
}
@ -47,9 +48,11 @@ public class TestArabicFilters extends BaseTokenTestCase {
public void testNormalizer() throws Exception {
Reader reader = new StringReader("الذين مَلكت أيمانكم");
StandardTokenizerFactory factory = new StandardTokenizerFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
ArabicNormalizationFilterFactory filterFactory = new ArabicNormalizationFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
filterFactory.init(DEFAULT_VERSION_PARAM);
filterFactory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
filterFactory.init(EMPTY_PARAMS);
Tokenizer tokenizer = factory.create(reader);
TokenStream stream = filterFactory.create(tokenizer);
assertTokenStreamContents(stream, new String[] {"الذين", "ملكت", "ايمانكم"});
@ -61,10 +64,12 @@ public class TestArabicFilters extends BaseTokenTestCase {
public void testStemmer() throws Exception {
Reader reader = new StringReader("الذين مَلكت أيمانكم");
StandardTokenizerFactory factory = new StandardTokenizerFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
ArabicNormalizationFilterFactory normFactory = new ArabicNormalizationFilterFactory();
normFactory.setLuceneMatchVersion(DEFAULT_VERSION);
ArabicStemFilterFactory stemFactory = new ArabicStemFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
normFactory.init(DEFAULT_VERSION_PARAM);
factory.init(EMPTY_PARAMS);
normFactory.init(EMPTY_PARAMS);
Tokenizer tokenizer = factory.create(reader);
TokenStream stream = normFactory.create(tokenizer);
stream = stemFactory.create(stream);
@ -78,7 +83,8 @@ public class TestArabicFilters extends BaseTokenTestCase {
Reader reader = new StringReader("می‌خورد");
PersianCharFilterFactory charfilterFactory = new PersianCharFilterFactory();
StandardTokenizerFactory tokenizerFactory = new StandardTokenizerFactory();
tokenizerFactory.init(DEFAULT_VERSION_PARAM);
tokenizerFactory.setLuceneMatchVersion(DEFAULT_VERSION);
tokenizerFactory.init(EMPTY_PARAMS);
TokenStream stream = tokenizerFactory.create(charfilterFactory.create(CharReader.get(reader)));
assertTokenStreamContents(stream, new String[] { "می", "خورد" });
}

View File

@ -28,7 +28,8 @@ import org.apache.lucene.analysis.TokenStream;
public class TestBeiderMorseFilterFactory extends BaseTokenTestCase {
public void testBasics() throws Exception {
BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
TokenStream ts = factory.create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
assertTokenStreamContents(ts,
new String[] { "vDnbirk", "vanbirk", "vinbirk", "wDnbirk", "wanbirk", "winbirk" },

View File

@ -33,7 +33,8 @@ public class TestCJKBigramFilterFactory extends BaseTokenTestCase {
public void testDefaults() throws Exception {
Reader reader = new StringReader("多くの学生が試験に落ちた。");
CJKBigramFilterFactory factory = new CJKBigramFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
TokenStream stream = factory.create(new StandardTokenizer(TEST_VERSION_CURRENT, reader));
assertTokenStreamContents(stream,
new String[] { "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" });

View File

@ -33,11 +33,12 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
public void testCapitalization() throws Exception
{
Map<String,String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String, String>();
args.put( CapitalizationFilterFactory.KEEP, "and the it BIG" );
args.put( CapitalizationFilterFactory.ONLY_FIRST_WORD, "true" );
CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init( args );
assertTokenStreamContents(factory.create(
new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.WHITESPACE, false)),
@ -94,6 +95,7 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
// Now try some prefixes
factory = new CapitalizationFilterFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
args.put( "okPrefix", "McK" ); // all words
factory.init( args );
assertTokenStreamContents(factory.create(
@ -114,12 +116,13 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
}
public void testKeepIgnoreCase() throws Exception {
Map<String,String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String, String>();
args.put( CapitalizationFilterFactory.KEEP, "kitten" );
args.put( CapitalizationFilterFactory.KEEP_IGNORE_CASE, "true" );
args.put( CapitalizationFilterFactory.ONLY_FIRST_WORD, "true" );
CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init( args );
factory.forceFirstLetter = true;
assertTokenStreamContents(factory.create(
@ -143,10 +146,11 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
* This is very weird when combined with ONLY_FIRST_WORD!!!
*/
public void testMinWordLength() throws Exception {
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String,String>();
args.put(CapitalizationFilterFactory.ONLY_FIRST_WORD, "true");
args.put(CapitalizationFilterFactory.MIN_WORD_LENGTH, "5");
CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
Tokenizer tokenizer = new MockTokenizer(new StringReader(
"helo testing"), MockTokenizer.WHITESPACE, false);
@ -159,9 +163,10 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
* in each token (it should do nothing)
*/
public void testMaxWordCount() throws Exception {
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String,String>();
args.put(CapitalizationFilterFactory.MAX_WORD_COUNT, "2");
CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
Tokenizer tokenizer = new MockTokenizer(new StringReader(
"one two three four"), MockTokenizer.WHITESPACE, false);
@ -173,9 +178,10 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
* Test CapitalizationFilterFactory's maxWordCount option when exceeded
*/
public void testMaxWordCount2() throws Exception {
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String,String>();
args.put(CapitalizationFilterFactory.MAX_WORD_COUNT, "2");
CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
Tokenizer tokenizer = new MockTokenizer(new StringReader(
"one two three four"), MockTokenizer.KEYWORD, false);
@ -189,9 +195,10 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
* This is weird, it is not really a max, but inclusive (look at 'is')
*/
public void testMaxTokenLength() throws Exception {
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String,String>();
args.put(CapitalizationFilterFactory.MAX_TOKEN_LENGTH, "2");
CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
Tokenizer tokenizer = new MockTokenizer(new StringReader(
"this is a test"), MockTokenizer.WHITESPACE, false);
@ -203,10 +210,11 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
* Test CapitalizationFilterFactory's forceFirstLetter option
*/
public void testForceFirstLetter() throws Exception {
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String,String>();
args.put(CapitalizationFilterFactory.KEEP, "kitten");
args.put(CapitalizationFilterFactory.FORCE_FIRST_LETTER, "true");
CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
Tokenizer tokenizer = new MockTokenizer(new StringReader("kitten"), MockTokenizer.WHITESPACE, false);
TokenStream ts = factory.create(tokenizer);

View File

@ -40,8 +40,9 @@ public class TestDictionaryCompoundWordTokenFilterFactory extends BaseTokenTestC
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
DictionaryCompoundWordTokenFilterFactory factory = new DictionaryCompoundWordTokenFilterFactory();
ResourceLoader loader = new SolrResourceLoader(null, null);
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String,String>();
args.put("dictionary", "compoundDictionary.txt");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
TokenStream stream = factory.create(tokenizer);

View File

@ -39,7 +39,8 @@ public class TestElisionFilterFactory extends BaseTokenTestCase {
Reader reader = new StringReader("l'avion");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
ElisionFilterFactory factory = new ElisionFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
ResourceLoader loader = new SolrResourceLoader(null, null);
Map<String,String> args = new HashMap<String,String>();
args.put("articles", "frenchArticles.txt");
@ -56,7 +57,8 @@ public class TestElisionFilterFactory extends BaseTokenTestCase {
Reader reader = new StringReader("l'avion");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
ElisionFilterFactory factory = new ElisionFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
ResourceLoader loader = new SolrResourceLoader(null, null);
factory.init(new HashMap<String,String>());
factory.inform(loader);
@ -71,7 +73,8 @@ public class TestElisionFilterFactory extends BaseTokenTestCase {
Reader reader = new StringReader("L'avion");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
ElisionFilterFactory factory = new ElisionFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
ResourceLoader loader = new SolrResourceLoader(null, null);
Map<String,String> args = new HashMap<String,String>();
args.put("articles", "frenchArticles.txt");

View File

@ -35,7 +35,8 @@ public class TestGreekLowerCaseFilterFactory extends BaseTokenTestCase {
Reader reader = new StringReader("Μάϊος ΜΆΪΟΣ");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
GreekLowerCaseFilterFactory factory = new GreekLowerCaseFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "μαιοσ", "μαιοσ" });
}

View File

@ -33,9 +33,11 @@ public class TestHindiFilters extends BaseTokenTestCase {
public void testIndicNormalizer() throws Exception {
Reader reader = new StringReader("ত্‍ अाैर");
StandardTokenizerFactory factory = new StandardTokenizerFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
IndicNormalizationFilterFactory filterFactory = new IndicNormalizationFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
filterFactory.init(DEFAULT_VERSION_PARAM);
filterFactory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
filterFactory.init(EMPTY_PARAMS);
Tokenizer tokenizer = factory.create(reader);
TokenStream stream = filterFactory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "", "और" });
@ -47,10 +49,12 @@ public class TestHindiFilters extends BaseTokenTestCase {
public void testHindiNormalizer() throws Exception {
Reader reader = new StringReader("क़िताब");
StandardTokenizerFactory factory = new StandardTokenizerFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
IndicNormalizationFilterFactory indicFilterFactory = new IndicNormalizationFilterFactory();
HindiNormalizationFilterFactory hindiFilterFactory = new HindiNormalizationFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
hindiFilterFactory.init(DEFAULT_VERSION_PARAM);
hindiFilterFactory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
hindiFilterFactory.init(EMPTY_PARAMS);
Tokenizer tokenizer = factory.create(reader);
TokenStream stream = indicFilterFactory.create(tokenizer);
stream = hindiFilterFactory.create(stream);
@ -63,11 +67,13 @@ public class TestHindiFilters extends BaseTokenTestCase {
public void testStemmer() throws Exception {
Reader reader = new StringReader("किताबें");
StandardTokenizerFactory factory = new StandardTokenizerFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
IndicNormalizationFilterFactory indicFilterFactory = new IndicNormalizationFilterFactory();
HindiNormalizationFilterFactory hindiFilterFactory = new HindiNormalizationFilterFactory();
HindiStemFilterFactory stemFactory = new HindiStemFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
stemFactory.init(DEFAULT_VERSION_PARAM);
stemFactory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
stemFactory.init(EMPTY_PARAMS);
Tokenizer tokenizer = factory.create(reader);
TokenStream stream = indicFilterFactory.create(tokenizer);
stream = hindiFilterFactory.create(stream);

View File

@ -36,7 +36,7 @@ public class TestHunspellStemFilterFactory extends BaseTokenTestCase {
Map<String,String> args = new HashMap<String,String>();
args.put("dictionary", "hunspell-test.dic");
args.put("affix", "hunspell-test.aff");
args.put(IndexSchema.LUCENE_MATCH_VERSION_PARAM, DEFAULT_VERSION.name());
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(new SolrResourceLoader("solr"));

View File

@ -40,9 +40,10 @@ public class TestHyphenationCompoundWordTokenFilterFactory extends BaseTokenTest
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
ResourceLoader loader = new SolrResourceLoader(null, null);
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String,String>();
args.put("hyphenator", "da_UTF8.xml");
args.put("dictionary", "da_compoundDictionary.txt");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
TokenStream stream = factory.create(tokenizer);
@ -63,10 +64,11 @@ public class TestHyphenationCompoundWordTokenFilterFactory extends BaseTokenTest
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
ResourceLoader loader = new SolrResourceLoader(null, null);
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String,String>();
args.put("hyphenator", "da_UTF8.xml");
args.put("minSubwordSize", "2");
args.put("maxSubwordSize", "4");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
TokenStream stream = factory.create(tokenizer);

View File

@ -29,7 +29,8 @@ import org.apache.solr.core.SolrResourceLoader;
public class TestJapaneseBaseFormFilterFactory extends BaseTokenTestCase {
public void testBasics() throws IOException {
JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
tokenizerFactory.init(DEFAULT_VERSION_PARAM);
tokenizerFactory.setLuceneMatchVersion(DEFAULT_VERSION);
tokenizerFactory.init(EMPTY_PARAMS);
tokenizerFactory.inform(new SolrResourceLoader(null, null));
TokenStream ts = tokenizerFactory.create(new StringReader("それはまだ実験段階にあります"));
JapaneseBaseFormFilterFactory factory = new JapaneseBaseFormFilterFactory();

View File

@ -35,13 +35,14 @@ public class TestJapanesePartOfSpeechStopFilterFactory extends BaseTokenTestCase
"動詞-自立\n";
JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
tokenizerFactory.init(DEFAULT_VERSION_PARAM);
tokenizerFactory.setLuceneMatchVersion(DEFAULT_VERSION);
tokenizerFactory.init(EMPTY_PARAMS);
tokenizerFactory.inform(new SolrResourceLoader(null, null));
TokenStream ts = tokenizerFactory.create(new StringReader("私は制限スピードを超える。"));
JapanesePartOfSpeechStopFilterFactory factory = new JapanesePartOfSpeechStopFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
args.put("tags", "stoptags.txt");
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
factory.init(args);
factory.inform(new StringMockSolrResourceLoader(tags));
ts = factory.create(ts);

View File

@ -31,7 +31,8 @@ import org.apache.solr.core.SolrResourceLoader;
public class TestJapaneseTokenizerFactory extends BaseTokenTestCase {
public void testSimple() throws IOException {
JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
factory.inform(new SolrResourceLoader(null, null));
TokenStream ts = factory.create(new StringReader("これは本ではない"));
assertTokenStreamContents(ts,
@ -46,7 +47,8 @@ public class TestJapaneseTokenizerFactory extends BaseTokenTestCase {
*/
public void testDefaults() throws IOException {
JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
factory.inform(new SolrResourceLoader(null, null));
TokenStream ts = factory.create(new StringReader("シニアソフトウェアエンジニア"));
assertTokenStreamContents(ts,

View File

@ -33,9 +33,10 @@ public class TestKeepFilterFactory extends BaseTokenTestCase{
ResourceLoader loader = new SolrResourceLoader(null, null);
assertTrue("loader is null and it shouldn't be", loader != null);
KeepWordFilterFactory factory = new KeepWordFilterFactory();
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
Map<String, String> args = new HashMap<String, String>();
args.put("words", "keep-1.txt");
args.put("ignoreCase", "true");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
CharArraySet words = factory.getWords();
@ -45,6 +46,7 @@ public class TestKeepFilterFactory extends BaseTokenTestCase{
factory = new KeepWordFilterFactory();
args.put("words", "keep-1.txt, keep-2.txt");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
words = factory.getWords();

View File

@ -38,9 +38,10 @@ public class TestKeywordMarkerFilterFactory extends BaseTokenTestCase {
Reader reader = new StringReader("dogs cats");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String,String>();
ResourceLoader loader = new SolrResourceLoader(null, null);
args.put("protected", "protwords.txt");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
@ -52,10 +53,11 @@ public class TestKeywordMarkerFilterFactory extends BaseTokenTestCase {
Reader reader = new StringReader("dogs cats Cats");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String,String>();
ResourceLoader loader = new SolrResourceLoader(null, null);
args.put("protected", "protwords.txt");
args.put("ignoreCase", "true");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);

View File

@ -53,8 +53,8 @@ public class TestMultiWordSynonyms extends BaseTokenTestCase {
public void testMultiWordSynonyms() throws IOException {
SynonymFilterFactory factory = new SynonymFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.putAll(DEFAULT_VERSION_PARAM);
args.put("synonyms", "synonyms.txt");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(new StringMockSolrResourceLoader("a b c,d"));
TokenStream ts = factory.create(new MockTokenizer(new StringReader("a e"), MockTokenizer.WHITESPACE, false));

View File

@ -35,7 +35,8 @@ public class TestReverseStringFilterFactory extends BaseTokenTestCase {
Reader reader = new StringReader("simple test");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
ReverseStringFilterFactory factory = new ReverseStringFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "elpmis", "tset" });
}

View File

@ -32,7 +32,8 @@ public class TestRussianFilters extends BaseTokenTestCase {
public void testTokenizer() throws Exception {
Reader reader = new StringReader("Вместе с тем о силе электромагнитной 100");
RussianLetterTokenizerFactory factory = new RussianLetterTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream, new String[] {"Вместе", "с", "тем", "о",
"силе", "электромагнитной", "100"});

View File

@ -36,7 +36,8 @@ public class TestStandardFactories extends BaseTokenTestCase {
public void testStandardTokenizer() throws Exception {
Reader reader = new StringReader("Wha\u0301t's this thing do?");
StandardTokenizerFactory factory = new StandardTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {"Wha\u0301t's", "this", "thing", "do" });
@ -51,9 +52,9 @@ public class TestStandardFactories extends BaseTokenTestCase {
String content = "one two three " + longWord + " four five six";
Reader reader = new StringReader(content);
Map<String,String> args = new HashMap<String,String>();
args.put("luceneMatchVersion", DEFAULT_VERSION_PARAM.get("luceneMatchVersion"));
args.put("maxTokenLength", "1000");
StandardTokenizerFactory factory = new StandardTokenizerFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
@ -66,7 +67,8 @@ public class TestStandardFactories extends BaseTokenTestCase {
public void testClassicTokenizer() throws Exception {
Reader reader = new StringReader("What's this thing do?");
ClassicTokenizerFactory factory = new ClassicTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {"What's", "this", "thing", "do" });
@ -81,9 +83,9 @@ public class TestStandardFactories extends BaseTokenTestCase {
String content = "one two three " + longWord + " four five six";
Reader reader = new StringReader(content);
Map<String,String> args = new HashMap<String,String>();
args.put("luceneMatchVersion", DEFAULT_VERSION_PARAM.get("luceneMatchVersion"));
args.put("maxTokenLength", "1000");
ClassicTokenizerFactory factory = new ClassicTokenizerFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
@ -96,9 +98,11 @@ public class TestStandardFactories extends BaseTokenTestCase {
public void testStandardFilter() throws Exception {
Reader reader = new StringReader("What's this thing do?");
ClassicTokenizerFactory factory = new ClassicTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
ClassicFilterFactory filterFactory = new ClassicFilterFactory();
filterFactory.init(DEFAULT_VERSION_PARAM);
filterFactory.setLuceneMatchVersion(DEFAULT_VERSION);
filterFactory.init(EMPTY_PARAMS);
Tokenizer tokenizer = factory.create(reader);
TokenStream stream = filterFactory.create(tokenizer);
assertTokenStreamContents(stream,
@ -111,7 +115,8 @@ public class TestStandardFactories extends BaseTokenTestCase {
public void testKeywordTokenizer() throws Exception {
Reader reader = new StringReader("What's this thing do?");
KeywordTokenizerFactory factory = new KeywordTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {"What's this thing do?"});
@ -123,7 +128,8 @@ public class TestStandardFactories extends BaseTokenTestCase {
public void testWhitespaceTokenizer() throws Exception {
Reader reader = new StringReader("What's this thing do?");
WhitespaceTokenizerFactory factory = new WhitespaceTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {"What's", "this", "thing", "do?"});
@ -135,7 +141,8 @@ public class TestStandardFactories extends BaseTokenTestCase {
public void testLetterTokenizer() throws Exception {
Reader reader = new StringReader("What's this thing do?");
LetterTokenizerFactory factory = new LetterTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {"What", "s", "this", "thing", "do"});
@ -147,7 +154,8 @@ public class TestStandardFactories extends BaseTokenTestCase {
public void testLowerCaseTokenizer() throws Exception {
Reader reader = new StringReader("What's this thing do?");
LowerCaseTokenizerFactory factory = new LowerCaseTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {"what", "s", "this", "thing", "do"});
@ -160,7 +168,8 @@ public class TestStandardFactories extends BaseTokenTestCase {
Reader reader = new StringReader("Česká");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
ASCIIFoldingFilterFactory factory = new ASCIIFoldingFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "Ceska" });
}

View File

@ -39,9 +39,10 @@ public class TestStemmerOverrideFilterFactory extends BaseTokenTestCase {
Reader reader = new StringReader("testing dogs");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String,String>();
ResourceLoader loader = new SolrResourceLoader(null, null);
args.put("dictionary", "stemdict.txt");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
@ -53,10 +54,11 @@ public class TestStemmerOverrideFilterFactory extends BaseTokenTestCase {
Reader reader = new StringReader("testing DoGs");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
Map<String,String> args = new HashMap<String,String>();
ResourceLoader loader = new SolrResourceLoader(null, null);
args.put("dictionary", "stemdict.txt");
args.put("ignoreCase", "true");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);

View File

@ -34,9 +34,10 @@ public class TestStopFilterFactory extends BaseTokenTestCase {
ResourceLoader loader = new SolrResourceLoader(null, null);
assertTrue("loader is null and it shouldn't be", loader != null);
StopFilterFactory factory = new StopFilterFactory();
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
Map<String, String> args = new HashMap<String, String>();
args.put("words", "stop-1.txt");
args.put("ignoreCase", "true");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
CharArraySet words = factory.getStopWords();
@ -46,6 +47,7 @@ public class TestStopFilterFactory extends BaseTokenTestCase {
factory = new StopFilterFactory();
args.put("words", "stop-1.txt, stop-2.txt");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
words = factory.getStopWords();
@ -54,6 +56,7 @@ public class TestStopFilterFactory extends BaseTokenTestCase {
assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true);
factory = new StopFilterFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
args.put("words", "stop-snowball.txt");
args.put("format", "snowball");
factory.init(args);

View File

@ -38,8 +38,8 @@ public class TestSynonymFilterFactory extends BaseTokenTestCase {
public void testSynonyms() throws Exception {
SynonymFilterFactory factory = new SynonymFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.putAll(DEFAULT_VERSION_PARAM);
args.put("synonyms", "synonyms.txt");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(new SolrResourceLoader(null, null));
TokenStream ts = factory.create(new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false));
@ -55,8 +55,8 @@ public class TestSynonymFilterFactory extends BaseTokenTestCase {
public void testSynonymsOld() throws Exception {
SynonymFilterFactory factory = new SynonymFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("luceneMatchVersion", Version.LUCENE_33.toString());
args.put("synonyms", "synonyms.txt");
factory.setLuceneMatchVersion(Version.LUCENE_33);
factory.init(args);
factory.inform(new SolrResourceLoader(null, null));
TokenStream ts = factory.create(new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false));
@ -72,8 +72,8 @@ public class TestSynonymFilterFactory extends BaseTokenTestCase {
public void testMultiwordOffsetsOld() throws Exception {
SynonymFilterFactory factory = new SynonymFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("luceneMatchVersion", Version.LUCENE_33.toString());
args.put("synonyms", "synonyms.txt");
factory.setLuceneMatchVersion(Version.LUCENE_33);
factory.init(args);
factory.inform(new StringMockSolrResourceLoader("national hockey league, nhl"));
TokenStream ts = factory.create(new MockTokenizer(new StringReader("national hockey league"), MockTokenizer.WHITESPACE, false));
@ -89,8 +89,8 @@ public class TestSynonymFilterFactory extends BaseTokenTestCase {
public void testEmptySynonyms() throws Exception {
SynonymFilterFactory factory = new SynonymFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.putAll(DEFAULT_VERSION_PARAM);
args.put("synonyms", "synonyms.txt");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(new StringMockSolrResourceLoader("")); // empty file!
TokenStream ts = factory.create(new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false));

View File

@ -37,7 +37,8 @@ public class TestThaiWordFilterFactory extends BaseTokenTestCase {
Reader reader = new StringReader("การที่ได้ต้องแสดงว่างานดี");
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
ThaiWordFilterFactory factory = new ThaiWordFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] {"การ", "ที่", "ได้",
"ต้อง", "แสดง", "ว่า", "งาน", "ดี"});

View File

@ -35,9 +35,10 @@ public class TestTypeTokenFilterFactory extends BaseTokenTestCase {
public void testInform() throws Exception {
ResourceLoader loader = new SolrResourceLoader(null, null);
TypeTokenFilterFactory factory = new TypeTokenFilterFactory();
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
Map<String, String> args = new HashMap<String, String>();
args.put("types", "stoptypes-1.txt");
args.put("enablePositionIncrements", "true");
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
factory.inform(loader);
Set<String> types = factory.getStopTypes();
@ -60,9 +61,10 @@ public class TestTypeTokenFilterFactory extends BaseTokenTestCase {
@Test
public void testCreationWithBlackList() throws Exception {
TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
Map<String, String> args = new HashMap<String, String>();
args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
args.put("enablePositionIncrements", "false");
typeTokenFilterFactory.setLuceneMatchVersion(DEFAULT_VERSION);
typeTokenFilterFactory.init(args);
NumericTokenStream input = new NumericTokenStream();
input.setIntValue(123);
@ -72,10 +74,11 @@ public class TestTypeTokenFilterFactory extends BaseTokenTestCase {
@Test
public void testCreationWithWhiteList() throws Exception {
TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
Map<String, String> args = new HashMap<String, String>();
args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
args.put("enablePositionIncrements", "false");
args.put("useWhitelist","true");
typeTokenFilterFactory.setLuceneMatchVersion(DEFAULT_VERSION);
typeTokenFilterFactory.init(args);
NumericTokenStream input = new NumericTokenStream();
input.setIntValue(123);
@ -86,8 +89,9 @@ public class TestTypeTokenFilterFactory extends BaseTokenTestCase {
public void testMissingTypesParameter() throws Exception {
try {
TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
Map<String, String> args = new HashMap<String, String>();
args.put("enablePositionIncrements", "false");
typeTokenFilterFactory.setLuceneMatchVersion(DEFAULT_VERSION);
typeTokenFilterFactory.init(args);
typeTokenFilterFactory.inform(new SolrResourceLoader(null, null));
fail("not supplying 'types' parameter should cause an InitializationException");

View File

@ -24,6 +24,7 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Version;
/**
* A few tests based on org.apache.lucene.analysis.TestUAX29URLEmailTokenizer
@ -34,7 +35,8 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
public void testUAX29URLEmailTokenizer() throws Exception {
Reader reader = new StringReader("Wha\u0301t's this thing do?");
UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {"Wha\u0301t's", "this", "thing", "do" });
@ -43,7 +45,8 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
public void testArabic() throws Exception {
Reader reader = new StringReader("الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.");
UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {"الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا",
@ -53,7 +56,8 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
public void testChinese() throws Exception {
Reader reader = new StringReader("我是中国人。 ");
UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {"", "", "", "", "", "", ""});
@ -62,7 +66,8 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
public void testKorean() throws Exception {
Reader reader = new StringReader("안녕하세요 한글입니다");
UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {"안녕하세요", "한글입니다"});
@ -71,7 +76,8 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
public void testHyphen() throws Exception {
Reader reader = new StringReader("some-dashed-phrase");
UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {"some", "dashed", "phrase"});
@ -95,7 +101,8 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
+ "http://[a42:a7b6::]/qSmxSUU4z/%52qVl4\n";
Reader reader = new StringReader(textWithURLs);
UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {
@ -135,7 +142,8 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
+ "lv'p@tqk.vj5s0tgl.0dlu7su3iyiaz.dqso.494.3hb76.XN--MGBAAM7A8H\n";
Reader reader = new StringReader(textWithEmails);
UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {
@ -166,9 +174,9 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
String content = "one two three " + longWord + " four five six";
Reader reader = new StringReader(content);
Map<String,String> args = new HashMap<String,String>();
args.put("luceneMatchVersion", DEFAULT_VERSION_PARAM.get("luceneMatchVersion"));
args.put("maxTokenLength", "1000");
UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(args);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
@ -180,14 +188,16 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
public void testMatchVersion() throws Exception {
Reader reader = new StringReader("ざ");
UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
factory.setLuceneMatchVersion(DEFAULT_VERSION);
factory.init(EMPTY_PARAMS);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {"ざ"});
reader = new StringReader("ざ");
factory = new UAX29URLEmailTokenizerFactory();
factory.init(Collections.singletonMap("luceneMatchVersion", "3.1"));
factory.setLuceneMatchVersion(Version.LUCENE_31);
factory.init(EMPTY_PARAMS);
stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {""}); // old broken behavior

View File

@ -27,12 +27,19 @@ import org.apache.solr.core.Config;
/**
* General token testing helper functions
*/
public abstract class BaseTokenTestCase extends BaseTokenStreamTestCase
{
/** a map containing the default test version param for easy testing */
protected static final Map<String,String> DEFAULT_VERSION_PARAM =
Collections.singletonMap("luceneMatchVersion", System.getProperty("tests.luceneMatchVersion", "LUCENE_CURRENT"));
public abstract class BaseTokenTestCase extends BaseTokenStreamTestCase{
protected static final Map<String, String> EMPTY_PARAMS = Collections.emptyMap();
/** The default test version for easy testing */
public static final Version DEFAULT_VERSION = Config.parseLuceneVersionString(DEFAULT_VERSION_PARAM.get("luceneMatchVersion"));
public static final Version DEFAULT_VERSION;
static {
String rawVersion = System.getProperty("tests.luceneMatchVersion", "LUCENE_CURRENT");
try {
DEFAULT_VERSION = Version.parseLeniently(rawVersion);
} catch (IllegalArgumentException iae) {
throw new RuntimeException("Test Lucene Match Version [" + rawVersion + "] is invalid", iae);
}
}
}