mirror of https://github.com/apache/lucene.git
SOLR-3402: Analysis Factory Lucene Version is now parsed outside of the Factories
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1331220 13f79535-47bb-0310-9956-ffa450edef68
parent 09e3948d71
commit 9dcae1a12f
@@ -18,6 +18,8 @@ package org.apache.lucene.util;
  */
 
+import java.util.Locale;
+
 /**
  * Use by certain classes to match version compatibility
  * across releases of Lucene.
@@ -111,4 +113,9 @@ public enum Version {
   public boolean onOrAfter(Version other) {
     return compareTo(other) >= 0;
   }
+
+  public static Version parseLeniently(String version) {
+    String parsedMatchVersion = version.toUpperCase(Locale.ENGLISH);
+    return Version.valueOf(parsedMatchVersion.replaceFirst("^(\\d)\\.(\\d)$", "LUCENE_$1$2"));
+  }
 }
@@ -28,4 +28,9 @@ public class TestVersion extends LuceneTestCase {
     assertFalse(Version.LUCENE_30.onOrAfter(Version.LUCENE_31));
   }
+
+  public void testParseLeniently() {
+    assertEquals(Version.LUCENE_40, Version.parseLeniently("4.0"));
+    assertEquals(Version.LUCENE_40, Version.parseLeniently("LUCENE_40"));
+    assertEquals(Version.LUCENE_CURRENT, Version.parseLeniently("LUCENE_CURRENT"));
+  }
 }
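Beyond the cases covered by the test above, a short sketch of how parseLeniently behaves on input it cannot map; the invalid string is a hypothetical illustration, and the IllegalArgumentException comes from the underlying Version.valueOf call:

import org.apache.lucene.util.Version;

public class ParseLenientlyDemo {
  public static void main(String[] args) {
    // "X.Y" is rewritten to the enum name by the regex; other strings are
    // only uppercased before being handed to Version.valueOf.
    System.out.println(Version.parseLeniently("4.0"));            // LUCENE_40
    System.out.println(Version.parseLeniently("lucene_current")); // LUCENE_CURRENT

    try {
      Version.parseLeniently("not-a-version"); // hypothetical bad input
    } catch (IllegalArgumentException expected) {
      // valueOf rejects anything that is not an enum constant
      System.out.println("rejected: not-a-version");
    }
  }
}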
@@ -279,6 +279,11 @@ New Features
   (all date range queries and date faceting is affected). The default TZ
   is still UTC. (David Schlotfeldt, hossman)
 
+* SOLR-3402: Analysis Factories are now configured with their Lucene Version
+  through setLuceneMatchVersion, rather than through the Map passed to init.
+  Parsing and simple error checking for the Version is now done inside
+  the code that creates the Analysis Factories. (Chris Male)
+
 Optimizations
 ----------------------
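To make the CHANGES entry above concrete, a minimal sketch of the before/after wiring, using StopFilterFactory from this commit's test changes; the stopwords file name is made up for illustration:

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.util.Version;
import org.apache.solr.analysis.StopFilterFactory;

class FactoryWiringSketch {
  static StopFilterFactory configure() {
    Map<String,String> args = new HashMap<String,String>();
    args.put("words", "stopwords.txt"); // hypothetical resource name

    StopFilterFactory factory = new StopFilterFactory();
    // Before SOLR-3402 the caller put "luceneMatchVersion" into args and
    // init() parsed it; now the caller parses the Version itself and hands
    // it over through the dedicated setter before calling init().
    factory.setLuceneMatchVersion(Version.LUCENE_40);
    factory.init(args);
    return factory;
  }
}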
@@ -31,7 +31,8 @@ public class TestICUFoldingFilterFactory extends BaseTokenTestCase {
   public void test() throws Exception {
     Reader reader = new StringReader("Résumé");
     ICUFoldingFilterFactory factory = new ICUFoldingFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "resume" });
@@ -31,7 +31,8 @@ public class TestICUNormalizer2FilterFactory extends BaseTokenTestCase {
   public void testDefaults() throws Exception {
     Reader reader = new StringReader("This is a Test");
     ICUNormalizer2FilterFactory factory = new ICUNormalizer2FilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "this", "is", "a", "test" });
@@ -32,11 +32,10 @@ public class TestMorfologikFilterFactory extends BaseTokenTestCase {
   public void testCreateDictionary() throws Exception {
     StringReader reader = new StringReader("rowery bilety");
     Map<String,String> initParams = new HashMap<String,String>();
-    initParams.put(IndexSchema.LUCENE_MATCH_VERSION_PARAM,
-        DEFAULT_VERSION.toString());
     initParams.put(MorfologikFilterFactory.DICTIONARY_SCHEMA_ATTRIBUTE,
         "morfologik");
     MorfologikFilterFactory factory = new MorfologikFilterFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(initParams);
     TokenStream ts = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION,
         reader));
@@ -19,6 +19,7 @@ package org.apache.solr.analysis;
 
 import java.util.Map;
 
+import org.apache.lucene.util.Version;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -31,6 +32,8 @@ public abstract class BaseCharFilterFactory implements CharFilterFactory {
 
   public static final Logger log = LoggerFactory.getLogger(BaseCharFilterFactory.class);
 
+  protected Version luceneMatchVersion;
+
   /** The init args */
   protected Map<String,String> args;
 
@@ -42,6 +45,10 @@ public abstract class BaseCharFilterFactory implements CharFilterFactory {
     this.args = args;
   }
 
+  public void setLuceneMatchVersion(Version luceneMatchVersion) {
+    this.luceneMatchVersion = luceneMatchVersion;
+  }
+
   protected int getInt(String name) {
     return getInt(name,-1,false);
   }
@@ -58,10 +58,6 @@ abstract class BaseTokenStreamFactory {
 
   public void init(Map<String,String> args) {
     this.args=args;
-    String matchVersion = args.get(IndexSchema.LUCENE_MATCH_VERSION_PARAM);
-    if (matchVersion != null) {
-      luceneMatchVersion = Config.parseLuceneVersionString(matchVersion);
-    }
   }
 
   public Map<String,String> getArgs() {
@@ -75,16 +71,16 @@ abstract class BaseTokenStreamFactory {
     if (luceneMatchVersion == null) {
       throw new InitializationException("Configuration Error: Factory '" + this.getClass().getName() +
           "' needs a 'luceneMatchVersion' parameter");
     } else if (!luceneMatchVersion.onOrAfter(Version.LUCENE_40)) {
       log.warn(getClass().getSimpleName() + " is using deprecated " + luceneMatchVersion +
           " emulation. You should at some point declare and reindex to at least 4.0, because " +
           "3.x emulation is deprecated and will be removed in 5.0");
     }
   }
 
   protected final void warnDeprecated(String message) {
     log.warn(getClass().getSimpleName() + " is deprecated. " + message);
   }
 
+  public void setLuceneMatchVersion(Version luceneMatchVersion) {
+    this.luceneMatchVersion = luceneMatchVersion;
+  }
+
   // TODO: move these somewhere that tokenizers and others
   // can also use them...
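A minimal sketch (hypothetical subclass, not part of the commit) of the failure mode the hunks above imply: since init() no longer parses a version out of the args, a factory whose caller never invoked setLuceneMatchVersion fails when assureMatchVersion() runs:

import java.util.HashMap;
import org.apache.lucene.analysis.TokenStream;
import org.apache.solr.analysis.BaseTokenFilterFactory;

// Hypothetical factory used only to exercise assureMatchVersion().
class DemoFilterFactory extends BaseTokenFilterFactory {
  public TokenStream create(TokenStream input) {
    assureMatchVersion(); // throws InitializationException if no version was set
    return input;
  }
}

class AssureMatchVersionSketch {
  public static void main(String[] args) {
    DemoFilterFactory f = new DemoFilterFactory();
    f.init(new HashMap<String,String>()); // init() no longer reads luceneMatchVersion
    try {
      f.create(null);
    } catch (RuntimeException e) {
      // "Configuration Error: Factory '...' needs a 'luceneMatchVersion' parameter"
      System.out.println(e.getMessage());
    }
  }
}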
@@ -20,6 +20,7 @@ package org.apache.solr.analysis;
 import java.util.Map;
 
 import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.util.Version;
 
 /**
  *
@@ -27,7 +28,12 @@ import org.apache.lucene.analysis.CharStream;
  *
  */
 public interface CharFilterFactory {
 
   public void init(Map<String,String> args);
 
   public Map<String,String> getArgs();
 
+  public void setLuceneMatchVersion(Version luceneMatchVersion);
+
   public CharStream create(CharStream input);
 }
@@ -66,7 +66,7 @@ final class FSTSynonymFilterFactory extends BaseTokenFilterFactory implements Re
 
     String tf = args.get("tokenizerFactory");
 
-    final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf, args);
+    final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf);
 
     Analyzer analyzer = new Analyzer() {
       @Override
@@ -153,8 +153,9 @@ final class FSTSynonymFilterFactory extends BaseTokenFilterFactory implements Re
     return parser.build();
   }
 
-  private static TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname, Map<String,String> args){
+  private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname){
     TokenizerFactory tokFactory = loader.newInstance(cname, TokenizerFactory.class);
+    tokFactory.setLuceneMatchVersion(luceneMatchVersion);
     tokFactory.init(args);
     if (tokFactory instanceof ResourceLoaderAware) {
       ((ResourceLoaderAware) tokFactory).inform(loader);
@@ -47,6 +47,7 @@ public class LowerCaseTokenizerFactory extends BaseTokenizerFactory implements M
   @Override
   public Object getMultiTermComponent() {
     LowerCaseFilterFactory filt = new LowerCaseFilterFactory();
+    filt.setLuceneMatchVersion(luceneMatchVersion);
     filt.init(args);
     return filt;
   }
@@ -56,7 +56,7 @@ final class SlowSynonymFilterFactory extends BaseTokenFilterFactory implements R
     String tf = args.get("tokenizerFactory");
     TokenizerFactory tokFactory = null;
     if( tf != null ){
-      tokFactory = loadTokenizerFactory( loader, tf, args );
+      tokFactory = loadTokenizerFactory(loader, tf);
     }
 
     Iterable<String> wlist=loadRules( synonyms, loader );
@@ -167,8 +167,9 @@ final class SlowSynonymFilterFactory extends BaseTokenFilterFactory implements R
     return tokList;
   }
 
-  private static TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname, Map<String,String> args){
+  private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) {
     TokenizerFactory tokFactory = loader.newInstance(cname, TokenizerFactory.class);
+    tokFactory.setLuceneMatchVersion(luceneMatchVersion);
     tokFactory.init( args );
     if (tokFactory instanceof ResourceLoaderAware) {
       ((ResourceLoaderAware) tokFactory).inform(loader);
@@ -18,6 +18,8 @@
 package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.Version;
+
 import java.util.Map;
 
 /**
@@ -54,6 +56,8 @@ public interface TokenFilterFactory {
    * schema.xml
    */
   public void init(Map<String,String> args);
+
+  public void setLuceneMatchVersion(Version luceneMatchVersion);
 
   /**
    * Accessor method for reporting the args used to initialize this factory.
@@ -20,6 +20,7 @@ package org.apache.solr.analysis;
 import java.io.*;
 import java.util.Map;
 import org.apache.lucene.analysis.*;
+import org.apache.lucene.util.Version;
 
 
 /**
@@ -53,6 +54,8 @@ public interface TokenizerFactory {
    * schema.xml
    */
   public void init(Map<String,String> args);
+
+  public void setLuceneMatchVersion(Version luceneMatchVersion);
 
   /**
    * Accessor method for reporting the args used to initialize this factory.
@@ -314,14 +314,9 @@ public class Config {
   private static final AtomicBoolean versionWarningAlreadyLogged = new AtomicBoolean(false);
 
   public static final Version parseLuceneVersionString(final String matchVersion) {
-    String parsedMatchVersion = matchVersion.toUpperCase(Locale.ENGLISH);
-
-    // be lenient with the supplied version parameter
-    parsedMatchVersion = parsedMatchVersion.replaceFirst("^(\\d)\\.(\\d)$", "LUCENE_$1$2");
-
     final Version version;
     try {
-      version = Version.valueOf(parsedMatchVersion);
+      version = Version.parseLeniently(matchVersion);
     } catch (IllegalArgumentException iae) {
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
           "Invalid luceneMatchVersion '" + matchVersion +
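For reference, a small sketch of the caller-facing behavior after this refactor; the bad input string is hypothetical, and the assumption is that the SolrException surfaces as SERVER_ERROR exactly as in the hunk above:

import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.Config;

class ConfigVersionSketch {
  public static void main(String[] args) {
    // Happy path: delegation to Version.parseLeniently keeps old lenient forms working.
    Version v = Config.parseLuceneVersionString("4.0"); // LUCENE_40
    System.out.println(v);

    try {
      Config.parseLuceneVersionString("bogus"); // hypothetical invalid value
    } catch (SolrException e) {
      System.out.println(e.getMessage()); // "Invalid luceneMatchVersion 'bogus' ..."
    }
  }
}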
@@ -19,7 +19,6 @@ package org.apache.solr.schema;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.Version;
 import org.apache.solr.analysis.*;
 import org.apache.solr.common.ResourceLoader;
@@ -270,10 +269,10 @@ public final class FieldTypePluginLoader
     protected void init(CharFilterFactory plugin, Node node) throws Exception {
       if( plugin != null ) {
         final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
-        // copy the luceneMatchVersion from config, if not set
-        if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
-          params.put(LUCENE_MATCH_VERSION_PARAM,
-                     schema.getDefaultLuceneMatchVersion().toString());
+
+        String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
+        plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion, plugin.getClass().getSimpleName()));
+
         plugin.init( params );
         charFilters.add( plugin );
       }
@@ -306,10 +305,9 @@ public final class FieldTypePluginLoader
       }
       final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
 
-      // copy the luceneMatchVersion from config, if not set
-      if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
-        params.put(LUCENE_MATCH_VERSION_PARAM,
-                   schema.getDefaultLuceneMatchVersion().toString());
+      String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
+      plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion, plugin.getClass().getSimpleName()));
+
       plugin.init( params );
       tokenizers.add( plugin );
     }
@@ -340,10 +338,10 @@ public final class FieldTypePluginLoader
     protected void init(TokenFilterFactory plugin, Node node) throws Exception {
       if( plugin != null ) {
         final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
-        // copy the luceneMatchVersion from config, if not set
-        if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
-          params.put(LUCENE_MATCH_VERSION_PARAM,
-                     schema.getDefaultLuceneMatchVersion().toString());
+
+        String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
+        plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion, plugin.getClass().getSimpleName()));
+
         plugin.init( params );
         filters.add( plugin );
       }
@@ -359,5 +357,17 @@ public final class FieldTypePluginLoader
       return new TokenizerChain(charFilters.toArray(new CharFilterFactory[charFilters.size()]),
                                 tokenizers.get(0), filters.toArray(new TokenFilterFactory[filters.size()]));
     }
+
+    private Version parseConfiguredVersion(String configuredVersion, String pluginClassName) {
+      Version version = (configuredVersion != null) ?
+          Config.parseLuceneVersionString(configuredVersion) : schema.getDefaultLuceneMatchVersion();
+
+      if (!version.onOrAfter(Version.LUCENE_40)) {
+        log.warn(pluginClassName + " is using deprecated " + version +
+            " emulation. You should at some point declare and reindex to at least 4.0, because " +
+            "3.x emulation is deprecated and will be removed in 5.0");
+      }
+      return version;
+    }
   }
@@ -39,9 +39,10 @@ public class CommonGramsFilterFactoryTest extends BaseTokenTestCase {
     ResourceLoader loader = new SolrResourceLoader(null, null);
     assertTrue("loader is null and it shouldn't be", loader != null);
     CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
-    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+    Map<String, String> args = new HashMap<String, String>();
     args.put("words", "stop-1.txt");
     args.put("ignoreCase", "true");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
     CharArraySet words = factory.getCommonWords();
@@ -53,6 +54,7 @@ public class CommonGramsFilterFactoryTest extends BaseTokenTestCase {
 
     factory = new CommonGramsFilterFactory();
     args.put("words", "stop-1.txt, stop-2.txt");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
     words = factory.getCommonWords();
@@ -65,6 +67,7 @@ public class CommonGramsFilterFactoryTest extends BaseTokenTestCase {
     factory = new CommonGramsFilterFactory();
     args.put("words", "stop-snowball.txt");
     args.put("format", "snowball");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
     words = factory.getCommonWords();
@@ -86,8 +89,8 @@ public class CommonGramsFilterFactoryTest extends BaseTokenTestCase {
     ResourceLoader loader = new SolrResourceLoader(null, null);
     assertTrue("loader is null and it shouldn't be", loader != null);
     CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
-    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
-    factory.init(args);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     factory.inform(loader);
     CharArraySet words = factory.getCommonWords();
     assertTrue("words is null and it shouldn't be", words != null);
@@ -38,9 +38,10 @@ public class CommonGramsQueryFilterFactoryTest extends BaseTokenTestCase {
     ResourceLoader loader = new SolrResourceLoader(null, null);
     assertTrue("loader is null and it shouldn't be", loader != null);
     CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
-    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+    Map<String, String> args = new HashMap<String, String>();
     args.put("words", "stop-1.txt");
     args.put("ignoreCase", "true");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
     CharArraySet words = factory.getCommonWords();
@@ -52,6 +53,7 @@ public class CommonGramsQueryFilterFactoryTest extends BaseTokenTestCase {
 
     factory = new CommonGramsQueryFilterFactory();
     args.put("words", "stop-1.txt, stop-2.txt");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
     words = factory.getCommonWords();
@@ -62,6 +64,7 @@ public class CommonGramsQueryFilterFactoryTest extends BaseTokenTestCase {
         .isIgnoreCase() == true);
 
     factory = new CommonGramsQueryFilterFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     args.put("words", "stop-snowball.txt");
     args.put("format", "snowball");
     factory.init(args);
@@ -85,8 +88,8 @@ public class CommonGramsQueryFilterFactoryTest extends BaseTokenTestCase {
     ResourceLoader loader = new SolrResourceLoader(null, null);
     assertTrue("loader is null and it shouldn't be", loader != null);
     CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
-    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
-    factory.init(args);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     factory.inform(loader);
     CharArraySet words = factory.getCommonWords();
     assertTrue("words is null and it shouldn't be", words != null);
@@ -47,9 +47,10 @@ public class SnowballPorterFilterFactoryTest extends BaseTokenTestCase {
     }
 
     SnowballPorterFilterFactory factory = new SnowballPorterFilterFactory();
-    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+    Map<String, String> args = new HashMap<String, String>();
     args.put("language", "English");
 
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(new LinesMockSolrResourceLoader(new ArrayList<String>()));
     Tokenizer tokenizer = new MockTokenizer(
@@ -84,9 +85,10 @@ public class SnowballPorterFilterFactoryTest extends BaseTokenTestCase {
   public void testProtected() throws Exception {
     SnowballPorterFilterFactory factory = new SnowballPorterFilterFactory();
     ResourceLoader loader = new SolrResourceLoader(null, null);
-    Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String,String>();
     args.put("protected", "protwords.txt");
     args.put("language", "English");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
     Reader reader = new StringReader("ridding of some stemming");
@@ -36,7 +36,8 @@ public class TestArabicFilters extends BaseTokenTestCase {
   public void testTokenizer() throws Exception {
     Reader reader = new StringReader("الذين مَلكت أيمانكم");
     ArabicLetterTokenizerFactory factory = new ArabicLetterTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream, new String[] {"الذين", "مَلكت", "أيمانكم"});
   }
@@ -47,9 +48,11 @@ public class TestArabicFilters extends BaseTokenTestCase {
   public void testNormalizer() throws Exception {
     Reader reader = new StringReader("الذين مَلكت أيمانكم");
     StandardTokenizerFactory factory = new StandardTokenizerFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     ArabicNormalizationFilterFactory filterFactory = new ArabicNormalizationFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
-    filterFactory.init(DEFAULT_VERSION_PARAM);
+    filterFactory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
+    filterFactory.init(EMPTY_PARAMS);
     Tokenizer tokenizer = factory.create(reader);
     TokenStream stream = filterFactory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] {"الذين", "ملكت", "ايمانكم"});
@@ -61,10 +64,12 @@ public class TestArabicFilters extends BaseTokenTestCase {
   public void testStemmer() throws Exception {
     Reader reader = new StringReader("الذين مَلكت أيمانكم");
     StandardTokenizerFactory factory = new StandardTokenizerFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     ArabicNormalizationFilterFactory normFactory = new ArabicNormalizationFilterFactory();
+    normFactory.setLuceneMatchVersion(DEFAULT_VERSION);
     ArabicStemFilterFactory stemFactory = new ArabicStemFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
-    normFactory.init(DEFAULT_VERSION_PARAM);
+    factory.init(EMPTY_PARAMS);
+    normFactory.init(EMPTY_PARAMS);
     Tokenizer tokenizer = factory.create(reader);
     TokenStream stream = normFactory.create(tokenizer);
     stream = stemFactory.create(stream);
@@ -78,7 +83,8 @@ public class TestArabicFilters extends BaseTokenTestCase {
     Reader reader = new StringReader("میخورد");
     PersianCharFilterFactory charfilterFactory = new PersianCharFilterFactory();
     StandardTokenizerFactory tokenizerFactory = new StandardTokenizerFactory();
-    tokenizerFactory.init(DEFAULT_VERSION_PARAM);
+    tokenizerFactory.setLuceneMatchVersion(DEFAULT_VERSION);
+    tokenizerFactory.init(EMPTY_PARAMS);
     TokenStream stream = tokenizerFactory.create(charfilterFactory.create(CharReader.get(reader)));
     assertTokenStreamContents(stream, new String[] { "می", "خورد" });
   }
@@ -28,7 +28,8 @@ import org.apache.lucene.analysis.TokenStream;
 public class TestBeiderMorseFilterFactory extends BaseTokenTestCase {
   public void testBasics() throws Exception {
     BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     TokenStream ts = factory.create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
     assertTokenStreamContents(ts,
         new String[] { "vDnbirk", "vanbirk", "vinbirk", "wDnbirk", "wanbirk", "winbirk" },
@@ -33,7 +33,8 @@ public class TestCJKBigramFilterFactory extends BaseTokenTestCase {
   public void testDefaults() throws Exception {
     Reader reader = new StringReader("多くの学生が試験に落ちた。");
     CJKBigramFilterFactory factory = new CJKBigramFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     TokenStream stream = factory.create(new StandardTokenizer(TEST_VERSION_CURRENT, reader));
     assertTokenStreamContents(stream,
         new String[] { "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" });
@@ -33,11 +33,12 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
 
   public void testCapitalization() throws Exception
   {
-    Map<String,String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String, String>();
     args.put( CapitalizationFilterFactory.KEEP, "and the it BIG" );
     args.put( CapitalizationFilterFactory.ONLY_FIRST_WORD, "true" );
 
     CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init( args );
     assertTokenStreamContents(factory.create(
         new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.WHITESPACE, false)),
@@ -94,6 +95,7 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
 
     // Now try some prefixes
     factory = new CapitalizationFilterFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     args.put( "okPrefix", "McK" ); // all words
     factory.init( args );
     assertTokenStreamContents(factory.create(
@@ -114,12 +116,13 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
   }
 
   public void testKeepIgnoreCase() throws Exception {
-    Map<String,String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String, String>();
     args.put( CapitalizationFilterFactory.KEEP, "kitten" );
     args.put( CapitalizationFilterFactory.KEEP_IGNORE_CASE, "true" );
     args.put( CapitalizationFilterFactory.ONLY_FIRST_WORD, "true" );
 
     CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init( args );
     factory.forceFirstLetter = true;
     assertTokenStreamContents(factory.create(
@@ -143,10 +146,11 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
    * This is very weird when combined with ONLY_FIRST_WORD!!!
    */
   public void testMinWordLength() throws Exception {
-    Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String,String>();
     args.put(CapitalizationFilterFactory.ONLY_FIRST_WORD, "true");
     args.put(CapitalizationFilterFactory.MIN_WORD_LENGTH, "5");
     CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     Tokenizer tokenizer = new MockTokenizer(new StringReader(
         "helo testing"), MockTokenizer.WHITESPACE, false);
@@ -159,9 +163,10 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
    * in each token (it should do nothing)
    */
   public void testMaxWordCount() throws Exception {
-    Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String,String>();
     args.put(CapitalizationFilterFactory.MAX_WORD_COUNT, "2");
     CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     Tokenizer tokenizer = new MockTokenizer(new StringReader(
         "one two three four"), MockTokenizer.WHITESPACE, false);
@@ -173,9 +178,10 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
    * Test CapitalizationFilterFactory's maxWordCount option when exceeded
    */
   public void testMaxWordCount2() throws Exception {
-    Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String,String>();
     args.put(CapitalizationFilterFactory.MAX_WORD_COUNT, "2");
     CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     Tokenizer tokenizer = new MockTokenizer(new StringReader(
         "one two three four"), MockTokenizer.KEYWORD, false);
@@ -189,9 +195,10 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
    * This is weird, it is not really a max, but inclusive (look at 'is')
    */
   public void testMaxTokenLength() throws Exception {
-    Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String,String>();
     args.put(CapitalizationFilterFactory.MAX_TOKEN_LENGTH, "2");
     CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     Tokenizer tokenizer = new MockTokenizer(new StringReader(
         "this is a test"), MockTokenizer.WHITESPACE, false);
@@ -203,10 +210,11 @@ public class TestCapitalizationFilterFactory extends BaseTokenTestCase {
   * Test CapitalizationFilterFactory's forceFirstLetter option
   */
   public void testForceFirstLetter() throws Exception {
-    Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String,String>();
     args.put(CapitalizationFilterFactory.KEEP, "kitten");
     args.put(CapitalizationFilterFactory.FORCE_FIRST_LETTER, "true");
     CapitalizationFilterFactory factory = new CapitalizationFilterFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     Tokenizer tokenizer = new MockTokenizer(new StringReader("kitten"), MockTokenizer.WHITESPACE, false);
     TokenStream ts = factory.create(tokenizer);
@@ -40,8 +40,9 @@ public class TestDictionaryCompoundWordTokenFilterFactory extends BaseTokenTestC
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     DictionaryCompoundWordTokenFilterFactory factory = new DictionaryCompoundWordTokenFilterFactory();
     ResourceLoader loader = new SolrResourceLoader(null, null);
-    Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String,String>();
     args.put("dictionary", "compoundDictionary.txt");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
     TokenStream stream = factory.create(tokenizer);
@@ -39,7 +39,8 @@ public class TestElisionFilterFactory extends BaseTokenTestCase {
     Reader reader = new StringReader("l'avion");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     ElisionFilterFactory factory = new ElisionFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     ResourceLoader loader = new SolrResourceLoader(null, null);
     Map<String,String> args = new HashMap<String,String>();
     args.put("articles", "frenchArticles.txt");
@@ -56,7 +57,8 @@ public class TestElisionFilterFactory extends BaseTokenTestCase {
     Reader reader = new StringReader("l'avion");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     ElisionFilterFactory factory = new ElisionFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     ResourceLoader loader = new SolrResourceLoader(null, null);
     factory.init(new HashMap<String,String>());
     factory.inform(loader);
@@ -71,7 +73,8 @@ public class TestElisionFilterFactory extends BaseTokenTestCase {
     Reader reader = new StringReader("L'avion");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     ElisionFilterFactory factory = new ElisionFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     ResourceLoader loader = new SolrResourceLoader(null, null);
     Map<String,String> args = new HashMap<String,String>();
     args.put("articles", "frenchArticles.txt");
@@ -35,7 +35,8 @@ public class TestGreekLowerCaseFilterFactory extends BaseTokenTestCase {
     Reader reader = new StringReader("Μάϊος ΜΆΪΟΣ");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     GreekLowerCaseFilterFactory factory = new GreekLowerCaseFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "μαιοσ", "μαιοσ" });
   }
@@ -33,9 +33,11 @@ public class TestHindiFilters extends BaseTokenTestCase {
   public void testIndicNormalizer() throws Exception {
     Reader reader = new StringReader("ত্ अाैर");
     StandardTokenizerFactory factory = new StandardTokenizerFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     IndicNormalizationFilterFactory filterFactory = new IndicNormalizationFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
-    filterFactory.init(DEFAULT_VERSION_PARAM);
+    filterFactory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
+    filterFactory.init(EMPTY_PARAMS);
     Tokenizer tokenizer = factory.create(reader);
     TokenStream stream = filterFactory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "ৎ", "और" });
@@ -47,10 +49,12 @@ public class TestHindiFilters extends BaseTokenTestCase {
   public void testHindiNormalizer() throws Exception {
     Reader reader = new StringReader("क़िताब");
     StandardTokenizerFactory factory = new StandardTokenizerFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     IndicNormalizationFilterFactory indicFilterFactory = new IndicNormalizationFilterFactory();
     HindiNormalizationFilterFactory hindiFilterFactory = new HindiNormalizationFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
-    hindiFilterFactory.init(DEFAULT_VERSION_PARAM);
+    hindiFilterFactory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
+    hindiFilterFactory.init(EMPTY_PARAMS);
     Tokenizer tokenizer = factory.create(reader);
     TokenStream stream = indicFilterFactory.create(tokenizer);
     stream = hindiFilterFactory.create(stream);
@@ -63,11 +67,13 @@ public class TestHindiFilters extends BaseTokenTestCase {
   public void testStemmer() throws Exception {
     Reader reader = new StringReader("किताबें");
     StandardTokenizerFactory factory = new StandardTokenizerFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     IndicNormalizationFilterFactory indicFilterFactory = new IndicNormalizationFilterFactory();
     HindiNormalizationFilterFactory hindiFilterFactory = new HindiNormalizationFilterFactory();
     HindiStemFilterFactory stemFactory = new HindiStemFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
-    stemFactory.init(DEFAULT_VERSION_PARAM);
+    stemFactory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
+    stemFactory.init(EMPTY_PARAMS);
     Tokenizer tokenizer = factory.create(reader);
     TokenStream stream = indicFilterFactory.create(tokenizer);
     stream = hindiFilterFactory.create(stream);
@@ -36,7 +36,7 @@ public class TestHunspellStemFilterFactory extends BaseTokenTestCase {
     Map<String,String> args = new HashMap<String,String>();
     args.put("dictionary", "hunspell-test.dic");
     args.put("affix", "hunspell-test.aff");
-    args.put(IndexSchema.LUCENE_MATCH_VERSION_PARAM, DEFAULT_VERSION.name());
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
    factory.init(args);
     factory.inform(new SolrResourceLoader("solr"));
 
@@ -40,9 +40,10 @@ public class TestHyphenationCompoundWordTokenFilterFactory extends BaseTokenTest
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
     ResourceLoader loader = new SolrResourceLoader(null, null);
-    Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String,String>();
     args.put("hyphenator", "da_UTF8.xml");
     args.put("dictionary", "da_compoundDictionary.txt");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
     TokenStream stream = factory.create(tokenizer);
@@ -63,10 +64,11 @@ public class TestHyphenationCompoundWordTokenFilterFactory extends BaseTokenTest
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
     ResourceLoader loader = new SolrResourceLoader(null, null);
-    Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String,String>();
     args.put("hyphenator", "da_UTF8.xml");
     args.put("minSubwordSize", "2");
     args.put("maxSubwordSize", "4");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
     TokenStream stream = factory.create(tokenizer);
@@ -29,7 +29,8 @@ import org.apache.solr.core.SolrResourceLoader;
 public class TestJapaneseBaseFormFilterFactory extends BaseTokenTestCase {
   public void testBasics() throws IOException {
     JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
-    tokenizerFactory.init(DEFAULT_VERSION_PARAM);
+    tokenizerFactory.setLuceneMatchVersion(DEFAULT_VERSION);
+    tokenizerFactory.init(EMPTY_PARAMS);
     tokenizerFactory.inform(new SolrResourceLoader(null, null));
     TokenStream ts = tokenizerFactory.create(new StringReader("それはまだ実験段階にあります"));
     JapaneseBaseFormFilterFactory factory = new JapaneseBaseFormFilterFactory();
@@ -35,13 +35,14 @@ public class TestJapanesePartOfSpeechStopFilterFactory extends BaseTokenTestCase
       "動詞-自立\n";
 
     JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
-    tokenizerFactory.init(DEFAULT_VERSION_PARAM);
+    tokenizerFactory.setLuceneMatchVersion(DEFAULT_VERSION);
+    tokenizerFactory.init(EMPTY_PARAMS);
     tokenizerFactory.inform(new SolrResourceLoader(null, null));
     TokenStream ts = tokenizerFactory.create(new StringReader("私は制限スピードを超える。"));
     JapanesePartOfSpeechStopFilterFactory factory = new JapanesePartOfSpeechStopFilterFactory();
     Map<String,String> args = new HashMap<String,String>();
-    args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
     args.put("tags", "stoptags.txt");
+    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
     factory.init(args);
     factory.inform(new StringMockSolrResourceLoader(tags));
     ts = factory.create(ts);
@@ -31,7 +31,8 @@ import org.apache.solr.core.SolrResourceLoader;
 public class TestJapaneseTokenizerFactory extends BaseTokenTestCase {
   public void testSimple() throws IOException {
     JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     factory.inform(new SolrResourceLoader(null, null));
     TokenStream ts = factory.create(new StringReader("これは本ではない"));
     assertTokenStreamContents(ts,
@@ -46,7 +47,8 @@ public class TestJapaneseTokenizerFactory extends BaseTokenTestCase {
    */
   public void testDefaults() throws IOException {
     JapaneseTokenizerFactory factory = new JapaneseTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     factory.inform(new SolrResourceLoader(null, null));
     TokenStream ts = factory.create(new StringReader("シニアソフトウェアエンジニア"));
     assertTokenStreamContents(ts,
@@ -33,9 +33,10 @@ public class TestKeepFilterFactory extends BaseTokenTestCase{
     ResourceLoader loader = new SolrResourceLoader(null, null);
     assertTrue("loader is null and it shouldn't be", loader != null);
     KeepWordFilterFactory factory = new KeepWordFilterFactory();
-    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+    Map<String, String> args = new HashMap<String, String>();
     args.put("words", "keep-1.txt");
     args.put("ignoreCase", "true");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
     CharArraySet words = factory.getWords();
@@ -45,6 +46,7 @@ public class TestKeepFilterFactory extends BaseTokenTestCase{
 
     factory = new KeepWordFilterFactory();
     args.put("words", "keep-1.txt, keep-2.txt");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
     words = factory.getWords();
@@ -38,9 +38,10 @@ public class TestKeywordMarkerFilterFactory extends BaseTokenTestCase {
     Reader reader = new StringReader("dogs cats");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
-    Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String,String>();
     ResourceLoader loader = new SolrResourceLoader(null, null);
     args.put("protected", "protwords.txt");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
 
@@ -52,10 +53,11 @@ public class TestKeywordMarkerFilterFactory extends BaseTokenTestCase {
     Reader reader = new StringReader("dogs cats Cats");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
-    Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String,String>();
     ResourceLoader loader = new SolrResourceLoader(null, null);
     args.put("protected", "protwords.txt");
     args.put("ignoreCase", "true");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
 
@@ -53,8 +53,8 @@ public class TestMultiWordSynonyms extends BaseTokenTestCase {
   public void testMultiWordSynonyms() throws IOException {
     SynonymFilterFactory factory = new SynonymFilterFactory();
     Map<String,String> args = new HashMap<String,String>();
-    args.putAll(DEFAULT_VERSION_PARAM);
     args.put("synonyms", "synonyms.txt");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(new StringMockSolrResourceLoader("a b c,d"));
     TokenStream ts = factory.create(new MockTokenizer(new StringReader("a e"), MockTokenizer.WHITESPACE, false));
@@ -35,7 +35,8 @@ public class TestReverseStringFilterFactory extends BaseTokenTestCase {
     Reader reader = new StringReader("simple test");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     ReverseStringFilterFactory factory = new ReverseStringFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "elpmis", "tset" });
   }
@@ -32,7 +32,8 @@ public class TestRussianFilters extends BaseTokenTestCase {
   public void testTokenizer() throws Exception {
     Reader reader = new StringReader("Вместе с тем о силе электромагнитной 100");
     RussianLetterTokenizerFactory factory = new RussianLetterTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream, new String[] {"Вместе", "с", "тем", "о",
         "силе", "электромагнитной", "100"});
@@ -36,7 +36,8 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testStandardTokenizer() throws Exception {
     Reader reader = new StringReader("Wha\u0301t's this thing do?");
     StandardTokenizerFactory factory = new StandardTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"Wha\u0301t's", "this", "thing", "do" });
@@ -51,9 +52,9 @@ public class TestStandardFactories extends BaseTokenTestCase {
     String content = "one two three " + longWord + " four five six";
     Reader reader = new StringReader(content);
     Map<String,String> args = new HashMap<String,String>();
-    args.put("luceneMatchVersion", DEFAULT_VERSION_PARAM.get("luceneMatchVersion"));
     args.put("maxTokenLength", "1000");
     StandardTokenizerFactory factory = new StandardTokenizerFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
@@ -66,7 +67,8 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testClassicTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     ClassicTokenizerFactory factory = new ClassicTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"What's", "this", "thing", "do" });
@@ -81,9 +83,9 @@ public class TestStandardFactories extends BaseTokenTestCase {
     String content = "one two three " + longWord + " four five six";
     Reader reader = new StringReader(content);
     Map<String,String> args = new HashMap<String,String>();
-    args.put("luceneMatchVersion", DEFAULT_VERSION_PARAM.get("luceneMatchVersion"));
     args.put("maxTokenLength", "1000");
     ClassicTokenizerFactory factory = new ClassicTokenizerFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
@@ -96,9 +98,11 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testStandardFilter() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     ClassicTokenizerFactory factory = new ClassicTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     ClassicFilterFactory filterFactory = new ClassicFilterFactory();
-    filterFactory.init(DEFAULT_VERSION_PARAM);
+    filterFactory.setLuceneMatchVersion(DEFAULT_VERSION);
+    filterFactory.init(EMPTY_PARAMS);
     Tokenizer tokenizer = factory.create(reader);
     TokenStream stream = filterFactory.create(tokenizer);
     assertTokenStreamContents(stream,
@@ -111,7 +115,8 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testKeywordTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     KeywordTokenizerFactory factory = new KeywordTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"What's this thing do?"});
@@ -123,7 +128,8 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testWhitespaceTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     WhitespaceTokenizerFactory factory = new WhitespaceTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"What's", "this", "thing", "do?"});
@@ -135,7 +141,8 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testLetterTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     LetterTokenizerFactory factory = new LetterTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"What", "s", "this", "thing", "do"});
@@ -147,7 +154,8 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testLowerCaseTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     LowerCaseTokenizerFactory factory = new LowerCaseTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"what", "s", "this", "thing", "do"});
@@ -160,7 +168,8 @@ public class TestStandardFactories extends BaseTokenTestCase {
     Reader reader = new StringReader("Česká");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     ASCIIFoldingFilterFactory factory = new ASCIIFoldingFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "Ceska" });
   }
@@ -39,9 +39,10 @@ public class TestStemmerOverrideFilterFactory extends BaseTokenTestCase {
     Reader reader = new StringReader("testing dogs");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
-    Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String,String>();
     ResourceLoader loader = new SolrResourceLoader(null, null);
     args.put("dictionary", "stemdict.txt");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
 
@@ -53,10 +54,11 @@ public class TestStemmerOverrideFilterFactory extends BaseTokenTestCase {
     Reader reader = new StringReader("testing DoGs");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
-    Map<String,String> args = new HashMap<String,String>(DEFAULT_VERSION_PARAM);
+    Map<String,String> args = new HashMap<String,String>();
     ResourceLoader loader = new SolrResourceLoader(null, null);
     args.put("dictionary", "stemdict.txt");
     args.put("ignoreCase", "true");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
 
@@ -34,9 +34,10 @@ public class TestStopFilterFactory extends BaseTokenTestCase {
     ResourceLoader loader = new SolrResourceLoader(null, null);
     assertTrue("loader is null and it shouldn't be", loader != null);
     StopFilterFactory factory = new StopFilterFactory();
-    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+    Map<String, String> args = new HashMap<String, String>();
     args.put("words", "stop-1.txt");
     args.put("ignoreCase", "true");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
     CharArraySet words = factory.getStopWords();
@@ -46,6 +47,7 @@ public class TestStopFilterFactory extends BaseTokenTestCase {
 
     factory = new StopFilterFactory();
     args.put("words", "stop-1.txt, stop-2.txt");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
     words = factory.getStopWords();
@@ -54,6 +56,7 @@ public class TestStopFilterFactory extends BaseTokenTestCase {
     assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true);
 
     factory = new StopFilterFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     args.put("words", "stop-snowball.txt");
     args.put("format", "snowball");
     factory.init(args);
@ -38,8 +38,8 @@ public class TestSynonymFilterFactory extends BaseTokenTestCase {
|
|||
public void testSynonyms() throws Exception {
|
||||
SynonymFilterFactory factory = new SynonymFilterFactory();
|
||||
Map<String,String> args = new HashMap<String,String>();
|
||||
args.putAll(DEFAULT_VERSION_PARAM);
|
||||
args.put("synonyms", "synonyms.txt");
|
||||
factory.setLuceneMatchVersion(DEFAULT_VERSION);
|
||||
factory.init(args);
|
||||
factory.inform(new SolrResourceLoader(null, null));
|
||||
TokenStream ts = factory.create(new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false));
|
||||
|
@ -55,8 +55,8 @@ public class TestSynonymFilterFactory extends BaseTokenTestCase {
|
|||
public void testSynonymsOld() throws Exception {
|
||||
SynonymFilterFactory factory = new SynonymFilterFactory();
|
||||
Map<String,String> args = new HashMap<String,String>();
|
||||
args.put("luceneMatchVersion", Version.LUCENE_33.toString());
|
||||
args.put("synonyms", "synonyms.txt");
|
||||
factory.setLuceneMatchVersion(Version.LUCENE_33);
|
||||
factory.init(args);
|
||||
factory.inform(new SolrResourceLoader(null, null));
|
||||
TokenStream ts = factory.create(new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false));
|
||||
|
@ -72,8 +72,8 @@ public class TestSynonymFilterFactory extends BaseTokenTestCase {
|
|||
public void testMultiwordOffsetsOld() throws Exception {
|
||||
SynonymFilterFactory factory = new SynonymFilterFactory();
|
||||
Map<String,String> args = new HashMap<String,String>();
|
||||
args.put("luceneMatchVersion", Version.LUCENE_33.toString());
|
||||
args.put("synonyms", "synonyms.txt");
|
||||
factory.setLuceneMatchVersion(Version.LUCENE_33);
|
||||
factory.init(args);
|
||||
factory.inform(new StringMockSolrResourceLoader("national hockey league, nhl"));
|
||||
TokenStream ts = factory.create(new MockTokenizer(new StringReader("national hockey league"), MockTokenizer.WHITESPACE, false));
|
||||
|
@@ -89,8 +89,8 @@ public class TestSynonymFilterFactory extends BaseTokenTestCase {
   public void testEmptySynonyms() throws Exception {
     SynonymFilterFactory factory = new SynonymFilterFactory();
     Map<String,String> args = new HashMap<String,String>();
-    args.putAll(DEFAULT_VERSION_PARAM);
     args.put("synonyms", "synonyms.txt");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(new StringMockSolrResourceLoader("")); // empty file!
     TokenStream ts = factory.create(new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false));
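
The synonym hunks show the caller-side half of the change: the string-to-Version parsing that each factory used to perform on the "luceneMatchVersion" args entry now happens once in the code that builds the factory. A sketch of that resolution (assumes Version.parseLeniently as used in BaseTokenTestCase at the end of this diff, and that it accepts enum-style names such as "LUCENE_33"):

    // Resolve the version string once, then hand over the parsed Version.
    Version matchVersion = Version.parseLeniently("LUCENE_33");
    SynonymFilterFactory factory = new SynonymFilterFactory();
    factory.setLuceneMatchVersion(matchVersion);    // parsed Version, not a raw string
    Map<String,String> args = new HashMap<String,String>();
    args.put("synonyms", "synonyms.txt");           // args carry only factory-specific params
    factory.init(args);
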
@@ -37,7 +37,8 @@ public class TestThaiWordFilterFactory extends BaseTokenTestCase {
     Reader reader = new StringReader("การที่ได้ต้องแสดงว่างานดี");
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     ThaiWordFilterFactory factory = new ThaiWordFilterFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] {"การ", "ที่", "ได้",
         "ต้อง", "แสดง", "ว่า", "งาน", "ดี"});
@@ -35,9 +35,10 @@ public class TestTypeTokenFilterFactory extends BaseTokenTestCase {
   public void testInform() throws Exception {
     ResourceLoader loader = new SolrResourceLoader(null, null);
     TypeTokenFilterFactory factory = new TypeTokenFilterFactory();
-    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+    Map<String, String> args = new HashMap<String, String>();
     args.put("types", "stoptypes-1.txt");
     args.put("enablePositionIncrements", "true");
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     factory.inform(loader);
     Set<String> types = factory.getStopTypes();
@@ -60,9 +61,10 @@ public class TestTypeTokenFilterFactory extends BaseTokenTestCase {
   @Test
   public void testCreationWithBlackList() throws Exception {
     TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
-    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+    Map<String, String> args = new HashMap<String, String>();
     args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
     args.put("enablePositionIncrements", "false");
+    typeTokenFilterFactory.setLuceneMatchVersion(DEFAULT_VERSION);
     typeTokenFilterFactory.init(args);
     NumericTokenStream input = new NumericTokenStream();
     input.setIntValue(123);
@@ -72,10 +74,11 @@ public class TestTypeTokenFilterFactory extends BaseTokenTestCase {
   @Test
   public void testCreationWithWhiteList() throws Exception {
     TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
-    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+    Map<String, String> args = new HashMap<String, String>();
     args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
     args.put("enablePositionIncrements", "false");
     args.put("useWhitelist","true");
+    typeTokenFilterFactory.setLuceneMatchVersion(DEFAULT_VERSION);
     typeTokenFilterFactory.init(args);
     NumericTokenStream input = new NumericTokenStream();
     input.setIntValue(123);
@@ -86,8 +89,9 @@ public class TestTypeTokenFilterFactory extends BaseTokenTestCase {
   public void testMissingTypesParameter() throws Exception {
     try {
       TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
-      Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
+      Map<String, String> args = new HashMap<String, String>();
       args.put("enablePositionIncrements", "false");
+      typeTokenFilterFactory.setLuceneMatchVersion(DEFAULT_VERSION);
       typeTokenFilterFactory.init(args);
       typeTokenFilterFactory.inform(new SolrResourceLoader(null, null));
       fail("not supplying 'types' parameter should cause an InitializationException");
@@ -24,6 +24,7 @@ import java.util.HashMap;
 import java.util.Map;

 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.Version;

 /**
  * A few tests based on org.apache.lucene.analysis.TestUAX29URLEmailTokenizer
@@ -34,7 +35,8 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
   public void testUAX29URLEmailTokenizer() throws Exception {
     Reader reader = new StringReader("Wha\u0301t's this thing do?");
     UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"Wha\u0301t's", "this", "thing", "do" });
@@ -43,7 +45,8 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
   public void testArabic() throws Exception {
     Reader reader = new StringReader("الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.");
     UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا",
@@ -53,7 +56,8 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
   public void testChinese() throws Exception {
     Reader reader = new StringReader("我是中国人。 1234 Tests ");
     UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"我", "是", "中", "国", "人", "1234", "Tests"});
@@ -62,7 +66,8 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
   public void testKorean() throws Exception {
     Reader reader = new StringReader("안녕하세요 한글입니다");
     UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"안녕하세요", "한글입니다"});
@@ -71,7 +76,8 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
   public void testHyphen() throws Exception {
     Reader reader = new StringReader("some-dashed-phrase");
     UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"some", "dashed", "phrase"});
@@ -95,7 +101,8 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
       + "http://[a42:a7b6::]/qSmxSUU4z/%52qVl4\n";
     Reader reader = new StringReader(textWithURLs);
     UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {
@@ -135,7 +142,8 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
       + "lv'p@tqk.vj5s0tgl.0dlu7su3iyiaz.dqso.494.3hb76.XN--MGBAAM7A8H\n";
     Reader reader = new StringReader(textWithEmails);
     UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {
@@ -166,9 +174,9 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
     String content = "one two three " + longWord + " four five six";
     Reader reader = new StringReader(content);
     Map<String,String> args = new HashMap<String,String>();
-    args.put("luceneMatchVersion", DEFAULT_VERSION_PARAM.get("luceneMatchVersion"));
     args.put("maxTokenLength", "1000");
     UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
     factory.init(args);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
@@ -180,14 +188,16 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenTestCase {
   public void testMatchVersion() throws Exception {
     Reader reader = new StringReader("ざ");
     UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
-    factory.init(DEFAULT_VERSION_PARAM);
+    factory.setLuceneMatchVersion(DEFAULT_VERSION);
+    factory.init(EMPTY_PARAMS);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"ざ"});

     reader = new StringReader("ざ");
     factory = new UAX29URLEmailTokenizerFactory();
-    factory.init(Collections.singletonMap("luceneMatchVersion", "3.1"));
+    factory.setLuceneMatchVersion(Version.LUCENE_31);
+    factory.init(EMPTY_PARAMS);
     stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"さ"}); // old broken behavior
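
The two halves of testMatchVersion tokenize the same input under different versions, which the explicit setter makes straightforward to parameterize. A hedged helper sketch (the tokenize method is hypothetical; it assumes the UAX29URLEmailTokenizerFactory lifecycle shown in the hunks above and the EMPTY_PARAMS constant from BaseTokenTestCase below):

    // Hypothetical helper: tokenize one input under a chosen match version.
    private static Tokenizer tokenize(String text, Version matchVersion) {
      UAX29URLEmailTokenizerFactory factory = new UAX29URLEmailTokenizerFactory();
      factory.setLuceneMatchVersion(matchVersion); // the behavior switch now lives here
      factory.init(EMPTY_PARAMS);
      return factory.create(new StringReader(text));
    }
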
@@ -27,12 +27,19 @@ import org.apache.solr.core.Config;
 /**
  * General token testing helper functions
  */
-public abstract class BaseTokenTestCase extends BaseTokenStreamTestCase
-{
-  /** a map containing the default test version param for easy testing */
-  protected static final Map<String,String> DEFAULT_VERSION_PARAM =
-    Collections.singletonMap("luceneMatchVersion", System.getProperty("tests.luceneMatchVersion", "LUCENE_CURRENT"));
+public abstract class BaseTokenTestCase extends BaseTokenStreamTestCase {
+
+  protected static final Map<String, String> EMPTY_PARAMS = Collections.emptyMap();

   /** The default test version for easy testing */
-  public static final Version DEFAULT_VERSION = Config.parseLuceneVersionString(DEFAULT_VERSION_PARAM.get("luceneMatchVersion"));
+  public static final Version DEFAULT_VERSION;
+
+  static {
+    String rawVersion = System.getProperty("tests.luceneMatchVersion", "LUCENE_CURRENT");
+    try {
+      DEFAULT_VERSION = Version.parseLeniently(rawVersion);
+    } catch (IllegalArgumentException iae) {
+      throw new RuntimeException("Test Lucene Match Version [" + rawVersion + "] is invalid", iae);
+    }
+  }
 }
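
With this helper in place, a factory test needs no per-test version plumbing: DEFAULT_VERSION is resolved once in the static block (failing fast if the tests.luceneMatchVersion property is malformed) and EMPTY_PARAMS covers factories that take no extra configuration. A sketch of a consuming test (TestExampleFilterFactory and ExampleFilterFactory are hypothetical names, not classes in this commit):

    public class TestExampleFilterFactory extends BaseTokenTestCase {
      public void testDefaults() throws Exception {
        ExampleFilterFactory factory = new ExampleFilterFactory(); // hypothetical factory
        factory.setLuceneMatchVersion(DEFAULT_VERSION); // parsed once in the static block above
        factory.init(EMPTY_PARAMS);                     // no luceneMatchVersion entry needed
        TokenStream stream = factory.create(
            new MockTokenizer(new StringReader("test"), MockTokenizer.WHITESPACE, false));
        assertTokenStreamContents(stream, new String[] { "test" });
      }
    }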