SOLR-1677: Add support for luceneMatchVersion in Analyzers, Tokenizers and TokenFilters.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/branches/solr@923028 13f79535-47bb-0310-9956-ffa450edef68
Author: Uwe Schindler
Date:   2010-03-15 02:07:09 +00:00
Parent: 2e35d9a8c6
Commit: 5cc19567b9
13 changed files with 257 additions and 12 deletions

File: org/apache/solr/analysis/BaseTokenFilterFactory.java

@@ -17,10 +17,13 @@
 package org.apache.solr.analysis;
+import org.apache.solr.core.Config;
+import org.apache.solr.schema.IndexSchema;
 import java.util.Map;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.lucene.util.Version;
 /**
@@ -34,8 +37,15 @@ public abstract class BaseTokenFilterFactory implements TokenFilterFactory {
   /** The init args */
   protected Map<String,String> args;
+  /** the luceneVersion arg */
+  protected Version luceneMatchVersion = null;
   public void init(Map<String,String> args) {
     this.args=args;
+    String matchVersion = args.get(IndexSchema.LUCENE_MATCH_VERSION_PARAM);
+    if (matchVersion != null) {
+      luceneMatchVersion = Config.parseLuceneVersionString(matchVersion);
+    }
   }
   public Map<String,String> getArgs() {
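
Both factory base classes now expose the parsed version to every subclass through the protected field. A minimal sketch of how a concrete factory can use it (hypothetical class, not part of this commit; assumes the Lucene 2.9/3.0-era StandardFilter(TokenStream) constructor):

// Hypothetical example: a custom factory relying on the luceneMatchVersion
// field populated by BaseTokenFilterFactory.init().
package org.apache.solr.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.util.Version;

public class MyVersionAwareFilterFactory extends BaseTokenFilterFactory {
  public TokenStream create(TokenStream input) {
    // the field is null when the parameter was absent from the init args,
    // so a factory that requires it should check
    if (luceneMatchVersion == null) {
      throw new RuntimeException("Configuration error: luceneMatchVersion parameter is required");
    }
    if (luceneMatchVersion.compareTo(Version.LUCENE_30) >= 0) {
      // version-dependent behavior would branch here
    }
    return new StandardFilter(input);
  }
}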

File: org/apache/solr/analysis/BaseTokenizerFactory.java

@@ -17,9 +17,13 @@
 package org.apache.solr.analysis;
+import org.apache.solr.core.Config;
+import org.apache.solr.schema.IndexSchema;
 import java.util.Map;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.lucene.util.Version;
 /**
@@ -33,8 +37,15 @@ public abstract class BaseTokenizerFactory implements TokenizerFactory {
   /** The init args */
   protected Map<String,String> args;
+  /** the luceneVersion arg */
+  protected Version luceneMatchVersion = null;
   public void init(Map<String,String> args) {
     this.args=args;
+    String matchVersion = args.get(IndexSchema.LUCENE_MATCH_VERSION_PARAM);
+    if (matchVersion != null) {
+      luceneMatchVersion = Config.parseLuceneVersionString(matchVersion);
+    }
   }
   public Map<String,String> getArgs() {

File: org/apache/solr/analysis/StandardTokenizerFactory.java

@@ -19,7 +19,6 @@ package org.apache.solr.analysis;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.util.Version;
 import java.io.Reader;
@@ -29,6 +28,6 @@ import java.io.Reader;
 public class StandardTokenizerFactory extends BaseTokenizerFactory {
   public StandardTokenizer create(Reader input) {
-    return new StandardTokenizer(Version.LUCENE_24, input);
+    return new StandardTokenizer(luceneMatchVersion, input);
   }
 }
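
The factory no longer hardcodes Version.LUCENE_24: it passes whatever version init() parsed from its arguments. A sketch of driving the factory directly, mirroring the test changes later in this commit (the "LUCENE_30" value is just an example):

import java.io.StringReader;
import java.util.Collections;
import java.util.Map;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.solr.analysis.StandardTokenizerFactory;

public class StandardTokenizerFactoryDemo {
  public static void main(String[] args) throws Exception {
    Map<String,String> params =
        Collections.singletonMap("luceneMatchVersion", "LUCENE_30");
    StandardTokenizerFactory factory = new StandardTokenizerFactory();
    factory.init(params);  // populates the inherited luceneMatchVersion field
    Tokenizer stream = factory.create(new StringReader("What's this thing do?"));
    // consume the token stream as usual...
  }
}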

File: org/apache/solr/analysis/StopFilterFactory.java

@@ -51,8 +51,7 @@ public class StopFilterFactory extends BaseTokenFilterFactory implements Resourc
         }
         for (String file : files) {
           List<String> wlist = loader.getLines(file.trim());
-          //TODO: once StopFilter.makeStopSet(List) method is available, switch to using that so we can avoid a toArray() call
-          stopWords.addAll(StopFilter.makeStopSet((String[])wlist.toArray(new String[0]), ignoreCase));
+          stopWords.addAll(StopFilter.makeStopSet(wlist, ignoreCase));
         }
       } catch (IOException e) {
         throw new RuntimeException(e);
@@ -61,7 +60,7 @@ public class StopFilterFactory extends BaseTokenFilterFactory implements Resourc
       stopWords = new CharArraySet(StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
     }
   }
   //Force the use of a char array set, as it is the most performant, although this may break things if Lucene ever goes away from it. See SOLR-1095
   private CharArraySet stopWords;
   private boolean ignoreCase;
   private boolean enablePositionIncrements;
@@ -74,12 +73,12 @@ public class StopFilterFactory extends BaseTokenFilterFactory implements Resourc
     return ignoreCase;
   }
-  public Set getStopWords() {
+  public Set<?> getStopWords() {
     return stopWords;
   }
   public StopFilter create(TokenStream input) {
-    StopFilter stopFilter = new StopFilter(enablePositionIncrements, input,stopWords,ignoreCase);
+    StopFilter stopFilter = new StopFilter(enablePositionIncrements,input,stopWords,ignoreCase);
     return stopFilter;
   }
 }
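
The removed TODO is resolved here: Lucene's StopFilter.makeStopSet now has a List-based overload, so the explicit toArray() copy is gone. The other two changes are cosmetic, generifying the raw Set return type to Set<?> and normalizing whitespace in the StopFilter constructor call.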

File: org/apache/solr/core/Config.java

@@ -29,9 +29,15 @@ import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpressionException;
 import javax.xml.namespace.QName;
 import java.io.*;
+import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
+import java.util.LinkedHashMap;
+import java.util.Collections;
+import java.util.concurrent.atomic.AtomicBoolean;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.lucene.util.Version;
 /**
  * @version $Id$
@@ -266,6 +272,41 @@ public class Config {
     String val = getVal(path, false);
     return val!=null ? Double.parseDouble(val) : def;
   }
+  public Version getLuceneVersion(String path) {
+    return parseLuceneVersionString(getVal(path, true));
+  }
+  public Version getLuceneVersion(String path, Version def) {
+    String val = getVal(path, false);
+    return val!=null ? parseLuceneVersionString(val) : def;
+  }
+  private static final AtomicBoolean versionWarningAlreadyLogged = new AtomicBoolean(false);
+  public static final Version parseLuceneVersionString(String matchVersion) {
+    matchVersion = matchVersion.toUpperCase();
+    final Version version;
+    try {
+      version = Version.valueOf(matchVersion);
+    } catch (IllegalArgumentException iae) {
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+        "Invalid luceneMatchVersion '" + matchVersion +
+        "' property, valid values are: " + Arrays.toString(Version.values()), iae, false);
+    }
+    if (version == Version.LUCENE_CURRENT && !versionWarningAlreadyLogged.getAndSet(true)) {
+      log.warn(
+        "You should not use LUCENE_CURRENT as luceneMatchVersion property: "+
+        "if you use this setting, and then Solr upgrades to a newer release of Lucene, "+
+        "sizable changes may happen. If precise back compatibility is important "+
+        "then you should instead explicitly specify an actual Lucene version."
+      );
+    }
+    return version;
+  }
   // The following functions were moved to ResourceLoader
   //-----------------------------------------------------------------------------
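
The parser's observable behavior, as a small sketch (the "LUCENE_99" value is just an example of an invalid input; the assertion assumes -ea):

import org.apache.lucene.util.Version;
import org.apache.solr.core.Config;

public class VersionParseDemo {
  public static void main(String[] args) {
    // case-insensitive: the string is uppercased before Version.valueOf()
    Version v = Config.parseLuceneVersionString("lucene_24");
    assert v == Version.LUCENE_24;

    // LUCENE_CURRENT is accepted, but triggers a one-time warning in the log
    Version cur = Config.parseLuceneVersionString("LUCENE_CURRENT");

    // anything else throws a SolrException listing Version.values()
    try {
      Config.parseLuceneVersionString("LUCENE_99");
    } catch (org.apache.solr.common.SolrException expected) {
      System.out.println(expected.getMessage());
    }
  }
}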

File: org/apache/solr/core/SolrConfig.java

@@ -37,6 +37,7 @@ import org.apache.solr.spelling.QueryConverter;
 import org.apache.solr.highlight.SolrHighlighter;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.index.IndexDeletionPolicy;
+import org.apache.lucene.util.Version;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -134,6 +135,8 @@ public class SolrConfig extends Config {
     reopenReaders = getBool("mainIndex/reopenReaders", true);
     booleanQueryMaxClauseCount = getInt("query/maxBooleanClauses", BooleanQuery.getMaxClauseCount());
+    luceneMatchVersion = getLuceneVersion("luceneMatchVersion", Version.LUCENE_24);
     filtOptEnabled = getBool("query/boolTofilterOptimizer/@enabled", false);
     filtOptCacheSize = getInt("query/boolTofilterOptimizer/@cacheSize",32);
     filtOptThreshold = getFloat("query/boolTofilterOptimizer/@threshold",.05f);
@@ -261,6 +264,7 @@ public class SolrConfig extends Config {
   public final int maxWarmingSearchers;
   public final boolean unlockOnStartup;
   public final boolean useColdSearcher;
+  public final Version luceneMatchVersion;
   protected String dataDir;
   //JMX configuration
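
The fallback default is Version.LUCENE_24, the same value StandardTokenizerFactory hardcoded before this change (see the removed line above), presumably so that a solrconfig.xml without a <luceneMatchVersion> element keeps its previous analysis behavior.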

File: org/apache/solr/schema/IndexSchema.java

@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.util.Version;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.SolrParams;
@@ -46,6 +47,7 @@ import java.io.InputStream;
 import java.io.Reader;
 import java.io.IOException;
 import java.util.*;
+import java.lang.reflect.Constructor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -57,6 +59,7 @@ import org.slf4j.LoggerFactory;
  */
 public final class IndexSchema {
   public static final String DEFAULT_SCHEMA_FILE = "schema.xml";
+  public static final String LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";
   final static Logger log = LoggerFactory.getLogger(IndexSchema.class);
   private final SolrConfig solrConfig;
@@ -818,7 +821,24 @@ public final class IndexSchema {
     NamedNodeMap attrs = node.getAttributes();
     String analyzerName = DOMUtil.getAttr(attrs,"class");
     if (analyzerName != null) {
-      return (Analyzer)loader.newInstance(analyzerName);
+      // nocommit: add support for CoreAware & Co here?
+      final Class<? extends Analyzer> clazz = loader.findClass(analyzerName).asSubclass(Analyzer.class);
+      try {
+        try {
+          // first try to use a ctor with version parameter (needed for many new Analyzers that have no default one anymore)
+          Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
+          final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM);
+          final Version luceneMatchVersion = (matchVersionStr == null) ?
+            solrConfig.luceneMatchVersion : Config.parseLuceneVersionString(matchVersionStr);
+          return cnstr.newInstance(luceneMatchVersion);
+        } catch (NoSuchMethodException nsme) {
+          // otherwise use default ctor
+          return clazz.newInstance();
+        }
+      } catch (Exception e) {
+        throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
+          "Cannot load analyzer: "+analyzerName );
+      }
     }
     XPath xpath = XPathFactory.newInstance().newXPath();
@@ -832,7 +852,11 @@ public final class IndexSchema {
   @Override
   protected void init(CharFilterFactory plugin, Node node) throws Exception {
     if( plugin != null ) {
-      plugin.init( DOMUtil.toMapExcept(node.getAttributes(),"class") );
+      final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
+      // copy the luceneMatchVersion from config, if not set
+      if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
+        params.put(LUCENE_MATCH_VERSION_PARAM, solrConfig.luceneMatchVersion.toString());
+      plugin.init( params );
       charFilters.add( plugin );
     }
   }
@@ -858,7 +882,11 @@ public final class IndexSchema {
       throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
         "The schema defines multiple tokenizers for: "+node );
     }
-    plugin.init( DOMUtil.toMapExcept(node.getAttributes(),"class") );
+    final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
+    // copy the luceneMatchVersion from config, if not set
+    if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
+      params.put(LUCENE_MATCH_VERSION_PARAM, solrConfig.luceneMatchVersion.toString());
+    plugin.init( params );
     tokenizers.add( plugin );
   }
@@ -884,7 +912,11 @@ public final class IndexSchema {
   @Override
   protected void init(TokenFilterFactory plugin, Node node) throws Exception {
     if( plugin != null ) {
-      plugin.init( DOMUtil.toMapExcept(node.getAttributes(),"class") );
+      final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
+      // copy the luceneMatchVersion from config, if not set
+      if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
+        params.put(LUCENE_MATCH_VERSION_PARAM, solrConfig.luceneMatchVersion.toString());
+      plugin.init( params );
       filters.add( plugin );
     }
   }
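
The analyzer instantiation above boils down to a constructor-selection pattern: prefer a (Version) constructor, fall back to the default one. Isolated as a standalone sketch (names are illustrative, not from the commit):

import java.lang.reflect.Constructor;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.Version;

public class AnalyzerInstantiator {
  static Analyzer newAnalyzer(Class<? extends Analyzer> clazz, Version matchVersion)
      throws Exception {
    try {
      // prefer a (Version) ctor: many newer analyzers have no default ctor
      Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
      return cnstr.newInstance(matchVersion);
    } catch (NoSuchMethodException nsme) {
      // fall back to the no-arg ctor
      return clazz.newInstance();
    }
  }
}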

File: SolrInfoMBeanTest.java

@@ -80,7 +80,7 @@ public class SolrInfoMBeanTest extends TestCase
         }
       }
     }
-    assertTrue( "there are at least 10 SolrInfoMBean that should be found in the classpath.", checked > 10 );
+    assertTrue( "there are at least 10 SolrInfoMBean that should be found in the classpath, found " + checked, checked > 10 );
   }
   private static List<Class> getClassesForPackage(String pckgname) throws Exception {

File: org/apache/solr/analysis/BaseTokenTestCase.java

@@ -19,6 +19,8 @@ package org.apache.solr.analysis;
 import java.io.IOException;
 import java.io.StringReader;
+import java.util.Map;
+import java.util.Collections;
 import junit.framework.TestCase;
@@ -34,6 +36,9 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
  */
 public abstract class BaseTokenTestCase extends TestCase
 {
+  protected static final Map<String,String> DEFAULT_VERSION_PARAM =
+    Collections.singletonMap("luceneMatchVersion", "LUCENE_30");
   // some helpers to test Analyzers and TokenStreams:
   // these are taken from Lucene's BaseTokenStreamTestCase

File: org/apache/solr/analysis/TestLuceneMatchVersion.java (new file)

@@ -0,0 +1,82 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.analysis;

import java.io.StringReader;
import java.lang.reflect.Field;

import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.FieldType;
import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

/**
 * Tests for luceneMatchVersion property for analyzers
 */
public class TestLuceneMatchVersion extends AbstractSolrTestCase {

  @Override
  public String getSchemaFile() {
    return "schema-luceneMatchVersion.xml";
  }

  @Override
  public String getSolrConfigFile() {
    return "solrconfig.xml";
  }

  public static final Version DEFAULT_VERSION = Version.LUCENE_30;

  public void testStandardTokenizerVersions() throws Exception {
    assertEquals(DEFAULT_VERSION, solrConfig.luceneMatchVersion);
    final IndexSchema schema = h.getCore().getSchema();

    FieldType type = schema.getFieldType("textDefault");
    TokenizerChain ana = (TokenizerChain) type.getAnalyzer();
    assertEquals(DEFAULT_VERSION, ((BaseTokenizerFactory) ana.getTokenizerFactory()).luceneMatchVersion);
    assertEquals(DEFAULT_VERSION, ((BaseTokenFilterFactory) ana.getTokenFilterFactories()[2]).luceneMatchVersion);
    TokenizerChain.TokenStreamInfo tsi = ana.getStream("textDefault",new StringReader(""));
    StandardTokenizer tok = (StandardTokenizer) tsi.getTokenizer();
    assertTrue(tok.isReplaceInvalidAcronym());

    type = schema.getFieldType("text20");
    ana = (TokenizerChain) type.getAnalyzer();
    assertEquals(Version.LUCENE_20, ((BaseTokenizerFactory) ana.getTokenizerFactory()).luceneMatchVersion);
    assertEquals(Version.LUCENE_24, ((BaseTokenFilterFactory) ana.getTokenFilterFactories()[2]).luceneMatchVersion);
    tsi = ana.getStream("text20",new StringReader(""));
    tok = (StandardTokenizer) tsi.getTokenizer();
    assertFalse(tok.isReplaceInvalidAcronym());

    // this is a hack to get the private matchVersion field in StandardAnalyzer,
    // may break in later lucene versions - we have no getter :(
    final Field matchVersionField = StandardAnalyzer.class.getDeclaredField("matchVersion");
    matchVersionField.setAccessible(true);

    type = schema.getFieldType("textStandardAnalyzerDefault");
    Analyzer ana1 = type.getAnalyzer();
    assertTrue(ana1 instanceof StandardAnalyzer);
    assertEquals(DEFAULT_VERSION, matchVersionField.get(ana1));

    type = schema.getFieldType("textStandardAnalyzer20");
    ana1 = type.getAnalyzer();
    assertTrue(ana1 instanceof StandardAnalyzer);
    assertEquals(Version.LUCENE_20, matchVersionField.get(ana1));
  }
}

File: TestStandardFactories.java

@@ -34,6 +34,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testStandardTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     StandardTokenizerFactory factory = new StandardTokenizerFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
       new String[] {"What's", "this", "thing", "do" });
@@ -45,7 +46,9 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testStandardFilter() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     StandardTokenizerFactory factory = new StandardTokenizerFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     StandardFilterFactory filterFactory = new StandardFilterFactory();
+    filterFactory.init(DEFAULT_VERSION_PARAM);
     Tokenizer tokenizer = factory.create(reader);
     TokenStream stream = filterFactory.create(tokenizer);
     assertTokenStreamContents(stream,
@@ -58,6 +61,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testKeywordTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     KeywordTokenizerFactory factory = new KeywordTokenizerFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
       new String[] {"What's this thing do?"});
@@ -69,6 +73,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testWhitespaceTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     WhitespaceTokenizerFactory factory = new WhitespaceTokenizerFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
       new String[] {"What's", "this", "thing", "do?"});
@@ -80,6 +85,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testLetterTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     LetterTokenizerFactory factory = new LetterTokenizerFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
       new String[] {"What", "s", "this", "thing", "do"});
@@ -91,6 +97,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testLowerCaseTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     LowerCaseTokenizerFactory factory = new LowerCaseTokenizerFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
       new String[] {"what", "s", "this", "thing", "do"});
@@ -103,6 +110,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
     Reader reader = new StringReader("Česká");
     Tokenizer tokenizer = new WhitespaceTokenizer(reader);
     ASCIIFoldingFilterFactory factory = new ASCIIFoldingFilterFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "Ceska" });
   }
@@ -115,6 +123,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
     Reader reader = new StringReader("Česká");
     Tokenizer tokenizer = new WhitespaceTokenizer(reader);
     ISOLatin1AccentFilterFactory factory = new ISOLatin1AccentFilterFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "Česka" });
   }

File: schema-luceneMatchVersion.xml (new file)

@@ -0,0 +1,51 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<schema name="luceneMatchVersionTest" version="1.1">
  <types>
    <fieldtype name="text20" class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory" luceneMatchVersion="LUCENE_20"/>
        <filter class="solr.StandardFilterFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" luceneMatchVersion="LUCENE_24"/>
        <filter class="solr.EnglishPorterFilterFactory"/>
      </analyzer>
    </fieldtype>
    <fieldtype name="textDefault" class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StandardFilterFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory"/>
        <filter class="solr.EnglishPorterFilterFactory"/>
      </analyzer>
    </fieldtype>
    <fieldtype name="textStandardAnalyzer20" class="solr.TextField">
      <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer" luceneMatchVersion="LUCENE_20"/>
    </fieldtype>
    <fieldtype name="textStandardAnalyzerDefault" class="solr.TextField">
      <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
    </fieldtype>
  </types>
  <fields>
    <field name="text20" type="text20" indexed="true" stored="false" />
    <field name="textDefault" type="textDefault" indexed="true" stored="false" />
    <field name="textStandardAnalyzer20" type="textStandardAnalyzer20" indexed="true" stored="false" />
    <field name="textStandardAnalyzerDefault" type="textStandardAnalyzerDefault" indexed="true" stored="false" />
  </fields>
</schema>
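
This test schema exercises all three resolution paths: an explicit luceneMatchVersion attribute on an individual tokenizer or filter element, an explicit attribute on a whole <analyzer class="..."/> declaration, and the fall-through to the global value from solrconfig.xml when no attribute is given.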

File: solrconfig.xml

@@ -44,6 +44,8 @@
      It defaults to "index" if not present, and should probably
      not be changed if replication is in use. -->
   <dataDir>${solr.data.dir:./solr/data}</dataDir>
+  <luceneMatchVersion>LUCENE_30</luceneMatchVersion>
   <indexDefaults>
   <!-- Values here affect all index writers and act as a default