mirror of https://github.com/apache/lucene.git
SOLR-1677: Add support for luceneMatchVersion in Analyzers, Tokenizers and TokenFilters.
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/branches/solr@923028 13f79535-47bb-0310-9956-ffa450edef68
parent 2e35d9a8c6
commit 5cc19567b9
BaseTokenFilterFactory.java
@@ -17,10 +17,13 @@
 
 package org.apache.solr.analysis;
 
+import org.apache.solr.core.Config;
+import org.apache.solr.schema.IndexSchema;
 
 import java.util.Map;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.lucene.util.Version;
 
 
 /**
@@ -34,8 +37,15 @@ public abstract class BaseTokenFilterFactory implements TokenFilterFactory {
   /** The init args */
   protected Map<String,String> args;
 
+  /** the luceneVersion arg */
+  protected Version luceneMatchVersion = null;
+
   public void init(Map<String,String> args) {
     this.args=args;
+    String matchVersion = args.get(IndexSchema.LUCENE_MATCH_VERSION_PARAM);
+    if (matchVersion != null) {
+      luceneMatchVersion = Config.parseLuceneVersionString(matchVersion);
+    }
   }
 
   public Map<String,String> getArgs() {
BaseTokenizerFactory.java
@@ -17,9 +17,13 @@
 
 package org.apache.solr.analysis;
 
+import org.apache.solr.core.Config;
+import org.apache.solr.schema.IndexSchema;
+
 import java.util.Map;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.lucene.util.Version;
 
 
 /**
@@ -33,8 +37,15 @@ public abstract class BaseTokenizerFactory implements TokenizerFactory {
   /** The init args */
   protected Map<String,String> args;
 
+  /** the luceneVersion arg */
+  protected Version luceneMatchVersion = null;
+
   public void init(Map<String,String> args) {
     this.args=args;
+    String matchVersion = args.get(IndexSchema.LUCENE_MATCH_VERSION_PARAM);
+    if (matchVersion != null) {
+      luceneMatchVersion = Config.parseLuceneVersionString(matchVersion);
+    }
   }
 
   public Map<String,String> getArgs() {
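The two nearly identical base factories above now parse the luceneMatchVersion init arg into a protected field, so every concrete factory inherits it. A minimal sketch of how a subclass could key behavior off that field; the class below is illustrative, not part of this commit:

package org.apache.solr.analysis;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;

// Hypothetical factory (not in this commit). By the time create() runs,
// BaseTokenFilterFactory.init() has filled the inherited luceneMatchVersion
// field from the "luceneMatchVersion" init arg, if one was supplied.
public class ExampleVersionAwareFilterFactory extends BaseTokenFilterFactory {
  public TokenStream create(TokenStream input) {
    if (luceneMatchVersion != null && luceneMatchVersion.onOrAfter(Version.LUCENE_30)) {
      // behavior that should only apply at 3.0+ compatibility would go here
    }
    return new LowerCaseFilter(input); // placeholder delegate filter
  }
}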
StandardTokenizerFactory.java
@@ -19,7 +19,6 @@ package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.util.Version;
 
 import java.io.Reader;
 
@@ -29,6 +28,6 @@ import java.io.Reader;
 
 public class StandardTokenizerFactory extends BaseTokenizerFactory {
   public StandardTokenizer create(Reader input) {
-    return new StandardTokenizer(Version.LUCENE_24, input);
+    return new StandardTokenizer(luceneMatchVersion, input);
   }
 }
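End to end, the wiring is: init() on the base class turns the string into a Version, and create() hands it to the tokenizer instead of the old hard-coded LUCENE_24. A minimal runnable sketch of that flow, where the map literal stands in for the attributes Solr would collect from the schema:

import java.io.StringReader;
import java.util.Collections;
import java.util.Map;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.solr.analysis.StandardTokenizerFactory;

public class VersionWiringSketch {
  public static void main(String[] args) {
    // Solr normally builds this map from the <tokenizer> element's attributes.
    Map<String,String> initArgs =
        Collections.singletonMap("luceneMatchVersion", "LUCENE_30");
    StandardTokenizerFactory factory = new StandardTokenizerFactory();
    factory.init(initArgs); // parses the string into the factory's luceneMatchVersion
    // create() now calls new StandardTokenizer(luceneMatchVersion, input)
    StandardTokenizer tok = factory.create(new StringReader("some text"));
    System.out.println(tok != null); // true
  }
}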
StopFilterFactory.java
@@ -51,8 +51,7 @@ public class StopFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
       }
       for (String file : files) {
         List<String> wlist = loader.getLines(file.trim());
-        //TODO: once StopFilter.makeStopSet(List) method is available, switch to using that so we can avoid a toArray() call
-        stopWords.addAll(StopFilter.makeStopSet((String[])wlist.toArray(new String[0]), ignoreCase));
+        stopWords.addAll(StopFilter.makeStopSet(wlist, ignoreCase));
       }
     } catch (IOException e) {
       throw new RuntimeException(e);
@@ -61,7 +60,7 @@ public class StopFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
       stopWords = new CharArraySet(StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
     }
   }
-  //Force the use of a char array set, as it is the most performant, although this may break things if Lucene ever goes away from it. See SOLR-1095
+
   private CharArraySet stopWords;
   private boolean ignoreCase;
   private boolean enablePositionIncrements;
@@ -74,12 +73,12 @@ public class StopFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
     return ignoreCase;
   }
 
-  public Set getStopWords() {
+  public Set<?> getStopWords() {
     return stopWords;
   }
 
   public StopFilter create(TokenStream input) {
-    StopFilter stopFilter = new StopFilter(enablePositionIncrements, input,stopWords,ignoreCase);
+    StopFilter stopFilter = new StopFilter(enablePositionIncrements,input,stopWords,ignoreCase);
     return stopFilter;
   }
 }
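The first hunk above also resolves the old TODO: Lucene's StopFilter gained a List-based makeStopSet overload, so the (String[]) toArray copy is gone. A small isolated sketch of that overload; the word list is invented for illustration:

import java.util.Arrays;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.StopFilter;

public class MakeStopSetSketch {
  public static void main(String[] args) {
    List<String> wlist = Arrays.asList("a", "an", "the"); // illustrative stopwords
    // The List overload builds the set directly; no intermediate array needed.
    Set<?> stopSet = StopFilter.makeStopSet(wlist, true); // true = ignoreCase
    System.out.println(stopSet.contains("The")); // true, case is folded
  }
}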
Config.java
@@ -29,9 +29,15 @@ import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpressionException;
 import javax.xml.namespace.QName;
 import java.io.*;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.LinkedHashMap;
 import java.util.Collections;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.lucene.util.Version;
+
 /**
  * @version $Id$
@@ -267,6 +273,41 @@ public class Config {
     return val!=null ? Double.parseDouble(val) : def;
   }
 
+  public Version getLuceneVersion(String path) {
+    return parseLuceneVersionString(getVal(path, true));
+  }
+
+  public Version getLuceneVersion(String path, Version def) {
+    String val = getVal(path, false);
+    return val!=null ? parseLuceneVersionString(val) : def;
+  }
+
+  private static final AtomicBoolean versionWarningAlreadyLogged = new AtomicBoolean(false);
+
+  public static final Version parseLuceneVersionString(String matchVersion) {
+    matchVersion = matchVersion.toUpperCase();
+
+    final Version version;
+    try {
+      version = Version.valueOf(matchVersion);
+    } catch (IllegalArgumentException iae) {
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+        "Invalid luceneMatchVersion '" + matchVersion +
+        "' property, valid values are: " + Arrays.toString(Version.values()), iae, false);
+    }
+
+    if (version == Version.LUCENE_CURRENT && !versionWarningAlreadyLogged.getAndSet(true)) {
+      log.warn(
+        "You should not use LUCENE_CURRENT as luceneMatchVersion property: "+
+        "if you use this setting, and then Solr upgrades to a newer release of Lucene, "+
+        "sizable changes may happen. If precise back compatibility is important "+
+        "then you should instead explicitly specify an actual Lucene version."
+      );
+    }
+
+    return version;
+  }
+
   // The following functions were moved to ResourceLoader
   //-----------------------------------------------------------------------------
 
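The parsing contract added to Config above, seen from the caller's side. A hedged sketch; the constant names come straight from org.apache.lucene.util.Version:

import org.apache.lucene.util.Version;
import org.apache.solr.core.Config;

public class ParseVersionSketch {
  public static void main(String[] args) {
    // The input is uppercased first, so the property is effectively case-insensitive.
    Version v = Config.parseLuceneVersionString("lucene_30");
    System.out.println(v == Version.LUCENE_30); // true
    // Unknown values throw SolrException(SERVER_ERROR) listing Version.values();
    // "LUCENE_CURRENT" parses, but triggers the one-time back-compat warning.
  }
}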
SolrConfig.java
@@ -37,6 +37,7 @@ import org.apache.solr.spelling.QueryConverter;
 import org.apache.solr.highlight.SolrHighlighter;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.index.IndexDeletionPolicy;
+import org.apache.lucene.util.Version;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -134,6 +135,8 @@ public class SolrConfig extends Config {
     reopenReaders = getBool("mainIndex/reopenReaders", true);
 
     booleanQueryMaxClauseCount = getInt("query/maxBooleanClauses", BooleanQuery.getMaxClauseCount());
+    luceneMatchVersion = getLuceneVersion("luceneMatchVersion", Version.LUCENE_24);
+
     filtOptEnabled = getBool("query/boolTofilterOptimizer/@enabled", false);
     filtOptCacheSize = getInt("query/boolTofilterOptimizer/@cacheSize",32);
     filtOptThreshold = getFloat("query/boolTofilterOptimizer/@threshold",.05f);
@@ -261,6 +264,7 @@ public class SolrConfig extends Config {
   public final int maxWarmingSearchers;
   public final boolean unlockOnStartup;
   public final boolean useColdSearcher;
+  public final Version luceneMatchVersion;
   protected String dataDir;
 
   //JMX configuration
IndexSchema.java
@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.util.Version;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.SolrParams;
@@ -46,6 +47,7 @@ import java.io.InputStream;
 import java.io.Reader;
 import java.io.IOException;
 import java.util.*;
+import java.lang.reflect.Constructor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -57,6 +59,7 @@ import org.slf4j.LoggerFactory;
  */
 public final class IndexSchema {
   public static final String DEFAULT_SCHEMA_FILE = "schema.xml";
+  public static final String LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";
 
   final static Logger log = LoggerFactory.getLogger(IndexSchema.class);
   private final SolrConfig solrConfig;
@@ -818,7 +821,24 @@ public final class IndexSchema {
       NamedNodeMap attrs = node.getAttributes();
       String analyzerName = DOMUtil.getAttr(attrs,"class");
       if (analyzerName != null) {
-        return (Analyzer)loader.newInstance(analyzerName);
+        // nocommit: add support for CoreAware & Co here?
+        final Class<? extends Analyzer> clazz = loader.findClass(analyzerName).asSubclass(Analyzer.class);
+        try {
+          try {
+            // first try to use a ctor with version parameter (needed for many new Analyzers that have no default one anymore)
+            Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
+            final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM);
+            final Version luceneMatchVersion = (matchVersionStr == null) ?
+              solrConfig.luceneMatchVersion : Config.parseLuceneVersionString(matchVersionStr);
+            return cnstr.newInstance(luceneMatchVersion);
+          } catch (NoSuchMethodException nsme) {
+            // otherwise use default ctor
+            return clazz.newInstance();
+          }
+        } catch (Exception e) {
+          throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
+            "Cannot load analyzer: "+analyzerName );
+        }
       }
 
       XPath xpath = XPathFactory.newInstance().newXPath();
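Stripped of the Solr plumbing, the reflection above prefers an Analyzer constructor taking a Version and falls back to the no-arg constructor. StandardAnalyzer is a concrete example of an analyzer that declares such a constructor in Lucene 3.0; a minimal sketch:

import java.lang.reflect.Constructor;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.Version;

public class AnalyzerCtorSketch {
  public static void main(String[] args) throws Exception {
    Class<? extends Analyzer> clazz = Class
        .forName("org.apache.lucene.analysis.standard.StandardAnalyzer")
        .asSubclass(Analyzer.class);
    // Throws NoSuchMethodException if the analyzer has no (Version) constructor,
    // which is exactly the case the fallback branch above handles.
    Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
    Analyzer analyzer = cnstr.newInstance(Version.LUCENE_30);
    System.out.println(analyzer.getClass().getSimpleName()); // StandardAnalyzer
  }
}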
@@ -832,7 +852,11 @@
     @Override
     protected void init(CharFilterFactory plugin, Node node) throws Exception {
       if( plugin != null ) {
-        plugin.init( DOMUtil.toMapExcept(node.getAttributes(),"class") );
+        final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
+        // copy the luceneMatchVersion from config, if not set
+        if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
+          params.put(LUCENE_MATCH_VERSION_PARAM, solrConfig.luceneMatchVersion.toString());
+        plugin.init( params );
         charFilters.add( plugin );
       }
     }
@@ -858,7 +882,11 @@
         throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
             "The schema defines multiple tokenizers for: "+node );
       }
-      plugin.init( DOMUtil.toMapExcept(node.getAttributes(),"class") );
+      final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
+      // copy the luceneMatchVersion from config, if not set
+      if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
+        params.put(LUCENE_MATCH_VERSION_PARAM, solrConfig.luceneMatchVersion.toString());
+      plugin.init( params );
       tokenizers.add( plugin );
     }
 
@@ -884,7 +912,11 @@
     @Override
     protected void init(TokenFilterFactory plugin, Node node) throws Exception {
       if( plugin != null ) {
-        plugin.init( DOMUtil.toMapExcept(node.getAttributes(),"class") );
+        final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
+        // copy the luceneMatchVersion from config, if not set
+        if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
+          params.put(LUCENE_MATCH_VERSION_PARAM, solrConfig.luceneMatchVersion.toString());
+        plugin.init( params );
         filters.add( plugin );
       }
     }
SolrInfoMBeanTest.java
@@ -80,7 +80,7 @@ public class SolrInfoMBeanTest extends TestCase
         }
       }
     }
-    assertTrue( "there are at least 10 SolrInfoMBean that should be found in the classpath.", checked > 10 );
+    assertTrue( "there are at least 10 SolrInfoMBean that should be found in the classpath, found " + checked, checked > 10 );
   }
 
   private static List<Class> getClassesForPackage(String pckgname) throws Exception {
BaseTokenTestCase.java
@@ -19,6 +19,8 @@ package org.apache.solr.analysis;
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.util.Map;
+import java.util.Collections;
 
 import junit.framework.TestCase;
 
@@ -34,6 +36,9 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
  */
 public abstract class BaseTokenTestCase extends TestCase
 {
+  protected static final Map<String,String> DEFAULT_VERSION_PARAM =
+    Collections.singletonMap("luceneMatchVersion", "LUCENE_30");
+
   // some helpers to test Analyzers and TokenStreams:
   // these are taken from Lucene's BaseTokenStreamTestCase
 
TestLuceneMatchVersion.java (new file)
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.analysis;
+
+import java.io.StringReader;
+import java.lang.reflect.Field;
+
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.util.AbstractSolrTestCase;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.util.Version;
+
+/**
+ * Tests for luceneMatchVersion property for analyzers
+ */
+public class TestLuceneMatchVersion extends AbstractSolrTestCase {
+
+  @Override
+  public String getSchemaFile() {
+    return "schema-luceneMatchVersion.xml";
+  }
+
+  @Override
+  public String getSolrConfigFile() {
+    return "solrconfig.xml";
+  }
+
+  public static final Version DEFAULT_VERSION = Version.LUCENE_30;
+
+  public void testStandardTokenizerVersions() throws Exception {
+    assertEquals(DEFAULT_VERSION, solrConfig.luceneMatchVersion);
+
+    final IndexSchema schema = h.getCore().getSchema();
+
+    FieldType type = schema.getFieldType("textDefault");
+    TokenizerChain ana = (TokenizerChain) type.getAnalyzer();
+    assertEquals(DEFAULT_VERSION, ((BaseTokenizerFactory) ana.getTokenizerFactory()).luceneMatchVersion);
+    assertEquals(DEFAULT_VERSION, ((BaseTokenFilterFactory) ana.getTokenFilterFactories()[2]).luceneMatchVersion);
+    TokenizerChain.TokenStreamInfo tsi = ana.getStream("textDefault",new StringReader(""));
+    StandardTokenizer tok = (StandardTokenizer) tsi.getTokenizer();
+    assertTrue(tok.isReplaceInvalidAcronym());
+
+    type = schema.getFieldType("text20");
+    ana = (TokenizerChain) type.getAnalyzer();
+    assertEquals(Version.LUCENE_20, ((BaseTokenizerFactory) ana.getTokenizerFactory()).luceneMatchVersion);
+    assertEquals(Version.LUCENE_24, ((BaseTokenFilterFactory) ana.getTokenFilterFactories()[2]).luceneMatchVersion);
+    tsi = ana.getStream("text20",new StringReader(""));
+    tok = (StandardTokenizer) tsi.getTokenizer();
+    assertFalse(tok.isReplaceInvalidAcronym());
+
+    // this is a hack to get the private matchVersion field in StandardAnalyzer, may break in later lucene versions - we have no getter :(
+    final Field matchVersionField = StandardAnalyzer.class.getDeclaredField("matchVersion");
+    matchVersionField.setAccessible(true);
+
+    type = schema.getFieldType("textStandardAnalyzerDefault");
+    Analyzer ana1 = type.getAnalyzer();
+    assertTrue(ana1 instanceof StandardAnalyzer);
+    assertEquals(DEFAULT_VERSION, matchVersionField.get(ana1));
+
+    type = schema.getFieldType("textStandardAnalyzer20");
+    ana1 = type.getAnalyzer();
+    assertTrue(ana1 instanceof StandardAnalyzer);
+    assertEquals(Version.LUCENE_20, matchVersionField.get(ana1));
+  }
+}
TestStandardFactories.java
@@ -34,6 +34,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testStandardTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     StandardTokenizerFactory factory = new StandardTokenizerFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"What's", "this", "thing", "do" });
@@ -45,7 +46,9 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testStandardFilter() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     StandardTokenizerFactory factory = new StandardTokenizerFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     StandardFilterFactory filterFactory = new StandardFilterFactory();
+    filterFactory.init(DEFAULT_VERSION_PARAM);
     Tokenizer tokenizer = factory.create(reader);
     TokenStream stream = filterFactory.create(tokenizer);
     assertTokenStreamContents(stream,
@@ -58,6 +61,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testKeywordTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     KeywordTokenizerFactory factory = new KeywordTokenizerFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"What's this thing do?"});
@@ -69,6 +73,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testWhitespaceTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     WhitespaceTokenizerFactory factory = new WhitespaceTokenizerFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"What's", "this", "thing", "do?"});
@@ -80,6 +85,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testLetterTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     LetterTokenizerFactory factory = new LetterTokenizerFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"What", "s", "this", "thing", "do"});
@@ -91,6 +97,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
   public void testLowerCaseTokenizer() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
     LowerCaseTokenizerFactory factory = new LowerCaseTokenizerFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"what", "s", "this", "thing", "do"});
@@ -103,6 +110,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
     Reader reader = new StringReader("Česká");
     Tokenizer tokenizer = new WhitespaceTokenizer(reader);
     ASCIIFoldingFilterFactory factory = new ASCIIFoldingFilterFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "Ceska" });
   }
@@ -115,6 +123,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
     Reader reader = new StringReader("Česká");
     Tokenizer tokenizer = new WhitespaceTokenizer(reader);
     ISOLatin1AccentFilterFactory factory = new ISOLatin1AccentFilterFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
     TokenStream stream = factory.create(tokenizer);
     assertTokenStreamContents(stream, new String[] { "Česka" });
   }
schema-luceneMatchVersion.xml (new file)
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<schema name="luceneMatchVersionTest" version="1.1">
+  <types>
+    <fieldtype name="text20" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory" luceneMatchVersion="LUCENE_20"/>
+        <filter class="solr.StandardFilterFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" luceneMatchVersion="LUCENE_24"/>
+        <filter class="solr.EnglishPorterFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="textDefault" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StandardFilterFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory"/>
+        <filter class="solr.EnglishPorterFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="textStandardAnalyzer20" class="solr.TextField">
+      <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer" luceneMatchVersion="LUCENE_20"/>
+    </fieldtype>
+    <fieldtype name="textStandardAnalyzerDefault" class="solr.TextField">
+      <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
+    </fieldtype>
+  </types>
+  <fields>
+    <field name="text20" type="text20" indexed="true" stored="false" />
+    <field name="textDefault" type="textDefault" indexed="true" stored="false" />
+    <field name="textStandardAnalyzer20" type="textStandardAnalyzer20" indexed="true" stored="false" />
+    <field name="textStandardAnalyzerDefault" type="textStandardAnalyzerDefault" indexed="true" stored="false" />
+  </fields>
+</schema>
solrconfig.xml
@@ -45,6 +45,8 @@
        not be changed if replication is in use. -->
   <dataDir>${solr.data.dir:./solr/data}</dataDir>
 
+  <luceneMatchVersion>LUCENE_30</luceneMatchVersion>
+
   <indexDefaults>
    <!-- Values here affect all index writers and act as a default
     unless overridden. -->
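Taken together, the resolution order these changes establish is: an explicit luceneMatchVersion attribute on an individual <analyzer>, <charFilter>, <tokenizer>, or <filter> element wins; failing that, IndexSchema copies in the <luceneMatchVersion> value from solrconfig.xml shown above; and if that element is absent too, SolrConfig falls back to Version.LUCENE_24 for backwards compatibility.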