mirror of https://github.com/apache/lucene.git
SOLR-1677: Add support for luceneMatchVersion in Analyzers, Tokenizers and TokenFilters.
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/branches/solr@923028 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2e35d9a8c6
commit
5cc19567b9
src
java/org/apache/solr
analysis
BaseTokenFilterFactory.java BaseTokenizerFactory.java StandardTokenizerFactory.java StopFilterFactory.java
core
schema
test
|
@ -17,10 +17,13 @@
|
||||||
|
|
||||||
package org.apache.solr.analysis;
|
package org.apache.solr.analysis;
|
||||||
|
|
||||||
|
import org.apache.solr.core.Config;
|
||||||
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -34,8 +37,15 @@ public abstract class BaseTokenFilterFactory implements TokenFilterFactory {
|
||||||
/** The init args */
|
/** The init args */
|
||||||
protected Map<String,String> args;
|
protected Map<String,String> args;
|
||||||
|
|
||||||
|
/** the luceneVersion arg */
|
||||||
|
protected Version luceneMatchVersion = null;
|
||||||
|
|
||||||
public void init(Map<String,String> args) {
|
public void init(Map<String,String> args) {
|
||||||
this.args=args;
|
this.args=args;
|
||||||
|
String matchVersion = args.get(IndexSchema.LUCENE_MATCH_VERSION_PARAM);
|
||||||
|
if (matchVersion != null) {
|
||||||
|
luceneMatchVersion = Config.parseLuceneVersionString(matchVersion);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String,String> getArgs() {
|
public Map<String,String> getArgs() {
|
||||||
|
|
|
@ -17,9 +17,13 @@
|
||||||
|
|
||||||
package org.apache.solr.analysis;
|
package org.apache.solr.analysis;
|
||||||
|
|
||||||
|
import org.apache.solr.core.Config;
|
||||||
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -33,8 +37,15 @@ public abstract class BaseTokenizerFactory implements TokenizerFactory {
|
||||||
/** The init args */
|
/** The init args */
|
||||||
protected Map<String,String> args;
|
protected Map<String,String> args;
|
||||||
|
|
||||||
|
/** the luceneVersion arg */
|
||||||
|
protected Version luceneMatchVersion = null;
|
||||||
|
|
||||||
public void init(Map<String,String> args) {
|
public void init(Map<String,String> args) {
|
||||||
this.args=args;
|
this.args=args;
|
||||||
|
String matchVersion = args.get(IndexSchema.LUCENE_MATCH_VERSION_PARAM);
|
||||||
|
if (matchVersion != null) {
|
||||||
|
luceneMatchVersion = Config.parseLuceneVersionString(matchVersion);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String,String> getArgs() {
|
public Map<String,String> getArgs() {
|
||||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.solr.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
|
@ -29,6 +28,6 @@ import java.io.Reader;
|
||||||
|
|
||||||
public class StandardTokenizerFactory extends BaseTokenizerFactory {
|
public class StandardTokenizerFactory extends BaseTokenizerFactory {
|
||||||
public StandardTokenizer create(Reader input) {
|
public StandardTokenizer create(Reader input) {
|
||||||
return new StandardTokenizer(Version.LUCENE_24, input);
|
return new StandardTokenizer(luceneMatchVersion, input);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,8 +51,7 @@ public class StopFilterFactory extends BaseTokenFilterFactory implements Resourc
|
||||||
}
|
}
|
||||||
for (String file : files) {
|
for (String file : files) {
|
||||||
List<String> wlist = loader.getLines(file.trim());
|
List<String> wlist = loader.getLines(file.trim());
|
||||||
//TODO: once StopFilter.makeStopSet(List) method is available, switch to using that so we can avoid a toArray() call
|
stopWords.addAll(StopFilter.makeStopSet(wlist, ignoreCase));
|
||||||
stopWords.addAll(StopFilter.makeStopSet((String[])wlist.toArray(new String[0]), ignoreCase));
|
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
|
@ -61,7 +60,7 @@ public class StopFilterFactory extends BaseTokenFilterFactory implements Resourc
|
||||||
stopWords = new CharArraySet(StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
|
stopWords = new CharArraySet(StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//Force the use of a char array set, as it is the most performant, although this may break things if Lucene ever goes away from it. See SOLR-1095
|
|
||||||
private CharArraySet stopWords;
|
private CharArraySet stopWords;
|
||||||
private boolean ignoreCase;
|
private boolean ignoreCase;
|
||||||
private boolean enablePositionIncrements;
|
private boolean enablePositionIncrements;
|
||||||
|
@ -74,12 +73,12 @@ public class StopFilterFactory extends BaseTokenFilterFactory implements Resourc
|
||||||
return ignoreCase;
|
return ignoreCase;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Set getStopWords() {
|
public Set<?> getStopWords() {
|
||||||
return stopWords;
|
return stopWords;
|
||||||
}
|
}
|
||||||
|
|
||||||
public StopFilter create(TokenStream input) {
|
public StopFilter create(TokenStream input) {
|
||||||
StopFilter stopFilter = new StopFilter(enablePositionIncrements, input,stopWords,ignoreCase);
|
StopFilter stopFilter = new StopFilter(enablePositionIncrements,input,stopWords,ignoreCase);
|
||||||
return stopFilter;
|
return stopFilter;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,9 +29,15 @@ import javax.xml.xpath.XPathConstants;
|
||||||
import javax.xml.xpath.XPathExpressionException;
|
import javax.xml.xpath.XPathExpressionException;
|
||||||
import javax.xml.namespace.QName;
|
import javax.xml.namespace.QName;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @version $Id$
|
* @version $Id$
|
||||||
|
@ -266,6 +272,41 @@ public class Config {
|
||||||
String val = getVal(path, false);
|
String val = getVal(path, false);
|
||||||
return val!=null ? Double.parseDouble(val) : def;
|
return val!=null ? Double.parseDouble(val) : def;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Version getLuceneVersion(String path) {
|
||||||
|
return parseLuceneVersionString(getVal(path, true));
|
||||||
|
}
|
||||||
|
|
||||||
|
public Version getLuceneVersion(String path, Version def) {
|
||||||
|
String val = getVal(path, false);
|
||||||
|
return val!=null ? parseLuceneVersionString(val) : def;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final AtomicBoolean versionWarningAlreadyLogged = new AtomicBoolean(false);
|
||||||
|
|
||||||
|
public static final Version parseLuceneVersionString(String matchVersion) {
|
||||||
|
matchVersion = matchVersion.toUpperCase();
|
||||||
|
|
||||||
|
final Version version;
|
||||||
|
try {
|
||||||
|
version = Version.valueOf(matchVersion);
|
||||||
|
} catch (IllegalArgumentException iae) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
|
||||||
|
"Invalid luceneMatchVersion '" + matchVersion +
|
||||||
|
"' property, valid values are: " + Arrays.toString(Version.values()), iae, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (version == Version.LUCENE_CURRENT && !versionWarningAlreadyLogged.getAndSet(true)) {
|
||||||
|
log.warn(
|
||||||
|
"You should not use LUCENE_CURRENT as luceneMatchVersion property: "+
|
||||||
|
"if you use this setting, and then Solr upgrades to a newer release of Lucene, "+
|
||||||
|
"sizable changes may happen. If precise back compatibility is important "+
|
||||||
|
"then you should instead explicitly specify an actual Lucene version."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return version;
|
||||||
|
}
|
||||||
|
|
||||||
// The following functions were moved to ResourceLoader
|
// The following functions were moved to ResourceLoader
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
|
|
|
@ -37,6 +37,7 @@ import org.apache.solr.spelling.QueryConverter;
|
||||||
import org.apache.solr.highlight.SolrHighlighter;
|
import org.apache.solr.highlight.SolrHighlighter;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.index.IndexDeletionPolicy;
|
import org.apache.lucene.index.IndexDeletionPolicy;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -134,6 +135,8 @@ public class SolrConfig extends Config {
|
||||||
reopenReaders = getBool("mainIndex/reopenReaders", true);
|
reopenReaders = getBool("mainIndex/reopenReaders", true);
|
||||||
|
|
||||||
booleanQueryMaxClauseCount = getInt("query/maxBooleanClauses", BooleanQuery.getMaxClauseCount());
|
booleanQueryMaxClauseCount = getInt("query/maxBooleanClauses", BooleanQuery.getMaxClauseCount());
|
||||||
|
luceneMatchVersion = getLuceneVersion("luceneMatchVersion", Version.LUCENE_24);
|
||||||
|
|
||||||
filtOptEnabled = getBool("query/boolTofilterOptimizer/@enabled", false);
|
filtOptEnabled = getBool("query/boolTofilterOptimizer/@enabled", false);
|
||||||
filtOptCacheSize = getInt("query/boolTofilterOptimizer/@cacheSize",32);
|
filtOptCacheSize = getInt("query/boolTofilterOptimizer/@cacheSize",32);
|
||||||
filtOptThreshold = getFloat("query/boolTofilterOptimizer/@threshold",.05f);
|
filtOptThreshold = getFloat("query/boolTofilterOptimizer/@threshold",.05f);
|
||||||
|
@ -261,6 +264,7 @@ public class SolrConfig extends Config {
|
||||||
public final int maxWarmingSearchers;
|
public final int maxWarmingSearchers;
|
||||||
public final boolean unlockOnStartup;
|
public final boolean unlockOnStartup;
|
||||||
public final boolean useColdSearcher;
|
public final boolean useColdSearcher;
|
||||||
|
public final Version luceneMatchVersion;
|
||||||
protected String dataDir;
|
protected String dataDir;
|
||||||
|
|
||||||
//JMX configuration
|
//JMX configuration
|
||||||
|
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
import org.apache.lucene.search.Similarity;
|
import org.apache.lucene.search.Similarity;
|
||||||
import org.apache.lucene.queryParser.QueryParser;
|
import org.apache.lucene.queryParser.QueryParser;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
import org.apache.solr.common.ResourceLoader;
|
import org.apache.solr.common.ResourceLoader;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.params.SolrParams;
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
@ -46,6 +47,7 @@ import java.io.InputStream;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import java.lang.reflect.Constructor;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@ -57,6 +59,7 @@ import org.slf4j.LoggerFactory;
|
||||||
*/
|
*/
|
||||||
public final class IndexSchema {
|
public final class IndexSchema {
|
||||||
public static final String DEFAULT_SCHEMA_FILE = "schema.xml";
|
public static final String DEFAULT_SCHEMA_FILE = "schema.xml";
|
||||||
|
public static final String LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";
|
||||||
|
|
||||||
final static Logger log = LoggerFactory.getLogger(IndexSchema.class);
|
final static Logger log = LoggerFactory.getLogger(IndexSchema.class);
|
||||||
private final SolrConfig solrConfig;
|
private final SolrConfig solrConfig;
|
||||||
|
@ -818,7 +821,24 @@ public final class IndexSchema {
|
||||||
NamedNodeMap attrs = node.getAttributes();
|
NamedNodeMap attrs = node.getAttributes();
|
||||||
String analyzerName = DOMUtil.getAttr(attrs,"class");
|
String analyzerName = DOMUtil.getAttr(attrs,"class");
|
||||||
if (analyzerName != null) {
|
if (analyzerName != null) {
|
||||||
return (Analyzer)loader.newInstance(analyzerName);
|
// nocommit: add support for CoreAware & Co here?
|
||||||
|
final Class<? extends Analyzer> clazz = loader.findClass(analyzerName).asSubclass(Analyzer.class);
|
||||||
|
try {
|
||||||
|
try {
|
||||||
|
// first try to use a ctor with version parameter (needed for many new Analyzers that have no default one anymore)
|
||||||
|
Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
|
||||||
|
final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM);
|
||||||
|
final Version luceneMatchVersion = (matchVersionStr == null) ?
|
||||||
|
solrConfig.luceneMatchVersion : Config.parseLuceneVersionString(matchVersionStr);
|
||||||
|
return cnstr.newInstance(luceneMatchVersion);
|
||||||
|
} catch (NoSuchMethodException nsme) {
|
||||||
|
// otherwise use default ctor
|
||||||
|
return clazz.newInstance();
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
|
||||||
|
"Cannot load analyzer: "+analyzerName );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
XPath xpath = XPathFactory.newInstance().newXPath();
|
XPath xpath = XPathFactory.newInstance().newXPath();
|
||||||
|
@ -832,7 +852,11 @@ public final class IndexSchema {
|
||||||
@Override
|
@Override
|
||||||
protected void init(CharFilterFactory plugin, Node node) throws Exception {
|
protected void init(CharFilterFactory plugin, Node node) throws Exception {
|
||||||
if( plugin != null ) {
|
if( plugin != null ) {
|
||||||
plugin.init( DOMUtil.toMapExcept(node.getAttributes(),"class") );
|
final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
|
||||||
|
// copy the luceneMatchVersion from config, if not set
|
||||||
|
if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
|
||||||
|
params.put(LUCENE_MATCH_VERSION_PARAM, solrConfig.luceneMatchVersion.toString());
|
||||||
|
plugin.init( params );
|
||||||
charFilters.add( plugin );
|
charFilters.add( plugin );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -858,7 +882,11 @@ public final class IndexSchema {
|
||||||
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
|
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
|
||||||
"The schema defines multiple tokenizers for: "+node );
|
"The schema defines multiple tokenizers for: "+node );
|
||||||
}
|
}
|
||||||
plugin.init( DOMUtil.toMapExcept(node.getAttributes(),"class") );
|
final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
|
||||||
|
// copy the luceneMatchVersion from config, if not set
|
||||||
|
if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
|
||||||
|
params.put(LUCENE_MATCH_VERSION_PARAM, solrConfig.luceneMatchVersion.toString());
|
||||||
|
plugin.init( params );
|
||||||
tokenizers.add( plugin );
|
tokenizers.add( plugin );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -884,7 +912,11 @@ public final class IndexSchema {
|
||||||
@Override
|
@Override
|
||||||
protected void init(TokenFilterFactory plugin, Node node) throws Exception {
|
protected void init(TokenFilterFactory plugin, Node node) throws Exception {
|
||||||
if( plugin != null ) {
|
if( plugin != null ) {
|
||||||
plugin.init( DOMUtil.toMapExcept(node.getAttributes(),"class") );
|
final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class");
|
||||||
|
// copy the luceneMatchVersion from config, if not set
|
||||||
|
if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM))
|
||||||
|
params.put(LUCENE_MATCH_VERSION_PARAM, solrConfig.luceneMatchVersion.toString());
|
||||||
|
plugin.init( params );
|
||||||
filters.add( plugin );
|
filters.add( plugin );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -80,7 +80,7 @@ public class SolrInfoMBeanTest extends TestCase
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assertTrue( "there are at least 10 SolrInfoMBean that should be found in the classpath.", checked > 10 );
|
assertTrue( "there are at least 10 SolrInfoMBean that should be found in the classpath, found " + checked, checked > 10 );
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<Class> getClassesForPackage(String pckgname) throws Exception {
|
private static List<Class> getClassesForPackage(String pckgname) throws Exception {
|
||||||
|
|
|
@ -19,6 +19,8 @@ package org.apache.solr.analysis;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
@ -34,6 +36,9 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
*/
|
*/
|
||||||
public abstract class BaseTokenTestCase extends TestCase
|
public abstract class BaseTokenTestCase extends TestCase
|
||||||
{
|
{
|
||||||
|
protected static final Map<String,String> DEFAULT_VERSION_PARAM =
|
||||||
|
Collections.singletonMap("luceneMatchVersion", "LUCENE_30");
|
||||||
|
|
||||||
// some helpers to test Analyzers and TokenStreams:
|
// some helpers to test Analyzers and TokenStreams:
|
||||||
// these are taken from Lucene's BaseTokenStreamTestCase
|
// these are taken from Lucene's BaseTokenStreamTestCase
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,82 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||||
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
import org.apache.solr.schema.FieldType;
|
||||||
|
import org.apache.solr.util.AbstractSolrTestCase;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for luceneMatchVersion property for analyzers
|
||||||
|
*/
|
||||||
|
public class TestLuceneMatchVersion extends AbstractSolrTestCase {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getSchemaFile() {
|
||||||
|
return "schema-luceneMatchVersion.xml";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getSolrConfigFile() {
|
||||||
|
return "solrconfig.xml";
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final Version DEFAULT_VERSION = Version.LUCENE_30;
|
||||||
|
|
||||||
|
public void testStandardTokenizerVersions() throws Exception {
|
||||||
|
assertEquals(DEFAULT_VERSION, solrConfig.luceneMatchVersion);
|
||||||
|
|
||||||
|
final IndexSchema schema = h.getCore().getSchema();
|
||||||
|
|
||||||
|
FieldType type = schema.getFieldType("textDefault");
|
||||||
|
TokenizerChain ana = (TokenizerChain) type.getAnalyzer();
|
||||||
|
assertEquals(DEFAULT_VERSION, ((BaseTokenizerFactory) ana.getTokenizerFactory()).luceneMatchVersion);
|
||||||
|
assertEquals(DEFAULT_VERSION, ((BaseTokenFilterFactory) ana.getTokenFilterFactories()[2]).luceneMatchVersion);
|
||||||
|
TokenizerChain.TokenStreamInfo tsi = ana.getStream("textDefault",new StringReader(""));
|
||||||
|
StandardTokenizer tok = (StandardTokenizer) tsi.getTokenizer();
|
||||||
|
assertTrue(tok.isReplaceInvalidAcronym());
|
||||||
|
|
||||||
|
type = schema.getFieldType("text20");
|
||||||
|
ana = (TokenizerChain) type.getAnalyzer();
|
||||||
|
assertEquals(Version.LUCENE_20, ((BaseTokenizerFactory) ana.getTokenizerFactory()).luceneMatchVersion);
|
||||||
|
assertEquals(Version.LUCENE_24, ((BaseTokenFilterFactory) ana.getTokenFilterFactories()[2]).luceneMatchVersion);
|
||||||
|
tsi = ana.getStream("text20",new StringReader(""));
|
||||||
|
tok = (StandardTokenizer) tsi.getTokenizer();
|
||||||
|
assertFalse(tok.isReplaceInvalidAcronym());
|
||||||
|
|
||||||
|
// this is a hack to get the private matchVersion field in StandardAnalyzer, may break in later lucene versions - we have no getter :(
|
||||||
|
final Field matchVersionField = StandardAnalyzer.class.getDeclaredField("matchVersion");
|
||||||
|
matchVersionField.setAccessible(true);
|
||||||
|
|
||||||
|
type = schema.getFieldType("textStandardAnalyzerDefault");
|
||||||
|
Analyzer ana1 = type.getAnalyzer();
|
||||||
|
assertTrue(ana1 instanceof StandardAnalyzer);
|
||||||
|
assertEquals(DEFAULT_VERSION, matchVersionField.get(ana1));
|
||||||
|
|
||||||
|
type = schema.getFieldType("textStandardAnalyzer20");
|
||||||
|
ana1 = type.getAnalyzer();
|
||||||
|
assertTrue(ana1 instanceof StandardAnalyzer);
|
||||||
|
assertEquals(Version.LUCENE_20, matchVersionField.get(ana1));
|
||||||
|
}
|
||||||
|
}
|
|
@ -34,6 +34,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
|
||||||
public void testStandardTokenizer() throws Exception {
|
public void testStandardTokenizer() throws Exception {
|
||||||
Reader reader = new StringReader("What's this thing do?");
|
Reader reader = new StringReader("What's this thing do?");
|
||||||
StandardTokenizerFactory factory = new StandardTokenizerFactory();
|
StandardTokenizerFactory factory = new StandardTokenizerFactory();
|
||||||
|
factory.init(DEFAULT_VERSION_PARAM);
|
||||||
Tokenizer stream = factory.create(reader);
|
Tokenizer stream = factory.create(reader);
|
||||||
assertTokenStreamContents(stream,
|
assertTokenStreamContents(stream,
|
||||||
new String[] {"What's", "this", "thing", "do" });
|
new String[] {"What's", "this", "thing", "do" });
|
||||||
|
@ -45,7 +46,9 @@ public class TestStandardFactories extends BaseTokenTestCase {
|
||||||
public void testStandardFilter() throws Exception {
|
public void testStandardFilter() throws Exception {
|
||||||
Reader reader = new StringReader("What's this thing do?");
|
Reader reader = new StringReader("What's this thing do?");
|
||||||
StandardTokenizerFactory factory = new StandardTokenizerFactory();
|
StandardTokenizerFactory factory = new StandardTokenizerFactory();
|
||||||
|
factory.init(DEFAULT_VERSION_PARAM);
|
||||||
StandardFilterFactory filterFactory = new StandardFilterFactory();
|
StandardFilterFactory filterFactory = new StandardFilterFactory();
|
||||||
|
filterFactory.init(DEFAULT_VERSION_PARAM);
|
||||||
Tokenizer tokenizer = factory.create(reader);
|
Tokenizer tokenizer = factory.create(reader);
|
||||||
TokenStream stream = filterFactory.create(tokenizer);
|
TokenStream stream = filterFactory.create(tokenizer);
|
||||||
assertTokenStreamContents(stream,
|
assertTokenStreamContents(stream,
|
||||||
|
@ -58,6 +61,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
|
||||||
public void testKeywordTokenizer() throws Exception {
|
public void testKeywordTokenizer() throws Exception {
|
||||||
Reader reader = new StringReader("What's this thing do?");
|
Reader reader = new StringReader("What's this thing do?");
|
||||||
KeywordTokenizerFactory factory = new KeywordTokenizerFactory();
|
KeywordTokenizerFactory factory = new KeywordTokenizerFactory();
|
||||||
|
factory.init(DEFAULT_VERSION_PARAM);
|
||||||
Tokenizer stream = factory.create(reader);
|
Tokenizer stream = factory.create(reader);
|
||||||
assertTokenStreamContents(stream,
|
assertTokenStreamContents(stream,
|
||||||
new String[] {"What's this thing do?"});
|
new String[] {"What's this thing do?"});
|
||||||
|
@ -69,6 +73,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
|
||||||
public void testWhitespaceTokenizer() throws Exception {
|
public void testWhitespaceTokenizer() throws Exception {
|
||||||
Reader reader = new StringReader("What's this thing do?");
|
Reader reader = new StringReader("What's this thing do?");
|
||||||
WhitespaceTokenizerFactory factory = new WhitespaceTokenizerFactory();
|
WhitespaceTokenizerFactory factory = new WhitespaceTokenizerFactory();
|
||||||
|
factory.init(DEFAULT_VERSION_PARAM);
|
||||||
Tokenizer stream = factory.create(reader);
|
Tokenizer stream = factory.create(reader);
|
||||||
assertTokenStreamContents(stream,
|
assertTokenStreamContents(stream,
|
||||||
new String[] {"What's", "this", "thing", "do?"});
|
new String[] {"What's", "this", "thing", "do?"});
|
||||||
|
@ -80,6 +85,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
|
||||||
public void testLetterTokenizer() throws Exception {
|
public void testLetterTokenizer() throws Exception {
|
||||||
Reader reader = new StringReader("What's this thing do?");
|
Reader reader = new StringReader("What's this thing do?");
|
||||||
LetterTokenizerFactory factory = new LetterTokenizerFactory();
|
LetterTokenizerFactory factory = new LetterTokenizerFactory();
|
||||||
|
factory.init(DEFAULT_VERSION_PARAM);
|
||||||
Tokenizer stream = factory.create(reader);
|
Tokenizer stream = factory.create(reader);
|
||||||
assertTokenStreamContents(stream,
|
assertTokenStreamContents(stream,
|
||||||
new String[] {"What", "s", "this", "thing", "do"});
|
new String[] {"What", "s", "this", "thing", "do"});
|
||||||
|
@ -91,6 +97,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
|
||||||
public void testLowerCaseTokenizer() throws Exception {
|
public void testLowerCaseTokenizer() throws Exception {
|
||||||
Reader reader = new StringReader("What's this thing do?");
|
Reader reader = new StringReader("What's this thing do?");
|
||||||
LowerCaseTokenizerFactory factory = new LowerCaseTokenizerFactory();
|
LowerCaseTokenizerFactory factory = new LowerCaseTokenizerFactory();
|
||||||
|
factory.init(DEFAULT_VERSION_PARAM);
|
||||||
Tokenizer stream = factory.create(reader);
|
Tokenizer stream = factory.create(reader);
|
||||||
assertTokenStreamContents(stream,
|
assertTokenStreamContents(stream,
|
||||||
new String[] {"what", "s", "this", "thing", "do"});
|
new String[] {"what", "s", "this", "thing", "do"});
|
||||||
|
@ -103,6 +110,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
|
||||||
Reader reader = new StringReader("Česká");
|
Reader reader = new StringReader("Česká");
|
||||||
Tokenizer tokenizer = new WhitespaceTokenizer(reader);
|
Tokenizer tokenizer = new WhitespaceTokenizer(reader);
|
||||||
ASCIIFoldingFilterFactory factory = new ASCIIFoldingFilterFactory();
|
ASCIIFoldingFilterFactory factory = new ASCIIFoldingFilterFactory();
|
||||||
|
factory.init(DEFAULT_VERSION_PARAM);
|
||||||
TokenStream stream = factory.create(tokenizer);
|
TokenStream stream = factory.create(tokenizer);
|
||||||
assertTokenStreamContents(stream, new String[] { "Ceska" });
|
assertTokenStreamContents(stream, new String[] { "Ceska" });
|
||||||
}
|
}
|
||||||
|
@ -115,6 +123,7 @@ public class TestStandardFactories extends BaseTokenTestCase {
|
||||||
Reader reader = new StringReader("Česká");
|
Reader reader = new StringReader("Česká");
|
||||||
Tokenizer tokenizer = new WhitespaceTokenizer(reader);
|
Tokenizer tokenizer = new WhitespaceTokenizer(reader);
|
||||||
ISOLatin1AccentFilterFactory factory = new ISOLatin1AccentFilterFactory();
|
ISOLatin1AccentFilterFactory factory = new ISOLatin1AccentFilterFactory();
|
||||||
|
factory.init(DEFAULT_VERSION_PARAM);
|
||||||
TokenStream stream = factory.create(tokenizer);
|
TokenStream stream = factory.create(tokenizer);
|
||||||
assertTokenStreamContents(stream, new String[] { "Česka" });
|
assertTokenStreamContents(stream, new String[] { "Česka" });
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,51 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<schema name="luceneMatchVersionTest" version="1.1">
|
||||||
|
<types>
|
||||||
|
<fieldtype name="text20" class="solr.TextField">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.StandardTokenizerFactory" luceneMatchVersion="LUCENE_20"/>
|
||||||
|
<filter class="solr.StandardFilterFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.StopFilterFactory" luceneMatchVersion="LUCENE_24"/>
|
||||||
|
<filter class="solr.EnglishPorterFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldtype>
|
||||||
|
<fieldtype name="textDefault" class="solr.TextField">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||||
|
<filter class="solr.StandardFilterFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.StopFilterFactory"/>
|
||||||
|
<filter class="solr.EnglishPorterFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldtype>
|
||||||
|
<fieldtype name="textStandardAnalyzer20" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer" luceneMatchVersion="LUCENE_20"/>
|
||||||
|
</fieldtype>
|
||||||
|
<fieldtype name="textStandardAnalyzerDefault" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
|
||||||
|
</fieldtype>
|
||||||
|
</types>
|
||||||
|
<fields>
|
||||||
|
<field name="text20" type="text20" indexed="true" stored="false" />
|
||||||
|
<field name="textDefault" type="textDefault" indexed="true" stored="false" />
|
||||||
|
<field name="textStandardAnalyzer20" type="textStandardAnalyzer20" indexed="true" stored="false" />
|
||||||
|
<field name="textStandardAnalyzerDefault" type="textStandardAnalyzerDefault" indexed="true" stored="false" />
|
||||||
|
</fields>
|
||||||
|
</schema>
|
|
@ -44,6 +44,8 @@
|
||||||
It defaults to "index" if not present, and should probably
|
It defaults to "index" if not present, and should probably
|
||||||
not be changed if replication is in use. -->
|
not be changed if replication is in use. -->
|
||||||
<dataDir>${solr.data.dir:./solr/data}</dataDir>
|
<dataDir>${solr.data.dir:./solr/data}</dataDir>
|
||||||
|
|
||||||
|
<luceneMatchVersion>LUCENE_30</luceneMatchVersion>
|
||||||
|
|
||||||
<indexDefaults>
|
<indexDefaults>
|
||||||
<!-- Values here affect all index writers and act as a default
|
<!-- Values here affect all index writers and act as a default
|
||||||
|
|
Loading…
Reference in New Issue