diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index da6d0a2068c..faa29f8bd73 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -76,6 +76,9 @@ Changes in Runtime Behavior
New Features
+* LUCENE-5356: Morfologik filter can accept custom dictionary resources.
+ (Michal Hlavac, Dawid Weiss)
+
* LUCENE-5454: Add SortedSetSortField to lucene/sandbox, to allow sorting
on multi-valued field. (Robert Muir)
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
index f64f54888c4..985ac90600e 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
@@ -31,26 +31,34 @@ import org.apache.lucene.util.Version;
* @see Morfologik project page
*/
public class MorfologikAnalyzer extends Analyzer {
+ private final String dictionary;
private final Version version;
/**
* Builds an analyzer with the default Morfologik's dictionary (polimorf).
*
- * @param version
- * Lucene compatibility version
+ * @param version Lucene compatibility version
+ * @param dictionaryResource A constant specifying which dictionary to choose. The
+ * dictionary resource must be named morfologik/dictionaries/{dictionaryResource}.dict
+ * and have an associated .info
metadata file. See the Morfologik project
+ * for details.
+ *
+ * @see "http://morfologik.blogspot.com/"
*/
- public MorfologikAnalyzer(final Version version) {
+ public MorfologikAnalyzer(final Version version, final String dictionaryResource) {
this.version = version;
+ this.dictionary = dictionaryResource;
+ }
+ public MorfologikAnalyzer(final Version version) {
+ this(version, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
}
-
/**
* Creates a
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @param field ignored field name
- * @return A
- * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
+ * @return A {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter} and {@link MorfologikFilter}.
*/
@@ -60,6 +68,6 @@ public class MorfologikAnalyzer extends Analyzer {
return new TokenStreamComponents(
src,
- new MorfologikFilter(new StandardFilter(this.version, src), this.version));
+ new MorfologikFilter(new StandardFilter(this.version, src), dictionary, this.version));
}
}
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
index 6bb08036400..08b4ce4dd3c 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
@@ -61,20 +61,29 @@ public class MorfologikFilter extends TokenFilter {
private int lemmaListIndex;
/**
- * Creates MorfologikFilter
- * @param in input token stream
- * @param version Lucene version compatibility for lowercasing.
+ * Creates a filter with the default (Polish) dictionary.
*/
public MorfologikFilter(final TokenStream in, final Version version) {
+ this(in, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE, version);
+ }
+
+ /**
+ * Creates a filter with a given dictionary resource.
+ *
+ * @param in input token stream.
+ * @param dict Dictionary resource from classpath.
+ * @param version Lucene version compatibility for lowercasing.
+ */
+ public MorfologikFilter(final TokenStream in, final String dict, final Version version) {
super(in);
this.input = in;
-
+
// SOLR-4007: temporarily substitute context class loader to allow finding dictionary resources.
Thread me = Thread.currentThread();
ClassLoader cl = me.getContextClassLoader();
try {
- me.setContextClassLoader(PolishStemmer.class.getClassLoader());
- this.stemmer = new PolishStemmer();
+ me.setContextClassLoader(morfologik.stemming.Dictionary.class.getClassLoader());
+ this.stemmer = new DictionaryLookup(morfologik.stemming.Dictionary.getForLanguage(dict));
this.charUtils = CharacterUtils.getInstance(version);
this.lemmaList = Collections.emptyList();
} finally {
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
index 388a441ee3a..41f09473f32 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
@@ -23,22 +23,37 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
- * Filter factory for {@link MorfologikFilter}.
+ * Filter factory for {@link MorfologikFilter}. For backward compatibility polish
+ * dictionary is used as default. You can change dictionary resource
+ * by dictionary-resource parameter.
*
* <fieldType name="text_polish" class="solr.TextField" positionIncrementGap="100"> * <analyzer> * <tokenizer class="solr.WhitespaceTokenizerFactory"/> - * <filter class="solr.MorfologikFilterFactory" /> + * <filter class="solr.MorfologikFilterFactory" dictionary-resource="pl" /> * </analyzer> * </fieldType>* * @see Morfologik web site */ public class MorfologikFilterFactory extends TokenFilterFactory { + /** + * The default dictionary resource (for Polish). + */ + public static final String DEFAULT_DICTIONARY_RESOURCE = "pl"; + + /** + * Stemming dictionary resource. See {@link MorfologikAnalyzer} for more details. + */ + private final String dictionaryResource; + /** Schema attribute. */ @Deprecated public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary"; + /** Dictionary resource */ + public static final String DICTIONARY_RESOURCE_ATTRIBUTE = "dictionary-resource"; + /** Creates a new MorfologikFilterFactory */ public MorfologikFilterFactory(Map