mirror of https://github.com/apache/lucene.git
LUCENE-6833: Upgraded Morfologik to version 2.0.1. The 'dictionary' attribute has been
reverted back and now points at the dictionary resource to be loaded instead of the default Polish dictionary. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1707457 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4a78c824a5
commit
7e6ba558ff
|
@ -207,8 +207,12 @@ Other
|
|||
* LUCENE-6761: MatchAllDocsQuery's Scorers do not expose approximations
|
||||
anymore. (Adrien Grand)
|
||||
|
||||
* LUCENE-6775: Improved MorfologikFilterFactory to allow loading of
|
||||
custom dictionaries from ResourceLoader. (Uwe Schindler)
|
||||
* LUCENE-6775, LUCENE-6833: Improved MorfologikFilterFactory to allow
|
||||
loading of custom dictionaries from ResourceLoader. Upgraded
|
||||
Morfologik to version 2.0.1. The 'dictionary' attribute has been
|
||||
reverted back and now points at the dictionary resource to be
|
||||
loaded instead of the default Polish dictionary.
|
||||
(Uwe Schindler, Dawid Weiss)
|
||||
|
||||
* LUCENE-6797: Make GeoCircle an interface and use a factory to create
|
||||
it, to eventually handle degenerate cases (Karl Wright via Mike
|
||||
|
|
|
@ -18,9 +18,8 @@
|
|||
-->
|
||||
|
||||
<project name="analyzers-morfologik" default="default">
|
||||
|
||||
<description>
|
||||
Analyzer for indexing Polish
|
||||
Analyzer for dictionary stemming, built-in Polish dictionary
|
||||
</description>
|
||||
|
||||
<import file="../analysis-module-build.xml"/>
|
||||
|
@ -30,6 +29,12 @@
|
|||
<fileset dir="lib"/>
|
||||
<path refid="base.classpath"/>
|
||||
</path>
|
||||
|
||||
|
||||
<path id="test.classpath">
|
||||
<path refid="test.base.classpath" />
|
||||
<pathelement path="src/test-files" />
|
||||
</path>
|
||||
|
||||
<target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
|
||||
</project>
|
||||
|
|
|
@ -20,6 +20,9 @@ package org.apache.lucene.analysis.morfologik;
|
|||
|
||||
import java.io.Reader;
|
||||
|
||||
import morfologik.stemming.Dictionary;
|
||||
import morfologik.stemming.polish.PolishStemmer;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
|
@ -30,27 +33,23 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
|
|||
* @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
|
||||
*/
|
||||
public class MorfologikAnalyzer extends Analyzer {
|
||||
private final String dictionary;
|
||||
private final Dictionary dictionary;
|
||||
|
||||
/**
|
||||
* Builds an analyzer with an explicit dictionary resource.
|
||||
* Builds an analyzer with an explicit {@link Dictionary} resource.
|
||||
*
|
||||
* @param dictionaryResource A constant specifying which dictionary to choose. The
|
||||
* dictionary resource must be named <code>morfologik/dictionaries/{dictionaryResource}.dict</code>
|
||||
* and have an associated <code>.info</code> metadata file. See the Morfologik project
|
||||
* for details.
|
||||
*
|
||||
* @see <a href="http://morfologik.blogspot.com/">http://morfologik.blogspot.com/</a>
|
||||
* @param dictionary A prebuilt automaton with inflected and base word forms.
|
||||
* @see <a href="https://github.com/morfologik/">https://github.com/morfologik/</a>
|
||||
*/
|
||||
public MorfologikAnalyzer(final String dictionaryResource) {
|
||||
this.dictionary = dictionaryResource;
|
||||
public MorfologikAnalyzer(final Dictionary dictionary) {
|
||||
this.dictionary = dictionary;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an analyzer with the default Morfologik's Polish dictionary.
|
||||
*/
|
||||
public MorfologikAnalyzer() {
|
||||
this(MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
|
||||
this(new PolishStemmer().getDictionary());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -18,17 +18,16 @@ package org.apache.lucene.analysis.morfologik;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import morfologik.stemming.Dictionary;
|
||||
import morfologik.stemming.DictionaryLookup;
|
||||
import morfologik.stemming.IStemmer;
|
||||
import morfologik.stemming.WordData;
|
||||
import morfologik.stemming.polish.PolishStemmer;
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -70,20 +69,9 @@ public class MorfologikFilter extends TokenFilter {
|
|||
* Creates a filter with the default (Polish) dictionary.
|
||||
*/
|
||||
public MorfologikFilter(final TokenStream in) {
|
||||
this(in, DictionaryHolder.DEFAULT_DICT);
|
||||
this(in, new PolishStemmer().getDictionary());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a filter with a given dictionary resource.
|
||||
*
|
||||
* @param in input token stream.
|
||||
* @param dictResource Dictionary resource name in classpath.
|
||||
*/
|
||||
public MorfologikFilter(final TokenStream in, final String dictResource) {
|
||||
this(in, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE.equals(dictResource) ?
|
||||
DictionaryHolder.DEFAULT_DICT : loadDictionaryResource(dictResource));
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a filter with a given dictionary.
|
||||
*
|
||||
|
@ -180,23 +168,4 @@ public class MorfologikFilter extends TokenFilter {
|
|||
tagsList.clear();
|
||||
super.reset();
|
||||
}
|
||||
|
||||
/** This method was added, because Morfologik uses context classloader and fails to load from our classloader (bug with absolute path). */
|
||||
static Dictionary loadDictionaryResource(String resource) {
|
||||
Objects.requireNonNull(resource, "Morfologik language code may not be null");
|
||||
final String dictPath = "/morfologik/dictionaries/" + resource + ".dict";
|
||||
final String metaPath = Dictionary.getExpectedFeaturesName(dictPath);
|
||||
|
||||
try (final InputStream dictIn = Objects.requireNonNull(Dictionary.class.getResourceAsStream(dictPath), "Unable to find Morfologik dictionary: " + dictPath);
|
||||
final InputStream metaIn = Objects.requireNonNull(Dictionary.class.getResourceAsStream(metaPath), "Unable to find Morfologik metadata: " + metaPath)) {
|
||||
return Dictionary.readAndClose(dictIn, metaIn);
|
||||
} catch (IOException ioe) {
|
||||
throw new RuntimeException("IOException while loading Morfologik dictionary and metadata.", ioe);
|
||||
}
|
||||
}
|
||||
|
||||
/** This holder is for the default Polish dictionary */
|
||||
static final class DictionaryHolder {
|
||||
static final Dictionary DEFAULT_DICT = loadDictionaryResource(MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,11 +19,12 @@ package org.apache.lucene.analysis.morfologik;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import morfologik.stemming.Dictionary;
|
||||
import morfologik.stemming.DictionaryMetadata;
|
||||
import morfologik.stemming.polish.PolishStemmer;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
|
@ -31,75 +32,47 @@ import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
|||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Filter factory for {@link MorfologikFilter}. For backward compatibility polish
|
||||
* dictionary is used as default. You can change dictionary resource
|
||||
* by dictionary-resource parameter:
|
||||
* Filter factory for {@link MorfologikFilter}.
|
||||
*
|
||||
* <p>An explicit resource name of the dictionary ({@code ".dict"}) can be
|
||||
* provided via the <code>dictionary</code> attribute, as the example below demonstrates:
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_polish" class="solr.TextField" positionIncrementGap="100">
|
||||
* <fieldType name="text_mylang" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.MorfologikFilterFactory" dictionary-resource="pl" />
|
||||
* <filter class="solr.MorfologikFilterFactory" dictionary="mylang.dict" />
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* <p>Alternatively, you can pass in the filenames of FSA ({@code ".dict"} and features "{@code ".info"}" file
|
||||
* (if the features file is not given, its name is derived from the FSA file):
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_polish" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.MorfologikFilterFactory" dictionary-fsa-file="mylang.dict" dictionary-features-file="mylang.info" />
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* <p>If the dictionary attribute is not provided, the Polish dictionary is loaded
|
||||
* and used by default.
|
||||
*
|
||||
* @see <a href="http://morfologik.blogspot.com/">Morfologik web site</a>
|
||||
*/
|
||||
public class MorfologikFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
/**
|
||||
* The default dictionary resource (for Polish).
|
||||
*/
|
||||
public static final String DEFAULT_DICTIONARY_RESOURCE = "pl";
|
||||
/** Dictionary resource attribute (should have {@code ".dict"} suffix), loaded from {@link ResourceLoader}. */
|
||||
public static final String DICTIONARY_ATTRIBUTE = "dictionary";
|
||||
|
||||
/** Dictionary resource */
|
||||
public static final String DICTIONARY_RESOURCE_ATTRIBUTE = "dictionary-resource";
|
||||
/** {@link #DICTIONARY_ATTRIBUTE} value passed to {@link #inform}. */
|
||||
private String resourceName;
|
||||
|
||||
/** Dictionary FSA file (should have {@code ".dict"} suffix), loaded from {@link ResourceLoader}. */
|
||||
public static final String DICTIONARY_FSA_FILE_ATTRIBUTE = "dictionary-fsa-file";
|
||||
|
||||
/** Dictionary features/properties file, loaded from {@link ResourceLoader}. If not given, this
|
||||
* loads the file with same name like {@link #DICTIONARY_FSA_FILE_ATTRIBUTE}, but with
|
||||
* {@code ".info"} suffix.
|
||||
*/
|
||||
public static final String DICTIONARY_FEATURES_FILE_ATTRIBUTE = "dictionary-features-file";
|
||||
|
||||
private final String dictionaryFsaFile, dictionaryFeaturesFile, dictionaryResource;
|
||||
private Dictionary dictionary; // initialized on inform()
|
||||
/** Loaded {@link Dictionary}, initialized on {@link #inform(ResourceLoader)}. */
|
||||
private Dictionary dictionary;
|
||||
|
||||
/** Creates a new MorfologikFilterFactory */
|
||||
public MorfologikFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
|
||||
// first check FSA and features (at least FSA must be given, features name is guessed):
|
||||
dictionaryFsaFile = get(args, DICTIONARY_FSA_FILE_ATTRIBUTE);
|
||||
dictionaryFeaturesFile = get(args, DICTIONARY_FEATURES_FILE_ATTRIBUTE,
|
||||
(dictionaryFsaFile == null) ? null : Dictionary.getExpectedFeaturesName(dictionaryFsaFile));
|
||||
|
||||
if (dictionaryFsaFile == null && dictionaryFeaturesFile == null) {
|
||||
// if we have no FSA/features combination, we resolve the classpath resource:
|
||||
dictionaryResource = get(args, DICTIONARY_RESOURCE_ATTRIBUTE, DEFAULT_DICTIONARY_RESOURCE);
|
||||
} else if (dictionaryFsaFile == null || dictionaryFeaturesFile == null) {
|
||||
// if we have incomplete FSA/features tuple in args
|
||||
throw new IllegalArgumentException(String.format(Locale.ENGLISH, "Missing '%s' or '%s' attribute.",
|
||||
DICTIONARY_FSA_FILE_ATTRIBUTE, DICTIONARY_FEATURES_FILE_ATTRIBUTE));
|
||||
} else {
|
||||
dictionaryResource = null;
|
||||
if (get(args, DICTIONARY_RESOURCE_ATTRIBUTE) != null) {
|
||||
// fail if both is given: FSA/features files + classpath resource
|
||||
throw new IllegalArgumentException(String.format(Locale.ENGLISH, "Cannot give '%s' and '%s'/'%s' at the same time.",
|
||||
DICTIONARY_RESOURCE_ATTRIBUTE, DICTIONARY_FSA_FILE_ATTRIBUTE, DICTIONARY_FEATURES_FILE_ATTRIBUTE));
|
||||
}
|
||||
// Be specific about no-longer-supported dictionary attribute.
|
||||
final String DICTIONARY_RESOURCE_ATTRIBUTE = "dictionary-resource";
|
||||
String dictionaryResource = get(args, DICTIONARY_RESOURCE_ATTRIBUTE);
|
||||
if (dictionaryResource != null && !dictionaryResource.isEmpty()) {
|
||||
throw new IllegalArgumentException("The " + DICTIONARY_RESOURCE_ATTRIBUTE + " attribute is no "
|
||||
+ "longer supported. Use the '" + DICTIONARY_ATTRIBUTE + "' attribute instead (see LUCENE-6833).");
|
||||
}
|
||||
|
||||
|
||||
resourceName = get(args, DICTIONARY_ATTRIBUTE);
|
||||
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
@ -107,16 +80,14 @@ public class MorfologikFilterFactory extends TokenFilterFactory implements Resou
|
|||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
if (dictionaryFsaFile != null) {
|
||||
assert dictionaryFeaturesFile != null;
|
||||
assert dictionaryResource == null;
|
||||
try (final InputStream dictIn = loader.openResource(dictionaryFsaFile);
|
||||
final InputStream metaIn = loader.openResource(dictionaryFeaturesFile)) {
|
||||
this.dictionary = Dictionary.readAndClose(dictIn, metaIn);
|
||||
}
|
||||
if (resourceName == null) {
|
||||
// Get the dictionary lazily, does not hold up memory.
|
||||
this.dictionary = new PolishStemmer().getDictionary();
|
||||
} else {
|
||||
assert dictionaryResource != null;
|
||||
this.dictionary = MorfologikFilter.loadDictionaryResource(dictionaryResource);
|
||||
try (InputStream dict = loader.openResource(resourceName);
|
||||
InputStream meta = loader.openResource(DictionaryMetadata.getExpectedMetadataFileName(resourceName))) {
|
||||
this.dictionary = Dictionary.read(dict, meta);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1,24 @@
|
|||
#
|
||||
# An example stemming dictionary file for Morfologik filter.
|
||||
#
|
||||
# Compile with Morfologik-stemming, see
|
||||
# https://github.com/morfologik/morfologik-stemming/wiki/Examples
|
||||
#
|
||||
|
||||
# Author of the dictionary.
|
||||
fsa.dict.author=Acme Inc.
|
||||
|
||||
# Date the dictionary data was assembled (not compilation time!).
|
||||
fsa.dict.created=2015/10/08 09:16:00
|
||||
|
||||
# The license for the dictionary data.
|
||||
fsa.dict.license=ASL 2.0
|
||||
|
||||
# Character encoding inside the automaton (and input file).
|
||||
fsa.dict.encoding=UTF-8
|
||||
|
||||
# field separator (lemma;inflected;tag)
|
||||
fsa.dict.separator=;
|
||||
|
||||
# type of base/lemma compression encoding before automaton compression.
|
||||
fsa.dict.encoder=INFIX
|
|
@ -0,0 +1,2 @@
|
|||
lemma1;inflected1;tag1
|
||||
lemma2;inflected2;tag2
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.morfologik;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.StringReader;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
|
@ -31,51 +33,59 @@ import org.apache.lucene.analysis.util.ResourceLoader;
|
|||
* Test for {@link MorfologikFilterFactory}.
|
||||
*/
|
||||
public class TestMorfologikFilterFactory extends BaseTokenStreamTestCase {
|
||||
final ResourceLoader loader = new ClasspathResourceLoader(getClass());
|
||||
private static class ForbidResourcesLoader implements ResourceLoader {
|
||||
@Override
|
||||
public InputStream openResource(String resource) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T newInstance(String cname, Class<T> expectedType) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
public void testDefaultDictionary() throws Exception {
|
||||
StringReader reader = new StringReader("rowery bilety");
|
||||
MorfologikFilterFactory factory = new MorfologikFilterFactory(Collections.<String,String>emptyMap());
|
||||
factory.inform(loader);
|
||||
factory.inform(new ForbidResourcesLoader());
|
||||
TokenStream stream = whitespaceMockTokenizer(reader);
|
||||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
|
||||
}
|
||||
|
||||
public void testResourceDictionary() throws Exception {
|
||||
StringReader reader = new StringReader("rowery bilety");
|
||||
|
||||
public void testExplicitDictionary() throws Exception {
|
||||
final ResourceLoader loader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);
|
||||
|
||||
StringReader reader = new StringReader("inflected1 inflected2");
|
||||
Map<String,String> params = new HashMap<>();
|
||||
params.put(MorfologikFilterFactory.DICTIONARY_RESOURCE_ATTRIBUTE, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
|
||||
params.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "custom-dictionary.dict");
|
||||
MorfologikFilterFactory factory = new MorfologikFilterFactory(params);
|
||||
factory.inform(loader);
|
||||
TokenStream stream = whitespaceMockTokenizer(reader);
|
||||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
|
||||
assertTokenStreamContents(stream, new String[] {"lemma1", "lemma2"});
|
||||
}
|
||||
|
||||
public void testResourceLoaderDictionary1() throws Exception {
|
||||
StringReader reader = new StringReader("rowery bilety");
|
||||
Map<String,String> params = new HashMap<>();
|
||||
params.put(MorfologikFilterFactory.DICTIONARY_FSA_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.dict");
|
||||
MorfologikFilterFactory factory = new MorfologikFilterFactory(params);
|
||||
factory.inform(loader);
|
||||
TokenStream stream = whitespaceMockTokenizer(reader);
|
||||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
|
||||
|
||||
public void testMissingDictionary() throws Exception {
|
||||
final ResourceLoader loader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);
|
||||
|
||||
try {
|
||||
Map<String,String> params = new HashMap<>();
|
||||
params.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "missing-dictionary-resource.dict");
|
||||
MorfologikFilterFactory factory = new MorfologikFilterFactory(params);
|
||||
factory.inform(loader);
|
||||
fail();
|
||||
} catch (IOException e) {
|
||||
assertTrue(e.getMessage().contains("Resource not found"));
|
||||
}
|
||||
}
|
||||
|
||||
public void testResourceLoaderDictionary2() throws Exception {
|
||||
StringReader reader = new StringReader("rowery bilety");
|
||||
Map<String,String> params = new HashMap<>();
|
||||
params.put(MorfologikFilterFactory.DICTIONARY_FSA_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.dict");
|
||||
params.put(MorfologikFilterFactory.DICTIONARY_FEATURES_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.info");
|
||||
MorfologikFilterFactory factory = new MorfologikFilterFactory(params);
|
||||
factory.inform(loader);
|
||||
TokenStream stream = whitespaceMockTokenizer(reader);
|
||||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
|
||||
}
|
||||
|
||||
|
||||
/** Test that bogus arguments result in exception */
|
||||
public void testBogusArguments() throws Exception {
|
||||
try {
|
||||
|
@ -87,40 +97,4 @@ public class TestMorfologikFilterFactory extends BaseTokenStreamTestCase {
|
|||
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
||||
}
|
||||
}
|
||||
|
||||
public void testIncompatibleArgs1() throws Exception {
|
||||
try {
|
||||
HashMap<String,String> params = new HashMap<String,String>();
|
||||
params.put(MorfologikFilterFactory.DICTIONARY_RESOURCE_ATTRIBUTE, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
|
||||
params.put(MorfologikFilterFactory.DICTIONARY_FSA_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.dict");
|
||||
new MorfologikFilterFactory(params);
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
assertTrue(expected.getMessage().contains("at the same time"));
|
||||
}
|
||||
}
|
||||
|
||||
public void testIncompatibleArgs2() throws Exception {
|
||||
try {
|
||||
HashMap<String,String> params = new HashMap<String,String>();
|
||||
params.put(MorfologikFilterFactory.DICTIONARY_RESOURCE_ATTRIBUTE, MorfologikFilterFactory.DEFAULT_DICTIONARY_RESOURCE);
|
||||
params.put(MorfologikFilterFactory.DICTIONARY_FSA_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.dict");
|
||||
params.put(MorfologikFilterFactory.DICTIONARY_FEATURES_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.info");
|
||||
new MorfologikFilterFactory(params);
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
assertTrue(expected.getMessage().contains("at the same time"));
|
||||
}
|
||||
}
|
||||
|
||||
public void testMissingArgs1() throws Exception {
|
||||
try {
|
||||
HashMap<String,String> params = new HashMap<String,String>();
|
||||
params.put(MorfologikFilterFactory.DICTIONARY_FEATURES_FILE_ATTRIBUTE, "/morfologik/dictionaries/pl.info");
|
||||
new MorfologikFilterFactory(params);
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
assertTrue(expected.getMessage().contains("Missing"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -216,7 +216,7 @@ org.bouncycastle.version = 1.45
|
|||
|
||||
/org.carrot2/carrot2-mini = 3.10.3
|
||||
|
||||
org.carrot2.morfologik.version = 1.10.0
|
||||
org.carrot2.morfologik.version = 2.0.1
|
||||
/org.carrot2/morfologik-fsa = ${org.carrot2.morfologik.version}
|
||||
/org.carrot2/morfologik-polish = ${org.carrot2.morfologik.version}
|
||||
/org.carrot2/morfologik-stemming = ${org.carrot2.morfologik.version}
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
87100c6baf60f096b42b9af06dafeb20f686cd02
|
|
@ -0,0 +1 @@
|
|||
23b4c04bb74f80e77573dc3ab84c8b4203f68d50
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
Copyright (c) 2006 Dawid Weiss
|
||||
Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
|
||||
Copyright (c) 2007-2015 Dawid Weiss, Marcin Miłkowski
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
0f8eeb58acb5a39e162c0d49fcf29a70744cc2bc
|
|
@ -0,0 +1 @@
|
|||
b35034de153a79d0afeeeee2ff883d548a178961
|
|
@ -1 +0,0 @@
|
|||
a74ad7ceb29ff1d8194eb161f5b2dfbd636626a5
|
|
@ -0,0 +1 @@
|
|||
df9434b431bbed20ded67ede439c7dfb1e29e9f8
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
Copyright (c) 2006 Dawid Weiss
|
||||
Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
|
||||
Copyright (c) 2007-2015 Dawid Weiss, Marcin Miłkowski
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
87100c6baf60f096b42b9af06dafeb20f686cd02
|
|
@ -0,0 +1 @@
|
|||
23b4c04bb74f80e77573dc3ab84c8b4203f68d50
|
|
@ -1 +0,0 @@
|
|||
0f8eeb58acb5a39e162c0d49fcf29a70744cc2bc
|
|
@ -0,0 +1 @@
|
|||
b35034de153a79d0afeeeee2ff883d548a178961
|
|
@ -1 +0,0 @@
|
|||
a74ad7ceb29ff1d8194eb161f5b2dfbd636626a5
|
|
@ -0,0 +1 @@
|
|||
df9434b431bbed20ded67ede439c7dfb1e29e9f8
|
Loading…
Reference in New Issue