mirror of https://github.com/apache/lucene.git
LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter no longer support multiple "dictionaries" as there is only one dictionary available.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1499352 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e2fc82a978
commit
efc6826ed8
|
@ -412,7 +412,7 @@
|
|||
<dependency>
|
||||
<groupId>org.carrot2</groupId>
|
||||
<artifactId>morfologik-polish</artifactId>
|
||||
<version>1.5.5</version>
|
||||
<version>1.6.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.codehaus.woodstox</groupId>
|
||||
|
|
|
@ -23,11 +23,19 @@ Changes in backwards compatibility policy
|
|||
not positioned. This change affects all classes that inherit from
|
||||
DocIdSetIterator, including DocsEnum and DocsAndPositionsEnum. (Adrien Grand)
|
||||
|
||||
* LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter
|
||||
no longer support multiple "dictionaries" as there is only one dictionary available.
|
||||
(Dawid Weiss)
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-4747: Move to Java 7 as minimum Java version.
|
||||
(Robert Muir, Uwe Schindler)
|
||||
|
||||
* LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter
|
||||
no longer support multiple "dictionaries" as there is only one dictionary available.
|
||||
(Dawid Weiss)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-4848: Use Java 7 NIO2-FileChannel instead of RandomAccessFile
|
||||
|
|
|
@ -19,9 +19,9 @@
|
|||
<ivy-module version="2.0">
|
||||
<info organisation="org.apache.lucene" module="analyzers-morfologik"/>
|
||||
<dependencies>
|
||||
<dependency org="org.carrot2" name="morfologik-polish" rev="1.5.5" transitive="false"/>
|
||||
<dependency org="org.carrot2" name="morfologik-fsa" rev="1.5.5" transitive="false"/>
|
||||
<dependency org="org.carrot2" name="morfologik-stemming" rev="1.5.5" transitive="false"/>
|
||||
<dependency org="org.carrot2" name="morfologik-polish" rev="1.6.0" transitive="false"/>
|
||||
<dependency org="org.carrot2" name="morfologik-fsa" rev="1.6.0" transitive="false"/>
|
||||
<dependency org="org.carrot2" name="morfologik-stemming" rev="1.6.0" transitive="false"/>
|
||||
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
|
||||
</dependencies>
|
||||
</ivy-module>
|
||||
|
|
|
@ -26,38 +26,21 @@ import org.apache.lucene.analysis.standard.StandardFilter;
|
|||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import morfologik.stemming.PolishStemmer.DICTIONARY;
|
||||
|
||||
/**
|
||||
* {@link org.apache.lucene.analysis.Analyzer} using Morfologik library.
|
||||
* @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
|
||||
*/
|
||||
public class MorfologikAnalyzer extends Analyzer {
|
||||
|
||||
private final DICTIONARY dictionary;
|
||||
private final Version version;
|
||||
|
||||
/**
|
||||
* Builds an analyzer for a given PolishStemmer.DICTIONARY enum.
|
||||
* Builds an analyzer with the default Morfologik's dictionary (polimorf).
|
||||
*
|
||||
* @param vers
|
||||
* lucene compatibility version
|
||||
* @param dict
|
||||
* A constant specifying which dictionary to choose. See the
|
||||
* Morfologik documentation for details or use the default.
|
||||
* @param version
|
||||
* Lucene compatibility version
|
||||
*/
|
||||
public MorfologikAnalyzer(final Version vers, final DICTIONARY dict) {
|
||||
this.version = vers;
|
||||
this.dictionary = dict;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an analyzer for an original MORFOLOGIK dictionary.
|
||||
*
|
||||
* @param vers lucene compatibility version
|
||||
*/
|
||||
public MorfologikAnalyzer(final Version vers) {
|
||||
this(vers, DICTIONARY.MORFOLOGIK);
|
||||
public MorfologikAnalyzer(final Version version) {
|
||||
this.version = version;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -78,7 +61,7 @@ public class MorfologikAnalyzer extends Analyzer {
|
|||
final Tokenizer src = new StandardTokenizer(this.version, reader);
|
||||
|
||||
return new TokenStreamComponents(
|
||||
src,
|
||||
new MorfologikFilter(new StandardFilter(this.version, src), this.dictionary, this.version));
|
||||
src,
|
||||
new MorfologikFilter(new StandardFilter(this.version, src), this.version));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,7 +22,6 @@ import java.io.IOException;
|
|||
import java.util.*;
|
||||
|
||||
import morfologik.stemming.*;
|
||||
import morfologik.stemming.PolishStemmer.DICTIONARY;
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -33,10 +32,11 @@ import org.apache.lucene.analysis.util.CharacterUtils;
|
|||
import org.apache.lucene.util.*;
|
||||
|
||||
/**
|
||||
* {@link TokenFilter} using Morfologik library.
|
||||
* {@link TokenFilter} using Morfologik library to transform input tokens into lemma and
|
||||
* morphosyntactic (POS) tokens. Applies to Polish only.
|
||||
*
|
||||
* MorfologikFilter contains a {@link MorphosyntacticTagsAttribute}, which provides morphosyntactic
|
||||
* annotations for produced lemmas. See the Morfologik documentation for details.
|
||||
* <p>MorfologikFilter contains a {@link MorphosyntacticTagsAttribute}, which provides morphosyntactic
|
||||
* annotations for produced lemmas. See the Morfologik documentation for details.</p>
|
||||
*
|
||||
* @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
|
||||
*/
|
||||
|
@ -60,13 +60,10 @@ public class MorfologikFilter extends TokenFilter {
|
|||
private int lemmaListIndex;
|
||||
|
||||
/**
|
||||
* Builds a filter for given PolishStemmer.DICTIONARY enum.
|
||||
*
|
||||
* @param in input token stream
|
||||
* @param dict PolishStemmer.DICTIONARY enum
|
||||
* @param version Lucene version compatibility for lowercasing.
|
||||
*/
|
||||
public MorfologikFilter(final TokenStream in, final DICTIONARY dict, final Version version) {
|
||||
public MorfologikFilter(final TokenStream in, final Version version) {
|
||||
super(in);
|
||||
this.input = in;
|
||||
|
||||
|
@ -75,7 +72,7 @@ public class MorfologikFilter extends TokenFilter {
|
|||
ClassLoader cl = me.getContextClassLoader();
|
||||
try {
|
||||
me.setContextClassLoader(PolishStemmer.class.getClassLoader());
|
||||
this.stemmer = new PolishStemmer(dict);
|
||||
this.stemmer = new PolishStemmer();
|
||||
this.charUtils = CharacterUtils.getInstance(version);
|
||||
this.lemmaList = Collections.emptyList();
|
||||
} finally {
|
||||
|
@ -83,29 +80,57 @@ public class MorfologikFilter extends TokenFilter {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The tag encoding format has been changing in Morfologik from version
|
||||
* to version. Let's keep both variants and determine which one to run
|
||||
* based on this flag.
|
||||
*/
|
||||
private final static boolean multipleTagsPerLemma = true;
|
||||
|
||||
private void popNextLemma() {
|
||||
// Collect all tags for the next unique lemma.
|
||||
CharSequence currentStem;
|
||||
int tags = 0;
|
||||
do {
|
||||
if (multipleTagsPerLemma) {
|
||||
// One tag (concatenated) per lemma.
|
||||
final WordData lemma = lemmaList.get(lemmaListIndex++);
|
||||
currentStem = lemma.getStem();
|
||||
final CharSequence tag = lemma.getTag();
|
||||
termAtt.setEmpty().append(lemma.getStem());
|
||||
CharSequence tag = lemma.getTag();
|
||||
if (tag != null) {
|
||||
if (tagsList.size() <= tags) {
|
||||
tagsList.add(new StringBuilder());
|
||||
String[] tags = tag.toString().split("\\+");
|
||||
for (int i = 0; i < tags.length; i++) {
|
||||
if (tagsList.size() <= i) {
|
||||
tagsList.add(new StringBuilder());
|
||||
}
|
||||
StringBuilder buffer = tagsList.get(i);
|
||||
buffer.setLength(0);
|
||||
buffer.append(tags[i]);
|
||||
}
|
||||
|
||||
final StringBuilder buffer = tagsList.get(tags++);
|
||||
buffer.setLength(0);
|
||||
buffer.append(lemma.getTag());
|
||||
tagsAtt.setTags(tagsList.subList(0, tags.length));
|
||||
} else {
|
||||
tagsAtt.setTags(Collections.<StringBuilder> emptyList());
|
||||
}
|
||||
} while (lemmaListIndex < lemmaList.size() &&
|
||||
equalCharSequences(lemmaList.get(lemmaListIndex).getStem(), currentStem));
|
||||
} else {
|
||||
// One tag (concatenated) per stem (lemma repeated).
|
||||
CharSequence currentStem;
|
||||
int tags = 0;
|
||||
do {
|
||||
final WordData lemma = lemmaList.get(lemmaListIndex++);
|
||||
currentStem = lemma.getStem();
|
||||
final CharSequence tag = lemma.getTag();
|
||||
if (tag != null) {
|
||||
if (tagsList.size() <= tags) {
|
||||
tagsList.add(new StringBuilder());
|
||||
}
|
||||
|
||||
// Set the lemma's base form and tags as attributes.
|
||||
termAtt.setEmpty().append(currentStem);
|
||||
tagsAtt.setTags(tagsList.subList(0, tags));
|
||||
final StringBuilder buffer = tagsList.get(tags++);
|
||||
buffer.setLength(0);
|
||||
buffer.append(lemma.getTag());
|
||||
}
|
||||
} while (lemmaListIndex < lemmaList.size() &&
|
||||
equalCharSequences(lemmaList.get(lemmaListIndex).getStem(), currentStem));
|
||||
|
||||
// Set the lemma's base form and tags as attributes.
|
||||
termAtt.setEmpty().append(currentStem);
|
||||
tagsAtt.setTags(tagsList.subList(0, tags));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -17,12 +17,8 @@ package org.apache.lucene.analysis.morfologik;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import morfologik.stemming.PolishStemmer.DICTIONARY;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
|
@ -32,39 +28,28 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <fieldType name="text_polish" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.MorfologikFilterFactory" dictionary="MORFOLOGIK" />
|
||||
* <filter class="solr.MorfologikFilterFactory" />
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* <p>Any of Morfologik dictionaries can be used, these are at the moment:
|
||||
* <code>MORFOLOGIK</code> (Morfologik's original dictionary),
|
||||
* <code>MORFEUSZ</code> (Morfeusz-SIAT),
|
||||
* <code>COMBINED</code> (both of the dictionaries above, combined).
|
||||
*
|
||||
* @see <a href="http://morfologik.blogspot.com/">Morfologik web site</a>
|
||||
*/
|
||||
public class MorfologikFilterFactory extends TokenFilterFactory {
|
||||
/** Dictionary. */
|
||||
private DICTIONARY dictionary = DICTIONARY.MORFOLOGIK;
|
||||
|
||||
/** Schema attribute. */
|
||||
@Deprecated
|
||||
public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";
|
||||
|
||||
/** Creates a new MorfologikFilterFactory */
|
||||
public MorfologikFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
|
||||
// Be specific about no-longer-supported dictionary attribute.
|
||||
String dictionaryName = get(args, DICTIONARY_SCHEMA_ATTRIBUTE);
|
||||
if (dictionaryName != null && !dictionaryName.isEmpty()) {
|
||||
try {
|
||||
DICTIONARY dictionary = DICTIONARY.valueOf(dictionaryName.toUpperCase(Locale.ROOT));
|
||||
assert dictionary != null;
|
||||
this.dictionary = dictionary;
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IllegalArgumentException("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute accepts the "
|
||||
+ "following constants: " + Arrays.toString(DICTIONARY.values()) + ", this value is invalid: "
|
||||
+ dictionaryName);
|
||||
}
|
||||
throw new IllegalArgumentException("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute is no "
|
||||
+ "longer supported (Morfologik has one dictionary): " + dictionaryName);
|
||||
}
|
||||
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
@ -72,6 +57,6 @@ public class MorfologikFilterFactory extends TokenFilterFactory {
|
|||
|
||||
@Override
|
||||
public TokenStream create(TokenStream ts) {
|
||||
return new MorfologikFilter(ts, dictionary, luceneMatchVersion);
|
||||
return new MorfologikFilter(ts, luceneMatchVersion);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,9 +23,9 @@ import java.util.List;
|
|||
import org.apache.lucene.util.Attribute;
|
||||
|
||||
/**
|
||||
* Morfologik dictionaries provide morphosyntactic annotations for
|
||||
* Morfologik provides morphosyntactic annotations for
|
||||
* surface forms. For the exact format and description of these,
|
||||
* see the project's documentation (annotations vary by dictionary!).
|
||||
* see the project's documentation.
|
||||
*/
|
||||
public interface MorphosyntacticTagsAttribute extends Attribute {
|
||||
/**
|
||||
|
@ -36,7 +36,9 @@ public interface MorphosyntacticTagsAttribute extends Attribute {
|
|||
public void setTags(List<StringBuilder> tags);
|
||||
|
||||
/**
|
||||
* Returns the POS tag of the term.
|
||||
* Returns the POS tag of the term. A single word may have multiple POS tags,
|
||||
* depending on the interpretation (context disambiguation is typically needed
|
||||
* to determine which particular tag is appropriate).
|
||||
*/
|
||||
public List<StringBuilder> getTags();
|
||||
|
||||
|
|
|
@ -22,8 +22,6 @@ import java.io.Reader;
|
|||
import java.io.StringReader;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import morfologik.stemming.PolishStemmer.DICTIONARY;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -67,10 +65,22 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
|
|||
assertAnalyzesToReuse(
|
||||
a,
|
||||
"T. Gl\u00FCcksberg",
|
||||
new String[] { "to", "tom", "tona", "Gl\u00FCcksberg" },
|
||||
new int[] { 0, 0, 0, 3 },
|
||||
new int[] { 1, 1, 1, 13 },
|
||||
new int[] { 1, 0, 0, 1 });
|
||||
new String[] { "tom", "tona", "Gl\u00FCcksberg" },
|
||||
new int[] { 0, 0, 3 },
|
||||
new int[] { 1, 1, 13 },
|
||||
new int[] { 1, 0, 1 });
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private void dumpTokens(String input) throws IOException {
|
||||
TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader(input));
|
||||
ts.reset();
|
||||
|
||||
MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class);
|
||||
CharTermAttribute charTerm = ts.getAttribute(CharTermAttribute.class);
|
||||
while (ts.incrementToken()) {
|
||||
System.out.println(charTerm.toString() + " => " + attribute.getTags());
|
||||
}
|
||||
}
|
||||
|
||||
/** Test reuse of MorfologikFilter with leftover stems. */
|
||||
|
@ -158,9 +168,8 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
|
|||
/** */
|
||||
public final void testKeywordAttrTokens() throws IOException {
|
||||
final Version version = TEST_VERSION_CURRENT;
|
||||
final DICTIONARY dictionary = DICTIONARY.COMBINED;
|
||||
|
||||
Analyzer a = new MorfologikAnalyzer(version, dictionary) {
|
||||
Analyzer a = new MorfologikAnalyzer(version) {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String field, Reader reader) {
|
||||
final CharArraySet keywords = new CharArraySet(version, 1, false);
|
||||
|
@ -169,7 +178,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
|
|||
final Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
|
||||
TokenStream result = new StandardFilter(TEST_VERSION_CURRENT, src);
|
||||
result = new SetKeywordMarkerFilter(result, keywords);
|
||||
result = new MorfologikFilter(result, dictionary, TEST_VERSION_CURRENT);
|
||||
result = new MorfologikFilter(result, TEST_VERSION_CURRENT);
|
||||
|
||||
return new TokenStreamComponents(src, result);
|
||||
}
|
||||
|
|
|
@ -18,8 +18,8 @@ package org.apache.lucene.analysis.morfologik;
|
|||
*/
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
|
@ -31,10 +31,7 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
public class TestMorfologikFilterFactory extends BaseTokenStreamTestCase {
|
||||
public void testCreateDictionary() throws Exception {
|
||||
StringReader reader = new StringReader("rowery bilety");
|
||||
Map<String,String> initParams = new HashMap<String,String>();
|
||||
initParams.put(MorfologikFilterFactory.DICTIONARY_SCHEMA_ATTRIBUTE,
|
||||
"morfologik");
|
||||
MorfologikFilterFactory factory = new MorfologikFilterFactory(initParams);
|
||||
MorfologikFilterFactory factory = new MorfologikFilterFactory(Collections.<String,String>emptyMap());
|
||||
TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
stream = factory.create(stream);
|
||||
assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
7965a39db114f7c404b71d38bc7f0e6a332c4e73
|
|
@ -0,0 +1 @@
|
|||
397a99307020797e6790f2faf8cf865983b52559
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
Copyright (c) 2006 Dawid Weiss
|
||||
Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
|
||||
Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
b4a3a9746cab8b2c99c33d2ceeda2ece3f8d8ef2
|
|
@ -0,0 +1 @@
|
|||
ca0663530971b54420fc1cea00a6338f68428232
|
|
@ -1,62 +1,26 @@
|
|||
BSD-licensed dictionary of Polish (Morfologik)
|
||||
|
||||
Copyright (c) 2012, Marcin Miłkowski
|
||||
Morfologik Polish dictionary.
|
||||
Version: 2.0 PoliMorf
|
||||
Copyright (c) 2013, Marcin Miłkowski
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
|
||||
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
|
||||
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
--
|
||||
|
||||
BSD-licensed dictionary of Polish (SGJP)
|
||||
http://sgjp.pl/morfeusz/
|
||||
|
||||
Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński,
|
||||
Marcin Woliński, Robert Wołosz
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
|
||||
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
|
||||
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
|
|
@ -1,6 +1,3 @@
|
|||
|
||||
This product includes data from BSD-licensed dictionary of Polish (Morfologik)
|
||||
This product includes data from BSD-licensed dictionary of Polish (Morfologik, PoliMorf)
|
||||
(http://morfologik.blogspot.com/)
|
||||
|
||||
This product includes data from BSD-licensed dictionary of Polish (SGJP)
|
||||
(http://sgjp.pl/morfeusz/)
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
e5dc913adeba3b89539cd5f82e5b88d136a1d85b
|
|
@ -0,0 +1 @@
|
|||
8a284571bea2cdd305cd86fbac9bab6deef31c7f
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
Copyright (c) 2006 Dawid Weiss
|
||||
Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
|
||||
Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
|
|
|
@ -20,9 +20,9 @@
|
|||
<info organisation="org.apache.solr" module="analysis-extras"/>
|
||||
<dependencies>
|
||||
<dependency org="com.ibm.icu" name="icu4j" rev="49.1" transitive="false"/>
|
||||
<dependency org="org.carrot2" name="morfologik-polish" rev="1.5.5" transitive="false"/>
|
||||
<dependency org="org.carrot2" name="morfologik-fsa" rev="1.5.5" transitive="false"/>
|
||||
<dependency org="org.carrot2" name="morfologik-stemming" rev="1.5.5" transitive="false"/>
|
||||
<dependency org="org.carrot2" name="morfologik-polish" rev="1.6.0" transitive="false"/>
|
||||
<dependency org="org.carrot2" name="morfologik-fsa" rev="1.6.0" transitive="false"/>
|
||||
<dependency org="org.carrot2" name="morfologik-stemming" rev="1.6.0" transitive="false"/>
|
||||
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
|
||||
</dependencies>
|
||||
</ivy-module>
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
7965a39db114f7c404b71d38bc7f0e6a332c4e73
|
|
@ -0,0 +1 @@
|
|||
397a99307020797e6790f2faf8cf865983b52559
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
Copyright (c) 2006 Dawid Weiss
|
||||
Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
|
||||
Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
b4a3a9746cab8b2c99c33d2ceeda2ece3f8d8ef2
|
|
@ -0,0 +1 @@
|
|||
ca0663530971b54420fc1cea00a6338f68428232
|
|
@ -1,62 +1,26 @@
|
|||
BSD-licensed dictionary of Polish (Morfologik)
|
||||
|
||||
Copyright (c) 2012, Marcin Miłkowski
|
||||
Morfologik Polish dictionary.
|
||||
Version: 2.0 PoliMorf
|
||||
Copyright (c) 2013, Marcin Miłkowski
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
|
||||
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
|
||||
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
--
|
||||
|
||||
BSD-licensed dictionary of Polish (SGJP)
|
||||
http://sgjp.pl/morfeusz/
|
||||
|
||||
Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński,
|
||||
Marcin Woliński, Robert Wołosz
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
|
||||
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
|
||||
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
|
|
@ -1,6 +1,3 @@
|
|||
|
||||
This product includes data from BSD-licensed dictionary of Polish (Morfologik)
|
||||
This product includes data from BSD-licensed dictionary of Polish (Morfologik, PoliMorf)
|
||||
(http://morfologik.blogspot.com/)
|
||||
|
||||
This product includes data from BSD-licensed dictionary of Polish (SGJP)
|
||||
(http://sgjp.pl/morfeusz/)
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
e5dc913adeba3b89539cd5f82e5b88d136a1d85b
|
|
@ -0,0 +1 @@
|
|||
8a284571bea2cdd305cd86fbac9bab6deef31c7f
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
Copyright (c) 2006 Dawid Weiss
|
||||
Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
|
||||
Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
|
|
Loading…
Reference in New Issue