LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter no longer support multiple "dictionaries" as there is only one dictionary available.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1499352 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dawid Weiss 2013-07-03 12:14:50 +00:00
parent e2fc82a978
commit efc6826ed8
30 changed files with 163 additions and 232 deletions

View File

@ -412,7 +412,7 @@
<dependency>
<groupId>org.carrot2</groupId>
<artifactId>morfologik-polish</artifactId>
<version>1.5.5</version>
<version>1.6.0</version>
</dependency>
<dependency>
<groupId>org.codehaus.woodstox</groupId>

View File

@ -23,11 +23,19 @@ Changes in backwards compatibility policy
not positioned. This change affects all classes that inherit from
DocIdSetIterator, including DocsEnum and DocsAndPositionsEnum. (Adrien Grand)
* LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter
no longer support multiple "dictionaries" as there is only one dictionary available.
(Dawid Weiss)
New Features
* LUCENE-4747: Move to Java 7 as minimum Java version.
(Robert Muir, Uwe Schindler)
* LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter
no longer support multiple "dictionaries" as there is only one dictionary available.
(Dawid Weiss)
Optimizations
* LUCENE-4848: Use Java 7 NIO2-FileChannel instead of RandomAccessFile

View File

@ -19,9 +19,9 @@
<ivy-module version="2.0">
<info organisation="org.apache.lucene" module="analyzers-morfologik"/>
<dependencies>
<dependency org="org.carrot2" name="morfologik-polish" rev="1.5.5" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-fsa" rev="1.5.5" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-stemming" rev="1.5.5" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-polish" rev="1.6.0" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-fsa" rev="1.6.0" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-stemming" rev="1.6.0" transitive="false"/>
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
</ivy-module>

View File

@ -26,38 +26,21 @@ import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import morfologik.stemming.PolishStemmer.DICTIONARY;
/**
* {@link org.apache.lucene.analysis.Analyzer} using Morfologik library.
* @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
*/
public class MorfologikAnalyzer extends Analyzer {
private final DICTIONARY dictionary;
private final Version version;
/**
* Builds an analyzer for a given PolishStemmer.DICTIONARY enum.
* Builds an analyzer with Morfologik's default dictionary (PoliMorf).
*
* @param vers
* lucene compatibility version
* @param dict
* A constant specifying which dictionary to choose. See the
* Morfologik documentation for details or use the default.
* @param version
* Lucene compatibility version
*/
public MorfologikAnalyzer(final Version vers, final DICTIONARY dict) {
this.version = vers;
this.dictionary = dict;
}
/**
* Builds an analyzer for an original MORFOLOGIK dictionary.
*
* @param vers lucene compatibility version
*/
public MorfologikAnalyzer(final Version vers) {
this(vers, DICTIONARY.MORFOLOGIK);
public MorfologikAnalyzer(final Version version) {
this.version = version;
}
/**
@ -78,7 +61,7 @@ public class MorfologikAnalyzer extends Analyzer {
final Tokenizer src = new StandardTokenizer(this.version, reader);
return new TokenStreamComponents(
src,
new MorfologikFilter(new StandardFilter(this.version, src), this.dictionary, this.version));
src,
new MorfologikFilter(new StandardFilter(this.version, src), this.version));
}
}

View File

@ -22,7 +22,6 @@ import java.io.IOException;
import java.util.*;
import morfologik.stemming.*;
import morfologik.stemming.PolishStemmer.DICTIONARY;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@ -33,10 +32,11 @@ import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.*;
/**
* {@link TokenFilter} using Morfologik library.
* {@link TokenFilter} using Morfologik library to transform input tokens into lemma and
* morphosyntactic (POS) tokens. Applies to Polish only.
*
* MorfologikFilter contains a {@link MorphosyntacticTagsAttribute}, which provides morphosyntactic
* annotations for produced lemmas. See the Morfologik documentation for details.
* <p>MorfologikFilter contains a {@link MorphosyntacticTagsAttribute}, which provides morphosyntactic
* annotations for produced lemmas. See the Morfologik documentation for details.</p>
*
* @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
*/
@ -60,13 +60,10 @@ public class MorfologikFilter extends TokenFilter {
private int lemmaListIndex;
/**
* Builds a filter for given PolishStemmer.DICTIONARY enum.
*
* @param in input token stream
* @param dict PolishStemmer.DICTIONARY enum
* @param version Lucene version compatibility for lowercasing.
*/
public MorfologikFilter(final TokenStream in, final DICTIONARY dict, final Version version) {
public MorfologikFilter(final TokenStream in, final Version version) {
super(in);
this.input = in;
@ -75,7 +72,7 @@ public class MorfologikFilter extends TokenFilter {
ClassLoader cl = me.getContextClassLoader();
try {
me.setContextClassLoader(PolishStemmer.class.getClassLoader());
this.stemmer = new PolishStemmer(dict);
this.stemmer = new PolishStemmer();
this.charUtils = CharacterUtils.getInstance(version);
this.lemmaList = Collections.emptyList();
} finally {
@ -83,29 +80,57 @@ public class MorfologikFilter extends TokenFilter {
}
}
/**
* The tag encoding format has been changing in Morfologik from version
* to version. Let's keep both variants and determine which one to run
* based on this flag.
*/
private final static boolean multipleTagsPerLemma = true;
private void popNextLemma() {
// Collect all tags for the next unique lemma.
CharSequence currentStem;
int tags = 0;
do {
if (multipleTagsPerLemma) {
// One tag (concatenated) per lemma.
final WordData lemma = lemmaList.get(lemmaListIndex++);
currentStem = lemma.getStem();
final CharSequence tag = lemma.getTag();
termAtt.setEmpty().append(lemma.getStem());
CharSequence tag = lemma.getTag();
if (tag != null) {
if (tagsList.size() <= tags) {
tagsList.add(new StringBuilder());
String[] tags = tag.toString().split("\\+");
for (int i = 0; i < tags.length; i++) {
if (tagsList.size() <= i) {
tagsList.add(new StringBuilder());
}
StringBuilder buffer = tagsList.get(i);
buffer.setLength(0);
buffer.append(tags[i]);
}
final StringBuilder buffer = tagsList.get(tags++);
buffer.setLength(0);
buffer.append(lemma.getTag());
tagsAtt.setTags(tagsList.subList(0, tags.length));
} else {
tagsAtt.setTags(Collections.<StringBuilder> emptyList());
}
} while (lemmaListIndex < lemmaList.size() &&
equalCharSequences(lemmaList.get(lemmaListIndex).getStem(), currentStem));
} else {
// One tag (concatenated) per stem (lemma repeated).
CharSequence currentStem;
int tags = 0;
do {
final WordData lemma = lemmaList.get(lemmaListIndex++);
currentStem = lemma.getStem();
final CharSequence tag = lemma.getTag();
if (tag != null) {
if (tagsList.size() <= tags) {
tagsList.add(new StringBuilder());
}
final StringBuilder buffer = tagsList.get(tags++);
buffer.setLength(0);
buffer.append(lemma.getTag());
}
} while (lemmaListIndex < lemmaList.size() &&
equalCharSequences(lemmaList.get(lemmaListIndex).getStem(), currentStem));
// Set the lemma's base form and tags as attributes.
termAtt.setEmpty().append(currentStem);
tagsAtt.setTags(tagsList.subList(0, tags));
// Set the lemma's base form and tags as attributes.
termAtt.setEmpty().append(currentStem);
tagsAtt.setTags(tagsList.subList(0, tags));
}
}
/**

View File

@ -17,12 +17,8 @@ package org.apache.lucene.analysis.morfologik;
* limitations under the License.
*/
import java.util.Arrays;
import java.util.Locale;
import java.util.Map;
import morfologik.stemming.PolishStemmer.DICTIONARY;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@ -32,39 +28,28 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;fieldType name="text_polish" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.MorfologikFilterFactory" dictionary="MORFOLOGIK" /&gt;
* &lt;filter class="solr.MorfologikFilterFactory" /&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* <p>Any of Morfologik dictionaries can be used, these are at the moment:
* <code>MORFOLOGIK</code> (Morfologik's original dictionary),
* <code>MORFEUSZ</code> (Morfeusz-SIAT),
* <code>COMBINED</code> (both of the dictionaries above, combined).
*
* @see <a href="http://morfologik.blogspot.com/">Morfologik web site</a>
*/
public class MorfologikFilterFactory extends TokenFilterFactory {
/** Dictionary. */
private DICTIONARY dictionary = DICTIONARY.MORFOLOGIK;
/** Schema attribute. */
@Deprecated
public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";
/** Creates a new MorfologikFilterFactory */
public MorfologikFilterFactory(Map<String,String> args) {
super(args);
// Be specific about no-longer-supported dictionary attribute.
String dictionaryName = get(args, DICTIONARY_SCHEMA_ATTRIBUTE);
if (dictionaryName != null && !dictionaryName.isEmpty()) {
try {
DICTIONARY dictionary = DICTIONARY.valueOf(dictionaryName.toUpperCase(Locale.ROOT));
assert dictionary != null;
this.dictionary = dictionary;
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute accepts the "
+ "following constants: " + Arrays.toString(DICTIONARY.values()) + ", this value is invalid: "
+ dictionaryName);
}
throw new IllegalArgumentException("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute is no "
+ "longer supported (Morfologik has one dictionary): " + dictionaryName);
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@ -72,6 +57,6 @@ public class MorfologikFilterFactory extends TokenFilterFactory {
@Override
public TokenStream create(TokenStream ts) {
return new MorfologikFilter(ts, dictionary, luceneMatchVersion);
return new MorfologikFilter(ts, luceneMatchVersion);
}
}

View File

@ -23,9 +23,9 @@ import java.util.List;
import org.apache.lucene.util.Attribute;
/**
* Morfologik dictionaries provide morphosyntactic annotations for
* Morfologik provides morphosyntactic annotations for
* surface forms. For the exact format and description of these,
* see the project's documentation (annotations vary by dictionary!).
* see the project's documentation.
*/
public interface MorphosyntacticTagsAttribute extends Attribute {
/**
@ -36,7 +36,9 @@ public interface MorphosyntacticTagsAttribute extends Attribute {
public void setTags(List<StringBuilder> tags);
/**
* Returns the POS tag of the term.
* Returns the POS tag of the term. A single word may have multiple POS tags,
* depending on the interpretation (context disambiguation is typically needed
* to determine which particular tag is appropriate).
*/
public List<StringBuilder> getTags();

View File

@ -22,8 +22,6 @@ import java.io.Reader;
import java.io.StringReader;
import java.util.TreeSet;
import morfologik.stemming.PolishStemmer.DICTIONARY;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
@ -67,10 +65,22 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesToReuse(
a,
"T. Gl\u00FCcksberg",
new String[] { "to", "tom", "tona", "Gl\u00FCcksberg" },
new int[] { 0, 0, 0, 3 },
new int[] { 1, 1, 1, 13 },
new int[] { 1, 0, 0, 1 });
new String[] { "tom", "tona", "Gl\u00FCcksberg" },
new int[] { 0, 0, 3 },
new int[] { 1, 1, 13 },
new int[] { 1, 0, 1 });
}
/**
 * Debugging aid: analyzes {@code input} with the analyzer returned by
 * {@code getTestAnalyzer()} and prints each produced term together with its
 * morphosyntactic tags to standard output, one token per line.
 *
 * @param input text to run through the analyzer
 * @throws IOException if creating or consuming the token stream fails
 */
@SuppressWarnings("unused")
private void dumpTokens(String input) throws IOException {
  TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader(input));
  try {
    ts.reset();
    MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class);
    CharTermAttribute charTerm = ts.getAttribute(CharTermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(charTerm.toString() + " => " + attribute.getTags());
    }
    // Complete the consumer workflow: signal end-of-stream after the last token.
    ts.end();
  } finally {
    // Always release the stream, even if analysis fails part-way through.
    ts.close();
  }
}
/** Test reuse of MorfologikFilter with leftover stems. */
@ -158,9 +168,8 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
/** */
public final void testKeywordAttrTokens() throws IOException {
final Version version = TEST_VERSION_CURRENT;
final DICTIONARY dictionary = DICTIONARY.COMBINED;
Analyzer a = new MorfologikAnalyzer(version, dictionary) {
Analyzer a = new MorfologikAnalyzer(version) {
@Override
protected TokenStreamComponents createComponents(String field, Reader reader) {
final CharArraySet keywords = new CharArraySet(version, 1, false);
@ -169,7 +178,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
final Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
TokenStream result = new StandardFilter(TEST_VERSION_CURRENT, src);
result = new SetKeywordMarkerFilter(result, keywords);
result = new MorfologikFilter(result, dictionary, TEST_VERSION_CURRENT);
result = new MorfologikFilter(result, TEST_VERSION_CURRENT);
return new TokenStreamComponents(src, result);
}

View File

@ -18,8 +18,8 @@ package org.apache.lucene.analysis.morfologik;
*/
import java.io.StringReader;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
@ -31,10 +31,7 @@ import org.apache.lucene.analysis.TokenStream;
public class TestMorfologikFilterFactory extends BaseTokenStreamTestCase {
public void testCreateDictionary() throws Exception {
StringReader reader = new StringReader("rowery bilety");
Map<String,String> initParams = new HashMap<String,String>();
initParams.put(MorfologikFilterFactory.DICTIONARY_SCHEMA_ATTRIBUTE,
"morfologik");
MorfologikFilterFactory factory = new MorfologikFilterFactory(initParams);
MorfologikFilterFactory factory = new MorfologikFilterFactory(Collections.<String,String>emptyMap());
TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
stream = factory.create(stream);
assertTokenStreamContents(stream, new String[] {"rower", "bilet"});

View File

@ -1 +0,0 @@
7965a39db114f7c404b71d38bc7f0e6a332c4e73

View File

@ -0,0 +1 @@
397a99307020797e6790f2faf8cf865983b52559

View File

@ -1,6 +1,6 @@
Copyright (c) 2006 Dawid Weiss
Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1 +0,0 @@
b4a3a9746cab8b2c99c33d2ceeda2ece3f8d8ef2

View File

@ -0,0 +1 @@
ca0663530971b54420fc1cea00a6338f68428232

View File

@ -1,62 +1,26 @@
BSD-licensed dictionary of Polish (Morfologik)
Copyright (c) 2012, Marcin Miłkowski
Morfologik Polish dictionary.
Version: 2.0 PoliMorf
Copyright (c) 2013, Marcin Miłkowski
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--
BSD-licensed dictionary of Polish (SGJP)
http://sgjp.pl/morfeusz/
Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński,
Marcin Woliński, Robert Wołosz
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,6 +1,3 @@
This product includes data from BSD-licensed dictionary of Polish (Morfologik)
(http://morfologik.blogspot.com/)
This product includes data from BSD-licensed dictionary of Polish (SGJP)
(http://sgjp.pl/morfeusz/)
This product includes data from BSD-licensed dictionary of Polish (Morfologik, PoliMorf)
(http://morfologik.blogspot.com/)

View File

@ -1 +0,0 @@
e5dc913adeba3b89539cd5f82e5b88d136a1d85b

View File

@ -0,0 +1 @@
8a284571bea2cdd305cd86fbac9bab6deef31c7f

View File

@ -1,6 +1,6 @@
Copyright (c) 2006 Dawid Weiss
Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -20,9 +20,9 @@
<info organisation="org.apache.solr" module="analysis-extras"/>
<dependencies>
<dependency org="com.ibm.icu" name="icu4j" rev="49.1" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-polish" rev="1.5.5" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-fsa" rev="1.5.5" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-stemming" rev="1.5.5" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-polish" rev="1.6.0" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-fsa" rev="1.6.0" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-stemming" rev="1.6.0" transitive="false"/>
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
</ivy-module>

View File

@ -1 +0,0 @@
7965a39db114f7c404b71d38bc7f0e6a332c4e73

View File

@ -0,0 +1 @@
397a99307020797e6790f2faf8cf865983b52559

View File

@ -1,6 +1,6 @@
Copyright (c) 2006 Dawid Weiss
Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1 +0,0 @@
b4a3a9746cab8b2c99c33d2ceeda2ece3f8d8ef2

View File

@ -0,0 +1 @@
ca0663530971b54420fc1cea00a6338f68428232

View File

@ -1,62 +1,26 @@
BSD-licensed dictionary of Polish (Morfologik)
Copyright (c) 2012, Marcin Miłkowski
Morfologik Polish dictionary.
Version: 2.0 PoliMorf
Copyright (c) 2013, Marcin Miłkowski
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--
BSD-licensed dictionary of Polish (SGJP)
http://sgjp.pl/morfeusz/
Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński,
Marcin Woliński, Robert Wołosz
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,6 +1,3 @@
This product includes data from BSD-licensed dictionary of Polish (Morfologik)
(http://morfologik.blogspot.com/)
This product includes data from BSD-licensed dictionary of Polish (SGJP)
(http://sgjp.pl/morfeusz/)
This product includes data from BSD-licensed dictionary of Polish (Morfologik, PoliMorf)
(http://morfologik.blogspot.com/)

View File

@ -1 +0,0 @@
e5dc913adeba3b89539cd5f82e5b88d136a1d85b

View File

@ -0,0 +1 @@
8a284571bea2cdd305cd86fbac9bab6deef31c7f

View File

@ -1,6 +1,6 @@
Copyright (c) 2006 Dawid Weiss
Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.