diff --git a/dev-tools/maven/pom.xml.template b/dev-tools/maven/pom.xml.template index dc864d11fbc..5164684496d 100644 --- a/dev-tools/maven/pom.xml.template +++ b/dev-tools/maven/pom.xml.template @@ -412,7 +412,7 @@ org.carrot2 morfologik-polish - 1.5.5 + 1.6.0 org.codehaus.woodstox diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d7bf72ed762..f953a31eef4 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -23,11 +23,19 @@ Changes in backwards compatibility policy not positioned. This change affects all classes that inherit from DocIdSetIterator, including DocsEnum and DocsAndPositionsEnum. (Adrien Grand) +* LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter + no longer support multiple "dictionaries" as there is only one dictionary available. + (Dawid Weiss) + New Features * LUCENE-4747: Move to Java 7 as minimum Java version. (Robert Muir, Uwe Schindler) +* LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter + no longer support multiple "dictionaries" as there is only one dictionary available. + (Dawid Weiss) + Optimizations * LUCENE-4848: Use Java 7 NIO2-FileChannel instead of RandomAccessFile diff --git a/lucene/analysis/morfologik/ivy.xml b/lucene/analysis/morfologik/ivy.xml index 5d93ef3f5dd..0c9c3373934 100644 --- a/lucene/analysis/morfologik/ivy.xml +++ b/lucene/analysis/morfologik/ivy.xml @@ -19,9 +19,9 @@ - - - + + + diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java index 081dbe63c2e..6205d38e65d 100644 --- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java +++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java @@ -26,38 +26,21 @@ import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.util.Version; -import morfologik.stemming.PolishStemmer.DICTIONARY; - /** * {@link org.apache.lucene.analysis.Analyzer} using Morfologik library. * @see Morfologik project page */ public class MorfologikAnalyzer extends Analyzer { - - private final DICTIONARY dictionary; private final Version version; /** - * Builds an analyzer for a given PolishStemmer.DICTIONARY enum. + * Builds an analyzer with the default Morfologik's dictionary (polimorf). * - * @param vers - * lucene compatibility version - * @param dict - * A constant specifying which dictionary to choose. See the - * Morfologik documentation for details or use the default. + * @param version + * Lucene compatibility version */ - public MorfologikAnalyzer(final Version vers, final DICTIONARY dict) { - this.version = vers; - this.dictionary = dict; - } - - /** - * Builds an analyzer for an original MORFOLOGIK dictionary. - * - * @param vers lucene compatibility version - */ - public MorfologikAnalyzer(final Version vers) { - this(vers, DICTIONARY.MORFOLOGIK); + public MorfologikAnalyzer(final Version version) { + this.version = version; } /** @@ -78,7 +61,7 @@ public class MorfologikAnalyzer extends Analyzer { final Tokenizer src = new StandardTokenizer(this.version, reader); return new TokenStreamComponents( - src, - new MorfologikFilter(new StandardFilter(this.version, src), this.dictionary, this.version)); + src, + new MorfologikFilter(new StandardFilter(this.version, src), this.version)); } } diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java index 8b5fcbb8a43..24f20c55287 100644 --- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java +++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.util.*; import morfologik.stemming.*; -import morfologik.stemming.PolishStemmer.DICTIONARY; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; @@ -33,10 +32,11 @@ import org.apache.lucene.analysis.util.CharacterUtils; import org.apache.lucene.util.*; /** - * {@link TokenFilter} using Morfologik library. + * {@link TokenFilter} using Morfologik library to transform input tokens into lemma and + * morphosyntactic (POS) tokens. Applies to Polish only. * - * MorfologikFilter contains a {@link MorphosyntacticTagsAttribute}, which provides morphosyntactic - * annotations for produced lemmas. See the Morfologik documentation for details. + *

MorfologikFilter contains a {@link MorphosyntacticTagsAttribute}, which provides morphosyntactic + * annotations for produced lemmas. See the Morfologik documentation for details.

* * @see Morfologik project page */ @@ -60,13 +60,10 @@ public class MorfologikFilter extends TokenFilter { private int lemmaListIndex; /** - * Builds a filter for given PolishStemmer.DICTIONARY enum. - * * @param in input token stream - * @param dict PolishStemmer.DICTIONARY enum * @param version Lucene version compatibility for lowercasing. */ - public MorfologikFilter(final TokenStream in, final DICTIONARY dict, final Version version) { + public MorfologikFilter(final TokenStream in, final Version version) { super(in); this.input = in; @@ -75,7 +72,7 @@ public class MorfologikFilter extends TokenFilter { ClassLoader cl = me.getContextClassLoader(); try { me.setContextClassLoader(PolishStemmer.class.getClassLoader()); - this.stemmer = new PolishStemmer(dict); + this.stemmer = new PolishStemmer(); this.charUtils = CharacterUtils.getInstance(version); this.lemmaList = Collections.emptyList(); } finally { @@ -83,29 +80,57 @@ public class MorfologikFilter extends TokenFilter { } } + /** + * The tag encoding format has been changing in Morfologik from version + * to version. Let's keep both variants and determine which one to run + * based on this flag. + */ + private final static boolean multipleTagsPerLemma = true; + private void popNextLemma() { - // Collect all tags for the next unique lemma. - CharSequence currentStem; - int tags = 0; - do { + if (multipleTagsPerLemma) { + // One tag (concatenated) per lemma. final WordData lemma = lemmaList.get(lemmaListIndex++); - currentStem = lemma.getStem(); - final CharSequence tag = lemma.getTag(); + termAtt.setEmpty().append(lemma.getStem()); + CharSequence tag = lemma.getTag(); if (tag != null) { - if (tagsList.size() <= tags) { - tagsList.add(new StringBuilder()); + String[] tags = tag.toString().split("\\+"); + for (int i = 0; i < tags.length; i++) { + if (tagsList.size() <= i) { + tagsList.add(new StringBuilder()); + } + StringBuilder buffer = tagsList.get(i); + buffer.setLength(0); + buffer.append(tags[i]); } - - final StringBuilder buffer = tagsList.get(tags++); - buffer.setLength(0); - buffer.append(lemma.getTag()); + tagsAtt.setTags(tagsList.subList(0, tags.length)); + } else { + tagsAtt.setTags(Collections. emptyList()); } - } while (lemmaListIndex < lemmaList.size() && - equalCharSequences(lemmaList.get(lemmaListIndex).getStem(), currentStem)); + } else { + // One tag (concatenated) per stem (lemma repeated). + CharSequence currentStem; + int tags = 0; + do { + final WordData lemma = lemmaList.get(lemmaListIndex++); + currentStem = lemma.getStem(); + final CharSequence tag = lemma.getTag(); + if (tag != null) { + if (tagsList.size() <= tags) { + tagsList.add(new StringBuilder()); + } + + final StringBuilder buffer = tagsList.get(tags++); + buffer.setLength(0); + buffer.append(lemma.getTag()); + } + } while (lemmaListIndex < lemmaList.size() && + equalCharSequences(lemmaList.get(lemmaListIndex).getStem(), currentStem)); - // Set the lemma's base form and tags as attributes. - termAtt.setEmpty().append(currentStem); - tagsAtt.setTags(tagsList.subList(0, tags)); + // Set the lemma's base form and tags as attributes. + termAtt.setEmpty().append(currentStem); + tagsAtt.setTags(tagsList.subList(0, tags)); + } } /** diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java index 3abedb20ccf..388a441ee3a 100644 --- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java +++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java @@ -17,12 +17,8 @@ package org.apache.lucene.analysis.morfologik; * limitations under the License. */ -import java.util.Arrays; -import java.util.Locale; import java.util.Map; -import morfologik.stemming.PolishStemmer.DICTIONARY; - import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.util.TokenFilterFactory; @@ -32,39 +28,28 @@ import org.apache.lucene.analysis.util.TokenFilterFactory; * <fieldType name="text_polish" class="solr.TextField" positionIncrementGap="100"> * <analyzer> * <tokenizer class="solr.WhitespaceTokenizerFactory"/> - * <filter class="solr.MorfologikFilterFactory" dictionary="MORFOLOGIK" /> + * <filter class="solr.MorfologikFilterFactory" /> * </analyzer> * </fieldType> * - *

Any of Morfologik dictionaries can be used, these are at the moment: - * MORFOLOGIK (Morfologik's original dictionary), - * MORFEUSZ (Morfeusz-SIAT), - * COMBINED (both of the dictionaries above, combined). - * * @see Morfologik web site */ public class MorfologikFilterFactory extends TokenFilterFactory { - /** Dictionary. */ - private DICTIONARY dictionary = DICTIONARY.MORFOLOGIK; - /** Schema attribute. */ + @Deprecated public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary"; - + /** Creates a new MorfologikFilterFactory */ public MorfologikFilterFactory(Map args) { super(args); + + // Be specific about no-longer-supported dictionary attribute. String dictionaryName = get(args, DICTIONARY_SCHEMA_ATTRIBUTE); if (dictionaryName != null && !dictionaryName.isEmpty()) { - try { - DICTIONARY dictionary = DICTIONARY.valueOf(dictionaryName.toUpperCase(Locale.ROOT)); - assert dictionary != null; - this.dictionary = dictionary; - } catch (IllegalArgumentException e) { - throw new IllegalArgumentException("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute accepts the " - + "following constants: " + Arrays.toString(DICTIONARY.values()) + ", this value is invalid: " - + dictionaryName); - } + throw new IllegalArgumentException("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute is no " + + "longer supported (Morfologik has one dictionary): " + dictionaryName); } + if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -72,6 +57,6 @@ public class MorfologikFilterFactory extends TokenFilterFactory { @Override public TokenStream create(TokenStream ts) { - return new MorfologikFilter(ts, dictionary, luceneMatchVersion); + return new MorfologikFilter(ts, luceneMatchVersion); } } diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttribute.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttribute.java index 295148837b8..117be78e869 100644 --- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttribute.java +++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttribute.java @@ -23,9 +23,9 @@ import java.util.List; import org.apache.lucene.util.Attribute; /** - * Morfologik dictionaries provide morphosyntactic annotations for + * Morfologik provides morphosyntactic annotations for * surface forms. For the exact format and description of these, - * see the project's documentation (annotations vary by dictionary!). + * see the project's documentation. */ public interface MorphosyntacticTagsAttribute extends Attribute { /** @@ -36,7 +36,9 @@ public interface MorphosyntacticTagsAttribute extends Attribute { public void setTags(List tags); /** - * Returns the POS tag of the term. + * Returns the POS tag of the term. A single word may have multiple POS tags, + * depending on the interpretation (context disambiguation is typically needed + * to determine which particular tag is appropriate). */ public List getTags(); diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java index b1763ff597c..7490caa927d 100644 --- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java +++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java @@ -22,8 +22,6 @@ import java.io.Reader; import java.io.StringReader; import java.util.TreeSet; -import morfologik.stemming.PolishStemmer.DICTIONARY; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.TokenStream; @@ -67,10 +65,22 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase { assertAnalyzesToReuse( a, "T. Gl\u00FCcksberg", - new String[] { "to", "tom", "tona", "Gl\u00FCcksberg" }, - new int[] { 0, 0, 0, 3 }, - new int[] { 1, 1, 1, 13 }, - new int[] { 1, 0, 0, 1 }); + new String[] { "tom", "tona", "Gl\u00FCcksberg" }, + new int[] { 0, 0, 3 }, + new int[] { 1, 1, 13 }, + new int[] { 1, 0, 1 }); + } + + @SuppressWarnings("unused") + private void dumpTokens(String input) throws IOException { + TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader(input)); + ts.reset(); + + MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class); + CharTermAttribute charTerm = ts.getAttribute(CharTermAttribute.class); + while (ts.incrementToken()) { + System.out.println(charTerm.toString() + " => " + attribute.getTags()); + } } /** Test reuse of MorfologikFilter with leftover stems. */ @@ -158,9 +168,8 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase { /** */ public final void testKeywordAttrTokens() throws IOException { final Version version = TEST_VERSION_CURRENT; - final DICTIONARY dictionary = DICTIONARY.COMBINED; - Analyzer a = new MorfologikAnalyzer(version, dictionary) { + Analyzer a = new MorfologikAnalyzer(version) { @Override protected TokenStreamComponents createComponents(String field, Reader reader) { final CharArraySet keywords = new CharArraySet(version, 1, false); @@ -169,7 +178,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase { final Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT, reader); TokenStream result = new StandardFilter(TEST_VERSION_CURRENT, src); result = new SetKeywordMarkerFilter(result, keywords); - result = new MorfologikFilter(result, dictionary, TEST_VERSION_CURRENT); + result = new MorfologikFilter(result, TEST_VERSION_CURRENT); return new TokenStreamComponents(src, result); } diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java index 9adc5a5e4cb..50085a9f2fc 100644 --- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java +++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java @@ -18,8 +18,8 @@ package org.apache.lucene.analysis.morfologik; */ import java.io.StringReader; +import java.util.Collections; import java.util.HashMap; -import java.util.Map; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.MockTokenizer; @@ -31,10 +31,7 @@ import org.apache.lucene.analysis.TokenStream; public class TestMorfologikFilterFactory extends BaseTokenStreamTestCase { public void testCreateDictionary() throws Exception { StringReader reader = new StringReader("rowery bilety"); - Map initParams = new HashMap(); - initParams.put(MorfologikFilterFactory.DICTIONARY_SCHEMA_ATTRIBUTE, - "morfologik"); - MorfologikFilterFactory factory = new MorfologikFilterFactory(initParams); + MorfologikFilterFactory factory = new MorfologikFilterFactory(Collections.emptyMap()); TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); stream = factory.create(stream); assertTokenStreamContents(stream, new String[] {"rower", "bilet"}); diff --git a/lucene/licenses/morfologik-fsa-1.5.5.jar.sha1 b/lucene/licenses/morfologik-fsa-1.5.5.jar.sha1 deleted file mode 100644 index 3a8935a11c7..00000000000 --- a/lucene/licenses/morfologik-fsa-1.5.5.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -7965a39db114f7c404b71d38bc7f0e6a332c4e73 diff --git a/lucene/licenses/morfologik-fsa-1.6.0.jar.sha1 b/lucene/licenses/morfologik-fsa-1.6.0.jar.sha1 new file mode 100644 index 00000000000..8041cb4027d --- /dev/null +++ b/lucene/licenses/morfologik-fsa-1.6.0.jar.sha1 @@ -0,0 +1 @@ +397a99307020797e6790f2faf8cf865983b52559 diff --git a/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt b/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt index f97fb7dfe38..4daba4730de 100644 --- a/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt +++ b/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt @@ -1,6 +1,6 @@ Copyright (c) 2006 Dawid Weiss -Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski +Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski All rights reserved. Redistribution and use in source and binary forms, with or without modification, @@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/lucene/licenses/morfologik-polish-1.5.5.jar.sha1 b/lucene/licenses/morfologik-polish-1.5.5.jar.sha1 deleted file mode 100644 index 10c14c0b380..00000000000 --- a/lucene/licenses/morfologik-polish-1.5.5.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -b4a3a9746cab8b2c99c33d2ceeda2ece3f8d8ef2 diff --git a/lucene/licenses/morfologik-polish-1.6.0.jar.sha1 b/lucene/licenses/morfologik-polish-1.6.0.jar.sha1 new file mode 100644 index 00000000000..b44ead1078f --- /dev/null +++ b/lucene/licenses/morfologik-polish-1.6.0.jar.sha1 @@ -0,0 +1 @@ +ca0663530971b54420fc1cea00a6338f68428232 diff --git a/lucene/licenses/morfologik-polish-LICENSE-BSD.txt b/lucene/licenses/morfologik-polish-LICENSE-BSD.txt index 04ffd07ece9..660f6339dfc 100644 --- a/lucene/licenses/morfologik-polish-LICENSE-BSD.txt +++ b/lucene/licenses/morfologik-polish-LICENSE-BSD.txt @@ -1,62 +1,26 @@ BSD-licensed dictionary of Polish (Morfologik) -Copyright (c) 2012, Marcin Miłkowski +Morfologik Polish dictionary. +Version: 2.0 PoliMorf +Copyright (c) 2013, Marcin Miłkowski All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the - distribution. - -THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN -IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --- - -BSD-licensed dictionary of Polish (SGJP) -http://sgjp.pl/morfeusz/ - -Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński, - Marcin Woliński, Robert Wołosz - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the - distribution. - -THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN -IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/lucene/licenses/morfologik-polish-NOTICE.txt b/lucene/licenses/morfologik-polish-NOTICE.txt index a8a3aa11a3d..45d4cba8d32 100644 --- a/lucene/licenses/morfologik-polish-NOTICE.txt +++ b/lucene/licenses/morfologik-polish-NOTICE.txt @@ -1,6 +1,3 @@ -This product includes data from BSD-licensed dictionary of Polish (Morfologik) -(http://morfologik.blogspot.com/) - -This product includes data from BSD-licensed dictionary of Polish (SGJP) -(http://sgjp.pl/morfeusz/) +This product includes data from BSD-licensed dictionary of Polish (Morfologik, PoliMorf) +(http://morfologik.blogspot.com/) \ No newline at end of file diff --git a/lucene/licenses/morfologik-stemming-1.5.5.jar.sha1 b/lucene/licenses/morfologik-stemming-1.5.5.jar.sha1 deleted file mode 100644 index c9824e4b2a0..00000000000 --- a/lucene/licenses/morfologik-stemming-1.5.5.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -e5dc913adeba3b89539cd5f82e5b88d136a1d85b diff --git a/lucene/licenses/morfologik-stemming-1.6.0.jar.sha1 b/lucene/licenses/morfologik-stemming-1.6.0.jar.sha1 new file mode 100644 index 00000000000..4ba54674892 --- /dev/null +++ b/lucene/licenses/morfologik-stemming-1.6.0.jar.sha1 @@ -0,0 +1 @@ +8a284571bea2cdd305cd86fbac9bab6deef31c7f diff --git a/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt b/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt index f97fb7dfe38..4daba4730de 100644 --- a/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt +++ b/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt @@ -1,6 +1,6 @@ Copyright (c) 2006 Dawid Weiss -Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski +Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski All rights reserved. Redistribution and use in source and binary forms, with or without modification, @@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/solr/contrib/analysis-extras/ivy.xml b/solr/contrib/analysis-extras/ivy.xml index aee7c746a35..597f6060b4b 100644 --- a/solr/contrib/analysis-extras/ivy.xml +++ b/solr/contrib/analysis-extras/ivy.xml @@ -20,9 +20,9 @@ - - - + + + diff --git a/solr/licenses/morfologik-fsa-1.5.5.jar.sha1 b/solr/licenses/morfologik-fsa-1.5.5.jar.sha1 deleted file mode 100644 index 3a8935a11c7..00000000000 --- a/solr/licenses/morfologik-fsa-1.5.5.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -7965a39db114f7c404b71d38bc7f0e6a332c4e73 diff --git a/solr/licenses/morfologik-fsa-1.6.0.jar.sha1 b/solr/licenses/morfologik-fsa-1.6.0.jar.sha1 new file mode 100644 index 00000000000..8041cb4027d --- /dev/null +++ b/solr/licenses/morfologik-fsa-1.6.0.jar.sha1 @@ -0,0 +1 @@ +397a99307020797e6790f2faf8cf865983b52559 diff --git a/solr/licenses/morfologik-fsa-LICENSE-BSD.txt b/solr/licenses/morfologik-fsa-LICENSE-BSD.txt index f97fb7dfe38..4daba4730de 100644 --- a/solr/licenses/morfologik-fsa-LICENSE-BSD.txt +++ b/solr/licenses/morfologik-fsa-LICENSE-BSD.txt @@ -1,6 +1,6 @@ Copyright (c) 2006 Dawid Weiss -Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski +Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski All rights reserved. Redistribution and use in source and binary forms, with or without modification, @@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/solr/licenses/morfologik-polish-1.5.5.jar.sha1 b/solr/licenses/morfologik-polish-1.5.5.jar.sha1 deleted file mode 100644 index 10c14c0b380..00000000000 --- a/solr/licenses/morfologik-polish-1.5.5.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -b4a3a9746cab8b2c99c33d2ceeda2ece3f8d8ef2 diff --git a/solr/licenses/morfologik-polish-1.6.0.jar.sha1 b/solr/licenses/morfologik-polish-1.6.0.jar.sha1 new file mode 100644 index 00000000000..b44ead1078f --- /dev/null +++ b/solr/licenses/morfologik-polish-1.6.0.jar.sha1 @@ -0,0 +1 @@ +ca0663530971b54420fc1cea00a6338f68428232 diff --git a/solr/licenses/morfologik-polish-LICENSE-BSD.txt b/solr/licenses/morfologik-polish-LICENSE-BSD.txt index 04ffd07ece9..660f6339dfc 100644 --- a/solr/licenses/morfologik-polish-LICENSE-BSD.txt +++ b/solr/licenses/morfologik-polish-LICENSE-BSD.txt @@ -1,62 +1,26 @@ BSD-licensed dictionary of Polish (Morfologik) -Copyright (c) 2012, Marcin Miłkowski +Morfologik Polish dictionary. +Version: 2.0 PoliMorf +Copyright (c) 2013, Marcin Miłkowski All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the - distribution. - -THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN -IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --- - -BSD-licensed dictionary of Polish (SGJP) -http://sgjp.pl/morfeusz/ - -Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński, - Marcin Woliński, Robert Wołosz - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the - distribution. - -THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN -IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/solr/licenses/morfologik-polish-NOTICE.txt b/solr/licenses/morfologik-polish-NOTICE.txt index a8a3aa11a3d..45d4cba8d32 100644 --- a/solr/licenses/morfologik-polish-NOTICE.txt +++ b/solr/licenses/morfologik-polish-NOTICE.txt @@ -1,6 +1,3 @@ -This product includes data from BSD-licensed dictionary of Polish (Morfologik) -(http://morfologik.blogspot.com/) - -This product includes data from BSD-licensed dictionary of Polish (SGJP) -(http://sgjp.pl/morfeusz/) +This product includes data from BSD-licensed dictionary of Polish (Morfologik, PoliMorf) +(http://morfologik.blogspot.com/) \ No newline at end of file diff --git a/solr/licenses/morfologik-stemming-1.5.5.jar.sha1 b/solr/licenses/morfologik-stemming-1.5.5.jar.sha1 deleted file mode 100644 index c9824e4b2a0..00000000000 --- a/solr/licenses/morfologik-stemming-1.5.5.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -e5dc913adeba3b89539cd5f82e5b88d136a1d85b diff --git a/solr/licenses/morfologik-stemming-1.6.0.jar.sha1 b/solr/licenses/morfologik-stemming-1.6.0.jar.sha1 new file mode 100644 index 00000000000..4ba54674892 --- /dev/null +++ b/solr/licenses/morfologik-stemming-1.6.0.jar.sha1 @@ -0,0 +1 @@ +8a284571bea2cdd305cd86fbac9bab6deef31c7f diff --git a/solr/licenses/morfologik-stemming-LICENSE-BSD.txt b/solr/licenses/morfologik-stemming-LICENSE-BSD.txt index f97fb7dfe38..4daba4730de 100644 --- a/solr/licenses/morfologik-stemming-LICENSE-BSD.txt +++ b/solr/licenses/morfologik-stemming-LICENSE-BSD.txt @@ -1,6 +1,6 @@ Copyright (c) 2006 Dawid Weiss -Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski +Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski All rights reserved. Redistribution and use in source and binary forms, with or without modification, @@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file