LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter no longer support multiple "dictionaries" as there is only one dictionary available.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1499352 13f79535-47bb-0310-9956-ffa450edef68
2013-07-03 12:14:50 +00:00 · 2013-07-03 12:14:50 +00:00 · efc6826ed8
parent e2fc82a978
commit efc6826ed8
30 changed files with 163 additions and 232 deletions
--- a/dev-tools/maven/pom.xml.template
+++ b/dev-tools/maven/pom.xml.template
@ -412,7 +412,7 @@
      <dependency>
        <groupId>org.carrot2</groupId>
        <artifactId>morfologik-polish</artifactId>
-        <version>1.5.5</version>
+        <version>1.6.0</version>
      </dependency>
      <dependency>
        <groupId>org.codehaus.woodstox</groupId>
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -23,11 +23,19 @@ Changes in backwards compatibility policy
  not positioned. This change affects all classes that inherit from
  DocIdSetIterator, including DocsEnum and DocsAndPositionsEnum. (Adrien Grand)

+* LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter 
+  no longer support multiple "dictionaries" as there is only one dictionary available.
+  (Dawid Weiss)
+
 New Features

 * LUCENE-4747: Move to Java 7 as minimum Java version.
  (Robert Muir, Uwe Schindler)

+* LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter 
+  no longer support multiple "dictionaries" as there is only one dictionary available.
+  (Dawid Weiss)
+
 Optimizations

 * LUCENE-4848: Use Java 7 NIO2-FileChannel instead of RandomAccessFile
--- a/lucene/analysis/morfologik/ivy.xml
+++ b/lucene/analysis/morfologik/ivy.xml
@ -19,9 +19,9 @@
 <ivy-module version="2.0">
    <info organisation="org.apache.lucene" module="analyzers-morfologik"/>
    <dependencies>
-      <dependency org="org.carrot2" name="morfologik-polish" rev="1.5.5" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-fsa" rev="1.5.5" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-stemming" rev="1.5.5" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-polish" rev="1.6.0" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-fsa" rev="1.6.0" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-stemming" rev="1.6.0" transitive="false"/>
      <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
    </dependencies>
 </ivy-module>
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
@ -26,38 +26,21 @@ import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;

-import morfologik.stemming.PolishStemmer.DICTIONARY;
-
 /**
 * {@link org.apache.lucene.analysis.Analyzer} using Morfologik library.
 * @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
 */
 public class MorfologikAnalyzer extends Analyzer {
-
-  private final DICTIONARY dictionary;
  private final Version version;

  /**
-   * Builds an analyzer for a given PolishStemmer.DICTIONARY enum.
+   * Builds an analyzer with the default Morfologik's dictionary (polimorf).
   * 
-   * @param vers
-   *          lucene compatibility version
-   * @param dict
-   *          A constant specifying which dictionary to choose. See the
-   *          Morfologik documentation for details or use the default.
+   * @param version
+   *          Lucene compatibility version
   */
-  public MorfologikAnalyzer(final Version vers, final DICTIONARY dict) {
-    this.version = vers;
-    this.dictionary = dict;
-  }
-
-  /**
-   * Builds an analyzer for an original MORFOLOGIK dictionary.
-   * 
-   * @param vers         lucene compatibility version
-   */
-  public MorfologikAnalyzer(final Version vers) {
-    this(vers, DICTIONARY.MORFOLOGIK);
+  public MorfologikAnalyzer(final Version version) {
+    this.version = version;
  }

  /**
@ -78,7 +61,7 @@ public class MorfologikAnalyzer extends Analyzer {
    final Tokenizer src = new StandardTokenizer(this.version, reader);
    
    return new TokenStreamComponents(
-      src,
-      new MorfologikFilter(new StandardFilter(this.version, src), this.dictionary, this.version));
+        src, 
+        new MorfologikFilter(new StandardFilter(this.version, src), this.version));
  }
 }
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
@ -22,7 +22,6 @@ import java.io.IOException;
 import java.util.*;

 import morfologik.stemming.*;
-import morfologik.stemming.PolishStemmer.DICTIONARY;

 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@ -33,10 +32,11 @@ import org.apache.lucene.analysis.util.CharacterUtils;
 import org.apache.lucene.util.*;

 /**
- * {@link TokenFilter} using Morfologik library.
+ * {@link TokenFilter} using Morfologik library to transform input tokens into lemma and
+ * morphosyntactic (POS) tokens. Applies to Polish only.  
 *
- * MorfologikFilter contains a {@link MorphosyntacticTagsAttribute}, which provides morphosyntactic
- * annotations for produced lemmas. See the Morfologik documentation for details.
+ * <p>MorfologikFilter contains a {@link MorphosyntacticTagsAttribute}, which provides morphosyntactic
+ * annotations for produced lemmas. See the Morfologik documentation for details.</p>
 * 
 * @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
 */
@ -60,13 +60,10 @@ public class MorfologikFilter extends TokenFilter {
  private int lemmaListIndex;

  /**
-   * Builds a filter for given PolishStemmer.DICTIONARY enum.
-   * 
   * @param in   input token stream
-   * @param dict PolishStemmer.DICTIONARY enum
   * @param version Lucene version compatibility for lowercasing.
   */
-  public MorfologikFilter(final TokenStream in, final DICTIONARY dict, final Version version) {
+  public MorfologikFilter(final TokenStream in, final Version version) {
    super(in);
    this.input = in;
    
@ -75,7 +72,7 @@ public class MorfologikFilter extends TokenFilter {
    ClassLoader cl = me.getContextClassLoader();
    try {
      me.setContextClassLoader(PolishStemmer.class.getClassLoader());
-      this.stemmer = new PolishStemmer(dict);
+      this.stemmer = new PolishStemmer();
      this.charUtils = CharacterUtils.getInstance(version);
      this.lemmaList = Collections.emptyList();
    } finally {
@ -83,29 +80,57 @@ public class MorfologikFilter extends TokenFilter {
    }  
  }

+  /**
+   * The tag encoding format has been changing in Morfologik from version
+   * to version. Let's keep both variants and determine which one to run
+   * based on this flag.
+   */
+  private final static boolean multipleTagsPerLemma = true;
+
  private void popNextLemma() {
-    // Collect all tags for the next unique lemma.
-    CharSequence currentStem;
-    int tags = 0;
-    do {
+    if (multipleTagsPerLemma) {
+      // One tag (concatenated) per lemma.
      final WordData lemma = lemmaList.get(lemmaListIndex++);
-      currentStem = lemma.getStem();
-      final CharSequence tag = lemma.getTag();
+      termAtt.setEmpty().append(lemma.getStem());
+      CharSequence tag = lemma.getTag();
      if (tag != null) {
-        if (tagsList.size() <= tags) {
-          tagsList.add(new StringBuilder());
+        String[] tags = tag.toString().split("\\+");
+        for (int i = 0; i < tags.length; i++) {
+          if (tagsList.size() <= i) {
+            tagsList.add(new StringBuilder());
+          }
+          StringBuilder buffer = tagsList.get(i);
+          buffer.setLength(0);
+          buffer.append(tags[i]);
        }
-
-        final StringBuilder buffer = tagsList.get(tags++);  
-        buffer.setLength(0);
-        buffer.append(lemma.getTag());
+        tagsAtt.setTags(tagsList.subList(0, tags.length));
+      } else {
+        tagsAtt.setTags(Collections.<StringBuilder> emptyList());
      }
-    } while (lemmaListIndex < lemmaList.size() &&
-             equalCharSequences(lemmaList.get(lemmaListIndex).getStem(), currentStem));
+    } else {
+      // One tag (concatenated) per stem (lemma repeated).
+      CharSequence currentStem;
+      int tags = 0;
+      do {
+        final WordData lemma = lemmaList.get(lemmaListIndex++);
+        currentStem = lemma.getStem();
+        final CharSequence tag = lemma.getTag();
+        if (tag != null) {
+          if (tagsList.size() <= tags) {
+            tagsList.add(new StringBuilder());
+          }
  
-    // Set the lemma's base form and tags as attributes.
-    termAtt.setEmpty().append(currentStem);
-    tagsAtt.setTags(tagsList.subList(0, tags));
+          final StringBuilder buffer = tagsList.get(tags++);  
+          buffer.setLength(0);
+          buffer.append(lemma.getTag());
+        }
+      } while (lemmaListIndex < lemmaList.size() &&
+               equalCharSequences(lemmaList.get(lemmaListIndex).getStem(), currentStem));
+
+      // Set the lemma's base form and tags as attributes.
+      termAtt.setEmpty().append(currentStem);
+      tagsAtt.setTags(tagsList.subList(0, tags));
+    }
  }

  /**
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
@ -17,12 +17,8 @@ package org.apache.lucene.analysis.morfologik;
 * limitations under the License.
 */

-import java.util.Arrays;
-import java.util.Locale;
 import java.util.Map;

-import morfologik.stemming.PolishStemmer.DICTIONARY;
-
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.TokenFilterFactory;

@ -32,39 +28,28 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
 * &lt;fieldType name="text_polish" class="solr.TextField" positionIncrementGap="100"&gt;
 *   &lt;analyzer&gt;
 *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
- *     &lt;filter class="solr.MorfologikFilterFactory" dictionary="MORFOLOGIK" /&gt;
+ *     &lt;filter class="solr.MorfologikFilterFactory" /&gt;
 *   &lt;/analyzer&gt;
 * &lt;/fieldType&gt;</pre>
 * 
- * <p>Any of Morfologik dictionaries can be used, these are at the moment:
- * <code>MORFOLOGIK</code> (Morfologik's original dictionary),
- * <code>MORFEUSZ</code> (Morfeusz-SIAT),
- * <code>COMBINED</code> (both of the dictionaries above, combined).
- * 
 * @see <a href="http://morfologik.blogspot.com/">Morfologik web site</a>
 */
 public class MorfologikFilterFactory extends TokenFilterFactory {
-  /** Dictionary. */
-  private DICTIONARY dictionary = DICTIONARY.MORFOLOGIK;
-  
  /** Schema attribute. */
+  @Deprecated
  public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";

  /** Creates a new MorfologikFilterFactory */
  public MorfologikFilterFactory(Map<String,String> args) {
    super(args);
+
+    // Be specific about no-longer-supported dictionary attribute.
    String dictionaryName = get(args, DICTIONARY_SCHEMA_ATTRIBUTE);
    if (dictionaryName != null && !dictionaryName.isEmpty()) {
-      try {
-        DICTIONARY dictionary = DICTIONARY.valueOf(dictionaryName.toUpperCase(Locale.ROOT));
-        assert dictionary != null;
-        this.dictionary = dictionary;
-      } catch (IllegalArgumentException e) {
-        throw new IllegalArgumentException("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute accepts the "
-            + "following constants: " + Arrays.toString(DICTIONARY.values()) + ", this value is invalid: "  
-            + dictionaryName);
-      }
+      throw new IllegalArgumentException("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute is no "
+          + "longer supported (Morfologik has one dictionary): " + dictionaryName);
    }
+
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
@ -72,6 +57,6 @@ public class MorfologikFilterFactory extends TokenFilterFactory {

  @Override
  public TokenStream create(TokenStream ts) {
-    return new MorfologikFilter(ts, dictionary, luceneMatchVersion);
+    return new MorfologikFilter(ts, luceneMatchVersion);
  }
 }
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttribute.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttribute.java
@ -23,9 +23,9 @@ import java.util.List;
 import org.apache.lucene.util.Attribute;

 /** 
- * Morfologik dictionaries provide morphosyntactic annotations for
+ * Morfologik provides morphosyntactic annotations for
 * surface forms. For the exact format and description of these,
- * see the project's documentation (annotations vary by dictionary!).
+ * see the project's documentation.
 */
 public interface MorphosyntacticTagsAttribute extends Attribute {
  /** 
@ -36,7 +36,9 @@ public interface MorphosyntacticTagsAttribute extends Attribute {
  public void setTags(List<StringBuilder> tags);

  /** 
-   * Returns the POS tag of the term.
+   * Returns the POS tag of the term. A single word may have multiple POS tags, 
+   * depending on the interpretation (context disambiguation is typically needed
+   * to determine which particular tag is appropriate).  
   */
  public List<StringBuilder> getTags();

--- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
@ -22,8 +22,6 @@ import java.io.Reader;
 import java.io.StringReader;
 import java.util.TreeSet;

-import morfologik.stemming.PolishStemmer.DICTIONARY;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
@ -67,10 +65,22 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
    assertAnalyzesToReuse(
        a,
        "T. Gl\u00FCcksberg",
-        new String[] { "to", "tom", "tona", "Gl\u00FCcksberg" },
-        new int[] { 0, 0, 0, 3  },
-        new int[] { 1, 1, 1, 13 },
-        new int[] { 1, 0, 0, 1  });
+        new String[] { "tom", "tona", "Gl\u00FCcksberg" },
+        new int[] { 0, 0, 3  },
+        new int[] { 1, 1, 13 },
+        new int[] { 1, 0, 1  });
+  }
+
+  @SuppressWarnings("unused")
+  private void dumpTokens(String input) throws IOException {
+    TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader(input));
+    ts.reset();
+
+    MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class);
+    CharTermAttribute charTerm = ts.getAttribute(CharTermAttribute.class);
+    while (ts.incrementToken()) {
+      System.out.println(charTerm.toString() + " => " + attribute.getTags());
+    }
  }

  /** Test reuse of MorfologikFilter with leftover stems. */
@ -158,9 +168,8 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
  /** */
  public final void testKeywordAttrTokens() throws IOException {
    final Version version = TEST_VERSION_CURRENT;
-    final DICTIONARY dictionary = DICTIONARY.COMBINED;

-    Analyzer a = new MorfologikAnalyzer(version, dictionary) {
+    Analyzer a = new MorfologikAnalyzer(version) {
      @Override
      protected TokenStreamComponents createComponents(String field, Reader reader) {
        final CharArraySet keywords = new CharArraySet(version, 1, false);
@ -169,7 +178,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
        final Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
        TokenStream result = new StandardFilter(TEST_VERSION_CURRENT, src);
        result = new SetKeywordMarkerFilter(result, keywords);
-        result = new MorfologikFilter(result, dictionary, TEST_VERSION_CURRENT); 
+        result = new MorfologikFilter(result, TEST_VERSION_CURRENT); 

        return new TokenStreamComponents(src, result);
      }
--- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
+++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
@ -18,8 +18,8 @@ package org.apache.lucene.analysis.morfologik;
 */

 import java.io.StringReader;
+import java.util.Collections;
 import java.util.HashMap;
-import java.util.Map;

 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
@ -31,10 +31,7 @@ import org.apache.lucene.analysis.TokenStream;
 public class TestMorfologikFilterFactory extends BaseTokenStreamTestCase {
  public void testCreateDictionary() throws Exception {
    StringReader reader = new StringReader("rowery bilety");
-    Map<String,String> initParams = new HashMap<String,String>();
-    initParams.put(MorfologikFilterFactory.DICTIONARY_SCHEMA_ATTRIBUTE,
-        "morfologik");
-    MorfologikFilterFactory factory = new MorfologikFilterFactory(initParams);
+    MorfologikFilterFactory factory = new MorfologikFilterFactory(Collections.<String,String>emptyMap());
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = factory.create(stream);
    assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
--- a/lucene/licenses/morfologik-fsa-1.5.5.jar.sha1
+++ b/lucene/licenses/morfologik-fsa-1.5.5.jar.sha1
@ -1 +0,0 @@
-7965a39db114f7c404b71d38bc7f0e6a332c4e73
--- a/lucene/licenses/morfologik-fsa-1.6.0.jar.sha1
+++ b/lucene/licenses/morfologik-fsa-1.6.0.jar.sha1
@ -0,0 +1 @@
+397a99307020797e6790f2faf8cf865983b52559
--- a/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt
+++ b/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt
@ -1,6 +1,6 @@

 Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
 All rights reserved.

 Redistribution and use in source and binary forms, with or without modification, 
--- a/lucene/licenses/morfologik-polish-1.5.5.jar.sha1
+++ b/lucene/licenses/morfologik-polish-1.5.5.jar.sha1
@ -1 +0,0 @@
-b4a3a9746cab8b2c99c33d2ceeda2ece3f8d8ef2
--- a/lucene/licenses/morfologik-polish-1.6.0.jar.sha1
+++ b/lucene/licenses/morfologik-polish-1.6.0.jar.sha1
@ -0,0 +1 @@
+ca0663530971b54420fc1cea00a6338f68428232
--- a/lucene/licenses/morfologik-polish-LICENSE-BSD.txt
+++ b/lucene/licenses/morfologik-polish-LICENSE-BSD.txt
@ -1,62 +1,26 @@
 BSD-licensed dictionary of Polish (Morfologik)

-Copyright (c) 2012, Marcin Miłkowski
+Morfologik Polish dictionary.
+Version: 2.0 PoliMorf
+Copyright (c) 2013, Marcin Miłkowski
 All rights reserved.

-Redistribution and  use in  source and binary  forms, with  or without
-modification, are permitted provided that the following conditions are
-met:
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met: 

-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
+1. Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer. 
+2. Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution. 

-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the
-   distribution.
-
-THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
-OR  IMPLIED WARRANTIES,  INCLUDING, BUT  NOT LIMITED  TO,  THE IMPLIED
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED.  IN NO EVENT  SHALL COPYRIGHT  HOLDERS OR  CONTRIBUTORS BE
-LIABLE FOR  ANY DIRECT,  INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES  (INCLUDING, BUT NOT LIMITED  TO, PROCUREMENT OF
-SUBSTITUTE  GOODS OR  SERVICES;  LOSS  OF USE,  DATA,  OR PROFITS;  OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF LIABILITY,
-WHETHER IN  CONTRACT, STRICT LIABILITY, OR  TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
-IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
--
-
-BSD-licensed dictionary of Polish (SGJP)
-http://sgjp.pl/morfeusz/
-
-Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński, 
-	    	 Marcin Woliński, Robert Wołosz
-
-All rights reserved.
-
-Redistribution and  use in  source and binary  forms, with  or without
-modification, are permitted provided that the following conditions are
-met:
-
-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the
-   distribution.
-
-THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
-OR  IMPLIED WARRANTIES,  INCLUDING, BUT  NOT LIMITED  TO,  THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED.  IN NO EVENT  SHALL COPYRIGHT  HOLDERS OR  CONTRIBUTORS BE
-LIABLE FOR  ANY DIRECT,  INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES  (INCLUDING, BUT NOT LIMITED  TO, PROCUREMENT OF
-SUBSTITUTE  GOODS OR  SERVICES;  LOSS  OF USE,  DATA,  OR PROFITS;  OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF LIABILITY,
-WHETHER IN  CONTRACT, STRICT LIABILITY, OR  TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
-IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/lucene/licenses/morfologik-polish-NOTICE.txt
+++ b/lucene/licenses/morfologik-polish-NOTICE.txt
@ -1,6 +1,3 @@

-This product includes data from BSD-licensed dictionary of Polish (Morfologik)
+This product includes data from BSD-licensed dictionary of Polish (Morfologik, PoliMorf)
 (http://morfologik.blogspot.com/)
-
-This product includes data from BSD-licensed dictionary of Polish (SGJP)
-(http://sgjp.pl/morfeusz/)
--- a/lucene/licenses/morfologik-stemming-1.5.5.jar.sha1
+++ b/lucene/licenses/morfologik-stemming-1.5.5.jar.sha1
@ -1 +0,0 @@
-e5dc913adeba3b89539cd5f82e5b88d136a1d85b
--- a/lucene/licenses/morfologik-stemming-1.6.0.jar.sha1
+++ b/lucene/licenses/morfologik-stemming-1.6.0.jar.sha1
@ -0,0 +1 @@
+8a284571bea2cdd305cd86fbac9bab6deef31c7f
--- a/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt
+++ b/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt
@ -1,6 +1,6 @@

 Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
 All rights reserved.

 Redistribution and use in source and binary forms, with or without modification, 
--- a/solr/contrib/analysis-extras/ivy.xml
+++ b/solr/contrib/analysis-extras/ivy.xml
@ -20,9 +20,9 @@
    <info organisation="org.apache.solr" module="analysis-extras"/>
    <dependencies>
      <dependency org="com.ibm.icu" name="icu4j" rev="49.1" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-polish" rev="1.5.5" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-fsa" rev="1.5.5" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-stemming" rev="1.5.5" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-polish" rev="1.6.0" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-fsa" rev="1.6.0" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-stemming" rev="1.6.0" transitive="false"/>
      <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
    </dependencies>
 </ivy-module>
--- a/solr/licenses/morfologik-fsa-1.5.5.jar.sha1
+++ b/solr/licenses/morfologik-fsa-1.5.5.jar.sha1
@ -1 +0,0 @@
-7965a39db114f7c404b71d38bc7f0e6a332c4e73
--- a/solr/licenses/morfologik-fsa-1.6.0.jar.sha1
+++ b/solr/licenses/morfologik-fsa-1.6.0.jar.sha1
@ -0,0 +1 @@
+397a99307020797e6790f2faf8cf865983b52559
--- a/solr/licenses/morfologik-fsa-LICENSE-BSD.txt
+++ b/solr/licenses/morfologik-fsa-LICENSE-BSD.txt
@ -1,6 +1,6 @@

 Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
 All rights reserved.

 Redistribution and use in source and binary forms, with or without modification, 
--- a/solr/licenses/morfologik-polish-1.5.5.jar.sha1
+++ b/solr/licenses/morfologik-polish-1.5.5.jar.sha1
@ -1 +0,0 @@
-b4a3a9746cab8b2c99c33d2ceeda2ece3f8d8ef2
--- a/solr/licenses/morfologik-polish-1.6.0.jar.sha1
+++ b/solr/licenses/morfologik-polish-1.6.0.jar.sha1
@ -0,0 +1 @@
+ca0663530971b54420fc1cea00a6338f68428232
--- a/solr/licenses/morfologik-polish-LICENSE-BSD.txt
+++ b/solr/licenses/morfologik-polish-LICENSE-BSD.txt
@ -1,62 +1,26 @@
 BSD-licensed dictionary of Polish (Morfologik)

-Copyright (c) 2012, Marcin Miłkowski
+Morfologik Polish dictionary.
+Version: 2.0 PoliMorf
+Copyright (c) 2013, Marcin Miłkowski
 All rights reserved.

-Redistribution and  use in  source and binary  forms, with  or without
-modification, are permitted provided that the following conditions are
-met:
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met: 

-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
+1. Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer. 
+2. Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution. 

-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the
-   distribution.
-
-THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
-OR  IMPLIED WARRANTIES,  INCLUDING, BUT  NOT LIMITED  TO,  THE IMPLIED
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED.  IN NO EVENT  SHALL COPYRIGHT  HOLDERS OR  CONTRIBUTORS BE
-LIABLE FOR  ANY DIRECT,  INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES  (INCLUDING, BUT NOT LIMITED  TO, PROCUREMENT OF
-SUBSTITUTE  GOODS OR  SERVICES;  LOSS  OF USE,  DATA,  OR PROFITS;  OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF LIABILITY,
-WHETHER IN  CONTRACT, STRICT LIABILITY, OR  TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
-IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
--
-
-BSD-licensed dictionary of Polish (SGJP)
-http://sgjp.pl/morfeusz/
-
-Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński, 
-	    	 Marcin Woliński, Robert Wołosz
-
-All rights reserved.
-
-Redistribution and  use in  source and binary  forms, with  or without
-modification, are permitted provided that the following conditions are
-met:
-
-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the
-   distribution.
-
-THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
-OR  IMPLIED WARRANTIES,  INCLUDING, BUT  NOT LIMITED  TO,  THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED.  IN NO EVENT  SHALL COPYRIGHT  HOLDERS OR  CONTRIBUTORS BE
-LIABLE FOR  ANY DIRECT,  INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES  (INCLUDING, BUT NOT LIMITED  TO, PROCUREMENT OF
-SUBSTITUTE  GOODS OR  SERVICES;  LOSS  OF USE,  DATA,  OR PROFITS;  OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF LIABILITY,
-WHETHER IN  CONTRACT, STRICT LIABILITY, OR  TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
-IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/solr/licenses/morfologik-polish-NOTICE.txt
+++ b/solr/licenses/morfologik-polish-NOTICE.txt
@ -1,6 +1,3 @@

-This product includes data from BSD-licensed dictionary of Polish (Morfologik)
+This product includes data from BSD-licensed dictionary of Polish (Morfologik, PoliMorf)
 (http://morfologik.blogspot.com/)
-
-This product includes data from BSD-licensed dictionary of Polish (SGJP)
-(http://sgjp.pl/morfeusz/)
--- a/solr/licenses/morfologik-stemming-1.5.5.jar.sha1
+++ b/solr/licenses/morfologik-stemming-1.5.5.jar.sha1
@ -1 +0,0 @@
-e5dc913adeba3b89539cd5f82e5b88d136a1d85b
--- a/solr/licenses/morfologik-stemming-1.6.0.jar.sha1
+++ b/solr/licenses/morfologik-stemming-1.6.0.jar.sha1
@ -0,0 +1 @@
+8a284571bea2cdd305cd86fbac9bab6deef31c7f
--- a/solr/licenses/morfologik-stemming-LICENSE-BSD.txt
+++ b/solr/licenses/morfologik-stemming-LICENSE-BSD.txt
@ -1,6 +1,6 @@

 Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
 All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,