LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter no longer support multiple "dictionaries" as there is only one dictionary available.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1499352 13f79535-47bb-0310-9956-ffa450edef68
2013-07-03 12:14:50 +00:00 · 2013-07-03 12:14:50 +00:00 · efc6826ed8
parent e2fc82a978
commit efc6826ed8
30 changed files with 163 additions and 232 deletions
--- a/dev-tools/maven/pom.xml.template
+++ b/dev-tools/maven/pom.xml.template
@ -412,7 +412,7 @@
      <dependency>
        <groupId>org.carrot2</groupId>
        <artifactId>morfologik-polish</artifactId>
-        <version>1.5.5</version>
+        <version>1.6.0</version>
      </dependency>
      <dependency>
        <groupId>org.codehaus.woodstox</groupId>
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -23,11 +23,19 @@ Changes in backwards compatibility policy
  not positioned. This change affects all classes that inherit from
  DocIdSetIterator, including DocsEnum and DocsAndPositionsEnum. (Adrien Grand)
 * LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter 
  no longer support multiple "dictionaries" as there is only one dictionary available.
  (Dawid Weiss)
 New Features
 * LUCENE-4747: Move to Java 7 as minimum Java version.
  (Robert Muir, Uwe Schindler)
 * LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter 
  no longer support multiple "dictionaries" as there is only one dictionary available.
  (Dawid Weiss)
 Optimizations
 * LUCENE-4848: Use Java 7 NIO2-FileChannel instead of RandomAccessFile
--- a/lucene/analysis/morfologik/ivy.xml
+++ b/lucene/analysis/morfologik/ivy.xml
@ -19,9 +19,9 @@
 <ivy-module version="2.0">
    <info organisation="org.apache.lucene" module="analyzers-morfologik"/>
    <dependencies>
-      <dependency org="org.carrot2" name="morfologik-polish" rev="1.5.5" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-polish" rev="1.6.0" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-fsa" rev="1.5.5" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-fsa" rev="1.6.0" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-stemming" rev="1.5.5" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-stemming" rev="1.6.0" transitive="false"/>
      <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
    </dependencies>
 </ivy-module>
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
@ -26,38 +26,21 @@ import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 import morfologik.stemming.PolishStemmer.DICTIONARY;
 /**
 * {@link org.apache.lucene.analysis.Analyzer} using Morfologik library.
 * @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
 */
 public class MorfologikAnalyzer extends Analyzer {
  private final DICTIONARY dictionary;
  private final Version version;
  /**
-   * Builds an analyzer for a given PolishStemmer.DICTIONARY enum.
+   * Builds an analyzer with the default Morfologik's dictionary (polimorf).
   * 
-   * @param vers
+   * @param version
-   *          lucene compatibility version
+   *          Lucene compatibility version
   * @param dict
   *          A constant specifying which dictionary to choose. See the
   *          Morfologik documentation for details or use the default.
   */
-  public MorfologikAnalyzer(final Version vers, final DICTIONARY dict) {
+  public MorfologikAnalyzer(final Version version) {
-    this.version = vers;
+    this.version = version;
    this.dictionary = dict;
  }
  /**
   * Builds an analyzer for an original MORFOLOGIK dictionary.
   * 
   * @param vers         lucene compatibility version
   */
  public MorfologikAnalyzer(final Version vers) {
    this(vers, DICTIONARY.MORFOLOGIK);
  }
  /**
@ -78,7 +61,7 @@ public class MorfologikAnalyzer extends Analyzer {
    final Tokenizer src = new StandardTokenizer(this.version, reader);
    return new TokenStreamComponents(
-      src,
+        src, 
-      new MorfologikFilter(new StandardFilter(this.version, src), this.dictionary, this.version));
+        new MorfologikFilter(new StandardFilter(this.version, src), this.version));
  }
 }
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
@ -22,7 +22,6 @@ import java.io.IOException;
 import java.util.*;
 import morfologik.stemming.*;
 import morfologik.stemming.PolishStemmer.DICTIONARY;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@ -33,10 +32,11 @@ import org.apache.lucene.analysis.util.CharacterUtils;
 import org.apache.lucene.util.*;
 /**
- * {@link TokenFilter} using Morfologik library.
+ * {@link TokenFilter} using Morfologik library to transform input tokens into lemma and
 * morphosyntactic (POS) tokens. Applies to Polish only.  
 *
- * MorfologikFilter contains a {@link MorphosyntacticTagsAttribute}, which provides morphosyntactic
+ * <p>MorfologikFilter contains a {@link MorphosyntacticTagsAttribute}, which provides morphosyntactic
- * annotations for produced lemmas. See the Morfologik documentation for details.
+ * annotations for produced lemmas. See the Morfologik documentation for details.</p>
 * 
 * @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
 */
@ -60,13 +60,10 @@ public class MorfologikFilter extends TokenFilter {
  private int lemmaListIndex;
  /**
   * Builds a filter for given PolishStemmer.DICTIONARY enum.
   * 
   * @param in   input token stream
   * @param dict PolishStemmer.DICTIONARY enum
   * @param version Lucene version compatibility for lowercasing.
   */
-  public MorfologikFilter(final TokenStream in, final DICTIONARY dict, final Version version) {
+  public MorfologikFilter(final TokenStream in, final Version version) {
    super(in);
    this.input = in;
@ -75,7 +72,7 @@ public class MorfologikFilter extends TokenFilter {
    ClassLoader cl = me.getContextClassLoader();
    try {
      me.setContextClassLoader(PolishStemmer.class.getClassLoader());
-      this.stemmer = new PolishStemmer(dict);
+      this.stemmer = new PolishStemmer();
      this.charUtils = CharacterUtils.getInstance(version);
      this.lemmaList = Collections.emptyList();
    } finally {
@ -83,29 +80,57 @@ public class MorfologikFilter extends TokenFilter {
    }  
  }
  /**
   * The tag encoding format has been changing in Morfologik from version
   * to version. Let's keep both variants and determine which one to run
   * based on this flag.
   */
  private final static boolean multipleTagsPerLemma = true;
  private void popNextLemma() {
-    // Collect all tags for the next unique lemma.
+    if (multipleTagsPerLemma) {
-    CharSequence currentStem;
+      // One tag (concatenated) per lemma.
    int tags = 0;
    do {
      final WordData lemma = lemmaList.get(lemmaListIndex++);
-      currentStem = lemma.getStem();
+      termAtt.setEmpty().append(lemma.getStem());
-      final CharSequence tag = lemma.getTag();
+      CharSequence tag = lemma.getTag();
      if (tag != null) {
-        if (tagsList.size() <= tags) {
+        String[] tags = tag.toString().split("\\+");
-          tagsList.add(new StringBuilder());
+        for (int i = 0; i < tags.length; i++) {
          if (tagsList.size() <= i) {
            tagsList.add(new StringBuilder());
          }
          StringBuilder buffer = tagsList.get(i);
          buffer.setLength(0);
          buffer.append(tags[i]);
        }
-
+        tagsAtt.setTags(tagsList.subList(0, tags.length));
-        final StringBuilder buffer = tagsList.get(tags++);  
+      } else {
-        buffer.setLength(0);
+        tagsAtt.setTags(Collections.<StringBuilder> emptyList());
        buffer.append(lemma.getTag());
      }
-    } while (lemmaListIndex < lemmaList.size() &&
+    } else {
-             equalCharSequences(lemmaList.get(lemmaListIndex).getStem(), currentStem));
+      // One tag (concatenated) per stem (lemma repeated).
      CharSequence currentStem;
      int tags = 0;
      do {
        final WordData lemma = lemmaList.get(lemmaListIndex++);
        currentStem = lemma.getStem();
        final CharSequence tag = lemma.getTag();
        if (tag != null) {
          if (tagsList.size() <= tags) {
            tagsList.add(new StringBuilder());
          }
          final StringBuilder buffer = tagsList.get(tags++);  
          buffer.setLength(0);
          buffer.append(lemma.getTag());
        }
      } while (lemmaListIndex < lemmaList.size() &&
               equalCharSequences(lemmaList.get(lemmaListIndex).getStem(), currentStem));
-    // Set the lemma's base form and tags as attributes.
+      // Set the lemma's base form and tags as attributes.
-    termAtt.setEmpty().append(currentStem);
+      termAtt.setEmpty().append(currentStem);
-    tagsAtt.setTags(tagsList.subList(0, tags));
+      tagsAtt.setTags(tagsList.subList(0, tags));
    }
  }
  /**
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilterFactory.java
@ -17,12 +17,8 @@ package org.apache.lucene.analysis.morfologik;
 * limitations under the License.
 */
 import java.util.Arrays;
 import java.util.Locale;
 import java.util.Map;
 import morfologik.stemming.PolishStemmer.DICTIONARY;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
@ -32,39 +28,28 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
 * &lt;fieldType name="text_polish" class="solr.TextField" positionIncrementGap="100"&gt;
 *   &lt;analyzer&gt;
 *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
- *     &lt;filter class="solr.MorfologikFilterFactory" dictionary="MORFOLOGIK" /&gt;
+ *     &lt;filter class="solr.MorfologikFilterFactory" /&gt;
 *   &lt;/analyzer&gt;
 * &lt;/fieldType&gt;</pre>
 * 
 * <p>Any of Morfologik dictionaries can be used, these are at the moment:
 * <code>MORFOLOGIK</code> (Morfologik's original dictionary),
 * <code>MORFEUSZ</code> (Morfeusz-SIAT),
 * <code>COMBINED</code> (both of the dictionaries above, combined).
 * 
 * @see <a href="http://morfologik.blogspot.com/">Morfologik web site</a>
 */
 public class MorfologikFilterFactory extends TokenFilterFactory {
  /** Dictionary. */
  private DICTIONARY dictionary = DICTIONARY.MORFOLOGIK;
  /** Schema attribute. */
  @Deprecated
  public static final String DICTIONARY_SCHEMA_ATTRIBUTE = "dictionary";
-  
+
  /** Creates a new MorfologikFilterFactory */
  public MorfologikFilterFactory(Map<String,String> args) {
    super(args);
    // Be specific about no-longer-supported dictionary attribute.
    String dictionaryName = get(args, DICTIONARY_SCHEMA_ATTRIBUTE);
    if (dictionaryName != null && !dictionaryName.isEmpty()) {
-      try {
+      throw new IllegalArgumentException("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute is no "
-        DICTIONARY dictionary = DICTIONARY.valueOf(dictionaryName.toUpperCase(Locale.ROOT));
+          + "longer supported (Morfologik has one dictionary): " + dictionaryName);
        assert dictionary != null;
        this.dictionary = dictionary;
      } catch (IllegalArgumentException e) {
        throw new IllegalArgumentException("The " + DICTIONARY_SCHEMA_ATTRIBUTE + " attribute accepts the "
            + "following constants: " + Arrays.toString(DICTIONARY.values()) + ", this value is invalid: "  
            + dictionaryName);
      }
    }
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
@ -72,6 +57,6 @@ public class MorfologikFilterFactory extends TokenFilterFactory {
  @Override
  public TokenStream create(TokenStream ts) {
-    return new MorfologikFilter(ts, dictionary, luceneMatchVersion);
+    return new MorfologikFilter(ts, luceneMatchVersion);
  }
 }
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttribute.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttribute.java
@ -23,9 +23,9 @@ import java.util.List;
 import org.apache.lucene.util.Attribute;
 /** 
- * Morfologik dictionaries provide morphosyntactic annotations for
+ * Morfologik provides morphosyntactic annotations for
 * surface forms. For the exact format and description of these,
- * see the project's documentation (annotations vary by dictionary!).
+ * see the project's documentation.
 */
 public interface MorphosyntacticTagsAttribute extends Attribute {
  /** 
@ -36,7 +36,9 @@ public interface MorphosyntacticTagsAttribute extends Attribute {
  public void setTags(List<StringBuilder> tags);
  /** 
-   * Returns the POS tag of the term.
+   * Returns the POS tag of the term. A single word may have multiple POS tags, 
   * depending on the interpretation (context disambiguation is typically needed
   * to determine which particular tag is appropriate).  
   */
  public List<StringBuilder> getTags();
--- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
@ -22,8 +22,6 @@ import java.io.Reader;
 import java.io.StringReader;
 import java.util.TreeSet;
 import morfologik.stemming.PolishStemmer.DICTIONARY;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
@ -67,10 +65,22 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
    assertAnalyzesToReuse(
        a,
        "T. Gl\u00FCcksberg",
-        new String[] { "to", "tom", "tona", "Gl\u00FCcksberg" },
+        new String[] { "tom", "tona", "Gl\u00FCcksberg" },
-        new int[] { 0, 0, 0, 3  },
+        new int[] { 0, 0, 3  },
-        new int[] { 1, 1, 1, 13 },
+        new int[] { 1, 1, 13 },
-        new int[] { 1, 0, 0, 1  });
+        new int[] { 1, 0, 1  });
  }
  @SuppressWarnings("unused")
  private void dumpTokens(String input) throws IOException {
    TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader(input));
    ts.reset();
    MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class);
    CharTermAttribute charTerm = ts.getAttribute(CharTermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(charTerm.toString() + " => " + attribute.getTags());
    }
  }
  /** Test reuse of MorfologikFilter with leftover stems. */
@ -158,9 +168,8 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
  /** */
  public final void testKeywordAttrTokens() throws IOException {
    final Version version = TEST_VERSION_CURRENT;
    final DICTIONARY dictionary = DICTIONARY.COMBINED;
-    Analyzer a = new MorfologikAnalyzer(version, dictionary) {
+    Analyzer a = new MorfologikAnalyzer(version) {
      @Override
      protected TokenStreamComponents createComponents(String field, Reader reader) {
        final CharArraySet keywords = new CharArraySet(version, 1, false);
@ -169,7 +178,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
        final Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
        TokenStream result = new StandardFilter(TEST_VERSION_CURRENT, src);
        result = new SetKeywordMarkerFilter(result, keywords);
-        result = new MorfologikFilter(result, dictionary, TEST_VERSION_CURRENT); 
+        result = new MorfologikFilter(result, TEST_VERSION_CURRENT); 
        return new TokenStreamComponents(src, result);
      }
--- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
+++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
@ -18,8 +18,8 @@ package org.apache.lucene.analysis.morfologik;
 */
 import java.io.StringReader;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
@ -31,10 +31,7 @@ import org.apache.lucene.analysis.TokenStream;
 public class TestMorfologikFilterFactory extends BaseTokenStreamTestCase {
  public void testCreateDictionary() throws Exception {
    StringReader reader = new StringReader("rowery bilety");
-    Map<String,String> initParams = new HashMap<String,String>();
+    MorfologikFilterFactory factory = new MorfologikFilterFactory(Collections.<String,String>emptyMap());
    initParams.put(MorfologikFilterFactory.DICTIONARY_SCHEMA_ATTRIBUTE,
        "morfologik");
    MorfologikFilterFactory factory = new MorfologikFilterFactory(initParams);
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = factory.create(stream);
    assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
--- a/lucene/licenses/morfologik-fsa-1.5.5.jar.sha1
+++ b/lucene/licenses/morfologik-fsa-1.5.5.jar.sha1
@ -1 +0,0 @@
 7965a39db114f7c404b71d38bc7f0e6a332c4e73
--- a/lucene/licenses/morfologik-fsa-1.6.0.jar.sha1
+++ b/lucene/licenses/morfologik-fsa-1.6.0.jar.sha1
@ -0,0 +1 @@
 397a99307020797e6790f2faf8cf865983b52559
--- a/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt
+++ b/lucene/licenses/morfologik-fsa-LICENSE-BSD.txt
@ -1,6 +1,6 @@
 Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
 All rights reserved.
 Redistribution and use in source and binary forms, with or without modification, 
@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/lucene/licenses/morfologik-polish-1.5.5.jar.sha1
+++ b/lucene/licenses/morfologik-polish-1.5.5.jar.sha1
@ -1 +0,0 @@
 b4a3a9746cab8b2c99c33d2ceeda2ece3f8d8ef2
--- a/lucene/licenses/morfologik-polish-1.6.0.jar.sha1
+++ b/lucene/licenses/morfologik-polish-1.6.0.jar.sha1
@ -0,0 +1 @@
 ca0663530971b54420fc1cea00a6338f68428232
--- a/lucene/licenses/morfologik-polish-LICENSE-BSD.txt
+++ b/lucene/licenses/morfologik-polish-LICENSE-BSD.txt
@ -1,62 +1,26 @@
 BSD-licensed dictionary of Polish (Morfologik)
-Copyright (c) 2012, Marcin Miłkowski
+Morfologik Polish dictionary.
 Version: 2.0 PoliMorf
 Copyright (c) 2013, Marcin Miłkowski
 All rights reserved.
-Redistribution and  use in  source and binary  forms, with  or without
+Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
+modification, are permitted provided that the following conditions are met: 
 met:
-1. Redistributions of source code must retain the above copyright
+1. Redistributions of source code must retain the above copyright notice, this
-   notice, this list of conditions and the following disclaimer.
+  list of conditions and the following disclaimer. 
 2. Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution. 
-2. Redistributions in binary form must reproduce the above copyright
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-   notice, this list of conditions and the following disclaimer in the
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   documentation and/or other materials provided with the
   distribution.
 THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
 OR  IMPLIED WARRANTIES,  INCLUDING, BUT  NOT LIMITED  TO,  THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED.  IN NO EVENT  SHALL COPYRIGHT  HOLDERS OR  CONTRIBUTORS BE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-LIABLE FOR  ANY DIRECT,  INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-CONSEQUENTIAL DAMAGES  (INCLUDING, BUT NOT LIMITED  TO, PROCUREMENT OF
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-SUBSTITUTE  GOODS OR  SERVICES;  LOSS  OF USE,  DATA,  OR PROFITS;  OR
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF LIABILITY,
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-WHETHER IN  CONTRACT, STRICT LIABILITY, OR  TORT (INCLUDING NEGLIGENCE
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 --
 BSD-licensed dictionary of Polish (SGJP)
 http://sgjp.pl/morfeusz/
 Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński, 
 	    	 Marcin Woliński, Robert Wołosz
 All rights reserved.
 Redistribution and  use in  source and binary  forms, with  or without
 modification, are permitted provided that the following conditions are
 met:
 1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the
   distribution.
 THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
 OR  IMPLIED WARRANTIES,  INCLUDING, BUT  NOT LIMITED  TO,  THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED.  IN NO EVENT  SHALL COPYRIGHT  HOLDERS OR  CONTRIBUTORS BE
 LIABLE FOR  ANY DIRECT,  INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR
 CONSEQUENTIAL DAMAGES  (INCLUDING, BUT NOT LIMITED  TO, PROCUREMENT OF
 SUBSTITUTE  GOODS OR  SERVICES;  LOSS  OF USE,  DATA,  OR PROFITS;  OR
 BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF LIABILITY,
 WHETHER IN  CONTRACT, STRICT LIABILITY, OR  TORT (INCLUDING NEGLIGENCE
 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/lucene/licenses/morfologik-polish-NOTICE.txt
+++ b/lucene/licenses/morfologik-polish-NOTICE.txt
@ -1,6 +1,3 @@
-This product includes data from BSD-licensed dictionary of Polish (Morfologik)
+This product includes data from BSD-licensed dictionary of Polish (Morfologik, PoliMorf)
-(http://morfologik.blogspot.com/)
+(http://morfologik.blogspot.com/)
 This product includes data from BSD-licensed dictionary of Polish (SGJP)
 (http://sgjp.pl/morfeusz/)
--- a/lucene/licenses/morfologik-stemming-1.5.5.jar.sha1
+++ b/lucene/licenses/morfologik-stemming-1.5.5.jar.sha1
@ -1 +0,0 @@
 e5dc913adeba3b89539cd5f82e5b88d136a1d85b
--- a/lucene/licenses/morfologik-stemming-1.6.0.jar.sha1
+++ b/lucene/licenses/morfologik-stemming-1.6.0.jar.sha1
@ -0,0 +1 @@
 8a284571bea2cdd305cd86fbac9bab6deef31c7f
--- a/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt
+++ b/lucene/licenses/morfologik-stemming-LICENSE-BSD.txt
@ -1,6 +1,6 @@
 Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
 All rights reserved.
 Redistribution and use in source and binary forms, with or without modification, 
@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/solr/contrib/analysis-extras/ivy.xml
+++ b/solr/contrib/analysis-extras/ivy.xml
@ -20,9 +20,9 @@
    <info organisation="org.apache.solr" module="analysis-extras"/>
    <dependencies>
      <dependency org="com.ibm.icu" name="icu4j" rev="49.1" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-polish" rev="1.5.5" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-polish" rev="1.6.0" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-fsa" rev="1.5.5" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-fsa" rev="1.6.0" transitive="false"/>
-      <dependency org="org.carrot2" name="morfologik-stemming" rev="1.5.5" transitive="false"/>
+      <dependency org="org.carrot2" name="morfologik-stemming" rev="1.6.0" transitive="false"/>
      <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
    </dependencies>
 </ivy-module>
--- a/solr/licenses/morfologik-fsa-1.5.5.jar.sha1
+++ b/solr/licenses/morfologik-fsa-1.5.5.jar.sha1
@ -1 +0,0 @@
 7965a39db114f7c404b71d38bc7f0e6a332c4e73
--- a/solr/licenses/morfologik-fsa-1.6.0.jar.sha1
+++ b/solr/licenses/morfologik-fsa-1.6.0.jar.sha1
@ -0,0 +1 @@
 397a99307020797e6790f2faf8cf865983b52559
--- a/solr/licenses/morfologik-fsa-LICENSE-BSD.txt
+++ b/solr/licenses/morfologik-fsa-LICENSE-BSD.txt
@ -1,6 +1,6 @@
 Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
 All rights reserved.
 Redistribution and use in source and binary forms, with or without modification, 
@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/solr/licenses/morfologik-polish-1.5.5.jar.sha1
+++ b/solr/licenses/morfologik-polish-1.5.5.jar.sha1
@ -1 +0,0 @@
 b4a3a9746cab8b2c99c33d2ceeda2ece3f8d8ef2
--- a/solr/licenses/morfologik-polish-1.6.0.jar.sha1
+++ b/solr/licenses/morfologik-polish-1.6.0.jar.sha1
@ -0,0 +1 @@
 ca0663530971b54420fc1cea00a6338f68428232
--- a/solr/licenses/morfologik-polish-LICENSE-BSD.txt
+++ b/solr/licenses/morfologik-polish-LICENSE-BSD.txt
@ -1,62 +1,26 @@
 BSD-licensed dictionary of Polish (Morfologik)
-Copyright (c) 2012, Marcin Miłkowski
+Morfologik Polish dictionary.
 Version: 2.0 PoliMorf
 Copyright (c) 2013, Marcin Miłkowski
 All rights reserved.
-Redistribution and  use in  source and binary  forms, with  or without
+Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
+modification, are permitted provided that the following conditions are met: 
 met:
-1. Redistributions of source code must retain the above copyright
+1. Redistributions of source code must retain the above copyright notice, this
-   notice, this list of conditions and the following disclaimer.
+  list of conditions and the following disclaimer. 
 2. Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution. 
-2. Redistributions in binary form must reproduce the above copyright
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-   notice, this list of conditions and the following disclaimer in the
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   documentation and/or other materials provided with the
   distribution.
 THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
 OR  IMPLIED WARRANTIES,  INCLUDING, BUT  NOT LIMITED  TO,  THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED.  IN NO EVENT  SHALL COPYRIGHT  HOLDERS OR  CONTRIBUTORS BE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-LIABLE FOR  ANY DIRECT,  INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-CONSEQUENTIAL DAMAGES  (INCLUDING, BUT NOT LIMITED  TO, PROCUREMENT OF
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-SUBSTITUTE  GOODS OR  SERVICES;  LOSS  OF USE,  DATA,  OR PROFITS;  OR
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF LIABILITY,
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-WHETHER IN  CONTRACT, STRICT LIABILITY, OR  TORT (INCLUDING NEGLIGENCE
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 --
 BSD-licensed dictionary of Polish (SGJP)
 http://sgjp.pl/morfeusz/
 Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński, 
 	    	 Marcin Woliński, Robert Wołosz
 All rights reserved.
 Redistribution and  use in  source and binary  forms, with  or without
 modification, are permitted provided that the following conditions are
 met:
 1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the
   distribution.
 THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
 OR  IMPLIED WARRANTIES,  INCLUDING, BUT  NOT LIMITED  TO,  THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED.  IN NO EVENT  SHALL COPYRIGHT  HOLDERS OR  CONTRIBUTORS BE
 LIABLE FOR  ANY DIRECT,  INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR
 CONSEQUENTIAL DAMAGES  (INCLUDING, BUT NOT LIMITED  TO, PROCUREMENT OF
 SUBSTITUTE  GOODS OR  SERVICES;  LOSS  OF USE,  DATA,  OR PROFITS;  OR
 BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF LIABILITY,
 WHETHER IN  CONTRACT, STRICT LIABILITY, OR  TORT (INCLUDING NEGLIGENCE
 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/solr/licenses/morfologik-polish-NOTICE.txt
+++ b/solr/licenses/morfologik-polish-NOTICE.txt
@ -1,6 +1,3 @@
-This product includes data from BSD-licensed dictionary of Polish (Morfologik)
+This product includes data from BSD-licensed dictionary of Polish (Morfologik, PoliMorf)
-(http://morfologik.blogspot.com/)
+(http://morfologik.blogspot.com/)
 This product includes data from BSD-licensed dictionary of Polish (SGJP)
 (http://sgjp.pl/morfeusz/)
--- a/solr/licenses/morfologik-stemming-1.5.5.jar.sha1
+++ b/solr/licenses/morfologik-stemming-1.5.5.jar.sha1
@ -1 +0,0 @@
 e5dc913adeba3b89539cd5f82e5b88d136a1d85b
--- a/solr/licenses/morfologik-stemming-1.6.0.jar.sha1
+++ b/solr/licenses/morfologik-stemming-1.6.0.jar.sha1
@ -0,0 +1 @@
 8a284571bea2cdd305cd86fbac9bab6deef31c7f
--- a/solr/licenses/morfologik-stemming-LICENSE-BSD.txt
+++ b/solr/licenses/morfologik-stemming-LICENSE-BSD.txt
@ -1,6 +1,6 @@
 Copyright (c) 2006 Dawid Weiss
-Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+Copyright (c) 2007-2013 Dawid Weiss, Marcin Miłkowski
 All rights reserved.
 Redistribution and use in source and binary forms, with or without modification, 
@ -26,4 +26,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.