LUCENE-4138: Update morfologik (Polish stemming) to release 1.5.3. Changed the way morphosyntactic tags are exposed (a list of tags for a single lemma instead of a compound tag).

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1354840 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dawid Weiss 2012-06-28 07:35:36 +00:00
parent 25dd0dd17b
commit 8127865e2d
22 changed files with 253 additions and 159 deletions

View File

@ -97,9 +97,9 @@
<classpathentry kind="lib" path="lucene/sandbox/lib/jakarta-regexp-1.4.jar"/>
<classpathentry kind="lib" path="lucene/analysis/icu/lib/icu4j-4.8.1.1.jar"/>
<classpathentry kind="lib" path="lucene/analysis/phonetic/lib/commons-codec-1.6.jar"/>
<classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-fsa-1.5.2.jar"/>
<classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-polish-1.5.2.jar"/>
<classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-stemming-1.5.2.jar"/>
<classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-fsa-1.5.3.jar"/>
<classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-polish-1.5.3.jar"/>
<classpathentry kind="lib" path="lucene/analysis/morfologik/lib/morfologik-stemming-1.5.3.jar"/>
<classpathentry kind="lib" path="lucene/benchmark/lib/commons-compress-1.2.jar"/>
<classpathentry kind="lib" path="lucene/benchmark/lib/xercesImpl-2.9.1.jar"/>
<classpathentry kind="lib" path="solr/lib/commons-fileupload-1.2.1.jar"/>

View File

@ -303,7 +303,7 @@
<dependency>
<groupId>org.carrot2</groupId>
<artifactId>morfologik-polish</artifactId>
<version>1.5.2</version>
<version>1.5.3</version>
</dependency>
<dependency>
<groupId>org.codehaus.woodstox</groupId>

View File

@ -7,6 +7,16 @@ http://s.apache.org/luceneversions
======================= Lucene 5.0.0 =======================
======================= Lucene 4.0.0-BETA =======================
API Changes
* LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.
The tag attribute class has been renamed to MorphosyntacticTagsAttribute and
has a different API (carries a list of tags instead of a compound tag). Upgrade
of embedded morfologik dictionaries to version 1.9. (Dawid Weiss)
======================= Lucene 4.0.0-ALPHA =======================
More information about this release, including any errata related to the

View File

@ -27,9 +27,9 @@
<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<pathelement path="lib/morfologik-fsa-1.5.2.jar"/>
<pathelement path="lib/morfologik-polish-1.5.2.jar"/>
<pathelement path="lib/morfologik-stemming-1.5.2.jar"/>
<pathelement path="lib/morfologik-fsa-1.5.3.jar"/>
<pathelement path="lib/morfologik-polish-1.5.3.jar"/>
<pathelement path="lib/morfologik-stemming-1.5.3.jar"/>
<path refid="base.classpath"/>
</path>

View File

@ -19,9 +19,9 @@
<ivy-module version="2.0">
<info organisation="org.apache.lucene" module="analyzers-morfologik"/>
<dependencies>
<dependency org="org.carrot2" name="morfologik-polish" rev="1.5.2" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-fsa" rev="1.5.2" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-stemming" rev="1.5.2" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-polish" rev="1.5.3" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-fsa" rev="1.5.3" transitive="false"/>
<dependency org="org.carrot2" name="morfologik-stemming" rev="1.5.3" transitive="false"/>
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
</ivy-module>

View File

@ -1 +0,0 @@
1513ee81494d7856f607ff8fffc74b4c6cbe0d48

View File

@ -0,0 +1 @@
d1f729cd3019e6d86485226202f84458141a5688

View File

@ -1,6 +1,6 @@
Copyright (c) 2006 Dawid Weiss
Copyright (c) 2007-2011 Dawid Weiss, Marcin Miłkowski
Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,

View File

@ -1 +0,0 @@
ee23a00580efe973aafa6f2c225e52951832901b

View File

@ -0,0 +1 @@
8217b6f7ad018ceda0e824b2e60340000da4397a

View File

@ -1,13 +1,33 @@
morfologik-polish, TERMS OF LICENCE
BSD-licensed dictionary of Polish (Morfologik)
This JAR contains and makes use of data from Polish ispell/myspell
dictionaries hosted at http://www.sjp.pl/slownik/en/ and is
licenced on the terms of (inter alia): GPL, LGPL, MPL or CC-SA licenses.
Copyright (c) 2012, Marcin Miłkowski
All rights reserved.
Part-of-speech tags were added in Morfologik project and are not found
in the data from sjp.pl.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
-----
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--
BSD-licensed dictionary of Polish (SGJP)
http://sgjp.pl/morfeusz/
@ -39,4 +59,4 @@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,9 +1,6 @@
This product includes BSD-licensed software developed by Dawid Weiss and Marcin Miłkowski
(http://morfologik.blogspot.com/).
This JAR contains and makes use of data from Polish ispell/myspell
dictionaries hosted at http://www.sjp.pl/slownik/en/ and is
licenced on the terms of (inter alia): GPL, LGPL, MPL or CC-SA licenses.
This product includes data from BSD-licensed dictionary of Polish (Morfologik)
(http://morfologik.blogspot.com/)
This product includes data from BSD-licensed dictionary of Polish (SGJP)
(http://sgjp.pl/morfeusz/)

View File

@ -1 +0,0 @@
eba98b7cd049e07d55a64b180345954b62e42ec5

View File

@ -0,0 +1 @@
c4ead57b78fa71b00553ff21da6fb5a326e914e8

View File

@ -1,6 +1,6 @@
Copyright (c) 2006 Dawid Weiss
Copyright (c) 2007-2011 Dawid Weiss, Marcin Miłkowski
Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,

View File

@ -19,8 +19,7 @@ package org.apache.lucene.analysis.morfologik;
*/
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.*;
import morfologik.stemming.*;
import morfologik.stemming.PolishStemmer.DICTIONARY;
@ -30,13 +29,12 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.*;
/**
* {@link TokenFilter} using Morfologik library.
*
* MorfologikFilter contains a {@link MorphosyntacticTagAttribute}, which provides morphosyntactic
* MorfologikFilter contains a {@link MorphosyntacticTagsAttribute}, which provides morphosyntactic
* annotations for produced lemmas. See the Morfologik documentation for details.
*
* @see <a href="http://morfologik.blogspot.com/">Morfologik project page</a>
@ -44,7 +42,7 @@ import org.apache.lucene.util.Version;
public class MorfologikFilter extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final MorphosyntacticTagAttribute tagAtt = addAttribute(MorphosyntacticTagAttribute.class);
private final MorphosyntacticTagsAttribute tagsAtt = addAttribute(MorphosyntacticTagsAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
private final CharsRef scratch = new CharsRef(0);
@ -55,6 +53,8 @@ public class MorfologikFilter extends TokenFilter {
private final IStemmer stemmer;
private List<WordData> lemmaList;
private final ArrayList<StringBuilder> tagsList = new ArrayList<StringBuilder>();
private int lemmaListIndex;
/**
@ -73,9 +73,43 @@ public class MorfologikFilter extends TokenFilter {
}
/**
* Advances {@code lemmaListIndex} through {@code lemmaList} and publishes the
* stem it finds as the current term, together with the tag information that
* belongs to it.
*/
private void popNextLemma() {
// NOTE(review): this hunk appears to show both sides of the change; the three
// statements below look like the previous single-tag implementation and the
// block that follows like its replacement -- confirm against the real file.
final WordData lemma = lemmaList.get(lemmaListIndex++);
termAtt.setEmpty().append(lemma.getStem());
tagAtt.setTag(lemma.getTag());
// Collect all tags for the next unique lemma.
CharSequence currentStem;
// Number of tag buffers filled for the lemma being assembled.
int tags = 0;
do {
final WordData lemma = lemmaList.get(lemmaListIndex++);
currentStem = lemma.getStem();
final CharSequence tag = lemma.getTag();
// Entries without a tag contribute nothing to the tag list.
if (tag != null) {
// Grow the pool of reusable builders only when this slot was never used before.
if (tagsList.size() <= tags) {
tagsList.add(new StringBuilder());
}
// Recycle the pooled builder: wipe the previous content, then copy the tag in.
final StringBuilder buffer = tagsList.get(tags++);
buffer.setLength(0);
buffer.append(lemma.getTag());
}
// Keep consuming entries for as long as the following one has the same stem.
} while (lemmaListIndex < lemmaList.size() &&
equalCharSequences(lemmaList.get(lemmaListIndex).getStem(), currentStem));
// Set the lemma's base form and tags as attributes.
termAtt.setEmpty().append(currentStem);
// Only the first 'tags' pooled builders belong to this lemma; subList exposes
// them as a view of tagsList rather than as a copy.
tagsAtt.setTags(tagsList.subList(0, tags));
}
/**
 * Tells whether two character sequences hold exactly the same characters.
 * Neither argument may be <code>null</code>.
 */
private static final boolean equalCharSequences(CharSequence s1, CharSequence s2) {
  final int length = s1.length();
  if (length != s2.length()) {
    return false;
  }
  // Walk backwards from the last character until a mismatch or the start is reached.
  int pos = length - 1;
  while (pos >= 0 && s1.charAt(pos) == s2.charAt(pos)) {
    pos--;
  }
  // Running off the front of the sequences means every position matched.
  return pos < 0;
}
/**
@ -101,7 +135,7 @@ public class MorfologikFilter extends TokenFilter {
current = captureState();
popNextLemma();
} else {
tagAtt.clear();
tagsAtt.clear();
}
return true;
} else {
@ -130,6 +164,7 @@ public class MorfologikFilter extends TokenFilter {
public void reset() throws IOException {
  // Discard everything left over from the previous stream before the
  // superclass resets: pooled tag buffers, pending lemmas and the read cursor.
  tagsList.clear();
  lemmaList = Collections.emptyList();
  lemmaListIndex = 0;
  super.reset();
}
}

View File

@ -1,92 +0,0 @@
// -*- c-basic-offset: 2 -*-
package org.apache.lucene.analysis.morfologik;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.AttributeImpl;
/**
* Morphosyntactic annotations for surface forms.
* @see MorphosyntacticTagAttribute
*/
public class MorphosyntacticTagAttributeImpl extends AttributeImpl
implements MorphosyntacticTagAttribute, Cloneable {
/**
* Either the original tag from WordData or a clone.
*/
private CharSequence tag;
/**
* Set the tag.
*/
public void setTag(CharSequence pos) {
this.tag = ((pos == null || pos.length() == 0) ? null : pos);
}
/**
* Returns the POS tag of the term. If you need a copy of this char sequence, clone it
* because it may change with each new term!
*/
public CharSequence getTag() {
return tag;
}
public void clear() {
tag = null;
}
public boolean equals(Object other) {
if (other instanceof MorphosyntacticTagAttribute) {
return equal(this.getTag(), ((MorphosyntacticTagAttribute) other).getTag());
}
return false;
}
/**
* Check if two char sequences are the same.
*/
private boolean equal(CharSequence chs1, CharSequence chs2) {
if (chs1 == null && chs2 == null)
return true;
if (chs1 == null || chs2 == null)
return false;
int l1 = chs1.length();
int l2 = chs2.length();
if (l1 != l2)
return false;
for (int i = 0; i < l1; i++)
if (chs1.charAt(i) != chs2.charAt(i))
return false;
return true;
}
public int hashCode() {
return this.tag == null ? 0 : tag.hashCode();
}
public void copyTo(AttributeImpl target) {
((MorphosyntacticTagAttribute) target).setTag(this.tag);
}
public MorphosyntacticTagAttributeImpl clone() {
MorphosyntacticTagAttributeImpl cloned = new MorphosyntacticTagAttributeImpl();
cloned.tag = (tag == null ? null : tag.toString());
return cloned;
}
}

View File

@ -18,6 +18,8 @@ package org.apache.lucene.analysis.morfologik;
* limitations under the License.
*/
import java.util.List;
import org.apache.lucene.util.Attribute;
/**
@ -25,15 +27,18 @@ import org.apache.lucene.util.Attribute;
* surface forms. For the exact format and description of these,
* see the project's documentation (annotations vary by dictionary!).
*/
public interface MorphosyntacticTagAttribute extends Attribute {
public interface MorphosyntacticTagsAttribute extends Attribute {
/**
* Set the POS tag. The default value (no-value) is null.
* @param pos POS tag corresponding to current lemma
*
* @param tags A list of POS tags corresponding to current lemma.
*/
public void setTag(CharSequence pos);
public void setTags(List<StringBuilder> tags);
/** Returns the POS tag of the term. */
public CharSequence getTag();
/**
* Returns the list of POS tags of the term.
*/
public List<StringBuilder> getTags();
/** Clear to default value. */
public void clear();

View File

@ -0,0 +1,96 @@
// -*- c-basic-offset: 2 -*-
package org.apache.lucene.analysis.morfologik;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.*;
import org.apache.lucene.util.AttributeImpl;
/**
 * Morphosyntactic annotations for surface forms.
 * @see MorphosyntacticTagsAttribute
 */
public class MorphosyntacticTagsAttributeImpl extends AttributeImpl
  implements MorphosyntacticTagsAttribute, Cloneable {

  /**
   * A list of potential tag variants for the current token.
   */
  private List<StringBuilder> tags;

  /**
   * Returns the POS tags of the term, or <code>null</code> if none are set.
   * The returned list and its {@link StringBuilder}s are the internal
   * references, not copies; if you need to keep the values, copy the list
   * and clone the builders because they may change with each new term.
   */
  @Override
  public List<StringBuilder> getTags() {
    return tags;
  }

  /** Resets the attribute to the "no tags" value. */
  @Override
  public void clear() {
    tags = null;
  }

  /**
   * Two tag attributes are equal when they carry the same number of tags and
   * the tags at each position consist of the same characters (or when neither
   * carries a tag list at all).
   */
  @Override
  public boolean equals(Object other) {
    if (other instanceof MorphosyntacticTagsAttribute) {
      return equal(this.getTags(), ((MorphosyntacticTagsAttribute) other).getTags());
    }
    return false;
  }

  /**
   * Compares two tag lists by content. List.equals() cannot be used here:
   * StringBuilder inherits identity-based equals() from Object, so a deep
   * copy produced by {@link #copyTo(AttributeImpl)} or {@link #clone()} would
   * never compare equal to its source.
   */
  private static boolean equal(List<StringBuilder> l1, List<StringBuilder> l2) {
    if (l1 == l2) {
      return true;
    }
    if (l1 == null || l2 == null || l1.size() != l2.size()) {
      return false;
    }

    final Iterator<StringBuilder> i1 = l1.iterator();
    final Iterator<StringBuilder> i2 = l2.iterator();
    while (i1.hasNext() && i2.hasNext()) {
      if (!sameChars(i1.next(), i2.next())) {
        return false;
      }
    }
    return !i1.hasNext() && !i2.hasNext();
  }

  /** Character-by-character comparison of two (possibly null) sequences. */
  private static boolean sameChars(CharSequence a, CharSequence b) {
    if (a == b) {
      return true;
    }
    if (a == null || b == null) {
      return false;
    }

    final int len = a.length();
    if (len != b.length()) {
      return false;
    }
    for (int i = 0; i < len; i++) {
      if (a.charAt(i) != b.charAt(i)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Content-based hash code, consistent with {@link #equals(Object)}. It
   * combines per-tag character hashes the same way List.hashCode() combines
   * element hashes, but does not rely on StringBuilder's identity hash.
   */
  @Override
  public int hashCode() {
    if (tags == null) {
      return 0;
    }
    int hash = 1;
    for (StringBuilder b : tags) {
      int tagHash = 0;
      if (b != null) {
        for (int i = 0, len = b.length(); i < len; i++) {
          tagHash = 31 * tagHash + b.charAt(i);
        }
      }
      hash = 31 * hash + tagHash;
    }
    return hash;
  }

  /**
   * Sets the internal tags reference to the given list. The contents
   * are not copied.
   */
  @Override
  public void setTags(List<StringBuilder> tags) {
    this.tags = tags;
  }

  /**
   * Hands the target a deep copy of the tag list: a new list holding a new
   * builder for every tag, so the target does not share mutable state with
   * this attribute.
   *
   * @param target must implement {@link MorphosyntacticTagsAttribute}
   */
  @Override
  public void copyTo(AttributeImpl target) {
    List<StringBuilder> cloned = null;
    if (tags != null) {
      cloned = new ArrayList<StringBuilder>(tags.size());
      for (StringBuilder b : tags) {
        cloned.add(new StringBuilder(b));
      }
    }
    ((MorphosyntacticTagsAttribute) target).setTags(cloned);
  }

  /** Creates a new attribute instance holding a deep copy of the tags. */
  @Override
  public MorphosyntacticTagsAttributeImpl clone() {
    MorphosyntacticTagsAttributeImpl cloned = new MorphosyntacticTagsAttributeImpl();
    this.copyTo(cloned);
    return cloned;
  }

  @Override
  public String toString() {
    return tags == null ? "<no tags>" : tags.toString();
  }
}

View File

@ -1,4 +1,3 @@
// -*- c-basic-offset: 2 -*-
package org.apache.lucene.analysis.morfologik;
/*
@ -20,10 +19,9 @@ package org.apache.lucene.analysis.morfologik;
import java.io.IOException;
import java.io.StringReader;
import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
@ -39,8 +37,8 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
public final void testSingleTokens() throws IOException {
// Feeds single-word inputs through the test analyzer and checks the emitted terms.
Analyzer a = getTestAnalyzer();
assertAnalyzesToReuse(a, "a", new String[] { "a" });
// NOTE(review): this hunk appears to list both sides of the change -- the next two
// assertions look like the previous expectations and the two after them like
// their replacements with additional lemmas; confirm against the real file.
assertAnalyzesToReuse(a, "liście", new String[] { "liść", "list", "lista", });
assertAnalyzesToReuse(a, "danych", new String[] { "dany", "dane", "dać" });
assertAnalyzesToReuse(a, "liście", new String[] { "liście", "liść", "list", "lista" });
assertAnalyzesToReuse(a, "danych", new String[] { "dany", "dana", "dane", "dać" });
// Here the expected output is the input itself.
assertAnalyzesToReuse(a, "ęóąśłżźćń", new String[] { "ęóąśłżźćń" });
}
@ -50,10 +48,10 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesToReuse(
a,
"liście danych",
new String[] { "liść", "list", "lista", "dany", "dane", "dać" },
new int[] { 0, 0, 0, 7, 7, 7 },
new int[] { 6, 6, 6, 13, 13, 13 },
new int[] { 1, 0, 0, 1, 0, 0 });
new String[] { "liście", "liść", "list", "lista", "dany", "dana", "dane", "dać" },
new int[] { 0, 0, 0, 0, 7, 7, 7, 7 },
new int[] { 6, 6, 6, 6, 13, 13, 13, 13 },
new int[] { 1, 0, 0, 0, 1, 0, 0, 0 });
}
/** Test reuse of MorfologikFilter with leftover stems. */
@ -63,7 +61,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
CharTermAttribute termAtt_1 = ts_1.getAttribute(CharTermAttribute.class);
ts_1.reset();
ts_1.incrementToken();
assertEquals("first stream", "liść", termAtt_1.toString());
assertEquals("first stream", "liście", termAtt_1.toString());
TokenStream ts_2 = a.tokenStream("dummy", new StringReader("danych"));
CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class);
@ -76,33 +74,61 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
public final void testCase() throws IOException {
// Runs the same words in different letter case through the analyzer and checks
// the terms expected for each variant.
// NOTE(review): where the same input is asserted twice in a row, the hunk appears
// to show the previous expectation followed by its replacement; confirm against
// the real file.
Analyzer a = getTestAnalyzer();
assertAnalyzesToReuse(a, "AGD", new String[] { "artykuły gospodarstwa domowego" });
assertAnalyzesToReuse(a, "AGD", new String[] { "AGD", "artykuły gospodarstwa domowego" });
assertAnalyzesToReuse(a, "agd", new String[] { "artykuły gospodarstwa domowego" });
assertAnalyzesToReuse(a, "Poznania", new String[] { "Poznań" });
assertAnalyzesToReuse(a, "poznania", new String[] { "poznać" });
assertAnalyzesToReuse(a, "poznania", new String[] { "poznanie", "poznać" });
assertAnalyzesToReuse(a, "Aarona", new String[] { "Aaron" });
assertAnalyzesToReuse(a, "aarona", new String[] { "aarona" });
assertAnalyzesToReuse(a, "Liście", new String[] { "liść", "list", "lista" });
assertAnalyzesToReuse(a, "Liście", new String[] { "liście", "liść", "list", "lista" });
}
/**
* Advances the stream by one token and checks its term text and its
* morphosyntactic tag information.
*
* NOTE(review): two signatures are visible in this hunk; the first (single
* <code>pos</code> string) appears to be the previous one and the varargs
* form its replacement -- confirm against the real file.
*/
private void assertPOSToken(TokenStream ts, String term, String pos) throws IOException {
private void assertPOSToken(TokenStream ts, String term, String... tags) throws IOException {
ts.incrementToken();
assertEquals(term, ts.getAttribute(CharTermAttribute.class).toString());
assertEquals(pos, ts.getAttribute(MorphosyntacticTagAttribute.class).getTag().toString());
// Sorted sets of strings make the comparison independent of the order in which
// the tags are reported and of duplicates.
TreeSet<String> actual = new TreeSet<String>();
TreeSet<String> expected = new TreeSet<String>();
for (StringBuilder b : ts.getAttribute(MorphosyntacticTagsAttribute.class).getTags()) {
actual.add(b.toString());
}
for (String s : tags) {
expected.add(s);
}
// On a mismatch, print both sets before the assertion fails.
if (!expected.equals(actual)) {
System.out.println("Expected:\n" + expected);
System.out.println("Actual:\n" + actual);
assertEquals(expected, actual);
}
}
/**
* Test morphosyntactic annotations: analyzes the single word "liście" and checks,
* token by token, the emitted term and the tag(s) attached to it.
*/
public final void testPOSAttribute() throws IOException {
TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader("liście"));
// NOTE(review): the three single-tag assertions below appear to be the previous
// expectations (compound tags such as "acc.nom.voc"); the multi-tag assertions
// after them look like their replacement, listing one tag per variant -- confirm
// against the real file.
assertPOSToken(ts, "liść", "subst:pl:acc.nom.voc:m3");
assertPOSToken(ts, "list", "subst:sg:loc.voc:m3");
assertPOSToken(ts, "lista", "subst:sg:dat.loc:f");
assertPOSToken(ts, "liście",
"subst:sg:acc:n2",
"subst:sg:nom:n2",
"subst:sg:voc:n2");
assertPOSToken(ts, "liść",
"subst:pl:acc:m3",
"subst:pl:nom:m3",
"subst:pl:voc:m3");
assertPOSToken(ts, "list",
"subst:sg:loc:m3",
"subst:sg:voc:m3");
assertPOSToken(ts, "lista",
"subst:sg:dat:f",
"subst:sg:loc:f");
}
/** blast some random strings through the analyzer */
public void testRandom() throws Exception {
checkRandomData(random(), getTestAnalyzer(), 10000 * RANDOM_MULTIPLIER);

View File

@ -312,9 +312,7 @@
<property name="analyzers-morfologik.jar" value="${common.dir}/build/analysis/morfologik/lucene-analyzers-morfologik-${version}.jar"/>
<fileset id="analyzers-morfologik.fileset" dir="${common.dir}">
<include name="build/analysis/morfologik/lucene-analyzers-morfologik-${version}.jar" />
<include name="analysis/morfologik/lib/morfologik-fsa-1.5.2.jar" />
<include name="analysis/morfologik/lib/morfologik-polish-1.5.2.jar" />
<include name="analysis/morfologik/lib/morfologik-stemming-1.5.2.jar" />
<include name="analysis/morfologik/lib/morfologik-*.jar" />
</fileset>
<target name="check-analyzers-morfologik-uptodate" unless="analyzers-morfologik.uptodate">
<module-uptodate name="analysis/morfologik" jarfile="${analyzers-morfologik.jar}" property="analyzers-morfologik.uptodate"/>

View File

@ -7,7 +7,6 @@ import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.solr.schema.IndexSchema;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more