LUCENE-9220: regenerate all stemmers/stopwords/test data from snowball 2.0 (#1262)

Previous situation:

* The snowball base classes (Among, SnowballProgram, etc) had accumulated local performance-related changes. There was a task that would also "patch" generated classes (e.g. GermanStemmer) after-the-fact.
* Snowball classes had many "non-changes" from the original, such as removal of tabs, addition of javadocs, license headers, etc.
* Snowball test data (inputs and expected stems) was incorporated into lucene testing, but this was maintained manually. Also files had become large, making the test too slow (Nightly).
* Snowball stopwords lists from their website were manually maintained. In some cases encoding fixes were manually applied.
* Some generated stemmers (such as Estonian and Armenian) exist in lucene, but have no corresponding `.sbl` file in snowball sources at all.

Besides this mess, snowball project is "moving along" and acquiring new languages, adding non-BSD-licensed test data, huge test data, and other complexity. So it is time to automate the integration better.

New situation:

* Lucene has a `gradle snowball` regeneration task. It works on Linux or Mac only. It checks out their repos, applies the `snowball.patch` in our repository, compiles snowball stemmers, regenerates all java code, applies any adjustments so that our build is happy.
* Test data is automatically regenerated from the commit hash of the snowball test data repository. Not all languages are tested from their data: only where the license is simple BSD. Test data is also (deterministically) sampled, so that we don't have huge files. We just want to make sure our integration works.
* Randomized tests are still set to test every language with generated fake words. The regeneration task ensures all languages get tested (it writes a simple text file list of them).
* Stopword files are automatically regenerated from the commit hash of the snowball website repository.
* The regeneration procedure is idempotent. This way when stuff does change, you know exactly what happened. For example if test data changes to a different license, you may see a git deletion. Or if a new language/stopwords/test data gets added, you will see git additions.
This commit is contained in:
Robert Muir 2020-02-17 12:38:01 -05:00 committed by GitHub
parent 188f620208
commit 0203815ab2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
87 changed files with 32075 additions and 29658 deletions

View File

@ -85,6 +85,7 @@ apply from: file('gradle/validation/owasp-dependency-check.gradle')
apply from: file('gradle/generation/jflex.gradle')
apply from: file('gradle/generation/javacc.gradle')
apply from: file('gradle/generation/util.gradle')
apply from: file('gradle/generation/snowball.gradle')
// Additional development aids.
apply from: file('gradle/maven/maven-local.gradle')

View File

@ -0,0 +1,108 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Applies the download plugin; the snowball download tasks in this file are declared with type: Download.
apply plugin: "de.undercouch.download"
// Root-level entry point: `gradlew snowball` simply delegates to the generator
// task that lives in the analysis-common project.
configure(rootProject) {
task snowball() {
description "Regenerate snowball-based sources, stopwords, and tests for ...lucene/analysis."
group "generation"
// All real work is done by snowballGen in :lucene:analysis:common.
dependsOn ":lucene:analysis:common:snowballGen"
}
}
// Snowball regeneration for :lucene:analysis:common. Three pinned GitHub archives
// (stemmer sources, website, test data) are downloaded and unpacked under the build
// directory, then a bash script regenerates everything from them.
configure(project(":lucene:analysis:common")) {
ext {
// git commit hash of source code https://github.com/snowballstem/snowball/
snowballStemmerCommit = "53739a805cfa6c77ff8496dc711dc1c106d987c1"
// git commit hash of stopwords https://github.com/snowballstem/snowball-website
snowballWebsiteCommit = "ff891e74f08e7315523ee3c0cad55bb1b7831b9d"
// git commit hash of test data https://github.com/snowballstem/snowball-data
snowballDataCommit = "9145f8732ec952c8a3d1066be251da198a8bc792"
// Working area; each checkout directory name embeds its commit hash, so changing
// a hash above results in a fresh download location.
snowballWorkDir = file("${buildDir}/snowball")
snowballStemmerDir = file("${snowballWorkDir}/stemmers-${snowballStemmerCommit}")
snowballWebsiteDir = file("${snowballWorkDir}/website-${snowballWebsiteCommit}")
snowballDataDir = file("${snowballWorkDir}/data-${snowballDataCommit}")
// Local patch applied to the snowball sources, and the script that drives regeneration.
snowballPatchFile = rootProject.file("gradle/generation/snowball.patch")
snowballScript = rootProject.file("gradle/generation/snowball.sh")
}
// downloads snowball stemmers (or use cached copy)
task downloadSnowballStemmers(type: Download) {
// The patch file is declared as a task input.
inputs.file(snowballPatchFile)
src "https://github.com/snowballstem/snowball/archive/${snowballStemmerCommit}.zip"
def snowballStemmerZip = file("${snowballStemmerDir}.zip")
dest snowballStemmerZip
// Download plugin options: `overwrite false` keeps an existing zip (the "cached copy");
// `tempAndMove true` presumably downloads to a temporary file first - see plugin docs.
overwrite false
tempAndMove true
doLast {
// Unpack over any previous contents; the mapper drops the first directory level
// of each archive entry so files land directly in snowballStemmerDir.
ant.unzip(src: snowballStemmerZip, dest: snowballStemmerDir, overwrite: "true") {
ant.cutdirsmapper(dirs: "1")
}
// Apply our local modifications on top of the freshly unpacked sources
// (strip: "1" removes one leading path component, like `patch -p1`).
ant.patch(patchfile: snowballPatchFile, dir: snowballStemmerDir, strip: "1")
}
}
// downloads snowball website (or use cached copy)
task downloadSnowballWebsite(type: Download) {
src "https://github.com/snowballstem/snowball-website/archive/${snowballWebsiteCommit}.zip"
def snowballWebsiteZip = file("${snowballWebsiteDir}.zip")
dest snowballWebsiteZip
overwrite false
tempAndMove true
doLast {
// Same unpacking scheme as the stemmer archive; no patch is applied here.
ant.unzip(src: snowballWebsiteZip, dest: snowballWebsiteDir, overwrite: "true") {
ant.cutdirsmapper(dirs: "1")
}
}
}
// downloads snowball test data (or use cached copy)
task downloadSnowballData(type: Download) {
src "https://github.com/snowballstem/snowball-data/archive/${snowballDataCommit}.zip"
def snowballDataZip = file("${snowballDataDir}.zip")
dest snowballDataZip
overwrite false
tempAndMove true
doLast {
// Same unpacking scheme as the stemmer archive; no patch is applied here.
ant.unzip(src: snowballDataZip, dest: snowballDataDir, overwrite: "true") {
ant.cutdirsmapper(dirs: "1")
}
}
}
// runs shell script to regenerate stemmers, base stemming subclasses, test data, and stopwords.
task snowballGen() {
dependsOn downloadSnowballStemmers
dependsOn downloadSnowballWebsite
dependsOn downloadSnowballData
doLast {
// Argument order must match what the script expects:
// <snowball> <snowball-website> <snowball-data> <analysis-common project dir>.
project.exec {
executable "bash"
args = [snowballScript, snowballStemmerDir, snowballWebsiteDir, snowballDataDir, projectDir]
}
}
}
}

File diff suppressed because it is too large Load Diff

123
gradle/generation/snowball.sh Executable file
View File

@ -0,0 +1,123 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# remove this script when problems are fixed
# Positional arguments: snowball checkout, snowball-website checkout,
# snowball-data checkout, and the analysis-common project directory.
# They are read before `set -u` is enabled; the count is validated below.
SRCDIR=$1
WWWSRCDIR=$2
TESTSRCDIR=$3
PROJECTDIR=$4
# Output locations inside the analysis-common project:
# generated Java sources, stopword resources, and test data.
DESTDIR="${PROJECTDIR}/src/java/org/tartarus/snowball"
WWWDSTDIR="${PROJECTDIR}/src/resources/org/apache/lucene/analysis/snowball"
TESTDSTDIR="${PROJECTDIR}/src/test/org/apache/lucene/analysis/snowball"
# Argument check: while this ERR trap is installed, a failing `test` prints the usage and exits with status 2.
trap 'echo "usage: ./snowball.sh <snowball> <snowball-website> <snowball-data> <analysis-common>" && exit 2' ERR
test $# -eq 4
# From here on the ERR trap reports the failing command and its location instead.
trap 'echo "*** BUILD FAILED ***" $BASH_SOURCE:$LINENO: error: "$BASH_COMMAND" returned $?' ERR
# Abort on errors (-e), let functions and subshells inherit the ERR trap (-E),
# treat unset variables as errors (-u), and fail a pipeline if any stage fails.
set -eEuo pipefail
# reformats file indentation to kill the crazy space/tabs mix.
# prevents early blindness !
function reformat_java() {
# convert tabs to 8 spaces, then reduce indent from 4 space to 2 space
target=$1
tmpfile=$(mktemp)
cat ${target} | perl -p -e 's/\t/ /g' | perl -p -e 's/ / /g' > ${tmpfile}
mv ${tmpfile} ${target}
}
# generate stuff with existing makefile, just 'make' will try to do crazy stuff with e.g. python
# and likely fail. so only ask for our specific target.
# Runs in a subshell so the working directory of this script is unchanged.
(cd ${SRCDIR} && chmod a+x libstemmer/mkalgorithms.pl && make dist_libstemmer_java)
# Copy the three hand-written base classes, prefixing each with the snowball license text.
for file in "SnowballStemmer.java" "Among.java" "SnowballProgram.java"; do
# add license header to files since they have none, otherwise the rat license check will complain
# The first echo truncates/creates the destination; the rest is appended after it.
echo "/*" > ${DESTDIR}/${file}
cat ${SRCDIR}/COPYING >> ${DESTDIR}/${file}
echo "*/" >> ${DESTDIR}/${file}
cat ${SRCDIR}/java/org/tartarus/snowball/${file} >> ${DESTDIR}/${file}
reformat_java ${DESTDIR}/${file}
done
# Start from a clean slate: remove previously generated stemmers and the language list,
# so both are rebuilt from whatever the snowball checkout produced.
rm ${DESTDIR}/ext/*Stemmer.java
rm -f ${TESTDSTDIR}/languages.txt
for file in ${SRCDIR}/java/org/tartarus/snowball/ext/*.java; do
# title-case the classes (fooStemmer -> FooStemmer) so they obey normal java conventions
base=$(basename $file)
# strip the file extension to get the generated class name
oldclazz="${base%.*}"
# one-off
if [ "${oldclazz}" == "kraaij_pohlmannStemmer" ]; then
newclazz="KpStemmer"
else
# ${var^} upper-cases the first character (bash 4+ parameter expansion)
newclazz=${oldclazz^}
fi
# Record the language name (class name without the "Stemmer" suffix) in languages.txt.
echo ${newclazz} | sed -e 's/Stemmer//' >> ${TESTDSTDIR}/languages.txt
# Rename every occurrence of the class inside the source, writing it under the new file name.
cat $file | sed "s/${oldclazz}/${newclazz}/g" > ${DESTDIR}/ext/${newclazz}.java
reformat_java ${DESTDIR}/ext/${newclazz}.java
done
# regenerate test data
# Remove the previous language list and archives so the output reflects only the current checkout.
rm -f ${TESTDSTDIR}/test_languages.txt
rm -f ${TESTDSTDIR}/*.zip
for file in ${TESTSRCDIR}/*; do
# look for input (voc.txt) and expected output (output.txt) without any special licenses (COPYING)
if [ -f "${file}/voc.txt" ] && [ -f "${file}/output.txt" ] && [ ! -f "${file}/COPYING" ]; then
language=$(basename ${file})
# same one-off rename as used for the generated stemmer class (KpStemmer)
if [ "${language}" == "kraaij_pohlmann" ]; then
language="kp"
fi
# make the .zip reproducible if data hasn't changed.
arbitrary_timestamp="200001010000"
# some test files are huge, randomly sample up to this amount
row_limit="2000"
tmpdir=$(mktemp -d)
# Deterministic byte stream for shuf: encrypting /dev/zero with a fixed key, no salt and a
# fixed IV yields the same bytes on every run, so the sample is repeatable.
myrandom="openssl enc -aes-256-ctr -k ${arbitrary_timestamp} -nosalt -iv 0 -md md5"
for data in "voc.txt" "output.txt"; do
# Both files are sampled with an identical random source.
# NOTE(review): presumably this keeps input and expected-output rows paired, which
# assumes voc.txt and output.txt have the same number of lines - confirm.
# The fixed mtime keeps the resulting zip stable between runs.
shuf -n ${row_limit} --random-source=<(${myrandom} < /dev/zero 2>/dev/null) ${file}/${data} > ${tmpdir}/${data} \
&& touch -t ${arbitrary_timestamp} ${tmpdir}/${data}
done
# --junk-paths stores the entries without the temp directory path; -X omits extra file attributes.
zip --quiet --junk-paths -X -9 ${TESTDSTDIR}/${language}.zip ${tmpdir}/voc.txt ${tmpdir}/output.txt
echo "${language}" >> ${TESTDSTDIR}/test_languages.txt
rm -r ${tmpdir}
fi
done
# regenerate stopwords data
# Remove previously generated stopword files so the output reflects only the current checkout.
rm -f ${WWWDSTDIR}/*_stop.txt
for file in ${WWWSRCDIR}/algorithms/*/stop.txt; do
# the language name is the directory that contains stop.txt
language=$(basename $(dirname ${file}))
# Create (or truncate) the destination file with an attribution header;
# the stopword content is appended after it below.
cat > ${WWWDSTDIR}/${language}_stop.txt << EOF
| From https://snowballstem.org/algorithms/${language}/stop.txt
| This file is distributed under the BSD License.
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
EOF
case "$language" in
danish)
# clear up some slight mojibake on the website. TODO: fix this file!
# NOTE(review): as displayed, the second sed replaces a string with itself (a no-op);
# its pattern was presumably the mis-encoded form of that letter - confirm against the upstream file.
cat $file | sed 's/Ã¥/å/g' | sed 's/æ/æ/g' >> ${WWWDSTDIR}/${language}_stop.txt
;;
*)
# try to confirm its really UTF-8
# (a UTF-8 to UTF-8 conversion leaves valid input unchanged; iconv is expected to fail on invalid bytes)
iconv -f UTF-8 -t UTF-8 $file >> ${WWWDSTDIR}/${language}_stop.txt
;;
esac
done

View File

@ -210,7 +210,7 @@ class RatTask extends DefaultTask {
// svg files generated by gnuplot
pattern(substring: "Produced by GNUPLOT")
// snowball stemmers generated by snowball compiler
pattern(substring: "This file was generated automatically by the Snowball to Java compiler")
pattern(substring: "Generated by Snowball")
// parsers generated by antlr
pattern(substring: "ANTLR GENERATED CODE")
}

View File

@ -72,6 +72,10 @@ Improvements
This tool no longer forceMerge(1)s to a single segment by default. If you
rely upon this behavior, pass -max-segments 1 instead. (Robert Muir)
* LUCENE-9220: Upgrade snowball to 2.0. New snowball stemmers: Hindi, Indonesian,
Nepali, Serbian, and Tamil. New stoplist: Indonesian. Adds gradle 'snowball'
task to regenerate and ease future upgrades. (Robert Muir, Dawid Weiss)
Bug fixes
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while

View File

@ -1,22 +1,12 @@
Lucene Analyzers README file
This project provides pre-compiled version of the Snowball stemmers
based on revision 502 of the Tartarus Snowball repository,
now located at https://github.com/snowballstem/snowball/tree/e103b5c257383ee94a96e7fc58cab3c567bf079b (GitHub),
This project provides pre-compiled versions of the Snowball stemmers,
now located at https://github.com/snowballstem/snowball/tree/53739a805cfa6c77ff8496dc711dc1c106d987c1 (GitHub),
together with classes integrating them with the Lucene search engine.
A few changes has been made to the static Snowball code and compiled stemmers:
* Class SnowballProgram is made abstract and contains new abstract method stem() to avoid reflection in Lucene filter class SnowballFilter.
* All use of StringBuffers has been refactored to StringBuilder for speed.
* Snowball BSD license header has been added to the Java classes to avoid having RAT adding new ASL headers.
* Uses Java 7 MethodHandles and fixes method visibility bug: http://article.gmane.org/gmane.comp.search.snowball/1139
If you want to add new stemmers, use the exact revision / Git commit above to generate the Java class, place it
in src/java/org/tartarus/snowball/ext, and finally execute "ant patch-snowball". The latter will change the APIs
of the generated class to make it compatible. Already patched classes are not modified.
The Arabic stemmer has been generated from https://github.com/snowballstem/snowball/blob/master/algorithms/arabic.sbl
using the latest version of snowball and patched manually.
The snowball tree needs patches applied to properly generate efficient code for lucene.
You can regenerate everything with 'gradlew snowball'
Refer to gradle/generation/snowball* files in the build for upgrading snowball.
IMPORTANT NOTICE ON BACKWARDS COMPATIBILITY!

View File

@ -29,8 +29,6 @@
<import file="../analysis-module-build.xml"/>
<property name="snowball.programs.dir" location="src/java/org/tartarus/snowball/ext"/>
<property name="unicode-props-file" location="src/java/org/apache/lucene/analysis/util/UnicodeProps.java"/>
<!-- Because of a bug in JFlex's ant task, HTMLStripCharFilter has to be generated last. -->
@ -124,14 +122,4 @@
<target name="regenerate" depends="jflex,unicode-data"/>
<target name="patch-snowball" description="Patches all snowball programs in '${snowball.programs.dir}' to make them work with MethodHandles">
<fileset id="snowball.programs" dir="${snowball.programs.dir}" includes="*Stemmer.java"/>
<replaceregexp match="^public class \w+Stemmer\b" replace="@SuppressWarnings(&quot;unused&quot;) \0" flags="m" encoding="UTF-8">
<fileset refid="snowball.programs"/>
</replaceregexp>
<replaceregexp match="private final static \w+Stemmer methodObject\b.*$" replace="/* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();" flags="m" encoding="UTF-8">
<fileset refid="snowball.programs"/>
</replaceregexp>
<fixcrlf srcdir="${snowball.programs.dir}" includes="*Stemmer.java" tab="remove" tablength="2" encoding="UTF-8" javafiles="yes" fixlast="yes"/>
</target>
</project>

View File

@ -25,7 +25,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.SnowballStemmer;
/**
* A filter that stems words using a Snowball-generated stemmer.
@ -51,12 +51,12 @@ import org.tartarus.snowball.SnowballProgram;
*/
public final class SnowballFilter extends TokenFilter {
private final SnowballProgram stemmer;
private final SnowballStemmer stemmer;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public SnowballFilter(TokenStream input, SnowballProgram stemmer) {
public SnowballFilter(TokenStream input, SnowballStemmer stemmer) {
super(input);
this.stemmer = stemmer;
}
@ -76,8 +76,8 @@ public final class SnowballFilter extends TokenFilter {
//Class.forName is frowned upon in place of the ResourceLoader but in this case,
// the factory will use the other constructor so that the program is already loaded.
try {
Class<? extends SnowballProgram> stemClass =
Class.forName("org.tartarus.snowball.ext." + name + "Stemmer").asSubclass(SnowballProgram.class);
Class<? extends SnowballStemmer> stemClass =
Class.forName("org.tartarus.snowball.ext." + name + "Stemmer").asSubclass(SnowballStemmer.class);
stemmer = stemClass.getConstructor().newInstance();
} catch (Exception e) {
throw new IllegalArgumentException("Invalid stemmer class specified: " + name, e);

View File

@ -27,7 +27,7 @@ import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.SnowballStemmer;
/**
* Factory for {@link SnowballFilter}, with configurable language
@ -54,7 +54,7 @@ public class SnowballPorterFilterFactory extends TokenFilterFactory implements R
private final String language;
private final String wordFiles;
private Class<? extends SnowballProgram> stemClass;
private Class<? extends SnowballStemmer> stemClass;
private CharArraySet protectedWords = null;
/** Creates a new SnowballPorterFilterFactory */
@ -70,7 +70,7 @@ public class SnowballPorterFilterFactory extends TokenFilterFactory implements R
@Override
public void inform(ResourceLoader loader) throws IOException {
String className = "org.tartarus.snowball.ext." + language + "Stemmer";
stemClass = loader.newInstance(className, SnowballProgram.class).getClass();
stemClass = loader.newInstance(className, SnowballStemmer.class).getClass();
if (wordFiles != null) {
protectedWords = getWordSet(loader, wordFiles, false);
@ -79,7 +79,7 @@ public class SnowballPorterFilterFactory extends TokenFilterFactory implements R
@Override
public TokenFilter create(TokenStream input) {
SnowballProgram program;
SnowballStemmer program;
try {
program = stemClass.getConstructor().newInstance();
} catch (Exception e) {

View File

@ -1,34 +1,34 @@
/*
Copyright (c) 2001, Dr Martin Porter
Copyright (c) 2002, Richard Boulton
Copyright (c) 2004,2005, Richard Boulton
Copyright (c) 2013, Yoshiki Shibukawa
Copyright (c) 2006,2007,2009,2010,2011,2014-2019, Olly Betts
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holders nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the Snowball project nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.tartarus.snowball;
import java.lang.invoke.MethodHandle;
@ -37,47 +37,41 @@ import java.lang.invoke.MethodType;
import java.util.Locale;
/**
* This is the rev 502 of the Snowball SVN trunk,
* now located at <a target="_blank" href="https://github.com/snowballstem/snowball/tree/e103b5c257383ee94a96e7fc58cab3c567bf079b">GitHub</a>,
* but modified:
* <ul>
* <li>made abstract and introduced abstract method stem to avoid expensive reflection in filter class.
* <li>refactored StringBuffers to StringBuilder
* <li>uses char[] as buffer instead of StringBuffer/StringBuilder
* <li>eq_s,eq_s_b,insert,replace_s take CharSequence like eq_v and eq_v_b
* <li>use MethodHandles and fix <a target="_blank" href="http://article.gmane.org/gmane.comp.search.snowball/1139">method visibility bug</a>.
* </ul>
* Internal class used by Snowball stemmers
*/
public final class Among {
public Among(String s, int substring_i, int result,
String methodname, MethodHandles.Lookup methodobject) {
this.s_size = s.length();
public class Among {
public Among (String s, int substring_i, int result) {
this.s = s.toCharArray();
this.substring_i = substring_i;
this.result = result;
if (methodname.isEmpty()) {
this.method = null;
} else {
final Class<? extends SnowballProgram> clazz = methodobject.lookupClass().asSubclass(SnowballProgram.class);
this.method = null;
}
public Among (String s, int substring_i, int result, String methodname,
MethodHandles.Lookup methodobject) {
this.s = s.toCharArray();
this.substring_i = substring_i;
this.result = result;
final Class<? extends SnowballProgram> clazz = methodobject.lookupClass().asSubclass(SnowballProgram.class);
if (methodname.length() > 0) {
try {
this.method = methodobject.findVirtual(clazz, methodname, MethodType.methodType(boolean.class))
.asType(MethodType.methodType(boolean.class, SnowballProgram.class));
.asType(MethodType.methodType(boolean.class, SnowballProgram.class));
} catch (NoSuchMethodException | IllegalAccessException e) {
throw new RuntimeException(String.format(Locale.ENGLISH,
"Snowball program '%s' is broken, cannot access method: boolean %s()",
clazz.getSimpleName(), methodname
"Snowball program '%s' is broken, cannot access method: boolean %s()",
clazz.getSimpleName(), methodname
), e);
}
} else {
this.method = null;
}
}
final int s_size; /* search string */
final char[] s; /* search string */
final int substring_i; /* index to longest matching substring */
final int result; /* result of the lookup */
final int result; /* result of the lookup */
// Make sure this is not accessible outside package for Java security reasons!
final MethodHandle method; /* method to use if substring matches */
}
};

View File

@ -1,407 +1,370 @@
/*
Copyright (c) 2001, Dr Martin Porter
Copyright (c) 2002, Richard Boulton
Copyright (c) 2004,2005, Richard Boulton
Copyright (c) 2013, Yoshiki Shibukawa
Copyright (c) 2006,2007,2009,2010,2011,2014-2019, Olly Betts
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holders nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the Snowball project nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.tartarus.snowball;
import java.lang.reflect.UndeclaredThrowableException;
import org.apache.lucene.util.ArrayUtil;
import java.io.Serializable;
/**
* This is the rev 502 of the Snowball SVN trunk,
* now located at <a target="_blank" href="https://github.com/snowballstem/snowball/tree/e103b5c257383ee94a96e7fc58cab3c567bf079b">GitHub</a>,
* but modified:
* <ul>
* <li>made abstract and introduced abstract method stem to avoid expensive reflection in filter class.
* <li>refactored StringBuffers to StringBuilder
* <li>uses char[] as buffer instead of StringBuffer/StringBuilder
* <li>eq_s,eq_s_b,insert,replace_s take CharSequence like eq_v and eq_v_b
* <li>use MethodHandles and fix <a target="_blank" href="http://article.gmane.org/gmane.comp.search.snowball/1139">method visibility bug</a>.
* </ul>
* Base class for a snowball stemmer
*/
public abstract class SnowballProgram {
public class SnowballProgram implements Serializable {
protected SnowballProgram()
{
current = new char[8];
setCurrent("");
}
protected SnowballProgram()
{
current = new char[8];
setCurrent("");
}
static final long serialVersionUID = 2016072500L;
public abstract boolean stem();
/**
* Set the current string.
*/
public void setCurrent(String value)
{
current = value.toCharArray();
cursor = 0;
limit = value.length();
limit_backward = 0;
bra = cursor;
ket = limit;
}
/**
* Set the current string.
*/
public void setCurrent(String value)
{
current = value.toCharArray();
cursor = 0;
limit = value.length();
limit_backward = 0;
bra = cursor;
ket = limit;
}
/**
* Get the current string.
*/
public String getCurrent()
{
return new String(current, 0, limit);
}
/**
* Get the current string.
*/
public String getCurrent()
{
return new String(current, 0, limit);
}
/**
* Set the current string.
* @param text character array containing input
* @param length valid length of text.
*/
public void setCurrent(char text[], int length) {
current = text;
cursor = 0;
limit = length;
limit_backward = 0;
bra = cursor;
ket = limit;
}
/**
* Set the current string.
* @param text character array containing input
* @param length valid length of text.
*/
public void setCurrent(char text[], int length) {
current = text;
cursor = 0;
limit = length;
limit_backward = 0;
bra = cursor;
ket = limit;
}
/**
* Get the current buffer containing the stem.
* <p>
* NOTE: this may be a reference to a different character array than the
* one originally provided with setCurrent, in the exceptional case that
* stemming produced a longer intermediate or result string.
* </p>
* <p>
* It is necessary to use {@link #getCurrentBufferLength()} to determine
* the valid length of the returned buffer. For example, many words are
* stemmed simply by subtracting from the length to remove suffixes.
* </p>
* @see #getCurrentBufferLength()
*/
public char[] getCurrentBuffer() {
return current;
}
/**
* Get the valid length of the character array in
* {@link #getCurrentBuffer()}.
* @return valid length of the array.
*/
public int getCurrentBufferLength() {
return limit;
}
/**
* Get the current buffer containing the stem.
* <p>
* NOTE: this may be a reference to a different character array than the
* one originally provided with setCurrent, in the exceptional case that
* stemming produced a longer intermediate or result string.
* </p>
* <p>
* It is necessary to use {@link #getCurrentBufferLength()} to determine
* the valid length of the returned buffer. For example, many words are
* stemmed simply by subtracting from the length to remove suffixes.
* </p>
* @see #getCurrentBufferLength()
*/
public char[] getCurrentBuffer() {
return current;
}
// current string
private char current[];
/**
* Get the valid length of the character array in
* {@link #getCurrentBuffer()}.
* @return valid length of the array.
*/
public int getCurrentBufferLength() {
return limit;
}
protected int cursor;
protected int limit;
protected int limit_backward;
protected int bra;
protected int ket;
// current string
private char current[];
protected void copy_from(SnowballProgram other)
{
current = other.current;
cursor = other.cursor;
limit = other.limit;
limit_backward = other.limit_backward;
bra = other.bra;
ket = other.ket;
}
protected int cursor;
protected int limit;
protected int limit_backward;
protected int bra;
protected int ket;
protected boolean in_grouping(char [] s, int min, int max)
{
if (cursor >= limit) return false;
char ch = current[cursor];
if (ch > max || ch < min) return false;
ch -= min;
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
/**
 * Copy constructor: creates a program that starts from the state of
 * {@code other}.
 * <p>
 * The character buffer is shared by reference (it is not cloned); the
 * cursor, both limits and the slice markers are copied by value.
 * </p>
 * @param other the program whose state is adopted
 */
public SnowballProgram(SnowballProgram other) {
    // scanning position and bounds
    this.cursor = other.cursor;
    this.limit = other.limit;
    this.limit_backward = other.limit_backward;
    // slice markers
    this.bra = other.bra;
    this.ket = other.ket;
    // shared buffer reference
    this.current = other.current;
}
/**
 * Makes this program mirror the state of {@code other}.
 * <p>
 * The character buffer is shared by reference (it is not cloned); the
 * cursor, both limits and the slice markers are copied by value.
 * </p>
 * @param other the program whose state is adopted
 */
protected void copy_from(SnowballProgram other) {
    // scanning position and bounds
    this.cursor = other.cursor;
    this.limit = other.limit;
    this.limit_backward = other.limit_backward;
    // slice markers
    this.bra = other.bra;
    this.ket = other.ket;
    // shared buffer reference
    this.current = other.current;
}
/**
 * Forward test: if the character at the cursor belongs to the grouping,
 * consume it.
 *
 * @param s   bitmap of the grouping; bit {@code (ch - min) & 7} of
 *            {@code s[(ch - min) >> 3]} is set when {@code ch} is a member
 * @param min smallest character value covered by the bitmap
 * @param max largest character value covered by the bitmap
 * @return {@code true} (and the cursor advanced by one) when the character
 *         is in the grouping; {@code false} (cursor untouched) otherwise or
 *         when the cursor has reached {@code limit}
 */
protected boolean in_grouping(char[] s, int min, int max) {
    if (cursor < limit) {
        final int c = current[cursor];
        if (c >= min && c <= max) {
            // (char) cast mirrors the narrowing done by "ch -= min" on a char
            final int offset = (char) (c - min);
            final int bit = 1 << (offset & 7);
            if ((s[offset >> 3] & bit) != 0) {
                cursor++;
                return true;
            }
        }
    }
    return false;
}
/**
 * Backward test: if the character just before the cursor belongs to the
 * grouping, step back over it.
 *
 * @param s   bitmap of the grouping; bit {@code (ch - min) & 7} of
 *            {@code s[(ch - min) >> 3]} is set when {@code ch} is a member
 * @param min smallest character value covered by the bitmap
 * @param max largest character value covered by the bitmap
 * @return {@code true} (and the cursor decremented) when the character is
 *         in the grouping; {@code false} (cursor untouched) otherwise or
 *         when the cursor has reached {@code limit_backward}
 */
protected boolean in_grouping_b(char[] s, int min, int max) {
    if (cursor > limit_backward) {
        final int c = current[cursor - 1];
        if (c >= min && c <= max) {
            // (char) cast mirrors the narrowing done by "ch -= min" on a char
            final int offset = (char) (c - min);
            final int bit = 1 << (offset & 7);
            if ((s[offset >> 3] & bit) != 0) {
                cursor--;
                return true;
            }
        }
    }
    return false;
}
protected boolean out_grouping(char [] s, int min, int max)
{
if (cursor >= limit) return false;
char ch = current[cursor];
if (ch > max || ch < min) {
cursor++;
return true;
}
/**
 * Backward test: if the character just before the cursor belongs to the
 * grouping, step back over it.
 *
 * @param s   bitmap of the grouping; bit {@code (ch - min) & 7} of
 *            {@code s[(ch - min) >> 3]} is set when {@code ch} is a member
 * @param min smallest character value covered by the bitmap
 * @param max largest character value covered by the bitmap
 * @return {@code true} (and the cursor decremented) when the character is
 *         in the grouping; {@code false} (cursor untouched) otherwise or
 *         when the cursor has reached {@code limit_backward}
 */
protected boolean in_grouping_b(char[] s, int min, int max) {
    if (cursor > limit_backward) {
        final int c = current[cursor - 1];
        if (c >= min && c <= max) {
            // (char) cast mirrors the narrowing done by "ch -= min" on a char
            final int offset = (char) (c - min);
            final int bit = 1 << (offset & 7);
            if ((s[offset >> 3] & bit) != 0) {
                cursor--;
                return true;
            }
        }
    }
    return false;
}
/**
 * Forward test: if the character at the cursor does NOT belong to the
 * grouping, consume it.
 *
 * @param s   bitmap of the grouping; bit {@code (ch - min) & 7} of
 *            {@code s[(ch - min) >> 3]} is set when {@code ch} is a member
 * @param min smallest character value covered by the bitmap
 * @param max largest character value covered by the bitmap
 * @return {@code true} (and the cursor advanced by one) when the character
 *         is outside the grouping; {@code false} (cursor untouched) when it
 *         is a member or the cursor has reached {@code limit}
 */
protected boolean out_grouping(char[] s, int min, int max) {
    if (cursor >= limit) {
        return false;
    }
    final int c = current[cursor];
    // anything outside [min, max] is trivially not in the grouping
    boolean outside = c > max || c < min;
    if (!outside) {
        // (char) cast mirrors the narrowing done by "ch -= min" on a char
        final int offset = (char) (c - min);
        outside = (s[offset >> 3] & (1 << (offset & 7))) == 0;
    }
    if (outside) {
        cursor++;
    }
    return outside;
}
/**
 * Backward test: if the character just before the cursor does NOT belong
 * to the grouping, step back over it.
 *
 * @param s   bitmap of the grouping; bit {@code (ch - min) & 7} of
 *            {@code s[(ch - min) >> 3]} is set when {@code ch} is a member
 * @param min smallest character value covered by the bitmap
 * @param max largest character value covered by the bitmap
 * @return {@code true} (and the cursor decremented) when the character is
 *         outside the grouping; {@code false} (cursor untouched) when it is
 *         a member or the cursor has reached {@code limit_backward}
 */
protected boolean out_grouping_b(char[] s, int min, int max) {
    if (cursor <= limit_backward) {
        return false;
    }
    final int c = current[cursor - 1];
    // anything outside [min, max] is trivially not in the grouping
    boolean outside = c > max || c < min;
    if (!outside) {
        // (char) cast mirrors the narrowing done by "ch -= min" on a char
        final int offset = (char) (c - min);
        outside = (s[offset >> 3] & (1 << (offset & 7))) == 0;
    }
    if (outside) {
        cursor--;
    }
    return outside;
}
protected boolean in_range(int min, int max)
{
if (cursor >= limit) return false;
char ch = current[cursor];
if (ch > max || ch < min) return false;
ch -= min;
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
cursor++;
return true;
}
return false;
}
protected boolean in_range_b(int min, int max)
{
if (cursor <= limit_backward) return false;
char ch = current[cursor - 1];
if (ch > max || ch < min) return false;
protected boolean out_grouping_b(char [] s, int min, int max)
{
if (cursor <= limit_backward) return false;
char ch = current[cursor - 1];
if (ch > max || ch < min) {
cursor--;
return true;
}
/**
 * Forward test: if the character at the cursor lies outside the inclusive
 * range {@code [min, max]}, consume it.
 *
 * @param min lower bound of the range (inclusive)
 * @param max upper bound of the range (inclusive)
 * @return {@code true} (and the cursor advanced by one) when the character
 *         is outside the range; {@code false} (cursor untouched) when it is
 *         inside the range or the cursor has reached {@code limit}
 */
protected boolean out_range(int min, int max) {
    if (cursor < limit) {
        final int c = current[cursor];
        if (c < min || c > max) {
            cursor++;
            return true;
        }
    }
    return false;
}
protected boolean out_range_b(int min, int max)
{
if (cursor <= limit_backward) return false;
char ch = current[cursor - 1];
if(!(ch > max || ch < min)) return false;
ch -= min;
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
cursor--;
return true;
}
return false;
}
protected boolean eq_s(int s_size, CharSequence s)
{
if (limit - cursor < s_size) return false;
int i;
for (i = 0; i != s_size; i++) {
if (current[cursor + i] != s.charAt(i)) return false;
/**
 * Forward literal match: checks whether the buffer, starting at the
 * cursor, begins with {@code s}, and if so moves the cursor past it.
 *
 * @param s the text to match
 * @return {@code true} (cursor advanced by {@code s.length()}) on a match;
 *         {@code false} (cursor untouched) when fewer than
 *         {@code s.length()} characters remain before {@code limit} or any
 *         character differs
 */
protected boolean eq_s(CharSequence s) {
    final int len = s.length();
    if (limit - cursor < len) {
        return false;
    }
    int k = 0;
    while (k < len) {
        if (s.charAt(k) != current[cursor + k]) {
            return false;
        }
        k++;
    }
    cursor += len;
    return true;
}
/**
 * Backward literal match: checks whether the buffer text ending at the
 * cursor equals {@code s}, and if so moves the cursor back over it.
 *
 * @param s the text to match
 * @return {@code true} (cursor decreased by {@code s.length()}) on a
 *         match; {@code false} (cursor untouched) when fewer than
 *         {@code s.length()} characters lie between {@code limit_backward}
 *         and the cursor or any character differs
 */
protected boolean eq_s_b(CharSequence s) {
    final int len = s.length();
    if (cursor - limit_backward < len) {
        return false;
    }
    // index in the buffer where the candidate match begins
    final int start = cursor - len;
    int k = 0;
    while (k < len) {
        if (s.charAt(k) != current[start + k]) {
            return false;
        }
        k++;
    }
    cursor = start;
    return true;
}
protected int find_among(Among v[])
{
int i = 0;
int j = v.length;
int c = cursor;
int l = limit;
int common_i = 0;
int common_j = 0;
boolean first_key_inspected = false;
while (true) {
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j; // smaller
Among w = v[k];
int i2;
for (i2 = common; i2 < w.s.length; i2++) {
if (c + common == l) {
diff = -1;
break;
}
diff = current[c + common] - w.s[i2];
if (diff != 0) break;
common++;
}
cursor += s_size;
return true;
}
protected boolean eq_s_b(int s_size, CharSequence s)
{
if (cursor - limit_backward < s_size) return false;
int i;
for (i = 0; i != s_size; i++) {
if (current[cursor - s_size + i] != s.charAt(i)) return false;
if (diff < 0) {
j = k;
common_j = common;
} else {
i = k;
common_i = common;
}
cursor -= s_size;
return true;
}
if (j - i <= 1) {
if (i > 0) break; // v->s has been inspected
if (j == i) break; // only one item in v
protected boolean eq_v(CharSequence s)
{
return eq_s(s.length(), s);
}
// - but now we need to go round once more to get
// v->s inspected. This looks messy, but is actually
// the optimal approach.
protected boolean eq_v_b(CharSequence s)
{
return eq_s_b(s.length(), s);
}
protected int find_among(Among v[], int v_size)
{
int i = 0;
int j = v_size;
int c = cursor;
int l = limit;
int common_i = 0;
int common_j = 0;
boolean first_key_inspected = false;
while (true) {
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j; // smaller
Among w = v[k];
int i2;
for (i2 = common; i2 < w.s_size; i2++) {
if (c + common == l) {
diff = -1;
break;
}
diff = current[c + common] - w.s[i2];
if (diff != 0) break;
common++;
}
if (diff < 0) {
j = k;
common_j = common;
} else {
i = k;
common_i = common;
}
if (j - i <= 1) {
if (i > 0) break; // v->s has been inspected
if (j == i) break; // only one item in v
// - but now we need to go round once more to get
// v->s inspected. This looks messy, but is actually
// the optimal approach.
if (first_key_inspected) break;
first_key_inspected = true;
}
}
while (true) {
Among w = v[i];
if (common_i >= w.s_size) {
cursor = c + w.s_size;
if (w.method == null) return w.result;
boolean res = false;
try {
res = (boolean) w.method.invokeExact(this);
} catch (Error | RuntimeException e) {
throw e;
} catch (Throwable e) {
throw new UndeclaredThrowableException(e);
}
cursor = c + w.s_size;
if (res) return w.result;
}
i = w.substring_i;
if (i < 0) return 0;
if (first_key_inspected) break;
first_key_inspected = true;
}
}
while (true) {
Among w = v[i];
if (common_i >= w.s.length) {
cursor = c + w.s.length;
if (w.method == null) return w.result;
boolean res = false;
try {
res = (boolean) w.method.invokeExact(this);
} catch (Error | RuntimeException e) {
throw e;
} catch (Throwable e) {
throw new UndeclaredThrowableException(e);
}
cursor = c + w.s.length;
if (res) return w.result;
}
i = w.substring_i;
if (i < 0) return 0;
}
}
// find_among_b is for backwards processing. Same comments apply
protected int find_among_b(Among v[], int v_size)
{
int i = 0;
int j = v_size;
protected int find_among_b(Among v[])
{
int i = 0;
int j = v.length;
int c = cursor;
int lb = limit_backward;
int c = cursor;
int lb = limit_backward;
int common_i = 0;
int common_j = 0;
int common_i = 0;
int common_j = 0;
boolean first_key_inspected = false;
boolean first_key_inspected = false;
while (true) {
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j;
Among w = v[k];
int i2;
for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
if (c - common == lb) {
diff = -1;
break;
}
diff = current[c - 1 - common] - w.s[i2];
if (diff != 0) break;
common++;
}
if (diff < 0) {
j = k;
common_j = common;
} else {
i = k;
common_i = common;
}
if (j - i <= 1) {
if (i > 0) break;
if (j == i) break;
if (first_key_inspected) break;
first_key_inspected = true;
while (true) {
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j;
Among w = v[k];
int i2;
for (i2 = w.s.length - 1 - common; i2 >= 0; i2--) {
if (c - common == lb) {
diff = -1;
break;
}
diff = current[c - 1 - common] - w.s[i2];
if (diff != 0) break;
common++;
}
while (true) {
Among w = v[i];
if (common_i >= w.s_size) {
cursor = c - w.s_size;
if (w.method == null) return w.result;
boolean res = false;
try {
res = (boolean) w.method.invokeExact(this);
} catch (Error | RuntimeException e) {
throw e;
} catch (Throwable e) {
throw new UndeclaredThrowableException(e);
}
cursor = c - w.s_size;
if (res) return w.result;
}
i = w.substring_i;
if (i < 0) return 0;
if (diff < 0) {
j = k;
common_j = common;
} else {
i = k;
common_i = common;
}
if (j - i <= 1) {
if (i > 0) break;
if (j == i) break;
if (first_key_inspected) break;
first_key_inspected = true;
}
}
while (true) {
Among w = v[i];
if (common_i >= w.s.length) {
cursor = c - w.s.length;
if (w.method == null) return w.result;
boolean res = false;
try {
res = (boolean) w.method.invokeExact(this);
} catch (Error | RuntimeException e) {
throw e;
} catch (Throwable e) {
throw new UndeclaredThrowableException(e);
}
cursor = c - w.s.length;
if (res) return w.result;
}
i = w.substring_i;
if (i < 0) return 0;
}
}
// mini version of ArrayUtil.oversize from lucene, specialized to chars
//
// Grows the requested size by one eighth (but never by fewer than 3
// elements) and rounds the result up to a multiple of 4. The final mask
// also clears the sign bit.
static int oversize(int minTargetSize) {
    final int eighth = minTargetSize >> 3;
    final int slack = (eighth < 3) ? 3 : eighth;
    final int grown = minTargetSize + slack;
    // +3 then clearing the two low bits rounds up to a multiple of 4
    return (grown + 3) & 0x7ffffffc;
}
/* to replace chars between c_bra and c_ket in current by the
* chars in s.
*/
protected int replace_s(int c_bra, int c_ket, CharSequence s) {
* chars in s.
*/
protected int replace_s(int c_bra, int c_ket, CharSequence s)
{
final int adjustment = s.length() - (c_ket - c_bra);
final int newLength = limit + adjustment;
//resize if necessary
if (newLength > current.length) {
char newBuffer[] = new char[ArrayUtil.oversize(newLength, Character.BYTES)];
char newBuffer[] = new char[oversize(newLength)];
System.arraycopy(current, 0, newBuffer, 0, limit);
current = newBuffer;
}
@ -409,7 +372,7 @@ public abstract class SnowballProgram {
// replacement, need to shift things around
if (adjustment != 0 && c_ket < limit) {
System.arraycopy(current, c_ket, current, c_bra + s.length(),
limit - c_ket);
limit - c_ket);
}
// insert the replacement text
// Note, faster is s.getChars(0, s.length(), current, c_bra);
@ -423,73 +386,69 @@ public abstract class SnowballProgram {
return adjustment;
}
protected void slice_check() {
protected void slice_check()
{
if (bra < 0 ||
bra > ket ||
ket > limit) {
throw new IllegalArgumentException("faulty slice operation: bra=" + bra + ",ket=" + ket + ",limit=" + limit);
// FIXME: report error somehow.
/*
fprintf(stderr, "faulty slice operation:\n");
debug(z, -1, 0);
exit(1);
*/
bra > ket ||
ket > limit)
{
throw new IllegalArgumentException("faulty slice operation: bra=" + bra + ",ket=" + ket + ",limit=" + limit);
}
}
protected void slice_from(CharSequence s) {
protected void slice_from(CharSequence s)
{
slice_check();
replace_s(bra, ket, s);
}
protected void slice_del() {
slice_from((CharSequence) "");
protected void slice_del()
{
slice_from("");
}
/**
 * Replaces the characters in {@code [c_bra, c_ket)} with {@code s} and
 * shifts the slice markers to follow the change.
 * <p>
 * {@code bra} and {@code ket} are each moved by the length adjustment
 * reported by {@code replace_s} when they are at or after {@code c_bra}.
 * </p>
 * @param c_bra start of the replaced region
 * @param c_ket end of the replaced region
 * @param s     replacement text
 */
protected void insert(int c_bra, int c_ket, CharSequence s) {
    final int delta = replace_s(c_bra, c_ket, s);
    if (bra >= c_bra) {
        bra += delta;
    }
    if (ket >= c_bra) {
        ket += delta;
    }
}
{
int adjustment = replace_s(c_bra, c_ket, s);
if (c_bra <= bra) bra += adjustment;
if (c_bra <= ket) ket += adjustment;
}
/* Copy the slice into the supplied StringBuilder.
 *
 * The slice is validated with slice_check() first; the builder is then
 * emptied and filled with the characters between bra and ket. The same
 * builder instance is returned.
 */
protected StringBuilder slice_to(StringBuilder s) {
    slice_check();
    s.setLength(0);
    // append(char[], int, int) returns the builder itself
    return s.append(current, bra, ket - bra);
}
/* Copy the slice into the supplied StringBuilder.
 *
 * The slice is validated with slice_check() first; the builder is then
 * emptied and filled with the characters between bra and ket.
 */
protected void slice_to(StringBuilder s) {
    slice_check();
    s.setLength(0);
    s.append(current, bra, ket - bra);
}
/**
 * Copies the whole current string (the first {@code limit} characters of
 * the buffer) into {@code s}, discarding its previous contents.
 *
 * @param s the builder to overwrite
 * @return the same builder instance, {@code s}
 */
protected StringBuilder assign_to(StringBuilder s) {
    s.setLength(0);
    // append(char[], int, int) returns the builder itself
    return s.append(current, 0, limit);
}
/**
 * Copies the whole current string (the first {@code limit} characters of
 * the buffer) into {@code s}, discarding its previous contents.
 *
 * @param s the builder to overwrite
 */
protected void assign_to(StringBuilder s)
{
s.setLength(0);
s.append(current, 0, limit);
}
/*
extern void debug(struct SN_env * z, int number, int line_count)
{ int i;
int limit = SIZE(z->p);
//if (number >= 0) printf("%3d (line %4d): '", number, line_count);
if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
for (i = 0; i <= limit; i++)
{ if (z->lb == i) printf("{");
if (z->bra == i) printf("[");
if (z->c == i) printf("|");
if (z->ket == i) printf("]");
if (z->l == i) printf("}");
if (i < limit)
{ int ch = z->p[i];
if (ch == 0) ch = '#';
printf("%c", ch);
}
int limit = SIZE(z->p);
//if (number >= 0) printf("%3d (line %4d): '", number, line_count);
if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
for (i = 0; i <= limit; i++)
{ if (z->lb == i) printf("{");
if (z->bra == i) printf("[");
if (z->c == i) printf("|");
if (z->ket == i) printf("]");
if (z->l == i) printf("}");
if (i < limit)
{ int ch = z->p[i];
if (ch == 0) ch = '#';
printf("%c", ch);
}
printf("'\n");
}
printf("'\n");
}
*/
};
};

View File

@ -0,0 +1,42 @@
/*
Copyright (c) 2001, Dr Martin Porter
Copyright (c) 2004,2005, Richard Boulton
Copyright (c) 2013, Yoshiki Shibukawa
Copyright (c) 2006,2007,2009,2010,2011,2014-2019, Olly Betts
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the Snowball project nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.tartarus.snowball;
/**
 * Common base type for every snowball stemmer. Concrete stemmers extend
 * this class and supply the algorithm by implementing {@link #stem()}.
 */
public abstract class SnowballStemmer extends SnowballProgram {

    // Serialization version stamp.
    // NOTE(review): presumably relevant because SnowballProgram is Serializable - confirm.
    static final long serialVersionUID = 2016072500L;

    /**
     * Runs the stemming algorithm on the current word.
     *
     * @return a boolean result produced by the concrete stemmer
     *         (NOTE(review): exact meaning is defined by the generated
     *         subclasses - confirm before relying on it)
     */
    public abstract boolean stem();
}

View File

@ -1,517 +1,394 @@
// This file was generated automatically by the Snowball to Java compiler
// Generated by Snowball 2.0.0 - https://snowballstem.org/
package org.tartarus.snowball.ext;
import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;
/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/
/**
* This class implements the stemming algorithm defined by a snowball script.
* <p>
* Generated by Snowball 2.0.0 - https://snowballstem.org/
* </p>
*/
@SuppressWarnings("unused")
public class ArmenianStemmer extends org.tartarus.snowball.SnowballStemmer {
@SuppressWarnings("unused") public class ArmenianStemmer extends SnowballProgram {
private static final long serialVersionUID = 1L;
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
private static final long serialVersionUID = 1L;
private final static Among a_0[] = {
new Among("\u0580\u0578\u0580\u0564", -1, 1),
new Among("\u0565\u0580\u0578\u0580\u0564", 0, 1),
new Among("\u0561\u056C\u056B", -1, 1),
new Among("\u0561\u056F\u056B", -1, 1),
new Among("\u0578\u0580\u0561\u056F", -1, 1),
new Among("\u0565\u0572", -1, 1),
new Among("\u0561\u056F\u0561\u0576", -1, 1),
new Among("\u0561\u0580\u0561\u0576", -1, 1),
new Among("\u0565\u0576", -1, 1),
new Among("\u0565\u056F\u0565\u0576", 8, 1),
new Among("\u0565\u0580\u0565\u0576", 8, 1),
new Among("\u0578\u0580\u0567\u0576", -1, 1),
new Among("\u056B\u0576", -1, 1),
new Among("\u0563\u056B\u0576", 12, 1),
new Among("\u0578\u057E\u056B\u0576", 12, 1),
new Among("\u056C\u0561\u0575\u0576", -1, 1),
new Among("\u057E\u0578\u0582\u0576", -1, 1),
new Among("\u057A\u0565\u057D", -1, 1),
new Among("\u056B\u057E", -1, 1),
new Among("\u0561\u057F", -1, 1),
new Among("\u0561\u057E\u0565\u057F", -1, 1),
new Among("\u056F\u0578\u057F", -1, 1),
new Among("\u0562\u0561\u0580", -1, 1)
};
/* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
private final static Among a_1[] = {
new Among("\u0561", -1, 1),
new Among("\u0561\u0581\u0561", 0, 1),
new Among("\u0565\u0581\u0561", 0, 1),
new Among("\u057E\u0565", -1, 1),
new Among("\u0561\u0581\u0580\u056B", -1, 1),
new Among("\u0561\u0581\u056B", -1, 1),
new Among("\u0565\u0581\u056B", -1, 1),
new Among("\u057E\u0565\u0581\u056B", 6, 1),
new Among("\u0561\u056C", -1, 1),
new Among("\u0568\u0561\u056C", 8, 1),
new Among("\u0561\u0576\u0561\u056C", 8, 1),
new Among("\u0565\u0576\u0561\u056C", 8, 1),
new Among("\u0561\u0581\u0576\u0561\u056C", 8, 1),
new Among("\u0565\u056C", -1, 1),
new Among("\u0568\u0565\u056C", 13, 1),
new Among("\u0576\u0565\u056C", 13, 1),
new Among("\u0581\u0576\u0565\u056C", 15, 1),
new Among("\u0565\u0581\u0576\u0565\u056C", 16, 1),
new Among("\u0579\u0565\u056C", 13, 1),
new Among("\u057E\u0565\u056C", 13, 1),
new Among("\u0561\u0581\u057E\u0565\u056C", 19, 1),
new Among("\u0565\u0581\u057E\u0565\u056C", 19, 1),
new Among("\u057F\u0565\u056C", 13, 1),
new Among("\u0561\u057F\u0565\u056C", 22, 1),
new Among("\u0578\u057F\u0565\u056C", 22, 1),
new Among("\u056F\u0578\u057F\u0565\u056C", 24, 1),
new Among("\u057E\u0561\u056E", -1, 1),
new Among("\u0578\u0582\u0574", -1, 1),
new Among("\u057E\u0578\u0582\u0574", 27, 1),
new Among("\u0561\u0576", -1, 1),
new Among("\u0581\u0561\u0576", 29, 1),
new Among("\u0561\u0581\u0561\u0576", 30, 1),
new Among("\u0561\u0581\u0580\u056B\u0576", -1, 1),
new Among("\u0561\u0581\u056B\u0576", -1, 1),
new Among("\u0565\u0581\u056B\u0576", -1, 1),
new Among("\u057E\u0565\u0581\u056B\u0576", 34, 1),
new Among("\u0561\u056C\u056B\u057D", -1, 1),
new Among("\u0565\u056C\u056B\u057D", -1, 1),
new Among("\u0561\u057E", -1, 1),
new Among("\u0561\u0581\u0561\u057E", 38, 1),
new Among("\u0565\u0581\u0561\u057E", 38, 1),
new Among("\u0561\u056C\u0578\u057E", -1, 1),
new Among("\u0565\u056C\u0578\u057E", -1, 1),
new Among("\u0561\u0580", -1, 1),
new Among("\u0561\u0581\u0561\u0580", 43, 1),
new Among("\u0565\u0581\u0561\u0580", 43, 1),
new Among("\u0561\u0581\u0580\u056B\u0580", -1, 1),
new Among("\u0561\u0581\u056B\u0580", -1, 1),
new Among("\u0565\u0581\u056B\u0580", -1, 1),
new Among("\u057E\u0565\u0581\u056B\u0580", 48, 1),
new Among("\u0561\u0581", -1, 1),
new Among("\u0565\u0581", -1, 1),
new Among("\u0561\u0581\u0580\u0565\u0581", 51, 1),
new Among("\u0561\u056C\u0578\u0582\u0581", -1, 1),
new Among("\u0565\u056C\u0578\u0582\u0581", -1, 1),
new Among("\u0561\u056C\u0578\u0582", -1, 1),
new Among("\u0565\u056C\u0578\u0582", -1, 1),
new Among("\u0561\u0584", -1, 1),
new Among("\u0581\u0561\u0584", 57, 1),
new Among("\u0561\u0581\u0561\u0584", 58, 1),
new Among("\u0561\u0581\u0580\u056B\u0584", -1, 1),
new Among("\u0561\u0581\u056B\u0584", -1, 1),
new Among("\u0565\u0581\u056B\u0584", -1, 1),
new Among("\u057E\u0565\u0581\u056B\u0584", 62, 1),
new Among("\u0561\u0576\u0584", -1, 1),
new Among("\u0581\u0561\u0576\u0584", 64, 1),
new Among("\u0561\u0581\u0561\u0576\u0584", 65, 1),
new Among("\u0561\u0581\u0580\u056B\u0576\u0584", -1, 1),
new Among("\u0561\u0581\u056B\u0576\u0584", -1, 1),
new Among("\u0565\u0581\u056B\u0576\u0584", -1, 1),
new Among("\u057E\u0565\u0581\u056B\u0576\u0584", 69, 1)
};
private final static Among a_0[] = {
new Among ( "\u0580\u0578\u0580\u0564", -1, 1, "", methodObject ),
new Among ( "\u0565\u0580\u0578\u0580\u0564", 0, 1, "", methodObject ),
new Among ( "\u0561\u056C\u056B", -1, 1, "", methodObject ),
new Among ( "\u0561\u056F\u056B", -1, 1, "", methodObject ),
new Among ( "\u0578\u0580\u0561\u056F", -1, 1, "", methodObject ),
new Among ( "\u0565\u0572", -1, 1, "", methodObject ),
new Among ( "\u0561\u056F\u0561\u0576", -1, 1, "", methodObject ),
new Among ( "\u0561\u0580\u0561\u0576", -1, 1, "", methodObject ),
new Among ( "\u0565\u0576", -1, 1, "", methodObject ),
new Among ( "\u0565\u056F\u0565\u0576", 8, 1, "", methodObject ),
new Among ( "\u0565\u0580\u0565\u0576", 8, 1, "", methodObject ),
new Among ( "\u0578\u0580\u0567\u0576", -1, 1, "", methodObject ),
new Among ( "\u056B\u0576", -1, 1, "", methodObject ),
new Among ( "\u0563\u056B\u0576", 12, 1, "", methodObject ),
new Among ( "\u0578\u057E\u056B\u0576", 12, 1, "", methodObject ),
new Among ( "\u056C\u0561\u0575\u0576", -1, 1, "", methodObject ),
new Among ( "\u057E\u0578\u0582\u0576", -1, 1, "", methodObject ),
new Among ( "\u057A\u0565\u057D", -1, 1, "", methodObject ),
new Among ( "\u056B\u057E", -1, 1, "", methodObject ),
new Among ( "\u0561\u057F", -1, 1, "", methodObject ),
new Among ( "\u0561\u057E\u0565\u057F", -1, 1, "", methodObject ),
new Among ( "\u056F\u0578\u057F", -1, 1, "", methodObject ),
new Among ( "\u0562\u0561\u0580", -1, 1, "", methodObject )
};
private final static Among a_2[] = {
new Among("\u0578\u0580\u0564", -1, 1),
new Among("\u0578\u0582\u0575\u0569", -1, 1),
new Among("\u0578\u0582\u0570\u056B", -1, 1),
new Among("\u0581\u056B", -1, 1),
new Among("\u056B\u056C", -1, 1),
new Among("\u0561\u056F", -1, 1),
new Among("\u0575\u0561\u056F", 5, 1),
new Among("\u0561\u0576\u0561\u056F", 5, 1),
new Among("\u056B\u056F", -1, 1),
new Among("\u0578\u0582\u056F", -1, 1),
new Among("\u0561\u0576", -1, 1),
new Among("\u057A\u0561\u0576", 10, 1),
new Among("\u057D\u057F\u0561\u0576", 10, 1),
new Among("\u0561\u0580\u0561\u0576", 10, 1),
new Among("\u0565\u0572\u0567\u0576", -1, 1),
new Among("\u0575\u0578\u0582\u0576", -1, 1),
new Among("\u0578\u0582\u0569\u0575\u0578\u0582\u0576", 15, 1),
new Among("\u0561\u056E\u0578", -1, 1),
new Among("\u056B\u0579", -1, 1),
new Among("\u0578\u0582\u057D", -1, 1),
new Among("\u0578\u0582\u057D\u057F", -1, 1),
new Among("\u0563\u0561\u0580", -1, 1),
new Among("\u057E\u0578\u0580", -1, 1),
new Among("\u0561\u057E\u0578\u0580", 22, 1),
new Among("\u0578\u0581", -1, 1),
new Among("\u0561\u0576\u0585\u0581", -1, 1),
new Among("\u0578\u0582", -1, 1),
new Among("\u0584", -1, 1),
new Among("\u0579\u0565\u0584", 27, 1),
new Among("\u056B\u0584", 27, 1),
new Among("\u0561\u056C\u056B\u0584", 29, 1),
new Among("\u0561\u0576\u056B\u0584", 29, 1),
new Among("\u057E\u0561\u056E\u0584", 27, 1),
new Among("\u0578\u0582\u0575\u0584", 27, 1),
new Among("\u0565\u0576\u0584", 27, 1),
new Among("\u0578\u0576\u0584", 27, 1),
new Among("\u0578\u0582\u0576\u0584", 27, 1),
new Among("\u0574\u0578\u0582\u0576\u0584", 36, 1),
new Among("\u056B\u0579\u0584", 27, 1),
new Among("\u0561\u0580\u0584", 27, 1)
};
private final static Among a_1[] = {
new Among ( "\u0561", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0561", 0, 1, "", methodObject ),
new Among ( "\u0565\u0581\u0561", 0, 1, "", methodObject ),
new Among ( "\u057E\u0565", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0580\u056B", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u056B", -1, 1, "", methodObject ),
new Among ( "\u0565\u0581\u056B", -1, 1, "", methodObject ),
new Among ( "\u057E\u0565\u0581\u056B", 6, 1, "", methodObject ),
new Among ( "\u0561\u056C", -1, 1, "", methodObject ),
new Among ( "\u0568\u0561\u056C", 8, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0561\u056C", 8, 1, "", methodObject ),
new Among ( "\u0565\u0576\u0561\u056C", 8, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0576\u0561\u056C", 8, 1, "", methodObject ),
new Among ( "\u0565\u056C", -1, 1, "", methodObject ),
new Among ( "\u0568\u0565\u056C", 13, 1, "", methodObject ),
new Among ( "\u0576\u0565\u056C", 13, 1, "", methodObject ),
new Among ( "\u0581\u0576\u0565\u056C", 15, 1, "", methodObject ),
new Among ( "\u0565\u0581\u0576\u0565\u056C", 16, 1, "", methodObject ),
new Among ( "\u0579\u0565\u056C", 13, 1, "", methodObject ),
new Among ( "\u057E\u0565\u056C", 13, 1, "", methodObject ),
new Among ( "\u0561\u0581\u057E\u0565\u056C", 19, 1, "", methodObject ),
new Among ( "\u0565\u0581\u057E\u0565\u056C", 19, 1, "", methodObject ),
new Among ( "\u057F\u0565\u056C", 13, 1, "", methodObject ),
new Among ( "\u0561\u057F\u0565\u056C", 22, 1, "", methodObject ),
new Among ( "\u0578\u057F\u0565\u056C", 22, 1, "", methodObject ),
new Among ( "\u056F\u0578\u057F\u0565\u056C", 24, 1, "", methodObject ),
new Among ( "\u057E\u0561\u056E", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0574", -1, 1, "", methodObject ),
new Among ( "\u057E\u0578\u0582\u0574", 27, 1, "", methodObject ),
new Among ( "\u0561\u0576", -1, 1, "", methodObject ),
new Among ( "\u0581\u0561\u0576", 29, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0561\u0576", 30, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0580\u056B\u0576", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u056B\u0576", -1, 1, "", methodObject ),
new Among ( "\u0565\u0581\u056B\u0576", -1, 1, "", methodObject ),
new Among ( "\u057E\u0565\u0581\u056B\u0576", 34, 1, "", methodObject ),
new Among ( "\u0561\u056C\u056B\u057D", -1, 1, "", methodObject ),
new Among ( "\u0565\u056C\u056B\u057D", -1, 1, "", methodObject ),
new Among ( "\u0561\u057E", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0561\u057E", 38, 1, "", methodObject ),
new Among ( "\u0565\u0581\u0561\u057E", 38, 1, "", methodObject ),
new Among ( "\u0561\u056C\u0578\u057E", -1, 1, "", methodObject ),
new Among ( "\u0565\u056C\u0578\u057E", -1, 1, "", methodObject ),
new Among ( "\u0561\u0580", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0561\u0580", 43, 1, "", methodObject ),
new Among ( "\u0565\u0581\u0561\u0580", 43, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0580\u056B\u0580", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u056B\u0580", -1, 1, "", methodObject ),
new Among ( "\u0565\u0581\u056B\u0580", -1, 1, "", methodObject ),
new Among ( "\u057E\u0565\u0581\u056B\u0580", 48, 1, "", methodObject ),
new Among ( "\u0561\u0581", -1, 1, "", methodObject ),
new Among ( "\u0565\u0581", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0580\u0565\u0581", 51, 1, "", methodObject ),
new Among ( "\u0561\u056C\u0578\u0582\u0581", -1, 1, "", methodObject ),
new Among ( "\u0565\u056C\u0578\u0582\u0581", -1, 1, "", methodObject ),
new Among ( "\u0561\u056C\u0578\u0582", -1, 1, "", methodObject ),
new Among ( "\u0565\u056C\u0578\u0582", -1, 1, "", methodObject ),
new Among ( "\u0561\u0584", -1, 1, "", methodObject ),
new Among ( "\u0581\u0561\u0584", 57, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0561\u0584", 58, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0580\u056B\u0584", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u056B\u0584", -1, 1, "", methodObject ),
new Among ( "\u0565\u0581\u056B\u0584", -1, 1, "", methodObject ),
new Among ( "\u057E\u0565\u0581\u056B\u0584", 62, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0584", -1, 1, "", methodObject ),
new Among ( "\u0581\u0561\u0576\u0584", 64, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0561\u0576\u0584", 65, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0580\u056B\u0576\u0584", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u056B\u0576\u0584", -1, 1, "", methodObject ),
new Among ( "\u0565\u0581\u056B\u0576\u0584", -1, 1, "", methodObject ),
new Among ( "\u057E\u0565\u0581\u056B\u0576\u0584", 69, 1, "", methodObject )
};
private final static Among a_3[] = {
new Among("\u057D\u0561", -1, 1),
new Among("\u057E\u0561", -1, 1),
new Among("\u0561\u0574\u0562", -1, 1),
new Among("\u0564", -1, 1),
new Among("\u0561\u0576\u0564", 3, 1),
new Among("\u0578\u0582\u0569\u0575\u0561\u0576\u0564", 4, 1),
new Among("\u057E\u0561\u0576\u0564", 4, 1),
new Among("\u0578\u057B\u0564", 3, 1),
new Among("\u0565\u0580\u0564", 3, 1),
new Among("\u0576\u0565\u0580\u0564", 8, 1),
new Among("\u0578\u0582\u0564", 3, 1),
new Among("\u0568", -1, 1),
new Among("\u0561\u0576\u0568", 11, 1),
new Among("\u0578\u0582\u0569\u0575\u0561\u0576\u0568", 12, 1),
new Among("\u057E\u0561\u0576\u0568", 12, 1),
new Among("\u0578\u057B\u0568", 11, 1),
new Among("\u0565\u0580\u0568", 11, 1),
new Among("\u0576\u0565\u0580\u0568", 16, 1),
new Among("\u056B", -1, 1),
new Among("\u057E\u056B", 18, 1),
new Among("\u0565\u0580\u056B", 18, 1),
new Among("\u0576\u0565\u0580\u056B", 20, 1),
new Among("\u0561\u0576\u0578\u0582\u0574", -1, 1),
new Among("\u0565\u0580\u0578\u0582\u0574", -1, 1),
new Among("\u0576\u0565\u0580\u0578\u0582\u0574", 23, 1),
new Among("\u0576", -1, 1),
new Among("\u0561\u0576", 25, 1),
new Among("\u0578\u0582\u0569\u0575\u0561\u0576", 26, 1),
new Among("\u057E\u0561\u0576", 26, 1),
new Among("\u056B\u0576", 25, 1),
new Among("\u0565\u0580\u056B\u0576", 29, 1),
new Among("\u0576\u0565\u0580\u056B\u0576", 30, 1),
new Among("\u0578\u0582\u0569\u0575\u0561\u0576\u0576", 25, 1),
new Among("\u0565\u0580\u0576", 25, 1),
new Among("\u0576\u0565\u0580\u0576", 33, 1),
new Among("\u0578\u0582\u0576", 25, 1),
new Among("\u0578\u057B", -1, 1),
new Among("\u0578\u0582\u0569\u0575\u0561\u0576\u057D", -1, 1),
new Among("\u057E\u0561\u0576\u057D", -1, 1),
new Among("\u0578\u057B\u057D", -1, 1),
new Among("\u0578\u057E", -1, 1),
new Among("\u0561\u0576\u0578\u057E", 40, 1),
new Among("\u057E\u0578\u057E", 40, 1),
new Among("\u0565\u0580\u0578\u057E", 40, 1),
new Among("\u0576\u0565\u0580\u0578\u057E", 43, 1),
new Among("\u0565\u0580", -1, 1),
new Among("\u0576\u0565\u0580", 45, 1),
new Among("\u0581", -1, 1),
new Among("\u056B\u0581", 47, 1),
new Among("\u057E\u0561\u0576\u056B\u0581", 48, 1),
new Among("\u0578\u057B\u056B\u0581", 48, 1),
new Among("\u057E\u056B\u0581", 48, 1),
new Among("\u0565\u0580\u056B\u0581", 48, 1),
new Among("\u0576\u0565\u0580\u056B\u0581", 52, 1),
new Among("\u0581\u056B\u0581", 48, 1),
new Among("\u0578\u0581", 47, 1),
new Among("\u0578\u0582\u0581", 47, 1)
};
private final static Among a_2[] = {
new Among ( "\u0578\u0580\u0564", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0575\u0569", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0570\u056B", -1, 1, "", methodObject ),
new Among ( "\u0581\u056B", -1, 1, "", methodObject ),
new Among ( "\u056B\u056C", -1, 1, "", methodObject ),
new Among ( "\u0561\u056F", -1, 1, "", methodObject ),
new Among ( "\u0575\u0561\u056F", 5, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0561\u056F", 5, 1, "", methodObject ),
new Among ( "\u056B\u056F", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u056F", -1, 1, "", methodObject ),
new Among ( "\u0561\u0576", -1, 1, "", methodObject ),
new Among ( "\u057A\u0561\u0576", 10, 1, "", methodObject ),
new Among ( "\u057D\u057F\u0561\u0576", 10, 1, "", methodObject ),
new Among ( "\u0561\u0580\u0561\u0576", 10, 1, "", methodObject ),
new Among ( "\u0565\u0572\u0567\u0576", -1, 1, "", methodObject ),
new Among ( "\u0575\u0578\u0582\u0576", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0569\u0575\u0578\u0582\u0576", 15, 1, "", methodObject ),
new Among ( "\u0561\u056E\u0578", -1, 1, "", methodObject ),
new Among ( "\u056B\u0579", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u057D", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u057D\u057F", -1, 1, "", methodObject ),
new Among ( "\u0563\u0561\u0580", -1, 1, "", methodObject ),
new Among ( "\u057E\u0578\u0580", -1, 1, "", methodObject ),
new Among ( "\u0561\u057E\u0578\u0580", 22, 1, "", methodObject ),
new Among ( "\u0578\u0581", -1, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0585\u0581", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582", -1, 1, "", methodObject ),
new Among ( "\u0584", -1, 1, "", methodObject ),
new Among ( "\u0579\u0565\u0584", 27, 1, "", methodObject ),
new Among ( "\u056B\u0584", 27, 1, "", methodObject ),
new Among ( "\u0561\u056C\u056B\u0584", 29, 1, "", methodObject ),
new Among ( "\u0561\u0576\u056B\u0584", 29, 1, "", methodObject ),
new Among ( "\u057E\u0561\u056E\u0584", 27, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0575\u0584", 27, 1, "", methodObject ),
new Among ( "\u0565\u0576\u0584", 27, 1, "", methodObject ),
new Among ( "\u0578\u0576\u0584", 27, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0576\u0584", 27, 1, "", methodObject ),
new Among ( "\u0574\u0578\u0582\u0576\u0584", 36, 1, "", methodObject ),
new Among ( "\u056B\u0579\u0584", 27, 1, "", methodObject ),
new Among ( "\u0561\u0580\u0584", 27, 1, "", methodObject )
};
private static final char g_v[] = {209, 4, 128, 0, 18 };
private final static Among a_3[] = {
new Among ( "\u057D\u0561", -1, 1, "", methodObject ),
new Among ( "\u057E\u0561", -1, 1, "", methodObject ),
new Among ( "\u0561\u0574\u0562", -1, 1, "", methodObject ),
new Among ( "\u0564", -1, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0564", 3, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u0564", 4, 1, "", methodObject ),
new Among ( "\u057E\u0561\u0576\u0564", 4, 1, "", methodObject ),
new Among ( "\u0578\u057B\u0564", 3, 1, "", methodObject ),
new Among ( "\u0565\u0580\u0564", 3, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u0564", 8, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0564", 3, 1, "", methodObject ),
new Among ( "\u0568", -1, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0568", 11, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u0568", 12, 1, "", methodObject ),
new Among ( "\u057E\u0561\u0576\u0568", 12, 1, "", methodObject ),
new Among ( "\u0578\u057B\u0568", 11, 1, "", methodObject ),
new Among ( "\u0565\u0580\u0568", 11, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u0568", 16, 1, "", methodObject ),
new Among ( "\u056B", -1, 1, "", methodObject ),
new Among ( "\u057E\u056B", 18, 1, "", methodObject ),
new Among ( "\u0565\u0580\u056B", 18, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u056B", 20, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0578\u0582\u0574", -1, 1, "", methodObject ),
new Among ( "\u0565\u0580\u0578\u0582\u0574", -1, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u0578\u0582\u0574", 23, 1, "", methodObject ),
new Among ( "\u0576", -1, 1, "", methodObject ),
new Among ( "\u0561\u0576", 25, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576", 26, 1, "", methodObject ),
new Among ( "\u057E\u0561\u0576", 26, 1, "", methodObject ),
new Among ( "\u056B\u0576", 25, 1, "", methodObject ),
new Among ( "\u0565\u0580\u056B\u0576", 29, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u056B\u0576", 30, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u0576", 25, 1, "", methodObject ),
new Among ( "\u0565\u0580\u0576", 25, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u0576", 33, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0576", 25, 1, "", methodObject ),
new Among ( "\u0578\u057B", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u057D", -1, 1, "", methodObject ),
new Among ( "\u057E\u0561\u0576\u057D", -1, 1, "", methodObject ),
new Among ( "\u0578\u057B\u057D", -1, 1, "", methodObject ),
new Among ( "\u0578\u057E", -1, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0578\u057E", 40, 1, "", methodObject ),
new Among ( "\u057E\u0578\u057E", 40, 1, "", methodObject ),
new Among ( "\u0565\u0580\u0578\u057E", 40, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u0578\u057E", 43, 1, "", methodObject ),
new Among ( "\u0565\u0580", -1, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580", 45, 1, "", methodObject ),
new Among ( "\u0581", -1, 1, "", methodObject ),
new Among ( "\u056B\u0581", 47, 1, "", methodObject ),
new Among ( "\u057E\u0561\u0576\u056B\u0581", 48, 1, "", methodObject ),
new Among ( "\u0578\u057B\u056B\u0581", 48, 1, "", methodObject ),
new Among ( "\u057E\u056B\u0581", 48, 1, "", methodObject ),
new Among ( "\u0565\u0580\u056B\u0581", 48, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u056B\u0581", 52, 1, "", methodObject ),
new Among ( "\u0581\u056B\u0581", 48, 1, "", methodObject ),
new Among ( "\u0578\u0581", 47, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0581", 47, 1, "", methodObject )
};
private int I_p2;
private int I_pV;
private static final char g_v[] = {209, 4, 128, 0, 18 };
private int I_p2;
private int I_pV;
/**
 * Copies the two position marks kept by this stemmer ({@code I_pV} and
 * {@code I_p2}) from {@code other}, then hands the rest of the copy to the
 * superclass implementation.
 *
 * @param other the stemmer whose state is copied into this one
 */
private void copy_from(ArmenianStemmer other) {
    I_pV = other.I_pV;
    I_p2 = other.I_p2;
    super.copy_from(other);
}
/**
 * Computes the marks {@code I_pV} and {@code I_p2} for the current word.
 * <p>
 * Both marks start at {@code limit}. Four scans then run in sequence, each
 * stepping the cursor forward until the grouping test against {@code g_v}
 * (code points 1377..1413) succeeds: in, out, in, out. {@code I_pV} is set
 * after the first scan and {@code I_p2} after the fourth. If the cursor
 * reaches {@code limit} during any scan, the remaining work is abandoned and
 * the marks keep whatever values they have at that point.
 * <p>
 * The cursor is always restored to its entry position.
 *
 * @return always {@code true}
 */
private boolean r_mark_regions() {
    final int entry = cursor;
    I_pV = limit;
    I_p2 = limit;
    scan: {
        // first gopast: up to a position where in_grouping succeeds
        while (!in_grouping(g_v, 1377, 1413)) {
            if (cursor >= limit) {
                break scan;
            }
            cursor++;
        }
        I_pV = cursor;
        // second gopast: up to a position where out_grouping succeeds
        while (!out_grouping(g_v, 1377, 1413)) {
            if (cursor >= limit) {
                break scan;
            }
            cursor++;
        }
        // third gopast
        while (!in_grouping(g_v, 1377, 1413)) {
            if (cursor >= limit) {
                break scan;
            }
            cursor++;
        }
        // fourth gopast
        while (!out_grouping(g_v, 1377, 1413)) {
            if (cursor >= limit) {
                break scan;
            }
            cursor++;
        }
        I_p2 = cursor;
    }
    cursor = entry;
    return true;
}
/**
 * Tests whether the cursor is at or beyond the {@code I_p2} mark.
 *
 * @return {@code true} when {@code I_p2 <= cursor}
 */
private boolean r_R2() {
    return I_p2 <= cursor;
}
/**
 * Adjective step: searches table {@code a_0} with {@code find_among_b} and,
 * when the match carries result code 1, calls {@code slice_del()} on the
 * slice delimited by {@code ket}/{@code bra}.
 *
 * @return {@code false} when {@code find_among_b} reports no match (0),
 *         {@code true} otherwise
 */
private boolean r_adjective() {
    ket = cursor;
    final int outcome = find_among_b(a_0, 23);
    if (outcome == 0) {
        return false;
    }
    bra = cursor;
    if (outcome == 1) {
        slice_del();
    }
    return true;
}
/**
 * Verb step: searches table {@code a_1} with {@code find_among_b} and, when
 * the match carries result code 1, calls {@code slice_del()} on the slice
 * delimited by {@code ket}/{@code bra}.
 *
 * @return {@code false} when {@code find_among_b} reports no match (0),
 *         {@code true} otherwise
 */
private boolean r_verb() {
    ket = cursor;
    final int outcome = find_among_b(a_1, 71);
    if (outcome == 0) {
        return false;
    }
    bra = cursor;
    if (outcome == 1) {
        slice_del();
    }
    return true;
}
/**
 * Noun step: searches table {@code a_2} with {@code find_among_b} and, when
 * the match carries result code 1, calls {@code slice_del()} on the slice
 * delimited by {@code ket}/{@code bra}.
 *
 * @return {@code false} when {@code find_among_b} reports no match (0),
 *         {@code true} otherwise
 */
private boolean r_noun() {
    ket = cursor;
    final int outcome = find_among_b(a_2, 40);
    if (outcome == 0) {
        return false;
    }
    bra = cursor;
    if (outcome == 1) {
        slice_del();
    }
    return true;
}
/**
 * Ending step: searches table {@code a_3} with {@code find_among_b}; the
 * match is only acted upon when {@link #r_R2()} holds at the position the
 * search left the cursor. A match with result code 1 is then passed to
 * {@code slice_del()}.
 *
 * @return {@code false} when nothing in {@code a_3} matches or the
 *         {@code r_R2} test fails, {@code true} otherwise
 */
private boolean r_ending() {
    ket = cursor;
    final int outcome = find_among_b(a_3, 57);
    if (outcome == 0) {
        return false;
    }
    bra = cursor;
    if (!r_R2()) {
        return false;
    }
    if (outcome == 1) {
        slice_del();
    }
    return true;
}
/**
 * Runs the stemmer over the current word.
 * <p>
 * {@code r_mark_regions()} is called first (its result is ignored and the
 * cursor is put back afterwards). Processing then switches to backward mode
 * with {@code limit_backward} narrowed to {@code I_pV}, and the four steps
 * {@code r_ending}, {@code r_verb}, {@code r_adjective} and {@code r_noun}
 * run in that order. Each step's result is ignored and the cursor is reset to
 * the same distance from {@code limit} after each one.
 *
 * @return {@code false} if the cursor is before {@code I_pV} when the
 *         backward limit is about to be narrowed; {@code true} otherwise
 */
@Override
public boolean stem() {
    final int entry = cursor;
    r_mark_regions(); // outcome intentionally unused
    cursor = entry;

    // switch to backward processing
    limit_backward = cursor;
    cursor = limit;

    // setlimit: narrow limit_backward to I_pV, keeping the cursor where it is
    final int distance = limit - cursor;
    if (cursor < I_pV) {
        return false;
    }
    cursor = I_pV;
    final int outerLimit = limit_backward;
    limit_backward = cursor;
    cursor = limit - distance;

    // each step is attempted; the cursor is re-anchored relative to limit
    int offset = limit - cursor;
    r_ending();
    cursor = limit - offset;

    offset = limit - cursor;
    r_verb();
    cursor = limit - offset;

    offset = limit - cursor;
    r_adjective();
    cursor = limit - offset;

    offset = limit - cursor;
    r_noun();
    cursor = limit - offset;

    limit_backward = outerLimit;
    cursor = limit_backward;
    return true;
}
@Override
public boolean equals( Object o ) {
return o instanceof ArmenianStemmer;
private boolean r_mark_regions() {
I_pV = limit;
I_p2 = limit;
int v_1 = cursor;
lab0: {
golab1: while(true)
{
lab2: {
if (!(in_grouping(g_v, 1377, 1413)))
{
break lab2;
}
@Override
public int hashCode() {
return ArmenianStemmer.class.getName().hashCode();
break golab1;
}
if (cursor >= limit)
{
break lab0;
}
cursor++;
}
I_pV = cursor;
golab3: while(true)
{
lab4: {
if (!(out_grouping(g_v, 1377, 1413)))
{
break lab4;
}
break golab3;
}
if (cursor >= limit)
{
break lab0;
}
cursor++;
}
golab5: while(true)
{
lab6: {
if (!(in_grouping(g_v, 1377, 1413)))
{
break lab6;
}
break golab5;
}
if (cursor >= limit)
{
break lab0;
}
cursor++;
}
golab7: while(true)
{
lab8: {
if (!(out_grouping(g_v, 1377, 1413)))
{
break lab8;
}
break golab7;
}
if (cursor >= limit)
{
break lab0;
}
cursor++;
}
I_p2 = cursor;
}
cursor = v_1;
return true;
}
/**
 * Reports whether the cursor has reached the {@code I_p2} mark.
 *
 * @return {@code true} when {@code I_p2 <= cursor}, {@code false} otherwise
 */
private boolean r_R2() {
    final boolean atOrPastMark = I_p2 <= cursor;
    return atOrPastMark;
}
/**
 * Adjective step: sets {@code ket}, searches table {@code a_0} with
 * {@code find_among_b}, and on a match sets {@code bra} and calls
 * {@code slice_del()}.
 *
 * @return {@code true} if an entry of {@code a_0} matched, {@code false}
 *         otherwise
 */
private boolean r_adjective() {
    ket = cursor;
    final boolean matched = find_among_b(a_0) != 0;
    if (matched) {
        bra = cursor;
        slice_del();
    }
    return matched;
}
/**
 * Verb step: sets {@code ket}, searches table {@code a_1} with
 * {@code find_among_b}, and on a match sets {@code bra} and calls
 * {@code slice_del()}.
 *
 * @return {@code true} if an entry of {@code a_1} matched, {@code false}
 *         otherwise
 */
private boolean r_verb() {
    ket = cursor;
    final boolean matched = find_among_b(a_1) != 0;
    if (matched) {
        bra = cursor;
        slice_del();
    }
    return matched;
}
/**
 * Noun step: sets {@code ket}, searches table {@code a_2} with
 * {@code find_among_b}, and on a match sets {@code bra} and calls
 * {@code slice_del()}.
 *
 * @return {@code true} if an entry of {@code a_2} matched, {@code false}
 *         otherwise
 */
private boolean r_noun() {
    ket = cursor;
    final boolean matched = find_among_b(a_2) != 0;
    if (matched) {
        bra = cursor;
        slice_del();
    }
    return matched;
}
/**
 * Ending step: searches table {@code a_3} with {@code find_among_b}; when an
 * entry matches and {@link #r_R2()} holds at the resulting cursor position,
 * {@code slice_del()} is called.
 *
 * @return {@code true} only when both the table match and the {@code r_R2}
 *         test succeed
 */
private boolean r_ending() {
    ket = cursor;
    if (find_among_b(a_3) != 0) {
        bra = cursor;
        if (r_R2()) {
            slice_del();
            return true;
        }
    }
    return false;
}
/**
 * Runs the stemmer over the current word.
 * <p>
 * After {@code r_mark_regions()} has set the marks, processing switches to
 * backward mode with {@code limit_backward} narrowed to {@code I_pV}. The
 * steps {@code r_ending}, {@code r_verb}, {@code r_adjective} and
 * {@code r_noun} then run in that order; their results are ignored and the
 * cursor is re-anchored at the same distance from {@code limit} after each.
 *
 * @return {@code false} if {@code limit} lies before {@code I_pV};
 *         {@code true} otherwise
 */
public boolean stem() {
    r_mark_regions();

    // switch to backward processing
    limit_backward = cursor;
    cursor = limit;
    if (cursor < I_pV) {
        return false;
    }
    final int outerLimit = limit_backward;
    limit_backward = I_pV;

    int offset = limit - cursor;
    r_ending();
    cursor = limit - offset;

    offset = limit - cursor;
    r_verb();
    cursor = limit - offset;

    offset = limit - cursor;
    r_adjective();
    cursor = limit - offset;

    offset = limit - cursor;
    r_noun();
    cursor = limit - offset;

    limit_backward = outerLimit;
    cursor = limit_backward;
    return true;
}
/**
 * Two stemmers are equal when the other object is an
 * {@code ArmenianStemmer}; no per-instance state is compared.
 */
@Override
public boolean equals( Object o ) {
    final boolean sameKind = o instanceof ArmenianStemmer;
    return sameKind;
}
/**
 * Hash derived solely from the class name, so every instance of this stemmer
 * hashes to the same value.
 */
@Override
public int hashCode() {
    final String className = ArmenianStemmer.class.getName();
    return className.hashCode();
}

View File

@ -1,442 +1,301 @@
// This file was generated automatically by the Snowball to Java compiler
// Generated by Snowball 2.0.0 - https://snowballstem.org/
package org.tartarus.snowball.ext;
import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;
/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/
/**
* This class implements the stemming algorithm defined by a snowball script.
* <p>
* Generated by Snowball 2.0.0 - https://snowballstem.org/
* </p>
*/
@SuppressWarnings("unused")
public class DanishStemmer extends org.tartarus.snowball.SnowballStemmer {
@SuppressWarnings("unused") public class DanishStemmer extends SnowballProgram {
private static final long serialVersionUID = 1L;
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
private static final long serialVersionUID = 1L;
private final static Among a_0[] = {
new Among("hed", -1, 1),
new Among("ethed", 0, 1),
new Among("ered", -1, 1),
new Among("e", -1, 1),
new Among("erede", 3, 1),
new Among("ende", 3, 1),
new Among("erende", 5, 1),
new Among("ene", 3, 1),
new Among("erne", 3, 1),
new Among("ere", 3, 1),
new Among("en", -1, 1),
new Among("heden", 10, 1),
new Among("eren", 10, 1),
new Among("er", -1, 1),
new Among("heder", 13, 1),
new Among("erer", 13, 1),
new Among("s", -1, 2),
new Among("heds", 16, 1),
new Among("es", 16, 1),
new Among("endes", 18, 1),
new Among("erendes", 19, 1),
new Among("enes", 18, 1),
new Among("ernes", 18, 1),
new Among("eres", 18, 1),
new Among("ens", 16, 1),
new Among("hedens", 24, 1),
new Among("erens", 24, 1),
new Among("ers", 16, 1),
new Among("ets", 16, 1),
new Among("erets", 28, 1),
new Among("et", -1, 1),
new Among("eret", 30, 1)
};
/* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
private final static Among a_1[] = {
new Among("gd", -1, -1),
new Among("dt", -1, -1),
new Among("gt", -1, -1),
new Among("kt", -1, -1)
};
private final static Among a_0[] = {
new Among ( "hed", -1, 1, "", methodObject ),
new Among ( "ethed", 0, 1, "", methodObject ),
new Among ( "ered", -1, 1, "", methodObject ),
new Among ( "e", -1, 1, "", methodObject ),
new Among ( "erede", 3, 1, "", methodObject ),
new Among ( "ende", 3, 1, "", methodObject ),
new Among ( "erende", 5, 1, "", methodObject ),
new Among ( "ene", 3, 1, "", methodObject ),
new Among ( "erne", 3, 1, "", methodObject ),
new Among ( "ere", 3, 1, "", methodObject ),
new Among ( "en", -1, 1, "", methodObject ),
new Among ( "heden", 10, 1, "", methodObject ),
new Among ( "eren", 10, 1, "", methodObject ),
new Among ( "er", -1, 1, "", methodObject ),
new Among ( "heder", 13, 1, "", methodObject ),
new Among ( "erer", 13, 1, "", methodObject ),
new Among ( "s", -1, 2, "", methodObject ),
new Among ( "heds", 16, 1, "", methodObject ),
new Among ( "es", 16, 1, "", methodObject ),
new Among ( "endes", 18, 1, "", methodObject ),
new Among ( "erendes", 19, 1, "", methodObject ),
new Among ( "enes", 18, 1, "", methodObject ),
new Among ( "ernes", 18, 1, "", methodObject ),
new Among ( "eres", 18, 1, "", methodObject ),
new Among ( "ens", 16, 1, "", methodObject ),
new Among ( "hedens", 24, 1, "", methodObject ),
new Among ( "erens", 24, 1, "", methodObject ),
new Among ( "ers", 16, 1, "", methodObject ),
new Among ( "ets", 16, 1, "", methodObject ),
new Among ( "erets", 28, 1, "", methodObject ),
new Among ( "et", -1, 1, "", methodObject ),
new Among ( "eret", 30, 1, "", methodObject )
};
private final static Among a_2[] = {
new Among("ig", -1, 1),
new Among("lig", 0, 1),
new Among("elig", 1, 1),
new Among("els", -1, 1),
new Among("l\u00F8st", -1, 2)
};
private final static Among a_1[] = {
new Among ( "gd", -1, -1, "", methodObject ),
new Among ( "dt", -1, -1, "", methodObject ),
new Among ( "gt", -1, -1, "", methodObject ),
new Among ( "kt", -1, -1, "", methodObject )
};
private static final char g_c[] = {119, 223, 119, 1 };
private final static Among a_2[] = {
new Among ( "ig", -1, 1, "", methodObject ),
new Among ( "lig", 0, 1, "", methodObject ),
new Among ( "elig", 1, 1, "", methodObject ),
new Among ( "els", -1, 1, "", methodObject ),
new Among ( "l\u00F8st", -1, 2, "", methodObject )
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
private int I_x;
private int I_p1;
private java.lang.StringBuilder S_ch = new java.lang.StringBuilder();
private int I_x;
private int I_p1;
private java.lang.StringBuilder S_ch = new java.lang.StringBuilder();
/**
 * Copies this stemmer's own variables ({@code I_x}, {@code I_p1} and the
 * {@code S_ch} builder reference) from {@code other}, then hands the rest of
 * the copy to the superclass implementation.
 * <p>
 * Note that {@code S_ch} is assigned by reference, so both stemmers share the
 * same {@code StringBuilder} afterwards.
 *
 * @param other the stemmer whose state is copied into this one
 */
private void copy_from(DanishStemmer other) {
    S_ch = other.S_ch;
    I_p1 = other.I_p1;
    I_x = other.I_x;
    super.copy_from(other);
}
/**
 * Computes the marks {@code I_x} and {@code I_p1} for the current word.
 * <p>
 * {@code I_x} is the position three characters past the entry cursor. The
 * cursor is then advanced to the first position where
 * {@code in_grouping(g_v, 97, 248)} succeeds (restoring the cursor after the
 * probe), and from there until {@code out_grouping(g_v, 97, 248)} succeeds;
 * that position becomes {@code I_p1}, raised to {@code I_x} if it is smaller.
 *
 * @return {@code false} if the word is shorter than three characters from the
 *         cursor or either scan reaches {@code limit}; {@code true} otherwise
 */
private boolean r_mark_regions() {
    I_p1 = limit;
    final int origin = cursor;

    // hop 3, remember the landing position, then go back
    final int hopTarget = cursor + 3;
    if (hopTarget < 0 || hopTarget > limit) {
        return false;
    }
    cursor = hopTarget;
    I_x = cursor;
    cursor = origin;

    // goto: stop *at* the first position where the in_grouping probe succeeds
    while (true) {
        final int probeStart = cursor;
        final boolean hit = in_grouping(g_v, 97, 248);
        cursor = probeStart;
        if (hit) {
            break;
        }
        if (cursor >= limit) {
            return false;
        }
        cursor++;
    }

    // gopast: continue until the out_grouping test succeeds
    while (!out_grouping(g_v, 97, 248)) {
        if (cursor >= limit) {
            return false;
        }
        cursor++;
    }

    I_p1 = cursor;
    if (I_p1 < I_x) {
        I_p1 = I_x;
    }
    return true;
}
/**
 * Computes the marks {@code I_x} and {@code I_p1} for the current word.
 * <p>
 * {@code I_x} is set to the position three characters past the entry cursor
 * (the "hop 3" test). The cursor then moves forward to the first position at
 * which {@code in_grouping(g_v, 97, 248)} succeeds (goto) and on until
 * {@code out_grouping(g_v, 97, 248)} succeeds (gopast). The resulting
 * position becomes {@code I_p1}, but never less than {@code I_x}.
 *
 * @return {@code false} if the hop would leave the word or a scan reaches
 *         {@code limit}; {@code true} otherwise
 */
private boolean r_mark_regions() {
    I_p1 = limit;

    // test ( hop 3 setmark x )
    final int beforeHop = cursor;
    final int landing = cursor + 3;
    if (landing < 0 || landing > limit) {
        return false;
    }
    cursor = landing;
    I_x = cursor;
    cursor = beforeHop;

    // goto
    for (;;) {
        final int here = cursor;
        final boolean found = in_grouping(g_v, 97, 248);
        cursor = here;
        if (found) {
            break;
        }
        if (cursor >= limit) {
            return false;
        }
        cursor++;
    }

    // gopast
    for (;;) {
        if (out_grouping(g_v, 97, 248)) {
            break;
        }
        if (cursor >= limit) {
            return false;
        }
        cursor++;
    }

    // setmark p1, then try ( $p1 < x  $p1 = x )
    I_p1 = cursor;
    if (I_p1 < I_x) {
        I_p1 = I_x;
    }
    return true;
}
/**
 * Main suffix step. With {@code limit_backward} temporarily set to
 * {@code I_p1}, table {@code a_0} is searched via {@code find_among_b}.
 * <ul>
 *   <li>result 1: the slice is deleted;</li>
 *   <li>result 2 (the {@code "s"} entry): the slice is deleted only if
 *       {@code in_grouping_b(g_s_ending, 97, 229)} succeeds.</li>
 * </ul>
 * {@code limit_backward} is restored before any deletion takes place.
 *
 * @return {@code false} if the cursor is before {@code I_p1}, nothing
 *         matches, or the result-2 grouping test fails; {@code true} otherwise
 */
private boolean r_main_suffix() {
    if (cursor < I_p1) {
        return false;
    }
    final int outerLimit = limit_backward;
    limit_backward = I_p1;
    ket = cursor;
    final int action = find_among_b(a_0);
    limit_backward = outerLimit;
    if (action == 0) {
        return false;
    }
    bra = cursor;
    if (action == 1) {
        slice_del();
    } else if (action == 2) {
        if (!in_grouping_b(g_s_ending, 97, 229)) {
            return false;
        }
        slice_del();
    }
    return true;
}
/**
 * Main suffix step. With {@code limit_backward} temporarily moved to
 * {@code I_p1} (setlimit/tomark), table {@code a_0} is searched via
 * {@code find_among_b}.
 * <ul>
 *   <li>result 1: the slice is deleted;</li>
 *   <li>result 2 (the {@code "s"} entry): the slice is deleted only if
 *       {@code in_grouping_b(g_s_ending, 97, 229)} succeeds.</li>
 * </ul>
 * {@code limit_backward} is restored before any deletion takes place.
 *
 * @return {@code false} if the cursor is before {@code I_p1}, nothing
 *         matches, or the result-2 grouping test fails; {@code true} otherwise
 */
private boolean r_main_suffix() {
    // setlimit tomark p1: narrow the backward limit, keep the cursor in place
    final int distance = limit - cursor;
    if (cursor < I_p1) {
        return false;
    }
    cursor = I_p1;
    final int outerLimit = limit_backward;
    limit_backward = cursor;
    cursor = limit - distance;

    ket = cursor;
    final int action = find_among_b(a_0, 32);
    limit_backward = outerLimit;
    if (action == 0) {
        return false;
    }
    bra = cursor;

    if (action == 1) {
        slice_del();
    } else if (action == 2) {
        if (!in_grouping_b(g_s_ending, 97, 229)) {
            return false;
        }
        slice_del();
    }
    return true;
}
/**
 * Consonant-pair step. With {@code limit_backward} temporarily set to
 * {@code I_p1}, table {@code a_1} ("gd", "dt", "gt", "kt") is searched via
 * {@code find_among_b}. On a match the cursor is put back at its entry
 * distance from {@code limit}, moved one position back, and
 * {@code slice_del()} is called with {@code bra} at that new position.
 *
 * @return {@code false} if the cursor is before {@code I_p1}, nothing
 *         matches, or the cursor cannot step back past
 *         {@code limit_backward}; {@code true} otherwise
 */
private boolean r_consonant_pair() {
    final int entryDistance = limit - cursor;
    if (cursor < I_p1) {
        return false;
    }
    final int outerLimit = limit_backward;
    limit_backward = I_p1;
    ket = cursor;
    final boolean pairFound = find_among_b(a_1) != 0;
    if (pairFound) {
        bra = cursor;
    }
    limit_backward = outerLimit;
    if (!pairFound) {
        return false;
    }

    // rewind to the entry position, then step over one character
    cursor = limit - entryDistance;
    if (cursor <= limit_backward) {
        return false;
    }
    cursor--;
    bra = cursor;
    slice_del();
    return true;
}
/**
 * Consonant-pair step. Inside a test, and with {@code limit_backward}
 * temporarily moved to {@code I_p1}, table {@code a_1} ("gd", "dt", "gt",
 * "kt") is searched via {@code find_among_b}. On a match the cursor returns
 * to its entry distance from {@code limit}, steps one position back, and
 * {@code slice_del()} is called with {@code bra} at that new position.
 *
 * @return {@code false} if the cursor is before {@code I_p1}, nothing
 *         matches, or the cursor cannot step back past
 *         {@code limit_backward}; {@code true} otherwise
 */
private boolean r_consonant_pair() {
    // offset remembered for the enclosing test
    final int testOffset = limit - cursor;

    // setlimit tomark p1: narrow the backward limit, keep the cursor in place
    final int distance = limit - cursor;
    if (cursor < I_p1) {
        return false;
    }
    cursor = I_p1;
    final int outerLimit = limit_backward;
    limit_backward = cursor;
    cursor = limit - distance;

    ket = cursor;
    final boolean pairFound = find_among_b(a_1, 4) != 0;
    if (pairFound) {
        bra = cursor;
    }
    limit_backward = outerLimit;
    if (!pairFound) {
        return false;
    }

    // end of test: back to the entry position, then "next"
    cursor = limit - testOffset;
    if (cursor <= limit_backward) {
        return false;
    }
    cursor--;
    bra = cursor;
    slice_del();
    return true;
}
/**
 * Other-suffix step.
 * <ol>
 *   <li>If the text before the cursor ends in "st" preceded by "ig", the
 *       slice set up around "st" is deleted; the cursor is then put back at
 *       its entry distance from {@code limit} either way.</li>
 *   <li>With {@code limit_backward} temporarily set to {@code I_p1}, table
 *       {@code a_2} is searched. Result 1 deletes the slice and then attempts
 *       {@link #r_consonant_pair()} (result ignored, cursor re-anchored);
 *       result 2 (the {@code "l\u00F8st"} entry) replaces the slice with
 *       {@code "l\u00F8s"}.</li>
 * </ol>
 *
 * @return {@code false} if the cursor is before {@code I_p1} or nothing in
 *         {@code a_2} matches; {@code true} otherwise
 */
private boolean r_other_suffix() {
    final int entryDistance = limit - cursor;
    ket = cursor;
    if (eq_s_b("st")) {
        bra = cursor;
        if (eq_s_b("ig")) {
            slice_del();
        }
    }
    cursor = limit - entryDistance;

    if (cursor < I_p1) {
        return false;
    }
    final int outerLimit = limit_backward;
    limit_backward = I_p1;
    ket = cursor;
    final int action = find_among_b(a_2);
    limit_backward = outerLimit;
    if (action == 0) {
        return false;
    }
    bra = cursor;

    if (action == 1) {
        slice_del();
        final int afterDelete = limit - cursor;
        r_consonant_pair();
        cursor = limit - afterDelete;
    } else if (action == 2) {
        slice_from("l\u00F8s");
    }
    return true;
}
/**
 * Other-suffix step.
 * <ol>
 *   <li>do: if the text before the cursor ends in "st" preceded by "ig", the
 *       slice set up around "st" is deleted; the cursor is then put back at
 *       its entry distance from {@code limit} either way.</li>
 *   <li>With {@code limit_backward} temporarily moved to {@code I_p1}, table
 *       {@code a_2} is searched. Result 1 deletes the slice and then attempts
 *       {@link #r_consonant_pair()} (result ignored, cursor re-anchored);
 *       result 2 (the {@code "l\u00F8st"} entry) replaces the slice with
 *       {@code "l\u00F8s"}.</li>
 * </ol>
 *
 * @return {@code false} if the cursor is before {@code I_p1} or nothing in
 *         {@code a_2} matches; {@code true} otherwise
 */
private boolean r_other_suffix() {
    // do ( ['st'] 'ig' delete )
    final int entryDistance = limit - cursor;
    ket = cursor;
    if (eq_s_b(2, "st")) {
        bra = cursor;
        if (eq_s_b(2, "ig")) {
            slice_del();
        }
    }
    cursor = limit - entryDistance;

    // setlimit tomark p1: narrow the backward limit, keep the cursor in place
    final int distance = limit - cursor;
    if (cursor < I_p1) {
        return false;
    }
    cursor = I_p1;
    final int outerLimit = limit_backward;
    limit_backward = cursor;
    cursor = limit - distance;

    ket = cursor;
    final int action = find_among_b(a_2, 5);
    limit_backward = outerLimit;
    if (action == 0) {
        return false;
    }
    bra = cursor;

    if (action == 1) {
        slice_del();
        // do consonant_pair
        final int afterDelete = limit - cursor;
        r_consonant_pair();
        cursor = limit - afterDelete;
    } else if (action == 2) {
        slice_from("l\u00F8s");
    }
    return true;
}
/**
 * Undouble step. With {@code limit_backward} temporarily set to
 * {@code I_p1}, tests {@code in_grouping_b(g_c, 98, 122)} at the cursor. On
 * success the slice between {@code bra} and {@code ket} is copied into
 * {@code S_ch} via {@code slice_to}; if the text before the cursor then
 * equals {@code S_ch} (checked with {@code eq_s_b}), {@code slice_del()} is
 * called.
 *
 * @return {@code false} if the cursor is before {@code I_p1}, the grouping
 *         test fails, or the {@code eq_s_b} comparison fails; {@code true}
 *         otherwise
 */
private boolean r_undouble() {
    if (cursor < I_p1) {
        return false;
    }
    final int outerLimit = limit_backward;
    limit_backward = I_p1;
    ket = cursor;
    final boolean inGroup = in_grouping_b(g_c, 98, 122);
    if (inGroup) {
        bra = cursor;
        slice_to(S_ch);
    }
    limit_backward = outerLimit;
    if (!inGroup || !eq_s_b(S_ch)) {
        return false;
    }
    slice_del();
    return true;
}
/**
 * Undouble step. With {@code limit_backward} temporarily moved to
 * {@code I_p1}, tests {@code out_grouping_b(g_v, 97, 248)} at the cursor. On
 * success the slice between {@code bra} and {@code ket} is stored in
 * {@code S_ch} via {@code slice_to}; if the text before the cursor then
 * equals {@code S_ch} (checked with {@code eq_v_b}), {@code slice_del()} is
 * called.
 *
 * @return {@code false} if the cursor is before {@code I_p1}, the grouping
 *         test fails, or the {@code eq_v_b} comparison fails; {@code true}
 *         otherwise
 */
private boolean r_undouble() {
    // setlimit tomark p1: narrow the backward limit, keep the cursor in place
    final int distance = limit - cursor;
    if (cursor < I_p1) {
        return false;
    }
    cursor = I_p1;
    final int outerLimit = limit_backward;
    limit_backward = cursor;
    cursor = limit - distance;

    ket = cursor;
    final boolean outsideGroup = out_grouping_b(g_v, 97, 248);
    if (outsideGroup) {
        bra = cursor;
        S_ch = slice_to(S_ch);
    }
    limit_backward = outerLimit;
    if (!outsideGroup || !eq_v_b(S_ch)) {
        return false;
    }
    slice_del();
    return true;
}
/**
 * Runs the stemmer over the current word.
 * <p>
 * {@code r_mark_regions()} is called first with the cursor restored
 * afterwards. Processing then switches to backward mode, and
 * {@code r_main_suffix}, {@code r_consonant_pair}, {@code r_other_suffix} and
 * {@code r_undouble} run in that order. Every step's result is ignored and
 * the cursor is re-anchored at the same distance from {@code limit} after
 * each one.
 *
 * @return always {@code true}
 */
public boolean stem() {
    final int entry = cursor;
    r_mark_regions();
    cursor = entry;

    // switch to backward processing
    limit_backward = cursor;
    cursor = limit;

    int offset = limit - cursor;
    r_main_suffix();
    cursor = limit - offset;

    offset = limit - cursor;
    r_consonant_pair();
    cursor = limit - offset;

    offset = limit - cursor;
    r_other_suffix();
    cursor = limit - offset;

    offset = limit - cursor;
    r_undouble();
    cursor = limit - offset;

    cursor = limit_backward;
    return true;
}
/**
 * Runs the stemmer over the current word.
 * <p>
 * {@code r_mark_regions()} is attempted first with the cursor restored
 * afterwards. Processing then switches to backward mode, and
 * {@code r_main_suffix}, {@code r_consonant_pair}, {@code r_other_suffix} and
 * {@code r_undouble} are each attempted in that order. No step's result
 * affects the outcome, and the cursor is re-anchored at the same distance
 * from {@code limit} after each one.
 *
 * @return always {@code true}
 */
@Override
public boolean stem() {
    // do mark_regions
    final int entry = cursor;
    r_mark_regions();
    cursor = entry;

    // backwards
    limit_backward = cursor;
    cursor = limit;

    // do main_suffix
    int offset = limit - cursor;
    r_main_suffix();
    cursor = limit - offset;

    // do consonant_pair
    offset = limit - cursor;
    r_consonant_pair();
    cursor = limit - offset;

    // do other_suffix
    offset = limit - cursor;
    r_other_suffix();
    cursor = limit - offset;

    // do undouble
    offset = limit - cursor;
    r_undouble();
    cursor = limit - offset;

    cursor = limit_backward;
    return true;
}
/**
 * Two stemmers are equal when the other object is a {@code DanishStemmer};
 * no per-instance state is compared.
 */
@Override
public boolean equals( Object o ) {
    final boolean sameKind = o instanceof DanishStemmer;
    return sameKind;
}
/**
 * Equality is decided by type alone: any {@code DanishStemmer} instance is
 * equal to any other, and nothing else is.
 */
@Override
public boolean equals( Object o ) {
    if (o instanceof DanishStemmer) {
        return true;
    }
    return false;
}
/**
 * Hash derived solely from the class name, so every instance of this stemmer
 * hashes to the same value.
 */
@Override
public int hashCode() {
    final String className = DanishStemmer.class.getName();
    return className.hashCode();
}
/**
 * Returns the hash of this class's name; the value does not depend on any
 * instance state.
 */
@Override
public int hashCode() {
    final Class<DanishStemmer> type = DanishStemmer.class;
    return type.getName().hashCode();
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,210 @@
// Generated by Snowball 2.0.0 - https://snowballstem.org/
package org.tartarus.snowball.ext;
import org.tartarus.snowball.Among;
/**
* This class implements the stemming algorithm defined by a snowball script.
* <p>
* Generated by Snowball 2.0.0 - https://snowballstem.org/
* </p>
*/
@SuppressWarnings("unused")
public class HindiStemmer extends org.tartarus.snowball.SnowballStemmer {
private static final long serialVersionUID = 1L;
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
private final static Among a_0[] = {
new Among("\u0906\u0901", -1, -1),
new Among("\u093E\u0901", -1, -1),
new Among("\u0907\u092F\u093E\u0901", 1, -1),
new Among("\u0906\u0907\u092F\u093E\u0901", 2, -1),
new Among("\u093E\u0907\u092F\u093E\u0901", 2, -1),
new Among("\u093F\u092F\u093E\u0901", 1, -1),
new Among("\u0906\u0902", -1, -1),
new Among("\u0909\u0906\u0902", 6, -1),
new Among("\u0941\u0906\u0902", 6, -1),
new Among("\u0908\u0902", -1, -1),
new Among("\u0906\u0908\u0902", 9, -1),
new Among("\u093E\u0908\u0902", 9, -1),
new Among("\u090F\u0902", -1, -1),
new Among("\u0906\u090F\u0902", 12, -1),
new Among("\u0909\u090F\u0902", 12, -1),
new Among("\u093E\u090F\u0902", 12, -1),
new Among("\u0924\u093E\u090F\u0902", 15, -1, "r_CONSONANT", methodObject),
new Among("\u0905\u0924\u093E\u090F\u0902", 16, -1),
new Among("\u0928\u093E\u090F\u0902", 15, -1, "r_CONSONANT", methodObject),
new Among("\u0905\u0928\u093E\u090F\u0902", 18, -1),
new Among("\u0941\u090F\u0902", 12, -1),
new Among("\u0913\u0902", -1, -1),
new Among("\u0906\u0913\u0902", 21, -1),
new Among("\u0909\u0913\u0902", 21, -1),
new Among("\u093E\u0913\u0902", 21, -1),
new Among("\u0924\u093E\u0913\u0902", 24, -1, "r_CONSONANT", methodObject),
new Among("\u0905\u0924\u093E\u0913\u0902", 25, -1),
new Among("\u0928\u093E\u0913\u0902", 24, -1, "r_CONSONANT", methodObject),
new Among("\u0905\u0928\u093E\u0913\u0902", 27, -1),
new Among("\u0941\u0913\u0902", 21, -1),
new Among("\u093E\u0902", -1, -1),
new Among("\u0907\u092F\u093E\u0902", 30, -1),
new Among("\u0906\u0907\u092F\u093E\u0902", 31, -1),
new Among("\u093E\u0907\u092F\u093E\u0902", 31, -1),
new Among("\u093F\u092F\u093E\u0902", 30, -1),
new Among("\u0940\u0902", -1, -1),
new Among("\u0924\u0940\u0902", 35, -1, "r_CONSONANT", methodObject),
new Among("\u0905\u0924\u0940\u0902", 36, -1),
new Among("\u0906\u0924\u0940\u0902", 36, -1),
new Among("\u093E\u0924\u0940\u0902", 36, -1),
new Among("\u0947\u0902", -1, -1),
new Among("\u094B\u0902", -1, -1),
new Among("\u0907\u092F\u094B\u0902", 41, -1),
new Among("\u0906\u0907\u092F\u094B\u0902", 42, -1),
new Among("\u093E\u0907\u092F\u094B\u0902", 42, -1),
new Among("\u093F\u092F\u094B\u0902", 41, -1),
new Among("\u0905", -1, -1),
new Among("\u0906", -1, -1),
new Among("\u0907", -1, -1),
new Among("\u0908", -1, -1),
new Among("\u0906\u0908", 49, -1),
new Among("\u093E\u0908", 49, -1),
new Among("\u0909", -1, -1),
new Among("\u090A", -1, -1),
new Among("\u090F", -1, -1),
new Among("\u0906\u090F", 54, -1),
new Among("\u0907\u090F", 54, -1),
new Among("\u0906\u0907\u090F", 56, -1),
new Among("\u093E\u0907\u090F", 56, -1),
new Among("\u093E\u090F", 54, -1),
new Among("\u093F\u090F", 54, -1),
new Among("\u0913", -1, -1),
new Among("\u0906\u0913", 61, -1),
new Among("\u093E\u0913", 61, -1),
new Among("\u0915\u0930", -1, -1, "r_CONSONANT", methodObject),
new Among("\u0905\u0915\u0930", 64, -1),
new Among("\u0906\u0915\u0930", 64, -1),
new Among("\u093E\u0915\u0930", 64, -1),
new Among("\u093E", -1, -1),
new Among("\u090A\u0902\u0917\u093E", 68, -1),
new Among("\u0906\u090A\u0902\u0917\u093E", 69, -1),
new Among("\u093E\u090A\u0902\u0917\u093E", 69, -1),
new Among("\u0942\u0902\u0917\u093E", 68, -1),
new Among("\u090F\u0917\u093E", 68, -1),
new Among("\u0906\u090F\u0917\u093E", 73, -1),
new Among("\u093E\u090F\u0917\u093E", 73, -1),
new Among("\u0947\u0917\u093E", 68, -1),
new Among("\u0924\u093E", 68, -1, "r_CONSONANT", methodObject),
new Among("\u0905\u0924\u093E", 77, -1),
new Among("\u0906\u0924\u093E", 77, -1),
new Among("\u093E\u0924\u093E", 77, -1),
new Among("\u0928\u093E", 68, -1, "r_CONSONANT", methodObject),
new Among("\u0905\u0928\u093E", 81, -1),
new Among("\u0906\u0928\u093E", 81, -1),
new Among("\u093E\u0928\u093E", 81, -1),
new Among("\u0906\u092F\u093E", 68, -1),
new Among("\u093E\u092F\u093E", 68, -1),
new Among("\u093F", -1, -1),
new Among("\u0940", -1, -1),
new Among("\u090A\u0902\u0917\u0940", 88, -1),
new Among("\u0906\u090A\u0902\u0917\u0940", 89, -1),
new Among("\u093E\u090A\u0902\u0917\u0940", 89, -1),
new Among("\u090F\u0902\u0917\u0940", 88, -1),
new Among("\u0906\u090F\u0902\u0917\u0940", 92, -1),
new Among("\u093E\u090F\u0902\u0917\u0940", 92, -1),
new Among("\u0942\u0902\u0917\u0940", 88, -1),
new Among("\u0947\u0902\u0917\u0940", 88, -1),
new Among("\u090F\u0917\u0940", 88, -1),
new Among("\u0906\u090F\u0917\u0940", 97, -1),
new Among("\u093E\u090F\u0917\u0940", 97, -1),
new Among("\u0913\u0917\u0940", 88, -1),
new Among("\u0906\u0913\u0917\u0940", 100, -1),
new Among("\u093E\u0913\u0917\u0940", 100, -1),
new Among("\u0947\u0917\u0940", 88, -1),
new Among("\u094B\u0917\u0940", 88, -1),
new Among("\u0924\u0940", 88, -1, "r_CONSONANT", methodObject),
new Among("\u0905\u0924\u0940", 105, -1),
new Among("\u0906\u0924\u0940", 105, -1),
new Among("\u093E\u0924\u0940", 105, -1),
new Among("\u0928\u0940", 88, -1, "r_CONSONANT", methodObject),
new Among("\u0905\u0928\u0940", 109, -1),
new Among("\u0941", -1, -1),
new Among("\u0942", -1, -1),
new Among("\u0947", -1, -1),
new Among("\u090F\u0902\u0917\u0947", 113, -1),
new Among("\u0906\u090F\u0902\u0917\u0947", 114, -1),
new Among("\u093E\u090F\u0902\u0917\u0947", 114, -1),
new Among("\u0947\u0902\u0917\u0947", 113, -1),
new Among("\u0913\u0917\u0947", 113, -1),
new Among("\u0906\u0913\u0917\u0947", 118, -1),
new Among("\u093E\u0913\u0917\u0947", 118, -1),
new Among("\u094B\u0917\u0947", 113, -1),
new Among("\u0924\u0947", 113, -1, "r_CONSONANT", methodObject),
new Among("\u0905\u0924\u0947", 122, -1),
new Among("\u0906\u0924\u0947", 122, -1),
new Among("\u093E\u0924\u0947", 122, -1),
new Among("\u0928\u0947", 113, -1, "r_CONSONANT", methodObject),
new Among("\u0905\u0928\u0947", 126, -1),
new Among("\u0906\u0928\u0947", 126, -1),
new Among("\u093E\u0928\u0947", 126, -1),
new Among("\u094B", -1, -1),
new Among("\u094D", -1, -1)
};
private static final char g_consonant[] = {255, 255, 255, 255, 159, 0, 0, 0, 248, 7 };
private int I_p;
/**
 * Guard routine: tests {@code in_grouping_b} against {@code g_consonant}
 * over code points 2325..2399.
 * <p>
 * Several entries of {@code a_0} name this method ("r_CONSONANT") together
 * with {@code methodObject}; presumably that is why it is public (to be
 * confirmed against the {@code Among} implementation).
 *
 * @return the result of the grouping test
 */
public boolean r_CONSONANT() {
    return in_grouping_b(g_consonant, 2325, 2399);
}
/**
 * Runs the stemmer over the current word.
 * <p>
 * {@code I_p} is set to one position past the entry cursor, so the first
 * character is never part of a removed suffix. Processing then switches to
 * backward mode and, with {@code limit_backward} temporarily set to
 * {@code I_p}, table {@code a_0} is searched via {@code find_among_b}; a
 * match is removed with {@code slice_del()}.
 *
 * @return {@code false} if the cursor is already at {@code limit} on entry,
 *         if {@code limit} lies before {@code I_p}, or if no entry of
 *         {@code a_0} matches; {@code true} after a suffix has been deleted
 */
public boolean stem() {
    final int entry = cursor;
    if (entry >= limit) {
        return false;
    }
    // mark the position just after the first character; cursor itself stays put
    I_p = entry + 1;

    // switch to backward processing
    limit_backward = cursor;
    cursor = limit;
    if (cursor < I_p) {
        return false;
    }

    final int outerLimit = limit_backward;
    limit_backward = I_p;
    ket = cursor;
    final boolean matched = find_among_b(a_0) != 0;
    if (matched) {
        bra = cursor;
    }
    limit_backward = outerLimit;
    if (!matched) {
        return false;
    }

    slice_del();
    cursor = limit_backward;
    return true;
}
/**
 * Two stemmers are equal when the other object is a {@code HindiStemmer};
 * no per-instance state is compared.
 */
@Override
public boolean equals( Object o ) {
    final boolean sameKind = o instanceof HindiStemmer;
    return sameKind;
}
/**
 * Hash derived solely from the class name, so every instance of this stemmer
 * hashes to the same value.
 */
@Override
public int hashCode() {
    final String className = HindiStemmer.class.getName();
    return className.hashCode();
}
}

View File

@ -0,0 +1,392 @@
// Generated by Snowball 2.0.0 - https://snowballstem.org/
package org.tartarus.snowball.ext;
import org.tartarus.snowball.Among;
/**
* This class implements the stemming algorithm defined by a snowball script.
* <p>
* Generated by Snowball 2.0.0 - https://snowballstem.org/
* </p>
*/
@SuppressWarnings("unused")
public class IndonesianStemmer extends org.tartarus.snowball.SnowballStemmer {
private static final long serialVersionUID = 1L;
// Lookup handed to those Among entries below that also name a guard method
// (e.g. "r_SUFFIX_I_OK").
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
// Each Among entry is (string, int, result[, guard-method name, methodObject]).
// The third argument is the value the find_among/find_among_b call yields for
// that entry: r_remove_first_order_prefix switches on it for a_3.
// NOTE(review): the second argument looks like an index of another entry in
// the same table (-1 for none) -- confirm against the Among class.

// Searched backwards by r_remove_particle.
private final static Among a_0[] = {
new Among("kah", -1, 1),
new Among("lah", -1, 1),
new Among("pun", -1, 1)
};
// Searched backwards by r_remove_possessive_pronoun.
private final static Among a_1[] = {
new Among("nya", -1, 1),
new Among("ku", -1, 1),
new Among("mu", -1, 1)
};
// Searched backwards by r_remove_suffix; every entry names an r_SUFFIX_*_OK
// guard method.
private final static Among a_2[] = {
new Among("i", -1, 1, "r_SUFFIX_I_OK", methodObject),
new Among("an", -1, 1, "r_SUFFIX_AN_OK", methodObject),
new Among("kan", 1, 1, "r_SUFFIX_KAN_OK", methodObject)
};
// Searched forwards by r_remove_first_order_prefix; result codes 1..6 select
// the cases of its switch. "meny" and "peny" name the r_VOWEL guard.
private final static Among a_3[] = {
new Among("di", -1, 1),
new Among("ke", -1, 2),
new Among("me", -1, 1),
new Among("mem", 2, 5),
new Among("men", 2, 1),
new Among("meng", 4, 1),
new Among("meny", 4, 3, "r_VOWEL", methodObject),
new Among("pem", -1, 6),
new Among("pen", -1, 2),
new Among("peng", 8, 2),
new Among("peny", 8, 4, "r_VOWEL", methodObject),
new Among("ter", -1, 1)
};
// NOTE(review): the routine that searches a_4 is not visible in this part of
// the file. The "be" entry names the r_KER guard.
private final static Among a_4[] = {
new Among("be", -1, 3, "r_KER", methodObject),
new Among("belajar", 0, 4),
new Among("ber", 0, 3),
new Among("pe", -1, 1),
new Among("pelajar", 3, 2),
new Among("per", 3, 1)
};
// Grouping table used with the code-point range 97..117 by r_VOWEL, r_KER and
// r_remove_first_order_prefix.
private static final char g_vowel[] = {17, 65, 16 };
// Written by r_remove_first_order_prefix (1 or 3 in its visible cases) and
// read by the r_SUFFIX_*_OK guards.
private int I_prefix;
// Decremented by one by each removal routine after it removes or rewrites an
// affix.
private int I_measure;
/**
 * Deletes an ending listed in {@code a_0} (searched backwards from the cursor)
 * and decrements {@code I_measure}.
 *
 * @return {@code true} if an ending was found and removed, {@code false} otherwise
 */
private boolean r_remove_particle() {
  ket = cursor;
  final int matched = find_among_b(a_0);
  if (matched != 0) {
    bra = cursor;
    slice_del();
    I_measure--;
    return true;
  }
  return false;
}
/**
 * Deletes an ending listed in {@code a_1} (searched backwards from the cursor)
 * and decrements {@code I_measure}.
 *
 * @return {@code true} if an ending was found and removed, {@code false} otherwise
 */
private boolean r_remove_possessive_pronoun() {
  ket = cursor;
  final int matched = find_among_b(a_1);
  if (matched != 0) {
    bra = cursor;
    slice_del();
    I_measure--;
    return true;
  }
  return false;
}
/**
 * Guard named by the "kan" entry of {@code a_2}.
 *
 * @return {@code true} unless {@code I_prefix} is 2 or 3
 */
public boolean r_SUFFIX_KAN_OK() {
  return I_prefix != 3 && I_prefix != 2;
}
/**
 * Guard named by the "an" entry of {@code a_2}.
 *
 * @return {@code true} unless {@code I_prefix} is 1
 */
public boolean r_SUFFIX_AN_OK() {
  return I_prefix != 1;
}
/**
 * Guard named by the "i" entry of {@code a_2}.
 *
 * @return {@code false} when {@code I_prefix} exceeds 2 or when {@code eq_s_b("s")}
 *     matches at the cursor; {@code true} otherwise
 */
public boolean r_SUFFIX_I_OK() {
  if (I_prefix > 2) {
    return false;
  }
  final int offsetFromEnd = limit - cursor;
  if (eq_s_b("s")) {
    // A preceding "s" vetoes the suffix; the cursor is deliberately left as eq_s_b set it.
    return false;
  }
  cursor = limit - offsetFromEnd;
  return true;
}
/**
 * Deletes an ending listed in {@code a_2} (searched backwards from the cursor)
 * and decrements {@code I_measure}.
 *
 * @return {@code true} if an ending was found and removed, {@code false} otherwise
 */
private boolean r_remove_suffix() {
  ket = cursor;
  final int matched = find_among_b(a_2);
  if (matched != 0) {
    bra = cursor;
    slice_del();
    I_measure--;
    return true;
  }
  return false;
}
/**
 * Guard named by the "meny" and "peny" entries of {@code a_3}.
 *
 * @return the result of {@code in_grouping(g_vowel, 97, 117)} at the cursor
 */
public boolean r_VOWEL() {
  return in_grouping(g_vowel, 97, 117);
}
/**
 * Guard named by the "be" entry of {@code a_4}.
 *
 * @return {@code true} when {@code out_grouping(g_vowel, 97, 117)} succeeds and
 *     {@code eq_s("er")} then matches; the second check is skipped if the first fails
 */
public boolean r_KER() {
  return out_grouping(g_vowel, 97, 117) && eq_s("er");
}
/**
 * Removes or rewrites a prefix listed in {@code a_3}, records the prefix class in
 * {@code I_prefix} (1 or 3) and decrements {@code I_measure}.
 *
 * @return {@code false} if no entry of {@code a_3} matches at the cursor, otherwise {@code true}
 */
private boolean r_remove_first_order_prefix() {
  bra = cursor;
  final int prefixKind = find_among(a_3);
  if (prefixKind == 0) {
    return false;
  }
  ket = cursor;
  if (prefixKind == 1 || prefixKind == 2) {
    // Plain deletion of the prefix.
    slice_del();
    I_prefix = (prefixKind == 1) ? 1 : 3;
    I_measure--;
  } else if (prefixKind == 3 || prefixKind == 4) {
    // Prefix is replaced by "s".
    I_prefix = (prefixKind == 3) ? 1 : 3;
    slice_from("s");
    I_measure--;
  } else if (prefixKind == 5 || prefixKind == 6) {
    I_prefix = (prefixKind == 5) ? 1 : 3;
    I_measure--;
    // Replace by "p" when a g_vowel character follows, otherwise just delete.
    final int afterPrefix = cursor;
    final boolean vowelFollows = in_grouping(g_vowel, 97, 117);
    cursor = afterPrefix;
    if (vowelFollows) {
      slice_from("p");
    } else {
      slice_del();
    }
  }
  return true;
}
/**
 * Removes or rewrites a prefix listed in {@code a_4} and decrements {@code I_measure};
 * three of the four outcomes also set {@code I_prefix} (to 2 or 4).
 *
 * @return {@code false} if no entry of {@code a_4} matches at the cursor, otherwise {@code true}
 */
private boolean r_remove_second_order_prefix() {
  bra = cursor;
  final int prefixKind = find_among(a_4);
  if (prefixKind == 0) {
    return false;
  }
  ket = cursor;
  if (prefixKind == 1) {
    slice_del();
    I_prefix = 2;
    I_measure--;
  } else if (prefixKind == 2) {
    // I_prefix is intentionally left untouched for this outcome.
    slice_from("ajar");
    I_measure--;
  } else if (prefixKind == 3) {
    slice_del();
    I_prefix = 4;
    I_measure--;
  } else if (prefixKind == 4) {
    slice_from("ajar");
    I_prefix = 4;
    I_measure--;
  }
  return true;
}
/**
 * Stems the current word in place.
 * <p>
 * First fills {@code I_measure} by scanning forward, then removes a particle, a
 * possessive pronoun, and finally prefixes/suffixes, re-checking {@code I_measure > 2}
 * between the steps.
 * </p>
 *
 * @return {@code false} if {@code I_measure} is not greater than 2 at any of the three
 *     early checks, {@code true} otherwise
 */
public boolean stem() {
I_measure = 0;
int v_1 = cursor;
lab0: {
// Each pass of the outer loop scans forward until in_grouping(g_vowel, ...) succeeds
// and then counts one; the loop ends when the limit is reached without a match.
while(true)
{
int v_2 = cursor;
lab1: {
golab2: while(true)
{
lab3: {
if (!(in_grouping(g_vowel, 97, 117)))
{
break lab3;
}
break golab2;
}
if (cursor >= limit)
{
break lab1;
}
cursor++;
}
I_measure += 1;
continue;
}
// No further match: undo the partial scan and stop counting.
cursor = v_2;
break;
}
}
cursor = v_1;
if (!(I_measure > 2))
{
return false;
}
I_prefix = 0;
// Switch to backward processing: start at the end of the word.
limit_backward = cursor;
cursor = limit;
int v_4 = limit - cursor;
// Result ignored on purpose; the cursor is restored either way.
r_remove_particle();
cursor = limit - v_4;
if (!(I_measure > 2))
{
return false;
}
int v_5 = limit - cursor;
r_remove_possessive_pronoun();
cursor = limit - v_5;
// Back to forward processing for the prefix steps.
cursor = limit_backward;
if (!(I_measure > 2))
{
return false;
}
lab4: {
int v_6 = cursor;
lab5: {
int v_7 = cursor;
// Branch A: a first-order prefix was removed.
if (!r_remove_first_order_prefix())
{
break lab5;
}
int v_8 = cursor;
lab6: {
int v_9 = cursor;
if (!(I_measure > 2))
{
break lab6;
}
// Suffix removal runs backwards from the end of the word.
limit_backward = cursor;
cursor = limit;
if (!r_remove_suffix())
{
break lab6;
}
cursor = limit_backward;
cursor = v_9;
if (!(I_measure > 2))
{
break lab6;
}
if (!r_remove_second_order_prefix())
{
break lab6;
}
}
cursor = v_8;
cursor = v_7;
break lab4;
}
// Branch B: no first-order prefix; try a second-order prefix, then a suffix.
cursor = v_6;
int v_10 = cursor;
r_remove_second_order_prefix();
cursor = v_10;
int v_11 = cursor;
lab7: {
if (!(I_measure > 2))
{
break lab7;
}
limit_backward = cursor;
cursor = limit;
if (!r_remove_suffix())
{
break lab7;
}
cursor = limit_backward;
}
cursor = v_11;
}
return true;
}
/**
 * Compares by type only: the argument is equal when it is an {@code IndonesianStemmer}.
 *
 * @param o candidate object; {@code null} yields {@code false}
 * @return {@code true} if {@code o} is an instance of {@code IndonesianStemmer}
 */
@Override
public boolean equals( Object o ) {
  final boolean sameKind = (o instanceof IndonesianStemmer);
  return sameKind;
}
/**
 * Hash derived solely from the class name, so it is the same for every instance.
 *
 * @return hash code of {@code IndonesianStemmer.class.getName()}
 */
@Override
public int hashCode() {
  final String className = IndonesianStemmer.class.getName();
  return className.hashCode();
}
}

View File

@ -1,588 +1,399 @@
// This file was generated automatically by the Snowball to Java compiler
// Generated by Snowball 2.0.0 - https://snowballstem.org/
package org.tartarus.snowball.ext;
import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;
/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/
/**
* This class implements the stemming algorithm defined by a snowball script.
* <p>
* Generated by Snowball 2.0.0 - https://snowballstem.org/
* </p>
*/
@SuppressWarnings("unused")
public class IrishStemmer extends org.tartarus.snowball.SnowballStemmer {
@SuppressWarnings("unused") public class IrishStemmer extends SnowballProgram {
private static final long serialVersionUID = 1L;
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
private static final long serialVersionUID = 1L;
private final static Among a_0[] = {
new Among("b'", -1, 1),
new Among("bh", -1, 4),
new Among("bhf", 1, 2),
new Among("bp", -1, 8),
new Among("ch", -1, 5),
new Among("d'", -1, 1),
new Among("d'fh", 5, 2),
new Among("dh", -1, 6),
new Among("dt", -1, 9),
new Among("fh", -1, 2),
new Among("gc", -1, 5),
new Among("gh", -1, 7),
new Among("h-", -1, 1),
new Among("m'", -1, 1),
new Among("mb", -1, 4),
new Among("mh", -1, 10),
new Among("n-", -1, 1),
new Among("nd", -1, 6),
new Among("ng", -1, 7),
new Among("ph", -1, 8),
new Among("sh", -1, 3),
new Among("t-", -1, 1),
new Among("th", -1, 9),
new Among("ts", -1, 3)
};
/* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
private final static Among a_1[] = {
new Among("\u00EDochta", -1, 1),
new Among("a\u00EDochta", 0, 1),
new Among("ire", -1, 2),
new Among("aire", 2, 2),
new Among("abh", -1, 1),
new Among("eabh", 4, 1),
new Among("ibh", -1, 1),
new Among("aibh", 6, 1),
new Among("amh", -1, 1),
new Among("eamh", 8, 1),
new Among("imh", -1, 1),
new Among("aimh", 10, 1),
new Among("\u00EDocht", -1, 1),
new Among("a\u00EDocht", 12, 1),
new Among("ir\u00ED", -1, 2),
new Among("air\u00ED", 14, 2)
};
private final static Among a_0[] = {
new Among ( "b'", -1, 4, "", methodObject ),
new Among ( "bh", -1, 14, "", methodObject ),
new Among ( "bhf", 1, 9, "", methodObject ),
new Among ( "bp", -1, 11, "", methodObject ),
new Among ( "ch", -1, 15, "", methodObject ),
new Among ( "d'", -1, 2, "", methodObject ),
new Among ( "d'fh", 5, 3, "", methodObject ),
new Among ( "dh", -1, 16, "", methodObject ),
new Among ( "dt", -1, 13, "", methodObject ),
new Among ( "fh", -1, 17, "", methodObject ),
new Among ( "gc", -1, 7, "", methodObject ),
new Among ( "gh", -1, 18, "", methodObject ),
new Among ( "h-", -1, 1, "", methodObject ),
new Among ( "m'", -1, 4, "", methodObject ),
new Among ( "mb", -1, 6, "", methodObject ),
new Among ( "mh", -1, 19, "", methodObject ),
new Among ( "n-", -1, 1, "", methodObject ),
new Among ( "nd", -1, 8, "", methodObject ),
new Among ( "ng", -1, 10, "", methodObject ),
new Among ( "ph", -1, 20, "", methodObject ),
new Among ( "sh", -1, 5, "", methodObject ),
new Among ( "t-", -1, 1, "", methodObject ),
new Among ( "th", -1, 21, "", methodObject ),
new Among ( "ts", -1, 12, "", methodObject )
};
private final static Among a_2[] = {
new Among("\u00F3ideacha", -1, 6),
new Among("patacha", -1, 5),
new Among("achta", -1, 1),
new Among("arcachta", 2, 2),
new Among("eachta", 2, 1),
new Among("grafa\u00EDochta", -1, 4),
new Among("paite", -1, 5),
new Among("ach", -1, 1),
new Among("each", 7, 1),
new Among("\u00F3ideach", 8, 6),
new Among("gineach", 8, 3),
new Among("patach", 7, 5),
new Among("grafa\u00EDoch", -1, 4),
new Among("pataigh", -1, 5),
new Among("\u00F3idigh", -1, 6),
new Among("acht\u00FAil", -1, 1),
new Among("eacht\u00FAil", 15, 1),
new Among("gineas", -1, 3),
new Among("ginis", -1, 3),
new Among("acht", -1, 1),
new Among("arcacht", 19, 2),
new Among("eacht", 19, 1),
new Among("grafa\u00EDocht", -1, 4),
new Among("arcachta\u00ED", -1, 2),
new Among("grafa\u00EDochta\u00ED", -1, 4)
};
private final static Among a_1[] = {
new Among ( "\u00EDochta", -1, 1, "", methodObject ),
new Among ( "a\u00EDochta", 0, 1, "", methodObject ),
new Among ( "ire", -1, 2, "", methodObject ),
new Among ( "aire", 2, 2, "", methodObject ),
new Among ( "abh", -1, 1, "", methodObject ),
new Among ( "eabh", 4, 1, "", methodObject ),
new Among ( "ibh", -1, 1, "", methodObject ),
new Among ( "aibh", 6, 1, "", methodObject ),
new Among ( "amh", -1, 1, "", methodObject ),
new Among ( "eamh", 8, 1, "", methodObject ),
new Among ( "imh", -1, 1, "", methodObject ),
new Among ( "aimh", 10, 1, "", methodObject ),
new Among ( "\u00EDocht", -1, 1, "", methodObject ),
new Among ( "a\u00EDocht", 12, 1, "", methodObject ),
new Among ( "ir\u00ED", -1, 2, "", methodObject ),
new Among ( "air\u00ED", 14, 2, "", methodObject )
};
private final static Among a_3[] = {
new Among("imid", -1, 1),
new Among("aimid", 0, 1),
new Among("\u00EDmid", -1, 1),
new Among("a\u00EDmid", 2, 1),
new Among("adh", -1, 2),
new Among("eadh", 4, 2),
new Among("faidh", -1, 1),
new Among("fidh", -1, 1),
new Among("\u00E1il", -1, 2),
new Among("ain", -1, 2),
new Among("tear", -1, 2),
new Among("tar", -1, 2)
};
private final static Among a_2[] = {
new Among ( "\u00F3ideacha", -1, 6, "", methodObject ),
new Among ( "patacha", -1, 5, "", methodObject ),
new Among ( "achta", -1, 1, "", methodObject ),
new Among ( "arcachta", 2, 2, "", methodObject ),
new Among ( "eachta", 2, 1, "", methodObject ),
new Among ( "grafa\u00EDochta", -1, 4, "", methodObject ),
new Among ( "paite", -1, 5, "", methodObject ),
new Among ( "ach", -1, 1, "", methodObject ),
new Among ( "each", 7, 1, "", methodObject ),
new Among ( "\u00F3ideach", 8, 6, "", methodObject ),
new Among ( "gineach", 8, 3, "", methodObject ),
new Among ( "patach", 7, 5, "", methodObject ),
new Among ( "grafa\u00EDoch", -1, 4, "", methodObject ),
new Among ( "pataigh", -1, 5, "", methodObject ),
new Among ( "\u00F3idigh", -1, 6, "", methodObject ),
new Among ( "acht\u00FAil", -1, 1, "", methodObject ),
new Among ( "eacht\u00FAil", 15, 1, "", methodObject ),
new Among ( "gineas", -1, 3, "", methodObject ),
new Among ( "ginis", -1, 3, "", methodObject ),
new Among ( "acht", -1, 1, "", methodObject ),
new Among ( "arcacht", 19, 2, "", methodObject ),
new Among ( "eacht", 19, 1, "", methodObject ),
new Among ( "grafa\u00EDocht", -1, 4, "", methodObject ),
new Among ( "arcachta\u00ED", -1, 2, "", methodObject ),
new Among ( "grafa\u00EDochta\u00ED", -1, 4, "", methodObject )
};
private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 2 };
private final static Among a_3[] = {
new Among ( "imid", -1, 1, "", methodObject ),
new Among ( "aimid", 0, 1, "", methodObject ),
new Among ( "\u00EDmid", -1, 1, "", methodObject ),
new Among ( "a\u00EDmid", 2, 1, "", methodObject ),
new Among ( "adh", -1, 2, "", methodObject ),
new Among ( "eadh", 4, 2, "", methodObject ),
new Among ( "faidh", -1, 1, "", methodObject ),
new Among ( "fidh", -1, 1, "", methodObject ),
new Among ( "\u00E1il", -1, 2, "", methodObject ),
new Among ( "ain", -1, 2, "", methodObject ),
new Among ( "tear", -1, 2, "", methodObject ),
new Among ( "tar", -1, 2, "", methodObject )
};
private int I_p2;
private int I_p1;
private int I_pV;
private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 2 };
private int I_p2;
private int I_p1;
private int I_pV;
/**
 * Copies the three region marks from {@code other}, then delegates to the superclass copy.
 *
 * @param other stemmer whose state is copied into this one
 */
private void copy_from(IrishStemmer other) {
  I_pV = other.I_pV;
  I_p1 = other.I_p1;
  I_p2 = other.I_p2;
  // The inherited state is copied last, as before.
  super.copy_from(other);
}
/**
 * Computes the marks {@code I_pV}, {@code I_p1} and {@code I_p2}.
 * <p>
 * All three default to {@code limit}. Each mark is only overwritten when the
 * forward scans that precede it find what they look for before the limit.
 * The cursor is restored to its entry position afterwards.
 * </p>
 *
 * @return always {@code true}
 */
private boolean r_mark_regions() {
  I_pV = limit;
  I_p1 = limit;
  I_p2 = limit;

  final int pvStart = cursor;
  findPv: {
    // Scan forward until in_grouping(g_v, ...) succeeds.
    while (!in_grouping(g_v, 97, 250)) {
      if (cursor >= limit) {
        break findPv;
      }
      cursor++;
    }
    I_pV = cursor;
  }
  cursor = pvStart;

  final int regionStart = cursor;
  findRegions: {
    // in-group hit, then out-group hit => p1
    while (!in_grouping(g_v, 97, 250)) {
      if (cursor >= limit) {
        break findRegions;
      }
      cursor++;
    }
    while (!out_grouping(g_v, 97, 250)) {
      if (cursor >= limit) {
        break findRegions;
      }
      cursor++;
    }
    I_p1 = cursor;
    // the same pair of scans again => p2
    while (!in_grouping(g_v, 97, 250)) {
      if (cursor >= limit) {
        break findRegions;
      }
      cursor++;
    }
    while (!out_grouping(g_v, 97, 250)) {
      if (cursor >= limit) {
        break findRegions;
      }
      cursor++;
    }
    I_p2 = cursor;
  }
  cursor = regionStart;
  return true;
}
/**
 * Rewrites a word-initial sequence listed in {@code a_0}: depending on the matched
 * entry the sequence is deleted or replaced by a single letter.
 * <p>
 * The former {@code case 0} branch was unreachable (a zero result already returns
 * before the switch) and has been dropped; cases with identical bodies are merged.
 * </p>
 *
 * @return {@code false} if no entry of {@code a_0} matches at the cursor, otherwise {@code true}
 */
private boolean r_initial_morph() {
  bra = cursor;
  final int among_var = find_among(a_0, 24);
  if (among_var == 0) {
    return false;
  }
  ket = cursor;
  switch (among_var) {
    case 1:
    case 2:
    case 4:
      slice_del();
      break;
    case 3:
    case 9:
    case 17:
      slice_from("f");
      break;
    case 5:
    case 12:
      slice_from("s");
      break;
    case 6:
    case 14:
      slice_from("b");
      break;
    case 7:
    case 15:
      slice_from("c");
      break;
    case 8:
    case 16:
      slice_from("d");
      break;
    case 10:
    case 18:
      slice_from("g");
      break;
    case 11:
    case 20:
      slice_from("p");
      break;
    case 13:
    case 21:
      slice_from("t");
      break;
    case 19:
      slice_from("m");
      break;
  }
  return true;
}
/**
 * @return {@code true} when the cursor is at or beyond the mark {@code I_pV}
 */
private boolean r_RV() {
  return I_pV <= cursor;
}
/**
 * @return {@code true} when the cursor is at or beyond the mark {@code I_p1}
 */
private boolean r_R1() {
  return I_p1 <= cursor;
}
/**
 * @return {@code true} when the cursor is at or beyond the mark {@code I_p2}
 */
private boolean r_R2() {
  return I_p2 <= cursor;
}
/**
 * Deletes an ending listed in {@code a_1} (searched backwards), provided the
 * region check attached to the matched entry passes ({@code r_R1} or {@code r_R2}).
 * <p>
 * The former {@code case 0} branch was unreachable (a zero result already returns
 * before the switch) and has been dropped.
 * </p>
 *
 * @return {@code false} if nothing matches or the region check fails, otherwise {@code true}
 */
private boolean r_noun_sfx() {
  ket = cursor;
  final int among_var = find_among_b(a_1, 16);
  if (among_var == 0) {
    return false;
  }
  bra = cursor;
  switch (among_var) {
    case 1:
      if (!r_R1()) {
        return false;
      }
      slice_del();
      break;
    case 2:
      if (!r_R2()) {
        return false;
      }
      slice_del();
      break;
  }
  return true;
}
/**
 * Handles an ending listed in {@code a_2} (searched backwards): result 1 deletes it
 * if {@code r_R2()} holds, results 2-6 replace it with a fixed string.
 * <p>
 * The former {@code case 0} branch was unreachable (a zero result already returns
 * before the switch) and has been dropped.
 * </p>
 *
 * @return {@code false} if nothing matches or the {@code r_R2} check fails, otherwise {@code true}
 */
private boolean r_deriv() {
  ket = cursor;
  final int among_var = find_among_b(a_2, 25);
  if (among_var == 0) {
    return false;
  }
  bra = cursor;
  switch (among_var) {
    case 1:
      if (!r_R2()) {
        return false;
      }
      slice_del();
      break;
    case 2:
      slice_from("arc");
      break;
    case 3:
      slice_from("gin");
      break;
    case 4:
      slice_from("graf");
      break;
    case 5:
      slice_from("paite");
      break;
    case 6:
      slice_from("\u00F3id");
      break;
  }
  return true;
}
/**
 * Deletes an ending listed in {@code a_3} (searched backwards), provided the
 * region check attached to the matched entry passes ({@code r_RV} or {@code r_R1}).
 * <p>
 * The former {@code case 0} branch was unreachable (a zero result already returns
 * before the switch) and has been dropped.
 * </p>
 *
 * @return {@code false} if nothing matches or the region check fails, otherwise {@code true}
 */
private boolean r_verb_sfx() {
  ket = cursor;
  final int among_var = find_among_b(a_3, 12);
  if (among_var == 0) {
    return false;
  }
  bra = cursor;
  switch (among_var) {
    case 1:
      if (!r_RV()) {
        return false;
      }
      slice_del();
      break;
    case 2:
      if (!r_R1()) {
        return false;
      }
      slice_del();
      break;
  }
  return true;
}
/**
 * Stems the current word in place: initial-morph rewrite and region marking run
 * forwards, then the noun, derivational and verb suffix steps run backwards.
 * Each step's result is ignored and the cursor is restored after it.
 *
 * @return always {@code true}
 */
@Override
public boolean stem() {
  final int beforeMorph = cursor;
  r_initial_morph();
  cursor = beforeMorph;

  final int beforeRegions = cursor;
  r_mark_regions();
  cursor = beforeRegions;

  // Backward mode: work from the end of the word.
  limit_backward = cursor;
  cursor = limit;

  final int nounMark = limit - cursor;
  r_noun_sfx();
  cursor = limit - nounMark;

  final int derivMark = limit - cursor;
  r_deriv();
  cursor = limit - derivMark;

  final int verbMark = limit - cursor;
  r_verb_sfx();
  cursor = limit - verbMark;

  cursor = limit_backward;
  return true;
}
@Override
public boolean equals( Object o ) {
return o instanceof IrishStemmer;
private boolean r_mark_regions() {
I_pV = limit;
I_p1 = limit;
I_p2 = limit;
int v_1 = cursor;
lab0: {
golab1: while(true)
{
lab2: {
if (!(in_grouping(g_v, 97, 250)))
{
break lab2;
}
@Override
public int hashCode() {
return IrishStemmer.class.getName().hashCode();
break golab1;
}
if (cursor >= limit)
{
break lab0;
}
cursor++;
}
I_pV = cursor;
}
cursor = v_1;
int v_3 = cursor;
lab3: {
golab4: while(true)
{
lab5: {
if (!(in_grouping(g_v, 97, 250)))
{
break lab5;
}
break golab4;
}
if (cursor >= limit)
{
break lab3;
}
cursor++;
}
golab6: while(true)
{
lab7: {
if (!(out_grouping(g_v, 97, 250)))
{
break lab7;
}
break golab6;
}
if (cursor >= limit)
{
break lab3;
}
cursor++;
}
I_p1 = cursor;
golab8: while(true)
{
lab9: {
if (!(in_grouping(g_v, 97, 250)))
{
break lab9;
}
break golab8;
}
if (cursor >= limit)
{
break lab3;
}
cursor++;
}
golab10: while(true)
{
lab11: {
if (!(out_grouping(g_v, 97, 250)))
{
break lab11;
}
break golab10;
}
if (cursor >= limit)
{
break lab3;
}
cursor++;
}
I_p2 = cursor;
}
cursor = v_3;
return true;
}
/**
 * Rewrites a word-initial sequence listed in {@code a_0}: result 1 deletes it,
 * results 2-10 replace it with a single letter.
 *
 * @return {@code false} if no entry of {@code a_0} matches at the cursor, otherwise {@code true}
 */
private boolean r_initial_morph() {
  bra = cursor;
  final int outcome = find_among(a_0);
  if (outcome == 0) {
    return false;
  }
  ket = cursor;
  String replacement = null;
  switch (outcome) {
    case 1:
      slice_del();
      break;
    case 2: replacement = "f"; break;
    case 3: replacement = "s"; break;
    case 4: replacement = "b"; break;
    case 5: replacement = "c"; break;
    case 6: replacement = "d"; break;
    case 7: replacement = "g"; break;
    case 8: replacement = "p"; break;
    case 9: replacement = "t"; break;
    case 10: replacement = "m"; break;
  }
  if (replacement != null) {
    slice_from(replacement);
  }
  return true;
}
/**
 * @return {@code true} when the cursor is at or beyond the mark {@code I_pV}
 */
private boolean r_RV() {
  return I_pV <= cursor;
}
/**
 * @return {@code true} when the cursor is at or beyond the mark {@code I_p1}
 */
private boolean r_R1() {
  return I_p1 <= cursor;
}
/**
 * @return {@code true} when the cursor is at or beyond the mark {@code I_p2}
 */
private boolean r_R2() {
  return I_p2 <= cursor;
}
/**
 * Deletes an ending listed in {@code a_1} (searched backwards), provided the
 * region check attached to the matched entry passes ({@code r_R1} or {@code r_R2}).
 *
 * @return {@code false} if nothing matches or the region check fails, otherwise {@code true}
 */
private boolean r_noun_sfx() {
  ket = cursor;
  final int outcome = find_among_b(a_1);
  if (outcome == 0) {
    return false;
  }
  bra = cursor;
  if (outcome == 1) {
    if (!r_R1()) {
      return false;
    }
    slice_del();
  } else if (outcome == 2) {
    if (!r_R2()) {
      return false;
    }
    slice_del();
  }
  return true;
}
/**
 * Handles an ending listed in {@code a_2} (searched backwards): result 1 deletes it
 * if {@code r_R2()} holds, results 2-6 replace it with a fixed string.
 *
 * @return {@code false} if nothing matches or the {@code r_R2} check fails, otherwise {@code true}
 */
private boolean r_deriv() {
  ket = cursor;
  final int outcome = find_among_b(a_2);
  if (outcome == 0) {
    return false;
  }
  bra = cursor;
  if (outcome == 1) {
    if (!r_R2()) {
      return false;
    }
    slice_del();
  } else if (outcome == 2) {
    slice_from("arc");
  } else if (outcome == 3) {
    slice_from("gin");
  } else if (outcome == 4) {
    slice_from("graf");
  } else if (outcome == 5) {
    slice_from("paite");
  } else if (outcome == 6) {
    slice_from("\u00F3id");
  }
  return true;
}
/**
 * Deletes an ending listed in {@code a_3} (searched backwards), provided the
 * region check attached to the matched entry passes ({@code r_RV} or {@code r_R1}).
 *
 * @return {@code false} if nothing matches or the region check fails, otherwise {@code true}
 */
private boolean r_verb_sfx() {
  ket = cursor;
  final int outcome = find_among_b(a_3);
  if (outcome == 0) {
    return false;
  }
  bra = cursor;
  if (outcome == 1) {
    if (!r_RV()) {
      return false;
    }
    slice_del();
  } else if (outcome == 2) {
    if (!r_R1()) {
      return false;
    }
    slice_del();
  }
  return true;
}
/**
 * Stems the current word in place: initial-morph rewrite and region marking run
 * forwards, then the noun, derivational and verb suffix steps run backwards.
 * Each step's result is ignored.
 *
 * @return always {@code true}
 */
public boolean stem() {
  // Forward phase.
  final int wordStart = cursor;
  r_initial_morph();
  cursor = wordStart;
  r_mark_regions();

  // Backward phase: work from the end of the word, restoring the cursor after each step.
  limit_backward = cursor;
  cursor = limit;

  final int nounMark = limit - cursor;
  r_noun_sfx();
  cursor = limit - nounMark;

  final int derivMark = limit - cursor;
  r_deriv();
  cursor = limit - derivMark;

  final int verbMark = limit - cursor;
  r_verb_sfx();
  cursor = limit - verbMark;

  cursor = limit_backward;
  return true;
}
/**
 * Compares by type only: the argument is equal when it is an {@code IrishStemmer}.
 *
 * @param o candidate object; {@code null} yields {@code false}
 * @return {@code true} if {@code o} is an instance of {@code IrishStemmer}
 */
@Override
public boolean equals( Object o ) {
  final boolean sameKind = (o instanceof IrishStemmer);
  return sameKind;
}
/**
 * Hash derived solely from the class name, so it is the same for every instance.
 *
 * @return hash code of {@code IrishStemmer.class.getName()}
 */
@Override
public int hashCode() {
  final String className = IrishStemmer.class.getName();
  return className.hashCode();
}

View File

@ -0,0 +1,319 @@
// Generated by Snowball 2.0.0 - https://snowballstem.org/
package org.tartarus.snowball.ext;
import org.tartarus.snowball.Among;
/**
* This class implements the stemming algorithm defined by a snowball script.
* <p>
* Generated by Snowball 2.0.0 - https://snowballstem.org/
* </p>
*/
@SuppressWarnings("unused")
public class NepaliStemmer extends org.tartarus.snowball.SnowballStemmer {
private static final long serialVersionUID = 1L;
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
private final static Among a_0[] = {
new Among("\u0932\u093E\u0907", -1, 1),
new Among("\u0932\u093E\u0908", -1, 1),
new Among("\u0938\u0901\u0917", -1, 1),
new Among("\u0938\u0902\u0917", -1, 1),
new Among("\u092E\u093E\u0930\u094D\u092B\u0924", -1, 1),
new Among("\u0930\u0924", -1, 1),
new Among("\u0915\u093E", -1, 2),
new Among("\u092E\u093E", -1, 1),
new Among("\u0926\u094D\u0935\u093E\u0930\u093E", -1, 1),
new Among("\u0915\u093F", -1, 2),
new Among("\u092A\u091B\u093F", -1, 1),
new Among("\u0915\u0940", -1, 2),
new Among("\u0932\u0947", -1, 1),
new Among("\u0915\u0948", -1, 2),
new Among("\u0938\u0901\u0917\u0948", -1, 1),
new Among("\u092E\u0948", -1, 1),
new Among("\u0915\u094B", -1, 2)
};
private final static Among a_1[] = {
new Among("\u0901", -1, -1),
new Among("\u0902", -1, -1),
new Among("\u0948", -1, -1)
};
private final static Among a_2[] = {
new Among("\u0901", -1, 1),
new Among("\u0902", -1, 1),
new Among("\u0948", -1, 2)
};
private final static Among a_3[] = {
new Among("\u0925\u093F\u090F", -1, 1),
new Among("\u091B", -1, 1),
new Among("\u0907\u091B", 1, 1),
new Among("\u090F\u091B", 1, 1),
new Among("\u093F\u091B", 1, 1),
new Among("\u0947\u091B", 1, 1),
new Among("\u0928\u0947\u091B", 5, 1),
new Among("\u0939\u0941\u0928\u0947\u091B", 6, 1),
new Among("\u0907\u0928\u094D\u091B", 1, 1),
new Among("\u093F\u0928\u094D\u091B", 1, 1),
new Among("\u0939\u0941\u0928\u094D\u091B", 1, 1),
new Among("\u090F\u0915\u093E", -1, 1),
new Among("\u0907\u090F\u0915\u093E", 11, 1),
new Among("\u093F\u090F\u0915\u093E", 11, 1),
new Among("\u0947\u0915\u093E", -1, 1),
new Among("\u0928\u0947\u0915\u093E", 14, 1),
new Among("\u0926\u093E", -1, 1),
new Among("\u0907\u0926\u093E", 16, 1),
new Among("\u093F\u0926\u093E", 16, 1),
new Among("\u0926\u0947\u0916\u093F", -1, 1),
new Among("\u092E\u093E\u0925\u093F", -1, 1),
new Among("\u090F\u0915\u0940", -1, 1),
new Among("\u0907\u090F\u0915\u0940", 21, 1),
new Among("\u093F\u090F\u0915\u0940", 21, 1),
new Among("\u0947\u0915\u0940", -1, 1),
new Among("\u0926\u0947\u0916\u0940", -1, 1),
new Among("\u0925\u0940", -1, 1),
new Among("\u0926\u0940", -1, 1),
new Among("\u091B\u0941", -1, 1),
new Among("\u090F\u091B\u0941", 28, 1),
new Among("\u0947\u091B\u0941", 28, 1),
new Among("\u0928\u0947\u091B\u0941", 30, 1),
new Among("\u0928\u0941", -1, 1),
new Among("\u0939\u0930\u0941", -1, 1),
new Among("\u0939\u0930\u0942", -1, 1),
new Among("\u091B\u0947", -1, 1),
new Among("\u0925\u0947", -1, 1),
new Among("\u0928\u0947", -1, 1),
new Among("\u090F\u0915\u0948", -1, 1),
new Among("\u0947\u0915\u0948", -1, 1),
new Among("\u0928\u0947\u0915\u0948", 39, 1),
new Among("\u0926\u0948", -1, 1),
new Among("\u0907\u0926\u0948", 41, 1),
new Among("\u093F\u0926\u0948", 41, 1),
new Among("\u090F\u0915\u094B", -1, 1),
new Among("\u0907\u090F\u0915\u094B", 44, 1),
new Among("\u093F\u090F\u0915\u094B", 44, 1),
new Among("\u0947\u0915\u094B", -1, 1),
new Among("\u0928\u0947\u0915\u094B", 47, 1),
new Among("\u0926\u094B", -1, 1),
new Among("\u0907\u0926\u094B", 49, 1),
new Among("\u093F\u0926\u094B", 49, 1),
new Among("\u092F\u094B", -1, 1),
new Among("\u0907\u092F\u094B", 52, 1),
new Among("\u092D\u092F\u094B", 52, 1),
new Among("\u093F\u092F\u094B", 52, 1),
new Among("\u0925\u093F\u092F\u094B", 55, 1),
new Among("\u0926\u093F\u092F\u094B", 55, 1),
new Among("\u0925\u094D\u092F\u094B", 52, 1),
new Among("\u091B\u094C", -1, 1),
new Among("\u0907\u091B\u094C", 59, 1),
new Among("\u090F\u091B\u094C", 59, 1),
new Among("\u093F\u091B\u094C", 59, 1),
new Among("\u0947\u091B\u094C", 59, 1),
new Among("\u0928\u0947\u091B\u094C", 63, 1),
new Among("\u092F\u094C", -1, 1),
new Among("\u0925\u093F\u092F\u094C", 65, 1),
new Among("\u091B\u094D\u092F\u094C", 65, 1),
new Among("\u0925\u094D\u092F\u094C", 65, 1),
new Among("\u091B\u0928\u094D", -1, 1),
new Among("\u0907\u091B\u0928\u094D", 69, 1),
new Among("\u090F\u091B\u0928\u094D", 69, 1),
new Among("\u093F\u091B\u0928\u094D", 69, 1),
new Among("\u0947\u091B\u0928\u094D", 69, 1),
new Among("\u0928\u0947\u091B\u0928\u094D", 73, 1),
new Among("\u0932\u093E\u0928\u094D", -1, 1),
new Among("\u091B\u093F\u0928\u094D", -1, 1),
new Among("\u0925\u093F\u0928\u094D", -1, 1),
new Among("\u092A\u0930\u094D", -1, 1),
new Among("\u0907\u0938\u094D", -1, 1),
new Among("\u0925\u093F\u0907\u0938\u094D", 79, 1),
new Among("\u091B\u0938\u094D", -1, 1),
new Among("\u0907\u091B\u0938\u094D", 81, 1),
new Among("\u090F\u091B\u0938\u094D", 81, 1),
new Among("\u093F\u091B\u0938\u094D", 81, 1),
new Among("\u0947\u091B\u0938\u094D", 81, 1),
new Among("\u0928\u0947\u091B\u0938\u094D", 85, 1),
new Among("\u093F\u0938\u094D", -1, 1),
new Among("\u0925\u093F\u0938\u094D", 87, 1),
new Among("\u091B\u0947\u0938\u094D", -1, 1),
new Among("\u0939\u094B\u0938\u094D", -1, 1)
};
/**
 * Handles an ending listed in {@code a_0} (searched backwards). Result 1 deletes it
 * unconditionally; result 2 deletes it only when neither U+090F nor U+0947
 * immediately precedes it.
 *
 * @return {@code false} if no entry of {@code a_0} matches, otherwise {@code true}
 */
private boolean r_remove_category_1() {
  ket = cursor;
  final int outcome = find_among_b(a_0);
  if (outcome == 0) {
    return false;
  }
  bra = cursor;
  if (outcome == 1) {
    slice_del();
  } else if (outcome == 2) {
    final int mark = limit - cursor;
    boolean blocked = eq_s_b("\u090F");
    if (!blocked) {
      cursor = limit - mark;
      blocked = eq_s_b("\u0947");
    }
    if (!blocked) {
      // Neither guard character precedes the ending: remove it.
      cursor = limit - mark;
      slice_del();
    }
  }
  return true;
}
/**
 * Tests, without deleting anything, whether an entry of {@code a_1} matches
 * backwards from the cursor; {@code ket}/{@code bra} are set around the match.
 *
 * @return {@code true} if an entry matches, {@code false} otherwise
 */
private boolean r_check_category_2() {
  ket = cursor;
  final boolean found = find_among_b(a_1) != 0;
  if (found) {
    bra = cursor;
  }
  return found;
}
/**
 * Deletes an ending listed in {@code a_2} (searched backwards), but only when it is
 * preceded by one of a fixed set of strings: four alternatives for result 1, a single
 * one for result 2.
 *
 * @return {@code false} if nothing matches or the required preceding string is absent,
 *     otherwise {@code true}
 */
private boolean r_remove_category_2() {
  ket = cursor;
  final int outcome = find_among_b(a_2);
  if (outcome == 0) {
    return false;
  }
  bra = cursor;
  if (outcome == 1) {
    final String[] alternatives = {"\u092F\u094C", "\u091B\u094C", "\u0928\u094C", "\u0925\u0947"};
    final int mark = limit - cursor;
    boolean found = false;
    // Try each alternative from the same starting position, stopping at the first hit.
    for (int i = 0; i < alternatives.length && !found; i++) {
      cursor = limit - mark;
      found = eq_s_b(alternatives[i]);
    }
    if (!found) {
      return false;
    }
    slice_del();
  } else if (outcome == 2) {
    if (!eq_s_b("\u0924\u094D\u0930")) {
      return false;
    }
    slice_del();
  }
  return true;
}
/**
 * Deletes an ending listed in {@code a_3} (searched backwards from the cursor).
 *
 * @return {@code true} if an ending was found and removed, {@code false} otherwise
 */
private boolean r_remove_category_3() {
  ket = cursor;
  final int matched = find_among_b(a_3);
  if (matched != 0) {
    bra = cursor;
    slice_del();
    return true;
  }
  return false;
}
/**
 * Stems the current word in place, working backwards from the end: one
 * category-1 removal, then repeated rounds of (optional category-2 removal
 * followed by category-3 removal) until a category-3 removal fails.
 *
 * @return always {@code true}
 */
public boolean stem() {
  limit_backward = cursor;
  cursor = limit;

  final int cat1Mark = limit - cursor;
  r_remove_category_1();
  cursor = limit - cat1Mark;

  final int loopMark = limit - cursor;
  while (true) {
    final int roundMark = limit - cursor;
    // Category 2 is optional: check first, then remove from the same position.
    if (r_check_category_2()) {
      cursor = limit - roundMark;
      r_remove_category_2();
    }
    cursor = limit - roundMark;
    if (!r_remove_category_3()) {
      cursor = limit - roundMark;
      break;
    }
  }
  cursor = limit - loopMark;

  cursor = limit_backward;
  return true;
}
/**
 * Compares by type only: the argument is equal when it is a {@code NepaliStemmer}.
 *
 * @param o candidate object; {@code null} yields {@code false}
 * @return {@code true} if {@code o} is an instance of {@code NepaliStemmer}
 */
@Override
public boolean equals( Object o ) {
  final boolean sameKind = (o instanceof NepaliStemmer);
  return sameKind;
}
/**
 * Hash derived solely from the class name, so it is the same for every instance.
 *
 * @return hash code of {@code NepaliStemmer.class.getName()}
 */
@Override
public int hashCode() {
  final String className = NepaliStemmer.class.getName();
  return className.hashCode();
}
}

View File

@ -1,376 +1,261 @@
// This file was generated automatically by the Snowball to Java compiler
// Generated by Snowball 2.0.0 - https://snowballstem.org/
package org.tartarus.snowball.ext;
import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;
/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/
/**
* This class implements the stemming algorithm defined by a snowball script.
* <p>
* Generated by Snowball 2.0.0 - https://snowballstem.org/
* </p>
*/
@SuppressWarnings("unused")
public class NorwegianStemmer extends org.tartarus.snowball.SnowballStemmer {
@SuppressWarnings("unused") public class NorwegianStemmer extends SnowballProgram {
private static final long serialVersionUID = 1L;
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
private static final long serialVersionUID = 1L;
private final static Among a_0[] = {
new Among("a", -1, 1),
new Among("e", -1, 1),
new Among("ede", 1, 1),
new Among("ande", 1, 1),
new Among("ende", 1, 1),
new Among("ane", 1, 1),
new Among("ene", 1, 1),
new Among("hetene", 6, 1),
new Among("erte", 1, 3),
new Among("en", -1, 1),
new Among("heten", 9, 1),
new Among("ar", -1, 1),
new Among("er", -1, 1),
new Among("heter", 12, 1),
new Among("s", -1, 2),
new Among("as", 14, 1),
new Among("es", 14, 1),
new Among("edes", 16, 1),
new Among("endes", 16, 1),
new Among("enes", 16, 1),
new Among("hetenes", 19, 1),
new Among("ens", 14, 1),
new Among("hetens", 21, 1),
new Among("ers", 14, 1),
new Among("ets", 14, 1),
new Among("et", -1, 1),
new Among("het", 25, 1),
new Among("ert", -1, 3),
new Among("ast", -1, 1)
};
/* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
private final static Among a_1[] = {
new Among("dt", -1, -1),
new Among("vt", -1, -1)
};
private final static Among a_0[] = {
new Among ( "a", -1, 1, "", methodObject ),
new Among ( "e", -1, 1, "", methodObject ),
new Among ( "ede", 1, 1, "", methodObject ),
new Among ( "ande", 1, 1, "", methodObject ),
new Among ( "ende", 1, 1, "", methodObject ),
new Among ( "ane", 1, 1, "", methodObject ),
new Among ( "ene", 1, 1, "", methodObject ),
new Among ( "hetene", 6, 1, "", methodObject ),
new Among ( "erte", 1, 3, "", methodObject ),
new Among ( "en", -1, 1, "", methodObject ),
new Among ( "heten", 9, 1, "", methodObject ),
new Among ( "ar", -1, 1, "", methodObject ),
new Among ( "er", -1, 1, "", methodObject ),
new Among ( "heter", 12, 1, "", methodObject ),
new Among ( "s", -1, 2, "", methodObject ),
new Among ( "as", 14, 1, "", methodObject ),
new Among ( "es", 14, 1, "", methodObject ),
new Among ( "edes", 16, 1, "", methodObject ),
new Among ( "endes", 16, 1, "", methodObject ),
new Among ( "enes", 16, 1, "", methodObject ),
new Among ( "hetenes", 19, 1, "", methodObject ),
new Among ( "ens", 14, 1, "", methodObject ),
new Among ( "hetens", 21, 1, "", methodObject ),
new Among ( "ers", 14, 1, "", methodObject ),
new Among ( "ets", 14, 1, "", methodObject ),
new Among ( "et", -1, 1, "", methodObject ),
new Among ( "het", 25, 1, "", methodObject ),
new Among ( "ert", -1, 3, "", methodObject ),
new Among ( "ast", -1, 1, "", methodObject )
};
private final static Among a_2[] = {
new Among("leg", -1, 1),
new Among("eleg", 0, 1),
new Among("ig", -1, 1),
new Among("eig", 2, 1),
new Among("lig", 2, 1),
new Among("elig", 4, 1),
new Among("els", -1, 1),
new Among("lov", -1, 1),
new Among("elov", 7, 1),
new Among("slov", 7, 1),
new Among("hetslov", 9, 1)
};
private final static Among a_1[] = {
new Among ( "dt", -1, -1, "", methodObject ),
new Among ( "vt", -1, -1, "", methodObject )
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private final static Among a_2[] = {
new Among ( "leg", -1, 1, "", methodObject ),
new Among ( "eleg", 0, 1, "", methodObject ),
new Among ( "ig", -1, 1, "", methodObject ),
new Among ( "eig", 2, 1, "", methodObject ),
new Among ( "lig", 2, 1, "", methodObject ),
new Among ( "elig", 4, 1, "", methodObject ),
new Among ( "els", -1, 1, "", methodObject ),
new Among ( "lov", -1, 1, "", methodObject ),
new Among ( "elov", 7, 1, "", methodObject ),
new Among ( "slov", 7, 1, "", methodObject ),
new Among ( "hetslov", 9, 1, "", methodObject )
};
private static final char g_s_ending[] = {119, 125, 149, 1 };
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private int I_x;
private int I_p1;
private static final char g_s_ending[] = {119, 125, 149, 1 };
private int I_x;
private int I_p1;
// Computes the region marks I_x and I_p1 used by the suffix routines.
// Returns false when the word is too short to hop 3 positions or when either
// scan reaches limit; in those cases I_p1 keeps the value limit assigned on
// entry. Returns true once I_p1 has been set (and raised to I_x if needed).
private boolean r_mark_regions() {
// Default: p1 at the end of the input.
I_p1 = limit;
// "test": remember the cursor so the hop below does not consume input.
int v_1 = cursor;
{
// "hop 3": move 3 positions forward, failing if that leaves [0, limit].
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// "setmark x", then undo the hop.
I_x = cursor;
cursor = v_1;
// "goto": advance one position at a time until in_grouping(g_v, ...) succeeds.
// The cursor is reset to v_2 on both paths, so the loop exits with the cursor
// at the position where the check succeeded.
// NOTE(review): g_v is presumably the vowel grouping - confirm against the .sbl source.
golab0: while(true)
{
int v_2 = cursor;
lab1: {
if (!(in_grouping(g_v, 97, 248)))
{
break lab1;
}
cursor = v_2;
break golab0;
}
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
// "gopast": advance until out_grouping(g_v, ...) succeeds. Unlike the loop
// above, the cursor is not restored after a successful check.
golab2: while(true)
{
lab3: {
if (!(out_grouping(g_v, 97, 248)))
{
break lab3;
}
break golab2;
}
if (cursor >= limit)
{
return false;
}
cursor++;
}
// "setmark p1".
I_p1 = cursor;
// "try": p1 must not lie before x; raise it to x if it does.
lab4: {
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
}
return true;
}
private void copy_from(NorwegianStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
super.copy_from(other);
}
private boolean r_mark_regions() {
int v_1;
int v_2;
// (, line 26
I_p1 = limit;
// test, line 30
v_1 = cursor;
// (, line 30
// hop, line 30
{
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// setmark x, line 30
I_x = cursor;
cursor = v_1;
// goto, line 31
golab0: while(true)
{
v_2 = cursor;
lab1: do {
if (!(in_grouping(g_v, 97, 248)))
{
break lab1;
}
cursor = v_2;
break golab0;
} while (false);
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 31
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 248)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 31
I_p1 = cursor;
// try, line 32
lab4: do {
// (, line 32
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
return true;
}
private boolean r_main_suffix() {
int among_var;
int v_1;
int v_2;
int v_3;
// (, line 37
// setlimit, line 38
v_1 = limit - cursor;
// tomark, line 38
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 38
// [, line 38
ket = cursor;
// substring, line 38
among_var = find_among_b(a_0, 29);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 38
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 44
// delete, line 44
slice_del();
break;
case 2:
// (, line 46
// or, line 46
lab0: do {
v_3 = limit - cursor;
lab1: do {
if (!(in_grouping_b(g_s_ending, 98, 122)))
{
break lab1;
}
break lab0;
} while (false);
cursor = limit - v_3;
// (, line 46
// literal, line 46
if (!(eq_s_b(1, "k")))
{
return false;
}
if (!(out_grouping_b(g_v, 97, 248)))
{
return false;
}
} while (false);
// delete, line 46
slice_del();
break;
case 3:
// (, line 48
// <-, line 48
slice_from("er");
break;
}
return true;
}
private boolean r_consonant_pair() {
int v_1;
int v_2;
int v_3;
// (, line 52
// test, line 53
v_1 = limit - cursor;
// (, line 53
// setlimit, line 54
v_2 = limit - cursor;
// tomark, line 54
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 54
// [, line 54
ket = cursor;
// substring, line 54
if (find_among_b(a_1, 2) == 0)
{
limit_backward = v_3;
return false;
}
// ], line 54
bra = cursor;
limit_backward = v_3;
cursor = limit - v_1;
// next, line 59
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// ], line 59
bra = cursor;
// delete, line 59
slice_del();
return true;
}
private boolean r_other_suffix() {
int among_var;
int v_1;
int v_2;
// (, line 62
// setlimit, line 63
v_1 = limit - cursor;
// tomark, line 63
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 63
// [, line 63
ket = cursor;
// substring, line 63
among_var = find_among_b(a_2, 11);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 63
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 67
// delete, line 67
slice_del();
break;
}
return true;
}
@Override
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
// (, line 72
// do, line 74
v_1 = cursor;
lab0: do {
// call mark_regions, line 74
if (!r_mark_regions())
{
break lab0;
}
} while (false);
cursor = v_1;
// backwards, line 75
limit_backward = cursor; cursor = limit;
// (, line 75
// do, line 76
v_2 = limit - cursor;
lab1: do {
// call main_suffix, line 76
if (!r_main_suffix())
{
break lab1;
}
} while (false);
cursor = limit - v_2;
// do, line 77
v_3 = limit - cursor;
lab2: do {
// call consonant_pair, line 77
if (!r_consonant_pair())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
// do, line 78
v_4 = limit - cursor;
lab3: do {
// call other_suffix, line 78
if (!r_other_suffix())
{
break lab3;
}
} while (false);
cursor = limit - v_4;
cursor = limit_backward; return true;
}
@Override
public boolean equals( Object o ) {
return o instanceof NorwegianStemmer;
private boolean r_main_suffix() {
int among_var;
if (cursor < I_p1)
{
return false;
}
int v_2 = limit_backward;
limit_backward = I_p1;
ket = cursor;
among_var = find_among_b(a_0);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
bra = cursor;
limit_backward = v_2;
switch (among_var) {
case 1:
slice_del();
break;
case 2:
lab0: {
int v_3 = limit - cursor;
lab1: {
if (!(in_grouping_b(g_s_ending, 98, 122)))
{
break lab1;
}
break lab0;
}
@Override
public int hashCode() {
return NorwegianStemmer.class.getName().hashCode();
cursor = limit - v_3;
if (!(eq_s_b("k")))
{
return false;
}
if (!(out_grouping_b(g_v, 97, 248)))
{
return false;
}
}
slice_del();
break;
case 3:
slice_from("er");
break;
}
return true;
}
// If one of the endings in a_1 is found (searching backwards, bounded by
// I_p1), deletes the single position just before the starting cursor.
// Returns false, without deleting, when the cursor is before I_p1, when the
// lookup returns 0, or when there is no position left to step back over.
private boolean r_consonant_pair() {
// "test": remember the cursor as a distance from limit.
int v_1 = limit - cursor;
if (cursor < I_p1)
{
return false;
}
// "setlimit tomark p1": use I_p1 as the backward limit during the lookup
// and keep the previous limit in v_3 so it can be put back.
int v_3 = limit_backward;
limit_backward = I_p1;
ket = cursor;
if (find_among_b(a_1) == 0)
{
limit_backward = v_3;
return false;
}
bra = cursor;
limit_backward = v_3;
// End of "test": return to the starting position; ket still holds it.
cursor = limit - v_1;
// "next": step back one position, failing at the backward limit.
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// bra is now exactly one position before ket; "delete" that slice.
bra = cursor;
slice_del();
return true;
}
// Looks up the endings of a_2 backwards from the cursor, bounded by I_p1,
// and deletes the marked slice when the lookup succeeds.
// Returns false, without deleting, when the cursor is before I_p1 or when
// find_among_b returns 0.
private boolean r_other_suffix() {
if (cursor < I_p1)
{
return false;
}
// "setlimit tomark p1": use I_p1 as the backward limit during the lookup
// and keep the previous limit in v_2 so it can be put back.
int v_2 = limit_backward;
limit_backward = I_p1;
// Mark the slice: ket at the starting cursor, bra where the lookup leaves it.
ket = cursor;
if (find_among_b(a_2) == 0)
{
limit_backward = v_2;
return false;
}
bra = cursor;
limit_backward = v_2;
// "delete".
slice_del();
return true;
}
// Entry point of the stemmer: marks the regions, then runs the three suffix
// routines in backward mode. The boolean results of the four routines are
// ignored and this method always returns true.
public boolean stem() {
// "do mark_regions": run it, then put the cursor back where it was.
int v_1 = cursor;
r_mark_regions();
cursor = v_1;
// "backwards": the current cursor becomes the backward limit and
// processing starts from limit.
limit_backward = cursor;
cursor = limit;
// Before each routine the cursor is saved as a distance from limit and
// restored from that distance afterwards.
// NOTE(review): presumably a distance is used because deletions may change
// limit - confirm against the base class.
int v_2 = limit - cursor;
r_main_suffix();
cursor = limit - v_2;
int v_3 = limit - cursor;
r_consonant_pair();
cursor = limit - v_3;
int v_4 = limit - cursor;
r_other_suffix();
cursor = limit - v_4;
// Leave backward mode.
cursor = limit_backward;
return true;
}
// Equality is decided by type alone: every NorwegianStemmer instance
// compares equal to every other one. instanceof is false for null.
@Override
public boolean equals( Object o ) {
final boolean isNorwegianStemmer = o instanceof NorwegianStemmer;
return isNorwegianStemmer;
}
// Hash derived from the class name only, so it is the same for every
// instance, in line with the type-based equals().
@Override
public int hashCode() {
final String stemmerClassName = NorwegianStemmer.class.getName();
return stemmerClassName.hashCode();
}

File diff suppressed because it is too large Load Diff

View File

@ -1,367 +1,262 @@
// This file was generated automatically by the Snowball to Java compiler
// Generated by Snowball 2.0.0 - https://snowballstem.org/
package org.tartarus.snowball.ext;
import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;
/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/
/**
* This class implements the stemming algorithm defined by a snowball script.
* <p>
* Generated by Snowball 2.0.0 - https://snowballstem.org/
* </p>
*/
@SuppressWarnings("unused")
public class SwedishStemmer extends org.tartarus.snowball.SnowballStemmer {
@SuppressWarnings("unused") public class SwedishStemmer extends SnowballProgram {
private static final long serialVersionUID = 1L;
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
private static final long serialVersionUID = 1L;
// Suffix table looked up by r_main_suffix() through find_among_b(a_0).
// The third argument is 1 for every entry except "s", which carries 2;
// r_main_suffix() switches on the lookup result with cases 1 and 2.
// NOTE(review): this assumes find_among_b returns the third argument of the
// matched entry, and that the second argument is the index of a shorter entry
// the current one ends with (-1 when none) - confirm against Among/SnowballProgram.
private final static Among a_0[] = {
new Among("a", -1, 1),
new Among("arna", 0, 1),
new Among("erna", 0, 1),
new Among("heterna", 2, 1),
new Among("orna", 0, 1),
new Among("ad", -1, 1),
new Among("e", -1, 1),
new Among("ade", 6, 1),
new Among("ande", 6, 1),
new Among("arne", 6, 1),
new Among("are", 6, 1),
new Among("aste", 6, 1),
new Among("en", -1, 1),
new Among("anden", 12, 1),
new Among("aren", 12, 1),
new Among("heten", 12, 1),
new Among("ern", -1, 1),
new Among("ar", -1, 1),
new Among("er", -1, 1),
new Among("heter", 18, 1),
new Among("or", -1, 1),
new Among("s", -1, 2),
new Among("as", 21, 1),
new Among("arnas", 22, 1),
new Among("ernas", 22, 1),
new Among("ornas", 22, 1),
new Among("es", 21, 1),
new Among("ades", 26, 1),
new Among("andes", 26, 1),
new Among("ens", 21, 1),
new Among("arens", 29, 1),
new Among("hetens", 29, 1),
new Among("erns", 21, 1),
new Among("at", -1, 1),
new Among("andet", -1, 1),
new Among("het", -1, 1),
new Among("ast", -1, 1)
};
/* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
// Two-letter endings looked up by r_consonant_pair() through find_among_b(a_1).
// Both numeric arguments are -1 for every entry; r_consonant_pair() only
// checks whether the lookup result is 0 or not.
private final static Among a_1[] = {
new Among("dd", -1, -1),
new Among("gd", -1, -1),
new Among("nn", -1, -1),
new Among("dt", -1, -1),
new Among("gt", -1, -1),
new Among("kt", -1, -1),
new Among("tt", -1, -1)
};
private final static Among a_0[] = {
new Among ( "a", -1, 1, "", methodObject ),
new Among ( "arna", 0, 1, "", methodObject ),
new Among ( "erna", 0, 1, "", methodObject ),
new Among ( "heterna", 2, 1, "", methodObject ),
new Among ( "orna", 0, 1, "", methodObject ),
new Among ( "ad", -1, 1, "", methodObject ),
new Among ( "e", -1, 1, "", methodObject ),
new Among ( "ade", 6, 1, "", methodObject ),
new Among ( "ande", 6, 1, "", methodObject ),
new Among ( "arne", 6, 1, "", methodObject ),
new Among ( "are", 6, 1, "", methodObject ),
new Among ( "aste", 6, 1, "", methodObject ),
new Among ( "en", -1, 1, "", methodObject ),
new Among ( "anden", 12, 1, "", methodObject ),
new Among ( "aren", 12, 1, "", methodObject ),
new Among ( "heten", 12, 1, "", methodObject ),
new Among ( "ern", -1, 1, "", methodObject ),
new Among ( "ar", -1, 1, "", methodObject ),
new Among ( "er", -1, 1, "", methodObject ),
new Among ( "heter", 18, 1, "", methodObject ),
new Among ( "or", -1, 1, "", methodObject ),
new Among ( "s", -1, 2, "", methodObject ),
new Among ( "as", 21, 1, "", methodObject ),
new Among ( "arnas", 22, 1, "", methodObject ),
new Among ( "ernas", 22, 1, "", methodObject ),
new Among ( "ornas", 22, 1, "", methodObject ),
new Among ( "es", 21, 1, "", methodObject ),
new Among ( "ades", 26, 1, "", methodObject ),
new Among ( "andes", 26, 1, "", methodObject ),
new Among ( "ens", 21, 1, "", methodObject ),
new Among ( "arens", 29, 1, "", methodObject ),
new Among ( "hetens", 29, 1, "", methodObject ),
new Among ( "erns", 21, 1, "", methodObject ),
new Among ( "at", -1, 1, "", methodObject ),
new Among ( "andet", -1, 1, "", methodObject ),
new Among ( "het", -1, 1, "", methodObject ),
new Among ( "ast", -1, 1, "", methodObject )
};
// Suffix table looked up by r_other_suffix() through find_among_b(a_2).
// The third argument takes the values 1, 2 and 3, which are the case labels
// r_other_suffix() switches on (delete, or one of two replacements).
// NOTE(review): assumes find_among_b returns the third argument of the
// matched entry - confirm against SnowballProgram.
private final static Among a_2[] = {
new Among("ig", -1, 1),
new Among("lig", 0, 1),
new Among("els", -1, 1),
new Among("fullt", -1, 3),
new Among("l\u00F6st", -1, 2)
};
private final static Among a_1[] = {
new Among ( "dd", -1, -1, "", methodObject ),
new Among ( "gd", -1, -1, "", methodObject ),
new Among ( "nn", -1, -1, "", methodObject ),
new Among ( "dt", -1, -1, "", methodObject ),
new Among ( "gt", -1, -1, "", methodObject ),
new Among ( "kt", -1, -1, "", methodObject ),
new Among ( "tt", -1, -1, "", methodObject )
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };
private final static Among a_2[] = {
new Among ( "ig", -1, 1, "", methodObject ),
new Among ( "lig", 0, 1, "", methodObject ),
new Among ( "els", -1, 1, "", methodObject ),
new Among ( "fullt", -1, 3, "", methodObject ),
new Among ( "l\u00F6st", -1, 2, "", methodObject )
};
private static final char g_s_ending[] = {119, 127, 149 };
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };
private int I_x;
private int I_p1;
private static final char g_s_ending[] = {119, 127, 149 };
private int I_x;
private int I_p1;
// Computes the region marks I_x and I_p1 used by the suffix routines.
// Returns false when the word is too short to hop 3 positions or when either
// scan reaches limit; in those cases I_p1 keeps the value limit assigned on
// entry. Returns true once I_p1 has been set (and raised to I_x if needed).
private boolean r_mark_regions() {
// Default: p1 at the end of the input.
I_p1 = limit;
// "test": remember the cursor so the hop below does not consume input.
int v_1 = cursor;
{
// "hop 3": move 3 positions forward, failing if that leaves [0, limit].
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// "setmark x", then undo the hop.
I_x = cursor;
cursor = v_1;
// "goto": advance one position at a time until in_grouping(g_v, ...) succeeds.
// The cursor is reset to v_2 on both paths, so the loop exits with the cursor
// at the position where the check succeeded.
// NOTE(review): g_v is presumably the vowel grouping - confirm against the .sbl source.
golab0: while(true)
{
int v_2 = cursor;
lab1: {
if (!(in_grouping(g_v, 97, 246)))
{
break lab1;
}
cursor = v_2;
break golab0;
}
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
// "gopast": advance until out_grouping(g_v, ...) succeeds. Unlike the loop
// above, the cursor is not restored after a successful check.
golab2: while(true)
{
lab3: {
if (!(out_grouping(g_v, 97, 246)))
{
break lab3;
}
break golab2;
}
if (cursor >= limit)
{
return false;
}
cursor++;
}
// "setmark p1".
I_p1 = cursor;
// "try": p1 must not lie before x; raise it to x if it does.
lab4: {
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
}
return true;
}
private void copy_from(SwedishStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
super.copy_from(other);
}
// Looks up the endings of a_0 backwards from the cursor, bounded by I_p1,
// and deletes the marked slice. For result 2 the deletion only happens if
// in_grouping_b(g_s_ending, ...) succeeds first.
// Returns false, without deleting, when the cursor is before I_p1, when the
// lookup returns 0, or when that grouping check fails.
private boolean r_main_suffix() {
int among_var;
if (cursor < I_p1)
{
return false;
}
// "setlimit tomark p1": use I_p1 as the backward limit during the lookup
// and keep the previous limit in v_2 so it can be put back.
int v_2 = limit_backward;
limit_backward = I_p1;
// Mark the slice: ket at the starting cursor, bra where the lookup leaves it.
ket = cursor;
among_var = find_among_b(a_0);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
bra = cursor;
limit_backward = v_2;
switch (among_var) {
case 1:
// "delete".
slice_del();
break;
case 2:
// Delete only when the grouping check on g_s_ending passes.
// NOTE(review): g_s_ending presumably lists the letters allowed before a
// removable "s" - confirm against the .sbl source.
if (!(in_grouping_b(g_s_ending, 98, 121)))
{
return false;
}
slice_del();
break;
}
return true;
}
private boolean r_mark_regions() {
int v_1;
int v_2;
// (, line 26
I_p1 = limit;
// test, line 29
v_1 = cursor;
// (, line 29
// hop, line 29
{
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// setmark x, line 29
I_x = cursor;
cursor = v_1;
// goto, line 30
golab0: while(true)
{
v_2 = cursor;
lab1: do {
if (!(in_grouping(g_v, 97, 246)))
{
break lab1;
}
cursor = v_2;
break golab0;
} while (false);
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 30
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 246)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 30
I_p1 = cursor;
// try, line 31
lab4: do {
// (, line 31
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
return true;
}
// If one of the endings in a_1 is found (searching backwards, bounded by
// I_p1), deletes the single position just before the starting cursor.
// Returns false, without deleting, when the cursor is before I_p1, when the
// lookup returns 0, or when there is no position left to step back over.
// The previous limit_backward is restored on every path that changed it.
private boolean r_consonant_pair() {
if (cursor < I_p1)
{
return false;
}
// "setlimit tomark p1": use I_p1 as the backward limit for the whole
// routine and keep the previous limit in v_2.
int v_2 = limit_backward;
limit_backward = I_p1;
// "and": remember the cursor as a distance from limit so the lookup below
// acts as a check only.
int v_3 = limit - cursor;
if (find_among_b(a_1) == 0)
{
limit_backward = v_2;
return false;
}
// Back to the starting position, which becomes ket.
cursor = limit - v_3;
ket = cursor;
// "next": step back one position, failing at the backward limit.
if (cursor <= limit_backward)
{
limit_backward = v_2;
return false;
}
cursor--;
// bra is now exactly one position before ket; "delete" that slice.
bra = cursor;
slice_del();
limit_backward = v_2;
return true;
}
private boolean r_main_suffix() {
int among_var;
int v_1;
int v_2;
// (, line 36
// setlimit, line 37
v_1 = limit - cursor;
// tomark, line 37
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 37
// [, line 37
ket = cursor;
// substring, line 37
among_var = find_among_b(a_0, 37);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 37
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 44
// delete, line 44
slice_del();
break;
case 2:
// (, line 46
if (!(in_grouping_b(g_s_ending, 98, 121)))
{
return false;
}
// delete, line 46
slice_del();
break;
}
return true;
}
// Looks up the endings of a_2 backwards from the cursor, bounded by I_p1,
// then deletes or replaces the marked slice depending on the lookup result.
// Returns false, without modifying the word, when the cursor is before I_p1
// or when find_among_b returns 0.
private boolean r_other_suffix() {
int among_var;
if (cursor < I_p1)
{
return false;
}
// "setlimit tomark p1": use I_p1 as the backward limit and keep the
// previous limit in v_2; here it is only put back after the switch.
int v_2 = limit_backward;
limit_backward = I_p1;
// Mark the slice: ket at the starting cursor, bra where the lookup leaves it.
ket = cursor;
among_var = find_among_b(a_2);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
bra = cursor;
switch (among_var) {
case 1:
// "delete".
slice_del();
break;
case 2:
// "<-": replace the slice with the given string.
slice_from("l\u00F6s");
break;
case 3:
// "<-": replace the slice with the given string.
slice_from("full");
break;
}
limit_backward = v_2;
return true;
}
private boolean r_consonant_pair() {
int v_1;
int v_2;
int v_3;
// setlimit, line 50
v_1 = limit - cursor;
// tomark, line 50
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 50
// and, line 52
v_3 = limit - cursor;
// among, line 51
if (find_among_b(a_1, 7) == 0)
{
limit_backward = v_2;
return false;
}
cursor = limit - v_3;
// (, line 52
// [, line 52
ket = cursor;
// next, line 52
if (cursor <= limit_backward)
{
limit_backward = v_2;
return false;
}
cursor--;
// ], line 52
bra = cursor;
// delete, line 52
slice_del();
limit_backward = v_2;
return true;
}
// Entry point of the stemmer: marks the regions, then runs the three suffix
// routines in backward mode. The boolean results of the four routines are
// ignored and this method always returns true.
public boolean stem() {
// "do mark_regions": run it, then put the cursor back where it was.
int v_1 = cursor;
r_mark_regions();
cursor = v_1;
// "backwards": the current cursor becomes the backward limit and
// processing starts from limit.
limit_backward = cursor;
cursor = limit;
// Before each routine the cursor is saved as a distance from limit and
// restored from that distance afterwards.
// NOTE(review): presumably a distance is used because deletions may change
// limit - confirm against the base class.
int v_2 = limit - cursor;
r_main_suffix();
cursor = limit - v_2;
int v_3 = limit - cursor;
r_consonant_pair();
cursor = limit - v_3;
int v_4 = limit - cursor;
r_other_suffix();
cursor = limit - v_4;
// Leave backward mode.
cursor = limit_backward;
return true;
}
private boolean r_other_suffix() {
int among_var;
int v_1;
int v_2;
// setlimit, line 55
v_1 = limit - cursor;
// tomark, line 55
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 55
// [, line 56
ket = cursor;
// substring, line 56
among_var = find_among_b(a_2, 5);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 56
bra = cursor;
switch(among_var) {
case 0:
limit_backward = v_2;
return false;
case 1:
// (, line 57
// delete, line 57
slice_del();
break;
case 2:
// (, line 58
// <-, line 58
slice_from("l\u00F6s");
break;
case 3:
// (, line 59
// <-, line 59
slice_from("full");
break;
}
limit_backward = v_2;
return true;
}
// Equality is decided by type alone: every SwedishStemmer instance
// compares equal to every other one. instanceof is false for null.
@Override
public boolean equals( Object o ) {
final boolean isSwedishStemmer = o instanceof SwedishStemmer;
return isSwedishStemmer;
}
@Override
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
// (, line 64
// do, line 66
v_1 = cursor;
lab0: do {
// call mark_regions, line 66
if (!r_mark_regions())
{
break lab0;
}
} while (false);
cursor = v_1;
// backwards, line 67
limit_backward = cursor; cursor = limit;
// (, line 67
// do, line 68
v_2 = limit - cursor;
lab1: do {
// call main_suffix, line 68
if (!r_main_suffix())
{
break lab1;
}
} while (false);
cursor = limit - v_2;
// do, line 69
v_3 = limit - cursor;
lab2: do {
// call consonant_pair, line 69
if (!r_consonant_pair())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
// do, line 70
v_4 = limit - cursor;
lab3: do {
// call other_suffix, line 70
if (!r_other_suffix())
{
break lab3;
}
} while (false);
cursor = limit - v_4;
cursor = limit_backward; return true;
}
// Type-based equality: the argument is equal exactly when it is a
// SwedishStemmer (null is never an instance, so it yields false).
@Override
public boolean equals( Object o ) {
if (o instanceof SwedishStemmer) {
return true;
}
return false;
}
// Hash derived from the class name only, so it is the same for every
// instance, in line with the type-based equals().
@Override
public int hashCode() {
final String stemmerClassName = SwedishStemmer.class.getName();
return stemmerClassName.hashCode();
}
// All instances share one hash value: the hash of the class name.
@Override
public int hashCode() {
final Class<SwedishStemmer> stemmerClass = SwedishStemmer.class;
return stemmerClass.getName().hashCode();
}

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,7 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
| From https://snowballstem.org/algorithms/danish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
@ -60,7 +60,7 @@ hvor | where
eller | or
hvad | what
skal | must/shall etc.
selv | myself/youself/herself/ourselves etc., even
selv | myself/yourself/herself/ourselves etc., even
her | here
alle | all/everyone/everybody etc.
vil | will (verb)

View File

@ -1,12 +1,13 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
| From https://snowballstem.org/algorithms/dutch/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Dutch stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
@ -117,3 +118,4 @@ uw | your
iemand | somebody
geweest | been; past participle of 'be'
andere | other

View File

@ -1,12 +1,12 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt
| From https://snowballstem.org/algorithms/english/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| An English stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
@ -317,3 +317,4 @@ very
| old
| high
| long

View File

@ -1,12 +1,12 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
| From https://snowballstem.org/algorithms/finnish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| forms of BE
olla
@ -48,8 +48,8 @@ me meidän meidät meitä meissä meistä meihin meillä meiltä meille
te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
tämä tämän tätä tässä tästä tähän tällä tältä tälle tänä täksi | this
tuo tuon tuota tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
@ -91,7 +91,6 @@ yli | over, across
| other
kun | when
niin | so
nyt | now
itse | self

View File

@ -1,7 +1,7 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
| From https://snowballstem.org/algorithms/french/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
@ -169,8 +169,8 @@ eussent
| Later additions (from Jean-Christophe Deschamps)
ceci | this
cela | that
celà | that
cela | that (added 11 Apr 2012. Omission reported by Adrien Grand)
celà | that (incorrect, though common)
cet | this
cette | this
ici | here

View File

@ -1,7 +1,7 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
| From https://snowballstem.org/algorithms/german/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|

View File

@ -1,12 +1,12 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
| From https://snowballstem.org/algorithms/hungarian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| Hungarian stop word list
| prepared by Anna Tordai

View File

@ -0,0 +1,99 @@
| From https://snowballstem.org/algorithms/indonesian/stop.txt
| This file is distributed under the BSD License.
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
yang | that
dan | and
di | in
dari | from
ini | this
pada kepada | at, to [person]
ada adalah | there is, is
dengan | with
untuk | for
dalam | in the
oleh | by
sebagai | as
juga | also, too
ke | to
atau | or
tidak | not
itu | that
sebuah | a
tersebut | the
dapat | can, may
ia | he/she, yes
telah | already
satu | one
memiliki | have
mereka | they
bahwa | that
lebih | more, more than
karena | because, since
seorang | one person, same
akan | will, about to
seperti | as, like
secara | on
kemudian | later, then
beberapa | some
banyak | many
antara | between
setelah | after
yaitu | that is
hanya | only
hingga | to
serta | along with
sama | same, and
dia | he/she/it (informal)
tetapi | but
namun | however
melalui | through
bisa | can
sehingga | so
ketika | when
suatu | a
sendiri | own (adverb)
bagi | for
semua | all
harus | must
setiap | each, every
maka | then
maupun | as well
tanpa | without
saja | only
jika | if
bukan | not
belum | not yet
sedangkan | while
yakni | i.e.
meskipun | although
hampir | almost
kita | we/us (inclusive)
demikian | thereby
daripada | from/than/instead of
apa | what/which/or/eh
ialah | is
sana | there
begitu | so
seseorang | someone
selain | besides
terlalu | too
ataupun | or
saya | me/I (formal)
bila | if/when
bagaimana | how
tapi | but
apabila | when/if
kalau | if
kami | we/us (exclusive)
melainkan | but (rather)
boleh | may, can
aku | I/me (informal)
anda | you (formal)
kamu | you (informal)
beliau | he/she/it (formal)
kalian | you (plural)

View File

@ -1,7 +1,7 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
| From https://snowballstem.org/algorithms/italian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|

View File

@ -1,7 +1,7 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
| From https://snowballstem.org/algorithms/norwegian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
@ -25,7 +25,7 @@ et | a/an
den | it/this/that
til | to
er | is/am/are
som | who/that
som | who/which/that
på | on
de | they / you(formal)
med | with
@ -84,7 +84,6 @@ noen | some
noe | some
ville | would
dere | you
som | who/which/that
deres | their/theirs
kun | only/just
ja | yes
@ -129,7 +128,6 @@ mange | many
også | also
slik | just
vært | been
være | to be
båe | both *
begge | both
siden | since
@ -155,7 +153,6 @@ hennar | her/hers
hennes | hers
hoss | how *
hossen | how *
ikkje | not *
ingi | noone *
inkje | noone *
korleis | how *
@ -177,7 +174,6 @@ noka | some (fem.) *
nokor | some *
noko | some *
nokre | some *
si | his/hers *
sia | since *
sidan | since *
so | so *

View File

@ -1,7 +1,7 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
| From https://snowballstem.org/algorithms/portuguese/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|

View File

@ -1,12 +1,13 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
| From https://snowballstem.org/algorithms/russian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Russian stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.

View File

@ -1,7 +1,7 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
| From https://snowballstem.org/algorithms/spanish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|

View File

@ -1,7 +1,7 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
| From https://snowballstem.org/algorithms/swedish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|

View File

@ -108,7 +108,7 @@ import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.SnowballStemmer;
import org.xml.sax.InputSource;
/** tests random analysis chains */
@ -404,10 +404,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
return null; // unreachable code
}
});
put(SnowballProgram.class, random -> {
put(SnowballStemmer.class, random -> {
try {
String lang = TestSnowball.SNOWBALL_LANGS[random.nextInt(TestSnowball.SNOWBALL_LANGS.length)];
Class<? extends SnowballProgram> clazz = Class.forName("org.tartarus.snowball.ext." + lang + "Stemmer").asSubclass(SnowballProgram.class);
String lang = TestSnowball.SNOWBALL_LANGS.get(random.nextInt(TestSnowball.SNOWBALL_LANGS.size()));
Class<? extends SnowballStemmer> clazz = Class.forName("org.tartarus.snowball.ext." + lang + "Stemmer").asSubclass(SnowballStemmer.class);
return clazz.getConstructor().newInstance();
} catch (Exception ex) {
Rethrow.rethrow(ex);

View File

@ -18,11 +18,16 @@ package org.apache.lucene.analysis.snowball;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
@ -97,12 +102,14 @@ public class TestSnowball extends BaseTokenStreamTestCase {
}
/** for testing purposes ONLY */
public static String SNOWBALL_LANGS[] = {
"Armenian", "Basque", "Catalan", "Danish", "Dutch", "English",
"Finnish", "French", "German2", "German", "Hungarian", "Irish",
"Italian", "Kp", "Lovins", "Norwegian", "Porter", "Portuguese",
"Romanian", "Russian", "Spanish", "Swedish", "Turkish"
};
public static final List<String> SNOWBALL_LANGS;
static {
try (InputStream in = TestSnowball.class.getResourceAsStream("languages.txt")) {
SNOWBALL_LANGS = WordlistLoader.getLines(in, StandardCharsets.UTF_8);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
public void testEmptyTerm() throws IOException {
for (final String lang : SNOWBALL_LANGS) {

View File

@ -18,51 +18,38 @@ package org.apache.lucene.analysis.snowball;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Nightly;
import static org.apache.lucene.analysis.VocabularyAssert.*;
/**
* Test the snowball filters against the snowball data tests
*/
@Nightly
public class TestSnowballVocab extends LuceneTestCase {
/**
* Run all languages against their snowball vocabulary tests.
*/
public void testStemmers() throws IOException {
assertCorrectOutput("Arabic", "arabic");
assertCorrectOutput("Danish", "danish");
assertCorrectOutput("Dutch", "dutch");
assertCorrectOutput("English", "english");
assertCorrectOutput("Finnish", "finnish");
assertCorrectOutput("French", "french");
assertCorrectOutput("German", "german");
assertCorrectOutput("German2", "german2");
assertCorrectOutput("Hungarian", "hungarian");
assertCorrectOutput("Italian", "italian");
assertCorrectOutput("Kp", "kraaij_pohlmann");
assertCorrectOutput("Lovins", "lovins");
assertCorrectOutput("Norwegian", "norwegian");
assertCorrectOutput("Porter", "porter");
assertCorrectOutput("Portuguese", "portuguese");
assertCorrectOutput("Romanian", "romanian");
assertCorrectOutput("Russian", "russian");
assertCorrectOutput("Spanish", "spanish");
assertCorrectOutput("Swedish", "swedish");
assertCorrectOutput("Turkish", "turkish");
try (InputStream in = getClass().getResourceAsStream("test_languages.txt")) {
for (String datafile : WordlistLoader.getLines(in, StandardCharsets.UTF_8)) {
String language = "" + Character.toUpperCase(datafile.charAt(0)) + datafile.substring(1);
assertCorrectOutput(language, datafile + ".zip");
}
}
}
/**
* For the supplied language, run the stemmer against all strings in voc.txt
* The output should be the same as the string in output.txt
*/
private void assertCorrectOutput(final String snowballLanguage, String dataDirectory)
private void assertCorrectOutput(final String snowballLanguage, String zipfile)
throws IOException {
if (VERBOSE) System.out.println("checking snowball language: " + snowballLanguage);
@ -74,8 +61,7 @@ public class TestSnowballVocab extends LuceneTestCase {
}
};
assertVocabulary(a, getDataPath("TestSnowballVocabData.zip"),
dataDirectory + "/voc.txt", dataDirectory + "/output.txt");
assertVocabulary(a, getDataPath(zipfile), "voc.txt", "output.txt");
a.close();
}
}

View File

@ -0,0 +1,32 @@
Arabic
Armenian
Basque
Catalan
Danish
Dutch
English
Estonian
Finnish
French
German2
German
Greek
Hindi
Hungarian
Indonesian
Irish
Italian
Kp
Lithuanian
Lovins
Nepali
Norwegian
Porter
Portuguese
Romanian
Russian
Serbian
Spanish
Swedish
Tamil
Turkish

View File

@ -0,0 +1,20 @@
danish
dutch
english
finnish
german
german2
hungarian
irish
italian
kp
lovins
nepali
norwegian
porter
portuguese
romanian
russian
spanish
swedish
turkish

View File

@ -1945,7 +1945,7 @@ ${ant.project.name}.test.dependencies=${test.classpath.list}
<!-- svg files generated by gnuplot -->
<pattern substring="Produced by GNUPLOT"/>
<!-- snowball stemmers generated by snowball compiler -->
<pattern substring="This file was generated automatically by the Snowball to Java compiler"/>
<pattern substring="Generated by Snowball"/>
<!-- parsers generated by antlr -->
<pattern substring="ANTLR GENERATED CODE"/>
</rat:substringMatcher>

View File

@ -29,7 +29,7 @@ import org.carrot2.text.linguistic.IStemmerFactory;
import org.carrot2.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.SnowballStemmer;
import org.tartarus.snowball.ext.DanishStemmer;
import org.tartarus.snowball.ext.DutchStemmer;
import org.tartarus.snowball.ext.EnglishStemmer;
@ -83,7 +83,7 @@ public class LuceneCarrot2StemmerFactory implements IStemmerFactory {
* This mapping is not dynamic because we want to keep the possibility to
* obfuscate these classes.
*/
private static HashMap<LanguageCode, Class<? extends SnowballProgram>> snowballStemmerClasses;
private static HashMap<LanguageCode, Class<? extends SnowballStemmer>> snowballStemmerClasses;
static {
snowballStemmerClasses = new HashMap<>();
snowballStemmerClasses.put(LanguageCode.DANISH, DanishStemmer.class);
@ -110,9 +110,9 @@ public class LuceneCarrot2StemmerFactory implements IStemmerFactory {
* An adapter converting Snowball programs into {@link IStemmer} interface.
*/
private static class SnowballStemmerAdapter implements IStemmer {
private final SnowballProgram snowballStemmer;
private final SnowballStemmer snowballStemmer;
public SnowballStemmerAdapter(SnowballProgram snowballStemmer) {
public SnowballStemmerAdapter(SnowballStemmer snowballStemmer) {
this.snowballStemmer = snowballStemmer;
}
@ -129,11 +129,11 @@ public class LuceneCarrot2StemmerFactory implements IStemmerFactory {
/**
* Create and return an {@link IStemmer} adapter for a
* {@link SnowballProgram} for a given language code. An identity stemmer is
* {@link SnowballStemmer} for a given language code. An identity stemmer is
* returned for unknown languages.
*/
public static IStemmer createStemmer(LanguageCode language) {
final Class<? extends SnowballProgram> stemmerClazz = snowballStemmerClasses
final Class<? extends SnowballStemmer> stemmerClazz = snowballStemmerClasses
.get(language);
if (stemmerClazz == null) {