mirror of https://github.com/apache/lucene.git
LUCENE-9220: regenerate all stemmers/stopwords/test data from snowball 2.0 (#1262)
Previous situation: * The snowball base classes (Among, SnowballProgram, etc.) had accumulated local performance-related changes. There was a task that would also "patch" generated classes (e.g. GermanStemmer) after-the-fact. * Snowball classes had many "non-changes" from the original, such as removal of tabs, addition of javadocs, license headers, etc. * Snowball test data (inputs and expected stems) was incorporated into lucene testing, but this was maintained manually. Also, files had become large, making the test too slow (Nightly). * Snowball stopword lists from their website were manually maintained. In some cases encoding fixes were manually applied. * Some generated stemmers (such as Estonian and Armenian) exist in lucene, but have no corresponding `.sbl` file in snowball sources at all. Besides this mess, the snowball project is "moving along" and acquiring new languages, adding non-BSD-licensed test data, huge test data, and other complexity. So it is time to automate the integration better. New situation: * Lucene has a `gradle snowball` regeneration task. It works on Linux or Mac only. It checks out their repos, applies the `snowball.patch` in our repository, compiles snowball stemmers, regenerates all java code, and applies any adjustments so that our build is happy. * Test data is automatically regenerated from the commit hash of the snowball test data repository. Not all languages are tested from their data: only where the license is simple BSD. Test data is also (deterministically) sampled, so that we don't have huge files. We just want to make sure our integration works. * Randomized tests are still set to test every language with generated fake words. The regeneration task ensures all languages get tested (it writes a simple text file list of them). * Stopword files are automatically regenerated from the commit hash of the snowball website repository. * The regeneration procedure is idempotent. 
This way when stuff does change, you know exactly what happened. For example if test data changes to a different license, you may see a git deletion. Or if a new language/stopwords/test data gets added, you will see git additions.
This commit is contained in:
parent
188f620208
commit
0203815ab2
|
@ -85,6 +85,7 @@ apply from: file('gradle/validation/owasp-dependency-check.gradle')
|
|||
apply from: file('gradle/generation/jflex.gradle')
|
||||
apply from: file('gradle/generation/javacc.gradle')
|
||||
apply from: file('gradle/generation/util.gradle')
|
||||
apply from: file('gradle/generation/snowball.gradle')
|
||||
|
||||
// Additional development aids.
|
||||
apply from: file('gradle/maven/maven-local.gradle')
|
||||
|
|
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
apply plugin: "de.undercouch.download"
|
||||
|
||||
// Registers the user-facing "snowball" task on the root project. The task has no
// actions of its own; it only depends on ":lucene:analysis:common:snowballGen".
configure(rootProject) {
|
||||
task snowball() {
|
||||
description "Regenerate snowball-based sources, stopwords, and tests for ...lucene/analysis."
|
||||
group "generation"
|
||||
|
||||
// delegate all work to the generation task of the analysis-common project
dependsOn ":lucene:analysis:common:snowballGen"
|
||||
}
|
||||
}
|
||||
|
||||
configure(project(":lucene:analysis:common")) {
|
||||
ext {
|
||||
// git commit hash of source code https://github.com/snowballstem/snowball/
|
||||
snowballStemmerCommit = "53739a805cfa6c77ff8496dc711dc1c106d987c1"
|
||||
// git commit hash of stopwords https://github.com/snowballstem/snowball-website
|
||||
snowballWebsiteCommit = "ff891e74f08e7315523ee3c0cad55bb1b7831b9d"
|
||||
// git commit hash of test data https://github.com/snowballstem/snowball-data
|
||||
snowballDataCommit = "9145f8732ec952c8a3d1066be251da198a8bc792"
|
||||
|
||||
snowballWorkDir = file("${buildDir}/snowball")
|
||||
|
||||
snowballStemmerDir = file("${snowballWorkDir}/stemmers-${snowballStemmerCommit}")
|
||||
snowballWebsiteDir = file("${snowballWorkDir}/website-${snowballWebsiteCommit}")
|
||||
snowballDataDir = file("${snowballWorkDir}/data-${snowballDataCommit}")
|
||||
|
||||
snowballPatchFile = rootProject.file("gradle/generation/snowball.patch")
|
||||
snowballScript = rootProject.file("gradle/generation/snowball.sh")
|
||||
}
|
||||
|
||||
// Downloads the snowball stemmer sources (or uses a cached copy): fetches the GitHub
// archive of the pinned commit (snowballStemmerCommit) to "${snowballStemmerDir}.zip".
// "overwrite false" asks the Download task not to replace a zip that is already there.
// Afterwards the zip is unpacked into snowballStemmerDir (cutdirsmapper with dirs=1
// strips the first directory level of each archive path) and snowball.patch is applied.
// The patch file is declared as a task input.
|
||||
task downloadSnowballStemmers(type: Download) {
|
||||
inputs.file(snowballPatchFile)
|
||||
src "https://github.com/snowballstem/snowball/archive/${snowballStemmerCommit}.zip"
|
||||
def snowballStemmerZip = file("${snowballStemmerDir}.zip")
|
||||
dest snowballStemmerZip
|
||||
overwrite false
|
||||
tempAndMove true
|
||||
|
||||
doLast {
|
||||
ant.unzip(src: snowballStemmerZip, dest: snowballStemmerDir, overwrite: "true") {
|
||||
ant.cutdirsmapper(dirs: "1")
|
||||
}
|
||||
// apply the local patch on top of the freshly unpacked sources
ant.patch(patchfile: snowballPatchFile, dir: snowballStemmerDir, strip: "1")
|
||||
}
|
||||
}
|
||||
|
||||
// Downloads the snowball website sources (or uses a cached copy): fetches the GitHub
// archive of the pinned commit (snowballWebsiteCommit) to "${snowballWebsiteDir}.zip".
// "overwrite false" asks the Download task not to replace a zip that is already there.
// Afterwards the zip is unpacked into snowballWebsiteDir, with cutdirsmapper (dirs=1)
// stripping the first directory level of each archive path.
|
||||
task downloadSnowballWebsite(type: Download) {
|
||||
src "https://github.com/snowballstem/snowball-website/archive/${snowballWebsiteCommit}.zip"
|
||||
def snowballWebsiteZip = file("${snowballWebsiteDir}.zip")
|
||||
dest snowballWebsiteZip
|
||||
overwrite false
|
||||
tempAndMove true
|
||||
|
||||
doLast {
|
||||
ant.unzip(src: snowballWebsiteZip, dest: snowballWebsiteDir, overwrite: "true") {
|
||||
ant.cutdirsmapper(dirs: "1")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Downloads the snowball test data (or uses a cached copy): fetches the GitHub
// archive of the pinned commit (snowballDataCommit) to "${snowballDataDir}.zip".
// "overwrite false" asks the Download task not to replace a zip that is already there.
// Afterwards the zip is unpacked into snowballDataDir, with cutdirsmapper (dirs=1)
// stripping the first directory level of each archive path.
|
||||
task downloadSnowballData(type: Download) {
|
||||
src "https://github.com/snowballstem/snowball-data/archive/${snowballDataCommit}.zip"
|
||||
def snowballDataZip = file("${snowballDataDir}.zip")
|
||||
dest snowballDataZip
|
||||
overwrite false
|
||||
tempAndMove true
|
||||
|
||||
doLast {
|
||||
ant.unzip(src: snowballDataZip, dest: snowballDataDir, overwrite: "true") {
|
||||
ant.cutdirsmapper(dirs: "1")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the shell script that regenerates stemmers, base stemming subclasses, test data,
// and stopwords. Depends on the three download tasks, then invokes snowball.sh through
// "bash" with four arguments, in this order: the stemmer source dir, the website dir,
// the test data dir, and this project's directory.
|
||||
task snowballGen() {
|
||||
dependsOn downloadSnowballStemmers
|
||||
dependsOn downloadSnowballWebsite
|
||||
dependsOn downloadSnowballData
|
||||
|
||||
doLast {
|
||||
project.exec {
|
||||
executable "bash"
|
||||
// argument order must match the positional parameters read by the script
args = [snowballScript, snowballStemmerDir, snowballWebsiteDir, snowballDataDir, projectDir]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,123 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# remove this script when problems are fixed
|
||||
SRCDIR=$1
|
||||
WWWSRCDIR=$2
|
||||
TESTSRCDIR=$3
|
||||
PROJECTDIR=$4
|
||||
DESTDIR="${PROJECTDIR}/src/java/org/tartarus/snowball"
|
||||
WWWDSTDIR="${PROJECTDIR}/src/resources/org/apache/lucene/analysis/snowball"
|
||||
TESTDSTDIR="${PROJECTDIR}/src/test/org/apache/lucene/analysis/snowball"
|
||||
|
||||
trap 'echo "usage: ./snowball.sh <snowball> <snowball-website> <snowball-data> <analysis-common>" && exit 2' ERR
|
||||
test $# -eq 4
|
||||
|
||||
trap 'echo "*** BUILD FAILED ***" $BASH_SOURCE:$LINENO: error: "$BASH_COMMAND" returned $?' ERR
|
||||
set -eEuo pipefail
|
||||
|
||||
# reformats file indentation to kill the crazy space/tabs mix.
|
||||
# prevents early blindness !
|
||||
function reformat_java() {
|
||||
# convert tabs to 8 spaces, then reduce indent from 4 space to 2 space
|
||||
target=$1
|
||||
tmpfile=$(mktemp)
|
||||
cat ${target} | perl -p -e 's/\t/ /g' | perl -p -e 's/ / /g' > ${tmpfile}
|
||||
mv ${tmpfile} ${target}
|
||||
}
|
||||
|
||||
# generate stuff with existing makefile, just 'make' will try to do crazy stuff with e.g. python
|
||||
# and likely fail. so only ask for our specific target.
|
||||
(cd ${SRCDIR} && chmod a+x libstemmer/mkalgorithms.pl && make dist_libstemmer_java)
|
||||
|
||||
for file in "SnowballStemmer.java" "Among.java" "SnowballProgram.java"; do
|
||||
# add license header to files since they have none, otherwise the RAT license check will fail
|
||||
echo "/*" > ${DESTDIR}/${file}
|
||||
cat ${SRCDIR}/COPYING >> ${DESTDIR}/${file}
|
||||
echo "*/" >> ${DESTDIR}/${file}
|
||||
cat ${SRCDIR}/java/org/tartarus/snowball/${file} >> ${DESTDIR}/${file}
|
||||
reformat_java ${DESTDIR}/${file}
|
||||
done
|
||||
|
||||
rm ${DESTDIR}/ext/*Stemmer.java
|
||||
rm -f ${TESTDSTDIR}/languages.txt
|
||||
for file in ${SRCDIR}/java/org/tartarus/snowball/ext/*.java; do
|
||||
# title-case the classes (fooStemmer -> FooStemmer) so they obey normal java conventions
|
||||
base=$(basename $file)
|
||||
oldclazz="${base%.*}"
|
||||
# one-off
|
||||
if [ "${oldclazz}" == "kraaij_pohlmannStemmer" ]; then
|
||||
newclazz="KpStemmer"
|
||||
else
|
||||
newclazz=${oldclazz^}
|
||||
fi
|
||||
echo ${newclazz} | sed -e 's/Stemmer//' >> ${TESTDSTDIR}/languages.txt
|
||||
cat $file | sed "s/${oldclazz}/${newclazz}/g" > ${DESTDIR}/ext/${newclazz}.java
|
||||
reformat_java ${DESTDIR}/ext/${newclazz}.java
|
||||
done
|
||||
|
||||
# regenerate test data
|
||||
rm -f ${TESTDSTDIR}/test_languages.txt
|
||||
rm -f ${TESTDSTDIR}/*.zip
|
||||
for file in ${TESTSRCDIR}/*; do
|
||||
# look for input (voc.txt) and expected output (output.txt) without any special licenses (COPYING)
|
||||
if [ -f "${file}/voc.txt" ] && [ -f "${file}/output.txt" ] && [ ! -f "${file}/COPYING" ]; then
|
||||
language=$(basename ${file})
|
||||
if [ "${language}" == "kraaij_pohlmann" ]; then
|
||||
language="kp"
|
||||
fi
|
||||
# make the .zip reproducible if data hasn't changed.
|
||||
arbitrary_timestamp="200001010000"
|
||||
# some test files are yuge, randomly sample up to this amount
|
||||
row_limit="2000"
|
||||
tmpdir=$(mktemp -d)
|
||||
myrandom="openssl enc -aes-256-ctr -k ${arbitrary_timestamp} -nosalt -iv 0 -md md5"
|
||||
for data in "voc.txt" "output.txt"; do
|
||||
shuf -n ${row_limit} --random-source=<(${myrandom} < /dev/zero 2>/dev/null) ${file}/${data} > ${tmpdir}/${data} \
|
||||
&& touch -t ${arbitrary_timestamp} ${tmpdir}/${data}
|
||||
done
|
||||
zip --quiet --junk-paths -X -9 ${TESTDSTDIR}/${language}.zip ${tmpdir}/voc.txt ${tmpdir}/output.txt
|
||||
echo "${language}" >> ${TESTDSTDIR}/test_languages.txt
|
||||
rm -r ${tmpdir}
|
||||
fi
|
||||
done
|
||||
|
||||
# regenerate stopwords data
|
||||
rm -f ${WWWDSTDIR}/*_stop.txt
|
||||
# Rebuilds one ${language}_stop.txt per algorithm directory of the snowball website
# checkout. Each output file starts with a fixed attribution/license notice and is
# followed by the upstream stop list.
for file in "${WWWSRCDIR}"/algorithms/*/stop.txt; do
  # the language name is the directory that contains stop.txt
  language=$(basename "$(dirname "${file}")")
  dest="${WWWDSTDIR}/${language}_stop.txt"
  cat > "${dest}" << EOF
| From https://snowballstem.org/algorithms/${language}/stop.txt
| This file is distributed under the BSD License.
| See https://snowballstem.org/license.html
| Also see https://opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
EOF
  case "${language}" in
    danish)
      # clear up some slight mojibake on the website. TODO: fix this file!
      # UTF-8 bytes of å/æ that were mis-decoded as Latin-1 appear as Ã¥/Ã¦.
      sed -e 's/Ã¥/å/g' -e 's/Ã¦/æ/g' "${file}" >> "${dest}"
      ;;
    *)
      # try to confirm it's really UTF-8 (iconv is expected to reject invalid input)
      iconv -f UTF-8 -t UTF-8 "${file}" >> "${dest}"
      ;;
  esac
done
|
|
@ -210,7 +210,7 @@ class RatTask extends DefaultTask {
|
|||
// svg files generated by gnuplot
|
||||
pattern(substring: "Produced by GNUPLOT")
|
||||
// snowball stemmers generated by snowball compiler
|
||||
pattern(substring: "This file was generated automatically by the Snowball to Java compiler")
|
||||
pattern(substring: "Generated by Snowball")
|
||||
// parsers generated by antlr
|
||||
pattern(substring: "ANTLR GENERATED CODE")
|
||||
}
|
||||
|
|
|
@ -72,6 +72,10 @@ Improvements
|
|||
This tool no longer forceMerge(1)s to a single segment by default. If you
|
||||
rely upon this behavior, pass -max-segments 1 instead. (Robert Muir)
|
||||
|
||||
* LUCENE-9220: Upgrade snowball to 2.0. New snowball stemmers: Hindi, Indonesian,
|
||||
Nepali, Serbian, and Tamil. New stoplist: Indonesian. Adds gradle 'snowball'
|
||||
task to regenerate and ease future upgrades. (Robert Muir, Dawid Weiss)
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
|
||||
|
|
|
@ -1,22 +1,12 @@
|
|||
Lucene Analyzers README file
|
||||
|
||||
This project provides pre-compiled version of the Snowball stemmers
|
||||
based on revision 502 of the Tartarus Snowball repository,
|
||||
now located at https://github.com/snowballstem/snowball/tree/e103b5c257383ee94a96e7fc58cab3c567bf079b (GitHub),
|
||||
This project provides a pre-compiled version of the Snowball stemmers,
|
||||
now located at https://github.com/snowballstem/snowball/tree/53739a805cfa6c77ff8496dc711dc1c106d987c1 (GitHub),
|
||||
together with classes integrating them with the Lucene search engine.
|
||||
|
||||
A few changes has been made to the static Snowball code and compiled stemmers:
|
||||
|
||||
* Class SnowballProgram is made abstract and contains new abstract method stem() to avoid reflection in Lucene filter class SnowballFilter.
|
||||
* All use of StringBuffers has been refactored to StringBuilder for speed.
|
||||
* Snowball BSD license header has been added to the Java classes to avoid having RAT adding new ASL headers.
|
||||
* Uses Java 7 MethodHandles and fixes method visibility bug: http://article.gmane.org/gmane.comp.search.snowball/1139
|
||||
|
||||
If you want to add new stemmers, use the exact revision / Git commit above to generate the Java class, place it
|
||||
in src/java/org/tartarus/snowball/ext, and finally execute "ant patch-snowball". The latter will change the APIs
|
||||
of the generated class to make it compatible. Already patched classes are not modified.
|
||||
The Arabic stemmer has been generated from https://github.com/snowballstem/snowball/blob/master/algorithms/arabic.sbl
|
||||
using the latest version of snowball and patched manually.
|
||||
The snowball tree needs patches applied to properly generate efficient code for lucene.
|
||||
You can regenerate everything with 'gradlew snowball'
|
||||
Refer to gradle/generation/snowball* files in the build for upgrading snowball.
|
||||
|
||||
IMPORTANT NOTICE ON BACKWARDS COMPATIBILITY!
|
||||
|
||||
|
|
|
@ -29,8 +29,6 @@
|
|||
|
||||
<import file="../analysis-module-build.xml"/>
|
||||
|
||||
<property name="snowball.programs.dir" location="src/java/org/tartarus/snowball/ext"/>
|
||||
|
||||
<property name="unicode-props-file" location="src/java/org/apache/lucene/analysis/util/UnicodeProps.java"/>
|
||||
|
||||
<!-- Because of a bug in JFlex's ant task, HTMLStripCharFilter has to be generated last. -->
|
||||
|
@ -124,14 +122,4 @@
|
|||
|
||||
<target name="regenerate" depends="jflex,unicode-data"/>
|
||||
|
||||
<target name="patch-snowball" description="Patches all snowball programs in '${snowball.programs.dir}' to make them work with MethodHandles">
|
||||
<fileset id="snowball.programs" dir="${snowball.programs.dir}" includes="*Stemmer.java"/>
|
||||
<replaceregexp match="^public class \w+Stemmer\b" replace="@SuppressWarnings("unused") \0" flags="m" encoding="UTF-8">
|
||||
<fileset refid="snowball.programs"/>
|
||||
</replaceregexp>
|
||||
<replaceregexp match="private final static \w+Stemmer methodObject\b.*$" replace="/* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();" flags="m" encoding="UTF-8">
|
||||
<fileset refid="snowball.programs"/>
|
||||
</replaceregexp>
|
||||
<fixcrlf srcdir="${snowball.programs.dir}" includes="*Stemmer.java" tab="remove" tablength="2" encoding="UTF-8" javafiles="yes" fixlast="yes"/>
|
||||
</target>
|
||||
</project>
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
import org.tartarus.snowball.SnowballStemmer;
|
||||
|
||||
/**
|
||||
* A filter that stems words using a Snowball-generated stemmer.
|
||||
|
@ -51,12 +51,12 @@ import org.tartarus.snowball.SnowballProgram;
|
|||
*/
|
||||
public final class SnowballFilter extends TokenFilter {
|
||||
|
||||
private final SnowballProgram stemmer;
|
||||
private final SnowballStemmer stemmer;
|
||||
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
|
||||
|
||||
public SnowballFilter(TokenStream input, SnowballProgram stemmer) {
|
||||
public SnowballFilter(TokenStream input, SnowballStemmer stemmer) {
|
||||
super(input);
|
||||
this.stemmer = stemmer;
|
||||
}
|
||||
|
@ -76,8 +76,8 @@ public final class SnowballFilter extends TokenFilter {
|
|||
//Class.forName is frowned upon in place of the ResourceLoader but in this case,
|
||||
// the factory will use the other constructor so that the program is already loaded.
|
||||
try {
|
||||
Class<? extends SnowballProgram> stemClass =
|
||||
Class.forName("org.tartarus.snowball.ext." + name + "Stemmer").asSubclass(SnowballProgram.class);
|
||||
Class<? extends SnowballStemmer> stemClass =
|
||||
Class.forName("org.tartarus.snowball.ext." + name + "Stemmer").asSubclass(SnowballStemmer.class);
|
||||
stemmer = stemClass.getConstructor().newInstance();
|
||||
} catch (Exception e) {
|
||||
throw new IllegalArgumentException("Invalid stemmer class specified: " + name, e);
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
|||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
import org.tartarus.snowball.SnowballStemmer;
|
||||
|
||||
/**
|
||||
* Factory for {@link SnowballFilter}, with configurable language
|
||||
|
@ -54,7 +54,7 @@ public class SnowballPorterFilterFactory extends TokenFilterFactory implements R
|
|||
|
||||
private final String language;
|
||||
private final String wordFiles;
|
||||
private Class<? extends SnowballProgram> stemClass;
|
||||
private Class<? extends SnowballStemmer> stemClass;
|
||||
private CharArraySet protectedWords = null;
|
||||
|
||||
/** Creates a new SnowballPorterFilterFactory */
|
||||
|
@ -70,7 +70,7 @@ public class SnowballPorterFilterFactory extends TokenFilterFactory implements R
|
|||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
String className = "org.tartarus.snowball.ext." + language + "Stemmer";
|
||||
stemClass = loader.newInstance(className, SnowballProgram.class).getClass();
|
||||
stemClass = loader.newInstance(className, SnowballStemmer.class).getClass();
|
||||
|
||||
if (wordFiles != null) {
|
||||
protectedWords = getWordSet(loader, wordFiles, false);
|
||||
|
@ -79,7 +79,7 @@ public class SnowballPorterFilterFactory extends TokenFilterFactory implements R
|
|||
|
||||
@Override
|
||||
public TokenFilter create(TokenStream input) {
|
||||
SnowballProgram program;
|
||||
SnowballStemmer program;
|
||||
try {
|
||||
program = stemClass.getConstructor().newInstance();
|
||||
} catch (Exception e) {
|
||||
|
|
|
@ -1,34 +1,34 @@
|
|||
/*
|
||||
|
||||
Copyright (c) 2001, Dr Martin Porter
|
||||
Copyright (c) 2002, Richard Boulton
|
||||
Copyright (c) 2004,2005, Richard Boulton
|
||||
Copyright (c) 2013, Yoshiki Shibukawa
|
||||
Copyright (c) 2006,2007,2009,2010,2011,2014-2019, Olly Betts
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holders nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
3. Neither the name of the Snowball project nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
package org.tartarus.snowball;
|
||||
|
||||
import java.lang.invoke.MethodHandle;
|
||||
|
@ -37,47 +37,41 @@ import java.lang.invoke.MethodType;
|
|||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* This is the rev 502 of the Snowball SVN trunk,
|
||||
* now located at <a target="_blank" href="https://github.com/snowballstem/snowball/tree/e103b5c257383ee94a96e7fc58cab3c567bf079b">GitHub</a>,
|
||||
* but modified:
|
||||
* <ul>
|
||||
* <li>made abstract and introduced abstract method stem to avoid expensive reflection in filter class.
|
||||
* <li>refactored StringBuffers to StringBuilder
|
||||
* <li>uses char[] as buffer instead of StringBuffer/StringBuilder
|
||||
* <li>eq_s,eq_s_b,insert,replace_s take CharSequence like eq_v and eq_v_b
|
||||
* <li>use MethodHandles and fix <a target="_blank" href="http://article.gmane.org/gmane.comp.search.snowball/1139">method visibility bug</a>.
|
||||
* </ul>
|
||||
* Internal class used by Snowball stemmers
|
||||
*/
|
||||
public final class Among {
|
||||
|
||||
public Among(String s, int substring_i, int result,
|
||||
String methodname, MethodHandles.Lookup methodobject) {
|
||||
this.s_size = s.length();
|
||||
public class Among {
|
||||
public Among (String s, int substring_i, int result) {
|
||||
this.s = s.toCharArray();
|
||||
this.substring_i = substring_i;
|
||||
this.result = result;
|
||||
if (methodname.isEmpty()) {
|
||||
this.method = null;
|
||||
} else {
|
||||
final Class<? extends SnowballProgram> clazz = methodobject.lookupClass().asSubclass(SnowballProgram.class);
|
||||
this.method = null;
|
||||
}
|
||||
|
||||
public Among (String s, int substring_i, int result, String methodname,
|
||||
MethodHandles.Lookup methodobject) {
|
||||
this.s = s.toCharArray();
|
||||
this.substring_i = substring_i;
|
||||
this.result = result;
|
||||
final Class<? extends SnowballProgram> clazz = methodobject.lookupClass().asSubclass(SnowballProgram.class);
|
||||
if (methodname.length() > 0) {
|
||||
try {
|
||||
this.method = methodobject.findVirtual(clazz, methodname, MethodType.methodType(boolean.class))
|
||||
.asType(MethodType.methodType(boolean.class, SnowballProgram.class));
|
||||
.asType(MethodType.methodType(boolean.class, SnowballProgram.class));
|
||||
} catch (NoSuchMethodException | IllegalAccessException e) {
|
||||
throw new RuntimeException(String.format(Locale.ENGLISH,
|
||||
"Snowball program '%s' is broken, cannot access method: boolean %s()",
|
||||
clazz.getSimpleName(), methodname
|
||||
"Snowball program '%s' is broken, cannot access method: boolean %s()",
|
||||
clazz.getSimpleName(), methodname
|
||||
), e);
|
||||
}
|
||||
} else {
|
||||
this.method = null;
|
||||
}
|
||||
}
|
||||
|
||||
final int s_size; /* search string */
|
||||
final char[] s; /* search string */
|
||||
final int substring_i; /* index to longest matching substring */
|
||||
final int result; /* result of the lookup */
|
||||
final int result; /* result of the lookup */
|
||||
|
||||
// Make sure this is not accessible outside package for Java security reasons!
|
||||
final MethodHandle method; /* method to use if substring matches */
|
||||
|
||||
}
|
||||
};
|
||||
|
|
|
@ -1,407 +1,370 @@
|
|||
/*
|
||||
|
||||
Copyright (c) 2001, Dr Martin Porter
|
||||
Copyright (c) 2002, Richard Boulton
|
||||
Copyright (c) 2004,2005, Richard Boulton
|
||||
Copyright (c) 2013, Yoshiki Shibukawa
|
||||
Copyright (c) 2006,2007,2009,2010,2011,2014-2019, Olly Betts
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holders nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
3. Neither the name of the Snowball project nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package org.tartarus.snowball;
|
||||
|
||||
import java.lang.reflect.UndeclaredThrowableException;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* This is the rev 502 of the Snowball SVN trunk,
|
||||
* now located at <a target="_blank" href="https://github.com/snowballstem/snowball/tree/e103b5c257383ee94a96e7fc58cab3c567bf079b">GitHub</a>,
|
||||
* but modified:
|
||||
* <ul>
|
||||
* <li>made abstract and introduced abstract method stem to avoid expensive reflection in filter class.
|
||||
* <li>refactored StringBuffers to StringBuilder
|
||||
* <li>uses char[] as buffer instead of StringBuffer/StringBuilder
|
||||
* <li>eq_s,eq_s_b,insert,replace_s take CharSequence like eq_v and eq_v_b
|
||||
* <li>use MethodHandles and fix <a target="_blank" href="http://article.gmane.org/gmane.comp.search.snowball/1139">method visibility bug</a>.
|
||||
* </ul>
|
||||
* Base class for a snowball stemmer
|
||||
*/
|
||||
public abstract class SnowballProgram {
|
||||
public class SnowballProgram implements Serializable {
|
||||
protected SnowballProgram()
|
||||
{
|
||||
current = new char[8];
|
||||
setCurrent("");
|
||||
}
|
||||
|
||||
protected SnowballProgram()
|
||||
{
|
||||
current = new char[8];
|
||||
setCurrent("");
|
||||
}
|
||||
static final long serialVersionUID = 2016072500L;
|
||||
|
||||
public abstract boolean stem();
|
||||
/**
|
||||
* Set the current string.
|
||||
*/
|
||||
public void setCurrent(String value)
|
||||
{
|
||||
current = value.toCharArray();
|
||||
cursor = 0;
|
||||
limit = value.length();
|
||||
limit_backward = 0;
|
||||
bra = cursor;
|
||||
ket = limit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the current string.
|
||||
*/
|
||||
public void setCurrent(String value)
|
||||
{
|
||||
current = value.toCharArray();
|
||||
cursor = 0;
|
||||
limit = value.length();
|
||||
limit_backward = 0;
|
||||
bra = cursor;
|
||||
ket = limit;
|
||||
}
|
||||
/**
|
||||
* Get the current string.
|
||||
*/
|
||||
public String getCurrent()
|
||||
{
|
||||
return new String(current, 0, limit);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current string.
|
||||
*/
|
||||
public String getCurrent()
|
||||
{
|
||||
return new String(current, 0, limit);
|
||||
}
|
||||
/**
|
||||
* Set the current string.
|
||||
* @param text character array containing input
|
||||
* @param length valid length of text.
|
||||
*/
|
||||
public void setCurrent(char text[], int length) {
|
||||
current = text;
|
||||
cursor = 0;
|
||||
limit = length;
|
||||
limit_backward = 0;
|
||||
bra = cursor;
|
||||
ket = limit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the current string.
|
||||
* @param text character array containing input
|
||||
* @param length valid length of text.
|
||||
*/
|
||||
public void setCurrent(char text[], int length) {
|
||||
current = text;
|
||||
cursor = 0;
|
||||
limit = length;
|
||||
limit_backward = 0;
|
||||
bra = cursor;
|
||||
ket = limit;
|
||||
}
|
||||
/**
|
||||
* Get the current buffer containing the stem.
|
||||
* <p>
|
||||
* NOTE: this may be a reference to a different character array than the
|
||||
* one originally provided with setCurrent, in the exceptional case that
|
||||
* stemming produced a longer intermediate or result string.
|
||||
* </p>
|
||||
* <p>
|
||||
* It is necessary to use {@link #getCurrentBufferLength()} to determine
|
||||
* the valid length of the returned buffer. For example, many words are
|
||||
* stemmed simply by subtracting from the length to remove suffixes.
|
||||
* </p>
|
||||
* @see #getCurrentBufferLength()
|
||||
*/
|
||||
public char[] getCurrentBuffer() {
|
||||
return current;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current buffer containing the stem.
|
||||
* <p>
|
||||
* NOTE: this may be a reference to a different character array than the
|
||||
* one originally provided with setCurrent, in the exceptional case that
|
||||
* stemming produced a longer intermediate or result string.
|
||||
* </p>
|
||||
* <p>
|
||||
* It is necessary to use {@link #getCurrentBufferLength()} to determine
|
||||
* the valid length of the returned buffer. For example, many words are
|
||||
* stemmed simply by subtracting from the length to remove suffixes.
|
||||
* </p>
|
||||
* @see #getCurrentBufferLength()
|
||||
*/
|
||||
public char[] getCurrentBuffer() {
|
||||
return current;
|
||||
}
|
||||
/**
|
||||
* Get the valid length of the character array in
|
||||
* {@link #getCurrentBuffer()}.
|
||||
* @return valid length of the array.
|
||||
*/
|
||||
public int getCurrentBufferLength() {
|
||||
return limit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the valid length of the character array in
|
||||
* {@link #getCurrentBuffer()}.
|
||||
* @return valid length of the array.
|
||||
*/
|
||||
public int getCurrentBufferLength() {
|
||||
return limit;
|
||||
}
|
||||
// current string
|
||||
private char current[];
|
||||
|
||||
// current string
|
||||
private char current[];
|
||||
protected int cursor;
|
||||
protected int limit;
|
||||
protected int limit_backward;
|
||||
protected int bra;
|
||||
protected int ket;
|
||||
|
||||
protected int cursor;
|
||||
protected int limit;
|
||||
protected int limit_backward;
|
||||
protected int bra;
|
||||
protected int ket;
|
||||
public SnowballProgram(SnowballProgram other) {
|
||||
current = other.current;
|
||||
cursor = other.cursor;
|
||||
limit = other.limit;
|
||||
limit_backward = other.limit_backward;
|
||||
bra = other.bra;
|
||||
ket = other.ket;
|
||||
}
|
||||
|
||||
protected void copy_from(SnowballProgram other)
|
||||
{
|
||||
current = other.current;
|
||||
cursor = other.cursor;
|
||||
limit = other.limit;
|
||||
limit_backward = other.limit_backward;
|
||||
bra = other.bra;
|
||||
ket = other.ket;
|
||||
}
|
||||
protected void copy_from(SnowballProgram other)
|
||||
{
|
||||
current = other.current;
|
||||
cursor = other.cursor;
|
||||
limit = other.limit;
|
||||
limit_backward = other.limit_backward;
|
||||
bra = other.bra;
|
||||
ket = other.ket;
|
||||
}
|
||||
|
||||
protected boolean in_grouping(char [] s, int min, int max)
|
||||
{
|
||||
if (cursor >= limit) return false;
|
||||
char ch = current[cursor];
|
||||
if (ch > max || ch < min) return false;
|
||||
ch -= min;
|
||||
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
|
||||
protected boolean in_grouping(char [] s, int min, int max)
|
||||
{
|
||||
if (cursor >= limit) return false;
|
||||
char ch = current[cursor];
|
||||
if (ch > max || ch < min) return false;
|
||||
ch -= min;
|
||||
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
|
||||
cursor++;
|
||||
return true;
|
||||
}
|
||||
|
||||
protected boolean in_grouping_b(char [] s, int min, int max)
|
||||
{
|
||||
if (cursor <= limit_backward) return false;
|
||||
char ch = current[cursor - 1];
|
||||
if (ch > max || ch < min) return false;
|
||||
ch -= min;
|
||||
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
|
||||
cursor--;
|
||||
return true;
|
||||
}
|
||||
|
||||
protected boolean out_grouping(char [] s, int min, int max)
|
||||
{
|
||||
if (cursor >= limit) return false;
|
||||
char ch = current[cursor];
|
||||
if (ch > max || ch < min) {
|
||||
cursor++;
|
||||
return true;
|
||||
}
|
||||
|
||||
protected boolean in_grouping_b(char [] s, int min, int max)
|
||||
{
|
||||
if (cursor <= limit_backward) return false;
|
||||
char ch = current[cursor - 1];
|
||||
if (ch > max || ch < min) return false;
|
||||
ch -= min;
|
||||
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
|
||||
cursor--;
|
||||
return true;
|
||||
}
|
||||
|
||||
protected boolean out_grouping(char [] s, int min, int max)
|
||||
{
|
||||
if (cursor >= limit) return false;
|
||||
char ch = current[cursor];
|
||||
if (ch > max || ch < min) {
|
||||
cursor++;
|
||||
return true;
|
||||
}
|
||||
ch -= min;
|
||||
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
|
||||
cursor ++;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected boolean out_grouping_b(char [] s, int min, int max)
|
||||
{
|
||||
if (cursor <= limit_backward) return false;
|
||||
char ch = current[cursor - 1];
|
||||
if (ch > max || ch < min) {
|
||||
cursor--;
|
||||
return true;
|
||||
}
|
||||
ch -= min;
|
||||
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
|
||||
cursor--;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected boolean in_range(int min, int max)
|
||||
{
|
||||
if (cursor >= limit) return false;
|
||||
char ch = current[cursor];
|
||||
if (ch > max || ch < min) return false;
|
||||
ch -= min;
|
||||
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
|
||||
cursor++;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected boolean in_range_b(int min, int max)
|
||||
{
|
||||
if (cursor <= limit_backward) return false;
|
||||
char ch = current[cursor - 1];
|
||||
if (ch > max || ch < min) return false;
|
||||
protected boolean out_grouping_b(char [] s, int min, int max)
|
||||
{
|
||||
if (cursor <= limit_backward) return false;
|
||||
char ch = current[cursor - 1];
|
||||
if (ch > max || ch < min) {
|
||||
cursor--;
|
||||
return true;
|
||||
}
|
||||
|
||||
protected boolean out_range(int min, int max)
|
||||
{
|
||||
if (cursor >= limit) return false;
|
||||
char ch = current[cursor];
|
||||
if (!(ch > max || ch < min)) return false;
|
||||
cursor++;
|
||||
return true;
|
||||
}
|
||||
|
||||
protected boolean out_range_b(int min, int max)
|
||||
{
|
||||
if (cursor <= limit_backward) return false;
|
||||
char ch = current[cursor - 1];
|
||||
if(!(ch > max || ch < min)) return false;
|
||||
ch -= min;
|
||||
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
|
||||
cursor--;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected boolean eq_s(int s_size, CharSequence s)
|
||||
{
|
||||
if (limit - cursor < s_size) return false;
|
||||
int i;
|
||||
for (i = 0; i != s_size; i++) {
|
||||
if (current[cursor + i] != s.charAt(i)) return false;
|
||||
protected boolean eq_s(CharSequence s)
|
||||
{
|
||||
if (limit - cursor < s.length()) return false;
|
||||
int i;
|
||||
for (i = 0; i != s.length(); i++) {
|
||||
if (current[cursor + i] != s.charAt(i)) return false;
|
||||
}
|
||||
cursor += s.length();
|
||||
return true;
|
||||
}
|
||||
|
||||
protected boolean eq_s_b(CharSequence s)
|
||||
{
|
||||
if (cursor - limit_backward < s.length()) return false;
|
||||
int i;
|
||||
for (i = 0; i != s.length(); i++) {
|
||||
if (current[cursor - s.length() + i] != s.charAt(i)) return false;
|
||||
}
|
||||
cursor -= s.length();
|
||||
return true;
|
||||
}
|
||||
|
||||
protected int find_among(Among v[])
|
||||
{
|
||||
int i = 0;
|
||||
int j = v.length;
|
||||
|
||||
int c = cursor;
|
||||
int l = limit;
|
||||
|
||||
int common_i = 0;
|
||||
int common_j = 0;
|
||||
|
||||
boolean first_key_inspected = false;
|
||||
|
||||
while (true) {
|
||||
int k = i + ((j - i) >> 1);
|
||||
int diff = 0;
|
||||
int common = common_i < common_j ? common_i : common_j; // smaller
|
||||
Among w = v[k];
|
||||
int i2;
|
||||
for (i2 = common; i2 < w.s.length; i2++) {
|
||||
if (c + common == l) {
|
||||
diff = -1;
|
||||
break;
|
||||
}
|
||||
diff = current[c + common] - w.s[i2];
|
||||
if (diff != 0) break;
|
||||
common++;
|
||||
}
|
||||
cursor += s_size;
|
||||
return true;
|
||||
}
|
||||
|
||||
protected boolean eq_s_b(int s_size, CharSequence s)
|
||||
{
|
||||
if (cursor - limit_backward < s_size) return false;
|
||||
int i;
|
||||
for (i = 0; i != s_size; i++) {
|
||||
if (current[cursor - s_size + i] != s.charAt(i)) return false;
|
||||
if (diff < 0) {
|
||||
j = k;
|
||||
common_j = common;
|
||||
} else {
|
||||
i = k;
|
||||
common_i = common;
|
||||
}
|
||||
cursor -= s_size;
|
||||
return true;
|
||||
}
|
||||
if (j - i <= 1) {
|
||||
if (i > 0) break; // v->s has been inspected
|
||||
if (j == i) break; // only one item in v
|
||||
|
||||
protected boolean eq_v(CharSequence s)
|
||||
{
|
||||
return eq_s(s.length(), s);
|
||||
}
|
||||
// - but now we need to go round once more to get
|
||||
// v->s inspected. This looks messy, but is actually
|
||||
// the optimal approach.
|
||||
|
||||
protected boolean eq_v_b(CharSequence s)
|
||||
{
|
||||
return eq_s_b(s.length(), s);
|
||||
}
|
||||
|
||||
protected int find_among(Among v[], int v_size)
|
||||
{
|
||||
int i = 0;
|
||||
int j = v_size;
|
||||
|
||||
int c = cursor;
|
||||
int l = limit;
|
||||
|
||||
int common_i = 0;
|
||||
int common_j = 0;
|
||||
|
||||
boolean first_key_inspected = false;
|
||||
|
||||
while (true) {
|
||||
int k = i + ((j - i) >> 1);
|
||||
int diff = 0;
|
||||
int common = common_i < common_j ? common_i : common_j; // smaller
|
||||
Among w = v[k];
|
||||
int i2;
|
||||
for (i2 = common; i2 < w.s_size; i2++) {
|
||||
if (c + common == l) {
|
||||
diff = -1;
|
||||
break;
|
||||
}
|
||||
diff = current[c + common] - w.s[i2];
|
||||
if (diff != 0) break;
|
||||
common++;
|
||||
}
|
||||
if (diff < 0) {
|
||||
j = k;
|
||||
common_j = common;
|
||||
} else {
|
||||
i = k;
|
||||
common_i = common;
|
||||
}
|
||||
if (j - i <= 1) {
|
||||
if (i > 0) break; // v->s has been inspected
|
||||
if (j == i) break; // only one item in v
|
||||
|
||||
// - but now we need to go round once more to get
|
||||
// v->s inspected. This looks messy, but is actually
|
||||
// the optimal approach.
|
||||
|
||||
if (first_key_inspected) break;
|
||||
first_key_inspected = true;
|
||||
}
|
||||
}
|
||||
while (true) {
|
||||
Among w = v[i];
|
||||
if (common_i >= w.s_size) {
|
||||
cursor = c + w.s_size;
|
||||
if (w.method == null) return w.result;
|
||||
boolean res = false;
|
||||
try {
|
||||
res = (boolean) w.method.invokeExact(this);
|
||||
} catch (Error | RuntimeException e) {
|
||||
throw e;
|
||||
} catch (Throwable e) {
|
||||
throw new UndeclaredThrowableException(e);
|
||||
}
|
||||
cursor = c + w.s_size;
|
||||
if (res) return w.result;
|
||||
}
|
||||
i = w.substring_i;
|
||||
if (i < 0) return 0;
|
||||
if (first_key_inspected) break;
|
||||
first_key_inspected = true;
|
||||
}
|
||||
}
|
||||
while (true) {
|
||||
Among w = v[i];
|
||||
if (common_i >= w.s.length) {
|
||||
cursor = c + w.s.length;
|
||||
if (w.method == null) return w.result;
|
||||
boolean res = false;
|
||||
try {
|
||||
res = (boolean) w.method.invokeExact(this);
|
||||
} catch (Error | RuntimeException e) {
|
||||
throw e;
|
||||
} catch (Throwable e) {
|
||||
throw new UndeclaredThrowableException(e);
|
||||
}
|
||||
cursor = c + w.s.length;
|
||||
if (res) return w.result;
|
||||
}
|
||||
i = w.substring_i;
|
||||
if (i < 0) return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// find_among_b is for backwards processing. Same comments apply
|
||||
protected int find_among_b(Among v[], int v_size)
|
||||
{
|
||||
int i = 0;
|
||||
int j = v_size;
|
||||
protected int find_among_b(Among v[])
|
||||
{
|
||||
int i = 0;
|
||||
int j = v.length;
|
||||
|
||||
int c = cursor;
|
||||
int lb = limit_backward;
|
||||
int c = cursor;
|
||||
int lb = limit_backward;
|
||||
|
||||
int common_i = 0;
|
||||
int common_j = 0;
|
||||
int common_i = 0;
|
||||
int common_j = 0;
|
||||
|
||||
boolean first_key_inspected = false;
|
||||
boolean first_key_inspected = false;
|
||||
|
||||
while (true) {
|
||||
int k = i + ((j - i) >> 1);
|
||||
int diff = 0;
|
||||
int common = common_i < common_j ? common_i : common_j;
|
||||
Among w = v[k];
|
||||
int i2;
|
||||
for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
|
||||
if (c - common == lb) {
|
||||
diff = -1;
|
||||
break;
|
||||
}
|
||||
diff = current[c - 1 - common] - w.s[i2];
|
||||
if (diff != 0) break;
|
||||
common++;
|
||||
}
|
||||
if (diff < 0) {
|
||||
j = k;
|
||||
common_j = common;
|
||||
} else {
|
||||
i = k;
|
||||
common_i = common;
|
||||
}
|
||||
if (j - i <= 1) {
|
||||
if (i > 0) break;
|
||||
if (j == i) break;
|
||||
if (first_key_inspected) break;
|
||||
first_key_inspected = true;
|
||||
while (true) {
|
||||
int k = i + ((j - i) >> 1);
|
||||
int diff = 0;
|
||||
int common = common_i < common_j ? common_i : common_j;
|
||||
Among w = v[k];
|
||||
int i2;
|
||||
for (i2 = w.s.length - 1 - common; i2 >= 0; i2--) {
|
||||
if (c - common == lb) {
|
||||
diff = -1;
|
||||
break;
|
||||
}
|
||||
diff = current[c - 1 - common] - w.s[i2];
|
||||
if (diff != 0) break;
|
||||
common++;
|
||||
}
|
||||
while (true) {
|
||||
Among w = v[i];
|
||||
if (common_i >= w.s_size) {
|
||||
cursor = c - w.s_size;
|
||||
if (w.method == null) return w.result;
|
||||
|
||||
boolean res = false;
|
||||
try {
|
||||
res = (boolean) w.method.invokeExact(this);
|
||||
} catch (Error | RuntimeException e) {
|
||||
throw e;
|
||||
} catch (Throwable e) {
|
||||
throw new UndeclaredThrowableException(e);
|
||||
}
|
||||
cursor = c - w.s_size;
|
||||
if (res) return w.result;
|
||||
}
|
||||
i = w.substring_i;
|
||||
if (i < 0) return 0;
|
||||
if (diff < 0) {
|
||||
j = k;
|
||||
common_j = common;
|
||||
} else {
|
||||
i = k;
|
||||
common_i = common;
|
||||
}
|
||||
if (j - i <= 1) {
|
||||
if (i > 0) break;
|
||||
if (j == i) break;
|
||||
if (first_key_inspected) break;
|
||||
first_key_inspected = true;
|
||||
}
|
||||
}
|
||||
while (true) {
|
||||
Among w = v[i];
|
||||
if (common_i >= w.s.length) {
|
||||
cursor = c - w.s.length;
|
||||
if (w.method == null) return w.result;
|
||||
|
||||
boolean res = false;
|
||||
try {
|
||||
res = (boolean) w.method.invokeExact(this);
|
||||
} catch (Error | RuntimeException e) {
|
||||
throw e;
|
||||
} catch (Throwable e) {
|
||||
throw new UndeclaredThrowableException(e);
|
||||
}
|
||||
cursor = c - w.s.length;
|
||||
if (res) return w.result;
|
||||
}
|
||||
i = w.substring_i;
|
||||
if (i < 0) return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// mini version of ArrayUtil.oversize from lucene, specialized to chars
|
||||
static int oversize(int minTargetSize) {
|
||||
int extra = minTargetSize >> 3;
|
||||
if (extra < 3) {
|
||||
extra = 3;
|
||||
}
|
||||
int newSize = minTargetSize + extra;
|
||||
return (newSize + 3) & 0x7ffffffc;
|
||||
}
|
||||
|
||||
/* to replace chars between c_bra and c_ket in current by the
|
||||
* chars in s.
|
||||
*/
|
||||
protected int replace_s(int c_bra, int c_ket, CharSequence s) {
|
||||
* chars in s.
|
||||
*/
|
||||
protected int replace_s(int c_bra, int c_ket, CharSequence s)
|
||||
{
|
||||
final int adjustment = s.length() - (c_ket - c_bra);
|
||||
final int newLength = limit + adjustment;
|
||||
//resize if necessary
|
||||
if (newLength > current.length) {
|
||||
char newBuffer[] = new char[ArrayUtil.oversize(newLength, Character.BYTES)];
|
||||
char newBuffer[] = new char[oversize(newLength)];
|
||||
System.arraycopy(current, 0, newBuffer, 0, limit);
|
||||
current = newBuffer;
|
||||
}
|
||||
|
@ -409,7 +372,7 @@ public abstract class SnowballProgram {
|
|||
// replacement, need to shift things around
|
||||
if (adjustment != 0 && c_ket < limit) {
|
||||
System.arraycopy(current, c_ket, current, c_bra + s.length(),
|
||||
limit - c_ket);
|
||||
limit - c_ket);
|
||||
}
|
||||
// insert the replacement text
|
||||
// Note, faster is s.getChars(0, s.length(), current, c_bra);
|
||||
|
@ -423,73 +386,69 @@ public abstract class SnowballProgram {
|
|||
return adjustment;
|
||||
}
|
||||
|
||||
protected void slice_check() {
|
||||
protected void slice_check()
|
||||
{
|
||||
if (bra < 0 ||
|
||||
bra > ket ||
|
||||
ket > limit) {
|
||||
throw new IllegalArgumentException("faulty slice operation: bra=" + bra + ",ket=" + ket + ",limit=" + limit);
|
||||
// FIXME: report error somehow.
|
||||
/*
|
||||
fprintf(stderr, "faulty slice operation:\n");
|
||||
debug(z, -1, 0);
|
||||
exit(1);
|
||||
*/
|
||||
bra > ket ||
|
||||
ket > limit)
|
||||
{
|
||||
throw new IllegalArgumentException("faulty slice operation: bra=" + bra + ",ket=" + ket + ",limit=" + limit);
|
||||
}
|
||||
}
|
||||
|
||||
protected void slice_from(CharSequence s) {
|
||||
protected void slice_from(CharSequence s)
|
||||
{
|
||||
slice_check();
|
||||
replace_s(bra, ket, s);
|
||||
}
|
||||
|
||||
protected void slice_del() {
|
||||
slice_from((CharSequence) "");
|
||||
protected void slice_del()
|
||||
{
|
||||
slice_from("");
|
||||
}
|
||||
|
||||
protected void insert(int c_bra, int c_ket, CharSequence s)
|
||||
{
|
||||
int adjustment = replace_s(c_bra, c_ket, s);
|
||||
if (c_bra <= bra) bra += adjustment;
|
||||
if (c_bra <= ket) ket += adjustment;
|
||||
}
|
||||
{
|
||||
int adjustment = replace_s(c_bra, c_ket, s);
|
||||
if (c_bra <= bra) bra += adjustment;
|
||||
if (c_bra <= ket) ket += adjustment;
|
||||
}
|
||||
|
||||
/* Copy the slice into the supplied StringBuffer */
|
||||
protected StringBuilder slice_to(StringBuilder s)
|
||||
{
|
||||
slice_check();
|
||||
int len = ket - bra;
|
||||
s.setLength(0);
|
||||
s.append(current, bra, len);
|
||||
return s;
|
||||
}
|
||||
/* Copy the slice into the supplied StringBuilder */
|
||||
protected void slice_to(StringBuilder s)
|
||||
{
|
||||
slice_check();
|
||||
int len = ket - bra;
|
||||
s.setLength(0);
|
||||
s.append(current, bra, len);
|
||||
}
|
||||
|
||||
protected StringBuilder assign_to(StringBuilder s)
|
||||
{
|
||||
s.setLength(0);
|
||||
s.append(current, 0, limit);
|
||||
return s;
|
||||
}
|
||||
protected void assign_to(StringBuilder s)
|
||||
{
|
||||
s.setLength(0);
|
||||
s.append(current, 0, limit);
|
||||
}
|
||||
|
||||
/*
|
||||
extern void debug(struct SN_env * z, int number, int line_count)
|
||||
{ int i;
|
||||
int limit = SIZE(z->p);
|
||||
//if (number >= 0) printf("%3d (line %4d): '", number, line_count);
|
||||
if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
|
||||
for (i = 0; i <= limit; i++)
|
||||
{ if (z->lb == i) printf("{");
|
||||
if (z->bra == i) printf("[");
|
||||
if (z->c == i) printf("|");
|
||||
if (z->ket == i) printf("]");
|
||||
if (z->l == i) printf("}");
|
||||
if (i < limit)
|
||||
{ int ch = z->p[i];
|
||||
if (ch == 0) ch = '#';
|
||||
printf("%c", ch);
|
||||
}
|
||||
int limit = SIZE(z->p);
|
||||
//if (number >= 0) printf("%3d (line %4d): '", number, line_count);
|
||||
if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
|
||||
for (i = 0; i <= limit; i++)
|
||||
{ if (z->lb == i) printf("{");
|
||||
if (z->bra == i) printf("[");
|
||||
if (z->c == i) printf("|");
|
||||
if (z->ket == i) printf("]");
|
||||
if (z->l == i) printf("}");
|
||||
if (i < limit)
|
||||
{ int ch = z->p[i];
|
||||
if (ch == 0) ch = '#';
|
||||
printf("%c", ch);
|
||||
}
|
||||
printf("'\n");
|
||||
}
|
||||
printf("'\n");
|
||||
}
|
||||
*/
|
||||
};
|
||||
|
||||
};
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
Copyright (c) 2001, Dr Martin Porter
|
||||
Copyright (c) 2004,2005, Richard Boulton
|
||||
Copyright (c) 2013, Yoshiki Shibukawa
|
||||
Copyright (c) 2006,2007,2009,2010,2011,2014-2019, Olly Betts
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
3. Neither the name of the Snowball project nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package org.tartarus.snowball;
|
||||
|
||||
/**
|
||||
* Parent class of all snowball stemmers, which must implement <code>stem</code>
|
||||
*/
|
||||
public abstract class SnowballStemmer extends SnowballProgram {
|
||||
public abstract boolean stem();
|
||||
|
||||
static final long serialVersionUID = 2016072500L;
|
||||
};
|
File diff suppressed because it is too large
Load Diff
|
@ -1,517 +1,394 @@
|
|||
// This file was generated automatically by the Snowball to Java compiler
|
||||
// Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
|
||||
package org.tartarus.snowball.ext;
|
||||
|
||||
import org.tartarus.snowball.Among;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
|
||||
/**
|
||||
* This class was automatically generated by a Snowball to Java compiler
|
||||
* It implements the stemming algorithm defined by a snowball script.
|
||||
*/
|
||||
/**
|
||||
* This class implements the stemming algorithm defined by a snowball script.
|
||||
* <p>
|
||||
* Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
* </p>
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public class ArmenianStemmer extends org.tartarus.snowball.SnowballStemmer {
|
||||
|
||||
@SuppressWarnings("unused") public class ArmenianStemmer extends SnowballProgram {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final static Among a_0[] = {
|
||||
new Among("\u0580\u0578\u0580\u0564", -1, 1),
|
||||
new Among("\u0565\u0580\u0578\u0580\u0564", 0, 1),
|
||||
new Among("\u0561\u056C\u056B", -1, 1),
|
||||
new Among("\u0561\u056F\u056B", -1, 1),
|
||||
new Among("\u0578\u0580\u0561\u056F", -1, 1),
|
||||
new Among("\u0565\u0572", -1, 1),
|
||||
new Among("\u0561\u056F\u0561\u0576", -1, 1),
|
||||
new Among("\u0561\u0580\u0561\u0576", -1, 1),
|
||||
new Among("\u0565\u0576", -1, 1),
|
||||
new Among("\u0565\u056F\u0565\u0576", 8, 1),
|
||||
new Among("\u0565\u0580\u0565\u0576", 8, 1),
|
||||
new Among("\u0578\u0580\u0567\u0576", -1, 1),
|
||||
new Among("\u056B\u0576", -1, 1),
|
||||
new Among("\u0563\u056B\u0576", 12, 1),
|
||||
new Among("\u0578\u057E\u056B\u0576", 12, 1),
|
||||
new Among("\u056C\u0561\u0575\u0576", -1, 1),
|
||||
new Among("\u057E\u0578\u0582\u0576", -1, 1),
|
||||
new Among("\u057A\u0565\u057D", -1, 1),
|
||||
new Among("\u056B\u057E", -1, 1),
|
||||
new Among("\u0561\u057F", -1, 1),
|
||||
new Among("\u0561\u057E\u0565\u057F", -1, 1),
|
||||
new Among("\u056F\u0578\u057F", -1, 1),
|
||||
new Among("\u0562\u0561\u0580", -1, 1)
|
||||
};
|
||||
|
||||
/* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
|
||||
private final static Among a_1[] = {
|
||||
new Among("\u0561", -1, 1),
|
||||
new Among("\u0561\u0581\u0561", 0, 1),
|
||||
new Among("\u0565\u0581\u0561", 0, 1),
|
||||
new Among("\u057E\u0565", -1, 1),
|
||||
new Among("\u0561\u0581\u0580\u056B", -1, 1),
|
||||
new Among("\u0561\u0581\u056B", -1, 1),
|
||||
new Among("\u0565\u0581\u056B", -1, 1),
|
||||
new Among("\u057E\u0565\u0581\u056B", 6, 1),
|
||||
new Among("\u0561\u056C", -1, 1),
|
||||
new Among("\u0568\u0561\u056C", 8, 1),
|
||||
new Among("\u0561\u0576\u0561\u056C", 8, 1),
|
||||
new Among("\u0565\u0576\u0561\u056C", 8, 1),
|
||||
new Among("\u0561\u0581\u0576\u0561\u056C", 8, 1),
|
||||
new Among("\u0565\u056C", -1, 1),
|
||||
new Among("\u0568\u0565\u056C", 13, 1),
|
||||
new Among("\u0576\u0565\u056C", 13, 1),
|
||||
new Among("\u0581\u0576\u0565\u056C", 15, 1),
|
||||
new Among("\u0565\u0581\u0576\u0565\u056C", 16, 1),
|
||||
new Among("\u0579\u0565\u056C", 13, 1),
|
||||
new Among("\u057E\u0565\u056C", 13, 1),
|
||||
new Among("\u0561\u0581\u057E\u0565\u056C", 19, 1),
|
||||
new Among("\u0565\u0581\u057E\u0565\u056C", 19, 1),
|
||||
new Among("\u057F\u0565\u056C", 13, 1),
|
||||
new Among("\u0561\u057F\u0565\u056C", 22, 1),
|
||||
new Among("\u0578\u057F\u0565\u056C", 22, 1),
|
||||
new Among("\u056F\u0578\u057F\u0565\u056C", 24, 1),
|
||||
new Among("\u057E\u0561\u056E", -1, 1),
|
||||
new Among("\u0578\u0582\u0574", -1, 1),
|
||||
new Among("\u057E\u0578\u0582\u0574", 27, 1),
|
||||
new Among("\u0561\u0576", -1, 1),
|
||||
new Among("\u0581\u0561\u0576", 29, 1),
|
||||
new Among("\u0561\u0581\u0561\u0576", 30, 1),
|
||||
new Among("\u0561\u0581\u0580\u056B\u0576", -1, 1),
|
||||
new Among("\u0561\u0581\u056B\u0576", -1, 1),
|
||||
new Among("\u0565\u0581\u056B\u0576", -1, 1),
|
||||
new Among("\u057E\u0565\u0581\u056B\u0576", 34, 1),
|
||||
new Among("\u0561\u056C\u056B\u057D", -1, 1),
|
||||
new Among("\u0565\u056C\u056B\u057D", -1, 1),
|
||||
new Among("\u0561\u057E", -1, 1),
|
||||
new Among("\u0561\u0581\u0561\u057E", 38, 1),
|
||||
new Among("\u0565\u0581\u0561\u057E", 38, 1),
|
||||
new Among("\u0561\u056C\u0578\u057E", -1, 1),
|
||||
new Among("\u0565\u056C\u0578\u057E", -1, 1),
|
||||
new Among("\u0561\u0580", -1, 1),
|
||||
new Among("\u0561\u0581\u0561\u0580", 43, 1),
|
||||
new Among("\u0565\u0581\u0561\u0580", 43, 1),
|
||||
new Among("\u0561\u0581\u0580\u056B\u0580", -1, 1),
|
||||
new Among("\u0561\u0581\u056B\u0580", -1, 1),
|
||||
new Among("\u0565\u0581\u056B\u0580", -1, 1),
|
||||
new Among("\u057E\u0565\u0581\u056B\u0580", 48, 1),
|
||||
new Among("\u0561\u0581", -1, 1),
|
||||
new Among("\u0565\u0581", -1, 1),
|
||||
new Among("\u0561\u0581\u0580\u0565\u0581", 51, 1),
|
||||
new Among("\u0561\u056C\u0578\u0582\u0581", -1, 1),
|
||||
new Among("\u0565\u056C\u0578\u0582\u0581", -1, 1),
|
||||
new Among("\u0561\u056C\u0578\u0582", -1, 1),
|
||||
new Among("\u0565\u056C\u0578\u0582", -1, 1),
|
||||
new Among("\u0561\u0584", -1, 1),
|
||||
new Among("\u0581\u0561\u0584", 57, 1),
|
||||
new Among("\u0561\u0581\u0561\u0584", 58, 1),
|
||||
new Among("\u0561\u0581\u0580\u056B\u0584", -1, 1),
|
||||
new Among("\u0561\u0581\u056B\u0584", -1, 1),
|
||||
new Among("\u0565\u0581\u056B\u0584", -1, 1),
|
||||
new Among("\u057E\u0565\u0581\u056B\u0584", 62, 1),
|
||||
new Among("\u0561\u0576\u0584", -1, 1),
|
||||
new Among("\u0581\u0561\u0576\u0584", 64, 1),
|
||||
new Among("\u0561\u0581\u0561\u0576\u0584", 65, 1),
|
||||
new Among("\u0561\u0581\u0580\u056B\u0576\u0584", -1, 1),
|
||||
new Among("\u0561\u0581\u056B\u0576\u0584", -1, 1),
|
||||
new Among("\u0565\u0581\u056B\u0576\u0584", -1, 1),
|
||||
new Among("\u057E\u0565\u0581\u056B\u0576\u0584", 69, 1)
|
||||
};
|
||||
|
||||
private final static Among a_0[] = {
|
||||
new Among ( "\u0580\u0578\u0580\u0564", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0580\u0578\u0580\u0564", 0, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056C\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056F\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0580\u0561\u056F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0572", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056F\u0561\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0580\u0561\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u056F\u0565\u0576", 8, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0580\u0565\u0576", 8, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0580\u0567\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0563\u056B\u0576", 12, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u057E\u056B\u0576", 12, 1, "", methodObject ),
|
||||
new Among ( "\u056C\u0561\u0575\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0578\u0582\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057A\u0565\u057D", -1, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u057E", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u057F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u057E\u0565\u057F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u056F\u0578\u057F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0562\u0561\u0580", -1, 1, "", methodObject )
|
||||
};
|
||||
private final static Among a_2[] = {
|
||||
new Among("\u0578\u0580\u0564", -1, 1),
|
||||
new Among("\u0578\u0582\u0575\u0569", -1, 1),
|
||||
new Among("\u0578\u0582\u0570\u056B", -1, 1),
|
||||
new Among("\u0581\u056B", -1, 1),
|
||||
new Among("\u056B\u056C", -1, 1),
|
||||
new Among("\u0561\u056F", -1, 1),
|
||||
new Among("\u0575\u0561\u056F", 5, 1),
|
||||
new Among("\u0561\u0576\u0561\u056F", 5, 1),
|
||||
new Among("\u056B\u056F", -1, 1),
|
||||
new Among("\u0578\u0582\u056F", -1, 1),
|
||||
new Among("\u0561\u0576", -1, 1),
|
||||
new Among("\u057A\u0561\u0576", 10, 1),
|
||||
new Among("\u057D\u057F\u0561\u0576", 10, 1),
|
||||
new Among("\u0561\u0580\u0561\u0576", 10, 1),
|
||||
new Among("\u0565\u0572\u0567\u0576", -1, 1),
|
||||
new Among("\u0575\u0578\u0582\u0576", -1, 1),
|
||||
new Among("\u0578\u0582\u0569\u0575\u0578\u0582\u0576", 15, 1),
|
||||
new Among("\u0561\u056E\u0578", -1, 1),
|
||||
new Among("\u056B\u0579", -1, 1),
|
||||
new Among("\u0578\u0582\u057D", -1, 1),
|
||||
new Among("\u0578\u0582\u057D\u057F", -1, 1),
|
||||
new Among("\u0563\u0561\u0580", -1, 1),
|
||||
new Among("\u057E\u0578\u0580", -1, 1),
|
||||
new Among("\u0561\u057E\u0578\u0580", 22, 1),
|
||||
new Among("\u0578\u0581", -1, 1),
|
||||
new Among("\u0561\u0576\u0585\u0581", -1, 1),
|
||||
new Among("\u0578\u0582", -1, 1),
|
||||
new Among("\u0584", -1, 1),
|
||||
new Among("\u0579\u0565\u0584", 27, 1),
|
||||
new Among("\u056B\u0584", 27, 1),
|
||||
new Among("\u0561\u056C\u056B\u0584", 29, 1),
|
||||
new Among("\u0561\u0576\u056B\u0584", 29, 1),
|
||||
new Among("\u057E\u0561\u056E\u0584", 27, 1),
|
||||
new Among("\u0578\u0582\u0575\u0584", 27, 1),
|
||||
new Among("\u0565\u0576\u0584", 27, 1),
|
||||
new Among("\u0578\u0576\u0584", 27, 1),
|
||||
new Among("\u0578\u0582\u0576\u0584", 27, 1),
|
||||
new Among("\u0574\u0578\u0582\u0576\u0584", 36, 1),
|
||||
new Among("\u056B\u0579\u0584", 27, 1),
|
||||
new Among("\u0561\u0580\u0584", 27, 1)
|
||||
};
|
||||
|
||||
private final static Among a_1[] = {
|
||||
new Among ( "\u0561", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0561", 0, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u0561", 0, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0565", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0580\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0565\u0581\u056B", 6, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056C", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0568\u0561\u056C", 8, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576\u0561\u056C", 8, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0576\u0561\u056C", 8, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0576\u0561\u056C", 8, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u056C", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0568\u0565\u056C", 13, 1, "", methodObject ),
|
||||
new Among ( "\u0576\u0565\u056C", 13, 1, "", methodObject ),
|
||||
new Among ( "\u0581\u0576\u0565\u056C", 15, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u0576\u0565\u056C", 16, 1, "", methodObject ),
|
||||
new Among ( "\u0579\u0565\u056C", 13, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0565\u056C", 13, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u057E\u0565\u056C", 19, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u057E\u0565\u056C", 19, 1, "", methodObject ),
|
||||
new Among ( "\u057F\u0565\u056C", 13, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u057F\u0565\u056C", 22, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u057F\u0565\u056C", 22, 1, "", methodObject ),
|
||||
new Among ( "\u056F\u0578\u057F\u0565\u056C", 24, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0561\u056E", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0574", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0578\u0582\u0574", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0581\u0561\u0576", 29, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0561\u0576", 30, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0580\u056B\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u056B\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u056B\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0565\u0581\u056B\u0576", 34, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056C\u056B\u057D", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u056C\u056B\u057D", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u057E", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0561\u057E", 38, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u0561\u057E", 38, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056C\u0578\u057E", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u056C\u0578\u057E", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0580", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0561\u0580", 43, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u0561\u0580", 43, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0580\u056B\u0580", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u056B\u0580", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u056B\u0580", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0565\u0581\u056B\u0580", 48, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0580\u0565\u0581", 51, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056C\u0578\u0582\u0581", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u056C\u0578\u0582\u0581", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056C\u0578\u0582", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u056C\u0578\u0582", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0581\u0561\u0584", 57, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0561\u0584", 58, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0580\u056B\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u056B\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u056B\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0565\u0581\u056B\u0584", 62, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0581\u0561\u0576\u0584", 64, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0561\u0576\u0584", 65, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0580\u056B\u0576\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u056B\u0576\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u056B\u0576\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0565\u0581\u056B\u0576\u0584", 69, 1, "", methodObject )
|
||||
};
|
||||
private final static Among a_3[] = {
|
||||
new Among("\u057D\u0561", -1, 1),
|
||||
new Among("\u057E\u0561", -1, 1),
|
||||
new Among("\u0561\u0574\u0562", -1, 1),
|
||||
new Among("\u0564", -1, 1),
|
||||
new Among("\u0561\u0576\u0564", 3, 1),
|
||||
new Among("\u0578\u0582\u0569\u0575\u0561\u0576\u0564", 4, 1),
|
||||
new Among("\u057E\u0561\u0576\u0564", 4, 1),
|
||||
new Among("\u0578\u057B\u0564", 3, 1),
|
||||
new Among("\u0565\u0580\u0564", 3, 1),
|
||||
new Among("\u0576\u0565\u0580\u0564", 8, 1),
|
||||
new Among("\u0578\u0582\u0564", 3, 1),
|
||||
new Among("\u0568", -1, 1),
|
||||
new Among("\u0561\u0576\u0568", 11, 1),
|
||||
new Among("\u0578\u0582\u0569\u0575\u0561\u0576\u0568", 12, 1),
|
||||
new Among("\u057E\u0561\u0576\u0568", 12, 1),
|
||||
new Among("\u0578\u057B\u0568", 11, 1),
|
||||
new Among("\u0565\u0580\u0568", 11, 1),
|
||||
new Among("\u0576\u0565\u0580\u0568", 16, 1),
|
||||
new Among("\u056B", -1, 1),
|
||||
new Among("\u057E\u056B", 18, 1),
|
||||
new Among("\u0565\u0580\u056B", 18, 1),
|
||||
new Among("\u0576\u0565\u0580\u056B", 20, 1),
|
||||
new Among("\u0561\u0576\u0578\u0582\u0574", -1, 1),
|
||||
new Among("\u0565\u0580\u0578\u0582\u0574", -1, 1),
|
||||
new Among("\u0576\u0565\u0580\u0578\u0582\u0574", 23, 1),
|
||||
new Among("\u0576", -1, 1),
|
||||
new Among("\u0561\u0576", 25, 1),
|
||||
new Among("\u0578\u0582\u0569\u0575\u0561\u0576", 26, 1),
|
||||
new Among("\u057E\u0561\u0576", 26, 1),
|
||||
new Among("\u056B\u0576", 25, 1),
|
||||
new Among("\u0565\u0580\u056B\u0576", 29, 1),
|
||||
new Among("\u0576\u0565\u0580\u056B\u0576", 30, 1),
|
||||
new Among("\u0578\u0582\u0569\u0575\u0561\u0576\u0576", 25, 1),
|
||||
new Among("\u0565\u0580\u0576", 25, 1),
|
||||
new Among("\u0576\u0565\u0580\u0576", 33, 1),
|
||||
new Among("\u0578\u0582\u0576", 25, 1),
|
||||
new Among("\u0578\u057B", -1, 1),
|
||||
new Among("\u0578\u0582\u0569\u0575\u0561\u0576\u057D", -1, 1),
|
||||
new Among("\u057E\u0561\u0576\u057D", -1, 1),
|
||||
new Among("\u0578\u057B\u057D", -1, 1),
|
||||
new Among("\u0578\u057E", -1, 1),
|
||||
new Among("\u0561\u0576\u0578\u057E", 40, 1),
|
||||
new Among("\u057E\u0578\u057E", 40, 1),
|
||||
new Among("\u0565\u0580\u0578\u057E", 40, 1),
|
||||
new Among("\u0576\u0565\u0580\u0578\u057E", 43, 1),
|
||||
new Among("\u0565\u0580", -1, 1),
|
||||
new Among("\u0576\u0565\u0580", 45, 1),
|
||||
new Among("\u0581", -1, 1),
|
||||
new Among("\u056B\u0581", 47, 1),
|
||||
new Among("\u057E\u0561\u0576\u056B\u0581", 48, 1),
|
||||
new Among("\u0578\u057B\u056B\u0581", 48, 1),
|
||||
new Among("\u057E\u056B\u0581", 48, 1),
|
||||
new Among("\u0565\u0580\u056B\u0581", 48, 1),
|
||||
new Among("\u0576\u0565\u0580\u056B\u0581", 52, 1),
|
||||
new Among("\u0581\u056B\u0581", 48, 1),
|
||||
new Among("\u0578\u0581", 47, 1),
|
||||
new Among("\u0578\u0582\u0581", 47, 1)
|
||||
};
|
||||
|
||||
private final static Among a_2[] = {
|
||||
new Among ( "\u0578\u0580\u0564", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0575\u0569", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0570\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0581\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u056C", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0575\u0561\u056F", 5, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576\u0561\u056F", 5, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u056F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u056F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057A\u0561\u0576", 10, 1, "", methodObject ),
|
||||
new Among ( "\u057D\u057F\u0561\u0576", 10, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0580\u0561\u0576", 10, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0572\u0567\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0575\u0578\u0582\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0569\u0575\u0578\u0582\u0576", 15, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056E\u0578", -1, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u0579", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u057D", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u057D\u057F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0563\u0561\u0580", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0578\u0580", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u057E\u0578\u0580", 22, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0581", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576\u0585\u0581", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0579\u0565\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056C\u056B\u0584", 29, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576\u056B\u0584", 29, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0561\u056E\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0575\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0576\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0576\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0576\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0574\u0578\u0582\u0576\u0584", 36, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u0579\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0580\u0584", 27, 1, "", methodObject )
|
||||
};
|
||||
private static final char g_v[] = {209, 4, 128, 0, 18 };
|
||||
|
||||
private final static Among a_3[] = {
|
||||
new Among ( "\u057D\u0561", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0561", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0574\u0562", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0564", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576\u0564", 3, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u0564", 4, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0561\u0576\u0564", 4, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u057B\u0564", 3, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0580\u0564", 3, 1, "", methodObject ),
|
||||
new Among ( "\u0576\u0565\u0580\u0564", 8, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0564", 3, 1, "", methodObject ),
|
||||
new Among ( "\u0568", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576\u0568", 11, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u0568", 12, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0561\u0576\u0568", 12, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u057B\u0568", 11, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0580\u0568", 11, 1, "", methodObject ),
|
||||
new Among ( "\u0576\u0565\u0580\u0568", 16, 1, "", methodObject ),
|
||||
new Among ( "\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u056B", 18, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0580\u056B", 18, 1, "", methodObject ),
|
||||
new Among ( "\u0576\u0565\u0580\u056B", 20, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576\u0578\u0582\u0574", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0580\u0578\u0582\u0574", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0576\u0565\u0580\u0578\u0582\u0574", 23, 1, "", methodObject ),
|
||||
new Among ( "\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576", 25, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576", 26, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0561\u0576", 26, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u0576", 25, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0580\u056B\u0576", 29, 1, "", methodObject ),
|
||||
new Among ( "\u0576\u0565\u0580\u056B\u0576", 30, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u0576", 25, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0580\u0576", 25, 1, "", methodObject ),
|
||||
new Among ( "\u0576\u0565\u0580\u0576", 33, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0576", 25, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u057B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u057D", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0561\u0576\u057D", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u057B\u057D", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u057E", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576\u0578\u057E", 40, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0578\u057E", 40, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0580\u0578\u057E", 40, 1, "", methodObject ),
|
||||
new Among ( "\u0576\u0565\u0580\u0578\u057E", 43, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0580", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0576\u0565\u0580", 45, 1, "", methodObject ),
|
||||
new Among ( "\u0581", -1, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u0581", 47, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0561\u0576\u056B\u0581", 48, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u057B\u056B\u0581", 48, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u056B\u0581", 48, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0580\u056B\u0581", 48, 1, "", methodObject ),
|
||||
new Among ( "\u0576\u0565\u0580\u056B\u0581", 52, 1, "", methodObject ),
|
||||
new Among ( "\u0581\u056B\u0581", 48, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0581", 47, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0581", 47, 1, "", methodObject )
|
||||
};
|
||||
private int I_p2;
|
||||
private int I_pV;
|
||||
|
||||
private static final char g_v[] = {209, 4, 128, 0, 18 };
|
||||
|
||||
private int I_p2;
|
||||
private int I_pV;
|
||||
|
||||
private void copy_from(ArmenianStemmer other) {
|
||||
I_p2 = other.I_p2;
|
||||
I_pV = other.I_pV;
|
||||
super.copy_from(other);
|
||||
}
|
||||
|
||||
private boolean r_mark_regions() {
|
||||
int v_1;
|
||||
// (, line 58
|
||||
I_pV = limit;
|
||||
I_p2 = limit;
|
||||
// do, line 62
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// (, line 62
|
||||
// gopast, line 63
|
||||
golab1: while(true)
|
||||
{
|
||||
lab2: do {
|
||||
if (!(in_grouping(g_v, 1377, 1413)))
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
break golab1;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark pV, line 63
|
||||
I_pV = cursor;
|
||||
// gopast, line 63
|
||||
golab3: while(true)
|
||||
{
|
||||
lab4: do {
|
||||
if (!(out_grouping(g_v, 1377, 1413)))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
break golab3;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// gopast, line 64
|
||||
golab5: while(true)
|
||||
{
|
||||
lab6: do {
|
||||
if (!(in_grouping(g_v, 1377, 1413)))
|
||||
{
|
||||
break lab6;
|
||||
}
|
||||
break golab5;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// gopast, line 64
|
||||
golab7: while(true)
|
||||
{
|
||||
lab8: do {
|
||||
if (!(out_grouping(g_v, 1377, 1413)))
|
||||
{
|
||||
break lab8;
|
||||
}
|
||||
break golab7;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark p2, line 64
|
||||
I_p2 = cursor;
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_R2() {
|
||||
if (!(I_p2 <= cursor))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_adjective() {
|
||||
int among_var;
|
||||
// (, line 72
|
||||
// [, line 73
|
||||
ket = cursor;
|
||||
// substring, line 73
|
||||
among_var = find_among_b(a_0, 23);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 73
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 98
|
||||
// delete, line 98
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_verb() {
|
||||
int among_var;
|
||||
// (, line 102
|
||||
// [, line 103
|
||||
ket = cursor;
|
||||
// substring, line 103
|
||||
among_var = find_among_b(a_1, 71);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 103
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 176
|
||||
// delete, line 176
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_noun() {
|
||||
int among_var;
|
||||
// (, line 180
|
||||
// [, line 181
|
||||
ket = cursor;
|
||||
// substring, line 181
|
||||
among_var = find_among_b(a_2, 40);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 181
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 223
|
||||
// delete, line 223
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_ending() {
|
||||
int among_var;
|
||||
// (, line 227
|
||||
// [, line 228
|
||||
ket = cursor;
|
||||
// substring, line 228
|
||||
among_var = find_among_b(a_3, 57);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 228
|
||||
bra = cursor;
|
||||
// call R2, line 228
|
||||
if (!r_R2())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 287
|
||||
// delete, line 287
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean stem() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
int v_4;
|
||||
int v_5;
|
||||
int v_6;
|
||||
int v_7;
|
||||
// (, line 292
|
||||
// do, line 294
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// call mark_regions, line 294
|
||||
if (!r_mark_regions())
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// backwards, line 295
|
||||
limit_backward = cursor; cursor = limit;
|
||||
// setlimit, line 295
|
||||
v_2 = limit - cursor;
|
||||
// tomark, line 295
|
||||
if (cursor < I_pV)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_pV;
|
||||
v_3 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_2;
|
||||
// (, line 295
|
||||
// do, line 296
|
||||
v_4 = limit - cursor;
|
||||
lab1: do {
|
||||
// call ending, line 296
|
||||
if (!r_ending())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
// do, line 297
|
||||
v_5 = limit - cursor;
|
||||
lab2: do {
|
||||
// call verb, line 297
|
||||
if (!r_verb())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_5;
|
||||
// do, line 298
|
||||
v_6 = limit - cursor;
|
||||
lab3: do {
|
||||
// call adjective, line 298
|
||||
if (!r_adjective())
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_6;
|
||||
// do, line 299
|
||||
v_7 = limit - cursor;
|
||||
lab4: do {
|
||||
// call noun, line 299
|
||||
if (!r_noun())
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_7;
|
||||
limit_backward = v_3;
|
||||
cursor = limit_backward; return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof ArmenianStemmer;
|
||||
private boolean r_mark_regions() {
|
||||
I_pV = limit;
|
||||
I_p2 = limit;
|
||||
int v_1 = cursor;
|
||||
lab0: {
|
||||
golab1: while(true)
|
||||
{
|
||||
lab2: {
|
||||
if (!(in_grouping(g_v, 1377, 1413)))
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return ArmenianStemmer.class.getName().hashCode();
|
||||
break golab1;
|
||||
}
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
I_pV = cursor;
|
||||
golab3: while(true)
|
||||
{
|
||||
lab4: {
|
||||
if (!(out_grouping(g_v, 1377, 1413)))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
break golab3;
|
||||
}
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
golab5: while(true)
|
||||
{
|
||||
lab6: {
|
||||
if (!(in_grouping(g_v, 1377, 1413)))
|
||||
{
|
||||
break lab6;
|
||||
}
|
||||
break golab5;
|
||||
}
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
golab7: while(true)
|
||||
{
|
||||
lab8: {
|
||||
if (!(out_grouping(g_v, 1377, 1413)))
|
||||
{
|
||||
break lab8;
|
||||
}
|
||||
break golab7;
|
||||
}
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
I_p2 = cursor;
|
||||
}
|
||||
cursor = v_1;
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_R2() {
|
||||
if (!(I_p2 <= cursor))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_adjective() {
|
||||
ket = cursor;
|
||||
if (find_among_b(a_0) == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_verb() {
|
||||
ket = cursor;
|
||||
if (find_among_b(a_1) == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_noun() {
|
||||
ket = cursor;
|
||||
if (find_among_b(a_2) == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_ending() {
|
||||
ket = cursor;
|
||||
if (find_among_b(a_3) == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
if (!r_R2())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean stem() {
|
||||
r_mark_regions();
|
||||
limit_backward = cursor;
|
||||
cursor = limit;
|
||||
if (cursor < I_pV)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
int v_3 = limit_backward;
|
||||
limit_backward = I_pV;
|
||||
int v_4 = limit - cursor;
|
||||
r_ending();
|
||||
cursor = limit - v_4;
|
||||
int v_5 = limit - cursor;
|
||||
r_verb();
|
||||
cursor = limit - v_5;
|
||||
int v_6 = limit - cursor;
|
||||
r_adjective();
|
||||
cursor = limit - v_6;
|
||||
int v_7 = limit - cursor;
|
||||
r_noun();
|
||||
cursor = limit - v_7;
|
||||
limit_backward = v_3;
|
||||
cursor = limit_backward;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof ArmenianStemmer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return ArmenianStemmer.class.getName().hashCode();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,442 +1,301 @@
|
|||
// This file was generated automatically by the Snowball to Java compiler
|
||||
// Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
|
||||
package org.tartarus.snowball.ext;
|
||||
|
||||
import org.tartarus.snowball.Among;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
|
||||
/**
|
||||
* This class was automatically generated by a Snowball to Java compiler
|
||||
* It implements the stemming algorithm defined by a snowball script.
|
||||
*/
|
||||
/**
|
||||
* This class implements the stemming algorithm defined by a snowball script.
|
||||
* <p>
|
||||
* Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
* </p>
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public class DanishStemmer extends org.tartarus.snowball.SnowballStemmer {
|
||||
|
||||
@SuppressWarnings("unused") public class DanishStemmer extends SnowballProgram {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final static Among a_0[] = {
|
||||
new Among("hed", -1, 1),
|
||||
new Among("ethed", 0, 1),
|
||||
new Among("ered", -1, 1),
|
||||
new Among("e", -1, 1),
|
||||
new Among("erede", 3, 1),
|
||||
new Among("ende", 3, 1),
|
||||
new Among("erende", 5, 1),
|
||||
new Among("ene", 3, 1),
|
||||
new Among("erne", 3, 1),
|
||||
new Among("ere", 3, 1),
|
||||
new Among("en", -1, 1),
|
||||
new Among("heden", 10, 1),
|
||||
new Among("eren", 10, 1),
|
||||
new Among("er", -1, 1),
|
||||
new Among("heder", 13, 1),
|
||||
new Among("erer", 13, 1),
|
||||
new Among("s", -1, 2),
|
||||
new Among("heds", 16, 1),
|
||||
new Among("es", 16, 1),
|
||||
new Among("endes", 18, 1),
|
||||
new Among("erendes", 19, 1),
|
||||
new Among("enes", 18, 1),
|
||||
new Among("ernes", 18, 1),
|
||||
new Among("eres", 18, 1),
|
||||
new Among("ens", 16, 1),
|
||||
new Among("hedens", 24, 1),
|
||||
new Among("erens", 24, 1),
|
||||
new Among("ers", 16, 1),
|
||||
new Among("ets", 16, 1),
|
||||
new Among("erets", 28, 1),
|
||||
new Among("et", -1, 1),
|
||||
new Among("eret", 30, 1)
|
||||
};
|
||||
|
||||
/* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
|
||||
private final static Among a_1[] = {
|
||||
new Among("gd", -1, -1),
|
||||
new Among("dt", -1, -1),
|
||||
new Among("gt", -1, -1),
|
||||
new Among("kt", -1, -1)
|
||||
};
|
||||
|
||||
private final static Among a_0[] = {
|
||||
new Among ( "hed", -1, 1, "", methodObject ),
|
||||
new Among ( "ethed", 0, 1, "", methodObject ),
|
||||
new Among ( "ered", -1, 1, "", methodObject ),
|
||||
new Among ( "e", -1, 1, "", methodObject ),
|
||||
new Among ( "erede", 3, 1, "", methodObject ),
|
||||
new Among ( "ende", 3, 1, "", methodObject ),
|
||||
new Among ( "erende", 5, 1, "", methodObject ),
|
||||
new Among ( "ene", 3, 1, "", methodObject ),
|
||||
new Among ( "erne", 3, 1, "", methodObject ),
|
||||
new Among ( "ere", 3, 1, "", methodObject ),
|
||||
new Among ( "en", -1, 1, "", methodObject ),
|
||||
new Among ( "heden", 10, 1, "", methodObject ),
|
||||
new Among ( "eren", 10, 1, "", methodObject ),
|
||||
new Among ( "er", -1, 1, "", methodObject ),
|
||||
new Among ( "heder", 13, 1, "", methodObject ),
|
||||
new Among ( "erer", 13, 1, "", methodObject ),
|
||||
new Among ( "s", -1, 2, "", methodObject ),
|
||||
new Among ( "heds", 16, 1, "", methodObject ),
|
||||
new Among ( "es", 16, 1, "", methodObject ),
|
||||
new Among ( "endes", 18, 1, "", methodObject ),
|
||||
new Among ( "erendes", 19, 1, "", methodObject ),
|
||||
new Among ( "enes", 18, 1, "", methodObject ),
|
||||
new Among ( "ernes", 18, 1, "", methodObject ),
|
||||
new Among ( "eres", 18, 1, "", methodObject ),
|
||||
new Among ( "ens", 16, 1, "", methodObject ),
|
||||
new Among ( "hedens", 24, 1, "", methodObject ),
|
||||
new Among ( "erens", 24, 1, "", methodObject ),
|
||||
new Among ( "ers", 16, 1, "", methodObject ),
|
||||
new Among ( "ets", 16, 1, "", methodObject ),
|
||||
new Among ( "erets", 28, 1, "", methodObject ),
|
||||
new Among ( "et", -1, 1, "", methodObject ),
|
||||
new Among ( "eret", 30, 1, "", methodObject )
|
||||
};
|
||||
private final static Among a_2[] = {
|
||||
new Among("ig", -1, 1),
|
||||
new Among("lig", 0, 1),
|
||||
new Among("elig", 1, 1),
|
||||
new Among("els", -1, 1),
|
||||
new Among("l\u00F8st", -1, 2)
|
||||
};
|
||||
|
||||
private final static Among a_1[] = {
|
||||
new Among ( "gd", -1, -1, "", methodObject ),
|
||||
new Among ( "dt", -1, -1, "", methodObject ),
|
||||
new Among ( "gt", -1, -1, "", methodObject ),
|
||||
new Among ( "kt", -1, -1, "", methodObject )
|
||||
};
|
||||
private static final char g_c[] = {119, 223, 119, 1 };
|
||||
|
||||
private final static Among a_2[] = {
|
||||
new Among ( "ig", -1, 1, "", methodObject ),
|
||||
new Among ( "lig", 0, 1, "", methodObject ),
|
||||
new Among ( "elig", 1, 1, "", methodObject ),
|
||||
new Among ( "els", -1, 1, "", methodObject ),
|
||||
new Among ( "l\u00F8st", -1, 2, "", methodObject )
|
||||
};
|
||||
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
|
||||
|
||||
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
|
||||
private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
|
||||
|
||||
private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
|
||||
private int I_x;
|
||||
private int I_p1;
|
||||
private java.lang.StringBuilder S_ch = new java.lang.StringBuilder();
|
||||
|
||||
private int I_x;
|
||||
private int I_p1;
|
||||
private java.lang.StringBuilder S_ch = new java.lang.StringBuilder();
|
||||
|
||||
private void copy_from(DanishStemmer other) {
|
||||
I_x = other.I_x;
|
||||
I_p1 = other.I_p1;
|
||||
S_ch = other.S_ch;
|
||||
super.copy_from(other);
|
||||
}
|
||||
private boolean r_mark_regions() {
|
||||
I_p1 = limit;
|
||||
int v_1 = cursor;
|
||||
{
|
||||
int c = cursor + 3;
|
||||
if (0 > c || c > limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = c;
|
||||
}
|
||||
I_x = cursor;
|
||||
cursor = v_1;
|
||||
golab0: while(true)
|
||||
{
|
||||
int v_2 = cursor;
|
||||
lab1: {
|
||||
if (!(in_grouping(g_v, 97, 248)))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
cursor = v_2;
|
||||
break golab0;
|
||||
}
|
||||
cursor = v_2;
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
golab2: while(true)
|
||||
{
|
||||
lab3: {
|
||||
if (!(out_grouping(g_v, 97, 248)))
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
break golab2;
|
||||
}
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
I_p1 = cursor;
|
||||
lab4: {
|
||||
if (!(I_p1 < I_x))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
I_p1 = I_x;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_mark_regions() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 29
|
||||
I_p1 = limit;
|
||||
// test, line 33
|
||||
v_1 = cursor;
|
||||
// (, line 33
|
||||
// hop, line 33
|
||||
{
|
||||
int c = cursor + 3;
|
||||
if (0 > c || c > limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = c;
|
||||
}
|
||||
// setmark x, line 33
|
||||
I_x = cursor;
|
||||
cursor = v_1;
|
||||
// goto, line 34
|
||||
golab0: while(true)
|
||||
{
|
||||
v_2 = cursor;
|
||||
lab1: do {
|
||||
if (!(in_grouping(g_v, 97, 248)))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
cursor = v_2;
|
||||
break golab0;
|
||||
} while (false);
|
||||
cursor = v_2;
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// gopast, line 34
|
||||
golab2: while(true)
|
||||
{
|
||||
lab3: do {
|
||||
if (!(out_grouping(g_v, 97, 248)))
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
break golab2;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark p1, line 34
|
||||
I_p1 = cursor;
|
||||
// try, line 35
|
||||
lab4: do {
|
||||
// (, line 35
|
||||
if (!(I_p1 < I_x))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
I_p1 = I_x;
|
||||
} while (false);
|
||||
return true;
|
||||
}
|
||||
private boolean r_main_suffix() {
|
||||
int among_var;
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
int v_2 = limit_backward;
|
||||
limit_backward = I_p1;
|
||||
ket = cursor;
|
||||
among_var = find_among_b(a_0);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch (among_var) {
|
||||
case 1:
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
if (!(in_grouping_b(g_s_ending, 97, 229)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_main_suffix() {
|
||||
int among_var;
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 40
|
||||
// setlimit, line 41
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 41
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 41
|
||||
// [, line 41
|
||||
ket = cursor;
|
||||
// substring, line 41
|
||||
among_var = find_among_b(a_0, 32);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 41
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 48
|
||||
// delete, line 48
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 50
|
||||
if (!(in_grouping_b(g_s_ending, 97, 229)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 50
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
private boolean r_consonant_pair() {
|
||||
int v_1 = limit - cursor;
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
int v_3 = limit_backward;
|
||||
limit_backward = I_p1;
|
||||
ket = cursor;
|
||||
if (find_among_b(a_1) == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
cursor = limit - v_1;
|
||||
if (cursor <= limit_backward)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor--;
|
||||
bra = cursor;
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_consonant_pair() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
// (, line 54
|
||||
// test, line 55
|
||||
v_1 = limit - cursor;
|
||||
// (, line 55
|
||||
// setlimit, line 56
|
||||
v_2 = limit - cursor;
|
||||
// tomark, line 56
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_3 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_2;
|
||||
// (, line 56
|
||||
// [, line 56
|
||||
ket = cursor;
|
||||
// substring, line 56
|
||||
if (find_among_b(a_1, 4) == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
// ], line 56
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
cursor = limit - v_1;
|
||||
// next, line 62
|
||||
if (cursor <= limit_backward)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor--;
|
||||
// ], line 62
|
||||
bra = cursor;
|
||||
// delete, line 62
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
private boolean r_other_suffix() {
|
||||
int among_var;
|
||||
int v_1 = limit - cursor;
|
||||
lab0: {
|
||||
ket = cursor;
|
||||
if (!(eq_s_b("st")))
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
bra = cursor;
|
||||
if (!(eq_s_b("ig")))
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
slice_del();
|
||||
}
|
||||
cursor = limit - v_1;
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
int v_3 = limit_backward;
|
||||
limit_backward = I_p1;
|
||||
ket = cursor;
|
||||
among_var = find_among_b(a_2);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
switch (among_var) {
|
||||
case 1:
|
||||
slice_del();
|
||||
int v_4 = limit - cursor;
|
||||
r_consonant_pair();
|
||||
cursor = limit - v_4;
|
||||
break;
|
||||
case 2:
|
||||
slice_from("l\u00F8s");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_other_suffix() {
|
||||
int among_var;
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
int v_4;
|
||||
// (, line 65
|
||||
// do, line 66
|
||||
v_1 = limit - cursor;
|
||||
lab0: do {
|
||||
// (, line 66
|
||||
// [, line 66
|
||||
ket = cursor;
|
||||
// literal, line 66
|
||||
if (!(eq_s_b(2, "st")))
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
// ], line 66
|
||||
bra = cursor;
|
||||
// literal, line 66
|
||||
if (!(eq_s_b(2, "ig")))
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
// delete, line 66
|
||||
slice_del();
|
||||
} while (false);
|
||||
cursor = limit - v_1;
|
||||
// setlimit, line 67
|
||||
v_2 = limit - cursor;
|
||||
// tomark, line 67
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_3 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_2;
|
||||
// (, line 67
|
||||
// [, line 67
|
||||
ket = cursor;
|
||||
// substring, line 67
|
||||
among_var = find_among_b(a_2, 5);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
// ], line 67
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 70
|
||||
// delete, line 70
|
||||
slice_del();
|
||||
// do, line 70
|
||||
v_4 = limit - cursor;
|
||||
lab1: do {
|
||||
// call consonant_pair, line 70
|
||||
if (!r_consonant_pair())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
break;
|
||||
case 2:
|
||||
// (, line 72
|
||||
// <-, line 72
|
||||
slice_from("l\u00F8s");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
private boolean r_undouble() {
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
int v_2 = limit_backward;
|
||||
limit_backward = I_p1;
|
||||
ket = cursor;
|
||||
if (!(in_grouping_b(g_c, 98, 122)))
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
slice_to(S_ch);
|
||||
limit_backward = v_2;
|
||||
if (!(eq_s_b(S_ch)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_undouble() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 75
|
||||
// setlimit, line 76
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 76
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 76
|
||||
// [, line 76
|
||||
ket = cursor;
|
||||
if (!(out_grouping_b(g_v, 97, 248)))
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 76
|
||||
bra = cursor;
|
||||
// -> ch, line 76
|
||||
S_ch = slice_to(S_ch);
|
||||
limit_backward = v_2;
|
||||
// name ch, line 77
|
||||
if (!(eq_v_b(S_ch)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 78
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
public boolean stem() {
|
||||
int v_1 = cursor;
|
||||
r_mark_regions();
|
||||
cursor = v_1;
|
||||
limit_backward = cursor;
|
||||
cursor = limit;
|
||||
int v_2 = limit - cursor;
|
||||
r_main_suffix();
|
||||
cursor = limit - v_2;
|
||||
int v_3 = limit - cursor;
|
||||
r_consonant_pair();
|
||||
cursor = limit - v_3;
|
||||
int v_4 = limit - cursor;
|
||||
r_other_suffix();
|
||||
cursor = limit - v_4;
|
||||
int v_5 = limit - cursor;
|
||||
r_undouble();
|
||||
cursor = limit - v_5;
|
||||
cursor = limit_backward;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean stem() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
int v_4;
|
||||
int v_5;
|
||||
// (, line 82
|
||||
// do, line 84
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// call mark_regions, line 84
|
||||
if (!r_mark_regions())
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// backwards, line 85
|
||||
limit_backward = cursor; cursor = limit;
|
||||
// (, line 85
|
||||
// do, line 86
|
||||
v_2 = limit - cursor;
|
||||
lab1: do {
|
||||
// call main_suffix, line 86
|
||||
if (!r_main_suffix())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_2;
|
||||
// do, line 87
|
||||
v_3 = limit - cursor;
|
||||
lab2: do {
|
||||
// call consonant_pair, line 87
|
||||
if (!r_consonant_pair())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// do, line 88
|
||||
v_4 = limit - cursor;
|
||||
lab3: do {
|
||||
// call other_suffix, line 88
|
||||
if (!r_other_suffix())
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
// do, line 89
|
||||
v_5 = limit - cursor;
|
||||
lab4: do {
|
||||
// call undouble, line 89
|
||||
if (!r_undouble())
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_5;
|
||||
cursor = limit_backward; return true;
|
||||
}
|
||||
@Override
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof DanishStemmer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof DanishStemmer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return DanishStemmer.class.getName().hashCode();
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return DanishStemmer.class.getName().hashCode();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,210 @@
|
|||
// Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
|
||||
package org.tartarus.snowball.ext;
|
||||
|
||||
import org.tartarus.snowball.Among;
|
||||
|
||||
/**
|
||||
* This class implements the stemming algorithm defined by a snowball script.
|
||||
* <p>
|
||||
* Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
* </p>
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public class HindiStemmer extends org.tartarus.snowball.SnowballStemmer {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
|
||||
|
||||
private final static Among a_0[] = {
|
||||
new Among("\u0906\u0901", -1, -1),
|
||||
new Among("\u093E\u0901", -1, -1),
|
||||
new Among("\u0907\u092F\u093E\u0901", 1, -1),
|
||||
new Among("\u0906\u0907\u092F\u093E\u0901", 2, -1),
|
||||
new Among("\u093E\u0907\u092F\u093E\u0901", 2, -1),
|
||||
new Among("\u093F\u092F\u093E\u0901", 1, -1),
|
||||
new Among("\u0906\u0902", -1, -1),
|
||||
new Among("\u0909\u0906\u0902", 6, -1),
|
||||
new Among("\u0941\u0906\u0902", 6, -1),
|
||||
new Among("\u0908\u0902", -1, -1),
|
||||
new Among("\u0906\u0908\u0902", 9, -1),
|
||||
new Among("\u093E\u0908\u0902", 9, -1),
|
||||
new Among("\u090F\u0902", -1, -1),
|
||||
new Among("\u0906\u090F\u0902", 12, -1),
|
||||
new Among("\u0909\u090F\u0902", 12, -1),
|
||||
new Among("\u093E\u090F\u0902", 12, -1),
|
||||
new Among("\u0924\u093E\u090F\u0902", 15, -1, "r_CONSONANT", methodObject),
|
||||
new Among("\u0905\u0924\u093E\u090F\u0902", 16, -1),
|
||||
new Among("\u0928\u093E\u090F\u0902", 15, -1, "r_CONSONANT", methodObject),
|
||||
new Among("\u0905\u0928\u093E\u090F\u0902", 18, -1),
|
||||
new Among("\u0941\u090F\u0902", 12, -1),
|
||||
new Among("\u0913\u0902", -1, -1),
|
||||
new Among("\u0906\u0913\u0902", 21, -1),
|
||||
new Among("\u0909\u0913\u0902", 21, -1),
|
||||
new Among("\u093E\u0913\u0902", 21, -1),
|
||||
new Among("\u0924\u093E\u0913\u0902", 24, -1, "r_CONSONANT", methodObject),
|
||||
new Among("\u0905\u0924\u093E\u0913\u0902", 25, -1),
|
||||
new Among("\u0928\u093E\u0913\u0902", 24, -1, "r_CONSONANT", methodObject),
|
||||
new Among("\u0905\u0928\u093E\u0913\u0902", 27, -1),
|
||||
new Among("\u0941\u0913\u0902", 21, -1),
|
||||
new Among("\u093E\u0902", -1, -1),
|
||||
new Among("\u0907\u092F\u093E\u0902", 30, -1),
|
||||
new Among("\u0906\u0907\u092F\u093E\u0902", 31, -1),
|
||||
new Among("\u093E\u0907\u092F\u093E\u0902", 31, -1),
|
||||
new Among("\u093F\u092F\u093E\u0902", 30, -1),
|
||||
new Among("\u0940\u0902", -1, -1),
|
||||
new Among("\u0924\u0940\u0902", 35, -1, "r_CONSONANT", methodObject),
|
||||
new Among("\u0905\u0924\u0940\u0902", 36, -1),
|
||||
new Among("\u0906\u0924\u0940\u0902", 36, -1),
|
||||
new Among("\u093E\u0924\u0940\u0902", 36, -1),
|
||||
new Among("\u0947\u0902", -1, -1),
|
||||
new Among("\u094B\u0902", -1, -1),
|
||||
new Among("\u0907\u092F\u094B\u0902", 41, -1),
|
||||
new Among("\u0906\u0907\u092F\u094B\u0902", 42, -1),
|
||||
new Among("\u093E\u0907\u092F\u094B\u0902", 42, -1),
|
||||
new Among("\u093F\u092F\u094B\u0902", 41, -1),
|
||||
new Among("\u0905", -1, -1),
|
||||
new Among("\u0906", -1, -1),
|
||||
new Among("\u0907", -1, -1),
|
||||
new Among("\u0908", -1, -1),
|
||||
new Among("\u0906\u0908", 49, -1),
|
||||
new Among("\u093E\u0908", 49, -1),
|
||||
new Among("\u0909", -1, -1),
|
||||
new Among("\u090A", -1, -1),
|
||||
new Among("\u090F", -1, -1),
|
||||
new Among("\u0906\u090F", 54, -1),
|
||||
new Among("\u0907\u090F", 54, -1),
|
||||
new Among("\u0906\u0907\u090F", 56, -1),
|
||||
new Among("\u093E\u0907\u090F", 56, -1),
|
||||
new Among("\u093E\u090F", 54, -1),
|
||||
new Among("\u093F\u090F", 54, -1),
|
||||
new Among("\u0913", -1, -1),
|
||||
new Among("\u0906\u0913", 61, -1),
|
||||
new Among("\u093E\u0913", 61, -1),
|
||||
new Among("\u0915\u0930", -1, -1, "r_CONSONANT", methodObject),
|
||||
new Among("\u0905\u0915\u0930", 64, -1),
|
||||
new Among("\u0906\u0915\u0930", 64, -1),
|
||||
new Among("\u093E\u0915\u0930", 64, -1),
|
||||
new Among("\u093E", -1, -1),
|
||||
new Among("\u090A\u0902\u0917\u093E", 68, -1),
|
||||
new Among("\u0906\u090A\u0902\u0917\u093E", 69, -1),
|
||||
new Among("\u093E\u090A\u0902\u0917\u093E", 69, -1),
|
||||
new Among("\u0942\u0902\u0917\u093E", 68, -1),
|
||||
new Among("\u090F\u0917\u093E", 68, -1),
|
||||
new Among("\u0906\u090F\u0917\u093E", 73, -1),
|
||||
new Among("\u093E\u090F\u0917\u093E", 73, -1),
|
||||
new Among("\u0947\u0917\u093E", 68, -1),
|
||||
new Among("\u0924\u093E", 68, -1, "r_CONSONANT", methodObject),
|
||||
new Among("\u0905\u0924\u093E", 77, -1),
|
||||
new Among("\u0906\u0924\u093E", 77, -1),
|
||||
new Among("\u093E\u0924\u093E", 77, -1),
|
||||
new Among("\u0928\u093E", 68, -1, "r_CONSONANT", methodObject),
|
||||
new Among("\u0905\u0928\u093E", 81, -1),
|
||||
new Among("\u0906\u0928\u093E", 81, -1),
|
||||
new Among("\u093E\u0928\u093E", 81, -1),
|
||||
new Among("\u0906\u092F\u093E", 68, -1),
|
||||
new Among("\u093E\u092F\u093E", 68, -1),
|
||||
new Among("\u093F", -1, -1),
|
||||
new Among("\u0940", -1, -1),
|
||||
new Among("\u090A\u0902\u0917\u0940", 88, -1),
|
||||
new Among("\u0906\u090A\u0902\u0917\u0940", 89, -1),
|
||||
new Among("\u093E\u090A\u0902\u0917\u0940", 89, -1),
|
||||
new Among("\u090F\u0902\u0917\u0940", 88, -1),
|
||||
new Among("\u0906\u090F\u0902\u0917\u0940", 92, -1),
|
||||
new Among("\u093E\u090F\u0902\u0917\u0940", 92, -1),
|
||||
new Among("\u0942\u0902\u0917\u0940", 88, -1),
|
||||
new Among("\u0947\u0902\u0917\u0940", 88, -1),
|
||||
new Among("\u090F\u0917\u0940", 88, -1),
|
||||
new Among("\u0906\u090F\u0917\u0940", 97, -1),
|
||||
new Among("\u093E\u090F\u0917\u0940", 97, -1),
|
||||
new Among("\u0913\u0917\u0940", 88, -1),
|
||||
new Among("\u0906\u0913\u0917\u0940", 100, -1),
|
||||
new Among("\u093E\u0913\u0917\u0940", 100, -1),
|
||||
new Among("\u0947\u0917\u0940", 88, -1),
|
||||
new Among("\u094B\u0917\u0940", 88, -1),
|
||||
new Among("\u0924\u0940", 88, -1, "r_CONSONANT", methodObject),
|
||||
new Among("\u0905\u0924\u0940", 105, -1),
|
||||
new Among("\u0906\u0924\u0940", 105, -1),
|
||||
new Among("\u093E\u0924\u0940", 105, -1),
|
||||
new Among("\u0928\u0940", 88, -1, "r_CONSONANT", methodObject),
|
||||
new Among("\u0905\u0928\u0940", 109, -1),
|
||||
new Among("\u0941", -1, -1),
|
||||
new Among("\u0942", -1, -1),
|
||||
new Among("\u0947", -1, -1),
|
||||
new Among("\u090F\u0902\u0917\u0947", 113, -1),
|
||||
new Among("\u0906\u090F\u0902\u0917\u0947", 114, -1),
|
||||
new Among("\u093E\u090F\u0902\u0917\u0947", 114, -1),
|
||||
new Among("\u0947\u0902\u0917\u0947", 113, -1),
|
||||
new Among("\u0913\u0917\u0947", 113, -1),
|
||||
new Among("\u0906\u0913\u0917\u0947", 118, -1),
|
||||
new Among("\u093E\u0913\u0917\u0947", 118, -1),
|
||||
new Among("\u094B\u0917\u0947", 113, -1),
|
||||
new Among("\u0924\u0947", 113, -1, "r_CONSONANT", methodObject),
|
||||
new Among("\u0905\u0924\u0947", 122, -1),
|
||||
new Among("\u0906\u0924\u0947", 122, -1),
|
||||
new Among("\u093E\u0924\u0947", 122, -1),
|
||||
new Among("\u0928\u0947", 113, -1, "r_CONSONANT", methodObject),
|
||||
new Among("\u0905\u0928\u0947", 126, -1),
|
||||
new Among("\u0906\u0928\u0947", 126, -1),
|
||||
new Among("\u093E\u0928\u0947", 126, -1),
|
||||
new Among("\u094B", -1, -1),
|
||||
new Among("\u094D", -1, -1)
|
||||
};
|
||||
|
||||
private static final char g_consonant[] = {255, 255, 255, 255, 159, 0, 0, 0, 248, 7 };
|
||||
|
||||
private int I_p;
|
||||
|
||||
|
||||
public boolean r_CONSONANT() {
|
||||
if (!(in_grouping_b(g_consonant, 2325, 2399)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean stem() {
|
||||
int v_1 = cursor;
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
I_p = cursor;
|
||||
cursor = v_1;
|
||||
limit_backward = cursor;
|
||||
cursor = limit;
|
||||
if (cursor < I_p)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
int v_3 = limit_backward;
|
||||
limit_backward = I_p;
|
||||
ket = cursor;
|
||||
if (find_among_b(a_0) == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
slice_del();
|
||||
cursor = limit_backward;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof HindiStemmer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return HindiStemmer.class.getName().hashCode();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,392 @@
|
|||
// Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
|
||||
package org.tartarus.snowball.ext;
|
||||
|
||||
import org.tartarus.snowball.Among;
|
||||
|
||||
/**
|
||||
* This class implements the stemming algorithm defined by a snowball script.
|
||||
* <p>
|
||||
* Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
* </p>
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public class IndonesianStemmer extends org.tartarus.snowball.SnowballStemmer {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
|
||||
|
||||
private final static Among a_0[] = {
|
||||
new Among("kah", -1, 1),
|
||||
new Among("lah", -1, 1),
|
||||
new Among("pun", -1, 1)
|
||||
};
|
||||
|
||||
private final static Among a_1[] = {
|
||||
new Among("nya", -1, 1),
|
||||
new Among("ku", -1, 1),
|
||||
new Among("mu", -1, 1)
|
||||
};
|
||||
|
||||
private final static Among a_2[] = {
|
||||
new Among("i", -1, 1, "r_SUFFIX_I_OK", methodObject),
|
||||
new Among("an", -1, 1, "r_SUFFIX_AN_OK", methodObject),
|
||||
new Among("kan", 1, 1, "r_SUFFIX_KAN_OK", methodObject)
|
||||
};
|
||||
|
||||
private final static Among a_3[] = {
|
||||
new Among("di", -1, 1),
|
||||
new Among("ke", -1, 2),
|
||||
new Among("me", -1, 1),
|
||||
new Among("mem", 2, 5),
|
||||
new Among("men", 2, 1),
|
||||
new Among("meng", 4, 1),
|
||||
new Among("meny", 4, 3, "r_VOWEL", methodObject),
|
||||
new Among("pem", -1, 6),
|
||||
new Among("pen", -1, 2),
|
||||
new Among("peng", 8, 2),
|
||||
new Among("peny", 8, 4, "r_VOWEL", methodObject),
|
||||
new Among("ter", -1, 1)
|
||||
};
|
||||
|
||||
private final static Among a_4[] = {
|
||||
new Among("be", -1, 3, "r_KER", methodObject),
|
||||
new Among("belajar", 0, 4),
|
||||
new Among("ber", 0, 3),
|
||||
new Among("pe", -1, 1),
|
||||
new Among("pelajar", 3, 2),
|
||||
new Among("per", 3, 1)
|
||||
};
|
||||
|
||||
private static final char g_vowel[] = {17, 65, 16 };
|
||||
|
||||
private int I_prefix;
|
||||
private int I_measure;
|
||||
|
||||
|
||||
private boolean r_remove_particle() {
|
||||
ket = cursor;
|
||||
if (find_among_b(a_0) == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
slice_del();
|
||||
I_measure -= 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_remove_possessive_pronoun() {
|
||||
ket = cursor;
|
||||
if (find_among_b(a_1) == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
slice_del();
|
||||
I_measure -= 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean r_SUFFIX_KAN_OK() {
|
||||
if (!(I_prefix != 3))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (!(I_prefix != 2))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean r_SUFFIX_AN_OK() {
|
||||
if (!(I_prefix != 1))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean r_SUFFIX_I_OK() {
|
||||
if (!(I_prefix <= 2))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
{
|
||||
int v_1 = limit - cursor;
|
||||
lab0: {
|
||||
if (!(eq_s_b("s")))
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
cursor = limit - v_1;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_remove_suffix() {
|
||||
ket = cursor;
|
||||
if (find_among_b(a_2) == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
slice_del();
|
||||
I_measure -= 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean r_VOWEL() {
|
||||
if (!(in_grouping(g_vowel, 97, 117)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean r_KER() {
|
||||
if (!(out_grouping(g_vowel, 97, 117)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (!(eq_s("er")))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_remove_first_order_prefix() {
|
||||
int among_var;
|
||||
bra = cursor;
|
||||
among_var = find_among(a_3);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
ket = cursor;
|
||||
switch (among_var) {
|
||||
case 1:
|
||||
slice_del();
|
||||
I_prefix = 1;
|
||||
I_measure -= 1;
|
||||
break;
|
||||
case 2:
|
||||
slice_del();
|
||||
I_prefix = 3;
|
||||
I_measure -= 1;
|
||||
break;
|
||||
case 3:
|
||||
I_prefix = 1;
|
||||
slice_from("s");
|
||||
I_measure -= 1;
|
||||
break;
|
||||
case 4:
|
||||
I_prefix = 3;
|
||||
slice_from("s");
|
||||
I_measure -= 1;
|
||||
break;
|
||||
case 5:
|
||||
I_prefix = 1;
|
||||
I_measure -= 1;
|
||||
lab0: {
|
||||
int v_1 = cursor;
|
||||
lab1: {
|
||||
int v_2 = cursor;
|
||||
if (!(in_grouping(g_vowel, 97, 117)))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
cursor = v_2;
|
||||
slice_from("p");
|
||||
break lab0;
|
||||
}
|
||||
cursor = v_1;
|
||||
slice_del();
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
I_prefix = 3;
|
||||
I_measure -= 1;
|
||||
lab2: {
|
||||
int v_3 = cursor;
|
||||
lab3: {
|
||||
int v_4 = cursor;
|
||||
if (!(in_grouping(g_vowel, 97, 117)))
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
cursor = v_4;
|
||||
slice_from("p");
|
||||
break lab2;
|
||||
}
|
||||
cursor = v_3;
|
||||
slice_del();
|
||||
}
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_remove_second_order_prefix() {
|
||||
int among_var;
|
||||
bra = cursor;
|
||||
among_var = find_among(a_4);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
ket = cursor;
|
||||
switch (among_var) {
|
||||
case 1:
|
||||
slice_del();
|
||||
I_prefix = 2;
|
||||
I_measure -= 1;
|
||||
break;
|
||||
case 2:
|
||||
slice_from("ajar");
|
||||
I_measure -= 1;
|
||||
break;
|
||||
case 3:
|
||||
slice_del();
|
||||
I_prefix = 4;
|
||||
I_measure -= 1;
|
||||
break;
|
||||
case 4:
|
||||
slice_from("ajar");
|
||||
I_prefix = 4;
|
||||
I_measure -= 1;
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean stem() {
|
||||
I_measure = 0;
|
||||
int v_1 = cursor;
|
||||
lab0: {
|
||||
while(true)
|
||||
{
|
||||
int v_2 = cursor;
|
||||
lab1: {
|
||||
golab2: while(true)
|
||||
{
|
||||
lab3: {
|
||||
if (!(in_grouping(g_vowel, 97, 117)))
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
break golab2;
|
||||
}
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
I_measure += 1;
|
||||
continue;
|
||||
}
|
||||
cursor = v_2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
cursor = v_1;
|
||||
if (!(I_measure > 2))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
I_prefix = 0;
|
||||
limit_backward = cursor;
|
||||
cursor = limit;
|
||||
int v_4 = limit - cursor;
|
||||
r_remove_particle();
|
||||
cursor = limit - v_4;
|
||||
if (!(I_measure > 2))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
int v_5 = limit - cursor;
|
||||
r_remove_possessive_pronoun();
|
||||
cursor = limit - v_5;
|
||||
cursor = limit_backward;
|
||||
if (!(I_measure > 2))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
lab4: {
|
||||
int v_6 = cursor;
|
||||
lab5: {
|
||||
int v_7 = cursor;
|
||||
if (!r_remove_first_order_prefix())
|
||||
{
|
||||
break lab5;
|
||||
}
|
||||
int v_8 = cursor;
|
||||
lab6: {
|
||||
int v_9 = cursor;
|
||||
if (!(I_measure > 2))
|
||||
{
|
||||
break lab6;
|
||||
}
|
||||
limit_backward = cursor;
|
||||
cursor = limit;
|
||||
if (!r_remove_suffix())
|
||||
{
|
||||
break lab6;
|
||||
}
|
||||
cursor = limit_backward;
|
||||
cursor = v_9;
|
||||
if (!(I_measure > 2))
|
||||
{
|
||||
break lab6;
|
||||
}
|
||||
if (!r_remove_second_order_prefix())
|
||||
{
|
||||
break lab6;
|
||||
}
|
||||
}
|
||||
cursor = v_8;
|
||||
cursor = v_7;
|
||||
break lab4;
|
||||
}
|
||||
cursor = v_6;
|
||||
int v_10 = cursor;
|
||||
r_remove_second_order_prefix();
|
||||
cursor = v_10;
|
||||
int v_11 = cursor;
|
||||
lab7: {
|
||||
if (!(I_measure > 2))
|
||||
{
|
||||
break lab7;
|
||||
}
|
||||
limit_backward = cursor;
|
||||
cursor = limit;
|
||||
if (!r_remove_suffix())
|
||||
{
|
||||
break lab7;
|
||||
}
|
||||
cursor = limit_backward;
|
||||
}
|
||||
cursor = v_11;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof IndonesianStemmer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return IndonesianStemmer.class.getName().hashCode();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -1,588 +1,399 @@
|
|||
// This file was generated automatically by the Snowball to Java compiler
|
||||
// Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
|
||||
package org.tartarus.snowball.ext;
|
||||
|
||||
import org.tartarus.snowball.Among;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
|
||||
/**
|
||||
* This class was automatically generated by a Snowball to Java compiler
|
||||
* It implements the stemming algorithm defined by a snowball script.
|
||||
*/
|
||||
/**
|
||||
* This class implements the stemming algorithm defined by a snowball script.
|
||||
* <p>
|
||||
* Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
* </p>
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public class IrishStemmer extends org.tartarus.snowball.SnowballStemmer {
|
||||
|
||||
@SuppressWarnings("unused") public class IrishStemmer extends SnowballProgram {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final static Among a_0[] = {
|
||||
new Among("b'", -1, 1),
|
||||
new Among("bh", -1, 4),
|
||||
new Among("bhf", 1, 2),
|
||||
new Among("bp", -1, 8),
|
||||
new Among("ch", -1, 5),
|
||||
new Among("d'", -1, 1),
|
||||
new Among("d'fh", 5, 2),
|
||||
new Among("dh", -1, 6),
|
||||
new Among("dt", -1, 9),
|
||||
new Among("fh", -1, 2),
|
||||
new Among("gc", -1, 5),
|
||||
new Among("gh", -1, 7),
|
||||
new Among("h-", -1, 1),
|
||||
new Among("m'", -1, 1),
|
||||
new Among("mb", -1, 4),
|
||||
new Among("mh", -1, 10),
|
||||
new Among("n-", -1, 1),
|
||||
new Among("nd", -1, 6),
|
||||
new Among("ng", -1, 7),
|
||||
new Among("ph", -1, 8),
|
||||
new Among("sh", -1, 3),
|
||||
new Among("t-", -1, 1),
|
||||
new Among("th", -1, 9),
|
||||
new Among("ts", -1, 3)
|
||||
};
|
||||
|
||||
/* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
|
||||
private final static Among a_1[] = {
|
||||
new Among("\u00EDochta", -1, 1),
|
||||
new Among("a\u00EDochta", 0, 1),
|
||||
new Among("ire", -1, 2),
|
||||
new Among("aire", 2, 2),
|
||||
new Among("abh", -1, 1),
|
||||
new Among("eabh", 4, 1),
|
||||
new Among("ibh", -1, 1),
|
||||
new Among("aibh", 6, 1),
|
||||
new Among("amh", -1, 1),
|
||||
new Among("eamh", 8, 1),
|
||||
new Among("imh", -1, 1),
|
||||
new Among("aimh", 10, 1),
|
||||
new Among("\u00EDocht", -1, 1),
|
||||
new Among("a\u00EDocht", 12, 1),
|
||||
new Among("ir\u00ED", -1, 2),
|
||||
new Among("air\u00ED", 14, 2)
|
||||
};
|
||||
|
||||
private final static Among a_0[] = {
|
||||
new Among ( "b'", -1, 4, "", methodObject ),
|
||||
new Among ( "bh", -1, 14, "", methodObject ),
|
||||
new Among ( "bhf", 1, 9, "", methodObject ),
|
||||
new Among ( "bp", -1, 11, "", methodObject ),
|
||||
new Among ( "ch", -1, 15, "", methodObject ),
|
||||
new Among ( "d'", -1, 2, "", methodObject ),
|
||||
new Among ( "d'fh", 5, 3, "", methodObject ),
|
||||
new Among ( "dh", -1, 16, "", methodObject ),
|
||||
new Among ( "dt", -1, 13, "", methodObject ),
|
||||
new Among ( "fh", -1, 17, "", methodObject ),
|
||||
new Among ( "gc", -1, 7, "", methodObject ),
|
||||
new Among ( "gh", -1, 18, "", methodObject ),
|
||||
new Among ( "h-", -1, 1, "", methodObject ),
|
||||
new Among ( "m'", -1, 4, "", methodObject ),
|
||||
new Among ( "mb", -1, 6, "", methodObject ),
|
||||
new Among ( "mh", -1, 19, "", methodObject ),
|
||||
new Among ( "n-", -1, 1, "", methodObject ),
|
||||
new Among ( "nd", -1, 8, "", methodObject ),
|
||||
new Among ( "ng", -1, 10, "", methodObject ),
|
||||
new Among ( "ph", -1, 20, "", methodObject ),
|
||||
new Among ( "sh", -1, 5, "", methodObject ),
|
||||
new Among ( "t-", -1, 1, "", methodObject ),
|
||||
new Among ( "th", -1, 21, "", methodObject ),
|
||||
new Among ( "ts", -1, 12, "", methodObject )
|
||||
};
|
||||
private final static Among a_2[] = {
|
||||
new Among("\u00F3ideacha", -1, 6),
|
||||
new Among("patacha", -1, 5),
|
||||
new Among("achta", -1, 1),
|
||||
new Among("arcachta", 2, 2),
|
||||
new Among("eachta", 2, 1),
|
||||
new Among("grafa\u00EDochta", -1, 4),
|
||||
new Among("paite", -1, 5),
|
||||
new Among("ach", -1, 1),
|
||||
new Among("each", 7, 1),
|
||||
new Among("\u00F3ideach", 8, 6),
|
||||
new Among("gineach", 8, 3),
|
||||
new Among("patach", 7, 5),
|
||||
new Among("grafa\u00EDoch", -1, 4),
|
||||
new Among("pataigh", -1, 5),
|
||||
new Among("\u00F3idigh", -1, 6),
|
||||
new Among("acht\u00FAil", -1, 1),
|
||||
new Among("eacht\u00FAil", 15, 1),
|
||||
new Among("gineas", -1, 3),
|
||||
new Among("ginis", -1, 3),
|
||||
new Among("acht", -1, 1),
|
||||
new Among("arcacht", 19, 2),
|
||||
new Among("eacht", 19, 1),
|
||||
new Among("grafa\u00EDocht", -1, 4),
|
||||
new Among("arcachta\u00ED", -1, 2),
|
||||
new Among("grafa\u00EDochta\u00ED", -1, 4)
|
||||
};
|
||||
|
||||
private final static Among a_1[] = {
|
||||
new Among ( "\u00EDochta", -1, 1, "", methodObject ),
|
||||
new Among ( "a\u00EDochta", 0, 1, "", methodObject ),
|
||||
new Among ( "ire", -1, 2, "", methodObject ),
|
||||
new Among ( "aire", 2, 2, "", methodObject ),
|
||||
new Among ( "abh", -1, 1, "", methodObject ),
|
||||
new Among ( "eabh", 4, 1, "", methodObject ),
|
||||
new Among ( "ibh", -1, 1, "", methodObject ),
|
||||
new Among ( "aibh", 6, 1, "", methodObject ),
|
||||
new Among ( "amh", -1, 1, "", methodObject ),
|
||||
new Among ( "eamh", 8, 1, "", methodObject ),
|
||||
new Among ( "imh", -1, 1, "", methodObject ),
|
||||
new Among ( "aimh", 10, 1, "", methodObject ),
|
||||
new Among ( "\u00EDocht", -1, 1, "", methodObject ),
|
||||
new Among ( "a\u00EDocht", 12, 1, "", methodObject ),
|
||||
new Among ( "ir\u00ED", -1, 2, "", methodObject ),
|
||||
new Among ( "air\u00ED", 14, 2, "", methodObject )
|
||||
};
|
||||
private final static Among a_3[] = {
|
||||
new Among("imid", -1, 1),
|
||||
new Among("aimid", 0, 1),
|
||||
new Among("\u00EDmid", -1, 1),
|
||||
new Among("a\u00EDmid", 2, 1),
|
||||
new Among("adh", -1, 2),
|
||||
new Among("eadh", 4, 2),
|
||||
new Among("faidh", -1, 1),
|
||||
new Among("fidh", -1, 1),
|
||||
new Among("\u00E1il", -1, 2),
|
||||
new Among("ain", -1, 2),
|
||||
new Among("tear", -1, 2),
|
||||
new Among("tar", -1, 2)
|
||||
};
|
||||
|
||||
private final static Among a_2[] = {
|
||||
new Among ( "\u00F3ideacha", -1, 6, "", methodObject ),
|
||||
new Among ( "patacha", -1, 5, "", methodObject ),
|
||||
new Among ( "achta", -1, 1, "", methodObject ),
|
||||
new Among ( "arcachta", 2, 2, "", methodObject ),
|
||||
new Among ( "eachta", 2, 1, "", methodObject ),
|
||||
new Among ( "grafa\u00EDochta", -1, 4, "", methodObject ),
|
||||
new Among ( "paite", -1, 5, "", methodObject ),
|
||||
new Among ( "ach", -1, 1, "", methodObject ),
|
||||
new Among ( "each", 7, 1, "", methodObject ),
|
||||
new Among ( "\u00F3ideach", 8, 6, "", methodObject ),
|
||||
new Among ( "gineach", 8, 3, "", methodObject ),
|
||||
new Among ( "patach", 7, 5, "", methodObject ),
|
||||
new Among ( "grafa\u00EDoch", -1, 4, "", methodObject ),
|
||||
new Among ( "pataigh", -1, 5, "", methodObject ),
|
||||
new Among ( "\u00F3idigh", -1, 6, "", methodObject ),
|
||||
new Among ( "acht\u00FAil", -1, 1, "", methodObject ),
|
||||
new Among ( "eacht\u00FAil", 15, 1, "", methodObject ),
|
||||
new Among ( "gineas", -1, 3, "", methodObject ),
|
||||
new Among ( "ginis", -1, 3, "", methodObject ),
|
||||
new Among ( "acht", -1, 1, "", methodObject ),
|
||||
new Among ( "arcacht", 19, 2, "", methodObject ),
|
||||
new Among ( "eacht", 19, 1, "", methodObject ),
|
||||
new Among ( "grafa\u00EDocht", -1, 4, "", methodObject ),
|
||||
new Among ( "arcachta\u00ED", -1, 2, "", methodObject ),
|
||||
new Among ( "grafa\u00EDochta\u00ED", -1, 4, "", methodObject )
|
||||
};
|
||||
private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 2 };
|
||||
|
||||
private final static Among a_3[] = {
|
||||
new Among ( "imid", -1, 1, "", methodObject ),
|
||||
new Among ( "aimid", 0, 1, "", methodObject ),
|
||||
new Among ( "\u00EDmid", -1, 1, "", methodObject ),
|
||||
new Among ( "a\u00EDmid", 2, 1, "", methodObject ),
|
||||
new Among ( "adh", -1, 2, "", methodObject ),
|
||||
new Among ( "eadh", 4, 2, "", methodObject ),
|
||||
new Among ( "faidh", -1, 1, "", methodObject ),
|
||||
new Among ( "fidh", -1, 1, "", methodObject ),
|
||||
new Among ( "\u00E1il", -1, 2, "", methodObject ),
|
||||
new Among ( "ain", -1, 2, "", methodObject ),
|
||||
new Among ( "tear", -1, 2, "", methodObject ),
|
||||
new Among ( "tar", -1, 2, "", methodObject )
|
||||
};
|
||||
private int I_p2;
|
||||
private int I_p1;
|
||||
private int I_pV;
|
||||
|
||||
private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 2 };
|
||||
|
||||
private int I_p2;
|
||||
private int I_p1;
|
||||
private int I_pV;
|
||||
|
||||
private void copy_from(IrishStemmer other) {
|
||||
I_p2 = other.I_p2;
|
||||
I_p1 = other.I_p1;
|
||||
I_pV = other.I_pV;
|
||||
super.copy_from(other);
|
||||
}
|
||||
|
||||
private boolean r_mark_regions() {
|
||||
int v_1;
|
||||
int v_3;
|
||||
// (, line 28
|
||||
I_pV = limit;
|
||||
I_p1 = limit;
|
||||
I_p2 = limit;
|
||||
// do, line 34
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// (, line 34
|
||||
// gopast, line 35
|
||||
golab1: while(true)
|
||||
{
|
||||
lab2: do {
|
||||
if (!(in_grouping(g_v, 97, 250)))
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
break golab1;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark pV, line 35
|
||||
I_pV = cursor;
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// do, line 37
|
||||
v_3 = cursor;
|
||||
lab3: do {
|
||||
// (, line 37
|
||||
// gopast, line 38
|
||||
golab4: while(true)
|
||||
{
|
||||
lab5: do {
|
||||
if (!(in_grouping(g_v, 97, 250)))
|
||||
{
|
||||
break lab5;
|
||||
}
|
||||
break golab4;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// gopast, line 38
|
||||
golab6: while(true)
|
||||
{
|
||||
lab7: do {
|
||||
if (!(out_grouping(g_v, 97, 250)))
|
||||
{
|
||||
break lab7;
|
||||
}
|
||||
break golab6;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark p1, line 38
|
||||
I_p1 = cursor;
|
||||
// gopast, line 39
|
||||
golab8: while(true)
|
||||
{
|
||||
lab9: do {
|
||||
if (!(in_grouping(g_v, 97, 250)))
|
||||
{
|
||||
break lab9;
|
||||
}
|
||||
break golab8;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// gopast, line 39
|
||||
golab10: while(true)
|
||||
{
|
||||
lab11: do {
|
||||
if (!(out_grouping(g_v, 97, 250)))
|
||||
{
|
||||
break lab11;
|
||||
}
|
||||
break golab10;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark p2, line 39
|
||||
I_p2 = cursor;
|
||||
} while (false);
|
||||
cursor = v_3;
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_initial_morph() {
|
||||
int among_var;
|
||||
// (, line 43
|
||||
// [, line 44
|
||||
bra = cursor;
|
||||
// substring, line 44
|
||||
among_var = find_among(a_0, 24);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 44
|
||||
ket = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 46
|
||||
// delete, line 46
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 50
|
||||
// delete, line 50
|
||||
slice_del();
|
||||
break;
|
||||
case 3:
|
||||
// (, line 52
|
||||
// <-, line 52
|
||||
slice_from("f");
|
||||
break;
|
||||
case 4:
|
||||
// (, line 55
|
||||
// delete, line 55
|
||||
slice_del();
|
||||
break;
|
||||
case 5:
|
||||
// (, line 58
|
||||
// <-, line 58
|
||||
slice_from("s");
|
||||
break;
|
||||
case 6:
|
||||
// (, line 61
|
||||
// <-, line 61
|
||||
slice_from("b");
|
||||
break;
|
||||
case 7:
|
||||
// (, line 63
|
||||
// <-, line 63
|
||||
slice_from("c");
|
||||
break;
|
||||
case 8:
|
||||
// (, line 65
|
||||
// <-, line 65
|
||||
slice_from("d");
|
||||
break;
|
||||
case 9:
|
||||
// (, line 67
|
||||
// <-, line 67
|
||||
slice_from("f");
|
||||
break;
|
||||
case 10:
|
||||
// (, line 69
|
||||
// <-, line 69
|
||||
slice_from("g");
|
||||
break;
|
||||
case 11:
|
||||
// (, line 71
|
||||
// <-, line 71
|
||||
slice_from("p");
|
||||
break;
|
||||
case 12:
|
||||
// (, line 73
|
||||
// <-, line 73
|
||||
slice_from("s");
|
||||
break;
|
||||
case 13:
|
||||
// (, line 75
|
||||
// <-, line 75
|
||||
slice_from("t");
|
||||
break;
|
||||
case 14:
|
||||
// (, line 79
|
||||
// <-, line 79
|
||||
slice_from("b");
|
||||
break;
|
||||
case 15:
|
||||
// (, line 81
|
||||
// <-, line 81
|
||||
slice_from("c");
|
||||
break;
|
||||
case 16:
|
||||
// (, line 83
|
||||
// <-, line 83
|
||||
slice_from("d");
|
||||
break;
|
||||
case 17:
|
||||
// (, line 85
|
||||
// <-, line 85
|
||||
slice_from("f");
|
||||
break;
|
||||
case 18:
|
||||
// (, line 87
|
||||
// <-, line 87
|
||||
slice_from("g");
|
||||
break;
|
||||
case 19:
|
||||
// (, line 89
|
||||
// <-, line 89
|
||||
slice_from("m");
|
||||
break;
|
||||
case 20:
|
||||
// (, line 91
|
||||
// <-, line 91
|
||||
slice_from("p");
|
||||
break;
|
||||
case 21:
|
||||
// (, line 93
|
||||
// <-, line 93
|
||||
slice_from("t");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_RV() {
|
||||
if (!(I_pV <= cursor))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_R1() {
|
||||
if (!(I_p1 <= cursor))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_R2() {
|
||||
if (!(I_p2 <= cursor))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_noun_sfx() {
|
||||
int among_var;
|
||||
// (, line 103
|
||||
// [, line 104
|
||||
ket = cursor;
|
||||
// substring, line 104
|
||||
among_var = find_among_b(a_1, 16);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 104
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 108
|
||||
// call R1, line 108
|
||||
if (!r_R1())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 108
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 110
|
||||
// call R2, line 110
|
||||
if (!r_R2())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 110
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_deriv() {
|
||||
int among_var;
|
||||
// (, line 113
|
||||
// [, line 114
|
||||
ket = cursor;
|
||||
// substring, line 114
|
||||
among_var = find_among_b(a_2, 25);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 114
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 116
|
||||
// call R2, line 116
|
||||
if (!r_R2())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 116
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 118
|
||||
// <-, line 118
|
||||
slice_from("arc");
|
||||
break;
|
||||
case 3:
|
||||
// (, line 120
|
||||
// <-, line 120
|
||||
slice_from("gin");
|
||||
break;
|
||||
case 4:
|
||||
// (, line 122
|
||||
// <-, line 122
|
||||
slice_from("graf");
|
||||
break;
|
||||
case 5:
|
||||
// (, line 124
|
||||
// <-, line 124
|
||||
slice_from("paite");
|
||||
break;
|
||||
case 6:
|
||||
// (, line 126
|
||||
// <-, line 126
|
||||
slice_from("\u00F3id");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_verb_sfx() {
|
||||
int among_var;
|
||||
// (, line 129
|
||||
// [, line 130
|
||||
ket = cursor;
|
||||
// substring, line 130
|
||||
among_var = find_among_b(a_3, 12);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 130
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 133
|
||||
// call RV, line 133
|
||||
if (!r_RV())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 133
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 138
|
||||
// call R1, line 138
|
||||
if (!r_R1())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 138
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean stem() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
int v_4;
|
||||
int v_5;
|
||||
// (, line 143
|
||||
// do, line 144
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// call initial_morph, line 144
|
||||
if (!r_initial_morph())
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// do, line 145
|
||||
v_2 = cursor;
|
||||
lab1: do {
|
||||
// call mark_regions, line 145
|
||||
if (!r_mark_regions())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_2;
|
||||
// backwards, line 146
|
||||
limit_backward = cursor; cursor = limit;
|
||||
// (, line 146
|
||||
// do, line 147
|
||||
v_3 = limit - cursor;
|
||||
lab2: do {
|
||||
// call noun_sfx, line 147
|
||||
if (!r_noun_sfx())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// do, line 148
|
||||
v_4 = limit - cursor;
|
||||
lab3: do {
|
||||
// call deriv, line 148
|
||||
if (!r_deriv())
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
// do, line 149
|
||||
v_5 = limit - cursor;
|
||||
lab4: do {
|
||||
// call verb_sfx, line 149
|
||||
if (!r_verb_sfx())
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_5;
|
||||
cursor = limit_backward; return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof IrishStemmer;
|
||||
private boolean r_mark_regions() {
|
||||
I_pV = limit;
|
||||
I_p1 = limit;
|
||||
I_p2 = limit;
|
||||
int v_1 = cursor;
|
||||
lab0: {
|
||||
golab1: while(true)
|
||||
{
|
||||
lab2: {
|
||||
if (!(in_grouping(g_v, 97, 250)))
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return IrishStemmer.class.getName().hashCode();
|
||||
break golab1;
|
||||
}
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
I_pV = cursor;
|
||||
}
|
||||
cursor = v_1;
|
||||
int v_3 = cursor;
|
||||
lab3: {
|
||||
golab4: while(true)
|
||||
{
|
||||
lab5: {
|
||||
if (!(in_grouping(g_v, 97, 250)))
|
||||
{
|
||||
break lab5;
|
||||
}
|
||||
break golab4;
|
||||
}
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
golab6: while(true)
|
||||
{
|
||||
lab7: {
|
||||
if (!(out_grouping(g_v, 97, 250)))
|
||||
{
|
||||
break lab7;
|
||||
}
|
||||
break golab6;
|
||||
}
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
I_p1 = cursor;
|
||||
golab8: while(true)
|
||||
{
|
||||
lab9: {
|
||||
if (!(in_grouping(g_v, 97, 250)))
|
||||
{
|
||||
break lab9;
|
||||
}
|
||||
break golab8;
|
||||
}
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
golab10: while(true)
|
||||
{
|
||||
lab11: {
|
||||
if (!(out_grouping(g_v, 97, 250)))
|
||||
{
|
||||
break lab11;
|
||||
}
|
||||
break golab10;
|
||||
}
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
I_p2 = cursor;
|
||||
}
|
||||
cursor = v_3;
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_initial_morph() {
|
||||
int among_var;
|
||||
bra = cursor;
|
||||
among_var = find_among(a_0);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
ket = cursor;
|
||||
switch (among_var) {
|
||||
case 1:
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
slice_from("f");
|
||||
break;
|
||||
case 3:
|
||||
slice_from("s");
|
||||
break;
|
||||
case 4:
|
||||
slice_from("b");
|
||||
break;
|
||||
case 5:
|
||||
slice_from("c");
|
||||
break;
|
||||
case 6:
|
||||
slice_from("d");
|
||||
break;
|
||||
case 7:
|
||||
slice_from("g");
|
||||
break;
|
||||
case 8:
|
||||
slice_from("p");
|
||||
break;
|
||||
case 9:
|
||||
slice_from("t");
|
||||
break;
|
||||
case 10:
|
||||
slice_from("m");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_RV() {
|
||||
if (!(I_pV <= cursor))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_R1() {
|
||||
if (!(I_p1 <= cursor))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_R2() {
|
||||
if (!(I_p2 <= cursor))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_noun_sfx() {
|
||||
int among_var;
|
||||
ket = cursor;
|
||||
among_var = find_among_b(a_1);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
switch (among_var) {
|
||||
case 1:
|
||||
if (!r_R1())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
if (!r_R2())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_deriv() {
|
||||
int among_var;
|
||||
ket = cursor;
|
||||
among_var = find_among_b(a_2);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
switch (among_var) {
|
||||
case 1:
|
||||
if (!r_R2())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
slice_from("arc");
|
||||
break;
|
||||
case 3:
|
||||
slice_from("gin");
|
||||
break;
|
||||
case 4:
|
||||
slice_from("graf");
|
||||
break;
|
||||
case 5:
|
||||
slice_from("paite");
|
||||
break;
|
||||
case 6:
|
||||
slice_from("\u00F3id");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_verb_sfx() {
|
||||
int among_var;
|
||||
ket = cursor;
|
||||
among_var = find_among_b(a_3);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
switch (among_var) {
|
||||
case 1:
|
||||
if (!r_RV())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
if (!r_R1())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean stem() {
|
||||
int v_1 = cursor;
|
||||
r_initial_morph();
|
||||
cursor = v_1;
|
||||
r_mark_regions();
|
||||
limit_backward = cursor;
|
||||
cursor = limit;
|
||||
int v_3 = limit - cursor;
|
||||
r_noun_sfx();
|
||||
cursor = limit - v_3;
|
||||
int v_4 = limit - cursor;
|
||||
r_deriv();
|
||||
cursor = limit - v_4;
|
||||
int v_5 = limit - cursor;
|
||||
r_verb_sfx();
|
||||
cursor = limit - v_5;
|
||||
cursor = limit_backward;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof IrishStemmer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return IrishStemmer.class.getName().hashCode();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,319 @@
|
|||
// Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
|
||||
package org.tartarus.snowball.ext;
|
||||
|
||||
import org.tartarus.snowball.Among;
|
||||
|
||||
/**
|
||||
* This class implements the stemming algorithm defined by a snowball script.
|
||||
* <p>
|
||||
* Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
* </p>
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public class NepaliStemmer extends org.tartarus.snowball.SnowballStemmer {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
|
||||
|
||||
private final static Among a_0[] = {
|
||||
new Among("\u0932\u093E\u0907", -1, 1),
|
||||
new Among("\u0932\u093E\u0908", -1, 1),
|
||||
new Among("\u0938\u0901\u0917", -1, 1),
|
||||
new Among("\u0938\u0902\u0917", -1, 1),
|
||||
new Among("\u092E\u093E\u0930\u094D\u092B\u0924", -1, 1),
|
||||
new Among("\u0930\u0924", -1, 1),
|
||||
new Among("\u0915\u093E", -1, 2),
|
||||
new Among("\u092E\u093E", -1, 1),
|
||||
new Among("\u0926\u094D\u0935\u093E\u0930\u093E", -1, 1),
|
||||
new Among("\u0915\u093F", -1, 2),
|
||||
new Among("\u092A\u091B\u093F", -1, 1),
|
||||
new Among("\u0915\u0940", -1, 2),
|
||||
new Among("\u0932\u0947", -1, 1),
|
||||
new Among("\u0915\u0948", -1, 2),
|
||||
new Among("\u0938\u0901\u0917\u0948", -1, 1),
|
||||
new Among("\u092E\u0948", -1, 1),
|
||||
new Among("\u0915\u094B", -1, 2)
|
||||
};
|
||||
|
||||
private final static Among a_1[] = {
|
||||
new Among("\u0901", -1, -1),
|
||||
new Among("\u0902", -1, -1),
|
||||
new Among("\u0948", -1, -1)
|
||||
};
|
||||
|
||||
private final static Among a_2[] = {
|
||||
new Among("\u0901", -1, 1),
|
||||
new Among("\u0902", -1, 1),
|
||||
new Among("\u0948", -1, 2)
|
||||
};
|
||||
|
||||
private final static Among a_3[] = {
|
||||
new Among("\u0925\u093F\u090F", -1, 1),
|
||||
new Among("\u091B", -1, 1),
|
||||
new Among("\u0907\u091B", 1, 1),
|
||||
new Among("\u090F\u091B", 1, 1),
|
||||
new Among("\u093F\u091B", 1, 1),
|
||||
new Among("\u0947\u091B", 1, 1),
|
||||
new Among("\u0928\u0947\u091B", 5, 1),
|
||||
new Among("\u0939\u0941\u0928\u0947\u091B", 6, 1),
|
||||
new Among("\u0907\u0928\u094D\u091B", 1, 1),
|
||||
new Among("\u093F\u0928\u094D\u091B", 1, 1),
|
||||
new Among("\u0939\u0941\u0928\u094D\u091B", 1, 1),
|
||||
new Among("\u090F\u0915\u093E", -1, 1),
|
||||
new Among("\u0907\u090F\u0915\u093E", 11, 1),
|
||||
new Among("\u093F\u090F\u0915\u093E", 11, 1),
|
||||
new Among("\u0947\u0915\u093E", -1, 1),
|
||||
new Among("\u0928\u0947\u0915\u093E", 14, 1),
|
||||
new Among("\u0926\u093E", -1, 1),
|
||||
new Among("\u0907\u0926\u093E", 16, 1),
|
||||
new Among("\u093F\u0926\u093E", 16, 1),
|
||||
new Among("\u0926\u0947\u0916\u093F", -1, 1),
|
||||
new Among("\u092E\u093E\u0925\u093F", -1, 1),
|
||||
new Among("\u090F\u0915\u0940", -1, 1),
|
||||
new Among("\u0907\u090F\u0915\u0940", 21, 1),
|
||||
new Among("\u093F\u090F\u0915\u0940", 21, 1),
|
||||
new Among("\u0947\u0915\u0940", -1, 1),
|
||||
new Among("\u0926\u0947\u0916\u0940", -1, 1),
|
||||
new Among("\u0925\u0940", -1, 1),
|
||||
new Among("\u0926\u0940", -1, 1),
|
||||
new Among("\u091B\u0941", -1, 1),
|
||||
new Among("\u090F\u091B\u0941", 28, 1),
|
||||
new Among("\u0947\u091B\u0941", 28, 1),
|
||||
new Among("\u0928\u0947\u091B\u0941", 30, 1),
|
||||
new Among("\u0928\u0941", -1, 1),
|
||||
new Among("\u0939\u0930\u0941", -1, 1),
|
||||
new Among("\u0939\u0930\u0942", -1, 1),
|
||||
new Among("\u091B\u0947", -1, 1),
|
||||
new Among("\u0925\u0947", -1, 1),
|
||||
new Among("\u0928\u0947", -1, 1),
|
||||
new Among("\u090F\u0915\u0948", -1, 1),
|
||||
new Among("\u0947\u0915\u0948", -1, 1),
|
||||
new Among("\u0928\u0947\u0915\u0948", 39, 1),
|
||||
new Among("\u0926\u0948", -1, 1),
|
||||
new Among("\u0907\u0926\u0948", 41, 1),
|
||||
new Among("\u093F\u0926\u0948", 41, 1),
|
||||
new Among("\u090F\u0915\u094B", -1, 1),
|
||||
new Among("\u0907\u090F\u0915\u094B", 44, 1),
|
||||
new Among("\u093F\u090F\u0915\u094B", 44, 1),
|
||||
new Among("\u0947\u0915\u094B", -1, 1),
|
||||
new Among("\u0928\u0947\u0915\u094B", 47, 1),
|
||||
new Among("\u0926\u094B", -1, 1),
|
||||
new Among("\u0907\u0926\u094B", 49, 1),
|
||||
new Among("\u093F\u0926\u094B", 49, 1),
|
||||
new Among("\u092F\u094B", -1, 1),
|
||||
new Among("\u0907\u092F\u094B", 52, 1),
|
||||
new Among("\u092D\u092F\u094B", 52, 1),
|
||||
new Among("\u093F\u092F\u094B", 52, 1),
|
||||
new Among("\u0925\u093F\u092F\u094B", 55, 1),
|
||||
new Among("\u0926\u093F\u092F\u094B", 55, 1),
|
||||
new Among("\u0925\u094D\u092F\u094B", 52, 1),
|
||||
new Among("\u091B\u094C", -1, 1),
|
||||
new Among("\u0907\u091B\u094C", 59, 1),
|
||||
new Among("\u090F\u091B\u094C", 59, 1),
|
||||
new Among("\u093F\u091B\u094C", 59, 1),
|
||||
new Among("\u0947\u091B\u094C", 59, 1),
|
||||
new Among("\u0928\u0947\u091B\u094C", 63, 1),
|
||||
new Among("\u092F\u094C", -1, 1),
|
||||
new Among("\u0925\u093F\u092F\u094C", 65, 1),
|
||||
new Among("\u091B\u094D\u092F\u094C", 65, 1),
|
||||
new Among("\u0925\u094D\u092F\u094C", 65, 1),
|
||||
new Among("\u091B\u0928\u094D", -1, 1),
|
||||
new Among("\u0907\u091B\u0928\u094D", 69, 1),
|
||||
new Among("\u090F\u091B\u0928\u094D", 69, 1),
|
||||
new Among("\u093F\u091B\u0928\u094D", 69, 1),
|
||||
new Among("\u0947\u091B\u0928\u094D", 69, 1),
|
||||
new Among("\u0928\u0947\u091B\u0928\u094D", 73, 1),
|
||||
new Among("\u0932\u093E\u0928\u094D", -1, 1),
|
||||
new Among("\u091B\u093F\u0928\u094D", -1, 1),
|
||||
new Among("\u0925\u093F\u0928\u094D", -1, 1),
|
||||
new Among("\u092A\u0930\u094D", -1, 1),
|
||||
new Among("\u0907\u0938\u094D", -1, 1),
|
||||
new Among("\u0925\u093F\u0907\u0938\u094D", 79, 1),
|
||||
new Among("\u091B\u0938\u094D", -1, 1),
|
||||
new Among("\u0907\u091B\u0938\u094D", 81, 1),
|
||||
new Among("\u090F\u091B\u0938\u094D", 81, 1),
|
||||
new Among("\u093F\u091B\u0938\u094D", 81, 1),
|
||||
new Among("\u0947\u091B\u0938\u094D", 81, 1),
|
||||
new Among("\u0928\u0947\u091B\u0938\u094D", 85, 1),
|
||||
new Among("\u093F\u0938\u094D", -1, 1),
|
||||
new Among("\u0925\u093F\u0938\u094D", 87, 1),
|
||||
new Among("\u091B\u0947\u0938\u094D", -1, 1),
|
||||
new Among("\u0939\u094B\u0938\u094D", -1, 1)
|
||||
};
|
||||
|
||||
|
||||
|
||||
private boolean r_remove_category_1() {
|
||||
int among_var;
|
||||
ket = cursor;
|
||||
among_var = find_among_b(a_0);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
switch (among_var) {
|
||||
case 1:
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
lab0: {
|
||||
int v_1 = limit - cursor;
|
||||
lab1: {
|
||||
lab2: {
|
||||
int v_2 = limit - cursor;
|
||||
lab3: {
|
||||
if (!(eq_s_b("\u090F")))
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
break lab2;
|
||||
}
|
||||
cursor = limit - v_2;
|
||||
if (!(eq_s_b("\u0947")))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
}
|
||||
break lab0;
|
||||
}
|
||||
cursor = limit - v_1;
|
||||
slice_del();
|
||||
}
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_check_category_2() {
|
||||
ket = cursor;
|
||||
if (find_among_b(a_1) == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_remove_category_2() {
|
||||
int among_var;
|
||||
ket = cursor;
|
||||
among_var = find_among_b(a_2);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
switch (among_var) {
|
||||
case 1:
|
||||
lab0: {
|
||||
int v_1 = limit - cursor;
|
||||
lab1: {
|
||||
if (!(eq_s_b("\u092F\u094C")))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
break lab0;
|
||||
}
|
||||
cursor = limit - v_1;
|
||||
lab2: {
|
||||
if (!(eq_s_b("\u091B\u094C")))
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
break lab0;
|
||||
}
|
||||
cursor = limit - v_1;
|
||||
lab3: {
|
||||
if (!(eq_s_b("\u0928\u094C")))
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
break lab0;
|
||||
}
|
||||
cursor = limit - v_1;
|
||||
if (!(eq_s_b("\u0925\u0947")))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
if (!(eq_s_b("\u0924\u094D\u0930")))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_remove_category_3() {
|
||||
ket = cursor;
|
||||
if (find_among_b(a_3) == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean stem() {
|
||||
limit_backward = cursor;
|
||||
cursor = limit;
|
||||
int v_1 = limit - cursor;
|
||||
r_remove_category_1();
|
||||
cursor = limit - v_1;
|
||||
int v_2 = limit - cursor;
|
||||
lab0: {
|
||||
while(true)
|
||||
{
|
||||
int v_3 = limit - cursor;
|
||||
lab1: {
|
||||
int v_4 = limit - cursor;
|
||||
lab2: {
|
||||
int v_5 = limit - cursor;
|
||||
if (!r_check_category_2())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
cursor = limit - v_5;
|
||||
if (!r_remove_category_2())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
}
|
||||
cursor = limit - v_4;
|
||||
if (!r_remove_category_3())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
cursor = limit - v_3;
|
||||
break;
|
||||
}
|
||||
}
|
||||
cursor = limit - v_2;
|
||||
cursor = limit_backward;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof NepaliStemmer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return NepaliStemmer.class.getName().hashCode();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -1,376 +1,261 @@
|
|||
// This file was generated automatically by the Snowball to Java compiler
|
||||
// Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
|
||||
package org.tartarus.snowball.ext;
|
||||
|
||||
import org.tartarus.snowball.Among;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
|
||||
/**
|
||||
* This class was automatically generated by a Snowball to Java compiler
|
||||
* It implements the stemming algorithm defined by a snowball script.
|
||||
*/
|
||||
/**
|
||||
* This class implements the stemming algorithm defined by a snowball script.
|
||||
* <p>
|
||||
* Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
* </p>
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public class NorwegianStemmer extends org.tartarus.snowball.SnowballStemmer {
|
||||
|
||||
@SuppressWarnings("unused") public class NorwegianStemmer extends SnowballProgram {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final static Among a_0[] = {
|
||||
new Among("a", -1, 1),
|
||||
new Among("e", -1, 1),
|
||||
new Among("ede", 1, 1),
|
||||
new Among("ande", 1, 1),
|
||||
new Among("ende", 1, 1),
|
||||
new Among("ane", 1, 1),
|
||||
new Among("ene", 1, 1),
|
||||
new Among("hetene", 6, 1),
|
||||
new Among("erte", 1, 3),
|
||||
new Among("en", -1, 1),
|
||||
new Among("heten", 9, 1),
|
||||
new Among("ar", -1, 1),
|
||||
new Among("er", -1, 1),
|
||||
new Among("heter", 12, 1),
|
||||
new Among("s", -1, 2),
|
||||
new Among("as", 14, 1),
|
||||
new Among("es", 14, 1),
|
||||
new Among("edes", 16, 1),
|
||||
new Among("endes", 16, 1),
|
||||
new Among("enes", 16, 1),
|
||||
new Among("hetenes", 19, 1),
|
||||
new Among("ens", 14, 1),
|
||||
new Among("hetens", 21, 1),
|
||||
new Among("ers", 14, 1),
|
||||
new Among("ets", 14, 1),
|
||||
new Among("et", -1, 1),
|
||||
new Among("het", 25, 1),
|
||||
new Among("ert", -1, 3),
|
||||
new Among("ast", -1, 1)
|
||||
};
|
||||
|
||||
/* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
|
||||
private final static Among a_1[] = {
|
||||
new Among("dt", -1, -1),
|
||||
new Among("vt", -1, -1)
|
||||
};
|
||||
|
||||
private final static Among a_0[] = {
|
||||
new Among ( "a", -1, 1, "", methodObject ),
|
||||
new Among ( "e", -1, 1, "", methodObject ),
|
||||
new Among ( "ede", 1, 1, "", methodObject ),
|
||||
new Among ( "ande", 1, 1, "", methodObject ),
|
||||
new Among ( "ende", 1, 1, "", methodObject ),
|
||||
new Among ( "ane", 1, 1, "", methodObject ),
|
||||
new Among ( "ene", 1, 1, "", methodObject ),
|
||||
new Among ( "hetene", 6, 1, "", methodObject ),
|
||||
new Among ( "erte", 1, 3, "", methodObject ),
|
||||
new Among ( "en", -1, 1, "", methodObject ),
|
||||
new Among ( "heten", 9, 1, "", methodObject ),
|
||||
new Among ( "ar", -1, 1, "", methodObject ),
|
||||
new Among ( "er", -1, 1, "", methodObject ),
|
||||
new Among ( "heter", 12, 1, "", methodObject ),
|
||||
new Among ( "s", -1, 2, "", methodObject ),
|
||||
new Among ( "as", 14, 1, "", methodObject ),
|
||||
new Among ( "es", 14, 1, "", methodObject ),
|
||||
new Among ( "edes", 16, 1, "", methodObject ),
|
||||
new Among ( "endes", 16, 1, "", methodObject ),
|
||||
new Among ( "enes", 16, 1, "", methodObject ),
|
||||
new Among ( "hetenes", 19, 1, "", methodObject ),
|
||||
new Among ( "ens", 14, 1, "", methodObject ),
|
||||
new Among ( "hetens", 21, 1, "", methodObject ),
|
||||
new Among ( "ers", 14, 1, "", methodObject ),
|
||||
new Among ( "ets", 14, 1, "", methodObject ),
|
||||
new Among ( "et", -1, 1, "", methodObject ),
|
||||
new Among ( "het", 25, 1, "", methodObject ),
|
||||
new Among ( "ert", -1, 3, "", methodObject ),
|
||||
new Among ( "ast", -1, 1, "", methodObject )
|
||||
};
|
||||
private final static Among a_2[] = {
|
||||
new Among("leg", -1, 1),
|
||||
new Among("eleg", 0, 1),
|
||||
new Among("ig", -1, 1),
|
||||
new Among("eig", 2, 1),
|
||||
new Among("lig", 2, 1),
|
||||
new Among("elig", 4, 1),
|
||||
new Among("els", -1, 1),
|
||||
new Among("lov", -1, 1),
|
||||
new Among("elov", 7, 1),
|
||||
new Among("slov", 7, 1),
|
||||
new Among("hetslov", 9, 1)
|
||||
};
|
||||
|
||||
private final static Among a_1[] = {
|
||||
new Among ( "dt", -1, -1, "", methodObject ),
|
||||
new Among ( "vt", -1, -1, "", methodObject )
|
||||
};
|
||||
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
|
||||
|
||||
private final static Among a_2[] = {
|
||||
new Among ( "leg", -1, 1, "", methodObject ),
|
||||
new Among ( "eleg", 0, 1, "", methodObject ),
|
||||
new Among ( "ig", -1, 1, "", methodObject ),
|
||||
new Among ( "eig", 2, 1, "", methodObject ),
|
||||
new Among ( "lig", 2, 1, "", methodObject ),
|
||||
new Among ( "elig", 4, 1, "", methodObject ),
|
||||
new Among ( "els", -1, 1, "", methodObject ),
|
||||
new Among ( "lov", -1, 1, "", methodObject ),
|
||||
new Among ( "elov", 7, 1, "", methodObject ),
|
||||
new Among ( "slov", 7, 1, "", methodObject ),
|
||||
new Among ( "hetslov", 9, 1, "", methodObject )
|
||||
};
|
||||
private static final char g_s_ending[] = {119, 125, 149, 1 };
|
||||
|
||||
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
|
||||
private int I_x;
|
||||
private int I_p1;
|
||||
|
||||
private static final char g_s_ending[] = {119, 125, 149, 1 };
|
||||
|
||||
private int I_x;
|
||||
private int I_p1;
|
||||
private boolean r_mark_regions() {
|
||||
I_p1 = limit;
|
||||
int v_1 = cursor;
|
||||
{
|
||||
int c = cursor + 3;
|
||||
if (0 > c || c > limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = c;
|
||||
}
|
||||
I_x = cursor;
|
||||
cursor = v_1;
|
||||
golab0: while(true)
|
||||
{
|
||||
int v_2 = cursor;
|
||||
lab1: {
|
||||
if (!(in_grouping(g_v, 97, 248)))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
cursor = v_2;
|
||||
break golab0;
|
||||
}
|
||||
cursor = v_2;
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
golab2: while(true)
|
||||
{
|
||||
lab3: {
|
||||
if (!(out_grouping(g_v, 97, 248)))
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
break golab2;
|
||||
}
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
I_p1 = cursor;
|
||||
lab4: {
|
||||
if (!(I_p1 < I_x))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
I_p1 = I_x;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private void copy_from(NorwegianStemmer other) {
|
||||
I_x = other.I_x;
|
||||
I_p1 = other.I_p1;
|
||||
super.copy_from(other);
|
||||
}
|
||||
|
||||
private boolean r_mark_regions() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 26
|
||||
I_p1 = limit;
|
||||
// test, line 30
|
||||
v_1 = cursor;
|
||||
// (, line 30
|
||||
// hop, line 30
|
||||
{
|
||||
int c = cursor + 3;
|
||||
if (0 > c || c > limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = c;
|
||||
}
|
||||
// setmark x, line 30
|
||||
I_x = cursor;
|
||||
cursor = v_1;
|
||||
// goto, line 31
|
||||
golab0: while(true)
|
||||
{
|
||||
v_2 = cursor;
|
||||
lab1: do {
|
||||
if (!(in_grouping(g_v, 97, 248)))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
cursor = v_2;
|
||||
break golab0;
|
||||
} while (false);
|
||||
cursor = v_2;
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// gopast, line 31
|
||||
golab2: while(true)
|
||||
{
|
||||
lab3: do {
|
||||
if (!(out_grouping(g_v, 97, 248)))
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
break golab2;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark p1, line 31
|
||||
I_p1 = cursor;
|
||||
// try, line 32
|
||||
lab4: do {
|
||||
// (, line 32
|
||||
if (!(I_p1 < I_x))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
I_p1 = I_x;
|
||||
} while (false);
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_main_suffix() {
|
||||
int among_var;
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
// (, line 37
|
||||
// setlimit, line 38
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 38
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 38
|
||||
// [, line 38
|
||||
ket = cursor;
|
||||
// substring, line 38
|
||||
among_var = find_among_b(a_0, 29);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 38
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 44
|
||||
// delete, line 44
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 46
|
||||
// or, line 46
|
||||
lab0: do {
|
||||
v_3 = limit - cursor;
|
||||
lab1: do {
|
||||
if (!(in_grouping_b(g_s_ending, 98, 122)))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
break lab0;
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// (, line 46
|
||||
// literal, line 46
|
||||
if (!(eq_s_b(1, "k")))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (!(out_grouping_b(g_v, 97, 248)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
} while (false);
|
||||
// delete, line 46
|
||||
slice_del();
|
||||
break;
|
||||
case 3:
|
||||
// (, line 48
|
||||
// <-, line 48
|
||||
slice_from("er");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_consonant_pair() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
// (, line 52
|
||||
// test, line 53
|
||||
v_1 = limit - cursor;
|
||||
// (, line 53
|
||||
// setlimit, line 54
|
||||
v_2 = limit - cursor;
|
||||
// tomark, line 54
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_3 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_2;
|
||||
// (, line 54
|
||||
// [, line 54
|
||||
ket = cursor;
|
||||
// substring, line 54
|
||||
if (find_among_b(a_1, 2) == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
// ], line 54
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
cursor = limit - v_1;
|
||||
// next, line 59
|
||||
if (cursor <= limit_backward)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor--;
|
||||
// ], line 59
|
||||
bra = cursor;
|
||||
// delete, line 59
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_other_suffix() {
|
||||
int among_var;
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 62
|
||||
// setlimit, line 63
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 63
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 63
|
||||
// [, line 63
|
||||
ket = cursor;
|
||||
// substring, line 63
|
||||
among_var = find_among_b(a_2, 11);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 63
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 67
|
||||
// delete, line 67
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean stem() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
int v_4;
|
||||
// (, line 72
|
||||
// do, line 74
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// call mark_regions, line 74
|
||||
if (!r_mark_regions())
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// backwards, line 75
|
||||
limit_backward = cursor; cursor = limit;
|
||||
// (, line 75
|
||||
// do, line 76
|
||||
v_2 = limit - cursor;
|
||||
lab1: do {
|
||||
// call main_suffix, line 76
|
||||
if (!r_main_suffix())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_2;
|
||||
// do, line 77
|
||||
v_3 = limit - cursor;
|
||||
lab2: do {
|
||||
// call consonant_pair, line 77
|
||||
if (!r_consonant_pair())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// do, line 78
|
||||
v_4 = limit - cursor;
|
||||
lab3: do {
|
||||
// call other_suffix, line 78
|
||||
if (!r_other_suffix())
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
cursor = limit_backward; return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof NorwegianStemmer;
|
||||
private boolean r_main_suffix() {
|
||||
int among_var;
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
int v_2 = limit_backward;
|
||||
limit_backward = I_p1;
|
||||
ket = cursor;
|
||||
among_var = find_among_b(a_0);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch (among_var) {
|
||||
case 1:
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
lab0: {
|
||||
int v_3 = limit - cursor;
|
||||
lab1: {
|
||||
if (!(in_grouping_b(g_s_ending, 98, 122)))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
break lab0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return NorwegianStemmer.class.getName().hashCode();
|
||||
cursor = limit - v_3;
|
||||
if (!(eq_s_b("k")))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (!(out_grouping_b(g_v, 97, 248)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
slice_del();
|
||||
break;
|
||||
case 3:
|
||||
slice_from("er");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_consonant_pair() {
|
||||
int v_1 = limit - cursor;
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
int v_3 = limit_backward;
|
||||
limit_backward = I_p1;
|
||||
ket = cursor;
|
||||
if (find_among_b(a_1) == 0)
|
||||
{
|
||||
limit_backward = v_3;
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
limit_backward = v_3;
|
||||
cursor = limit - v_1;
|
||||
if (cursor <= limit_backward)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor--;
|
||||
bra = cursor;
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_other_suffix() {
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
int v_2 = limit_backward;
|
||||
limit_backward = I_p1;
|
||||
ket = cursor;
|
||||
if (find_among_b(a_2) == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
slice_del();
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean stem() {
|
||||
int v_1 = cursor;
|
||||
r_mark_regions();
|
||||
cursor = v_1;
|
||||
limit_backward = cursor;
|
||||
cursor = limit;
|
||||
int v_2 = limit - cursor;
|
||||
r_main_suffix();
|
||||
cursor = limit - v_2;
|
||||
int v_3 = limit - cursor;
|
||||
r_consonant_pair();
|
||||
cursor = limit - v_3;
|
||||
int v_4 = limit - cursor;
|
||||
r_other_suffix();
|
||||
cursor = limit - v_4;
|
||||
cursor = limit_backward;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof NorwegianStemmer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return NorwegianStemmer.class.getName().hashCode();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,367 +1,262 @@
|
|||
// This file was generated automatically by the Snowball to Java compiler
|
||||
// Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
|
||||
package org.tartarus.snowball.ext;
|
||||
|
||||
import org.tartarus.snowball.Among;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
|
||||
/**
|
||||
* This class was automatically generated by a Snowball to Java compiler
|
||||
* It implements the stemming algorithm defined by a snowball script.
|
||||
*/
|
||||
/**
|
||||
* This class implements the stemming algorithm defined by a snowball script.
|
||||
* <p>
|
||||
* Generated by Snowball 2.0.0 - https://snowballstem.org/
|
||||
* </p>
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public class SwedishStemmer extends org.tartarus.snowball.SnowballStemmer {
|
||||
|
||||
@SuppressWarnings("unused") public class SwedishStemmer extends SnowballProgram {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final static Among a_0[] = {
|
||||
new Among("a", -1, 1),
|
||||
new Among("arna", 0, 1),
|
||||
new Among("erna", 0, 1),
|
||||
new Among("heterna", 2, 1),
|
||||
new Among("orna", 0, 1),
|
||||
new Among("ad", -1, 1),
|
||||
new Among("e", -1, 1),
|
||||
new Among("ade", 6, 1),
|
||||
new Among("ande", 6, 1),
|
||||
new Among("arne", 6, 1),
|
||||
new Among("are", 6, 1),
|
||||
new Among("aste", 6, 1),
|
||||
new Among("en", -1, 1),
|
||||
new Among("anden", 12, 1),
|
||||
new Among("aren", 12, 1),
|
||||
new Among("heten", 12, 1),
|
||||
new Among("ern", -1, 1),
|
||||
new Among("ar", -1, 1),
|
||||
new Among("er", -1, 1),
|
||||
new Among("heter", 18, 1),
|
||||
new Among("or", -1, 1),
|
||||
new Among("s", -1, 2),
|
||||
new Among("as", 21, 1),
|
||||
new Among("arnas", 22, 1),
|
||||
new Among("ernas", 22, 1),
|
||||
new Among("ornas", 22, 1),
|
||||
new Among("es", 21, 1),
|
||||
new Among("ades", 26, 1),
|
||||
new Among("andes", 26, 1),
|
||||
new Among("ens", 21, 1),
|
||||
new Among("arens", 29, 1),
|
||||
new Among("hetens", 29, 1),
|
||||
new Among("erns", 21, 1),
|
||||
new Among("at", -1, 1),
|
||||
new Among("andet", -1, 1),
|
||||
new Among("het", -1, 1),
|
||||
new Among("ast", -1, 1)
|
||||
};
|
||||
|
||||
/* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
|
||||
private final static Among a_1[] = {
|
||||
new Among("dd", -1, -1),
|
||||
new Among("gd", -1, -1),
|
||||
new Among("nn", -1, -1),
|
||||
new Among("dt", -1, -1),
|
||||
new Among("gt", -1, -1),
|
||||
new Among("kt", -1, -1),
|
||||
new Among("tt", -1, -1)
|
||||
};
|
||||
|
||||
private final static Among a_0[] = {
|
||||
new Among ( "a", -1, 1, "", methodObject ),
|
||||
new Among ( "arna", 0, 1, "", methodObject ),
|
||||
new Among ( "erna", 0, 1, "", methodObject ),
|
||||
new Among ( "heterna", 2, 1, "", methodObject ),
|
||||
new Among ( "orna", 0, 1, "", methodObject ),
|
||||
new Among ( "ad", -1, 1, "", methodObject ),
|
||||
new Among ( "e", -1, 1, "", methodObject ),
|
||||
new Among ( "ade", 6, 1, "", methodObject ),
|
||||
new Among ( "ande", 6, 1, "", methodObject ),
|
||||
new Among ( "arne", 6, 1, "", methodObject ),
|
||||
new Among ( "are", 6, 1, "", methodObject ),
|
||||
new Among ( "aste", 6, 1, "", methodObject ),
|
||||
new Among ( "en", -1, 1, "", methodObject ),
|
||||
new Among ( "anden", 12, 1, "", methodObject ),
|
||||
new Among ( "aren", 12, 1, "", methodObject ),
|
||||
new Among ( "heten", 12, 1, "", methodObject ),
|
||||
new Among ( "ern", -1, 1, "", methodObject ),
|
||||
new Among ( "ar", -1, 1, "", methodObject ),
|
||||
new Among ( "er", -1, 1, "", methodObject ),
|
||||
new Among ( "heter", 18, 1, "", methodObject ),
|
||||
new Among ( "or", -1, 1, "", methodObject ),
|
||||
new Among ( "s", -1, 2, "", methodObject ),
|
||||
new Among ( "as", 21, 1, "", methodObject ),
|
||||
new Among ( "arnas", 22, 1, "", methodObject ),
|
||||
new Among ( "ernas", 22, 1, "", methodObject ),
|
||||
new Among ( "ornas", 22, 1, "", methodObject ),
|
||||
new Among ( "es", 21, 1, "", methodObject ),
|
||||
new Among ( "ades", 26, 1, "", methodObject ),
|
||||
new Among ( "andes", 26, 1, "", methodObject ),
|
||||
new Among ( "ens", 21, 1, "", methodObject ),
|
||||
new Among ( "arens", 29, 1, "", methodObject ),
|
||||
new Among ( "hetens", 29, 1, "", methodObject ),
|
||||
new Among ( "erns", 21, 1, "", methodObject ),
|
||||
new Among ( "at", -1, 1, "", methodObject ),
|
||||
new Among ( "andet", -1, 1, "", methodObject ),
|
||||
new Among ( "het", -1, 1, "", methodObject ),
|
||||
new Among ( "ast", -1, 1, "", methodObject )
|
||||
};
|
||||
private final static Among a_2[] = {
|
||||
new Among("ig", -1, 1),
|
||||
new Among("lig", 0, 1),
|
||||
new Among("els", -1, 1),
|
||||
new Among("fullt", -1, 3),
|
||||
new Among("l\u00F6st", -1, 2)
|
||||
};
|
||||
|
||||
private final static Among a_1[] = {
|
||||
new Among ( "dd", -1, -1, "", methodObject ),
|
||||
new Among ( "gd", -1, -1, "", methodObject ),
|
||||
new Among ( "nn", -1, -1, "", methodObject ),
|
||||
new Among ( "dt", -1, -1, "", methodObject ),
|
||||
new Among ( "gt", -1, -1, "", methodObject ),
|
||||
new Among ( "kt", -1, -1, "", methodObject ),
|
||||
new Among ( "tt", -1, -1, "", methodObject )
|
||||
};
|
||||
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };
|
||||
|
||||
private final static Among a_2[] = {
|
||||
new Among ( "ig", -1, 1, "", methodObject ),
|
||||
new Among ( "lig", 0, 1, "", methodObject ),
|
||||
new Among ( "els", -1, 1, "", methodObject ),
|
||||
new Among ( "fullt", -1, 3, "", methodObject ),
|
||||
new Among ( "l\u00F6st", -1, 2, "", methodObject )
|
||||
};
|
||||
private static final char g_s_ending[] = {119, 127, 149 };
|
||||
|
||||
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };
|
||||
private int I_x;
|
||||
private int I_p1;
|
||||
|
||||
private static final char g_s_ending[] = {119, 127, 149 };
|
||||
|
||||
private int I_x;
|
||||
private int I_p1;
|
||||
private boolean r_mark_regions() {
|
||||
I_p1 = limit;
|
||||
int v_1 = cursor;
|
||||
{
|
||||
int c = cursor + 3;
|
||||
if (0 > c || c > limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = c;
|
||||
}
|
||||
I_x = cursor;
|
||||
cursor = v_1;
|
||||
golab0: while(true)
|
||||
{
|
||||
int v_2 = cursor;
|
||||
lab1: {
|
||||
if (!(in_grouping(g_v, 97, 246)))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
cursor = v_2;
|
||||
break golab0;
|
||||
}
|
||||
cursor = v_2;
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
golab2: while(true)
|
||||
{
|
||||
lab3: {
|
||||
if (!(out_grouping(g_v, 97, 246)))
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
break golab2;
|
||||
}
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
I_p1 = cursor;
|
||||
lab4: {
|
||||
if (!(I_p1 < I_x))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
I_p1 = I_x;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private void copy_from(SwedishStemmer other) {
|
||||
I_x = other.I_x;
|
||||
I_p1 = other.I_p1;
|
||||
super.copy_from(other);
|
||||
}
|
||||
private boolean r_main_suffix() {
|
||||
int among_var;
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
int v_2 = limit_backward;
|
||||
limit_backward = I_p1;
|
||||
ket = cursor;
|
||||
among_var = find_among_b(a_0);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch (among_var) {
|
||||
case 1:
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
if (!(in_grouping_b(g_s_ending, 98, 121)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_mark_regions() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 26
|
||||
I_p1 = limit;
|
||||
// test, line 29
|
||||
v_1 = cursor;
|
||||
// (, line 29
|
||||
// hop, line 29
|
||||
{
|
||||
int c = cursor + 3;
|
||||
if (0 > c || c > limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = c;
|
||||
}
|
||||
// setmark x, line 29
|
||||
I_x = cursor;
|
||||
cursor = v_1;
|
||||
// goto, line 30
|
||||
golab0: while(true)
|
||||
{
|
||||
v_2 = cursor;
|
||||
lab1: do {
|
||||
if (!(in_grouping(g_v, 97, 246)))
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
cursor = v_2;
|
||||
break golab0;
|
||||
} while (false);
|
||||
cursor = v_2;
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// gopast, line 30
|
||||
golab2: while(true)
|
||||
{
|
||||
lab3: do {
|
||||
if (!(out_grouping(g_v, 97, 246)))
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
break golab2;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark p1, line 30
|
||||
I_p1 = cursor;
|
||||
// try, line 31
|
||||
lab4: do {
|
||||
// (, line 31
|
||||
if (!(I_p1 < I_x))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
I_p1 = I_x;
|
||||
} while (false);
|
||||
return true;
|
||||
}
|
||||
private boolean r_consonant_pair() {
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
int v_2 = limit_backward;
|
||||
limit_backward = I_p1;
|
||||
int v_3 = limit - cursor;
|
||||
if (find_among_b(a_1) == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
cursor = limit - v_3;
|
||||
ket = cursor;
|
||||
if (cursor <= limit_backward)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
cursor--;
|
||||
bra = cursor;
|
||||
slice_del();
|
||||
limit_backward = v_2;
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_main_suffix() {
|
||||
int among_var;
|
||||
int v_1;
|
||||
int v_2;
|
||||
// (, line 36
|
||||
// setlimit, line 37
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 37
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 37
|
||||
// [, line 37
|
||||
ket = cursor;
|
||||
// substring, line 37
|
||||
among_var = find_among_b(a_0, 37);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 37
|
||||
bra = cursor;
|
||||
limit_backward = v_2;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 44
|
||||
// delete, line 44
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 46
|
||||
if (!(in_grouping_b(g_s_ending, 98, 121)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 46
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
private boolean r_other_suffix() {
|
||||
int among_var;
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
int v_2 = limit_backward;
|
||||
limit_backward = I_p1;
|
||||
ket = cursor;
|
||||
among_var = find_among_b(a_2);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
bra = cursor;
|
||||
switch (among_var) {
|
||||
case 1:
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
slice_from("l\u00F6s");
|
||||
break;
|
||||
case 3:
|
||||
slice_from("full");
|
||||
break;
|
||||
}
|
||||
limit_backward = v_2;
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_consonant_pair() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
// setlimit, line 50
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 50
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 50
|
||||
// and, line 52
|
||||
v_3 = limit - cursor;
|
||||
// among, line 51
|
||||
if (find_among_b(a_1, 7) == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
cursor = limit - v_3;
|
||||
// (, line 52
|
||||
// [, line 52
|
||||
ket = cursor;
|
||||
// next, line 52
|
||||
if (cursor <= limit_backward)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
cursor--;
|
||||
// ], line 52
|
||||
bra = cursor;
|
||||
// delete, line 52
|
||||
slice_del();
|
||||
limit_backward = v_2;
|
||||
return true;
|
||||
}
|
||||
public boolean stem() {
|
||||
int v_1 = cursor;
|
||||
r_mark_regions();
|
||||
cursor = v_1;
|
||||
limit_backward = cursor;
|
||||
cursor = limit;
|
||||
int v_2 = limit - cursor;
|
||||
r_main_suffix();
|
||||
cursor = limit - v_2;
|
||||
int v_3 = limit - cursor;
|
||||
r_consonant_pair();
|
||||
cursor = limit - v_3;
|
||||
int v_4 = limit - cursor;
|
||||
r_other_suffix();
|
||||
cursor = limit - v_4;
|
||||
cursor = limit_backward;
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_other_suffix() {
|
||||
int among_var;
|
||||
int v_1;
|
||||
int v_2;
|
||||
// setlimit, line 55
|
||||
v_1 = limit - cursor;
|
||||
// tomark, line 55
|
||||
if (cursor < I_p1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
cursor = I_p1;
|
||||
v_2 = limit_backward;
|
||||
limit_backward = cursor;
|
||||
cursor = limit - v_1;
|
||||
// (, line 55
|
||||
// [, line 56
|
||||
ket = cursor;
|
||||
// substring, line 56
|
||||
among_var = find_among_b(a_2, 5);
|
||||
if (among_var == 0)
|
||||
{
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
}
|
||||
// ], line 56
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
limit_backward = v_2;
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 57
|
||||
// delete, line 57
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 58
|
||||
// <-, line 58
|
||||
slice_from("l\u00F6s");
|
||||
break;
|
||||
case 3:
|
||||
// (, line 59
|
||||
// <-, line 59
|
||||
slice_from("full");
|
||||
break;
|
||||
}
|
||||
limit_backward = v_2;
|
||||
return true;
|
||||
}
|
||||
@Override
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof SwedishStemmer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean stem() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
int v_4;
|
||||
// (, line 64
|
||||
// do, line 66
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// call mark_regions, line 66
|
||||
if (!r_mark_regions())
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// backwards, line 67
|
||||
limit_backward = cursor; cursor = limit;
|
||||
// (, line 67
|
||||
// do, line 68
|
||||
v_2 = limit - cursor;
|
||||
lab1: do {
|
||||
// call main_suffix, line 68
|
||||
if (!r_main_suffix())
|
||||
{
|
||||
break lab1;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_2;
|
||||
// do, line 69
|
||||
v_3 = limit - cursor;
|
||||
lab2: do {
|
||||
// call consonant_pair, line 69
|
||||
if (!r_consonant_pair())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
// do, line 70
|
||||
v_4 = limit - cursor;
|
||||
lab3: do {
|
||||
// call other_suffix, line 70
|
||||
if (!r_other_suffix())
|
||||
{
|
||||
break lab3;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
cursor = limit_backward; return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof SwedishStemmer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return SwedishStemmer.class.getName().hashCode();
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return SwedishStemmer.class.getName().hashCode();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,7 +1,7 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
|
||||
| From https://snowballstem.org/algorithms/danish/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| See https://snowballstem.org/license.html
|
||||
| Also see https://opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
|
@ -60,7 +60,7 @@ hvor | where
|
|||
eller | or
|
||||
hvad | what
|
||||
skal | must/shall etc.
|
||||
selv | myself/youself/herself/ourselves etc., even
|
||||
selv | myself/yourself/herself/ourselves etc., even
|
||||
her | here
|
||||
alle | all/everyone/everybody etc.
|
||||
vil | will (verb)
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
|
||||
| From https://snowballstem.org/algorithms/dutch/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| See https://snowballstem.org/license.html
|
||||
| Also see https://opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
|
||||
| A Dutch stop word list. Comments begin with vertical bar. Each stop
|
||||
| word is at the start of a line.
|
||||
|
||||
|
@ -117,3 +118,4 @@ uw | your
|
|||
iemand | somebody
|
||||
geweest | been; past participle of 'be'
|
||||
andere | other
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt
|
||||
| From https://snowballstem.org/algorithms/english/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| See https://snowballstem.org/license.html
|
||||
| Also see https://opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
|
@ -317,3 +317,4 @@ very
|
|||
| old
|
||||
| high
|
||||
| long
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
|
||||
| From https://snowballstem.org/algorithms/finnish/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| See https://snowballstem.org/license.html
|
||||
| Also see https://opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
|
@ -48,8 +48,8 @@ me meidän meidät meitä meissä meistä meihin meillä meiltä meille
|
|||
te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
|
||||
he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
|
||||
|
||||
tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
|
||||
tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
|
||||
tämä tämän tätä tässä tästä tähän tällä tältä tälle tänä täksi | this
|
||||
tuo tuon tuota tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
|
||||
se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
|
||||
nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
|
||||
nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
|
||||
|
@ -91,7 +91,6 @@ yli | over, across
|
|||
| other
|
||||
|
||||
kun | when
|
||||
niin | so
|
||||
nyt | now
|
||||
itse | self
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
|
||||
| From https://snowballstem.org/algorithms/french/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| See https://snowballstem.org/license.html
|
||||
| Also see https://opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
|
@ -169,8 +169,8 @@ eussent
|
|||
|
||||
| Later additions (from Jean-Christophe Deschamps)
|
||||
ceci | this
|
||||
cela | that
|
||||
celà | that
|
||||
cela | that (added 11 Apr 2012. Omission reported by Adrien Grand)
|
||||
celà | that (incorrect, though common)
|
||||
cet | this
|
||||
cette | this
|
||||
ici | here
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
|
||||
| From https://snowballstem.org/algorithms/german/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| See https://snowballstem.org/license.html
|
||||
| Also see https://opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
|
||||
| From https://snowballstem.org/algorithms/hungarian/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| See https://snowballstem.org/license.html
|
||||
| Also see https://opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
| From https://snowballstem.org/algorithms/indonesian/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See https://snowballstem.org/license.html
|
||||
| Also see https://opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
yang | that
|
||||
dan | and
|
||||
di | in
|
||||
dari | from
|
||||
ini | this
|
||||
pada kepada | at, to [person]
|
||||
ada adalah | there is, is
|
||||
dengan | with
|
||||
untuk | for
|
||||
dalam | in the
|
||||
oleh | by
|
||||
sebagai | as
|
||||
juga | also, too
|
||||
ke | to
|
||||
atau | or
|
||||
tidak | not
|
||||
itu | that
|
||||
sebuah | a
|
||||
tersebut | the
|
||||
dapat | can, may
|
||||
ia | he/she, yes
|
||||
telah | already
|
||||
satu | one
|
||||
memiliki | have
|
||||
mereka | they
|
||||
bahwa | that
|
||||
lebih | more, more than
|
||||
karena | because, since
|
||||
seorang | one person, same
|
||||
akan | will, about to
|
||||
seperti | as, like
|
||||
secara | on
|
||||
kemudian | later, then
|
||||
beberapa | some
|
||||
banyak | many
|
||||
antara | between
|
||||
setelah | after
|
||||
yaitu | that is
|
||||
hanya | only
|
||||
hingga | to
|
||||
serta | along with
|
||||
sama | same, and
|
||||
dia | he/she/it (informal)
|
||||
tetapi | but
|
||||
namun | however
|
||||
melalui | through
|
||||
bisa | can
|
||||
sehingga | so
|
||||
ketika | when
|
||||
suatu | a
|
||||
sendiri | own (adverb)
|
||||
bagi | for
|
||||
semua | all
|
||||
harus | must
|
||||
setiap | each, every
|
||||
maka | then
|
||||
maupun | as well
|
||||
tanpa | without
|
||||
saja | only
|
||||
jika | if
|
||||
bukan | not
|
||||
belum | not yet
|
||||
sedangkan | while
|
||||
yakni | i.e.
|
||||
meskipun | although
|
||||
hampir | almost
|
||||
kita | we/us (inclusive)
|
||||
demikian | thereby
|
||||
daripada | from/than/instead of
|
||||
apa | what/which/or/eh
|
||||
ialah | is
|
||||
sana | there
|
||||
begitu | so
|
||||
seseorang | someone
|
||||
selain | besides
|
||||
terlalu | too
|
||||
ataupun | or
|
||||
saya | me/I (formal)
|
||||
bila | if/when
|
||||
bagaimana | how
|
||||
tapi | but
|
||||
apabila | when/if
|
||||
kalau | if
|
||||
kami | we/us (exclusive)
|
||||
melainkan | but (rather)
|
||||
boleh | may, can
|
||||
aku | I/me (informal)
|
||||
anda | you (formal)
|
||||
kamu | you (informal)
|
||||
beliau | he/she/it (formal)
|
||||
kalian | you (plural)
|
|
@ -1,7 +1,7 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
|
||||
| From https://snowballstem.org/algorithms/italian/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| See https://snowballstem.org/license.html
|
||||
| Also see https://opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
|
||||
| From https://snowballstem.org/algorithms/norwegian/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| See https://snowballstem.org/license.html
|
||||
| Also see https://opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
|
@ -25,7 +25,7 @@ et | a/an
|
|||
den | it/this/that
|
||||
til | to
|
||||
er | is/am/are
|
||||
som | who/that
|
||||
som | who/which/that
|
||||
på | on
|
||||
de | they / you(formal)
|
||||
med | with
|
||||
|
@ -84,7 +84,6 @@ noen | some
|
|||
noe | some
|
||||
ville | would
|
||||
dere | you
|
||||
som | who/which/that
|
||||
deres | their/theirs
|
||||
kun | only/just
|
||||
ja | yes
|
||||
|
@ -129,7 +128,6 @@ mange | many
|
|||
også | also
|
||||
slik | just
|
||||
vært | been
|
||||
være | to be
|
||||
båe | both *
|
||||
begge | both
|
||||
siden | since
|
||||
|
@ -155,7 +153,6 @@ hennar | her/hers
|
|||
hennes | hers
|
||||
hoss | how *
|
||||
hossen | how *
|
||||
ikkje | not *
|
||||
ingi | no one *
|
||||
inkje | no one *
|
||||
korleis | how *
|
||||
|
@ -177,7 +174,6 @@ noka | some (fem.) *
|
|||
nokor | some *
|
||||
noko | some *
|
||||
nokre | some *
|
||||
si | his/hers *
|
||||
sia | since *
|
||||
sidan | since *
|
||||
so | so *
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
|
||||
| From https://snowballstem.org/algorithms/portuguese/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| See https://snowballstem.org/license.html
|
||||
| Also see https://opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
|
||||
| From https://snowballstem.org/algorithms/russian/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| See https://snowballstem.org/license.html
|
||||
| Also see https://opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
|
||||
| A Russian stop word list. Comments begin with vertical bar. Each stop
|
||||
| word is at the start of a line.
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
|
||||
| From https://snowballstem.org/algorithms/spanish/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| See https://snowballstem.org/license.html
|
||||
| Also see https://opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
|
||||
| From https://snowballstem.org/algorithms/swedish/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| See https://snowballstem.org/license.html
|
||||
| Also see https://opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
|
|
|
@ -108,7 +108,7 @@ import org.apache.lucene.util.automaton.Operations;
|
|||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
import org.tartarus.snowball.SnowballStemmer;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
/** tests random analysis chains */
|
||||
|
@ -404,10 +404,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
|||
return null; // unreachable code
|
||||
}
|
||||
});
|
||||
put(SnowballProgram.class, random -> {
|
||||
put(SnowballStemmer.class, random -> {
|
||||
try {
|
||||
String lang = TestSnowball.SNOWBALL_LANGS[random.nextInt(TestSnowball.SNOWBALL_LANGS.length)];
|
||||
Class<? extends SnowballProgram> clazz = Class.forName("org.tartarus.snowball.ext." + lang + "Stemmer").asSubclass(SnowballProgram.class);
|
||||
String lang = TestSnowball.SNOWBALL_LANGS.get(random.nextInt(TestSnowball.SNOWBALL_LANGS.size()));
|
||||
Class<? extends SnowballStemmer> clazz = Class.forName("org.tartarus.snowball.ext." + lang + "Stemmer").asSubclass(SnowballStemmer.class);
|
||||
return clazz.getConstructor().newInstance();
|
||||
} catch (Exception ex) {
|
||||
Rethrow.rethrow(ex);
|
||||
|
|
|
@ -18,11 +18,16 @@ package org.apache.lucene.analysis.snowball;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
|
||||
|
@ -97,12 +102,14 @@ public class TestSnowball extends BaseTokenStreamTestCase {
|
|||
}
|
||||
|
||||
/** for testing purposes ONLY */
|
||||
public static String SNOWBALL_LANGS[] = {
|
||||
"Armenian", "Basque", "Catalan", "Danish", "Dutch", "English",
|
||||
"Finnish", "French", "German2", "German", "Hungarian", "Irish",
|
||||
"Italian", "Kp", "Lovins", "Norwegian", "Porter", "Portuguese",
|
||||
"Romanian", "Russian", "Spanish", "Swedish", "Turkish"
|
||||
};
|
||||
public static final List<String> SNOWBALL_LANGS;
|
||||
static {
|
||||
try (InputStream in = TestSnowball.class.getResourceAsStream("languages.txt")) {
|
||||
SNOWBALL_LANGS = WordlistLoader.getLines(in, StandardCharsets.UTF_8);
|
||||
} catch (IOException e) {
|
||||
throw new UncheckedIOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void testEmptyTerm() throws IOException {
|
||||
for (final String lang : SNOWBALL_LANGS) {
|
||||
|
|
|
@ -18,51 +18,38 @@ package org.apache.lucene.analysis.snowball;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.LuceneTestCase.Nightly;
|
||||
|
||||
import static org.apache.lucene.analysis.VocabularyAssert.*;
|
||||
|
||||
/**
|
||||
* Test the snowball filters against the snowball data tests
|
||||
*/
|
||||
@Nightly
|
||||
public class TestSnowballVocab extends LuceneTestCase {
|
||||
/**
|
||||
* Run all languages against their snowball vocabulary tests.
|
||||
*/
|
||||
public void testStemmers() throws IOException {
|
||||
assertCorrectOutput("Arabic", "arabic");
|
||||
assertCorrectOutput("Danish", "danish");
|
||||
assertCorrectOutput("Dutch", "dutch");
|
||||
assertCorrectOutput("English", "english");
|
||||
assertCorrectOutput("Finnish", "finnish");
|
||||
assertCorrectOutput("French", "french");
|
||||
assertCorrectOutput("German", "german");
|
||||
assertCorrectOutput("German2", "german2");
|
||||
assertCorrectOutput("Hungarian", "hungarian");
|
||||
assertCorrectOutput("Italian", "italian");
|
||||
assertCorrectOutput("Kp", "kraaij_pohlmann");
|
||||
assertCorrectOutput("Lovins", "lovins");
|
||||
assertCorrectOutput("Norwegian", "norwegian");
|
||||
assertCorrectOutput("Porter", "porter");
|
||||
assertCorrectOutput("Portuguese", "portuguese");
|
||||
assertCorrectOutput("Romanian", "romanian");
|
||||
assertCorrectOutput("Russian", "russian");
|
||||
assertCorrectOutput("Spanish", "spanish");
|
||||
assertCorrectOutput("Swedish", "swedish");
|
||||
assertCorrectOutput("Turkish", "turkish");
|
||||
try (InputStream in = getClass().getResourceAsStream("test_languages.txt")) {
|
||||
for (String datafile : WordlistLoader.getLines(in, StandardCharsets.UTF_8)) {
|
||||
String language = "" + Character.toUpperCase(datafile.charAt(0)) + datafile.substring(1);
|
||||
assertCorrectOutput(language, datafile + ".zip");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For the supplied language, run the stemmer against all strings in voc.txt
|
||||
* The output should be the same as the string in output.txt
|
||||
*/
|
||||
private void assertCorrectOutput(final String snowballLanguage, String dataDirectory)
|
||||
private void assertCorrectOutput(final String snowballLanguage, String zipfile)
|
||||
throws IOException {
|
||||
if (VERBOSE) System.out.println("checking snowball language: " + snowballLanguage);
|
||||
|
||||
|
@ -74,8 +61,7 @@ public class TestSnowballVocab extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
assertVocabulary(a, getDataPath("TestSnowballVocabData.zip"),
|
||||
dataDirectory + "/voc.txt", dataDirectory + "/output.txt");
|
||||
assertVocabulary(a, getDataPath(zipfile), "voc.txt", "output.txt");
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,32 @@
|
|||
Arabic
|
||||
Armenian
|
||||
Basque
|
||||
Catalan
|
||||
Danish
|
||||
Dutch
|
||||
English
|
||||
Estonian
|
||||
Finnish
|
||||
French
|
||||
German2
|
||||
German
|
||||
Greek
|
||||
Hindi
|
||||
Hungarian
|
||||
Indonesian
|
||||
Irish
|
||||
Italian
|
||||
Kp
|
||||
Lithuanian
|
||||
Lovins
|
||||
Nepali
|
||||
Norwegian
|
||||
Porter
|
||||
Portuguese
|
||||
Romanian
|
||||
Russian
|
||||
Serbian
|
||||
Spanish
|
||||
Swedish
|
||||
Tamil
|
||||
Turkish
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,20 @@
|
|||
danish
|
||||
dutch
|
||||
english
|
||||
finnish
|
||||
german
|
||||
german2
|
||||
hungarian
|
||||
irish
|
||||
italian
|
||||
kp
|
||||
lovins
|
||||
nepali
|
||||
norwegian
|
||||
porter
|
||||
portuguese
|
||||
romanian
|
||||
russian
|
||||
spanish
|
||||
swedish
|
||||
turkish
|
Binary file not shown.
|
@ -1945,7 +1945,7 @@ ${ant.project.name}.test.dependencies=${test.classpath.list}
|
|||
<!-- svg files generated by gnuplot -->
|
||||
<pattern substring="Produced by GNUPLOT"/>
|
||||
<!-- snowball stemmers generated by snowball compiler -->
|
||||
<pattern substring="This file was generated automatically by the Snowball to Java compiler"/>
|
||||
<pattern substring="Generated by Snowball"/>
|
||||
<!-- parsers generated by antlr -->
|
||||
<pattern substring="ANTLR GENERATED CODE"/>
|
||||
</rat:substringMatcher>
|
||||
|
|
|
@ -29,7 +29,7 @@ import org.carrot2.text.linguistic.IStemmerFactory;
|
|||
import org.carrot2.util.ReflectionUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
import org.tartarus.snowball.SnowballStemmer;
|
||||
import org.tartarus.snowball.ext.DanishStemmer;
|
||||
import org.tartarus.snowball.ext.DutchStemmer;
|
||||
import org.tartarus.snowball.ext.EnglishStemmer;
|
||||
|
@ -83,7 +83,7 @@ public class LuceneCarrot2StemmerFactory implements IStemmerFactory {
|
|||
* This mapping is not dynamic because we want to keep the possibility to
|
||||
* obfuscate these classes.
|
||||
*/
|
||||
private static HashMap<LanguageCode, Class<? extends SnowballProgram>> snowballStemmerClasses;
|
||||
private static HashMap<LanguageCode, Class<? extends SnowballStemmer>> snowballStemmerClasses;
|
||||
static {
|
||||
snowballStemmerClasses = new HashMap<>();
|
||||
snowballStemmerClasses.put(LanguageCode.DANISH, DanishStemmer.class);
|
||||
|
@ -110,9 +110,9 @@ public class LuceneCarrot2StemmerFactory implements IStemmerFactory {
|
|||
* An adapter converting Snowball programs into {@link IStemmer} interface.
|
||||
*/
|
||||
private static class SnowballStemmerAdapter implements IStemmer {
|
||||
private final SnowballProgram snowballStemmer;
|
||||
private final SnowballStemmer snowballStemmer;
|
||||
|
||||
public SnowballStemmerAdapter(SnowballProgram snowballStemmer) {
|
||||
public SnowballStemmerAdapter(SnowballStemmer snowballStemmer) {
|
||||
this.snowballStemmer = snowballStemmer;
|
||||
}
|
||||
|
||||
|
@ -129,11 +129,11 @@ public class LuceneCarrot2StemmerFactory implements IStemmerFactory {
|
|||
|
||||
/**
|
||||
* Create and return an {@link IStemmer} adapter for a
|
||||
* {@link SnowballProgram} for a given language code. An identity stemmer is
|
||||
* {@link SnowballStemmer} for a given language code. An identity stemmer is
|
||||
* returned for unknown languages.
|
||||
*/
|
||||
public static IStemmer createStemmer(LanguageCode language) {
|
||||
final Class<? extends SnowballProgram> stemmerClazz = snowballStemmerClasses
|
||||
final Class<? extends SnowballStemmer> stemmerClazz = snowballStemmerClasses
|
||||
.get(language);
|
||||
|
||||
if (stemmerClazz == null) {
|
||||
|
|
Loading…
Reference in New Issue