From 42a1eb04038a15556755a384d23dddd35b9f7843 Mon Sep 17 00:00:00 2001 From: Gert Morten Paimla Date: Sun, 30 Jun 2019 09:12:55 +0900 Subject: [PATCH] LUCENE-8891: Add snowball stemmer and analyzer for Estonian language. Signed-off-by: Tomoko Uchida --- lucene/CHANGES.txt | 3 + .../lucene/analysis/et/EstonianAnalyzer.java | 127 ++ .../lucene/analysis/et/package-info.java | 21 + .../snowball/ext/EstonianStemmer.java | 1904 +++++++++++++++++ .../apache/lucene/analysis/et/stopwords.txt | 1603 ++++++++++++++ .../analysis/et/TestEstonianAnalyzer.java | 62 + 6 files changed, 3720 insertions(+) create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/et/EstonianAnalyzer.java create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/et/package-info.java create mode 100644 lucene/analysis/common/src/java/org/tartarus/snowball/ext/EstonianStemmer.java create mode 100644 lucene/analysis/common/src/resources/org/apache/lucene/analysis/et/stopwords.txt create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/et/TestEstonianAnalyzer.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index abff75fae78..aecf105fecd 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -52,6 +52,9 @@ API Changes New Features +* LUCENE-8891: Snowball stemmer/analyzer for the Estonian language. + (Gert Morten Paimla via Tomoko Uchida) + * LUCENE-8815: Provide a DoubleValues implementation for retrieving the value of features without requiring a separate numeric field. Note that as feature values are stored with only 8 bits of mantissa the values returned may have a delta from the original values indexed. 
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/et/EstonianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/et/EstonianAnalyzer.java new file mode 100644 index 00000000000..e8a839f933a --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/et/EstonianAnalyzer.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.et; + + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.LowerCaseFilter; +import org.apache.lucene.analysis.StopFilter; +import org.apache.lucene.analysis.StopwordAnalyzerBase; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; +import org.apache.lucene.analysis.snowball.SnowballFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.tartarus.snowball.ext.EstonianStemmer; + +/** + * {@link Analyzer} for Estonian. 
+ */
+public final class EstonianAnalyzer extends StopwordAnalyzerBase {
+  private final CharArraySet stemExclusionSet; // unmodifiable copy of the terms that must not be stemmed
+
+  /** File containing default Estonian stopwords. */
+  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+  /**
+   * Returns an unmodifiable instance of the default stop words set.
+   * @return default stop words set.
+   */
+  public static CharArraySet getDefaultStopSet(){
+    return DefaultSetHolder.DEFAULT_STOP_SET;
+  }
+
+  /**
+   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+   * accesses the static final set the first time.
+   */
+  private static class DefaultSetHolder {
+    static final CharArraySet DEFAULT_STOP_SET;
+
+    static {
+      try {
+        DEFAULT_STOP_SET = loadStopwordSet(false,
+            EstonianAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+      } catch (IOException ex) {
+        // The default set is part of the distribution (JAR), so it should always be present;
+        // keep the IOException as the cause so a missing or unreadable resource can be diagnosed.
+        throw new RuntimeException("Unable to load default stopword set", ex);
+      }
+    }
+  }
+
+  /**
+   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+   */
+  public EstonianAnalyzer() {
+    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   *
+   * @param stopwords a stopword set
+   */
+  public EstonianAnalyzer(CharArraySet stopwords) {
+    this(stopwords, CharArraySet.EMPTY_SET);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is provided
+   * this analyzer will add a {@link SetKeywordMarkerFilter} before stemming.
+   *
+   * @param stopwords a stopword set
+   * @param stemExclusionSet a set of terms not to be stemmed
+   */
+  public EstonianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  }
+
+  /**
+   * Creates a {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
+   * which tokenizes all the text in the provided {@link Reader}.
+   *
+   * @param fieldName name of the field being analyzed; not consulted by this analyzer
+   * @return A
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
+   *         built from a {@link StandardTokenizer} filtered with
+   *         {@link LowerCaseFilter}, {@link StopFilter},
+   *         {@link SetKeywordMarkerFilter} if a stem exclusion set is
+   *         provided and {@link SnowballFilter}.
+   */
+  @Override
+  protected TokenStreamComponents createComponents(String fieldName) {
+    final Tokenizer source = new StandardTokenizer();
+    TokenStream result = new LowerCaseFilter(source);
+    result = new StopFilter(result, stopwords);
+    if(!stemExclusionSet.isEmpty()) // the marker filter is only added when there are terms to exclude
+      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+    result = new SnowballFilter(result, new EstonianStemmer());
+    return new TokenStreamComponents(source, result);
+  }
+
+  /** Lower-cases {@code in}; applies neither the stop filter nor the stemmer. */
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    return new LowerCaseFilter(in);
+  }
+}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/et/package-info.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/et/package-info.java
new file mode 100644
index 00000000000..624e0c011b0
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/et/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Analyzer for Estonian. + */ +package org.apache.lucene.analysis.et; diff --git a/lucene/analysis/common/src/java/org/tartarus/snowball/ext/EstonianStemmer.java b/lucene/analysis/common/src/java/org/tartarus/snowball/ext/EstonianStemmer.java new file mode 100644 index 00000000000..5e7fb186f75 --- /dev/null +++ b/lucene/analysis/common/src/java/org/tartarus/snowball/ext/EstonianStemmer.java @@ -0,0 +1,1904 @@ +// This file was generated automatically by the Snowball to Java compiler + +package org.tartarus.snowball.ext; + +import org.tartarus.snowball.Among; + +/** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. 
+ */ + +@SuppressWarnings("unused") public class EstonianStemmer extends org.tartarus.snowball.SnowballProgram { + + private static final long serialVersionUID = 1L; + + /* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup(); + + private final static Among a_0[] = { + new Among ( "gi", -1, 1, "", methodObject ), + new Among ( "ki", -1, 2, "", methodObject ) + }; + + private final static Among a_1[] = { + new Among ( "da", -1, 11, "", methodObject ), + new Among ( "mata", -1, 3, "", methodObject ), + new Among ( "b", -1, 13, "", methodObject ), + new Among ( "ksid", -1, 2, "", methodObject ), + new Among ( "nuksid", 3, 1, "", methodObject ), + new Among ( "me", -1, 10, "", methodObject ), + new Among ( "sime", 5, 7, "", methodObject ), + new Among ( "ksime", 6, 2, "", methodObject ), + new Among ( "nuksime", 7, 1, "", methodObject ), + new Among ( "akse", -1, 6, "", methodObject ), + new Among ( "dakse", 9, 4, "", methodObject ), + new Among ( "takse", 9, 4, "", methodObject ), + new Among ( "site", -1, 8, "", methodObject ), + new Among ( "ksite", 12, 2, "", methodObject ), + new Among ( "nuksite", 13, 1, "", methodObject ), + new Among ( "n", -1, 12, "", methodObject ), + new Among ( "sin", 15, 9, "", methodObject ), + new Among ( "ksin", 16, 2, "", methodObject ), + new Among ( "nuksin", 17, 1, "", methodObject ), + new Among ( "daks", -1, 5, "", methodObject ), + new Among ( "taks", -1, 5, "", methodObject ) + }; + + private final static Among a_2[] = { + new Among ( "aa", -1, -1, "", methodObject ), + new Among ( "ee", -1, -1, "", methodObject ), + new Among ( "ii", -1, -1, "", methodObject ), + new Among ( "oo", -1, -1, "", methodObject ), + new Among ( "uu", -1, -1, "", methodObject ), + new Among ( "\u00E4\u00E4", -1, -1, "", methodObject ), + new Among ( "\u00F5\u00F5", -1, -1, "", methodObject ), + new Among ( "\u00F6\u00F6", -1, -1, "", methodObject ), + new Among ( "\u00FC\u00FC", 
-1, -1, "", methodObject ) + }; + + private final static Among a_3[] = { + new Among ( "i", -1, 1, "", methodObject ) + }; + + private final static Among a_4[] = { + new Among ( "lane", -1, 3, "", methodObject ), + new Among ( "line", -1, 11, "", methodObject ), + new Among ( "mine", -1, 7, "", methodObject ), + new Among ( "lasse", -1, 1, "", methodObject ), + new Among ( "lisse", -1, 9, "", methodObject ), + new Among ( "misse", -1, 5, "", methodObject ), + new Among ( "lasi", -1, 4, "", methodObject ), + new Among ( "lisi", -1, 12, "", methodObject ), + new Among ( "misi", -1, 8, "", methodObject ), + new Among ( "last", -1, 2, "", methodObject ), + new Among ( "list", -1, 10, "", methodObject ), + new Among ( "mist", -1, 6, "", methodObject ) + }; + + private final static Among a_5[] = { + new Among ( "ga", -1, 5, "", methodObject ), + new Among ( "ta", -1, 7, "", methodObject ), + new Among ( "le", -1, 3, "", methodObject ), + new Among ( "sse", -1, 1, "", methodObject ), + new Among ( "l", -1, 9, "", methodObject ), + new Among ( "s", -1, 8, "", methodObject ), + new Among ( "ks", 5, 6, "", methodObject ), + new Among ( "t", -1, 8, "", methodObject ), + new Among ( "lt", 7, 4, "", methodObject ), + new Among ( "st", 7, 2, "", methodObject ) + }; + + private final static Among a_6[] = { + new Among ( "d", -1, 7, "", methodObject ), + new Among ( "sid", 0, 4, "", methodObject ), + new Among ( "de", -1, 6, "", methodObject ), + new Among ( "ikkude", 2, 1, "", methodObject ), + new Among ( "ike", -1, 3, "", methodObject ), + new Among ( "ikke", -1, 2, "", methodObject ), + new Among ( "te", -1, 5, "", methodObject ) + }; + + private final static Among a_7[] = { + new Among ( "kk", -1, -1, "", methodObject ), + new Among ( "pp", -1, -1, "", methodObject ), + new Among ( "tt", -1, -1, "", methodObject ) + }; + + private final static Among a_8[] = { + new Among ( "va", -1, -1, "", methodObject ), + new Among ( "du", -1, -1, "", methodObject ), + new Among ( "nu", 
-1, -1, "", methodObject ), + new Among ( "tu", -1, -1, "", methodObject ) + }; + + private final static Among a_9[] = { + new Among ( "ma", -1, 2, "", methodObject ), + new Among ( "mai", -1, 1, "", methodObject ), + new Among ( "m", -1, 3, "", methodObject ) + }; + + private final static Among a_10[] = { + new Among ( "joob", -1, 1, "", methodObject ), + new Among ( "jood", -1, 1, "", methodObject ), + new Among ( "joodakse", 1, 3, "", methodObject ), + new Among ( "jooma", -1, 3, "", methodObject ), + new Among ( "joomata", 3, 3, "", methodObject ), + new Among ( "joome", -1, 1, "", methodObject ), + new Among ( "joon", -1, 1, "", methodObject ), + new Among ( "joote", -1, 1, "", methodObject ), + new Among ( "joovad", -1, 1, "", methodObject ), + new Among ( "juua", -1, 3, "", methodObject ), + new Among ( "juuakse", 9, 3, "", methodObject ), + new Among ( "j\u00E4i", -1, 38, "", methodObject ), + new Among ( "j\u00E4id", 11, 38, "", methodObject ), + new Among ( "j\u00E4ime", 11, 38, "", methodObject ), + new Among ( "j\u00E4in", 11, 38, "", methodObject ), + new Among ( "j\u00E4ite", 11, 38, "", methodObject ), + new Among ( "j\u00E4\u00E4b", -1, 36, "", methodObject ), + new Among ( "j\u00E4\u00E4d", -1, 36, "", methodObject ), + new Among ( "j\u00E4\u00E4da", 17, 39, "", methodObject ), + new Among ( "j\u00E4\u00E4dakse", 18, 39, "", methodObject ), + new Among ( "j\u00E4\u00E4di", 17, 39, "", methodObject ), + new Among ( "j\u00E4\u00E4ks", -1, 37, "", methodObject ), + new Among ( "j\u00E4\u00E4ksid", 21, 37, "", methodObject ), + new Among ( "j\u00E4\u00E4ksime", 21, 37, "", methodObject ), + new Among ( "j\u00E4\u00E4ksin", 21, 37, "", methodObject ), + new Among ( "j\u00E4\u00E4ksite", 21, 37, "", methodObject ), + new Among ( "j\u00E4\u00E4ma", -1, 39, "", methodObject ), + new Among ( "j\u00E4\u00E4mata", 26, 39, "", methodObject ), + new Among ( "j\u00E4\u00E4me", -1, 36, "", methodObject ), + new Among ( "j\u00E4\u00E4n", -1, 36, "", methodObject 
), + new Among ( "j\u00E4\u00E4te", -1, 36, "", methodObject ), + new Among ( "j\u00E4\u00E4vad", -1, 36, "", methodObject ), + new Among ( "j\u00F5i", -1, 2, "", methodObject ), + new Among ( "j\u00F5id", 32, 2, "", methodObject ), + new Among ( "j\u00F5ime", 32, 2, "", methodObject ), + new Among ( "j\u00F5in", 32, 2, "", methodObject ), + new Among ( "j\u00F5ite", 32, 2, "", methodObject ), + new Among ( "keeb", -1, 12, "", methodObject ), + new Among ( "keed", -1, 12, "", methodObject ), + new Among ( "keedakse", 38, 14, "", methodObject ), + new Among ( "keeks", -1, 13, "", methodObject ), + new Among ( "keeksid", 40, 13, "", methodObject ), + new Among ( "keeksime", 40, 13, "", methodObject ), + new Among ( "keeksin", 40, 13, "", methodObject ), + new Among ( "keeksite", 40, 13, "", methodObject ), + new Among ( "keema", -1, 14, "", methodObject ), + new Among ( "keemata", 45, 14, "", methodObject ), + new Among ( "keeme", -1, 12, "", methodObject ), + new Among ( "keen", -1, 12, "", methodObject ), + new Among ( "kees", -1, 12, "", methodObject ), + new Among ( "keeta", -1, 14, "", methodObject ), + new Among ( "keete", -1, 12, "", methodObject ), + new Among ( "keevad", -1, 12, "", methodObject ), + new Among ( "k\u00E4ia", -1, 24, "", methodObject ), + new Among ( "k\u00E4iakse", 53, 24, "", methodObject ), + new Among ( "k\u00E4ib", -1, 22, "", methodObject ), + new Among ( "k\u00E4id", -1, 22, "", methodObject ), + new Among ( "k\u00E4idi", 56, 24, "", methodObject ), + new Among ( "k\u00E4iks", -1, 23, "", methodObject ), + new Among ( "k\u00E4iksid", 58, 23, "", methodObject ), + new Among ( "k\u00E4iksime", 58, 23, "", methodObject ), + new Among ( "k\u00E4iksin", 58, 23, "", methodObject ), + new Among ( "k\u00E4iksite", 58, 23, "", methodObject ), + new Among ( "k\u00E4ima", -1, 24, "", methodObject ), + new Among ( "k\u00E4imata", 63, 24, "", methodObject ), + new Among ( "k\u00E4ime", -1, 22, "", methodObject ), + new Among ( "k\u00E4in", -1, 22, 
"", methodObject ), + new Among ( "k\u00E4is", -1, 22, "", methodObject ), + new Among ( "k\u00E4ite", -1, 22, "", methodObject ), + new Among ( "k\u00E4ivad", -1, 22, "", methodObject ), + new Among ( "laob", -1, 47, "", methodObject ), + new Among ( "laod", -1, 47, "", methodObject ), + new Among ( "laoks", -1, 48, "", methodObject ), + new Among ( "laoksid", 72, 48, "", methodObject ), + new Among ( "laoksime", 72, 48, "", methodObject ), + new Among ( "laoksin", 72, 48, "", methodObject ), + new Among ( "laoksite", 72, 48, "", methodObject ), + new Among ( "laome", -1, 47, "", methodObject ), + new Among ( "laon", -1, 47, "", methodObject ), + new Among ( "laote", -1, 47, "", methodObject ), + new Among ( "laovad", -1, 47, "", methodObject ), + new Among ( "loeb", -1, 43, "", methodObject ), + new Among ( "loed", -1, 43, "", methodObject ), + new Among ( "loeks", -1, 44, "", methodObject ), + new Among ( "loeksid", 83, 44, "", methodObject ), + new Among ( "loeksime", 83, 44, "", methodObject ), + new Among ( "loeksin", 83, 44, "", methodObject ), + new Among ( "loeksite", 83, 44, "", methodObject ), + new Among ( "loeme", -1, 43, "", methodObject ), + new Among ( "loen", -1, 43, "", methodObject ), + new Among ( "loete", -1, 43, "", methodObject ), + new Among ( "loevad", -1, 43, "", methodObject ), + new Among ( "loob", -1, 19, "", methodObject ), + new Among ( "lood", -1, 19, "", methodObject ), + new Among ( "loodi", 93, 21, "", methodObject ), + new Among ( "looks", -1, 20, "", methodObject ), + new Among ( "looksid", 95, 20, "", methodObject ), + new Among ( "looksime", 95, 20, "", methodObject ), + new Among ( "looksin", 95, 20, "", methodObject ), + new Among ( "looksite", 95, 20, "", methodObject ), + new Among ( "looma", -1, 21, "", methodObject ), + new Among ( "loomata", 100, 21, "", methodObject ), + new Among ( "loome", -1, 19, "", methodObject ), + new Among ( "loon", -1, 19, "", methodObject ), + new Among ( "loote", -1, 19, "", methodObject ), 
+ new Among ( "loovad", -1, 19, "", methodObject ), + new Among ( "luua", -1, 21, "", methodObject ), + new Among ( "luuakse", 106, 21, "", methodObject ), + new Among ( "l\u00F5i", -1, 18, "", methodObject ), + new Among ( "l\u00F5id", 108, 18, "", methodObject ), + new Among ( "l\u00F5ime", 108, 18, "", methodObject ), + new Among ( "l\u00F5in", 108, 18, "", methodObject ), + new Among ( "l\u00F5ite", 108, 18, "", methodObject ), + new Among ( "l\u00F6\u00F6b", -1, 15, "", methodObject ), + new Among ( "l\u00F6\u00F6d", -1, 15, "", methodObject ), + new Among ( "l\u00F6\u00F6dakse", 114, 17, "", methodObject ), + new Among ( "l\u00F6\u00F6di", 114, 17, "", methodObject ), + new Among ( "l\u00F6\u00F6ks", -1, 16, "", methodObject ), + new Among ( "l\u00F6\u00F6ksid", 117, 16, "", methodObject ), + new Among ( "l\u00F6\u00F6ksime", 117, 16, "", methodObject ), + new Among ( "l\u00F6\u00F6ksin", 117, 16, "", methodObject ), + new Among ( "l\u00F6\u00F6ksite", 117, 16, "", methodObject ), + new Among ( "l\u00F6\u00F6ma", -1, 17, "", methodObject ), + new Among ( "l\u00F6\u00F6mata", 122, 17, "", methodObject ), + new Among ( "l\u00F6\u00F6me", -1, 15, "", methodObject ), + new Among ( "l\u00F6\u00F6n", -1, 15, "", methodObject ), + new Among ( "l\u00F6\u00F6te", -1, 15, "", methodObject ), + new Among ( "l\u00F6\u00F6vad", -1, 15, "", methodObject ), + new Among ( "l\u00FC\u00FCa", -1, 17, "", methodObject ), + new Among ( "l\u00FC\u00FCakse", 128, 17, "", methodObject ), + new Among ( "m\u00FC\u00FCa", -1, 42, "", methodObject ), + new Among ( "m\u00FC\u00FCakse", 130, 42, "", methodObject ), + new Among ( "m\u00FC\u00FCb", -1, 40, "", methodObject ), + new Among ( "m\u00FC\u00FCd", -1, 40, "", methodObject ), + new Among ( "m\u00FC\u00FCdi", 133, 42, "", methodObject ), + new Among ( "m\u00FC\u00FCks", -1, 41, "", methodObject ), + new Among ( "m\u00FC\u00FCksid", 135, 41, "", methodObject ), + new Among ( "m\u00FC\u00FCksime", 135, 41, "", methodObject ), + new 
Among ( "m\u00FC\u00FCksin", 135, 41, "", methodObject ), + new Among ( "m\u00FC\u00FCksite", 135, 41, "", methodObject ), + new Among ( "m\u00FC\u00FCma", -1, 42, "", methodObject ), + new Among ( "m\u00FC\u00FCmata", 140, 42, "", methodObject ), + new Among ( "m\u00FC\u00FCme", -1, 40, "", methodObject ), + new Among ( "m\u00FC\u00FCn", -1, 40, "", methodObject ), + new Among ( "m\u00FC\u00FCs", -1, 40, "", methodObject ), + new Among ( "m\u00FC\u00FCte", -1, 40, "", methodObject ), + new Among ( "m\u00FC\u00FCvad", -1, 40, "", methodObject ), + new Among ( "n\u00E4eb", -1, 52, "", methodObject ), + new Among ( "n\u00E4ed", -1, 52, "", methodObject ), + new Among ( "n\u00E4eks", -1, 53, "", methodObject ), + new Among ( "n\u00E4eksid", 149, 53, "", methodObject ), + new Among ( "n\u00E4eksime", 149, 53, "", methodObject ), + new Among ( "n\u00E4eksin", 149, 53, "", methodObject ), + new Among ( "n\u00E4eksite", 149, 53, "", methodObject ), + new Among ( "n\u00E4eme", -1, 52, "", methodObject ), + new Among ( "n\u00E4en", -1, 52, "", methodObject ), + new Among ( "n\u00E4ete", -1, 52, "", methodObject ), + new Among ( "n\u00E4evad", -1, 52, "", methodObject ), + new Among ( "n\u00E4gema", -1, 54, "", methodObject ), + new Among ( "n\u00E4gemata", 158, 54, "", methodObject ), + new Among ( "n\u00E4ha", -1, 54, "", methodObject ), + new Among ( "n\u00E4hakse", 160, 54, "", methodObject ), + new Among ( "n\u00E4hti", -1, 54, "", methodObject ), + new Among ( "p\u00F5eb", -1, 45, "", methodObject ), + new Among ( "p\u00F5ed", -1, 45, "", methodObject ), + new Among ( "p\u00F5eks", -1, 46, "", methodObject ), + new Among ( "p\u00F5eksid", 165, 46, "", methodObject ), + new Among ( "p\u00F5eksime", 165, 46, "", methodObject ), + new Among ( "p\u00F5eksin", 165, 46, "", methodObject ), + new Among ( "p\u00F5eksite", 165, 46, "", methodObject ), + new Among ( "p\u00F5eme", -1, 45, "", methodObject ), + new Among ( "p\u00F5en", -1, 45, "", methodObject ), + new Among ( 
"p\u00F5ete", -1, 45, "", methodObject ), + new Among ( "p\u00F5evad", -1, 45, "", methodObject ), + new Among ( "saab", -1, 4, "", methodObject ), + new Among ( "saad", -1, 4, "", methodObject ), + new Among ( "saada", 175, 7, "", methodObject ), + new Among ( "saadakse", 176, 7, "", methodObject ), + new Among ( "saadi", 175, 7, "", methodObject ), + new Among ( "saaks", -1, 5, "", methodObject ), + new Among ( "saaksid", 179, 5, "", methodObject ), + new Among ( "saaksime", 179, 5, "", methodObject ), + new Among ( "saaksin", 179, 5, "", methodObject ), + new Among ( "saaksite", 179, 5, "", methodObject ), + new Among ( "saama", -1, 7, "", methodObject ), + new Among ( "saamata", 184, 7, "", methodObject ), + new Among ( "saame", -1, 4, "", methodObject ), + new Among ( "saan", -1, 4, "", methodObject ), + new Among ( "saate", -1, 4, "", methodObject ), + new Among ( "saavad", -1, 4, "", methodObject ), + new Among ( "sai", -1, 6, "", methodObject ), + new Among ( "said", 190, 6, "", methodObject ), + new Among ( "saime", 190, 6, "", methodObject ), + new Among ( "sain", 190, 6, "", methodObject ), + new Among ( "saite", 190, 6, "", methodObject ), + new Among ( "s\u00F5i", -1, 27, "", methodObject ), + new Among ( "s\u00F5id", 195, 27, "", methodObject ), + new Among ( "s\u00F5ime", 195, 27, "", methodObject ), + new Among ( "s\u00F5in", 195, 27, "", methodObject ), + new Among ( "s\u00F5ite", 195, 27, "", methodObject ), + new Among ( "s\u00F6\u00F6b", -1, 25, "", methodObject ), + new Among ( "s\u00F6\u00F6d", -1, 25, "", methodObject ), + new Among ( "s\u00F6\u00F6dakse", 201, 28, "", methodObject ), + new Among ( "s\u00F6\u00F6di", 201, 28, "", methodObject ), + new Among ( "s\u00F6\u00F6ks", -1, 26, "", methodObject ), + new Among ( "s\u00F6\u00F6ksid", 204, 26, "", methodObject ), + new Among ( "s\u00F6\u00F6ksime", 204, 26, "", methodObject ), + new Among ( "s\u00F6\u00F6ksin", 204, 26, "", methodObject ), + new Among ( "s\u00F6\u00F6ksite", 204, 26, "", 
methodObject ), + new Among ( "s\u00F6\u00F6ma", -1, 28, "", methodObject ), + new Among ( "s\u00F6\u00F6mata", 209, 28, "", methodObject ), + new Among ( "s\u00F6\u00F6me", -1, 25, "", methodObject ), + new Among ( "s\u00F6\u00F6n", -1, 25, "", methodObject ), + new Among ( "s\u00F6\u00F6te", -1, 25, "", methodObject ), + new Among ( "s\u00F6\u00F6vad", -1, 25, "", methodObject ), + new Among ( "s\u00FC\u00FCa", -1, 28, "", methodObject ), + new Among ( "s\u00FC\u00FCakse", 215, 28, "", methodObject ), + new Among ( "teeb", -1, 50, "", methodObject ), + new Among ( "teed", -1, 50, "", methodObject ), + new Among ( "teeks", -1, 49, "", methodObject ), + new Among ( "teeksid", 219, 49, "", methodObject ), + new Among ( "teeksime", 219, 49, "", methodObject ), + new Among ( "teeksin", 219, 49, "", methodObject ), + new Among ( "teeksite", 219, 49, "", methodObject ), + new Among ( "teeme", -1, 50, "", methodObject ), + new Among ( "teen", -1, 50, "", methodObject ), + new Among ( "teete", -1, 50, "", methodObject ), + new Among ( "teevad", -1, 50, "", methodObject ), + new Among ( "tegema", -1, 51, "", methodObject ), + new Among ( "tegemata", 228, 51, "", methodObject ), + new Among ( "teha", -1, 51, "", methodObject ), + new Among ( "tehakse", 230, 51, "", methodObject ), + new Among ( "tehti", -1, 51, "", methodObject ), + new Among ( "toob", -1, 29, "", methodObject ), + new Among ( "tood", -1, 29, "", methodObject ), + new Among ( "toodi", 234, 32, "", methodObject ), + new Among ( "tooks", -1, 30, "", methodObject ), + new Among ( "tooksid", 236, 30, "", methodObject ), + new Among ( "tooksime", 236, 30, "", methodObject ), + new Among ( "tooksin", 236, 30, "", methodObject ), + new Among ( "tooksite", 236, 30, "", methodObject ), + new Among ( "tooma", -1, 32, "", methodObject ), + new Among ( "toomata", 241, 32, "", methodObject ), + new Among ( "toome", -1, 29, "", methodObject ), + new Among ( "toon", -1, 29, "", methodObject ), + new Among ( "toote", -1, 
29, "", methodObject ), + new Among ( "toovad", -1, 29, "", methodObject ), + new Among ( "tuua", -1, 32, "", methodObject ), + new Among ( "tuuakse", 247, 32, "", methodObject ), + new Among ( "t\u00F5i", -1, 31, "", methodObject ), + new Among ( "t\u00F5id", 249, 31, "", methodObject ), + new Among ( "t\u00F5ime", 249, 31, "", methodObject ), + new Among ( "t\u00F5in", 249, 31, "", methodObject ), + new Among ( "t\u00F5ite", 249, 31, "", methodObject ), + new Among ( "viia", -1, 11, "", methodObject ), + new Among ( "viiakse", 254, 11, "", methodObject ), + new Among ( "viib", -1, 8, "", methodObject ), + new Among ( "viid", -1, 8, "", methodObject ), + new Among ( "viidi", 257, 11, "", methodObject ), + new Among ( "viiks", -1, 9, "", methodObject ), + new Among ( "viiksid", 259, 9, "", methodObject ), + new Among ( "viiksime", 259, 9, "", methodObject ), + new Among ( "viiksin", 259, 9, "", methodObject ), + new Among ( "viiksite", 259, 9, "", methodObject ), + new Among ( "viima", -1, 11, "", methodObject ), + new Among ( "viimata", 264, 11, "", methodObject ), + new Among ( "viime", -1, 8, "", methodObject ), + new Among ( "viin", -1, 8, "", methodObject ), + new Among ( "viisime", -1, 10, "", methodObject ), + new Among ( "viisin", -1, 10, "", methodObject ), + new Among ( "viisite", -1, 10, "", methodObject ), + new Among ( "viite", -1, 8, "", methodObject ), + new Among ( "viivad", -1, 8, "", methodObject ), + new Among ( "v\u00F5ib", -1, 33, "", methodObject ), + new Among ( "v\u00F5id", -1, 33, "", methodObject ), + new Among ( "v\u00F5ida", 274, 35, "", methodObject ), + new Among ( "v\u00F5idakse", 275, 35, "", methodObject ), + new Among ( "v\u00F5idi", 274, 35, "", methodObject ), + new Among ( "v\u00F5iks", -1, 34, "", methodObject ), + new Among ( "v\u00F5iksid", 278, 34, "", methodObject ), + new Among ( "v\u00F5iksime", 278, 34, "", methodObject ), + new Among ( "v\u00F5iksin", 278, 34, "", methodObject ), + new Among ( "v\u00F5iksite", 278, 34, 
"", methodObject ), + new Among ( "v\u00F5ima", -1, 35, "", methodObject ), + new Among ( "v\u00F5imata", 283, 35, "", methodObject ), + new Among ( "v\u00F5ime", -1, 33, "", methodObject ), + new Among ( "v\u00F5in", -1, 33, "", methodObject ), + new Among ( "v\u00F5is", -1, 33, "", methodObject ), + new Among ( "v\u00F5ite", -1, 33, "", methodObject ), + new Among ( "v\u00F5ivad", -1, 33, "", methodObject ) + }; + + private static final char g_V1[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 48, 8 }; + + private static final char g_RV[] = {17, 65, 16 }; + + private static final char g_KI[] = {117, 66, 6, 1, 0, 0, 0, 128, 0, 0, 0, 16, 8 }; + + private static final char g_GI[] = {21, 123, 243, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 48, 8 }; + + private int I_p1; + private boolean B_is_verb; + + private void copy_from(EstonianStemmer other) { + I_p1 = other.I_p1; + B_is_verb = other.B_is_verb; + super.copy_from(other); + } + + private boolean r_mark_regions() { + int v_1; + // (, line 40 + I_p1 = limit; + // goto, line 44 + golab0: while(true) + { + v_1 = cursor; + lab1: do { + if (!(in_grouping(g_V1, 97, 252))) + { + break lab1; + } + cursor = v_1; + break golab0; + } while (false); + cursor = v_1; + if (cursor >= limit) + { + return false; + } + cursor++; + } + // gopast, line 44 + golab2: while(true) + { + lab3: do { + if (!(out_grouping(g_V1, 97, 252))) + { + break lab3; + } + break golab2; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; + } + // setmark p1, line 44 + I_p1 = cursor; + return true; + } + + private boolean r_emphasis() { + int among_var; + int v_1; + int v_2; + int v_3; + int v_4; + int v_5; + // (, line 50 + // setlimit, line 51 + v_1 = limit - cursor; + // tomark, line 51 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 51 + // [, line 51 + ket = cursor; + // substring, line 51 + among_var = 
find_among_b(a_0, 2); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 51 + bra = cursor; + limit_backward = v_2; + // test, line 52 + v_3 = limit - cursor; + // hop, line 52 + { + int c = cursor - 4; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; + } + cursor = limit - v_3; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 54 + // (, line 54 + // and, line 54 + v_4 = limit - cursor; + if (!(in_grouping_b(g_GI, 97, 252))) + { + return false; + } + cursor = limit - v_4; + // not, line 54 + { + v_5 = limit - cursor; + lab0: do { + // call LONGV, line 54 + if (!r_LONGV()) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_5; + } + // delete, line 54 + slice_del(); + break; + case 2: + // (, line 55 + if (!(in_grouping_b(g_KI, 98, 197))) + { + return false; + } + // delete, line 55 + slice_del(); + break; + } + return true; + } + + private boolean r_verb() { + int among_var; + int v_1; + int v_2; + // (, line 60 + // setlimit, line 61 + v_1 = limit - cursor; + // tomark, line 61 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 61 + // [, line 61 + ket = cursor; + // substring, line 61 + among_var = find_among_b(a_1, 21); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 61 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 63 + // delete, line 63 + slice_del(); + break; + case 2: + // (, line 64 + // delete, line 64 + slice_del(); + break; + case 3: + // (, line 65 + // delete, line 65 + slice_del(); + break; + case 4: + // (, line 66 + // delete, line 66 + slice_del(); + break; + case 5: + // (, line 67 + // delete, line 67 + slice_del(); + break; + case 6: + // (, line 68 + // <-, line 68 + slice_from("a"); + break; + case 7: + // (, line 69 + // delete, line 69 + 
slice_del(); + break; + case 8: + // (, line 70 + // delete, line 70 + slice_del(); + break; + case 9: + // (, line 71 + // delete, line 71 + slice_del(); + break; + case 10: + // (, line 72 + if (!(in_grouping_b(g_V1, 97, 252))) + { + return false; + } + // delete, line 72 + slice_del(); + break; + case 11: + // (, line 73 + if (!(in_grouping_b(g_V1, 97, 252))) + { + return false; + } + // delete, line 73 + slice_del(); + break; + case 12: + // (, line 74 + if (!(in_grouping_b(g_V1, 97, 252))) + { + return false; + } + // delete, line 74 + slice_del(); + break; + case 13: + // (, line 75 + if (!(in_grouping_b(g_V1, 97, 252))) + { + return false; + } + // delete, line 75 + slice_del(); + break; + } + // set is_verb, line 77 + B_is_verb = true; + return true; + } + + private boolean r_LONGV() { + // among, line 81 + if (find_among_b(a_2, 9) == 0) + { + return false; + } + return true; + } + + private boolean r_i_plural() { + int among_var; + int v_1; + int v_2; + // (, line 83 + // setlimit, line 84 + v_1 = limit - cursor; + // tomark, line 84 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 84 + // [, line 84 + ket = cursor; + // substring, line 84 + among_var = find_among_b(a_3, 1); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 84 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 86 + if (!(in_grouping_b(g_RV, 97, 117))) + { + return false; + } + break; + } + // delete, line 88 + slice_del(); + return true; + } + + private boolean r_special_noun_endings() { + int among_var; + int v_1; + int v_2; + // (, line 91 + // setlimit, line 92 + v_1 = limit - cursor; + // tomark, line 92 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 92 + // [, line 92 + ket = cursor; + // 
substring, line 92 + among_var = find_among_b(a_4, 12); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 92 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 94 + // <-, line 94 + slice_from("lase"); + break; + case 2: + // (, line 95 + // <-, line 95 + slice_from("lase"); + break; + case 3: + // (, line 96 + // <-, line 96 + slice_from("lase"); + break; + case 4: + // (, line 97 + // <-, line 97 + slice_from("lase"); + break; + case 5: + // (, line 98 + // <-, line 98 + slice_from("mise"); + break; + case 6: + // (, line 99 + // <-, line 99 + slice_from("mise"); + break; + case 7: + // (, line 100 + // <-, line 100 + slice_from("mise"); + break; + case 8: + // (, line 101 + // <-, line 101 + slice_from("mise"); + break; + case 9: + // (, line 102 + // <-, line 102 + slice_from("lise"); + break; + case 10: + // (, line 103 + // <-, line 103 + slice_from("lise"); + break; + case 11: + // (, line 104 + // <-, line 104 + slice_from("lise"); + break; + case 12: + // (, line 105 + // <-, line 105 + slice_from("lise"); + break; + } + return true; + } + + private boolean r_case_ending() { + int among_var; + int v_1; + int v_2; + int v_3; + int v_4; + int v_5; + int v_6; + int v_7; + int v_8; + int v_9; + int v_10; + int v_11; + // (, line 109 + // setlimit, line 110 + v_1 = limit - cursor; + // tomark, line 110 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 110 + // [, line 110 + ket = cursor; + // substring, line 110 + among_var = find_among_b(a_5, 10); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 110 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 112 + // or, line 112 + lab0: do { + v_3 = limit - cursor; + lab1: do { + if (!(in_grouping_b(g_RV, 97, 117))) + { + break lab1; + 
} + break lab0; + } while (false); + cursor = limit - v_3; + // call LONGV, line 112 + if (!r_LONGV()) + { + return false; + } + } while (false); + break; + case 2: + // (, line 113 + // or, line 113 + lab2: do { + v_4 = limit - cursor; + lab3: do { + if (!(in_grouping_b(g_RV, 97, 117))) + { + break lab3; + } + break lab2; + } while (false); + cursor = limit - v_4; + // call LONGV, line 113 + if (!r_LONGV()) + { + return false; + } + } while (false); + break; + case 3: + // (, line 114 + // or, line 114 + lab4: do { + v_5 = limit - cursor; + lab5: do { + if (!(in_grouping_b(g_RV, 97, 117))) + { + break lab5; + } + break lab4; + } while (false); + cursor = limit - v_5; + // call LONGV, line 114 + if (!r_LONGV()) + { + return false; + } + } while (false); + break; + case 4: + // (, line 115 + // or, line 115 + lab6: do { + v_6 = limit - cursor; + lab7: do { + if (!(in_grouping_b(g_RV, 97, 117))) + { + break lab7; + } + break lab6; + } while (false); + cursor = limit - v_6; + // call LONGV, line 115 + if (!r_LONGV()) + { + return false; + } + } while (false); + break; + case 5: + // (, line 116 + // or, line 116 + lab8: do { + v_7 = limit - cursor; + lab9: do { + if (!(in_grouping_b(g_RV, 97, 117))) + { + break lab9; + } + break lab8; + } while (false); + cursor = limit - v_7; + // call LONGV, line 116 + if (!r_LONGV()) + { + return false; + } + } while (false); + break; + case 6: + // (, line 117 + // or, line 117 + lab10: do { + v_8 = limit - cursor; + lab11: do { + if (!(in_grouping_b(g_RV, 97, 117))) + { + break lab11; + } + break lab10; + } while (false); + cursor = limit - v_8; + // call LONGV, line 117 + if (!r_LONGV()) + { + return false; + } + } while (false); + break; + case 7: + // (, line 118 + // or, line 118 + lab12: do { + v_9 = limit - cursor; + lab13: do { + if (!(in_grouping_b(g_RV, 97, 117))) + { + break lab13; + } + break lab12; + } while (false); + cursor = limit - v_9; + // call LONGV, line 118 + if (!r_LONGV()) + { + return false; + } + } while 
(false); + break; + case 8: + // (, line 120 + // or, line 120 + lab14: do { + v_10 = limit - cursor; + lab15: do { + if (!(in_grouping_b(g_RV, 97, 117))) + { + break lab15; + } + break lab14; + } while (false); + cursor = limit - v_10; + // call LONGV, line 120 + if (!r_LONGV()) + { + return false; + } + } while (false); + break; + case 9: + // (, line 121 + // or, line 121 + lab16: do { + v_11 = limit - cursor; + lab17: do { + if (!(in_grouping_b(g_RV, 97, 117))) + { + break lab17; + } + break lab16; + } while (false); + cursor = limit - v_11; + // call LONGV, line 121 + if (!r_LONGV()) + { + return false; + } + } while (false); + break; + } + // delete, line 123 + slice_del(); + return true; + } + + private boolean r_plural_three_first_cases() { + int among_var; + int v_1; + int v_2; + int v_3; + int v_4; + int v_5; + int v_6; + int v_7; + int v_8; + int v_9; + int v_10; + // (, line 127 + // setlimit, line 128 + v_1 = limit - cursor; + // tomark, line 128 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 128 + // [, line 128 + ket = cursor; + // substring, line 128 + among_var = find_among_b(a_6, 7); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 128 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 130 + // <-, line 130 + slice_from("iku"); + break; + case 2: + // (, line 131 + // <-, line 131 + slice_from("iku"); + break; + case 3: + // (, line 132 + // <-, line 132 + slice_from("iku"); + break; + case 4: + // (, line 133 + // not, line 133 + { + v_3 = limit - cursor; + lab0: do { + // call LONGV, line 133 + if (!r_LONGV()) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_3; + } + // delete, line 133 + slice_del(); + break; + case 5: + // (, line 134 + // or, line 134 + lab1: do { + v_4 = limit - cursor; + lab2: do { + // (, line 134 + 
// test, line 134 + v_5 = limit - cursor; + // hop, line 134 + { + int c = cursor - 4; + if (limit_backward > c || c > limit) + { + break lab2; + } + cursor = c; + } + cursor = limit - v_5; + // (, line 134 + // or, line 134 + lab3: do { + v_6 = limit - cursor; + lab4: do { + // (, line 134 + // literal, line 134 + if (!(eq_s_b(3, "mis"))) + { + break lab4; + } + // <-, line 134 + slice_from("e"); + break lab3; + } while (false); + cursor = limit - v_6; + lab5: do { + // (, line 134 + // literal, line 134 + if (!(eq_s_b(3, "las"))) + { + break lab5; + } + // <-, line 134 + slice_from("e"); + break lab3; + } while (false); + cursor = limit - v_6; + lab6: do { + // (, line 134 + // literal, line 134 + if (!(eq_s_b(3, "lis"))) + { + break lab6; + } + // <-, line 134 + slice_from("e"); + break lab3; + } while (false); + cursor = limit - v_6; + // (, line 134 + // not, line 134 + { + v_7 = limit - cursor; + lab7: do { + // literal, line 134 + if (!(eq_s_b(1, "t"))) + { + break lab7; + } + break lab2; + } while (false); + cursor = limit - v_7; + } + // delete, line 134 + slice_del(); + } while (false); + break lab1; + } while (false); + cursor = limit - v_4; + // (, line 134 + // not, line 134 + { + v_8 = limit - cursor; + lab8: do { + // literal, line 134 + if (!(eq_s_b(1, "t"))) + { + break lab8; + } + return false; + } while (false); + cursor = limit - v_8; + } + // <-, line 134 + slice_from("t"); + } while (false); + break; + case 6: + // (, line 135 + // (, line 135 + // or, line 135 + lab9: do { + v_9 = limit - cursor; + lab10: do { + if (!(in_grouping_b(g_RV, 97, 117))) + { + break lab10; + } + break lab9; + } while (false); + cursor = limit - v_9; + // call LONGV, line 135 + if (!r_LONGV()) + { + return false; + } + } while (false); + // delete, line 135 + slice_del(); + break; + case 7: + // (, line 136 + // (, line 136 + // or, line 136 + lab11: do { + v_10 = limit - cursor; + lab12: do { + if (!(in_grouping_b(g_RV, 97, 117))) + { + break lab12; + } + break 
lab11; + } while (false); + cursor = limit - v_10; + // call LONGV, line 136 + if (!r_LONGV()) + { + return false; + } + } while (false); + // delete, line 136 + slice_del(); + break; + } + return true; + } + + private boolean r_double() { + int v_1; + // (, line 140 + // test, line 141 + v_1 = limit - cursor; + // among, line 141 + if (find_among_b(a_7, 3) == 0) + { + return false; + } + cursor = limit - v_1; + return true; + } + + private boolean r_undouble() { + // (, line 144 + // next, line 145 + if (cursor <= limit_backward) + { + return false; + } + cursor--; + // [, line 145 + ket = cursor; + // hop, line 145 + { + int c = cursor - 1; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; + } + // ], line 145 + bra = cursor; + // delete, line 145 + slice_del(); + return true; + } + + private boolean r_nu() { + int v_1; + int v_2; + // (, line 148 + // setlimit, line 149 + v_1 = limit - cursor; + // tomark, line 149 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 149 + // [, line 149 + ket = cursor; + // substring, line 149 + if (find_among_b(a_8, 4) == 0) + { + limit_backward = v_2; + return false; + } + // ], line 149 + bra = cursor; + limit_backward = v_2; + // delete, line 156 + slice_del(); + return true; + } + + private boolean r_remove_double_kpt() { + int v_1; + // (, line 159 + // (, line 160 + if (!(in_grouping_b(g_V1, 97, 252))) + { + return false; + } + // and, line 161 + v_1 = limit - cursor; + // (, line 160 + // call double, line 160 + if (!r_double()) + { + return false; + } + cursor = limit - v_1; + // call undouble, line 161 + if (!r_undouble()) + { + return false; + } + return true; + } + + private boolean r_degrees() { + int among_var; + int v_1; + int v_2; + // (, line 164 + // setlimit, line 165 + v_1 = limit - cursor; + // tomark, line 165 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = 
limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 165 + // [, line 165 + ket = cursor; + // substring, line 165 + among_var = find_among_b(a_9, 3); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 165 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 167 + if (!(in_grouping_b(g_RV, 97, 117))) + { + return false; + } + // delete, line 167 + slice_del(); + break; + case 2: + // (, line 168 + // delete, line 168 + slice_del(); + break; + case 3: + // (, line 169 + if (!(in_grouping_b(g_RV, 97, 117))) + { + return false; + } + // delete, line 169 + slice_del(); + break; + } + return true; + } + + private boolean r_substantive() { + int v_1; + int v_2; + int v_3; + int v_4; + int v_5; + int v_6; + // (, line 173 + // do, line 174 + v_1 = limit - cursor; + lab0: do { + // call special_noun_endings, line 174 + if (!r_special_noun_endings()) + { + break lab0; + } + } while (false); + cursor = limit - v_1; + // do, line 175 + v_2 = limit - cursor; + lab1: do { + // call case_ending, line 175 + if (!r_case_ending()) + { + break lab1; + } + } while (false); + cursor = limit - v_2; + // do, line 176 + v_3 = limit - cursor; + lab2: do { + // call plural_three_first_cases, line 176 + if (!r_plural_three_first_cases()) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + // do, line 177 + v_4 = limit - cursor; + lab3: do { + // call degrees, line 177 + if (!r_degrees()) + { + break lab3; + } + } while (false); + cursor = limit - v_4; + // do, line 178 + v_5 = limit - cursor; + lab4: do { + // call i_plural, line 178 + if (!r_i_plural()) + { + break lab4; + } + } while (false); + cursor = limit - v_5; + // do, line 179 + v_6 = limit - cursor; + lab5: do { + // call nu, line 179 + if (!r_nu()) + { + break lab5; + } + } while (false); + cursor = limit - v_6; + return true; + } + + private boolean r_verb_exceptions() { + int among_var; + // (, 
line 184 + // [, line 185 + bra = cursor; + // substring, line 185 + among_var = find_among(a_10, 290); + if (among_var == 0) + { + return false; + } + // ], line 185 + ket = cursor; + // atlimit, line 185 + if (cursor < limit) + { + return false; + } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 187 + // <-, line 187 + slice_from("joo"); + break; + case 2: + // (, line 188 + // <-, line 188 + slice_from("joo"); + break; + case 3: + // (, line 189 + // <-, line 189 + slice_from("joo"); + break; + case 4: + // (, line 190 + // <-, line 190 + slice_from("saa"); + break; + case 5: + // (, line 191 + // <-, line 191 + slice_from("saa"); + break; + case 6: + // (, line 192 + // <-, line 192 + slice_from("saa"); + break; + case 7: + // (, line 193 + // <-, line 193 + slice_from("saa"); + break; + case 8: + // (, line 194 + // <-, line 194 + slice_from("viima"); + break; + case 9: + // (, line 195 + // <-, line 195 + slice_from("viima"); + break; + case 10: + // (, line 196 + // <-, line 196 + slice_from("viima"); + break; + case 11: + // (, line 197 + // <-, line 197 + slice_from("viima"); + break; + case 12: + // (, line 198 + // <-, line 198 + slice_from("keesi"); + break; + case 13: + // (, line 199 + // <-, line 199 + slice_from("keesi"); + break; + case 14: + // (, line 200 + // <-, line 200 + slice_from("keesi"); + break; + case 15: + // (, line 201 + // <-, line 201 + slice_from("l\u00F6\u00F6"); + break; + case 16: + // (, line 202 + // <-, line 202 + slice_from("l\u00F6\u00F6"); + break; + case 17: + // (, line 203 + // <-, line 203 + slice_from("l\u00F6\u00F6"); + break; + case 18: + // (, line 204 + // <-, line 204 + slice_from("l\u00F6i"); + break; + case 19: + // (, line 205 + // <-, line 205 + slice_from("loo"); + break; + case 20: + // (, line 206 + // <-, line 206 + slice_from("loo"); + break; + case 21: + // (, line 207 + // <-, line 207 + slice_from("loo"); + break; + case 22: + // (, line 208 + // <-, line 208 + 
slice_from("k\u00E4isi"); + break; + case 23: + // (, line 209 + // <-, line 209 + slice_from("k\u00E4isi"); + break; + case 24: + // (, line 210 + // <-, line 210 + slice_from("k\u00E4isi"); + break; + case 25: + // (, line 211 + // <-, line 211 + slice_from("s\u00F6\u00F6"); + break; + case 26: + // (, line 212 + // <-, line 212 + slice_from("s\u00F6\u00F6"); + break; + case 27: + // (, line 213 + // <-, line 213 + slice_from("s\u00F6\u00F6"); + break; + case 28: + // (, line 214 + // <-, line 214 + slice_from("s\u00F6\u00F6"); + break; + case 29: + // (, line 215 + // <-, line 215 + slice_from("too"); + break; + case 30: + // (, line 216 + // <-, line 216 + slice_from("too"); + break; + case 31: + // (, line 217 + // <-, line 217 + slice_from("too"); + break; + case 32: + // (, line 218 + // <-, line 218 + slice_from("too"); + break; + case 33: + // (, line 219 + // <-, line 219 + slice_from("v\u00F5isi"); + break; + case 34: + // (, line 220 + // <-, line 220 + slice_from("v\u00F5isi"); + break; + case 35: + // (, line 221 + // <-, line 221 + slice_from("v\u00F5isi"); + break; + case 36: + // (, line 222 + // <-, line 222 + slice_from("j\u00E4\u00E4ma"); + break; + case 37: + // (, line 223 + // <-, line 223 + slice_from("j\u00E4\u00E4ma"); + break; + case 38: + // (, line 224 + // <-, line 224 + slice_from("j\u00E4\u00E4ma"); + break; + case 39: + // (, line 225 + // <-, line 225 + slice_from("j\u00E4\u00E4ma"); + break; + case 40: + // (, line 226 + // <-, line 226 + slice_from("m\u00FC\u00FCsi"); + break; + case 41: + // (, line 227 + // <-, line 227 + slice_from("m\u00FC\u00FCsi"); + break; + case 42: + // (, line 228 + // <-, line 228 + slice_from("m\u00FC\u00FCsi"); + break; + case 43: + // (, line 229 + // <-, line 229 + slice_from("luge"); + break; + case 44: + // (, line 230 + // <-, line 230 + slice_from("luge"); + break; + case 45: + // (, line 231 + // <-, line 231 + slice_from("p\u00F5de"); + break; + case 46: + // (, line 232 + // <-, line 232 + 
slice_from("p\u00F5de"); + break; + case 47: + // (, line 233 + // <-, line 233 + slice_from("ladu"); + break; + case 48: + // (, line 234 + // <-, line 234 + slice_from("ladu"); + break; + case 49: + // (, line 235 + // <-, line 235 + slice_from("tegi"); + break; + case 50: + // (, line 236 + // <-, line 236 + slice_from("tegi"); + break; + case 51: + // (, line 237 + // <-, line 237 + slice_from("tegi"); + break; + case 52: + // (, line 238 + // <-, line 238 + slice_from("n\u00E4gi"); + break; + case 53: + // (, line 239 + // <-, line 239 + slice_from("n\u00E4gi"); + break; + case 54: + // (, line 240 + // <-, line 240 + slice_from("n\u00E4gi"); + break; + } + return true; + } + + public boolean stem() { + int v_1; + int v_2; + int v_3; + int v_4; + int v_5; + int v_7; + int v_8; + // (, line 245 + // do, line 246 + v_1 = cursor; + lab0: do { + // call mark_regions, line 246 + if (!r_mark_regions()) + { + break lab0; + } + } while (false); + cursor = v_1; + // not, line 247 + { + v_2 = cursor; + lab1: do { + // call verb_exceptions, line 247 + if (!r_verb_exceptions()) + { + break lab1; + } + return false; + } while (false); + cursor = v_2; + } + // unset is_verb, line 248 + B_is_verb = false; + // backwards, line 249 + limit_backward = cursor; cursor = limit; + // (, line 249 + // do, line 250 + v_3 = limit - cursor; + lab2: do { + // call emphasis, line 250 + if (!r_emphasis()) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + // do, line 251 + v_4 = limit - cursor; + lab3: do { + // call verb, line 251 + if (!r_verb()) + { + break lab3; + } + } while (false); + cursor = limit - v_4; + // try, line 252 + v_5 = limit - cursor; + lab4: do { + // (, line 252 + // not, line 252 + lab5: do { + // Boolean test is_verb, line 252 + if (!(B_is_verb)) + { + break lab5; + } + cursor = limit - v_5; + break lab4; + } while (false); + // do, line 252 + v_7 = limit - cursor; + lab6: do { + // call substantive, line 252 + if (!r_substantive()) + { + break 
lab6; + } + } while (false); + cursor = limit - v_7; + } while (false); + // do, line 253 + v_8 = limit - cursor; + lab7: do { + // call remove_double_kpt, line 253 + if (!r_remove_double_kpt()) + { + break lab7; + } + } while (false); + cursor = limit - v_8; + cursor = limit_backward; return true; + } + + public boolean equals( Object o ) { + return o instanceof EstonianStemmer; + } + + public int hashCode() { + return EstonianStemmer.class.getName().hashCode(); + } + + + +} + diff --git a/lucene/analysis/common/src/resources/org/apache/lucene/analysis/et/stopwords.txt b/lucene/analysis/common/src/resources/org/apache/lucene/analysis/et/stopwords.txt new file mode 100644 index 00000000000..1b06a134b9a --- /dev/null +++ b/lucene/analysis/common/src/resources/org/apache/lucene/analysis/et/stopwords.txt @@ -0,0 +1,1603 @@ +# Estonian stopwords list +all +alla +allapoole +allpool +alt +altpoolt +eel +eespool +enne +hommikupoole +hoolimata +ilma +kaudu +keset +kesk +kohe +koos +kuhupoole +kuni +kuspool +kustpoolt +kõige +käsikäes +lappi +ligi +läbi +mööda +paitsi +peale +pealepoole +pealpool +pealt +pealtpoolt +piki +pikku +piku +pikuti +põiki +pärast +päri +risti +sealpool +sealtpoolt +seespool +seltsis +siiapoole +siinpool +siitpoolt +sinnapoole +sissepoole +taga +tagantpoolt +tagapidi +tagapool +taha +tahapoole +teispool +teispoole +tänu +tükkis +vaatamata +vastu +väljapoole +väljaspool +väljastpoolt +õhtupoole +ühes +ühestükis +ühestükkis +ülalpool +ülaltpoolt +üle +ülespoole +ülevalpool +ülevaltpoolt +ümber +ümbert +aegu +aegus +alguks +algul +algule +algult +alguni +all +alla +alt +alul +alutsi +arvel +asemel +asemele +eel +eeli +ees +eesotsas +eest +eestotsast +esitsi +ette +etteotsa +haaval +heaks +hoolimata +hulgas +hulgast +hulka +jalgu +jalus +jalust +jaoks +jooksul +juurde +juures +juurest +jälil +jälile +järel +järele +järelt +järgi +kaasas +kallal +kallale +kallalt +kamul +kannul +kannule +kannult +kaudu +kaupa +keskel +keskele +keskelt +keskis +keskpaiku 
+kestel +kestes +kilda +killas +killast +kimpu +kimpus +kiuste +kohal +kohale +kohalt +kohaselt +kohe +kohta +koos +korral +kukil +kukile +kukilt +kulul +kõrva +kõrval +kõrvale +kõrvalt +kõrvas +kõrvast +käekõrval +käekõrvale +käekõrvalt +käes +käest +kätte +külge +küljes +küljest +küüsi +küüsis +küüsist +ligi +ligidal +ligidale +ligidalt +aegu +aegus +alguks +algul +algule +algult +alguni +all +alla +alt +alul +alutsi +arvel +asemel +asemele +eel +eeli +ees +eesotsas +eest +eestotsast +esitsi +ette +etteotsa +haaval +heaks +hoolimata +hulgas +hulgast +hulka +jalgu +jalus +jalust +jaoks +jooksul +juurde +juures +juurest +jälil +jälile +järel +järele +järelt +järgi +kaasas +kallal +kallale +kallalt +kamul +kannul +kannule +kannult +kaudu +kaupa +keskel +keskele +keskelt +keskis +keskpaiku +kestel +kestes +kilda +killas +killast +kimpu +kimpus +kiuste +kohal +kohale +kohalt +kohaselt +kohe +kohta +koos +korral +kukil +kukile +kukilt +kulul +kõrva +kõrval +kõrvale +kõrvalt +kõrvas +kõrvast +käekõrval +käekõrvale +käekõrvalt +käes +käest +kätte +külge +küljes +küljest +küüsi +küüsis +küüsist +ligi +ligidal +ligidale +ligidalt +lool +läbi +lähedal +lähedale +lähedalt +man +mant +manu +meelest +mööda +nahas +nahka +nahkas +najal +najale +najalt +nõjal +nõjale +otsa +otsas +otsast +paigale +paigu +paiku +peal +peale +pealt +perra +perrä +pidi +pihta +piki +pikku +pool +poole +poolest +poolt +puhul +puksiiris +pähe +päralt +päras +pärast +päri +ringi +ringis +risust +saadetusel +saadik +saatel +saati +seas +seast +sees +seest +sekka +seljataga +seltsi +seltsis +seltsist +sisse +slepis +suhtes +šlepis +taga +tagant +tagantotsast +tagaotsas +tagaselja +tagasi +tagast +tagutsi +taha +tahaotsa +takka +tarvis +tasa +tuuri +tuuris +tõttu +tükkis +uhal +vaatamata +vahel +vahele +vahelt +vahepeal +vahepeale +vahepealt +vahetsi +varal +varale +varul +vastas +vastast +vastu +veerde +veeres +viisi +võidu +võrd +võrdki +võrra +võrragi +väel +väele +vältel +väärt +väärtki +äärde +ääre 
+ääres +äärest +ühes +üle +ümber +ümbert +a +abil +aina +ainult +alalt +alates +alati +alles +b +c +d +e +eales +ealeski +edasi +edaspidi +eelkõige +eemal +ei +eks +end +enda +enese +ennem +esialgu +f +g +h +hoopis +i +iganes +igatahes +igati +iial +iialgi +ikka +ikkagi +ilmaski +iseenda +iseenese +iseenesest +isegi +j +jah +ju +juba +juhul +just +järelikult +k +ka +kah +kas +kasvõi +keda +kestahes +kogu +koguni +kohati +kokku +kuhu +kuhugi +kuidagi +kuidas +kunagi +kus +kusagil +kusjuures +kuskil +kust +kõigepealt +küll +l +liiga +lisaks +m +miks +mil +millal +millalgi +mispärast +mistahes +mistõttu +mitte +muide +muidu +muidugi +muist +mujal +mujale +mujalt +mõlemad +mõnda +mõne +mõnikord +n +nii +niikaua +niimoodi +niipaljuke +niisama +niisiis +niivõrd +nõnda +nüüd +o +omaette +omakorda +omavahel +ometi +p +palju +paljuke +palju-palju +peaaegu +peagi +peamiselt +pigem +pisut +praegu +päris +r +rohkem +s +samas +samuti +seal +sealt +sedakorda +sedapuhku +seega +seejuures +seejärel +seekord +seepärast +seetõttu +sellepärast +seni +sestap +siia +siiani +siin +siinkohal +siis +siiski +siit +sinna +suht +š +z +ž +t +teel +teineteise +tõesti +täiesti +u +umbes +v +w +veel +veelgi +vist +võibolla +võib-olla +väga +vähemalt +välja +väljas +väljast +õ +ä +ära +ö +ü +ühtlasi +üksi +ükskõik +ülal +ülale +ülalt +üles +ülesse +üleval +ülevalt +ülimalt +üsna +x +y +aga +ega +ehk +ehkki +elik +ellik +enge +ennegu +ent +et +ja +justkui +kui +kuid +kuigi +kuivõrd +kuna +kuni +kut +mistab +muudkui +nagu +nigu +ning +olgugi +otsekui +otsenagu +selmet +sest +sestab +vaid +või +aa +adaa +adjöö +ae +ah +ahaa +ahah +ah-ah-ah +ah-haa +ahoi +ai +aidaa +aidu-raidu +aih +aijeh +aituma +aitäh +aitüma +ammuu +amps +ampsti +aptsih +ass +at +ata +at-at-at +atsih +atsihh +auh +bai-bai +bingo +braavo +brr +ee +eeh +eh +ehee +eheh +eh-eh-hee +eh-eh-ee +ehei +ehh +ehhee +einoh +ena +ennäe +ennäh +fuh +fui +fuih +haa +hah +hahaa +hah-hah-hah +halleluuja +hallo +halloo +hass +hee +heh +he-he-hee 
+hei +heldeke(ne) +heureka +hihii +hip-hip-hurraa +hmh +hmjah +hoh-hoh-hoo +hohoo +hoi +hollallaa +hoo +hoplaa +hopp +hops +hopsassaa +hopsti +hosianna +huh +huidii +huist +hurjah +hurjeh +hurjoh +hurjuh +hurraa +huu +hõhõh +hõi +hõissa +hõissassa +hõk +hõkk +häh +hä-hä-hää +hüvasti +ih-ah-haa +ih-ih-hii +ii-ha-ha +issake +issakene +isver +jaa-ah +ja-ah +jaah +janäe +jeeh +jeerum +jeever +jessas +jestas +juhhei +jumalaga +jumalime +jumaluke +jumalukene +jutas +kaaps +kaapsti +kaasike +kae +kalps +kalpsti +kannäe +kanäe +kappadi +kaps +kapsti +karkõmm +karkäuh +karkääks +karkääksti +karmauh +karmauhti +karnaps +karnapsti +karniuhti +karpartsaki +karpauh +karpauhti +karplauh +karplauhti +karprauh +karprauhti +karsumdi +karsumm +kartsumdi +kartsumm +karviuh +karviuhti +kaske +kassa +kauh +kauhti +keh +keksti +kepsti +khe +khm +kih +kiiks +kiiksti +kiis +kiiss +kikerii +kikerikii +kili +kilk +kilk-kõlk +kilks +kilks-kolks +kilks-kõlks +kill +killadi +killadi-kolladi +killadi-kõlladi +killa-kolla +killa-kõlla +kill-kõll +kimps-komps +kipp +kips-kõps +kiriküüt +kirra-kõrra +kirr-kõrr +kirts +klaps +klapsti +klirdi +klirr +klonks +klops +klopsti +kluk +klu-kluu +klõks +klõksti +klõmdi +klõmm +klõmpsti +klõnks +klõnksti +klõps +klõpsti +kläu +kohva-kohva +kok +koks +koksti +kolaki +kolk +kolks +kolksti +koll +kolladi +komp +komps +kompsti +kop +kopp +koppadi +kops +kopsti +kossu +kotsu +kraa +kraak +kraaks +kraaps +kraapsti +krahh +kraks +kraksti +kraps +krapsti +krauh +krauhti +kriiks +kriiksti +kriips +kriips-kraaps +kripa-krõpa +krips-kraps +kriuh +kriuks +kriuksti +kromps +kronk +kronks +krooks +kruu +krõks +krõksti +krõpa +krõps +krõpsti +krõuh +kräu +kräuh +kräuhti +kräuks +kss +kukeleegu +kukku +kuku +kulu +kurluu +kurnäu +kuss +kussu +kõks +kõksti +kõldi +kõlks +kõlksti +kõll +kõmaki +kõmdi +kõmm +kõmps +kõpp +kõps +kõpsadi +kõpsat +kõpsti +kõrr +kõrra-kõrra +kõss +kõtt +kõõksti +kärr +kärts +kärtsti +käuks +käuksti +kääga +kääks +kääksti +köh +köki-möki +köksti 
+laks +laksti +lampsti +larts +lartsti +lats +latsti +leelo +legoo +lehva +liiri-lõõri +lika-lõka +likat-lõkat +limpsti +lips +lipsti +lirts +lirtsaki +lirtsti +lonksti +lops +lopsti +lorts +lortsti +luks +lups +lupsti +lurts +lurtsti +lõks +lõksti +lõmps +lõmpsti +lõnks +lõnksti +lärts +lärtsti +läts +lätsti +lörts +lörtsti +lötsti +lööps +lööpsti +marss +mats +matsti +mauh +mauhti +mh +mhh +mhmh +miau +mjaa +mkm +m-mh +mnjaa +mnjah +moens +mulks +mulksti +mull-mull +mull-mull-mull +muu +muuh +mõh +mõmm +mäh +mäts +mäu +mää +möh +möh-öh-ää +möö +müh-müh +mühüh +müks +müksti +müraki +mürr +mürts +mürtsaki +mürtsti +mütaku +müta-mäta +müta-müta +müt-müt +müt-müt-müt +müts +mütsti +mütt +naa +naah +nah +naks +naksti +nanuu +naps +napsti +nilpsti +nipsti +nirr +niuh +niuh-näuh +niuhti +noh +noksti +nolpsti +nonoh +nonoo +nonäh +noo +nooh +nooks +norr +nurr +nuuts +nõh +nõhh +nõka-nõka +nõks +nõksat-nõksat +nõks-nõks +nõksti +nõõ +nõõh +näeh +näh +nälpsti +nämm-nämm +näpsti +näts +nätsti +näu +näuh +näuhti +näuks +näuksti +nääh +nääks +nühkat-nühkat +oeh +oh +ohh +ohhh +oh-hoi +oh-hoo +ohoh +oh-oh-oo +oh-oh-hoo +ohoi +ohoo +oi +oih +oijee +oijeh +oo +ooh +oo-oh +oo-ohh +oot +ossa +ot +paa +pah +pahh +pakaa +pamm +pantsti +pardon +pardonks +parlartsti +parts +partsti +partsumdi +partsumm +pastoi +pats +patst +patsti +pau +pauh +pauhti +pele +pfui +phuh +phuuh +phäh +phähh +piiks +piip +piiri-pääri +pimm +pimm-pamm +pimm-pomm +pimm-põmm +piraki +piuks +piu-pau +plaks +plaksti +plarts +plartsti +plats +platsti +plauh +plauhh +plauhti +pliks +pliks-plaks +plinn +pliraki +plirts +plirtsti +pliu +pliuh +ploks +plotsti +plumps +plumpsti +plõks +plõksti +plõmdi +plõmm +plõnn +plärr +plärts +plärtsat +plärtsti +pläu +pläuh +plää +plörtsat +pomm +popp +pops +popsti +ports +pot +pots +potsti +pott +praks +praksti +prants +prantsaki +prantsti +prassai +prauh +prauhh +prauhti +priks +priuh +priuhh +priuh-prauh +proosit +proost +prr +prrr +prõks +prõksti +prõmdi +prõmm +prõntsti 
+prääk +prääks +pst +psst +ptrr +ptruu +ptüi +puh +puhh +puksti +pumm +pumps +pup-pup-pup +purts +puuh +põks +põksti +põmdi +põmm +põmmadi +põnks +põnn +põnnadi +põnt +põnts +põntsti +põraki +põrr +põrra-põrra +päh +pähh +päntsti +pää +pöörd +püh +raks +raksti +raps +rapsti +ratataa +rauh +riips +riipsti +riks +riks-raks +rips-raps +rivitult +robaki +rops +ropsaki +ropsti +ruik +räntsti +räts +röh +röhh +sah +sahh +sahkat +saps +sapsti +sauh +sauhti +servus +sihkadi-sahkadi +sihka-sahka +sihkat-sahkat +silks +silk-solk +sips +sipsti +sirr +sirr-sorr +sirts +sirtsti +siu +siuh +siuh-sauh +siuh-säuh +siuhti +siuks +siuts +skool +so +soh +solks +solksti +solpsti +soo +sooh +so-oh +soo-oh +sopp +sops +sopsti +sorr +sorts +sortsti +so-soo +soss +soss-soss +ss +sss +sst +stopp +suhkat-sahkat +sulk +sulks +sulksti +sull +sulla-sulla +sulpa-sulpa +sulps +sulpsti +sumaki +sumdi +summ +summat-summat +sups +supsaku +supsti +surts +surtsti +suss +susti +suts +sutsti +säh +sähke +särts +särtsti +säu +säuh +säuhti +taevake +taevakene +takk +tere +terekest +tibi-tibi +tikk-takk +tiks +tilk +tilks +till +tilla-talla +till-tall +tilulii +tinn +tip +tip-tap +tirr +tirtsti +tiu +tjaa +tjah +tohhoh +tohhoo +tohoh +tohoo +tok +tokk +toks +toksti +tonks +tonksti +tota +totsti +tot-tot +tprr +tpruu +trah +trahh +trallallaa +trill +trillallaa +trr +trrr +tsah +tsahh +tsilk +tsilk-tsolk +tsirr +tsiuh +tskae +tsolk +tss +tst +tsst +tsuhh +tsuk +tsumm +tsurr +tsäuh +tšao +tšš +tššš +tuk +tuks +turts +turtsti +tutki +tutkit +tutu-lutu +tutulutu +tuut +tuutu-luutu +tõks +tötsti +tümps +uh +uhh +uh-huu +uhtsa +uhtsaa +uhuh +uhuu +ui +uih +uih-aih +uijah +uijeh +uist +uit +uka +upsti +uraa +urjah +urjeh +urjoh +urjuh +urr +urraa +ust +utu +uu +uuh +vaak +vaat +vae +vaeh +vai +vat +vau +vhüüt +vidiit +viiks +vilks +vilksti +vinki-vinki +virdi +virr +viu +viudi +viuh +viuhti +voeh +voh +vohh +volks +volksti +vooh +vops +vopsti +vot +vuh +vuhti +vuih +vulks +vulksti +vull +vulpsti +vups +vupsaki 
+vupsaku +vupsti +vurdi +vurr +vurra-vurra +vurts +vurtsti +vutt +võe +võeh +või +võih +võrr +võts +võtt +vääks +õe +õits +õk +õkk +õrr +õss +õuh +äh +ähh +ähhähhää +äh-hää +äh-äh-hää +äiu +äiu-ää +äss +ää +ääh +äähh +öh +öhh +ök +üh +eelmine +eikeegi +eimiski +emb-kumb +enam +enim +iga +igasugune +igaüks +ise +isesugune +järgmine +keegi +kes +kumb +kumbki +kõik +meiesugune +meietaoline +midagi +mihuke +mihukene +milletaoline +milline +mina +minake +mingi +mingisugune +minusugune +minutaoline +mis +miski +miskisugune +missugune +misuke +mitmes +mitmesugune +mitu +mitu-mitu +mitu-setu +muu +mõlema +mõnesugune +mõni +mõningane +mõningas +mäherdune +määrane +naasugune +need +nemad +nendesugune +nendetaoline +nihuke +nihukene +niimitu +niisamasugune +niisugune +nisuke +nisukene +oma +omaenese +omasugune +omataoline +pool +praegune +sama +samasugune +samataoline +see +seesama +seesamane +seesamune +seesinane +seesugune +selline +sihuke +sihukene +sina +sinusugune +sinutaoline +siuke +siukene +säherdune +säärane +taoline +teiesugune +teine +teistsugune +tema +temake +temakene +temasugune +temataoline +too +toosama +toosamane +üks +üksteise +hakkama +minema +olema +pidama +saama +tegema +tulema +võima diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/et/TestEstonianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/et/TestEstonianAnalyzer.java new file mode 100644 index 00000000000..0615906da72 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/et/TestEstonianAnalyzer.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.et;
+
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+
+public class TestEstonianAnalyzer extends BaseTokenStreamTestCase {
+
+  /** This test fails with NPE when the
+   * stopwords file is missing from the classpath. */
+  public void testResourcesAvailable() {
+    new EstonianAnalyzer().close();
+  }
+
+  /** Tests stemming and stopword removal; the analyzer is closed even when a check fails. */
+  public void testBasics() throws IOException {
+    try (Analyzer a = new EstonianAnalyzer()) {
+      // stemming: every form below is expected to reduce to the same stem
+      checkOneTerm(a, "teadaolevalt", "teadaole");
+      checkOneTerm(a, "teadaolevaid", "teadaole");
+      checkOneTerm(a, "teadaolevatest", "teadaole");
+      checkOneTerm(a, "teadaolevail", "teadaole");
+      checkOneTerm(a, "teadaolevatele", "teadaole");
+      checkOneTerm(a, "teadaolevatel", "teadaole");
+      checkOneTerm(a, "teadaolevateks", "teadaole");
+      checkOneTerm(a, "teadaolevate", "teadaole");
+      checkOneTerm(a, "teadaolevaks", "teadaole");
+      checkOneTerm(a, "teadaoleval", "teadaole");
+      checkOneTerm(a, "teadaolevates", "teadaole");
+      checkOneTerm(a, "teadaolevat", "teadaole");
+      checkOneTerm(a, "teadaolevast", "teadaole");
+      checkOneTerm(a, "teadaoleva", "teadaole");
+      checkOneTerm(a, "teadaolevais", "teadaole");
+      checkOneTerm(a, "teadaolevas", "teadaole");
+      checkOneTerm(a, "teadaolevad", "teadaole");
+      checkOneTerm(a, "teadaolevale", "teadaole");
+      checkOneTerm(a, "teadaolevatesse", "teadaole");
+      // stopword: "alla" is expected to produce no tokens at all
+      assertAnalyzesTo(a, "alla", new String[] { });
+    }
+  }
+
+
+}