diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 06335c071a3..03c22bb9a42 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -141,6 +141,9 @@ New Features
   the specified distance from the center point. Fix GeoPointInBBoxQuery to
   handle dateline crossing.
 
+* LUCENE-6694: Add LithuanianAnalyzer and LithuanianStemmer.
+  (Dainius Jocas via Robert Muir)
+
 API Changes
 
 * LUCENE-6508: Simplify Lock api, there is now just
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java
new file mode 100644
index 00000000000..32efb88e2f2
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java
@@ -0,0 +1,124 @@
+package org.apache.lucene.analysis.lt;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.tartarus.snowball.ext.LithuanianStemmer;
+
+/**
+ * {@link Analyzer} for Lithuanian: tokenizes, lower-cases, removes stop words and stems.
+ */
+public final class LithuanianAnalyzer extends StopwordAnalyzerBase {
+  private final CharArraySet stemExclusionSet;
+  
+  /** File containing default Lithuanian stopwords. */
+  public static final String DEFAULT_STOPWORD_FILE = "stopwords.txt";
+  
+  /**
+   * Returns an unmodifiable instance of the default stop words set.
+   * @return default stop words set.
+   */
+  public static CharArraySet getDefaultStopSet(){
+    return DefaultSetHolder.DEFAULT_STOP_SET;
+  }
+  
+  /**
+   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+   * accesses the static final set the first time.
+   */
+  private static class DefaultSetHolder {
+    static final CharArraySet DEFAULT_STOP_SET;
+    
+    static {
+      try {
+        DEFAULT_STOP_SET = loadStopwordSet(false, LithuanianAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+      } catch (IOException ex) {
+        // default set should always be present as it is part of the
+        // distribution (JAR); keep the cause so a broken JAR can be diagnosed
+        throw new RuntimeException("Unable to load default stopword set", ex);
+      }
+    }
+  }
+  
+  /**
+   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+   */
+  public LithuanianAnalyzer() {
+    this(DefaultSetHolder.DEFAULT_STOP_SET);
+  }
+  
+  /**
+   * Builds an analyzer with the given stop words and no stem exclusions.
+   * 
+   * @param stopwords a stopword set
+   */
+  public LithuanianAnalyzer(CharArraySet stopwords) {
+    this(stopwords, CharArraySet.EMPTY_SET);
+  }
+  
+  /**
+   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
+   * stemming. The exclusion set is copied, so later changes to the argument have no effect.
+   * 
+   * @param stopwords a stopword set
+   * @param stemExclusionSet a set of terms not to be stemmed
+   */
+  public LithuanianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+    super(stopwords);
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
+  }
+  
+  /**
+   * Creates a
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
+   * which tokenizes all the text in the provided {@link Reader}.
+   * 
+   * @return A
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
+   *         built from a {@link StandardTokenizer} filtered with
+   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
+   *         provided and {@link SnowballFilter}.
+   */
+  @Override
+  protected TokenStreamComponents createComponents(String fieldName) {
+    final Tokenizer source = new StandardTokenizer();
+    TokenStream result = new StandardFilter(source);
+    result = new LowerCaseFilter(result);
+    result = new StopFilter(result, stopwords);
+    if (!stemExclusionSet.isEmpty()) {
+      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+    }
+    result = new SnowballFilter(result, new LithuanianStemmer());
+    return new TokenStreamComponents(source, result);
+  }
+}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/package-info.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/package-info.java
new file mode 100644
index 00000000000..c01c039e3e7
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Analyzer for Lithuanian.
+ */
+package org.apache.lucene.analysis.lt;
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/stem_ISO_8859_1.sbl b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/stem_ISO_8859_1.sbl
new file mode 100644
index 00000000000..d1158c19628
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/stem_ISO_8859_1.sbl
@@ -0,0 +1,396 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+externals ( stem )
+/* Special characters in Unicode Latin-1 and Latin Extended-A */
+// ' ogonek (nosine)
+stringdef a'   decimal '261'  // ą a + ogonek
+stringdef e'   decimal '281'  // ę e + ogonek
+stringdef i'   decimal '303'  // į i + ogonek
+stringdef u'   decimal '371'  // ų u + ogonek
+
+// . dot above (taskas)
+stringdef e.   decimal '279'  // ė e + dot
+
+// - macron, long vowel (ilgoji)
+stringdef u-   decimal '363'  // ū u + macron
+
+// * caron (varnele)
+stringdef c*   decimal '269'  // č c + caron (haček)
+stringdef s*   decimal '353'  // š s + caron (haček)
+stringdef z*   decimal '382'  // ž z + caron (haček)
+
+// [C](VC)^m[V|C]
+// definitions of variables for
+// p1 - position of m = 0
+// p2 - position of m = 1; s - size of the word, set at the start of stem
+integers ( p1 p2 s)
+
+// booleans
+// CHANGE - set by fix_conflicts, fix_chdz and fix_gd whenever they rewrite the word
+booleans ( CHANGE )
+
+// escape symbols for substituting lithuanian characters
+stringescapes { }
+
+// groupings
+// v - lithuanian vowels
+groupings ( v )
+
+// v - all lithuanian vowels
+define v 'aeiyou{a'}{e'}{i'}{u'}{e.}{u-}'
+
+// all lithuanian stemmer routines; stem runs them in the order given in its backwards block
+routines (
+  step2 R1 step1 fix_chdz fix_gd fix_conflicts
+)
+
+backwardmode(
+
+  define R1 as $p1 <= cursor  // succeeds when the cursor is at or to the right of mark p1
+  define step1 as (           // delete one listed inflectional ending that lies after p1
+    setlimit tomark p1 for ([substring]) R1 among(
+      // Nouns
+      // 1st declension
+      'as' 'ias' 'is' 'ys'        // vyras, kelias, brolis, gaidys
+      'o' 'io'                    // vyro, kelio
+      'ui' 'iui'                  // vyrui, keliui
+      '{a'}' 'i{a'}' '{i'}'       // vyrą, kelią, brolį
+      'u' 'iu'                    // vyru, keliu
+      'e' 'yje'                   // vyre, kelyje
+      'y' 'au' 'i'                // kely, brolau, broli,
+      'an'                        // nusižengiman
+
+      'ai' 'iai'                  // vyrai, keliai
+      '{u'}' 'i{u'}'              // vyrų, kelių
+      'ams' 'am'                  // vyrams, vyram
+      'iams' 'iam'                // broliams, broliam
+      'us' 'ius'                  // vyrus, brolius
+      'ais' 'iais'                // vyrais, keliais
+      'uose' 'iuose' 'uos' 'iuos' // vyruose, keliuose, vyruos, keliuos
+      'uosna' 'iuosna'            // vyruosna, keliuosna
+      'ysna'                      // žutysna
+
+      'asis' 'aisi'               // sukimasis, sukimaisi
+      'osi' '{u'}si'              // sukimosi, sukimųsi
+      'uisi'                      // sukimuisi
+      '{a'}si'                    // sukimąsi
+      'usi'                       // sukimusi
+      'esi'                       // sukimesi
+
+      'uo'                        // mėnuo
+
+
+      // 2nd declension
+      'a' 'ia'                    // galva, vysnios
+      'os' 'ios'                  // galvos, vysnios
+      'oj' 'oje' 'ioje'           // galvoje, vysnioje
+      'osna' 'iosna'              // galvosna, vyšniosna
+      'om' 'oms' 'ioms'           // galvoms, vysnioms
+      'omis' 'iomis'              // galvomis, vysniomis
+      'ose' 'iose'                // galvose, vysniose
+      'on' 'ion'                  // galvon, vyšnion
+
+
+      // 3rd declension
+      '{e.}'                      // gervė
+      '{e.}s'                     // gervės
+      'ei'                        // gervei
+      '{e'}'                      // gervę
+      '{e.}j' '{e.}je'            // gervėj, gervėje
+      '{e.}ms'                    // gervėms
+      'es'                        // gerves
+      '{e.}mis'                   // gervėmis
+      '{e.}se'                    // gervėse
+      '{e.}sna'                   // gervėsna
+      '{e.}n'                     // žydaitėn
+
+
+      // 4th declension
+      'aus' 'iaus'                // sūnaus, skaičiaus
+      'umi' 'iumi'                // sūnumi, skaičiumi
+      'uje' 'iuje'                // sūnuje, skaičiuje
+      'iau'                       // skaičiau
+
+      '{u-}s'                     // sūnūs
+      'ums'                       // sūnums
+      'umis'                      // sūnumis
+      'un' 'iun'                  // sūnun, administratoriun
+
+
+      // 5th declension
+      'ies' 'ens' 'enio' 'ers'    // avies, vandens, sesers
+      'eniui' 'eriai'             // vandeniui, eriai
+      'en{i'}' 'er{i'}'           // vandenį, seserį
+      'imi' 'eniu' 'erimi' 'eria' // avimi, vandeniu, seserimi, seseria
+      'enyje' 'eryje'             // vandenyje, seseryje
+      'ie' 'enie' 'erie'          // avie, vandenie, seserie
+
+      'enys' 'erys'               // vandenys, seserys
+      // 'en{u'}' omitted: conflicts with 'žandenų' 'antenų'
+      'er{u'}'                    // seserų
+      'ims' 'enims' 'erims'       // avims, vandemins, seserims
+      'enis'                      // vandenis
+      'imis'                      // žebenkštimis
+      'enimis'                    // vandenimis
+      'yse' 'enyse' 'eryse'       // avyse, vandenyse, seseryse
+
+
+      // Adjectives
+      // (i)a declension
+      'iem' 'iems'                // geriem, geriems
+      'ame' 'iame'                // naujame, mediniame
+
+
+      // Verbs
+      // Indicative mood
+      // present tense
+      // (i)a conjugation
+      'uosi' 'iuosi'              // dirbuosi, traukiuosi
+      'iesi'                      // dirbiesi
+      'asi' 'iasi'                // dirbasi, traukiasi
+      'am{e.}s' 'iam{e.}s'        // dirbamės, traukiamės
+      'at' 'ate' 'iat' 'iate'     // dirbat, dirbate, ariat, traukiate
+      'at{e.}s' 'iat{e.}s'        // dirbatės, traukiatės
+
+      // i conjugation
+      'isi'                       // tikisi
+      'im'                        // mylim
+      // 'ime' omitted: conflicts with the noun locative, e.g. 'gėrime'
+      'im{e.}s'                   // tikimės
+      'it' 'ite'                  // mylit, mylite, tikitės
+      // 'it{e.}s' omitted: conflicts with the suffix plus nominative plural ending -ait-ės, e.g. žydaitės
+
+      // o conjugation
+      'ome'                       // mokome (bijomės is matched by 'om{e.}s' under the past frequentative tense below)
+      'ot' 'ote'                  // mokot, mokote (bijotės is matched by 'ot{e.}s' under the past tense below)
+
+      // past tense
+      // o conjugation
+      '{e.}jo' '{e.}josi'         // tikėjo, tikėjosi
+      'ot{e.}s'                   // tikėjotės
+
+      // ė conjugation
+      'eisi'                      // mokeisi
+      '{e.}si'                    // mokėsi
+      '{e.}m' '{e.}me'            // mokėm, mokėme
+      '{e.}m{e.}s'                // mokėmės
+      '{e.}t' '{e.}te'            // mokėt, mokėte
+      '{e.}t{e.}s'                // mokėtės
+
+      // past frequentative tense
+      'ausi'                      // mokydavausi
+      'om{e.}s'                   // mokydavomės
+
+
+      // future tense
+      'siu' 'siuosi'              // dirbsiu, mokysiuosi
+      'si' 'siesi'                // dirbsi, dirbsiesi
+      's' 'ysis'                  // dirbs, mokysis
+      'sim' 'sime'                // dirbsim, dirbsime
+      'sit' 'site'                // gersit, gersite
+
+      // conditional (subjunctive) mood
+      '{c*}iau' '{c*}iausi'       // dirbčiau
+      'tum' 'tumei'               // dirbtum, dirbtumei
+      'tumeis' 'tumeisi'          // mokytumeis, mokytumeisi
+      // 't{u'}' omitted because it wrongly stems batutų -> batų
+      't{u'}si'                   // mokytųsi
+      // 'tume' omitted: conflicts with 'šventume'
+      'tum{e.}m'                  // dirbtumėm
+      'tum{e.}me'                 // dirbtumėme
+      'tum{e.}m{e.}s'             // mokytumėmės
+      'tute' 'tum{e.}t'           // dirbtute, dirbtumėt
+      'tum{e.}te'                 // dirbtumėte
+      'tum{e.}t{e.}s'             // mokytumėtės
+
+      // imperative mood
+      'k' 'ki'                    // dirbk, dirbki, mokykis
+      // 'kis' omitted: conflicts with viln-išk-is
+      // 'kime' omitted: conflict, because of pirkime
+      'kim{e.}s'                  // mokykimės
+
+      // infinitive
+      'uoti' 'iuoti'              // meluoti, dygsniuoti
+      'auti' 'iauti'              // draugauti, girtuokliauti
+      'oti' 'ioti'                // dovanoti, meškerioti
+      '{e.}ti'                    // auklėti
+      'yti'                       // akyti
+      'inti'                      // auginti
+      'in{e.}ti'                  // blusinėti
+      'enti'                      // gyventi
+      'tel{e.}ti'                 // bumbtelėti
+      'ter{e.}ti'                 // bumbterėti
+
+      'ti'                        // skalbti
+      // 'tis' omitted: conflict, because rytme-tis -> rytme
+
+      // participles
+      '{a'}s' 'i{a'}s' '{i'}s'    // dirbąs, žaidžiąs, gulįs
+      't{u'}s'                    // suktųs -> suk
+      'sim{e.}s'                  // suksimės
+      'sit{e.}s'                  // suksitės
+      'kite'                      // supkite
+    )
+
+    delete
+  )
+
+  define step2 as repeat (    // keep deleting listed derivational suffixes that lie after p1
+    setlimit tomark p1 for ([substring]) among(
+      // noun suffixes
+
+      // adjective suffixes
+      // 'in' // omitted: conflicts with 'augintinis' and 'akiniais' // lauk-in-is
+      'ing'                       // tvark-ing-as
+      'i{s*}k'                    // lenk-išk-as
+      '{e.}t'                     // dem-ėt-as
+      'ot'                        // garban-ot-as
+      'uot' 'iuot'                // lang-uot-as, akin-iuot-as
+      // 'tin' omitted because of augintinis // dirb-tin-is
+      // 'ut' omitted because of batutas, degutas etc. // maž-ut-is
+      'yt'                        // maž-yt-is
+      'iuk'                       // maž-iuk-as
+      'iul'                       // maž-ul-is
+      '{e.}l'                     // maž-ėl-is
+      'yl'                        // maž-yl-is
+      'u{c*}iuk'                  // maž-učiuk-as
+      'uliuk'                     // maž-uliuk-as
+      'ut{e.}ait'                 // maž-utėlait-is
+      'ok'                        // did-ok-as
+      'iok'                       // višč-iok-as
+      'sv' '{s*}v' 'zgan'         // sal-sv-as, pilk-šv-as, bal-zgan-as
+      'op' 'iop'                  // dvej-op-as, viener-iop-as
+      'ain'                       // apval-ain-as
+      'yk{s*}t' 'yk{s*}{c*}'      // ten-ykšt-is, vakar-ykšč-ias
+
+      // degrees of comparison
+      'esn'                       // did-esn-is
+      'aus' 'iaus'                // nauj-aus-ias, ger-iaus-ias
+
+      // pronominal (definite) adjectives
+      // masculine gender
+      'ias'                       // žaliasis
+      'oj' 'ioj'                  // gerojo, žaliojo
+      'aj' 'iaj'                  // gerajam, žaliajam
+      '{a'}j' 'i{a'}j'            // garąjį, žaliąjį
+      'uoj' 'iuoj'                // geruoju, žaliuoju
+      'iej'                       // gerieji
+      '{u'}j' 'i{u'}j'            // gerųjų, žaliųjų
+      'ies'                       // geriesiems
+      'uos' 'iuos'                // geruosius, žaliuosius
+      'ais' 'iais'                // geraisiais, žaliaisiais
+
+      // feminine gender
+      'os' 'ios'                  // gerosios, žaliosios
+      '{a'}s' 'i{a'}s'            // gerąsios, žaliąsias
+
+      // past frequentative tense
+      'dav'                       // ei-dav-o
+
+      // participle suffixes
+      'ant' 'iant'
+      'int'                       // tur-int-is
+      '{e.}j'                     // tur-ėj-o
+      '{e'}'                      //
+      '{e.}j{e'}'
+      '{e'}s'                     // dirb-ęs-is
+
+      'siant'                     // dirb-siant
+
+      // half-participles (pusdalyviai)
+      'dam'                       // bėg-dam-as
+
+      'auj'                       // ūkinink-auj-a
+      'jam'
+      'iau'
+      'am'                        // baiminim-ams-i
+    )
+
+    delete
+  )
+
+  define fix_conflicts as (   // rewrite word endings that the step1 list would otherwise strip wrongly
+    [substring] among (
+      // 'lietuvaite' -> 'lietuvaitė', conflicts with 'myl-ite'
+      'aite' (<-'ait{e.}' set CHANGE)
+      // 'lietuvaitės' -> 'lietuvaitė', conflicts with 'myl-itės'
+      'ait{e.}s' (<-'ait{e.}' set CHANGE)
+
+      // 'ūs-uotės' -> 'ūs-uotė', conflicts with 'mokotės'
+      'uot{e.}s' (<-'uot{e.}' set CHANGE)
+      // 'ūs-uote' -> 'ūs-uotė', conflicts with 'mokote'
+      'uote' (<-'uot{e.}' set CHANGE)
+
+      // 'žerėjime' -> 'žėrėjimas', conflicts with 'žais-ime'
+      '{e.}jime' (<-'{e.}jimas' set CHANGE)
+
+      // 'žvilgesiu' -> 'žvilgesys', conflicts with 'dirb-siu'
+      'esiu' (<-'esys' set CHANGE)
+      // 'duobkasiu' -> 'duobkasys', conflicts with 'pakasiu'
+      'asius' (<-'asys' set CHANGE)
+
+      // 'žioravime' -> 'žioravimas', conflicts with 'myl-ime'
+      'avime' (<-'avimas' set CHANGE)
+      'ojime' (<-'ojimas' set CHANGE)
+
+      // 'advokatės' -> 'advokatė', conflicts with 'dirb-atės'
+      'okat{e.}s' (<-'okat{e.}' set CHANGE)
+      // 'advokate' -> 'advokatė', conflicts with 'dirb-ate'
+      'okate' (<-'okat{e.}' set CHANGE)
+    )
+  )
+
+  define fix_chdz as (        // replace a trailing č with t, or a trailing dž with d
+    [substring] among (
+      '{c*}' (<-'t' set CHANGE)
+      'd{z*}' (<-'d' set CHANGE)
+    )
+  )
+
+  define fix_gd as (          // replace a trailing gd with g
+    [substring] among (
+      'gd' (<-'g' set CHANGE)
+      //'{e.}k' (<-'{e.}g' set CHANGE)
+    )
+  )
+
+)
+
+define stem as (              // entry point: set marks p1 and p2, then run the backward routines
+
+  $p1 = limit
+  $p2 = limit
+  $s = size
+
+  do (
+    // prefix 'a' in words longer than 6 letters, e.g. 'a-liejus'.
+    try (test 'a' $s > 6 hop 1)
+
+    gopast v gopast non-v setmark p1
+    gopast v gopast non-v setmark p2
+  )
+
+  backwards (
+    do fix_conflicts
+    do step1
+    do fix_chdz
+    do step2
+    do fix_chdz
+    do fix_gd
+  )
+
+)
diff --git a/lucene/analysis/common/src/java/org/tartarus/snowball/ext/LithuanianStemmer.java b/lucene/analysis/common/src/java/org/tartarus/snowball/ext/LithuanianStemmer.java
new file mode 100644
index 00000000000..c490fa4b8bc
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/tartarus/snowball/ext/LithuanianStemmer.java
@@ -0,0 +1,769 @@
+// This file was generated automatically by the Snowball to Java compiler
+
+package org.tartarus.snowball.ext;
+
+import org.tartarus.snowball.Among;
+
+/**
+ * This class was automatically generated by a Snowball to Java compiler
+ * It implements the stemming algorithm defined by a snowball script.
+ */ + +public class LithuanianStemmer extends org.tartarus.snowball.SnowballProgram { + + private static final long serialVersionUID = 1L; + + private final static LithuanianStemmer methodObject = new LithuanianStemmer (); + + private final static Among a_0[] = { + new Among ( "a", -1, -1, "", methodObject ), + new Among ( "ia", 0, -1, "", methodObject ), + new Among ( "eria", 1, -1, "", methodObject ), + new Among ( "osna", 0, -1, "", methodObject ), + new Among ( "iosna", 3, -1, "", methodObject ), + new Among ( "uosna", 3, -1, "", methodObject ), + new Among ( "iuosna", 5, -1, "", methodObject ), + new Among ( "ysna", 0, -1, "", methodObject ), + new Among ( "\u0117sna", 0, -1, "", methodObject ), + new Among ( "e", -1, -1, "", methodObject ), + new Among ( "ie", 9, -1, "", methodObject ), + new Among ( "enie", 10, -1, "", methodObject ), + new Among ( "erie", 10, -1, "", methodObject ), + new Among ( "oje", 9, -1, "", methodObject ), + new Among ( "ioje", 13, -1, "", methodObject ), + new Among ( "uje", 9, -1, "", methodObject ), + new Among ( "iuje", 15, -1, "", methodObject ), + new Among ( "yje", 9, -1, "", methodObject ), + new Among ( "enyje", 17, -1, "", methodObject ), + new Among ( "eryje", 17, -1, "", methodObject ), + new Among ( "\u0117je", 9, -1, "", methodObject ), + new Among ( "ame", 9, -1, "", methodObject ), + new Among ( "iame", 21, -1, "", methodObject ), + new Among ( "sime", 9, -1, "", methodObject ), + new Among ( "ome", 9, -1, "", methodObject ), + new Among ( "\u0117me", 9, -1, "", methodObject ), + new Among ( "tum\u0117me", 25, -1, "", methodObject ), + new Among ( "ose", 9, -1, "", methodObject ), + new Among ( "iose", 27, -1, "", methodObject ), + new Among ( "uose", 27, -1, "", methodObject ), + new Among ( "iuose", 29, -1, "", methodObject ), + new Among ( "yse", 9, -1, "", methodObject ), + new Among ( "enyse", 31, -1, "", methodObject ), + new Among ( "eryse", 31, -1, "", methodObject ), + new Among ( "\u0117se", 9, -1, "", 
methodObject ), + new Among ( "ate", 9, -1, "", methodObject ), + new Among ( "iate", 35, -1, "", methodObject ), + new Among ( "ite", 9, -1, "", methodObject ), + new Among ( "kite", 37, -1, "", methodObject ), + new Among ( "site", 37, -1, "", methodObject ), + new Among ( "ote", 9, -1, "", methodObject ), + new Among ( "tute", 9, -1, "", methodObject ), + new Among ( "\u0117te", 9, -1, "", methodObject ), + new Among ( "tum\u0117te", 42, -1, "", methodObject ), + new Among ( "i", -1, -1, "", methodObject ), + new Among ( "ai", 44, -1, "", methodObject ), + new Among ( "iai", 45, -1, "", methodObject ), + new Among ( "eriai", 46, -1, "", methodObject ), + new Among ( "ei", 44, -1, "", methodObject ), + new Among ( "tumei", 48, -1, "", methodObject ), + new Among ( "ki", 44, -1, "", methodObject ), + new Among ( "imi", 44, -1, "", methodObject ), + new Among ( "erimi", 51, -1, "", methodObject ), + new Among ( "umi", 44, -1, "", methodObject ), + new Among ( "iumi", 53, -1, "", methodObject ), + new Among ( "si", 44, -1, "", methodObject ), + new Among ( "asi", 55, -1, "", methodObject ), + new Among ( "iasi", 56, -1, "", methodObject ), + new Among ( "esi", 55, -1, "", methodObject ), + new Among ( "iesi", 58, -1, "", methodObject ), + new Among ( "siesi", 59, -1, "", methodObject ), + new Among ( "isi", 55, -1, "", methodObject ), + new Among ( "aisi", 61, -1, "", methodObject ), + new Among ( "eisi", 61, -1, "", methodObject ), + new Among ( "tumeisi", 63, -1, "", methodObject ), + new Among ( "uisi", 61, -1, "", methodObject ), + new Among ( "osi", 55, -1, "", methodObject ), + new Among ( "\u0117josi", 66, -1, "", methodObject ), + new Among ( "uosi", 66, -1, "", methodObject ), + new Among ( "iuosi", 68, -1, "", methodObject ), + new Among ( "siuosi", 69, -1, "", methodObject ), + new Among ( "usi", 55, -1, "", methodObject ), + new Among ( "ausi", 71, -1, "", methodObject ), + new Among ( "\u010Diausi", 72, -1, "", methodObject ), + new Among ( "\u0105si", 
55, -1, "", methodObject ), + new Among ( "\u0117si", 55, -1, "", methodObject ), + new Among ( "\u0173si", 55, -1, "", methodObject ), + new Among ( "t\u0173si", 76, -1, "", methodObject ), + new Among ( "ti", 44, -1, "", methodObject ), + new Among ( "enti", 78, -1, "", methodObject ), + new Among ( "inti", 78, -1, "", methodObject ), + new Among ( "oti", 78, -1, "", methodObject ), + new Among ( "ioti", 81, -1, "", methodObject ), + new Among ( "uoti", 81, -1, "", methodObject ), + new Among ( "iuoti", 83, -1, "", methodObject ), + new Among ( "auti", 78, -1, "", methodObject ), + new Among ( "iauti", 85, -1, "", methodObject ), + new Among ( "yti", 78, -1, "", methodObject ), + new Among ( "\u0117ti", 78, -1, "", methodObject ), + new Among ( "tel\u0117ti", 88, -1, "", methodObject ), + new Among ( "in\u0117ti", 88, -1, "", methodObject ), + new Among ( "ter\u0117ti", 88, -1, "", methodObject ), + new Among ( "ui", 44, -1, "", methodObject ), + new Among ( "iui", 92, -1, "", methodObject ), + new Among ( "eniui", 93, -1, "", methodObject ), + new Among ( "oj", -1, -1, "", methodObject ), + new Among ( "\u0117j", -1, -1, "", methodObject ), + new Among ( "k", -1, -1, "", methodObject ), + new Among ( "am", -1, -1, "", methodObject ), + new Among ( "iam", 98, -1, "", methodObject ), + new Among ( "iem", -1, -1, "", methodObject ), + new Among ( "im", -1, -1, "", methodObject ), + new Among ( "sim", 101, -1, "", methodObject ), + new Among ( "om", -1, -1, "", methodObject ), + new Among ( "tum", -1, -1, "", methodObject ), + new Among ( "\u0117m", -1, -1, "", methodObject ), + new Among ( "tum\u0117m", 105, -1, "", methodObject ), + new Among ( "an", -1, -1, "", methodObject ), + new Among ( "on", -1, -1, "", methodObject ), + new Among ( "ion", 108, -1, "", methodObject ), + new Among ( "un", -1, -1, "", methodObject ), + new Among ( "iun", 110, -1, "", methodObject ), + new Among ( "\u0117n", -1, -1, "", methodObject ), + new Among ( "o", -1, -1, "", 
methodObject ), + new Among ( "io", 113, -1, "", methodObject ), + new Among ( "enio", 114, -1, "", methodObject ), + new Among ( "\u0117jo", 113, -1, "", methodObject ), + new Among ( "uo", 113, -1, "", methodObject ), + new Among ( "s", -1, -1, "", methodObject ), + new Among ( "as", 118, -1, "", methodObject ), + new Among ( "ias", 119, -1, "", methodObject ), + new Among ( "es", 118, -1, "", methodObject ), + new Among ( "ies", 121, -1, "", methodObject ), + new Among ( "is", 118, -1, "", methodObject ), + new Among ( "ais", 123, -1, "", methodObject ), + new Among ( "iais", 124, -1, "", methodObject ), + new Among ( "tumeis", 123, -1, "", methodObject ), + new Among ( "imis", 123, -1, "", methodObject ), + new Among ( "enimis", 127, -1, "", methodObject ), + new Among ( "omis", 123, -1, "", methodObject ), + new Among ( "iomis", 129, -1, "", methodObject ), + new Among ( "umis", 123, -1, "", methodObject ), + new Among ( "\u0117mis", 123, -1, "", methodObject ), + new Among ( "enis", 123, -1, "", methodObject ), + new Among ( "asis", 123, -1, "", methodObject ), + new Among ( "ysis", 123, -1, "", methodObject ), + new Among ( "ams", 118, -1, "", methodObject ), + new Among ( "iams", 136, -1, "", methodObject ), + new Among ( "iems", 118, -1, "", methodObject ), + new Among ( "ims", 118, -1, "", methodObject ), + new Among ( "enims", 139, -1, "", methodObject ), + new Among ( "erims", 139, -1, "", methodObject ), + new Among ( "oms", 118, -1, "", methodObject ), + new Among ( "ioms", 142, -1, "", methodObject ), + new Among ( "ums", 118, -1, "", methodObject ), + new Among ( "\u0117ms", 118, -1, "", methodObject ), + new Among ( "ens", 118, -1, "", methodObject ), + new Among ( "os", 118, -1, "", methodObject ), + new Among ( "ios", 147, -1, "", methodObject ), + new Among ( "uos", 147, -1, "", methodObject ), + new Among ( "iuos", 149, -1, "", methodObject ), + new Among ( "ers", 118, -1, "", methodObject ), + new Among ( "us", 118, -1, "", methodObject ), + 
new Among ( "aus", 152, -1, "", methodObject ), + new Among ( "iaus", 153, -1, "", methodObject ), + new Among ( "ius", 152, -1, "", methodObject ), + new Among ( "ys", 118, -1, "", methodObject ), + new Among ( "enys", 156, -1, "", methodObject ), + new Among ( "erys", 156, -1, "", methodObject ), + new Among ( "om\u00C4\u0097s", 118, -1, "", methodObject ), + new Among ( "ot\u00C4\u0097s", 118, -1, "", methodObject ), + new Among ( "\u0105s", 118, -1, "", methodObject ), + new Among ( "i\u0105s", 161, -1, "", methodObject ), + new Among ( "\u0117s", 118, -1, "", methodObject ), + new Among ( "am\u0117s", 163, -1, "", methodObject ), + new Among ( "iam\u0117s", 164, -1, "", methodObject ), + new Among ( "im\u0117s", 163, -1, "", methodObject ), + new Among ( "kim\u0117s", 166, -1, "", methodObject ), + new Among ( "sim\u0117s", 166, -1, "", methodObject ), + new Among ( "om\u0117s", 163, -1, "", methodObject ), + new Among ( "\u0117m\u0117s", 163, -1, "", methodObject ), + new Among ( "tum\u0117m\u0117s", 170, -1, "", methodObject ), + new Among ( "at\u0117s", 163, -1, "", methodObject ), + new Among ( "iat\u0117s", 172, -1, "", methodObject ), + new Among ( "sit\u0117s", 163, -1, "", methodObject ), + new Among ( "ot\u0117s", 163, -1, "", methodObject ), + new Among ( "\u0117t\u0117s", 163, -1, "", methodObject ), + new Among ( "tum\u0117t\u0117s", 176, -1, "", methodObject ), + new Among ( "\u012Fs", 118, -1, "", methodObject ), + new Among ( "\u016Bs", 118, -1, "", methodObject ), + new Among ( "t\u0173s", 118, -1, "", methodObject ), + new Among ( "at", -1, -1, "", methodObject ), + new Among ( "iat", 181, -1, "", methodObject ), + new Among ( "it", -1, -1, "", methodObject ), + new Among ( "sit", 183, -1, "", methodObject ), + new Among ( "ot", -1, -1, "", methodObject ), + new Among ( "\u0117t", -1, -1, "", methodObject ), + new Among ( "tum\u0117t", 186, -1, "", methodObject ), + new Among ( "u", -1, -1, "", methodObject ), + new Among ( "au", 188, -1, "", 
methodObject ), + new Among ( "iau", 189, -1, "", methodObject ), + new Among ( "\u010Diau", 190, -1, "", methodObject ), + new Among ( "iu", 188, -1, "", methodObject ), + new Among ( "eniu", 192, -1, "", methodObject ), + new Among ( "siu", 192, -1, "", methodObject ), + new Among ( "y", -1, -1, "", methodObject ), + new Among ( "\u0105", -1, -1, "", methodObject ), + new Among ( "i\u0105", 196, -1, "", methodObject ), + new Among ( "\u0117", -1, -1, "", methodObject ), + new Among ( "\u0119", -1, -1, "", methodObject ), + new Among ( "\u012F", -1, -1, "", methodObject ), + new Among ( "en\u012F", 200, -1, "", methodObject ), + new Among ( "er\u012F", 200, -1, "", methodObject ), + new Among ( "\u0173", -1, -1, "", methodObject ), + new Among ( "i\u0173", 203, -1, "", methodObject ), + new Among ( "er\u0173", 203, -1, "", methodObject ) + }; + + private final static Among a_1[] = { + new Among ( "ing", -1, -1, "", methodObject ), + new Among ( "aj", -1, -1, "", methodObject ), + new Among ( "iaj", 1, -1, "", methodObject ), + new Among ( "iej", -1, -1, "", methodObject ), + new Among ( "oj", -1, -1, "", methodObject ), + new Among ( "ioj", 4, -1, "", methodObject ), + new Among ( "uoj", 4, -1, "", methodObject ), + new Among ( "iuoj", 6, -1, "", methodObject ), + new Among ( "auj", -1, -1, "", methodObject ), + new Among ( "\u0105j", -1, -1, "", methodObject ), + new Among ( "i\u0105j", 9, -1, "", methodObject ), + new Among ( "\u0117j", -1, -1, "", methodObject ), + new Among ( "\u0173j", -1, -1, "", methodObject ), + new Among ( "i\u0173j", 12, -1, "", methodObject ), + new Among ( "ok", -1, -1, "", methodObject ), + new Among ( "iok", 14, -1, "", methodObject ), + new Among ( "iuk", -1, -1, "", methodObject ), + new Among ( "uliuk", 16, -1, "", methodObject ), + new Among ( "u\u010Diuk", 16, -1, "", methodObject ), + new Among ( "i\u0161k", -1, -1, "", methodObject ), + new Among ( "iul", -1, -1, "", methodObject ), + new Among ( "yl", -1, -1, "", methodObject 
), + new Among ( "\u0117l", -1, -1, "", methodObject ), + new Among ( "am", -1, -1, "", methodObject ), + new Among ( "dam", 23, -1, "", methodObject ), + new Among ( "jam", 23, -1, "", methodObject ), + new Among ( "zgan", -1, -1, "", methodObject ), + new Among ( "ain", -1, -1, "", methodObject ), + new Among ( "esn", -1, -1, "", methodObject ), + new Among ( "op", -1, -1, "", methodObject ), + new Among ( "iop", 29, -1, "", methodObject ), + new Among ( "ias", -1, -1, "", methodObject ), + new Among ( "ies", -1, -1, "", methodObject ), + new Among ( "ais", -1, -1, "", methodObject ), + new Among ( "iais", 33, -1, "", methodObject ), + new Among ( "os", -1, -1, "", methodObject ), + new Among ( "ios", 35, -1, "", methodObject ), + new Among ( "uos", 35, -1, "", methodObject ), + new Among ( "iuos", 37, -1, "", methodObject ), + new Among ( "aus", -1, -1, "", methodObject ), + new Among ( "iaus", 39, -1, "", methodObject ), + new Among ( "\u0105s", -1, -1, "", methodObject ), + new Among ( "i\u0105s", 41, -1, "", methodObject ), + new Among ( "\u0119s", -1, -1, "", methodObject ), + new Among ( "ut\u0117ait", -1, -1, "", methodObject ), + new Among ( "ant", -1, -1, "", methodObject ), + new Among ( "iant", 45, -1, "", methodObject ), + new Among ( "siant", 46, -1, "", methodObject ), + new Among ( "int", -1, -1, "", methodObject ), + new Among ( "ot", -1, -1, "", methodObject ), + new Among ( "uot", 49, -1, "", methodObject ), + new Among ( "iuot", 50, -1, "", methodObject ), + new Among ( "yt", -1, -1, "", methodObject ), + new Among ( "\u0117t", -1, -1, "", methodObject ), + new Among ( "yk\u0161t", -1, -1, "", methodObject ), + new Among ( "iau", -1, -1, "", methodObject ), + new Among ( "dav", -1, -1, "", methodObject ), + new Among ( "sv", -1, -1, "", methodObject ), + new Among ( "\u0161v", -1, -1, "", methodObject ), + new Among ( "yk\u0161\u010D", -1, -1, "", methodObject ), + new Among ( "\u0119", -1, -1, "", methodObject ), + new Among ( "\u0117j\u0119", 
60, -1, "", methodObject ) + }; + + /** Endings looked up by r_fix_conflicts(); the third constructor argument is the code that r_fix_conflicts() switches on to choose the replacement (for example "ojime", code 9, becomes "ojimas"). */ private final static Among a_2[] = { + new Among ( "ojime", -1, 9, "", methodObject ), + new Among ( "\u0117jime", -1, 5, "", methodObject ), + new Among ( "avime", -1, 8, "", methodObject ), + new Among ( "okate", -1, 11, "", methodObject ), + new Among ( "aite", -1, 1, "", methodObject ), + new Among ( "uote", -1, 4, "", methodObject ), + new Among ( "asius", -1, 7, "", methodObject ), + new Among ( "okat\u0117s", -1, 10, "", methodObject ), + new Among ( "ait\u0117s", -1, 2, "", methodObject ), + new Among ( "uot\u0117s", -1, 3, "", methodObject ), + new Among ( "esiu", -1, 6, "", methodObject ) + }; + + /** The two endings looked up by r_fix_chdz(): code 1 is rewritten to "t", code 2 to "d". */ private final static Among a_3[] = { + new Among ( "\u010D", -1, 1, "", methodObject ), + new Among ( "d\u017E", -1, 2, "", methodObject ) + }; + + /** The single ending looked up by r_fix_gd(): "gd" (code 1) is rewritten to "g". */ private final static Among a_4[] = { + new Among ( "gd", -1, 1, "", methodObject ) + }; + + /** Character-grouping table passed to in_grouping/out_grouping (with bounds 97 and 371) by stem(); presumably the Lithuanian vowel set - TODO confirm against the Snowball source. */ private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 64, 1, 0, 64, 0, 0, 0, 0, 0, 0, 0, 4, 4 }; + + /** Set to true by the r_fix_* routines whenever they rewrite an ending; not read by any code visible in this part of the file. */ private boolean B_CHANGE; + /** Length of the current word, recorded at the start of stem(). */ private int I_s; + /** Second mark set by the forward scan in stem(); not read by any routine visible in this part of the file. */ private int I_p2; + /** First mark set by the forward scan in stem(); tested by r_R1() and used as the backward limit in r_step1()/r_step2(). */ private int I_p1; + + /** Copies B_CHANGE, I_s, I_p2 and I_p1 from other, then delegates to super.copy_from(other). */ private void copy_from(LithuanianStemmer other) { + B_CHANGE = other.B_CHANGE; + I_s = other.I_s; + I_p2 = other.I_p2; + I_p1 = other.I_p1; + super.copy_from(other); + } + + /** Returns true when the cursor is at or beyond the I_p1 mark. */ private boolean r_R1() { + if (!(I_p1 <= cursor)) + { + return false; + } + return true; + } + + /** Looks for one ending from a_0 (206 entries, declared outside this part of the file) ending at the cursor, with limit_backward temporarily moved to I_p1, and deletes it when r_R1() holds. Returns false when the cursor is before I_p1, when nothing matches, or when r_R1() fails. */ private boolean r_step1() { + int v_1; + int v_2; + // (, line 48 + // setlimit, line 49 + v_1 = limit - cursor; + // tomark, line 49 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 49 + // [, line 49 + ket = cursor; + // substring, line 49 + if (find_among_b(a_0, 206) == 0) + { + limit_backward = v_2; + return false; + } + // ], line 49 + bra = cursor; + limit_backward = v_2; + // call R1, line 49 + if (!r_R1()) + { + return false; + } + // delete, line 235 + slice_del(); + return true; 
} + + /** Repeatedly deletes an ending from a_1 (62 entries), each time with limit_backward temporarily moved to I_p1, until the cursor is before I_p1 or nothing matches; the cursor is then restored to where the failed attempt started. Always returns true. */ private boolean r_step2() { + int v_1; + int v_2; + int v_3; + // repeat, line 238 + replab0: while(true) + { + v_1 = limit - cursor; + lab1: do { + // (, line 238 + // setlimit, line 239 + v_2 = limit - cursor; + // tomark, line 239 + if (cursor < I_p1) + { + break lab1; + } + cursor = I_p1; + v_3 = limit_backward; + limit_backward = cursor; + cursor = limit - v_2; + // (, line 239 + // [, line 239 + ket = cursor; + // substring, line 239 + if (find_among_b(a_1, 62) == 0) + { + limit_backward = v_3; + break lab1; + } + // ], line 239 + bra = cursor; + limit_backward = v_3; + // delete, line 309 + slice_del(); + continue replab0; + } while (false); + cursor = limit - v_1; + break replab0; + } + return true; + } + + /** Replaces an a_2 ending found at the cursor with the form selected by its code (the cases below) and sets B_CHANGE. Returns false when no a_2 ending matches. */ private boolean r_fix_conflicts() { + int among_var; + // (, line 312 + // [, line 313 + ket = cursor; + // substring, line 313 + among_var = find_among_b(a_2, 11); + if (among_var == 0) + { + return false; + } + // ], line 313 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 315 + // <-, line 315 + slice_from("ait\u0117"); + // set CHANGE, line 315 + B_CHANGE = true; + break; + case 2: + // (, line 317 + // <-, line 317 + slice_from("ait\u0117"); + // set CHANGE, line 317 + B_CHANGE = true; + break; + case 3: + // (, line 320 + // <-, line 320 + slice_from("uot\u0117"); + // set CHANGE, line 320 + B_CHANGE = true; + break; + case 4: + // (, line 322 + // <-, line 322 + slice_from("uot\u0117"); + // set CHANGE, line 322 + B_CHANGE = true; + break; + case 5: + // (, line 325 + // <-, line 325 + slice_from("\u0117jimas"); + // set CHANGE, line 325 + B_CHANGE = true; + break; + case 6: + // (, line 328 + // <-, line 328 + slice_from("esys"); + // set CHANGE, line 328 + B_CHANGE = true; + break; + case 7: + // (, line 330 + // <-, line 330 + slice_from("asys"); + // set CHANGE, line 330 + B_CHANGE = true; + break; + case 8: + // (, line 334 + // <-, line 334 + slice_from("avimas"); + // set CHANGE, line 334 + 
B_CHANGE = true; + break; + case 9: + // (, line 335 + // <-, line 335 + slice_from("ojimas"); + // set CHANGE, line 335 + B_CHANGE = true; + break; + case 10: + // (, line 338 + // <-, line 338 + slice_from("okat\u0117"); + // set CHANGE, line 338 + B_CHANGE = true; + break; + case 11: + // (, line 340 + // <-, line 340 + slice_from("okat\u0117"); + // set CHANGE, line 340 + B_CHANGE = true; + break; + } + return true; + } + + /** Replaces an a_3 ending found at the cursor: code 1 becomes "t", code 2 becomes "d"; sets B_CHANGE. Returns false when neither ending matches. */ private boolean r_fix_chdz() { + int among_var; + // (, line 346 + // [, line 347 + ket = cursor; + // substring, line 347 + among_var = find_among_b(a_3, 2); + if (among_var == 0) + { + return false; + } + // ], line 347 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 348 + // <-, line 348 + slice_from("t"); + // set CHANGE, line 348 + B_CHANGE = true; + break; + case 2: + // (, line 349 + // <-, line 349 + slice_from("d"); + // set CHANGE, line 349 + B_CHANGE = true; + break; + } + return true; + } + + /** Replaces "gd" (the only a_4 entry) found at the cursor with "g" and sets B_CHANGE. Returns false when it does not match. */ private boolean r_fix_gd() { + int among_var; + // (, line 353 + // [, line 354 + ket = cursor; + // substring, line 354 + among_var = find_among_b(a_4, 1); + if (among_var == 0) + { + return false; + } + // ], line 354 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 355 + // <-, line 355 + slice_from("g"); + // set CHANGE, line 355 + B_CHANGE = true; + break; + } + return true; + } + + /** Stems the current word. First sets I_p1 and I_p2 to limit and I_s to the word length; then, if "a" matches at the cursor and I_s is greater than 6, hops one position forward; then scans forward with in_grouping/out_grouping on g_v to set I_p1 and afterwards I_p2 (either mark keeps the value limit if the scan reaches limit first). Finally works backwards from limit, running r_fix_conflicts, r_step1, r_fix_chdz, r_step2, r_fix_chdz and r_fix_gd in that order, restoring the cursor after each and ignoring their results. Always returns true. */ public boolean stem() { + int v_1; + int v_2; + int v_3; + int v_8; + int v_9; + int v_10; + int v_11; + int v_12; + int v_13; + // (, line 362 + I_p1 = limit; + I_p2 = limit; + I_s = (getCurrent().length()); + // do, line 368 + v_1 = cursor; + lab0: do { + // (, line 368 + // try, line 370 + v_2 = cursor; + lab1: do { + // (, line 370 + // test, line 370 + v_3 = cursor; + // literal, line 370 + if (!(eq_s(1, "a"))) + { + cursor = v_2; + break lab1; + } + cursor = v_3; + if (!(I_s > 6)) + { + cursor = v_2; + break lab1; + } + // hop, line 370 + { + int c = cursor + 1; + if (0 > c || 
c > limit) + { + cursor = v_2; + break lab1; + } + cursor = c; + } + } while (false); + // gopast, line 372 + golab2: while(true) + { + lab3: do { + if (!(in_grouping(g_v, 97, 371))) + { + break lab3; + } + break golab2; + } while (false); + if (cursor >= limit) + { + break lab0; + } + cursor++; + } + // gopast, line 372 + golab4: while(true) + { + lab5: do { + if (!(out_grouping(g_v, 97, 371))) + { + break lab5; + } + break golab4; + } while (false); + if (cursor >= limit) + { + break lab0; + } + cursor++; + } + // setmark p1, line 372 + I_p1 = cursor; + // gopast, line 373 + golab6: while(true) + { + lab7: do { + if (!(in_grouping(g_v, 97, 371))) + { + break lab7; + } + break golab6; + } while (false); + if (cursor >= limit) + { + break lab0; + } + cursor++; + } + // gopast, line 373 + golab8: while(true) + { + lab9: do { + if (!(out_grouping(g_v, 97, 371))) + { + break lab9; + } + break golab8; + } while (false); + if (cursor >= limit) + { + break lab0; + } + cursor++; + } + // setmark p2, line 373 + I_p2 = cursor; + } while (false); + cursor = v_1; + // backwards, line 377 + limit_backward = cursor; cursor = limit; + // (, line 377 + // do, line 378 + v_8 = limit - cursor; + lab10: do { + // call fix_conflicts, line 378 + if (!r_fix_conflicts()) + { + break lab10; + } + } while (false); + cursor = limit - v_8; + // do, line 379 + v_9 = limit - cursor; + lab11: do { + // call step1, line 379 + if (!r_step1()) + { + break lab11; + } + } while (false); + cursor = limit - v_9; + // do, line 380 + v_10 = limit - cursor; + lab12: do { + // call fix_chdz, line 380 + if (!r_fix_chdz()) + { + break lab12; + } + } while (false); + cursor = limit - v_10; + // do, line 381 + v_11 = limit - cursor; + lab13: do { + // call step2, line 381 + if (!r_step2()) + { + break lab13; + } + } while (false); + cursor = limit - v_11; + // do, line 382 + v_12 = limit - cursor; + lab14: do { + // call fix_chdz, line 382 + if (!r_fix_chdz()) + { + break lab14; + } + } while (false); + 
cursor = limit - v_12; + // do, line 383 + v_13 = limit - cursor; + lab15: do { + // call fix_gd, line 383 + if (!r_fix_gd()) + { + break lab15; + } + } while (false); + cursor = limit - v_13; + cursor = limit_backward; return true; + } + + /** Returns true for any LithuanianStemmer instance, regardless of its state. */ public boolean equals( Object o ) { + return o instanceof LithuanianStemmer; + } + + /** Returns the hash of the class name, so every instance has the same hash code, in line with equals(). */ public int hashCode() { + return LithuanianStemmer.class.getName().hashCode(); + } + + + +} diff --git a/lucene/analysis/common/src/resources/org/apache/lucene/analysis/lt/stopwords.txt b/lucene/analysis/common/src/resources/org/apache/lucene/analysis/lt/stopwords.txt new file mode 100644 index 00000000000..02b3b73cd4a --- /dev/null +++ b/lucene/analysis/common/src/resources/org/apache/lucene/analysis/lt/stopwords.txt @@ -0,0 +1,126 @@ +# Lithuanian stopwords list +ant +apie +ar +arba +aš +be +bei +bet +bus +būti +būtų +buvo +dėl +gali +į +iki +ir +iš +ja +ją +jai +jais +jam +jame +jas +jei +ji +jį +jie +jiedu +jiedvi +jiedviem +jiedviese +jiems +jis +jo +jodviem +jog +joje +jomis +joms +jos +jose +jų +judu +judvi +judviejų +jųdviejų +judviem +judviese +jumis +jums +jumyse +juo +juodu +juodviese +juos +juose +jus +jūs +jūsų +ką +kad +kai +kaip +kas +kiek +kol +kur +kurie +kuris +man +mane +manęs +manimi +mano +manyje +mes +metu +mudu +mudvi +mudviejų +mudviem +mudviese +mumis +mums +mumyse +mus +mūsų +nei +nes +net +nors +nuo +o +pat +per +po +prie +prieš +sau +save +savęs +savimi +savo +savyje +su +tačiau +tada +tai +taip +tas +tau +tave +tavęs +tavimi +tavyje +ten +to +todėl +tu +tuo +už +visi +yra diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/lt/TestLithuanianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lt/TestLithuanianAnalyzer.java new file mode 100644 index 00000000000..09fab6b70ad --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lt/TestLithuanianAnalyzer.java @@ -0,0 +1,53 @@ +package org.apache.lucene.analysis.lt; + +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;

/**
 * Tests for {@link LithuanianAnalyzer}: default resources, stopword removal,
 * stem exclusion and random input.
 * <p>
 * Every analyzer created by a test is closed before the test returns, also
 * when an assertion fails, matching what {@link #testResourcesAvailable()}
 * already did.
 */
public class TestLithuanianAnalyzer extends BaseTokenStreamTestCase {

  /** This test fails with NPE when the
   * stopwords file is missing in classpath */
  public void testResourcesAvailable() {
    new LithuanianAnalyzer().close();
  }

  /** Test stopword removal: the stopword "man" must produce no tokens. */
  public void testStopWord() throws Exception {
    Analyzer a = new LithuanianAnalyzer();
    try {
      assertAnalyzesTo(a, "man",
          new String[] { });
    } finally {
      // release the analyzer even if the assertion above fails
      a.close();
    }
  }

  /** Test stemmer exceptions: a term in the exclusion set must pass through unchanged. */
  public void testStemExclusion() throws IOException {
    CharArraySet set = new CharArraySet(1, true);
    set.add("vaikų");
    Analyzer a = new LithuanianAnalyzer(CharArraySet.EMPTY_SET, set);
    try {
      assertAnalyzesTo(a, "vaikų", new String[] {"vaikų"});
    } finally {
      a.close();
    }
  }

  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
    // keep a reference so the analyzer can be closed afterwards
    Analyzer a = new LithuanianAnalyzer();
    try {
      checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
    } finally {
      a.close();
    }
  }
}
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/lt/TestLithuanianStemming.java
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lt/TestLithuanianStemming.java new file mode 100644 index 00000000000..0566ee4f7da --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lt/TestLithuanianStemming.java @@ -0,0 +1,481 @@
package org.apache.lucene.analysis.lt;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.tartarus.snowball.ext.LithuanianStemmer;

/**
 * Basic tests for {@link LithuanianStemmer}.
 * We test some n/adj templates from wikipedia and some high frequency
 * terms from mixed corpora.
 * <p>
 * Noun paradigms are listed in the order: nom. sg., nom. pl., gen. sg.,
 * gen. pl., dat. sg., dat. pl., acc. sg., acc. pl., ins. sg., ins. pl.,
 * loc. sg., loc. pl., voc. sg., voc. pl. Adjective paradigms use the same
 * order without the vocative, masculine forms first, then feminine.
 */
public class TestLithuanianStemming extends BaseTokenStreamTestCase {
  private Analyzer a;

  @Override
  public void setUp() throws Exception {
    super.setUp();
    a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        // whitespace tokenization without lowercasing, then the stemmer under test
        Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        SnowballFilter stemmed = new SnowballFilter(source, new LithuanianStemmer());
        return new TokenStreamComponents(source, stemmed);
      }
    };
  }

  @Override
  public void tearDown() throws Exception {
    a.close();
    super.tearDown();
  }

  /** Checks, in the order given, that each form is stemmed to {@code stem}. */
  private void checkStem(String stem, String... forms) throws IOException {
    for (String form : forms) {
      checkOneTerm(a, form, stem);
    }
  }

  /** Checks, in the order given, a flat list of alternating input / expected-stem strings. */
  private void checkPairs(String... inputThenStem) throws IOException {
    for (int i = 0; i < inputThenStem.length; i += 2) {
      checkOneTerm(a, inputThenStem[i], inputThenStem[i + 1]);
    }
  }

  public void testNounsI() throws IOException {
    // n. decl. I (-as)
    checkStem("vaik",
        "vaikas", "vaikai", "vaiko", "vaikų", "vaikui", "vaikams", "vaiką",
        "vaikus", "vaiku", "vaikais", "vaike", "vaikuose", "vaike", "vaikai");

    // n. decl. I (-is)
    checkStem("brol",
        "brolis", "broliai", "brolio", "brolių", "broliui", "broliams", "brolį",
        "brolius", "broliu", "broliais", "brolyje", "broliuose", "broli", "broliai");

    // n. decl. I (-ys)
    // note: some forms don't conflate
    checkPairs(
        "arklys", "arkl",           // nom. sg.
        "arkliai", "arkliai",       // nom. pl.
        "arklio", "arkl",           // gen. sg.
        "arklių", "arkl",           // gen. pl.
        "arkliui", "arkliui",       // dat. sg.
        "arkliams", "arkliam",      // dat. pl.
        "arklį", "arkl",            // acc. sg.
        "arklius", "arklius",       // acc. pl.
        "arkliu", "arkl",           // ins. sg.
        "arkliais", "arkliais",     // ins. pl.
        "arklyje", "arklyj",        // loc. sg.
        "arkliuose", "arkliuos",    // loc. pl.
        "arkly", "arkl",            // voc. sg.
        "arkliai", "arkliai");      // voc. pl.
  }

  public void testNounsII() throws IOException {
    // n. decl II (-a)
    checkStem("motin",
        "motina", "motinos", "motinos", "motinų", "motinai", "motinoms", "motiną",
        "motinas", "motina", "motinomis", "motinoje", "motinose", "motina", "motinos");

    // n. decl II (-ė)
    checkStem("kat",
        "katė", "katės", "katės", "kačių", "katei", "katėms", "katę",
        "kates", "kate", "katėmis", "katėje", "katėse", "kate", "katės");

    // n. decl II (-ti)
    checkStem("pat",
        "pati", "pačios", "pačios", "pačių", "pačiai", "pačioms", "pačią",
        "pačias", "pačia", "pačiomis", "pačioje", "pačiose", "pati", "pačios");
  }

  public void testNounsIII() throws IOException {
    // n. decl III-m
    checkStem("vag",
        "vagis", "vagys", "vagies", "vagių", "vagiui", "vagims", "vagį",
        "vagis", "vagimi", "vagimis", "vagyje", "vagyse", "vagie", "vagys");

    // n. decl III-f
    checkStem("ak",
        "akis", "akys", "akies", "akių", "akiai", "akims", "akį",
        "akis", "akimi", "akimis", "akyje", "akyse", "akie", "akys");
  }

  public void testNounsIV() throws IOException {
    // n. decl IV (-us)
    checkStem("sūn",
        "sūnus", "sūnūs", "sūnaus", "sūnų", "sūnui", "sūnums", "sūnų",
        "sūnus", "sūnumi", "sūnumis", "sūnuje", "sūnuose", "sūnau", "sūnūs");

    // n. decl IV (-ius)
    checkStem("profesor",
        "profesorius", "profesoriai", "profesoriaus", "profesorių", "profesoriui",
        "profesoriams", "profesorių", "profesorius", "profesoriumi", "profesoriais",
        "profesoriuje", "profesoriuose", "profesoriau", "profesoriai");
  }

  public void testNounsV() throws IOException {
    // n. decl V
    // note: gen.pl. doesn't conflate
    checkPairs(
        "vanduo", "vand",        // nom. sg.
        "vandenys", "vand",      // nom. pl.
        "vandens", "vand",       // gen. sg.
        "vandenų", "vanden",     // gen. pl.
        "vandeniui", "vand",     // dat. sg.
        "vandenims", "vand",     // dat. pl.
        "vandenį", "vand",       // acc. sg.
        "vandenis", "vand",      // acc. pl.
        "vandeniu", "vand",      // ins. sg.
        "vandenimis", "vand",    // ins. pl.
        "vandenyje", "vand",     // loc. sg.
        "vandenyse", "vand",     // loc. pl.
        "vandenie", "vand",      // voc. sg.
        "vandenys", "vand");     // voc. pl.
  }

  public void testAdjI() throws IOException {
    // adj. decl I, masculine
    checkStem("ger",
        "geras", "geri", "gero", "gerų", "geram", "geriems",
        "gerą", "gerus", "geru", "gerais", "gerame", "geruose");

    // adj. decl I, feminine
    checkStem("ger",
        "gera", "geros", "geros", "gerų", "gerai", "geroms",
        "gerą", "geras", "gera", "geromis", "geroje", "gerose");
  }

  public void testAdjII() throws IOException {
    // adj. decl II, masculine
    checkStem("graž",
        "gražus", "gražūs", "gražaus", "gražių", "gražiam", "gražiems",
        "gražų", "gražius", "gražiu", "gražiais", "gražiame", "gražiuose");

    // adj. decl II, feminine
    checkStem("graž",
        "graži", "gražios", "gražios", "gražių", "gražiai", "gražioms",
        "gražią", "gražias", "gražia", "gražiomis", "gražioje", "gražiose");
  }

  public void testAdjIII() throws IOException {
    // adj. decl III, masculine
    checkStem("vidutin",
        "vidutinis", "vidutiniai", "vidutinio", "vidutinių", "vidutiniam", "vidutiniams",
        "vidutinį", "vidutinius", "vidutiniu", "vidutiniais", "vidutiniame", "vidutiniuose");

    // adj. decl III, feminine
    checkStem("vidutin",
        "vidutinė", "vidutinės", "vidutinės", "vidutinių", "vidutinei", "vidutinėms",
        "vidutinę", "vidutines", "vidutine", "vidutinėmis", "vidutinėje", "vidutinėse");
  }

  /**
   * test some high frequency terms from corpora to look for anything crazy
   */
  public void testHighFrequencyTerms() throws IOException {
    // alternating: term, expected stem
    checkPairs(
        "ir", "ir",   "kad", "kad",   "į", "į",   "tai", "tai",
        "su", "su",   "o", "o",   "iš", "iš",   "kaip", "kaip",
        "bet", "bet",   "yra", "yr",   "buvo", "buv",   "tik", "tik",
        "ne", "ne",   "taip", "taip",   "ar", "ar",   "dar", "dar",
        "jau", "jau",   "savo", "sav",   "apie", "ap",   "kai", "kai",
        "aš", "aš",   "per", "per",   "nuo", "nuo",   "po", "po",
        "jis", "jis",   "kas", "kas",   "d", "d",   "labai", "lab",
        "man", "man",   "dėl", "dėl",   "tačiau", "tat",   "nes", "nes",
        "už", "už",   "to", "to",   "jo", "jo",   "iki", "ik",
        "ką", "ką",   "mano", "man",   "metų", "met",   "nors", "nor",
        "jei", "jei",   "bus", "bus",   "jų", "jų",   "čia", "čia",
        "dabar", "dabar",   "Lietuvos", "Lietuv",   "net", "net",   "nei", "nei",
        "gali", "gal",   "daug", "daug",   "prie", "prie",   "ji", "ji",
        "jos", "jos",   "pat", "pat",   "jie", "jie",   "kur", "kur",
        "gal", "gal",   "ant", "ant",   "tiek", "tiek",   "be", "be",
        "būti", "būt",   "bei", "bei",   "daugiau", "daug",   "turi", "tur",
        "prieš", "prieš",   "vis", "vis",   "būtų", "būt",   "jog", "jog",
        "reikia", "reik",   "mūsų", "mūs",   "metu", "met",   "galima", "galim",
        "nėra", "nėr",   "arba", "arb",   "mes", "mes",   "kurie", "kur",
        "tikrai", "tikr",   "todėl", "tod",   "ten", "ten",   "šiandien", "šiandien",
        "vienas", "vien",   "visi", "vis",   "kuris", "kur",   "tada", "tad",
        "kiek", "kiek",   "tuo", "tuo",   "gerai", "ger",   "nieko", "niek",
        "jį", "jį",   "kol", "kol",   "viskas", "visk",   "mane", "man",
        "kartą", "kart",   "m", "m",   "tas", "tas",   "sakė", "sak",
        "žmonių", "žmon",   "tu", "tu",   "dieną", "dien",   "žmonės", "žmon",
        "metais", "met",   "vieną", "vien",   "vėl", "vėl",   "na", "na",
        "tą", "tą",   "tiesiog", "tiesiog",   "toks", "tok",   "pats", "pat",
        "ko", "ko",   "Lietuvoje", "Lietuv",   "pagal", "pagal",   "jeigu", "jeig",
        "visai", "vis",   "viena", "vien",   "šį", "šį",   "metus", "met",
        "jam", "jam",   "kodėl", "kod",   "litų", "lit",   "ją", "ją",
        "kuri", "kur",   "darbo", "darb",   "tarp", "tarp",   "juk", "juk",
        "laiko", "laik",   "juos", "juos",   "visą", "vis",   "kurios", "kur",
        "tam", "tam",   "pas", "pas",   "viską", "visk",   "Europos", "Eur",
        "atrodo", "atrod",   "tad", "tad",   "bent", "bent",   "kitų", "kit",
        "šis", "šis",   "Vilniaus", "Viln",   "beveik", "bevei",   "proc", "proc",
        "tokia", "tok",   "šiuo", "šiuo",   "du", "du",   "kartu", "kart",
        "visada", "visad",   "kuo", "kuo");
  }
}