LUCENE-6694: Add LithuanianAnalyzer and LithuanianStemmer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1692544 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2015-07-24 15:47:25 +00:00
parent 068549d8a8
commit 8f58afc41a
8 changed files with 1973 additions and 0 deletions

View File

@ -141,6 +141,9 @@ New Features
the specified distance from the center point. Fix
GeoPointInBBoxQuery to handle dateline crossing.
* LUCENE-6694: Add LithuanianAnalyzer and LithuanianStemmer.
(Dainius Jocas via Robert Muir)
API Changes
* LUCENE-6508: Simplify Lock api, there is now just

View File

@ -0,0 +1,124 @@
package org.apache.lucene.analysis.lt;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.tartarus.snowball.ext.LithuanianStemmer;
/**
 * {@link Analyzer} for Lithuanian.
 * <p>
 * Tokens produced by a {@link StandardTokenizer} are passed through
 * {@link StandardFilter}, {@link LowerCaseFilter} and {@link StopFilter}, and are
 * finally stemmed by a {@link SnowballFilter} driven by {@link LithuanianStemmer}.
 */
public final class LithuanianAnalyzer extends StopwordAnalyzerBase {
  /** Terms that are never stemmed; an unmodifiable private copy of the caller's set. */
  private final CharArraySet stemExclusionSet;

  /** File containing default Lithuanian stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static CharArraySet getDefaultStopSet() {
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final CharArraySet DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = loadStopwordSet(false,
            LithuanianAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); keep the original exception as the cause so a
        // broken or missing resource can still be diagnosed from the stack trace
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }

  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
  public LithuanianAnalyzer() {
    this(DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words and no stem exclusions.
   *
   * @param stopwords a stopword set
   */
  public LithuanianAnalyzer(CharArraySet stopwords) {
    this(stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
   * stemming.
   *
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public LithuanianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
    super(stopwords);
    // copy first so that later changes to the caller's set cannot affect this analyzer
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
  }

  /**
   * Creates a
   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
   * which tokenizes all the text in the provided {@link Reader}.
   *
   * @param fieldName name of the field being analyzed; the chain built here does not
   *        depend on it
   * @return A
   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
   *         built from an {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
   *         provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(result);
    result = new StopFilter(result, stopwords);
    // the keyword marker is only needed when there is something to protect from stemming
    if (!stemExclusionSet.isEmpty()) {
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    }
    result = new SnowballFilter(result, new LithuanianStemmer());
    return new TokenStreamComponents(source, result);
  }
}

View File

@ -0,0 +1,21 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Analyzer for Lithuanian.
*/
package org.apache.lucene.analysis.lt;

View File

@ -0,0 +1,396 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// The only routine exported to callers is stem.
externals ( stem )
/* Special characters in Unicode Latin-1 and Latin Extended-A */
// ' ogonek (nasal hook)
stringdef a' decimal '261' // ą a + ogonek
stringdef e' decimal '281' // ę e + ogonek
stringdef i' decimal '303' // į i + ogonek
stringdef u' decimal '371' // ų u + ogonek
// . dot above
stringdef e. decimal '279' // ė e + dot
// - macron (long vowel)
stringdef u- decimal '363' // ū u + macron
// * caron
stringdef c* decimal '269' // č c + caron (haček)
stringdef s* decimal '353' // š s + caron (haček)
stringdef z* decimal '382' // ž z + caron (haček)
// [C](VC)^m[V|C]
// definitions of variables for
// p1 - position of m = 0
// p2 - position of m = 1
// s  - length of the word, assigned from 'size' at the start of stem
integers ( p1 p2 s)
// booleans - to be commented
// CHANGE - set by the fix_* routines whenever they rewrite a suffix
booleans ( CHANGE )
// escape symbols for substituting lithuanian characters:
// inside string literals {name} expands to the stringdef called name
stringescapes { }
// groupings
// v - lithuanian vowels
groupings ( v )
// v - all lithuanian vowels
define v 'aeiyou{a'}{e'}{i'}{u'}{e.}{u-}'
// all routines of the lithuanian stemmer (the steps plus the fix_* corrections)
routines (
step2 R1 step1 fix_chdz fix_gd fix_conflicts
)
backwardmode(
// R1 succeeds only when the cursor is at or after mark p1.
define R1 as $p1 <= cursor
// step1: removes one inflectional ending (noun, adjective or verb).
// Matching is limited to the text after mark p1, the longest listed suffix
// wins, and the suffix is deleted only if it starts inside R1.
//
// Every non-ASCII letter in a suffix MUST be written with its {..} escape.
// A raw character is copied into the generated table as its individual UTF-8
// bytes, which yields an entry that can never match real text.
define step1 as (
    setlimit tomark p1 for ([substring]) R1 among(
        // Nouns
        // 1st declension
        'as' 'ias' 'is' 'ys'            // vyras, kelias, brolis, gaidys
        'o' 'io'                        // vyro, kelio
        'ui' 'iui'                      // vyrui, keliui
        '{a'}' 'i{a'}' '{i'}'           // vyrą, kelią, brolį
        'u' 'iu'                        // vyru, keliu
        'e' 'yje'                       // vyre, kelyje
        'y' 'au' 'i'                    // kely, brolau, broli,
        'an'                            // nusižengiman
        'ai' 'iai'                      // vyrai, keliai
        '{u'}' 'i{u'}'                  // vyrų, kelių
        'ams' 'am'                      // vyrams, vyram
        'iams' 'iam'                    // broliams, broliam
        'us' 'ius'                      // vyrus, brolius
        'ais' 'iais'                    // vyrais, keliais
        'uose' 'iuose' 'uos' 'iuos'     // vyruose, keliuose, vyruos, keliuos
        'uosna' 'iuosna'                // vyruosna, keliuosna
        'ysna'                          // žutysna
        'asis' 'aisi'                   // sukimasis, sukimaisi
        'osi' '{u'}si'                  // sukimosi, sukimųsi
        'uisi'                          // sukimuisi
        '{a'}si'                        // sukimąsi
        'usi'                           // sukimusi
        'esi'                           // sukimesi
        'uo'                            // mėnuo
        // 2nd declension
        'a' 'ia'                        // galva, vyšnia
        'os' 'ios'                      // galvos, vyšnios
        'oj' 'oje' 'ioje'               // galvoj, galvoje, vyšnioje
        'osna' 'iosna'                  // galvosna, vyšniosna
        'om' 'oms' 'ioms'               // galvom, galvoms, vyšnioms
        'omis' 'iomis'                  // galvomis, vyšniomis
        'ose' 'iose'                    // galvose, vyšniose
        'on' 'ion'                      // galvon, vyšnion
        // 3rd declension
        '{e.}'                          // gervė
        '{e.}s'                         // gervės
        'ei'                            // gervei
        '{e'}'                          // gervę
        '{e.}j' '{e.}je'                // gervėj, gervėje
        '{e.}ms'                        // gervėms
        'es'                            // gerves
        '{e.}mis'                       // gervėmis
        '{e.}se'                        // gervėse
        '{e.}sna'                       // gervėsna
        '{e.}n'                         // žydaitėn
        // 4th declension
        'aus' 'iaus'                    // sūnaus, skaičiaus
        'umi' 'iumi'                    // sūnumi, skaičiumi
        'uje' 'iuje'                    // sūnuje, skaičiuje
        'iau'                           // skaičiau
        '{u-}s'                         // sūnūs
        'ums'                           // sūnums
        'umis'                          // sūnumis
        'un' 'iun'                      // sūnun, administratoriun
        // 5th declension
        'ies' 'ens' 'enio' 'ers'        // avies, vandens, sesers
        'eniui' 'eriai'                 // vandeniui, eriai
        'en{i'}' 'er{i'}'               // vandenį, seserį
        'imi' 'eniu' 'erimi' 'eria'     // avimi, vandeniu, seserimi, seseria
        'enyje' 'eryje'                 // vandenyje, seseryje
        'ie' 'enie' 'erie'              // avie, vandenie, seserie
        'enys' 'erys'                   // vandenys, seserys
        // 'en{u'}' omitted: conflicts with 'žandenų' 'antenų'
        'er{u'}'                        // seserų
        'ims' 'enims' 'erims'           // avims, vandenims, seserims
        'enis'                          // vandenis
        'imis'                          // žebenkštimis
        'enimis'                        // vandenimis
        'yse' 'enyse' 'eryse'           // avyse, vandenyse, seseryse
        // Adjectives
        // (i)a declension
        'iem' 'iems'                    // geriem, geriems
        'ame' 'iame'                    // naujame, mediniame
        // Verbs
        // Indicative mood
        // present tense
        // (i)a conjugation
        'uosi' 'iuosi'                  // dirbuosi, traukiuosi
        'iesi'                          // dirbiesi
        'asi' 'iasi'                    // dirbasi, traukiasi
        'am{e.}s' 'iam{e.}s'            // dirbamės, traukiamės
        'at' 'ate' 'iat' 'iate'         // dirbat, dirbate, ariat, traukiate
        'at{e.}s' 'iat{e.}s'            // dirbatės, traukiatės
        // i conjugation
        'isi'                           // tikisi
        'im'                            // mylim
        // 'ime' omitted: conflicts with the noun locative, e.g. 'gėrime'
        'im{e.}s'                       // tikimės
        'it' 'ite'                      // mylit, mylite
        // 'it{e.}s' omitted: conflicts with the suffix plus plural nominative
        // ending -ait-ės, e.g. žydaitės
        // o conjugation
        // The reflexive forms (bijomės, bijotės) are covered by 'om{e.}s' and
        // 'ot{e.}s' further down; they are deliberately not repeated here.
        'ome'                           // mokome
        'ot' 'ote'                      // mokot, mokote
        // past tense
        // o conjugation
        '{e.}jo' '{e.}josi'             // tikėjo, tikėjosi
        'ot{e.}s'                       // tikėjotės, bijotės
        // ė conjugation
        'eisi'                          // mokeisi
        '{e.}si'                        // mokėsi
        '{e.}m' '{e.}me'                // mokėm, mokėme
        '{e.}m{e.}s'                    // mokėmės
        '{e.}t' '{e.}te'                // mokėt, mokėte
        '{e.}t{e.}s'                    // mokėtės
        // past frequentative tense
        'ausi'                          // mokydavausi
        'om{e.}s'                       // mokydavomės, bijomės
        // future tense
        'siu' 'siuosi'                  // dirbsiu, mokysiuosi
        'si' 'siesi'                    // dirbsi, dirbsiesi
        's' 'ysis'                      // dirbs, mokysis
        'sim' 'sime'                    // dirbsim, dirbsime
        'sit' 'site'                    // gersit, gersite
        // subjunctive mood
        '{c*}iau' '{c*}iausi'           // dirbčiau
        'tum' 'tumei'                   // dirbtum, dirbtumei
        'tumeis' 'tumeisi'              // mokytumeis, mokytumeisi
        // 't{u'}' omitted: it would wrongly turn batutų into batų
        't{u'}si'                       // mokytųsi
        // 'tume' omitted: conflicts with 'šventume'
        'tum{e.}m'                      // dirbtumėm
        'tum{e.}me'                     // dirbtumėme
        'tum{e.}m{e.}s'                 // mokytumėmės
        'tute' 'tum{e.}t'               // dirbtute, dirbtumėt
        'tum{e.}te'                     // dirbtumėte
        'tum{e.}t{e.}s'                 // mokytumėtės
        // imperative mood
        'k' 'ki'                        // dirbk, dirbki, mokykis
        // 'kis' omitted: conflicts with viln-išk-is
        // 'kime' omitted: conflicts with pirkime
        'kim{e.}s'                      // mokykimės
        // infinitive
        'uoti' 'iuoti'                  // meluoti, dygsniuoti
        'auti' 'iauti'                  // draugauti, girtuokliauti
        'oti' 'ioti'                    // dovanoti, meškerioti
        '{e.}ti'                        // auklėti
        'yti'                           // akyti
        'inti'                          // auginti
        'in{e.}ti'                      // blusinėti
        'enti'                          // gyventi
        'tel{e.}ti'                     // bumbtelėti
        'ter{e.}ti'                     // bumbterėti
        'ti'                            // skalbti
        // 'tis' omitted: conflict, rytme-tis -> rytme
        // participles
        '{a'}s' 'i{a'}s' '{i'}s'        // dirbąs, žaidžiąs, gulįs
        't{u'}s'                        // suktųs -> suk
        'sim{e.}s'                      // suksimės
        'sit{e.}s'                      // suksitės
        'kite'                          // supkite
    )
    delete
)
// step2: repeatedly deletes derivational suffixes found in the text after
// mark p1, until no listed suffix matches any more.
define step2 as repeat (
setlimit tomark p1 for ([substring]) among(
// noun suffixes
// adjective suffixes
// 'in' // omitted: conflicts with 'augintinis' and 'akiniais' // lauk-in-is
'ing' // tvark-ing-as
'i{s*}k' // lenk-išk-as
'{e.}t' // dem-ėt-as
'ot' // garban-ot-as
'uot' 'iuot' // lang-uot-as, akin-iuot-as
// 'tin' omitted because of augintinis // dirb-tin-is
// 'ut' omitted because of batutas, degutas etc. // maž-ut-is
'yt' // maž-yt-is
'iuk' // maž-iuk-as
'iul' // maž-ul-is
'{e.}l' // maž-ėl-is
'yl' // maž-yl-is
'u{c*}iuk' // maž-učiuk-as
'uliuk' // maž-uliuk-as
// NOTE(review): the literal below has no 'l', unlike its example (-utėlait-) — confirm
'ut{e.}ait' // maž-utėlait-is
'ok' // did-ok-as
'iok' // višč-iok-as
'sv' '{s*}v' 'zgan' // sal-sv-as, pilk-šv-as, bal-zgan-as
'op' 'iop' // dvej-op-as, viener-iop-as
'ain' // apval-ain-as
'yk{s*}t' 'yk{s*}{c*}' // ten-ykšt-is, vakar-ykšč-ias
// degrees of comparison
'esn' // did-esn-is
'aus' 'iaus' // nauj-aus-ias, ger-iaus-ias
// pronominal (definite) adjectives
// masculine gender
'ias' // žaliasis
'oj' 'ioj' // gerojo, žaliojo
'aj' 'iaj' // gerajam, žaliajam
'{a'}j' 'i{a'}j' // garąjį, žaliąjį
'uoj' 'iuoj' // geruoju, žaliuoju
'iej' // gerieji
'{u'}j' 'i{u'}j' // gerųjų, žaliųjų
'ies' // geriesiems
'uos' 'iuos' // geruosius, žaliuosius
'ais' 'iais' // geraisiais, žaliaisiais
// feminine gender
'os' 'ios' // gerosios, žaliosios
'{a'}s' 'i{a'}s' // gerąsios, žaliąsias
// past frequentative tense
'dav' // ei-dav-o
// participle suffixes
'ant' 'iant'
'int' // tur-int-is
'{e.}j' // tur-ėj-o
'{e'}' //
'{e.}j{e'}'
'{e'}s' // dirb-ęs-is
'siant' // dirb-siant
// half-participles
'dam' // bėg-dam-as
'auj' // ūkinink-auj-a
'jam'
'iau'
'am' // baiminim-ams-i
)
delete
)
// fix_conflicts: rewrites word endings that step1 would otherwise mistake for
// a verb ending, replacing them with a base form, and sets CHANGE.
define fix_conflicts as (
[substring] among (
// 'lietuvaite' -> 'lietuvaitė', conflicts with 'myl-ite'
'aite' (<-'ait{e.}' set CHANGE)
// 'lietuvaitės' -> 'lietuvaitė', conflicts with 'myl-itės'
'ait{e.}s' (<-'ait{e.}' set CHANGE)
// 'ūs-uotės' -> 'ūs-uotė', conflicts with 'mokotės'
'uot{e.}s' (<-'uot{e.}' set CHANGE)
// 'ūs-uote' -> 'ūs-uotė', conflicts with 'mokote'
'uote' (<-'uot{e.}' set CHANGE)
// 'žėrėjime' -> 'žėrėjimas', conflicts with 'žais-ime'
'{e.}jime' (<-'{e.}jimas' set CHANGE)
// 'žvilgesiu' -> 'žvilgesys', conflicts with 'dirb-siu'
'esiu' (<-'esys' set CHANGE)
// 'duobkasiu' -> 'duobkasys', conflicts with 'pakasiu'
// NOTE(review): the pattern below ends in 's' ('asius'), so it does not match
// the example 'duobkasiu' — confirm whether 'asiu' was intended
'asius' (<-'asys' set CHANGE)
// 'žioravime' -> 'žioravimas', conflicts with 'myl-ime'
'avime' (<-'avimas' set CHANGE)
'ojime' (<-'ojimas' set CHANGE)
// 'advokatės' -> 'advokatė', conflicts with 'dirb-atės'
'okat{e.}s' (<-'okat{e.}' set CHANGE)
// 'advokate' -> 'advokatė', conflicts with 'dirb-ate'
'okate' (<-'okat{e.}' set CHANGE)
)
)
// fix_chdz: replaces a trailing 'č' with 't' and a trailing 'dž' with 'd',
// and sets CHANGE.
define fix_chdz as (
[substring] among (
'{c*}' (<-'t' set CHANGE)
'd{z*}' (<-'d' set CHANGE)
)
)
// fix_gd: replaces a trailing 'gd' with 'g' and sets CHANGE.
define fix_gd as (
[substring] among (
'gd' (<-'g' set CHANGE)
// disabled rule: 'ėk' -> 'ėg'
//'{e.}k' (<-'{e.}g' set CHANGE)
)
)
)
// stem: entry point. A forward pass places marks p1 and p2, then the suffix
// routines run in backward mode in the order listed below.
define stem as (
// defaults used when the word is too short for the marks to be set
$p1 = limit
$p2 = limit
$s = size
do (
// prefix 'a' in words longer than 6 letters, e.g. 'a-liejus':
// hop over it so that it is not counted when p1 and p2 are located
try (test 'a' $s > 6 hop 1)
// p1: just after the first non-vowel that follows a vowel
gopast v gopast non-v setmark p1
// p2: the same pattern applied once more, starting from p1
gopast v gopast non-v setmark p2
)
backwards (
do fix_conflicts
do step1
do fix_chdz
do step2
do fix_chdz
do fix_gd
)
)

View File

@ -0,0 +1,769 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.Among;
/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/
public class LithuanianStemmer extends org.tartarus.snowball.SnowballProgram {
private static final long serialVersionUID = 1L;
// Shared instance passed as the last constructor argument of every Among entry in the tables below.
private final static LithuanianStemmer methodObject = new LithuanianStemmer ();
// Suffix table searched by r_step1() via find_among_b(a_0, 206); it holds the
// inflectional endings of the step1 among(...) list in the Snowball source.
// Constructor arguments as used here: the suffix, the index of another entry
// of this table (judging by the data, the longest shorter entry that is itself
// a suffix of this one; -1 if there is none), a result code (always -1 in
// this table), an empty method name and the shared methodObject.
// Do NOT add, remove or reorder entries by hand: the index arguments and the
// length passed to find_among_b depend on the exact layout.
private final static Among a_0[] = {
new Among ( "a", -1, -1, "", methodObject ),
new Among ( "ia", 0, -1, "", methodObject ),
new Among ( "eria", 1, -1, "", methodObject ),
new Among ( "osna", 0, -1, "", methodObject ),
new Among ( "iosna", 3, -1, "", methodObject ),
new Among ( "uosna", 3, -1, "", methodObject ),
new Among ( "iuosna", 5, -1, "", methodObject ),
new Among ( "ysna", 0, -1, "", methodObject ),
new Among ( "\u0117sna", 0, -1, "", methodObject ),
new Among ( "e", -1, -1, "", methodObject ),
new Among ( "ie", 9, -1, "", methodObject ),
new Among ( "enie", 10, -1, "", methodObject ),
new Among ( "erie", 10, -1, "", methodObject ),
new Among ( "oje", 9, -1, "", methodObject ),
new Among ( "ioje", 13, -1, "", methodObject ),
new Among ( "uje", 9, -1, "", methodObject ),
new Among ( "iuje", 15, -1, "", methodObject ),
new Among ( "yje", 9, -1, "", methodObject ),
new Among ( "enyje", 17, -1, "", methodObject ),
new Among ( "eryje", 17, -1, "", methodObject ),
new Among ( "\u0117je", 9, -1, "", methodObject ),
new Among ( "ame", 9, -1, "", methodObject ),
new Among ( "iame", 21, -1, "", methodObject ),
new Among ( "sime", 9, -1, "", methodObject ),
new Among ( "ome", 9, -1, "", methodObject ),
new Among ( "\u0117me", 9, -1, "", methodObject ),
new Among ( "tum\u0117me", 25, -1, "", methodObject ),
new Among ( "ose", 9, -1, "", methodObject ),
new Among ( "iose", 27, -1, "", methodObject ),
new Among ( "uose", 27, -1, "", methodObject ),
new Among ( "iuose", 29, -1, "", methodObject ),
new Among ( "yse", 9, -1, "", methodObject ),
new Among ( "enyse", 31, -1, "", methodObject ),
new Among ( "eryse", 31, -1, "", methodObject ),
new Among ( "\u0117se", 9, -1, "", methodObject ),
new Among ( "ate", 9, -1, "", methodObject ),
new Among ( "iate", 35, -1, "", methodObject ),
new Among ( "ite", 9, -1, "", methodObject ),
new Among ( "kite", 37, -1, "", methodObject ),
new Among ( "site", 37, -1, "", methodObject ),
new Among ( "ote", 9, -1, "", methodObject ),
new Among ( "tute", 9, -1, "", methodObject ),
new Among ( "\u0117te", 9, -1, "", methodObject ),
new Among ( "tum\u0117te", 42, -1, "", methodObject ),
new Among ( "i", -1, -1, "", methodObject ),
new Among ( "ai", 44, -1, "", methodObject ),
new Among ( "iai", 45, -1, "", methodObject ),
new Among ( "eriai", 46, -1, "", methodObject ),
new Among ( "ei", 44, -1, "", methodObject ),
new Among ( "tumei", 48, -1, "", methodObject ),
new Among ( "ki", 44, -1, "", methodObject ),
new Among ( "imi", 44, -1, "", methodObject ),
new Among ( "erimi", 51, -1, "", methodObject ),
new Among ( "umi", 44, -1, "", methodObject ),
new Among ( "iumi", 53, -1, "", methodObject ),
new Among ( "si", 44, -1, "", methodObject ),
new Among ( "asi", 55, -1, "", methodObject ),
new Among ( "iasi", 56, -1, "", methodObject ),
new Among ( "esi", 55, -1, "", methodObject ),
new Among ( "iesi", 58, -1, "", methodObject ),
new Among ( "siesi", 59, -1, "", methodObject ),
new Among ( "isi", 55, -1, "", methodObject ),
new Among ( "aisi", 61, -1, "", methodObject ),
new Among ( "eisi", 61, -1, "", methodObject ),
new Among ( "tumeisi", 63, -1, "", methodObject ),
new Among ( "uisi", 61, -1, "", methodObject ),
new Among ( "osi", 55, -1, "", methodObject ),
new Among ( "\u0117josi", 66, -1, "", methodObject ),
new Among ( "uosi", 66, -1, "", methodObject ),
new Among ( "iuosi", 68, -1, "", methodObject ),
new Among ( "siuosi", 69, -1, "", methodObject ),
new Among ( "usi", 55, -1, "", methodObject ),
new Among ( "ausi", 71, -1, "", methodObject ),
new Among ( "\u010Diausi", 72, -1, "", methodObject ),
new Among ( "\u0105si", 55, -1, "", methodObject ),
new Among ( "\u0117si", 55, -1, "", methodObject ),
new Among ( "\u0173si", 55, -1, "", methodObject ),
new Among ( "t\u0173si", 76, -1, "", methodObject ),
new Among ( "ti", 44, -1, "", methodObject ),
new Among ( "enti", 78, -1, "", methodObject ),
new Among ( "inti", 78, -1, "", methodObject ),
new Among ( "oti", 78, -1, "", methodObject ),
new Among ( "ioti", 81, -1, "", methodObject ),
new Among ( "uoti", 81, -1, "", methodObject ),
new Among ( "iuoti", 83, -1, "", methodObject ),
new Among ( "auti", 78, -1, "", methodObject ),
new Among ( "iauti", 85, -1, "", methodObject ),
new Among ( "yti", 78, -1, "", methodObject ),
new Among ( "\u0117ti", 78, -1, "", methodObject ),
new Among ( "tel\u0117ti", 88, -1, "", methodObject ),
new Among ( "in\u0117ti", 88, -1, "", methodObject ),
new Among ( "ter\u0117ti", 88, -1, "", methodObject ),
new Among ( "ui", 44, -1, "", methodObject ),
new Among ( "iui", 92, -1, "", methodObject ),
new Among ( "eniui", 93, -1, "", methodObject ),
new Among ( "oj", -1, -1, "", methodObject ),
new Among ( "\u0117j", -1, -1, "", methodObject ),
new Among ( "k", -1, -1, "", methodObject ),
new Among ( "am", -1, -1, "", methodObject ),
new Among ( "iam", 98, -1, "", methodObject ),
new Among ( "iem", -1, -1, "", methodObject ),
new Among ( "im", -1, -1, "", methodObject ),
new Among ( "sim", 101, -1, "", methodObject ),
new Among ( "om", -1, -1, "", methodObject ),
new Among ( "tum", -1, -1, "", methodObject ),
new Among ( "\u0117m", -1, -1, "", methodObject ),
new Among ( "tum\u0117m", 105, -1, "", methodObject ),
new Among ( "an", -1, -1, "", methodObject ),
new Among ( "on", -1, -1, "", methodObject ),
new Among ( "ion", 108, -1, "", methodObject ),
new Among ( "un", -1, -1, "", methodObject ),
new Among ( "iun", 110, -1, "", methodObject ),
new Among ( "\u0117n", -1, -1, "", methodObject ),
new Among ( "o", -1, -1, "", methodObject ),
new Among ( "io", 113, -1, "", methodObject ),
new Among ( "enio", 114, -1, "", methodObject ),
new Among ( "\u0117jo", 113, -1, "", methodObject ),
new Among ( "uo", 113, -1, "", methodObject ),
new Among ( "s", -1, -1, "", methodObject ),
new Among ( "as", 118, -1, "", methodObject ),
new Among ( "ias", 119, -1, "", methodObject ),
new Among ( "es", 118, -1, "", methodObject ),
new Among ( "ies", 121, -1, "", methodObject ),
new Among ( "is", 118, -1, "", methodObject ),
new Among ( "ais", 123, -1, "", methodObject ),
new Among ( "iais", 124, -1, "", methodObject ),
new Among ( "tumeis", 123, -1, "", methodObject ),
new Among ( "imis", 123, -1, "", methodObject ),
new Among ( "enimis", 127, -1, "", methodObject ),
new Among ( "omis", 123, -1, "", methodObject ),
new Among ( "iomis", 129, -1, "", methodObject ),
new Among ( "umis", 123, -1, "", methodObject ),
new Among ( "\u0117mis", 123, -1, "", methodObject ),
new Among ( "enis", 123, -1, "", methodObject ),
new Among ( "asis", 123, -1, "", methodObject ),
new Among ( "ysis", 123, -1, "", methodObject ),
new Among ( "ams", 118, -1, "", methodObject ),
new Among ( "iams", 136, -1, "", methodObject ),
new Among ( "iems", 118, -1, "", methodObject ),
new Among ( "ims", 118, -1, "", methodObject ),
new Among ( "enims", 139, -1, "", methodObject ),
new Among ( "erims", 139, -1, "", methodObject ),
new Among ( "oms", 118, -1, "", methodObject ),
new Among ( "ioms", 142, -1, "", methodObject ),
new Among ( "ums", 118, -1, "", methodObject ),
new Among ( "\u0117ms", 118, -1, "", methodObject ),
new Among ( "ens", 118, -1, "", methodObject ),
new Among ( "os", 118, -1, "", methodObject ),
new Among ( "ios", 147, -1, "", methodObject ),
new Among ( "uos", 147, -1, "", methodObject ),
new Among ( "iuos", 149, -1, "", methodObject ),
new Among ( "ers", 118, -1, "", methodObject ),
new Among ( "us", 118, -1, "", methodObject ),
new Among ( "aus", 152, -1, "", methodObject ),
new Among ( "iaus", 153, -1, "", methodObject ),
new Among ( "ius", 152, -1, "", methodObject ),
new Among ( "ys", 118, -1, "", methodObject ),
new Among ( "enys", 156, -1, "", methodObject ),
new Among ( "erys", 156, -1, "", methodObject ),
// NOTE(review): the next two entries are mis-encoded. \u00C4\u0097 is the
// two-byte UTF-8 form of U+0117 (e with dot above) stored as two separate
// characters, so these strings cannot match properly decoded text. The
// correctly encoded "om\u0117s" and "ot\u0117s" appear further down, so
// these look like dead duplicates. They are left in place because removing
// them would shift every later index and the length used by r_step1();
// fix the Snowball source and regenerate this file instead.
new Among ( "om\u00C4\u0097s", 118, -1, "", methodObject ),
new Among ( "ot\u00C4\u0097s", 118, -1, "", methodObject ),
new Among ( "\u0105s", 118, -1, "", methodObject ),
new Among ( "i\u0105s", 161, -1, "", methodObject ),
new Among ( "\u0117s", 118, -1, "", methodObject ),
new Among ( "am\u0117s", 163, -1, "", methodObject ),
new Among ( "iam\u0117s", 164, -1, "", methodObject ),
new Among ( "im\u0117s", 163, -1, "", methodObject ),
new Among ( "kim\u0117s", 166, -1, "", methodObject ),
new Among ( "sim\u0117s", 166, -1, "", methodObject ),
new Among ( "om\u0117s", 163, -1, "", methodObject ),
new Among ( "\u0117m\u0117s", 163, -1, "", methodObject ),
new Among ( "tum\u0117m\u0117s", 170, -1, "", methodObject ),
new Among ( "at\u0117s", 163, -1, "", methodObject ),
new Among ( "iat\u0117s", 172, -1, "", methodObject ),
new Among ( "sit\u0117s", 163, -1, "", methodObject ),
new Among ( "ot\u0117s", 163, -1, "", methodObject ),
new Among ( "\u0117t\u0117s", 163, -1, "", methodObject ),
new Among ( "tum\u0117t\u0117s", 176, -1, "", methodObject ),
new Among ( "\u012Fs", 118, -1, "", methodObject ),
new Among ( "\u016Bs", 118, -1, "", methodObject ),
new Among ( "t\u0173s", 118, -1, "", methodObject ),
new Among ( "at", -1, -1, "", methodObject ),
new Among ( "iat", 181, -1, "", methodObject ),
new Among ( "it", -1, -1, "", methodObject ),
new Among ( "sit", 183, -1, "", methodObject ),
new Among ( "ot", -1, -1, "", methodObject ),
new Among ( "\u0117t", -1, -1, "", methodObject ),
new Among ( "tum\u0117t", 186, -1, "", methodObject ),
new Among ( "u", -1, -1, "", methodObject ),
new Among ( "au", 188, -1, "", methodObject ),
new Among ( "iau", 189, -1, "", methodObject ),
new Among ( "\u010Diau", 190, -1, "", methodObject ),
new Among ( "iu", 188, -1, "", methodObject ),
new Among ( "eniu", 192, -1, "", methodObject ),
new Among ( "siu", 192, -1, "", methodObject ),
new Among ( "y", -1, -1, "", methodObject ),
new Among ( "\u0105", -1, -1, "", methodObject ),
new Among ( "i\u0105", 196, -1, "", methodObject ),
new Among ( "\u0117", -1, -1, "", methodObject ),
new Among ( "\u0119", -1, -1, "", methodObject ),
new Among ( "\u012F", -1, -1, "", methodObject ),
new Among ( "en\u012F", 200, -1, "", methodObject ),
new Among ( "er\u012F", 200, -1, "", methodObject ),
new Among ( "\u0173", -1, -1, "", methodObject ),
new Among ( "i\u0173", 203, -1, "", methodObject ),
new Among ( "er\u0173", 203, -1, "", methodObject )
};
// Suffix table searched by r_step2() via find_among_b(a_1, 62); it holds the
// derivational suffixes of the step2 among(...) list in the Snowball source.
// The second argument is an index into this same table (-1 for none) and the
// result code is always -1. Generated data: do not reorder or edit by hand.
private final static Among a_1[] = {
new Among ( "ing", -1, -1, "", methodObject ),
new Among ( "aj", -1, -1, "", methodObject ),
new Among ( "iaj", 1, -1, "", methodObject ),
new Among ( "iej", -1, -1, "", methodObject ),
new Among ( "oj", -1, -1, "", methodObject ),
new Among ( "ioj", 4, -1, "", methodObject ),
new Among ( "uoj", 4, -1, "", methodObject ),
new Among ( "iuoj", 6, -1, "", methodObject ),
new Among ( "auj", -1, -1, "", methodObject ),
new Among ( "\u0105j", -1, -1, "", methodObject ),
new Among ( "i\u0105j", 9, -1, "", methodObject ),
new Among ( "\u0117j", -1, -1, "", methodObject ),
new Among ( "\u0173j", -1, -1, "", methodObject ),
new Among ( "i\u0173j", 12, -1, "", methodObject ),
new Among ( "ok", -1, -1, "", methodObject ),
new Among ( "iok", 14, -1, "", methodObject ),
new Among ( "iuk", -1, -1, "", methodObject ),
new Among ( "uliuk", 16, -1, "", methodObject ),
new Among ( "u\u010Diuk", 16, -1, "", methodObject ),
new Among ( "i\u0161k", -1, -1, "", methodObject ),
new Among ( "iul", -1, -1, "", methodObject ),
new Among ( "yl", -1, -1, "", methodObject ),
new Among ( "\u0117l", -1, -1, "", methodObject ),
new Among ( "am", -1, -1, "", methodObject ),
new Among ( "dam", 23, -1, "", methodObject ),
new Among ( "jam", 23, -1, "", methodObject ),
new Among ( "zgan", -1, -1, "", methodObject ),
new Among ( "ain", -1, -1, "", methodObject ),
new Among ( "esn", -1, -1, "", methodObject ),
new Among ( "op", -1, -1, "", methodObject ),
new Among ( "iop", 29, -1, "", methodObject ),
new Among ( "ias", -1, -1, "", methodObject ),
new Among ( "ies", -1, -1, "", methodObject ),
new Among ( "ais", -1, -1, "", methodObject ),
new Among ( "iais", 33, -1, "", methodObject ),
new Among ( "os", -1, -1, "", methodObject ),
new Among ( "ios", 35, -1, "", methodObject ),
new Among ( "uos", 35, -1, "", methodObject ),
new Among ( "iuos", 37, -1, "", methodObject ),
new Among ( "aus", -1, -1, "", methodObject ),
new Among ( "iaus", 39, -1, "", methodObject ),
new Among ( "\u0105s", -1, -1, "", methodObject ),
new Among ( "i\u0105s", 41, -1, "", methodObject ),
new Among ( "\u0119s", -1, -1, "", methodObject ),
new Among ( "ut\u0117ait", -1, -1, "", methodObject ),
new Among ( "ant", -1, -1, "", methodObject ),
new Among ( "iant", 45, -1, "", methodObject ),
new Among ( "siant", 46, -1, "", methodObject ),
new Among ( "int", -1, -1, "", methodObject ),
new Among ( "ot", -1, -1, "", methodObject ),
new Among ( "uot", 49, -1, "", methodObject ),
new Among ( "iuot", 50, -1, "", methodObject ),
new Among ( "yt", -1, -1, "", methodObject ),
new Among ( "\u0117t", -1, -1, "", methodObject ),
new Among ( "yk\u0161t", -1, -1, "", methodObject ),
new Among ( "iau", -1, -1, "", methodObject ),
new Among ( "dav", -1, -1, "", methodObject ),
new Among ( "sv", -1, -1, "", methodObject ),
new Among ( "\u0161v", -1, -1, "", methodObject ),
new Among ( "yk\u0161\u010D", -1, -1, "", methodObject ),
new Among ( "\u0119", -1, -1, "", methodObject ),
new Among ( "\u0117j\u0119", 60, -1, "", methodObject )
};
// Endings looked up by r_fix_conflicts() via find_among_b(a_2, 11).
// The third argument of each entry is the result code that r_fix_conflicts()
// switches on to pick the replacement text (cases 1 to 11).
private final static Among a_2[] = {
new Among ( "ojime", -1, 9, "", methodObject ),
new Among ( "\u0117jime", -1, 5, "", methodObject ),
new Among ( "avime", -1, 8, "", methodObject ),
new Among ( "okate", -1, 11, "", methodObject ),
new Among ( "aite", -1, 1, "", methodObject ),
new Among ( "uote", -1, 4, "", methodObject ),
new Among ( "asius", -1, 7, "", methodObject ),
new Among ( "okat\u0117s", -1, 10, "", methodObject ),
new Among ( "ait\u0117s", -1, 2, "", methodObject ),
new Among ( "uot\u0117s", -1, 3, "", methodObject ),
new Among ( "esiu", -1, 6, "", methodObject )
};
// Endings looked up by r_fix_chdz(): result 1 is replaced with "t", result 2 with "d".
private final static Among a_3[] = {
new Among ( "\u010D", -1, 1, "", methodObject ),
new Among ( "d\u017E", -1, 2, "", methodObject )
};
// Ending looked up by r_fix_gd(): result 1 ("gd") is replaced with "g".
private final static Among a_4[] = {
new Among ( "gd", -1, 1, "", methodObject )
};
// NOTE(review): presumably the bitmap generated for the Snowball grouping 'v'
// (the Lithuanian vowels); it is not referenced in the part of the file shown
// here, so confirm against stem().
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 64, 1, 0, 64, 0, 0, 0, 0, 0, 0, 0, 4, 4 };
// Set to true by r_fix_conflicts(), r_fix_chdz() and r_fix_gd() when they rewrite a suffix.
private boolean B_CHANGE;
// Length of the current word, assigned in stem().
private int I_s;
// Mark p2; initialised to limit in stem().
private int I_p2;
// Mark p1; r_step1() and r_step2() only match suffixes located after it.
private int I_p1;
/**
 * Copies the complete stemmer state from another instance: the marks, the
 * word length and the CHANGE flag kept by this class, followed by whatever
 * the superclass copies.
 *
 * @param other the stemmer whose state is taken over
 */
private void copy_from(LithuanianStemmer other) {
  I_p1 = other.I_p1;
  I_p2 = other.I_p2;
  I_s = other.I_s;
  B_CHANGE = other.B_CHANGE;
  super.copy_from(other);
}
/**
 * Region test R1.
 *
 * @return true when the cursor is at or after mark p1
 */
private boolean r_R1() {
  return I_p1 <= cursor;
}
/**
 * Step 1: deletes one suffix from table a_0, matched backwards from the
 * cursor with the search limited to the text after mark p1.
 *
 * @return true if a suffix was found inside R1 and removed, false otherwise
 */
private boolean r_step1() {
  // setlimit tomark p1: impossible when the cursor already lies before p1
  if (cursor < I_p1) {
    return false;
  }
  // narrow the backward limit to p1 for the duration of the table lookup;
  // the cursor itself stays where it is
  final int savedLimitBackward = limit_backward;
  limit_backward = I_p1;
  ket = cursor;
  final int matched = find_among_b(a_0, 206);
  limit_backward = savedLimitBackward;
  if (matched == 0) {
    return false;
  }
  bra = cursor;
  // the matched suffix must begin inside R1
  if (!r_R1()) {
    return false;
  }
  slice_del();
  return true;
}
/**
 * Step 2: keeps deleting suffixes from table a_1 (searched backwards, limited
 * to the text after mark p1) until no further suffix matches.
 *
 * @return always true
 */
private boolean r_step2() {
  while (true) {
    // remember the cursor as a distance from the end, so that it can be
    // restored once an iteration fails
    final int distanceFromEnd = limit - cursor;
    boolean removed = false;
    if (cursor >= I_p1) {
      // narrow the backward limit to p1 while searching the table
      final int savedLimitBackward = limit_backward;
      limit_backward = I_p1;
      ket = cursor;
      final int matched = find_among_b(a_1, 62);
      limit_backward = savedLimitBackward;
      if (matched != 0) {
        bra = cursor;
        slice_del();
        removed = true;
      }
    }
    if (!removed) {
      cursor = limit - distanceFromEnd;
      return true;
    }
  }
}
/**
 * Replaces an ending listed in table a_2 with its base form and raises the
 * CHANGE flag.
 *
 * @return false when no ending of a_2 matches at the cursor, true otherwise
 */
private boolean r_fix_conflicts() {
  ket = cursor;
  final int which = find_among_b(a_2, 11);
  if (which == 0) {
    return false;
  }
  bra = cursor;

  // several result codes share the same replacement text
  final String replacement;
  switch (which) {
    case 1:
    case 2:
      replacement = "ait\u0117";
      break;
    case 3:
    case 4:
      replacement = "uot\u0117";
      break;
    case 5:
      replacement = "\u0117jimas";
      break;
    case 6:
      replacement = "esys";
      break;
    case 7:
      replacement = "asys";
      break;
    case 8:
      replacement = "avimas";
      break;
    case 9:
      replacement = "ojimas";
      break;
    case 10:
    case 11:
      replacement = "okat\u0117";
      break;
    default:
      // unknown result code: nothing to rewrite
      return true;
  }
  slice_from(replacement);
  B_CHANGE = true;
  return true;
}
/**
 * Replaces an ending listed in table a_3 with "t" or "d", depending on which entry
 * matched, and sets the CHANGE flag.
 *
 * @return false when no entry of a_3 matches at the cursor, true otherwise
 */
private boolean r_fix_chdz() {
  ket = cursor;
  final int matched = find_among_b(a_3, 2);
  if (matched == 0) {
    return false;
  }
  bra = cursor;

  if (matched == 1) {
    slice_from("t");
    B_CHANGE = true;
  } else if (matched == 2) {
    slice_from("d");
    B_CHANGE = true;
  }
  return true;
}
/**
 * Replaces the ending listed in table a_4 with "g" and sets the CHANGE flag.
 *
 * @return false when no entry of a_4 matches at the cursor, true otherwise
 */
private boolean r_fix_gd() {
  ket = cursor;
  final int matched = find_among_b(a_4, 1);
  if (matched == 0) {
    return false;
  }
  bra = cursor;

  if (matched == 1) {
    slice_from("g");
    B_CHANGE = true;
  }
  return true;
}
/**
 * Stems the current word in place.
 *
 * <p>A forward pass locates the p1 and p2 marks; a backward pass then runs
 * fix_conflicts, step1, fix_chdz, step2, fix_chdz and fix_gd in that order. A step
 * that reports failure is simply skipped.
 *
 * @return always true
 */
public boolean stem() {
  // Both marks start at the end of the word and stay there if the scan below
  // runs out of characters before reaching them.
  I_p1 = limit;
  I_p2 = limit;
  I_s = (getCurrent().length());

  final int wordStart = cursor;
  findMarks: {
    // Step over a leading "a", but only in words longer than six characters.
    // The literal test itself must not move the cursor, hence the reset.
    final int tryStart = cursor;
    final boolean leadingA = eq_s(1, "a");
    cursor = tryStart;
    if (leadingA && I_s > 6) {
      final int hopped = cursor + 1;
      if (hopped >= 0 && hopped <= limit) {
        cursor = hopped;
      }
    }

    // p1: just past the first g_v character and the first non-g_v character after it.
    while (!in_grouping(g_v, 97, 371)) {
      if (cursor >= limit) {
        break findMarks;
      }
      cursor++;
    }
    while (!out_grouping(g_v, 97, 371)) {
      if (cursor >= limit) {
        break findMarks;
      }
      cursor++;
    }
    I_p1 = cursor;

    // p2: the same scan repeated, starting from p1.
    while (!in_grouping(g_v, 97, 371)) {
      if (cursor >= limit) {
        break findMarks;
      }
      cursor++;
    }
    while (!out_grouping(g_v, 97, 371)) {
      if (cursor >= limit) {
        break findMarks;
      }
      cursor++;
    }
    I_p2 = cursor;
  }
  cursor = wordStart;

  // Switch to backward processing: the cursor now starts at the end of the word.
  limit_backward = cursor;
  cursor = limit;

  // Each step runs from the same position, measured from the end of the word so
  // that it stays valid when a step changes the word length. Results are ignored.
  int fromEnd = limit - cursor;
  r_fix_conflicts();
  cursor = limit - fromEnd;

  fromEnd = limit - cursor;
  r_step1();
  cursor = limit - fromEnd;

  fromEnd = limit - cursor;
  r_fix_chdz();
  cursor = limit - fromEnd;

  fromEnd = limit - cursor;
  r_step2();
  cursor = limit - fromEnd;

  fromEnd = limit - cursor;
  r_fix_chdz();
  cursor = limit - fromEnd;

  fromEnd = limit - cursor;
  r_fix_gd();
  cursor = limit - fromEnd;

  cursor = limit_backward;
  return true;
}
/**
 * Treats every {@code LithuanianStemmer} as equal to every other one.
 *
 * @param o the object to compare with
 * @return true when {@code o} is a {@code LithuanianStemmer}
 */
@Override
public boolean equals(Object o) {
  final boolean isLithuanianStemmer = o instanceof LithuanianStemmer;
  return isLithuanianStemmer;
}
/**
 * Returns the hash of the class name, so that all instances share one hash code,
 * in line with {@code equals}.
 *
 * @return the hash code of the {@code LithuanianStemmer} class name
 */
@Override
public int hashCode() {
  final String className = LithuanianStemmer.class.getName();
  return className.hashCode();
}
}

View File

@ -0,0 +1,126 @@
# Lithuanian stopwords list
ant
apie
ar
arba
be
bei
bet
bus
būti
būtų
buvo
dėl
gali
į
iki
ir
ja
jai
jais
jam
jame
jas
jei
ji
jie
jiedu
jiedvi
jiedviem
jiedviese
jiems
jis
jo
jodviem
jog
joje
jomis
joms
jos
jose
judu
judvi
judviejų
jųdviejų
judviem
judviese
jumis
jums
jumyse
juo
juodu
juodviese
juos
juose
jus
jūs
jūsų
kad
kai
kaip
kas
kiek
kol
kur
kurie
kuris
man
mane
manęs
manimi
mano
manyje
mes
metu
mudu
mudvi
mudviejų
mudviem
mudviese
mumis
mums
mumyse
mus
mūsų
nei
nes
net
nors
nuo
o
pat
per
po
prie
prieš
sau
save
savęs
savimi
savo
savyje
su
tačiau
tada
tai
taip
tas
tau
tave
tavęs
tavimi
tavyje
ten
to
todėl
tu
tuo
visi
yra

View File

@ -0,0 +1,53 @@
package org.apache.lucene.analysis.lt;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
/**
 * Tests for {@link LithuanianAnalyzer}: resource loading, stopword removal,
 * stem exclusion and random input.
 *
 * <p>Every analyzer created here is closed again, so no test leaves one open.
 */
public class TestLithuanianAnalyzer extends BaseTokenStreamTestCase {
  /** This test fails with NPE when the
   * stopwords file is missing in classpath */
  public void testResourcesAvailable() {
    new LithuanianAnalyzer().close();
  }

  /** Test stopword removal */
  public void testStopWord() throws Exception {
    try (Analyzer a = new LithuanianAnalyzer()) {
      // "man" is in the default stopword list, so no token is expected.
      assertAnalyzesTo(a, "man",
          new String[] { });
    }
  }

  /** Test stemmer exceptions */
  public void testStemExclusion() throws IOException {
    CharArraySet set = new CharArraySet(1, true);
    set.add("vaikų");
    try (Analyzer a = new LithuanianAnalyzer(CharArraySet.EMPTY_SET, set)) {
      // The excluded term must come through unchanged.
      assertAnalyzesTo(a, "vaikų", new String[] {"vaikų"});
    }
  }

  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
    try (Analyzer a = new LithuanianAnalyzer()) {
      checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
    }
  }
}

View File

@ -0,0 +1,481 @@
package org.apache.lucene.analysis.lt;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.tartarus.snowball.ext.LithuanianStemmer;
/**
* Basic tests for {@link LithuanianStemmer}.
* We test some n/adj templates from wikipedia and some high frequency
* terms from mixed corpora.
*/
public class TestLithuanianStemming extends BaseTokenStreamTestCase {
  // Analyzer under test: whitespace tokenization followed by the Lithuanian
  // snowball stemmer. Created in setUp() and closed in tearDown().
  private Analyzer a;

  /** Builds the whitespace-tokenizer + LithuanianStemmer analyzer used by every test. */
  @Override
  public void setUp() throws Exception {
    super.setUp();
    a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new SnowballFilter(tokenizer, new LithuanianStemmer()));
      }
    };
  }

  /** Closes the analyzer created in setUp(). */
  @Override
  public void tearDown() throws Exception {
    a.close();
    super.tearDown();
  }

  /** First-declension nouns (-as, -is, -ys). */
  public void testNounsI() throws IOException {
    // n. decl. I (-as)
    checkOneTerm(a, "vaikas", "vaik"); // nom. sing.
    checkOneTerm(a, "vaikai", "vaik"); // nom. pl.
    checkOneTerm(a, "vaiko", "vaik"); // gen. sg.
    checkOneTerm(a, "vaikų", "vaik"); // gen. pl.
    checkOneTerm(a, "vaikui", "vaik"); // dat. sg.
    checkOneTerm(a, "vaikams", "vaik"); // dat. pl.
    checkOneTerm(a, "vaiką", "vaik"); // acc. sg.
    checkOneTerm(a, "vaikus", "vaik"); // acc. pl.
    checkOneTerm(a, "vaiku", "vaik"); // ins. sg.
    checkOneTerm(a, "vaikais", "vaik"); // ins. pl.
    checkOneTerm(a, "vaike", "vaik"); // loc. sg.
    checkOneTerm(a, "vaikuose", "vaik"); // loc. pl.
    checkOneTerm(a, "vaike", "vaik"); // voc. sg.
    checkOneTerm(a, "vaikai", "vaik"); // voc. pl.
    // n. decl. I (-is)
    checkOneTerm(a, "brolis", "brol"); // nom. sing.
    checkOneTerm(a, "broliai", "brol"); // nom. pl.
    checkOneTerm(a, "brolio", "brol"); // gen. sg.
    checkOneTerm(a, "brolių", "brol"); // gen. pl.
    checkOneTerm(a, "broliui", "brol"); // dat. sg.
    checkOneTerm(a, "broliams", "brol"); // dat. pl.
    checkOneTerm(a, "brolį", "brol"); // acc. sg.
    checkOneTerm(a, "brolius", "brol"); // acc. pl.
    checkOneTerm(a, "broliu", "brol"); // ins. sg.
    checkOneTerm(a, "broliais", "brol"); // ins. pl.
    checkOneTerm(a, "brolyje", "brol"); // loc. sg.
    checkOneTerm(a, "broliuose", "brol"); // loc. pl.
    checkOneTerm(a, "broli", "brol"); // voc. sg.
    checkOneTerm(a, "broliai", "brol"); // voc. pl.
    // n. decl. I (-ys)
    // note: some forms don't conflate
    checkOneTerm(a, "arklys", "arkl"); // nom. sing.
    checkOneTerm(a, "arkliai", "arkliai"); // nom. pl.
    checkOneTerm(a, "arklio", "arkl"); // gen. sg.
    checkOneTerm(a, "arklių", "arkl"); // gen. pl.
    checkOneTerm(a, "arkliui", "arkliui"); // dat. sg.
    checkOneTerm(a, "arkliams", "arkliam"); // dat. pl.
    checkOneTerm(a, "arklį", "arkl"); // acc. sg.
    checkOneTerm(a, "arklius", "arklius"); // acc. pl.
    checkOneTerm(a, "arkliu", "arkl"); // ins. sg.
    checkOneTerm(a, "arkliais", "arkliais"); // ins. pl.
    checkOneTerm(a, "arklyje", "arklyj"); // loc. sg.
    checkOneTerm(a, "arkliuose", "arkliuos"); // loc. pl.
    checkOneTerm(a, "arkly", "arkl"); // voc. sg.
    checkOneTerm(a, "arkliai", "arkliai"); // voc. pl.
  }

  /** Second-declension nouns (-a, -ė, -ti). */
  public void testNounsII() throws IOException {
    // n. decl II (-a)
    checkOneTerm(a, "motina", "motin"); // nom. sing.
    checkOneTerm(a, "motinos", "motin"); // nom. pl.
    checkOneTerm(a, "motinos", "motin"); // gen. sg.
    checkOneTerm(a, "motinų", "motin"); // gen. pl.
    checkOneTerm(a, "motinai", "motin"); // dat. sg.
    checkOneTerm(a, "motinoms", "motin"); // dat. pl.
    checkOneTerm(a, "motiną", "motin"); // acc. sg.
    checkOneTerm(a, "motinas", "motin"); // acc. pl.
    checkOneTerm(a, "motina", "motin"); // ins. sg.
    checkOneTerm(a, "motinomis", "motin"); // ins. pl.
    checkOneTerm(a, "motinoje", "motin"); // loc. sg.
    checkOneTerm(a, "motinose", "motin"); // loc. pl.
    checkOneTerm(a, "motina", "motin"); // voc. sg.
    checkOneTerm(a, "motinos", "motin"); // voc. pl.
    // n. decl II (-ė)
    checkOneTerm(a, "katė", "kat"); // nom. sing.
    checkOneTerm(a, "katės", "kat"); // nom. pl.
    checkOneTerm(a, "katės", "kat"); // gen. sg.
    checkOneTerm(a, "kačių", "kat"); // gen. pl.
    checkOneTerm(a, "katei", "kat"); // dat. sg.
    checkOneTerm(a, "katėms", "kat"); // dat. pl.
    checkOneTerm(a, "katę", "kat"); // acc. sg.
    checkOneTerm(a, "kates", "kat"); // acc. pl.
    checkOneTerm(a, "kate", "kat"); // ins. sg.
    checkOneTerm(a, "katėmis", "kat"); // ins. pl.
    checkOneTerm(a, "katėje", "kat"); // loc. sg.
    checkOneTerm(a, "katėse", "kat"); // loc. pl.
    checkOneTerm(a, "kate", "kat"); // voc. sg.
    checkOneTerm(a, "katės", "kat"); // voc. pl.
    // n. decl II (-ti)
    checkOneTerm(a, "pati", "pat"); // nom. sing.
    checkOneTerm(a, "pačios", "pat"); // nom. pl.
    checkOneTerm(a, "pačios", "pat"); // gen. sg.
    checkOneTerm(a, "pačių", "pat"); // gen. pl.
    checkOneTerm(a, "pačiai", "pat"); // dat. sg.
    checkOneTerm(a, "pačioms", "pat"); // dat. pl.
    checkOneTerm(a, "pačią", "pat"); // acc. sg.
    checkOneTerm(a, "pačias", "pat"); // acc. pl.
    checkOneTerm(a, "pačia", "pat"); // ins. sg.
    checkOneTerm(a, "pačiomis", "pat"); // ins. pl.
    checkOneTerm(a, "pačioje", "pat"); // loc. sg.
    checkOneTerm(a, "pačiose", "pat"); // loc. pl.
    checkOneTerm(a, "pati", "pat"); // voc. sg.
    checkOneTerm(a, "pačios", "pat"); // voc. pl.
  }

  /** Third-declension nouns (masculine and feminine). */
  public void testNounsIII() throws IOException {
    // n. decl III-m
    checkOneTerm(a, "vagis", "vag"); // nom. sing.
    checkOneTerm(a, "vagys", "vag"); // nom. pl.
    checkOneTerm(a, "vagies", "vag"); // gen. sg.
    checkOneTerm(a, "vagių", "vag"); // gen. pl.
    checkOneTerm(a, "vagiui", "vag"); // dat. sg.
    checkOneTerm(a, "vagims", "vag"); // dat. pl.
    checkOneTerm(a, "vagį", "vag"); // acc. sg.
    checkOneTerm(a, "vagis", "vag"); // acc. pl.
    checkOneTerm(a, "vagimi", "vag"); // ins. sg.
    checkOneTerm(a, "vagimis", "vag"); // ins. pl.
    checkOneTerm(a, "vagyje", "vag"); // loc. sg.
    checkOneTerm(a, "vagyse", "vag"); // loc. pl.
    checkOneTerm(a, "vagie", "vag"); // voc. sg.
    checkOneTerm(a, "vagys", "vag"); // voc. pl.
    // n. decl III-f
    checkOneTerm(a, "akis", "ak"); // nom. sing.
    checkOneTerm(a, "akys", "ak"); // nom. pl.
    checkOneTerm(a, "akies", "ak"); // gen. sg.
    checkOneTerm(a, "akių", "ak"); // gen. pl.
    checkOneTerm(a, "akiai", "ak"); // dat. sg.
    checkOneTerm(a, "akims", "ak"); // dat. pl.
    checkOneTerm(a, "akį", "ak"); // acc. sg.
    checkOneTerm(a, "akis", "ak"); // acc. pl.
    checkOneTerm(a, "akimi", "ak"); // ins. sg.
    checkOneTerm(a, "akimis", "ak"); // ins. pl.
    checkOneTerm(a, "akyje", "ak"); // loc. sg.
    checkOneTerm(a, "akyse", "ak"); // loc. pl.
    checkOneTerm(a, "akie", "ak"); // voc. sg.
    checkOneTerm(a, "akys", "ak"); // voc. pl.
  }

  /** Fourth-declension nouns (-us, -ius). */
  public void testNounsIV() throws IOException {
    // n. decl IV (-us)
    checkOneTerm(a, "sūnus", "sūn"); // nom. sing.
    checkOneTerm(a, "sūnūs", "sūn"); // nom. pl.
    checkOneTerm(a, "sūnaus", "sūn"); // gen. sg.
    checkOneTerm(a, "sūnų", "sūn"); // gen. pl.
    checkOneTerm(a, "sūnui", "sūn"); // dat. sg.
    checkOneTerm(a, "sūnums", "sūn"); // dat. pl.
    checkOneTerm(a, "sūnų", "sūn"); // acc. sg.
    checkOneTerm(a, "sūnus", "sūn"); // acc. pl.
    checkOneTerm(a, "sūnumi", "sūn"); // ins. sg.
    checkOneTerm(a, "sūnumis", "sūn"); // ins. pl.
    checkOneTerm(a, "sūnuje", "sūn"); // loc. sg.
    checkOneTerm(a, "sūnuose", "sūn"); // loc. pl.
    checkOneTerm(a, "sūnau", "sūn"); // voc. sg.
    checkOneTerm(a, "sūnūs", "sūn"); // voc. pl.
    // n. decl IV (-ius)
    checkOneTerm(a, "profesorius", "profesor"); // nom. sing.
    checkOneTerm(a, "profesoriai", "profesor"); // nom. pl.
    checkOneTerm(a, "profesoriaus", "profesor"); // gen. sg.
    checkOneTerm(a, "profesorių", "profesor"); // gen. pl.
    checkOneTerm(a, "profesoriui", "profesor"); // dat. sg.
    checkOneTerm(a, "profesoriams", "profesor"); // dat. pl.
    checkOneTerm(a, "profesorių", "profesor"); // acc. sg.
    checkOneTerm(a, "profesorius", "profesor"); // acc. pl.
    checkOneTerm(a, "profesoriumi", "profesor"); // ins. sg.
    checkOneTerm(a, "profesoriais", "profesor"); // ins. pl.
    checkOneTerm(a, "profesoriuje", "profesor"); // loc. sg.
    checkOneTerm(a, "profesoriuose", "profesor"); // loc. pl.
    checkOneTerm(a, "profesoriau", "profesor"); // voc. sg.
    checkOneTerm(a, "profesoriai", "profesor"); // voc. pl.
  }

  /** Fifth-declension nouns. */
  public void testNounsV() throws IOException {
    // n. decl V
    // note: gen.pl. doesn't conflate
    checkOneTerm(a, "vanduo", "vand"); // nom. sing.
    checkOneTerm(a, "vandenys", "vand"); // nom. pl.
    checkOneTerm(a, "vandens", "vand"); // gen. sg.
    checkOneTerm(a, "vandenų", "vanden"); // gen. pl.
    checkOneTerm(a, "vandeniui", "vand"); // dat. sg.
    checkOneTerm(a, "vandenims", "vand"); // dat. pl.
    checkOneTerm(a, "vandenį", "vand"); // acc. sg.
    checkOneTerm(a, "vandenis", "vand"); // acc. pl.
    checkOneTerm(a, "vandeniu", "vand"); // ins. sg.
    checkOneTerm(a, "vandenimis", "vand"); // ins. pl.
    checkOneTerm(a, "vandenyje", "vand"); // loc. sg.
    checkOneTerm(a, "vandenyse", "vand"); // loc. pl.
    checkOneTerm(a, "vandenie", "vand"); // voc. sg.
    checkOneTerm(a, "vandenys", "vand"); // voc. pl.
  }

  /** First-declension adjectives, masculine and feminine forms. */
  public void testAdjI() throws IOException {
    // adj. decl I
    checkOneTerm(a, "geras", "ger"); // m. nom. sing.
    checkOneTerm(a, "geri", "ger"); // m. nom. pl.
    checkOneTerm(a, "gero", "ger"); // m. gen. sg.
    checkOneTerm(a, "gerų", "ger"); // m. gen. pl.
    checkOneTerm(a, "geram", "ger"); // m. dat. sg.
    checkOneTerm(a, "geriems", "ger"); // m. dat. pl.
    checkOneTerm(a, "gerą", "ger"); // m. acc. sg.
    checkOneTerm(a, "gerus", "ger"); // m. acc. pl.
    checkOneTerm(a, "geru", "ger"); // m. ins. sg.
    checkOneTerm(a, "gerais", "ger"); // m. ins. pl.
    checkOneTerm(a, "gerame", "ger"); // m. loc. sg.
    checkOneTerm(a, "geruose", "ger"); // m. loc. pl.
    checkOneTerm(a, "gera", "ger"); // f. nom. sing.
    checkOneTerm(a, "geros", "ger"); // f. nom. pl.
    checkOneTerm(a, "geros", "ger"); // f. gen. sg.
    checkOneTerm(a, "gerų", "ger"); // f. gen. pl.
    checkOneTerm(a, "gerai", "ger"); // f. dat. sg.
    checkOneTerm(a, "geroms", "ger"); // f. dat. pl.
    checkOneTerm(a, "gerą", "ger"); // f. acc. sg.
    checkOneTerm(a, "geras", "ger"); // f. acc. pl.
    checkOneTerm(a, "gera", "ger"); // f. ins. sg.
    checkOneTerm(a, "geromis", "ger"); // f. ins. pl.
    checkOneTerm(a, "geroje", "ger"); // f. loc. sg.
    checkOneTerm(a, "gerose", "ger"); // f. loc. pl.
  }

  /** Second-declension adjectives, masculine and feminine forms. */
  public void testAdjII() throws IOException {
    // adj. decl II
    checkOneTerm(a, "gražus", "graž"); // m. nom. sing.
    checkOneTerm(a, "gražūs", "graž"); // m. nom. pl.
    checkOneTerm(a, "gražaus", "graž"); // m. gen. sg.
    checkOneTerm(a, "gražių", "graž"); // m. gen. pl.
    checkOneTerm(a, "gražiam", "graž"); // m. dat. sg.
    checkOneTerm(a, "gražiems", "graž"); // m. dat. pl.
    checkOneTerm(a, "gražų", "graž"); // m. acc. sg.
    checkOneTerm(a, "gražius", "graž"); // m. acc. pl.
    checkOneTerm(a, "gražiu", "graž"); // m. ins. sg.
    checkOneTerm(a, "gražiais", "graž"); // m. ins. pl.
    checkOneTerm(a, "gražiame", "graž"); // m. loc. sg.
    checkOneTerm(a, "gražiuose", "graž"); // m. loc. pl.
    checkOneTerm(a, "graži", "graž"); // f. nom. sing.
    checkOneTerm(a, "gražios", "graž"); // f. nom. pl.
    checkOneTerm(a, "gražios", "graž"); // f. gen. sg.
    checkOneTerm(a, "gražių", "graž"); // f. gen. pl.
    checkOneTerm(a, "gražiai", "graž"); // f. dat. sg.
    checkOneTerm(a, "gražioms", "graž"); // f. dat. pl.
    checkOneTerm(a, "gražią", "graž"); // f. acc. sg.
    checkOneTerm(a, "gražias", "graž"); // f. acc. pl.
    checkOneTerm(a, "gražia", "graž"); // f. ins. sg.
    checkOneTerm(a, "gražiomis", "graž"); // f. ins. pl.
    checkOneTerm(a, "gražioje", "graž"); // f. loc. sg.
    checkOneTerm(a, "gražiose", "graž"); // f. loc. pl.
  }

  /** Third-declension adjectives, masculine and feminine forms. */
  public void testAdjIII() throws IOException {
    // adj. decl III
    checkOneTerm(a, "vidutinis", "vidutin"); // m. nom. sing.
    checkOneTerm(a, "vidutiniai", "vidutin"); // m. nom. pl.
    checkOneTerm(a, "vidutinio", "vidutin"); // m. gen. sg.
    checkOneTerm(a, "vidutinių", "vidutin"); // m. gen. pl.
    checkOneTerm(a, "vidutiniam", "vidutin"); // m. dat. sg.
    checkOneTerm(a, "vidutiniams", "vidutin"); // m. dat. pl.
    checkOneTerm(a, "vidutinį", "vidutin"); // m. acc. sg.
    checkOneTerm(a, "vidutinius", "vidutin"); // m. acc. pl.
    checkOneTerm(a, "vidutiniu", "vidutin"); // m. ins. sg.
    checkOneTerm(a, "vidutiniais", "vidutin"); // m. ins. pl.
    checkOneTerm(a, "vidutiniame", "vidutin"); // m. loc. sg.
    checkOneTerm(a, "vidutiniuose", "vidutin"); // m. loc. pl.
    checkOneTerm(a, "vidutinė", "vidutin"); // f. nom. sing.
    checkOneTerm(a, "vidutinės", "vidutin"); // f. nom. pl.
    checkOneTerm(a, "vidutinės", "vidutin"); // f. gen. sg.
    checkOneTerm(a, "vidutinių", "vidutin"); // f. gen. pl.
    checkOneTerm(a, "vidutinei", "vidutin"); // f. dat. sg.
    checkOneTerm(a, "vidutinėms", "vidutin"); // f. dat. pl.
    checkOneTerm(a, "vidutinę", "vidutin"); // f. acc. sg.
    checkOneTerm(a, "vidutines", "vidutin"); // f. acc. pl.
    checkOneTerm(a, "vidutine", "vidutin"); // f. ins. sg.
    checkOneTerm(a, "vidutinėmis", "vidutin"); // f. ins. pl.
    checkOneTerm(a, "vidutinėje", "vidutin"); // f. loc. sg.
    checkOneTerm(a, "vidutinėse", "vidutin"); // f. loc. pl.
  }

  /**
   * test some high frequency terms from corpora to look for anything crazy
   *
   * <p>Every check uses a non-empty term: the whitespace tokenizer emits no token
   * for an empty input, so a one-term assertion on "" could never hold. Checks
   * whose input and expected term were both empty have been dropped for that reason.
   */
  public void testHighFrequencyTerms() throws IOException {
    checkOneTerm(a, "ir", "ir");
    checkOneTerm(a, "kad", "kad");
    checkOneTerm(a, "į", "į");
    checkOneTerm(a, "tai", "tai");
    checkOneTerm(a, "su", "su");
    checkOneTerm(a, "o", "o");
    checkOneTerm(a, "kaip", "kaip");
    checkOneTerm(a, "bet", "bet");
    checkOneTerm(a, "yra", "yr");
    checkOneTerm(a, "buvo", "buv");
    checkOneTerm(a, "tik", "tik");
    checkOneTerm(a, "ne", "ne");
    checkOneTerm(a, "taip", "taip");
    checkOneTerm(a, "ar", "ar");
    checkOneTerm(a, "dar", "dar");
    checkOneTerm(a, "jau", "jau");
    checkOneTerm(a, "savo", "sav");
    checkOneTerm(a, "apie", "ap");
    checkOneTerm(a, "kai", "kai");
    checkOneTerm(a, "per", "per");
    checkOneTerm(a, "nuo", "nuo");
    checkOneTerm(a, "po", "po");
    checkOneTerm(a, "jis", "jis");
    checkOneTerm(a, "kas", "kas");
    checkOneTerm(a, "d", "d");
    checkOneTerm(a, "labai", "lab");
    checkOneTerm(a, "man", "man");
    checkOneTerm(a, "dėl", "dėl");
    checkOneTerm(a, "tačiau", "tat");
    checkOneTerm(a, "nes", "nes");
    checkOneTerm(a, "to", "to");
    checkOneTerm(a, "jo", "jo");
    checkOneTerm(a, "iki", "ik");
    checkOneTerm(a, "mano", "man");
    checkOneTerm(a, "metų", "met");
    checkOneTerm(a, "nors", "nor");
    checkOneTerm(a, "jei", "jei");
    checkOneTerm(a, "bus", "bus");
    checkOneTerm(a, "čia", "čia");
    checkOneTerm(a, "dabar", "dabar");
    checkOneTerm(a, "Lietuvos", "Lietuv");
    checkOneTerm(a, "net", "net");
    checkOneTerm(a, "nei", "nei");
    checkOneTerm(a, "gali", "gal");
    checkOneTerm(a, "daug", "daug");
    checkOneTerm(a, "prie", "prie");
    checkOneTerm(a, "ji", "ji");
    checkOneTerm(a, "jos", "jos");
    checkOneTerm(a, "pat", "pat");
    checkOneTerm(a, "jie", "jie");
    checkOneTerm(a, "kur", "kur");
    checkOneTerm(a, "gal", "gal");
    checkOneTerm(a, "ant", "ant");
    checkOneTerm(a, "tiek", "tiek");
    checkOneTerm(a, "be", "be");
    checkOneTerm(a, "būti", "būt");
    checkOneTerm(a, "bei", "bei");
    checkOneTerm(a, "daugiau", "daug");
    checkOneTerm(a, "turi", "tur");
    checkOneTerm(a, "prieš", "prieš");
    checkOneTerm(a, "vis", "vis");
    checkOneTerm(a, "būtų", "būt");
    checkOneTerm(a, "jog", "jog");
    checkOneTerm(a, "reikia", "reik");
    checkOneTerm(a, "mūsų", "mūs");
    checkOneTerm(a, "metu", "met");
    checkOneTerm(a, "galima", "galim");
    checkOneTerm(a, "nėra", "nėr");
    checkOneTerm(a, "arba", "arb");
    checkOneTerm(a, "mes", "mes");
    checkOneTerm(a, "kurie", "kur");
    checkOneTerm(a, "tikrai", "tikr");
    checkOneTerm(a, "todėl", "tod");
    checkOneTerm(a, "ten", "ten");
    checkOneTerm(a, "šiandien", "šiandien");
    checkOneTerm(a, "vienas", "vien");
    checkOneTerm(a, "visi", "vis");
    checkOneTerm(a, "kuris", "kur");
    checkOneTerm(a, "tada", "tad");
    checkOneTerm(a, "kiek", "kiek");
    checkOneTerm(a, "tuo", "tuo");
    checkOneTerm(a, "gerai", "ger");
    checkOneTerm(a, "nieko", "niek");
    checkOneTerm(a, "kol", "kol");
    checkOneTerm(a, "viskas", "visk");
    checkOneTerm(a, "mane", "man");
    checkOneTerm(a, "kartą", "kart");
    checkOneTerm(a, "m", "m");
    checkOneTerm(a, "tas", "tas");
    checkOneTerm(a, "sakė", "sak");
    checkOneTerm(a, "žmonių", "žmon");
    checkOneTerm(a, "tu", "tu");
    checkOneTerm(a, "dieną", "dien");
    checkOneTerm(a, "žmonės", "žmon");
    checkOneTerm(a, "metais", "met");
    checkOneTerm(a, "vieną", "vien");
    checkOneTerm(a, "vėl", "vėl");
    checkOneTerm(a, "na", "na");
    checkOneTerm(a, "tiesiog", "tiesiog");
    checkOneTerm(a, "toks", "tok");
    checkOneTerm(a, "pats", "pat");
    checkOneTerm(a, "ko", "ko");
    checkOneTerm(a, "Lietuvoje", "Lietuv");
    checkOneTerm(a, "pagal", "pagal");
    checkOneTerm(a, "jeigu", "jeig");
    checkOneTerm(a, "visai", "vis");
    checkOneTerm(a, "viena", "vien");
    checkOneTerm(a, "šį", "šį");
    checkOneTerm(a, "metus", "met");
    checkOneTerm(a, "jam", "jam");
    checkOneTerm(a, "kodėl", "kod");
    checkOneTerm(a, "litų", "lit");
    checkOneTerm(a, "kuri", "kur");
    checkOneTerm(a, "darbo", "darb");
    checkOneTerm(a, "tarp", "tarp");
    checkOneTerm(a, "juk", "juk");
    checkOneTerm(a, "laiko", "laik");
    checkOneTerm(a, "juos", "juos");
    checkOneTerm(a, "visą", "vis");
    checkOneTerm(a, "kurios", "kur");
    checkOneTerm(a, "tam", "tam");
    checkOneTerm(a, "pas", "pas");
    checkOneTerm(a, "viską", "visk");
    checkOneTerm(a, "Europos", "Eur");
    checkOneTerm(a, "atrodo", "atrod");
    checkOneTerm(a, "tad", "tad");
    checkOneTerm(a, "bent", "bent");
    checkOneTerm(a, "kitų", "kit");
    checkOneTerm(a, "šis", "šis");
    checkOneTerm(a, "Vilniaus", "Viln");
    checkOneTerm(a, "beveik", "bevei");
    checkOneTerm(a, "proc", "proc");
    checkOneTerm(a, "tokia", "tok");
    checkOneTerm(a, "šiuo", "šiuo");
    checkOneTerm(a, "du", "du");
    checkOneTerm(a, "kartu", "kart");
    checkOneTerm(a, "visada", "visad");
    checkOneTerm(a, "kuo", "kuo");
  }
}