LUCENE-2624: add armenian, basque, catalan analyzers from snowball

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@990459 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-08-28 22:42:25 +00:00
parent 33cc5a041e
commit 13fd70521a
16 changed files with 3507 additions and 0 deletions

View File

@ -220,6 +220,9 @@ New features
* LUCENE-2581: FastVectorHighlighter: add Encoder to FragmentsBuilder.
(Koji Sekiguchi)
* LUCENE-2624: Add Analyzers for Armenian, Basque, and Catalan, from snowball.
(Robert Muir)
Build
* LUCENE-2124: Moved the JDK-based collation support from contrib/collation

View File

@ -0,0 +1,130 @@
package org.apache.lucene.analysis.ca;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.CatalanStemmer;
/**
* {@link Analyzer} for Catalan.
*/
public final class CatalanAnalyzer extends StopwordAnalyzerBase {
  /** Terms in this set are marked as keywords and excluded from stemming. */
  private final Set<?> stemExclusionSet;

  /** File containing default Catalan stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = loadStopwordSet(false,
            CatalanAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); chain the cause so a packaging problem
        // is diagnosable from the stack trace
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }

  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
  public CatalanAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public CatalanAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerFilter} before
   * stemming.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public CatalanAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    // defensive copy: callers cannot mutate the exclusion set after construction
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }

  /**
   * Creates a
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * which tokenizes all the text in the provided {@link Reader}.
   *
   * @return A
   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from an {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
   *         provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty()) {
      result = new KeywordMarkerFilter(result, stemExclusionSet);
    }
    result = new SnowballFilter(result, new CatalanStemmer());
    return new TokenStreamComponents(source, result);
  }
}

View File

@ -0,0 +1,22 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html><head></head>
<body>
Analyzer for Catalan.
</body>
</html>

View File

@ -0,0 +1,130 @@
package org.apache.lucene.analysis.eu;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.BasqueStemmer;
/**
* {@link Analyzer} for Basque.
*/
public final class BasqueAnalyzer extends StopwordAnalyzerBase {
  /** Terms in this set are marked as keywords and excluded from stemming. */
  private final Set<?> stemExclusionSet;

  /** File containing default Basque stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = loadStopwordSet(false,
            BasqueAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); chain the cause so a packaging problem
        // is diagnosable from the stack trace
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }

  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
  public BasqueAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public BasqueAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerFilter} before
   * stemming.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public BasqueAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    // defensive copy: callers cannot mutate the exclusion set after construction
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }

  /**
   * Creates a
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * which tokenizes all the text in the provided {@link Reader}.
   *
   * @return A
   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from an {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
   *         provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty()) {
      result = new KeywordMarkerFilter(result, stemExclusionSet);
    }
    result = new SnowballFilter(result, new BasqueStemmer());
    return new TokenStreamComponents(source, result);
  }
}

View File

@ -0,0 +1,22 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html><head></head>
<body>
Analyzer for Basque.
</body>
</html>

View File

@ -0,0 +1,130 @@
package org.apache.lucene.analysis.hy;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.ArmenianStemmer;
/**
* {@link Analyzer} for Armenian.
*/
public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
  /** Terms in this set are marked as keywords and excluded from stemming. */
  private final Set<?> stemExclusionSet;

  /** File containing default Armenian stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = loadStopwordSet(false,
            ArmenianAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); chain the cause so a packaging problem
        // is diagnosable from the stack trace
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }

  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
  public ArmenianAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public ArmenianAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerFilter} before
   * stemming.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public ArmenianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    // defensive copy: callers cannot mutate the exclusion set after construction
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }

  /**
   * Creates a
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * which tokenizes all the text in the provided {@link Reader}.
   *
   * @return A
   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from an {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
   *         provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty()) {
      result = new KeywordMarkerFilter(result, stemExclusionSet);
    }
    result = new SnowballFilter(result, new ArmenianStemmer());
    return new TokenStreamComponents(source, result);
  }
}

View File

@ -0,0 +1,22 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html><head></head>
<body>
Analyzer for Armenian.
</body>
</html>

View File

@ -0,0 +1,516 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;
/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/
public class ArmenianStemmer extends SnowballProgram {
private static final long serialVersionUID = 1L;
private final static ArmenianStemmer methodObject = new ArmenianStemmer ();
private final static Among a_0[] = {
new Among ( "\u0580\u0578\u0580\u0564", -1, 1, "", methodObject ),
new Among ( "\u0565\u0580\u0578\u0580\u0564", 0, 1, "", methodObject ),
new Among ( "\u0561\u056C\u056B", -1, 1, "", methodObject ),
new Among ( "\u0561\u056F\u056B", -1, 1, "", methodObject ),
new Among ( "\u0578\u0580\u0561\u056F", -1, 1, "", methodObject ),
new Among ( "\u0565\u0572", -1, 1, "", methodObject ),
new Among ( "\u0561\u056F\u0561\u0576", -1, 1, "", methodObject ),
new Among ( "\u0561\u0580\u0561\u0576", -1, 1, "", methodObject ),
new Among ( "\u0565\u0576", -1, 1, "", methodObject ),
new Among ( "\u0565\u056F\u0565\u0576", 8, 1, "", methodObject ),
new Among ( "\u0565\u0580\u0565\u0576", 8, 1, "", methodObject ),
new Among ( "\u0578\u0580\u0567\u0576", -1, 1, "", methodObject ),
new Among ( "\u056B\u0576", -1, 1, "", methodObject ),
new Among ( "\u0563\u056B\u0576", 12, 1, "", methodObject ),
new Among ( "\u0578\u057E\u056B\u0576", 12, 1, "", methodObject ),
new Among ( "\u056C\u0561\u0575\u0576", -1, 1, "", methodObject ),
new Among ( "\u057E\u0578\u0582\u0576", -1, 1, "", methodObject ),
new Among ( "\u057A\u0565\u057D", -1, 1, "", methodObject ),
new Among ( "\u056B\u057E", -1, 1, "", methodObject ),
new Among ( "\u0561\u057F", -1, 1, "", methodObject ),
new Among ( "\u0561\u057E\u0565\u057F", -1, 1, "", methodObject ),
new Among ( "\u056F\u0578\u057F", -1, 1, "", methodObject ),
new Among ( "\u0562\u0561\u0580", -1, 1, "", methodObject )
};
private final static Among a_1[] = {
new Among ( "\u0561", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0561", 0, 1, "", methodObject ),
new Among ( "\u0565\u0581\u0561", 0, 1, "", methodObject ),
new Among ( "\u057E\u0565", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0580\u056B", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u056B", -1, 1, "", methodObject ),
new Among ( "\u0565\u0581\u056B", -1, 1, "", methodObject ),
new Among ( "\u057E\u0565\u0581\u056B", 6, 1, "", methodObject ),
new Among ( "\u0561\u056C", -1, 1, "", methodObject ),
new Among ( "\u0568\u0561\u056C", 8, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0561\u056C", 8, 1, "", methodObject ),
new Among ( "\u0565\u0576\u0561\u056C", 8, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0576\u0561\u056C", 8, 1, "", methodObject ),
new Among ( "\u0565\u056C", -1, 1, "", methodObject ),
new Among ( "\u0568\u0565\u056C", 13, 1, "", methodObject ),
new Among ( "\u0576\u0565\u056C", 13, 1, "", methodObject ),
new Among ( "\u0581\u0576\u0565\u056C", 15, 1, "", methodObject ),
new Among ( "\u0565\u0581\u0576\u0565\u056C", 16, 1, "", methodObject ),
new Among ( "\u0579\u0565\u056C", 13, 1, "", methodObject ),
new Among ( "\u057E\u0565\u056C", 13, 1, "", methodObject ),
new Among ( "\u0561\u0581\u057E\u0565\u056C", 19, 1, "", methodObject ),
new Among ( "\u0565\u0581\u057E\u0565\u056C", 19, 1, "", methodObject ),
new Among ( "\u057F\u0565\u056C", 13, 1, "", methodObject ),
new Among ( "\u0561\u057F\u0565\u056C", 22, 1, "", methodObject ),
new Among ( "\u0578\u057F\u0565\u056C", 22, 1, "", methodObject ),
new Among ( "\u056F\u0578\u057F\u0565\u056C", 24, 1, "", methodObject ),
new Among ( "\u057E\u0561\u056E", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0574", -1, 1, "", methodObject ),
new Among ( "\u057E\u0578\u0582\u0574", 27, 1, "", methodObject ),
new Among ( "\u0561\u0576", -1, 1, "", methodObject ),
new Among ( "\u0581\u0561\u0576", 29, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0561\u0576", 30, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0580\u056B\u0576", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u056B\u0576", -1, 1, "", methodObject ),
new Among ( "\u0565\u0581\u056B\u0576", -1, 1, "", methodObject ),
new Among ( "\u057E\u0565\u0581\u056B\u0576", 34, 1, "", methodObject ),
new Among ( "\u0561\u056C\u056B\u057D", -1, 1, "", methodObject ),
new Among ( "\u0565\u056C\u056B\u057D", -1, 1, "", methodObject ),
new Among ( "\u0561\u057E", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0561\u057E", 38, 1, "", methodObject ),
new Among ( "\u0565\u0581\u0561\u057E", 38, 1, "", methodObject ),
new Among ( "\u0561\u056C\u0578\u057E", -1, 1, "", methodObject ),
new Among ( "\u0565\u056C\u0578\u057E", -1, 1, "", methodObject ),
new Among ( "\u0561\u0580", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0561\u0580", 43, 1, "", methodObject ),
new Among ( "\u0565\u0581\u0561\u0580", 43, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0580\u056B\u0580", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u056B\u0580", -1, 1, "", methodObject ),
new Among ( "\u0565\u0581\u056B\u0580", -1, 1, "", methodObject ),
new Among ( "\u057E\u0565\u0581\u056B\u0580", 48, 1, "", methodObject ),
new Among ( "\u0561\u0581", -1, 1, "", methodObject ),
new Among ( "\u0565\u0581", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0580\u0565\u0581", 51, 1, "", methodObject ),
new Among ( "\u0561\u056C\u0578\u0582\u0581", -1, 1, "", methodObject ),
new Among ( "\u0565\u056C\u0578\u0582\u0581", -1, 1, "", methodObject ),
new Among ( "\u0561\u056C\u0578\u0582", -1, 1, "", methodObject ),
new Among ( "\u0565\u056C\u0578\u0582", -1, 1, "", methodObject ),
new Among ( "\u0561\u0584", -1, 1, "", methodObject ),
new Among ( "\u0581\u0561\u0584", 57, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0561\u0584", 58, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0580\u056B\u0584", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u056B\u0584", -1, 1, "", methodObject ),
new Among ( "\u0565\u0581\u056B\u0584", -1, 1, "", methodObject ),
new Among ( "\u057E\u0565\u0581\u056B\u0584", 62, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0584", -1, 1, "", methodObject ),
new Among ( "\u0581\u0561\u0576\u0584", 64, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0561\u0576\u0584", 65, 1, "", methodObject ),
new Among ( "\u0561\u0581\u0580\u056B\u0576\u0584", -1, 1, "", methodObject ),
new Among ( "\u0561\u0581\u056B\u0576\u0584", -1, 1, "", methodObject ),
new Among ( "\u0565\u0581\u056B\u0576\u0584", -1, 1, "", methodObject ),
new Among ( "\u057E\u0565\u0581\u056B\u0576\u0584", 69, 1, "", methodObject )
};
private final static Among a_2[] = {
new Among ( "\u0578\u0580\u0564", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0575\u0569", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0570\u056B", -1, 1, "", methodObject ),
new Among ( "\u0581\u056B", -1, 1, "", methodObject ),
new Among ( "\u056B\u056C", -1, 1, "", methodObject ),
new Among ( "\u0561\u056F", -1, 1, "", methodObject ),
new Among ( "\u0575\u0561\u056F", 5, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0561\u056F", 5, 1, "", methodObject ),
new Among ( "\u056B\u056F", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u056F", -1, 1, "", methodObject ),
new Among ( "\u0561\u0576", -1, 1, "", methodObject ),
new Among ( "\u057A\u0561\u0576", 10, 1, "", methodObject ),
new Among ( "\u057D\u057F\u0561\u0576", 10, 1, "", methodObject ),
new Among ( "\u0561\u0580\u0561\u0576", 10, 1, "", methodObject ),
new Among ( "\u0565\u0572\u0567\u0576", -1, 1, "", methodObject ),
new Among ( "\u0575\u0578\u0582\u0576", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0569\u0575\u0578\u0582\u0576", 15, 1, "", methodObject ),
new Among ( "\u0561\u056E\u0578", -1, 1, "", methodObject ),
new Among ( "\u056B\u0579", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u057D", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u057D\u057F", -1, 1, "", methodObject ),
new Among ( "\u0563\u0561\u0580", -1, 1, "", methodObject ),
new Among ( "\u057E\u0578\u0580", -1, 1, "", methodObject ),
new Among ( "\u0561\u057E\u0578\u0580", 22, 1, "", methodObject ),
new Among ( "\u0578\u0581", -1, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0585\u0581", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582", -1, 1, "", methodObject ),
new Among ( "\u0584", -1, 1, "", methodObject ),
new Among ( "\u0579\u0565\u0584", 27, 1, "", methodObject ),
new Among ( "\u056B\u0584", 27, 1, "", methodObject ),
new Among ( "\u0561\u056C\u056B\u0584", 29, 1, "", methodObject ),
new Among ( "\u0561\u0576\u056B\u0584", 29, 1, "", methodObject ),
new Among ( "\u057E\u0561\u056E\u0584", 27, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0575\u0584", 27, 1, "", methodObject ),
new Among ( "\u0565\u0576\u0584", 27, 1, "", methodObject ),
new Among ( "\u0578\u0576\u0584", 27, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0576\u0584", 27, 1, "", methodObject ),
new Among ( "\u0574\u0578\u0582\u0576\u0584", 36, 1, "", methodObject ),
new Among ( "\u056B\u0579\u0584", 27, 1, "", methodObject ),
new Among ( "\u0561\u0580\u0584", 27, 1, "", methodObject )
};
private final static Among a_3[] = {
new Among ( "\u057D\u0561", -1, 1, "", methodObject ),
new Among ( "\u057E\u0561", -1, 1, "", methodObject ),
new Among ( "\u0561\u0574\u0562", -1, 1, "", methodObject ),
new Among ( "\u0564", -1, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0564", 3, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u0564", 4, 1, "", methodObject ),
new Among ( "\u057E\u0561\u0576\u0564", 4, 1, "", methodObject ),
new Among ( "\u0578\u057B\u0564", 3, 1, "", methodObject ),
new Among ( "\u0565\u0580\u0564", 3, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u0564", 8, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0564", 3, 1, "", methodObject ),
new Among ( "\u0568", -1, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0568", 11, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u0568", 12, 1, "", methodObject ),
new Among ( "\u057E\u0561\u0576\u0568", 12, 1, "", methodObject ),
new Among ( "\u0578\u057B\u0568", 11, 1, "", methodObject ),
new Among ( "\u0565\u0580\u0568", 11, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u0568", 16, 1, "", methodObject ),
new Among ( "\u056B", -1, 1, "", methodObject ),
new Among ( "\u057E\u056B", 18, 1, "", methodObject ),
new Among ( "\u0565\u0580\u056B", 18, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u056B", 20, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0578\u0582\u0574", -1, 1, "", methodObject ),
new Among ( "\u0565\u0580\u0578\u0582\u0574", -1, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u0578\u0582\u0574", 23, 1, "", methodObject ),
new Among ( "\u0576", -1, 1, "", methodObject ),
new Among ( "\u0561\u0576", 25, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576", 26, 1, "", methodObject ),
new Among ( "\u057E\u0561\u0576", 26, 1, "", methodObject ),
new Among ( "\u056B\u0576", 25, 1, "", methodObject ),
new Among ( "\u0565\u0580\u056B\u0576", 29, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u056B\u0576", 30, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u0576", 25, 1, "", methodObject ),
new Among ( "\u0565\u0580\u0576", 25, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u0576", 33, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0576", 25, 1, "", methodObject ),
new Among ( "\u0578\u057B", -1, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u057D", -1, 1, "", methodObject ),
new Among ( "\u057E\u0561\u0576\u057D", -1, 1, "", methodObject ),
new Among ( "\u0578\u057B\u057D", -1, 1, "", methodObject ),
new Among ( "\u0578\u057E", -1, 1, "", methodObject ),
new Among ( "\u0561\u0576\u0578\u057E", 40, 1, "", methodObject ),
new Among ( "\u057E\u0578\u057E", 40, 1, "", methodObject ),
new Among ( "\u0565\u0580\u0578\u057E", 40, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u0578\u057E", 43, 1, "", methodObject ),
new Among ( "\u0565\u0580", -1, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580", 45, 1, "", methodObject ),
new Among ( "\u0581", -1, 1, "", methodObject ),
new Among ( "\u056B\u0581", 47, 1, "", methodObject ),
new Among ( "\u057E\u0561\u0576\u056B\u0581", 48, 1, "", methodObject ),
new Among ( "\u0578\u057B\u056B\u0581", 48, 1, "", methodObject ),
new Among ( "\u057E\u056B\u0581", 48, 1, "", methodObject ),
new Among ( "\u0565\u0580\u056B\u0581", 48, 1, "", methodObject ),
new Among ( "\u0576\u0565\u0580\u056B\u0581", 52, 1, "", methodObject ),
new Among ( "\u0581\u056B\u0581", 48, 1, "", methodObject ),
new Among ( "\u0578\u0581", 47, 1, "", methodObject ),
new Among ( "\u0578\u0582\u0581", 47, 1, "", methodObject )
};
private static final char g_v[] = {209, 4, 128, 0, 18 };
private int I_p2;
private int I_pV;
// Copies the region marks from another stemmer instance, then delegates to
// the base SnowballProgram to copy the shared buffer/cursor state. Part of
// the generated Snowball runtime contract.
private void copy_from(ArmenianStemmer other) {
I_p2 = other.I_p2;
I_pV = other.I_pV;
super.copy_from(other);
}
// Computes the Snowball region marks for the current word:
//   I_pV — position just after the first vowel (group g_v, code points
//          U+0561..U+0585, i.e. 1377..1413 — the Armenian lowercase letters);
//   I_p2 — position after the second vowel-then-nonvowel sequence.
// Both default to `limit` (end of word) when the word is too short to
// contain the pattern; the cursor is restored before returning.
// Generated code: the lab/golab do/while blocks are Snowball's encoding of
// goto-style jumps — statement order is significant, do not restructure.
private boolean r_mark_regions() {
int v_1;
// (, line 58
I_pV = limit;
I_p2 = limit;
// do, line 62
v_1 = cursor;
lab0: do {
// (, line 62
// gopast, line 63 — advance to the first vowel
golab1: while(true)
{
lab2: do {
if (!(in_grouping(g_v, 1377, 1413)))
{
break lab2;
}
break golab1;
} while (false);
if (cursor >= limit)
{
break lab0;
}
cursor++;
}
// setmark pV, line 63
I_pV = cursor;
// gopast, line 63 — advance past the vowel run to a non-vowel
golab3: while(true)
{
lab4: do {
if (!(out_grouping(g_v, 1377, 1413)))
{
break lab4;
}
break golab3;
} while (false);
if (cursor >= limit)
{
break lab0;
}
cursor++;
}
// gopast, line 64 — second vowel
golab5: while(true)
{
lab6: do {
if (!(in_grouping(g_v, 1377, 1413)))
{
break lab6;
}
break golab5;
} while (false);
if (cursor >= limit)
{
break lab0;
}
cursor++;
}
// gopast, line 64 — second non-vowel
golab7: while(true)
{
lab8: do {
if (!(out_grouping(g_v, 1377, 1413)))
{
break lab8;
}
break golab7;
} while (false);
if (cursor >= limit)
{
break lab0;
}
cursor++;
}
// setmark p2, line 64
I_p2 = cursor;
} while (false);
// restore the cursor; this routine only sets marks
cursor = v_1;
return true;
}
/** Returns true when the (backwards-mode) cursor lies inside region R2. */
private boolean r_R2() {
  return I_p2 <= cursor;
}
/**
 * Strips an adjective suffix (table a_0) from the end of the word.
 * Returns false when no suffix from the table matches.
 */
private boolean r_adjective() {
  // mark the end of the candidate suffix
  ket = cursor;
  // longest-match lookup, scanning backwards from the cursor
  final int match = find_among_b(a_0, 23);
  if (match == 0) {
    return false;
  }
  // mark the start of the matched suffix
  bra = cursor;
  // every a_0 entry carries result 1: delete the matched suffix
  if (match == 1) {
    slice_del();
  }
  return true;
}
/**
 * Strips a verb suffix (table a_1) from the end of the word.
 * Returns false when no suffix from the table matches.
 */
private boolean r_verb() {
  // mark the end of the candidate suffix
  ket = cursor;
  // longest-match lookup, scanning backwards from the cursor
  final int match = find_among_b(a_1, 71);
  if (match == 0) {
    return false;
  }
  // mark the start of the matched suffix
  bra = cursor;
  // every a_1 entry carries result 1: delete the matched suffix
  if (match == 1) {
    slice_del();
  }
  return true;
}
private boolean r_noun() {
int among_var;
// (, line 180
// [, line 181
ket = cursor;
// substring, line 181
among_var = find_among_b(a_2, 40);
if (among_var == 0)
{
return false;
}
// ], line 181
bra = cursor;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 223
// delete, line 223
slice_del();
break;
}
return true;
}
private boolean r_ending() {
int among_var;
// (, line 227
// [, line 228
ket = cursor;
// substring, line 228
among_var = find_among_b(a_3, 57);
if (among_var == 0)
{
return false;
}
// ], line 228
bra = cursor;
// call R2, line 228
if (!r_R2())
{
return false;
}
switch(among_var) {
case 0:
return false;
case 1:
// (, line 287
// delete, line 287
slice_del();
break;
}
return true;
}
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
int v_5;
int v_6;
int v_7;
// (, line 292
// do, line 294
v_1 = cursor;
lab0: do {
// call mark_regions, line 294
if (!r_mark_regions())
{
break lab0;
}
} while (false);
cursor = v_1;
// backwards, line 295
limit_backward = cursor; cursor = limit;
// setlimit, line 295
v_2 = limit - cursor;
// tomark, line 295
if (cursor < I_pV)
{
return false;
}
cursor = I_pV;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 295
// do, line 296
v_4 = limit - cursor;
lab1: do {
// call ending, line 296
if (!r_ending())
{
break lab1;
}
} while (false);
cursor = limit - v_4;
// do, line 297
v_5 = limit - cursor;
lab2: do {
// call verb, line 297
if (!r_verb())
{
break lab2;
}
} while (false);
cursor = limit - v_5;
// do, line 298
v_6 = limit - cursor;
lab3: do {
// call adjective, line 298
if (!r_adjective())
{
break lab3;
}
} while (false);
cursor = limit - v_6;
// do, line 299
v_7 = limit - cursor;
lab4: do {
// call noun, line 299
if (!r_noun())
{
break lab4;
}
} while (false);
cursor = limit - v_7;
limit_backward = v_3;
cursor = limit_backward; return true;
}
public boolean equals( Object o ) {
return o instanceof ArmenianStemmer;
}
public int hashCode() {
return ArmenianStemmer.class.getName().hashCode();
}
}

View File

@ -0,0 +1,939 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.Among;
import org.tartarus.snowball.SnowballProgram;
/**
* This class was automatically generated by a Snowball to Java compiler
* It implements the stemming algorithm defined by a snowball script.
*/
public class BasqueStemmer extends SnowballProgram {
private static final long serialVersionUID = 1L;
private final static BasqueStemmer methodObject = new BasqueStemmer ();
private final static Among a_0[] = {
new Among ( "idea", -1, 1, "", methodObject ),
new Among ( "bidea", 0, 1, "", methodObject ),
new Among ( "kidea", 0, 1, "", methodObject ),
new Among ( "pidea", 0, 1, "", methodObject ),
new Among ( "kundea", -1, 1, "", methodObject ),
new Among ( "galea", -1, 1, "", methodObject ),
new Among ( "tailea", -1, 1, "", methodObject ),
new Among ( "tzailea", -1, 1, "", methodObject ),
new Among ( "gunea", -1, 1, "", methodObject ),
new Among ( "kunea", -1, 1, "", methodObject ),
new Among ( "tzaga", -1, 1, "", methodObject ),
new Among ( "gaia", -1, 1, "", methodObject ),
new Among ( "aldia", -1, 1, "", methodObject ),
new Among ( "taldia", 12, 1, "", methodObject ),
new Among ( "karia", -1, 1, "", methodObject ),
new Among ( "garria", -1, 2, "", methodObject ),
new Among ( "karria", -1, 1, "", methodObject ),
new Among ( "ka", -1, 1, "", methodObject ),
new Among ( "tzaka", 17, 1, "", methodObject ),
new Among ( "la", -1, 1, "", methodObject ),
new Among ( "mena", -1, 1, "", methodObject ),
new Among ( "pena", -1, 1, "", methodObject ),
new Among ( "kina", -1, 1, "", methodObject ),
new Among ( "ezina", -1, 1, "", methodObject ),
new Among ( "tezina", 23, 1, "", methodObject ),
new Among ( "kuna", -1, 1, "", methodObject ),
new Among ( "tuna", -1, 1, "", methodObject ),
new Among ( "kizuna", -1, 1, "", methodObject ),
new Among ( "era", -1, 1, "", methodObject ),
new Among ( "bera", 28, 1, "", methodObject ),
new Among ( "arabera", 29, 4, "", methodObject ),
new Among ( "kera", 28, 1, "", methodObject ),
new Among ( "pera", 28, 1, "", methodObject ),
new Among ( "orra", -1, 1, "", methodObject ),
new Among ( "korra", 33, 1, "", methodObject ),
new Among ( "dura", -1, 1, "", methodObject ),
new Among ( "gura", -1, 1, "", methodObject ),
new Among ( "kura", -1, 1, "", methodObject ),
new Among ( "tura", -1, 1, "", methodObject ),
new Among ( "eta", -1, 1, "", methodObject ),
new Among ( "keta", 39, 1, "", methodObject ),
new Among ( "gailua", -1, 1, "", methodObject ),
new Among ( "eza", -1, 1, "", methodObject ),
new Among ( "erreza", 42, 1, "", methodObject ),
new Among ( "tza", -1, 2, "", methodObject ),
new Among ( "gaitza", 44, 1, "", methodObject ),
new Among ( "kaitza", 44, 1, "", methodObject ),
new Among ( "kuntza", 44, 1, "", methodObject ),
new Among ( "ide", -1, 1, "", methodObject ),
new Among ( "bide", 48, 1, "", methodObject ),
new Among ( "kide", 48, 1, "", methodObject ),
new Among ( "pide", 48, 1, "", methodObject ),
new Among ( "kunde", -1, 1, "", methodObject ),
new Among ( "tzake", -1, 1, "", methodObject ),
new Among ( "tzeke", -1, 1, "", methodObject ),
new Among ( "le", -1, 1, "", methodObject ),
new Among ( "gale", 55, 1, "", methodObject ),
new Among ( "taile", 55, 1, "", methodObject ),
new Among ( "tzaile", 55, 1, "", methodObject ),
new Among ( "gune", -1, 1, "", methodObject ),
new Among ( "kune", -1, 1, "", methodObject ),
new Among ( "tze", -1, 1, "", methodObject ),
new Among ( "atze", 61, 1, "", methodObject ),
new Among ( "gai", -1, 1, "", methodObject ),
new Among ( "aldi", -1, 1, "", methodObject ),
new Among ( "taldi", 64, 1, "", methodObject ),
new Among ( "ki", -1, 1, "", methodObject ),
new Among ( "ari", -1, 1, "", methodObject ),
new Among ( "kari", 67, 1, "", methodObject ),
new Among ( "lari", 67, 1, "", methodObject ),
new Among ( "tari", 67, 1, "", methodObject ),
new Among ( "etari", 70, 1, "", methodObject ),
new Among ( "garri", -1, 2, "", methodObject ),
new Among ( "karri", -1, 1, "", methodObject ),
new Among ( "arazi", -1, 1, "", methodObject ),
new Among ( "tarazi", 74, 1, "", methodObject ),
new Among ( "an", -1, 1, "", methodObject ),
new Among ( "ean", 76, 1, "", methodObject ),
new Among ( "rean", 77, 1, "", methodObject ),
new Among ( "kan", 76, 1, "", methodObject ),
new Among ( "etan", 76, 1, "", methodObject ),
new Among ( "atseden", -1, 3, "", methodObject ),
new Among ( "men", -1, 1, "", methodObject ),
new Among ( "pen", -1, 1, "", methodObject ),
new Among ( "kin", -1, 1, "", methodObject ),
new Among ( "rekin", 84, 1, "", methodObject ),
new Among ( "ezin", -1, 1, "", methodObject ),
new Among ( "tezin", 86, 1, "", methodObject ),
new Among ( "tun", -1, 1, "", methodObject ),
new Among ( "kizun", -1, 1, "", methodObject ),
new Among ( "go", -1, 1, "", methodObject ),
new Among ( "ago", 90, 1, "", methodObject ),
new Among ( "tio", -1, 1, "", methodObject ),
new Among ( "dako", -1, 1, "", methodObject ),
new Among ( "or", -1, 1, "", methodObject ),
new Among ( "kor", 94, 1, "", methodObject ),
new Among ( "tzat", -1, 1, "", methodObject ),
new Among ( "du", -1, 1, "", methodObject ),
new Among ( "gailu", -1, 1, "", methodObject ),
new Among ( "tu", -1, 1, "", methodObject ),
new Among ( "atu", 99, 1, "", methodObject ),
new Among ( "aldatu", 100, 1, "", methodObject ),
new Among ( "tatu", 100, 1, "", methodObject ),
new Among ( "baditu", 99, 5, "", methodObject ),
new Among ( "ez", -1, 1, "", methodObject ),
new Among ( "errez", 104, 1, "", methodObject ),
new Among ( "tzez", 104, 1, "", methodObject ),
new Among ( "gaitz", -1, 1, "", methodObject ),
new Among ( "kaitz", -1, 1, "", methodObject )
};
private final static Among a_1[] = {
new Among ( "ada", -1, 1, "", methodObject ),
new Among ( "kada", 0, 1, "", methodObject ),
new Among ( "anda", -1, 1, "", methodObject ),
new Among ( "denda", -1, 1, "", methodObject ),
new Among ( "gabea", -1, 1, "", methodObject ),
new Among ( "kabea", -1, 1, "", methodObject ),
new Among ( "aldea", -1, 1, "", methodObject ),
new Among ( "kaldea", 6, 1, "", methodObject ),
new Among ( "taldea", 6, 1, "", methodObject ),
new Among ( "ordea", -1, 1, "", methodObject ),
new Among ( "zalea", -1, 1, "", methodObject ),
new Among ( "tzalea", 10, 1, "", methodObject ),
new Among ( "gilea", -1, 1, "", methodObject ),
new Among ( "emea", -1, 1, "", methodObject ),
new Among ( "kumea", -1, 1, "", methodObject ),
new Among ( "nea", -1, 1, "", methodObject ),
new Among ( "enea", 15, 1, "", methodObject ),
new Among ( "zionea", 15, 1, "", methodObject ),
new Among ( "unea", 15, 1, "", methodObject ),
new Among ( "gunea", 18, 1, "", methodObject ),
new Among ( "pea", -1, 1, "", methodObject ),
new Among ( "aurrea", -1, 1, "", methodObject ),
new Among ( "tea", -1, 1, "", methodObject ),
new Among ( "kotea", 22, 1, "", methodObject ),
new Among ( "artea", 22, 1, "", methodObject ),
new Among ( "ostea", 22, 1, "", methodObject ),
new Among ( "etxea", -1, 1, "", methodObject ),
new Among ( "ga", -1, 1, "", methodObject ),
new Among ( "anga", 27, 1, "", methodObject ),
new Among ( "gaia", -1, 1, "", methodObject ),
new Among ( "aldia", -1, 1, "", methodObject ),
new Among ( "taldia", 30, 1, "", methodObject ),
new Among ( "handia", -1, 1, "", methodObject ),
new Among ( "mendia", -1, 1, "", methodObject ),
new Among ( "geia", -1, 1, "", methodObject ),
new Among ( "egia", -1, 1, "", methodObject ),
new Among ( "degia", 35, 1, "", methodObject ),
new Among ( "tegia", 35, 1, "", methodObject ),
new Among ( "nahia", -1, 1, "", methodObject ),
new Among ( "ohia", -1, 1, "", methodObject ),
new Among ( "kia", -1, 1, "", methodObject ),
new Among ( "tokia", 40, 1, "", methodObject ),
new Among ( "oia", -1, 1, "", methodObject ),
new Among ( "koia", 42, 1, "", methodObject ),
new Among ( "aria", -1, 1, "", methodObject ),
new Among ( "karia", 44, 1, "", methodObject ),
new Among ( "laria", 44, 1, "", methodObject ),
new Among ( "taria", 44, 1, "", methodObject ),
new Among ( "eria", -1, 1, "", methodObject ),
new Among ( "keria", 48, 1, "", methodObject ),
new Among ( "teria", 48, 1, "", methodObject ),
new Among ( "garria", -1, 2, "", methodObject ),
new Among ( "larria", -1, 1, "", methodObject ),
new Among ( "kirria", -1, 1, "", methodObject ),
new Among ( "duria", -1, 1, "", methodObject ),
new Among ( "asia", -1, 1, "", methodObject ),
new Among ( "tia", -1, 1, "", methodObject ),
new Among ( "ezia", -1, 1, "", methodObject ),
new Among ( "bizia", -1, 1, "", methodObject ),
new Among ( "ontzia", -1, 1, "", methodObject ),
new Among ( "ka", -1, 1, "", methodObject ),
new Among ( "joka", 60, 3, "", methodObject ),
new Among ( "aurka", 60, 10, "", methodObject ),
new Among ( "ska", 60, 1, "", methodObject ),
new Among ( "xka", 60, 1, "", methodObject ),
new Among ( "zka", 60, 1, "", methodObject ),
new Among ( "gibela", -1, 1, "", methodObject ),
new Among ( "gela", -1, 1, "", methodObject ),
new Among ( "kaila", -1, 1, "", methodObject ),
new Among ( "skila", -1, 1, "", methodObject ),
new Among ( "tila", -1, 1, "", methodObject ),
new Among ( "ola", -1, 1, "", methodObject ),
new Among ( "na", -1, 1, "", methodObject ),
new Among ( "kana", 72, 1, "", methodObject ),
new Among ( "ena", 72, 1, "", methodObject ),
new Among ( "garrena", 74, 1, "", methodObject ),
new Among ( "gerrena", 74, 1, "", methodObject ),
new Among ( "urrena", 74, 1, "", methodObject ),
new Among ( "zaina", 72, 1, "", methodObject ),
new Among ( "tzaina", 78, 1, "", methodObject ),
new Among ( "kina", 72, 1, "", methodObject ),
new Among ( "mina", 72, 1, "", methodObject ),
new Among ( "garna", 72, 1, "", methodObject ),
new Among ( "una", 72, 1, "", methodObject ),
new Among ( "duna", 83, 1, "", methodObject ),
new Among ( "asuna", 83, 1, "", methodObject ),
new Among ( "tasuna", 85, 1, "", methodObject ),
new Among ( "ondoa", -1, 1, "", methodObject ),
new Among ( "kondoa", 87, 1, "", methodObject ),
new Among ( "ngoa", -1, 1, "", methodObject ),
new Among ( "zioa", -1, 1, "", methodObject ),
new Among ( "koa", -1, 1, "", methodObject ),
new Among ( "takoa", 91, 1, "", methodObject ),
new Among ( "zkoa", 91, 1, "", methodObject ),
new Among ( "noa", -1, 1, "", methodObject ),
new Among ( "zinoa", 94, 1, "", methodObject ),
new Among ( "aroa", -1, 1, "", methodObject ),
new Among ( "taroa", 96, 1, "", methodObject ),
new Among ( "zaroa", 96, 1, "", methodObject ),
new Among ( "eroa", -1, 1, "", methodObject ),
new Among ( "oroa", -1, 1, "", methodObject ),
new Among ( "osoa", -1, 1, "", methodObject ),
new Among ( "toa", -1, 1, "", methodObject ),
new Among ( "ttoa", 102, 1, "", methodObject ),
new Among ( "ztoa", 102, 1, "", methodObject ),
new Among ( "txoa", -1, 1, "", methodObject ),
new Among ( "tzoa", -1, 1, "", methodObject ),
new Among ( "\u00F1oa", -1, 1, "", methodObject ),
new Among ( "ra", -1, 1, "", methodObject ),
new Among ( "ara", 108, 1, "", methodObject ),
new Among ( "dara", 109, 1, "", methodObject ),
new Among ( "liara", 109, 1, "", methodObject ),
new Among ( "tiara", 109, 1, "", methodObject ),
new Among ( "tara", 109, 1, "", methodObject ),
new Among ( "etara", 113, 1, "", methodObject ),
new Among ( "tzara", 109, 1, "", methodObject ),
new Among ( "bera", 108, 1, "", methodObject ),
new Among ( "kera", 108, 1, "", methodObject ),
new Among ( "pera", 108, 1, "", methodObject ),
new Among ( "ora", 108, 2, "", methodObject ),
new Among ( "tzarra", 108, 1, "", methodObject ),
new Among ( "korra", 108, 1, "", methodObject ),
new Among ( "tra", 108, 1, "", methodObject ),
new Among ( "sa", -1, 1, "", methodObject ),
new Among ( "osa", 123, 1, "", methodObject ),
new Among ( "ta", -1, 1, "", methodObject ),
new Among ( "eta", 125, 1, "", methodObject ),
new Among ( "keta", 126, 1, "", methodObject ),
new Among ( "sta", 125, 1, "", methodObject ),
new Among ( "dua", -1, 1, "", methodObject ),
new Among ( "mendua", 129, 1, "", methodObject ),
new Among ( "ordua", 129, 1, "", methodObject ),
new Among ( "lekua", -1, 1, "", methodObject ),
new Among ( "burua", -1, 1, "", methodObject ),
new Among ( "durua", -1, 1, "", methodObject ),
new Among ( "tsua", -1, 1, "", methodObject ),
new Among ( "tua", -1, 1, "", methodObject ),
new Among ( "mentua", 136, 1, "", methodObject ),
new Among ( "estua", 136, 1, "", methodObject ),
new Among ( "txua", -1, 1, "", methodObject ),
new Among ( "zua", -1, 1, "", methodObject ),
new Among ( "tzua", 140, 1, "", methodObject ),
new Among ( "za", -1, 1, "", methodObject ),
new Among ( "eza", 142, 1, "", methodObject ),
new Among ( "eroza", 142, 1, "", methodObject ),
new Among ( "tza", 142, 2, "", methodObject ),
new Among ( "koitza", 145, 1, "", methodObject ),
new Among ( "antza", 145, 1, "", methodObject ),
new Among ( "gintza", 145, 1, "", methodObject ),
new Among ( "kintza", 145, 1, "", methodObject ),
new Among ( "kuntza", 145, 1, "", methodObject ),
new Among ( "gabe", -1, 1, "", methodObject ),
new Among ( "kabe", -1, 1, "", methodObject ),
new Among ( "kide", -1, 1, "", methodObject ),
new Among ( "alde", -1, 1, "", methodObject ),
new Among ( "kalde", 154, 1, "", methodObject ),
new Among ( "talde", 154, 1, "", methodObject ),
new Among ( "orde", -1, 1, "", methodObject ),
new Among ( "ge", -1, 1, "", methodObject ),
new Among ( "zale", -1, 1, "", methodObject ),
new Among ( "tzale", 159, 1, "", methodObject ),
new Among ( "gile", -1, 1, "", methodObject ),
new Among ( "eme", -1, 1, "", methodObject ),
new Among ( "kume", -1, 1, "", methodObject ),
new Among ( "ne", -1, 1, "", methodObject ),
new Among ( "zione", 164, 1, "", methodObject ),
new Among ( "une", 164, 1, "", methodObject ),
new Among ( "gune", 166, 1, "", methodObject ),
new Among ( "pe", -1, 1, "", methodObject ),
new Among ( "aurre", -1, 1, "", methodObject ),
new Among ( "te", -1, 1, "", methodObject ),
new Among ( "kote", 170, 1, "", methodObject ),
new Among ( "arte", 170, 1, "", methodObject ),
new Among ( "oste", 170, 1, "", methodObject ),
new Among ( "etxe", -1, 1, "", methodObject ),
new Among ( "gai", -1, 1, "", methodObject ),
new Among ( "di", -1, 1, "", methodObject ),
new Among ( "aldi", 176, 1, "", methodObject ),
new Among ( "taldi", 177, 1, "", methodObject ),
new Among ( "geldi", 176, 8, "", methodObject ),
new Among ( "handi", 176, 1, "", methodObject ),
new Among ( "mendi", 176, 1, "", methodObject ),
new Among ( "gei", -1, 1, "", methodObject ),
new Among ( "egi", -1, 1, "", methodObject ),
new Among ( "degi", 183, 1, "", methodObject ),
new Among ( "tegi", 183, 1, "", methodObject ),
new Among ( "nahi", -1, 1, "", methodObject ),
new Among ( "ohi", -1, 1, "", methodObject ),
new Among ( "ki", -1, 1, "", methodObject ),
new Among ( "toki", 188, 1, "", methodObject ),
new Among ( "oi", -1, 1, "", methodObject ),
new Among ( "goi", 190, 1, "", methodObject ),
new Among ( "koi", 190, 1, "", methodObject ),
new Among ( "ari", -1, 1, "", methodObject ),
new Among ( "kari", 193, 1, "", methodObject ),
new Among ( "lari", 193, 1, "", methodObject ),
new Among ( "tari", 193, 1, "", methodObject ),
new Among ( "garri", -1, 2, "", methodObject ),
new Among ( "larri", -1, 1, "", methodObject ),
new Among ( "kirri", -1, 1, "", methodObject ),
new Among ( "duri", -1, 1, "", methodObject ),
new Among ( "asi", -1, 1, "", methodObject ),
new Among ( "ti", -1, 1, "", methodObject ),
new Among ( "ontzi", -1, 1, "", methodObject ),
new Among ( "\u00F1i", -1, 1, "", methodObject ),
new Among ( "ak", -1, 1, "", methodObject ),
new Among ( "ek", -1, 1, "", methodObject ),
new Among ( "tarik", -1, 1, "", methodObject ),
new Among ( "gibel", -1, 1, "", methodObject ),
new Among ( "ail", -1, 1, "", methodObject ),
new Among ( "kail", 209, 1, "", methodObject ),
new Among ( "kan", -1, 1, "", methodObject ),
new Among ( "tan", -1, 1, "", methodObject ),
new Among ( "etan", 212, 1, "", methodObject ),
new Among ( "en", -1, 4, "", methodObject ),
new Among ( "ren", 214, 2, "", methodObject ),
new Among ( "garren", 215, 1, "", methodObject ),
new Among ( "gerren", 215, 1, "", methodObject ),
new Among ( "urren", 215, 1, "", methodObject ),
new Among ( "ten", 214, 4, "", methodObject ),
new Among ( "tzen", 214, 4, "", methodObject ),
new Among ( "zain", -1, 1, "", methodObject ),
new Among ( "tzain", 221, 1, "", methodObject ),
new Among ( "kin", -1, 1, "", methodObject ),
new Among ( "min", -1, 1, "", methodObject ),
new Among ( "dun", -1, 1, "", methodObject ),
new Among ( "asun", -1, 1, "", methodObject ),
new Among ( "tasun", 226, 1, "", methodObject ),
new Among ( "aizun", -1, 1, "", methodObject ),
new Among ( "ondo", -1, 1, "", methodObject ),
new Among ( "kondo", 229, 1, "", methodObject ),
new Among ( "go", -1, 1, "", methodObject ),
new Among ( "ngo", 231, 1, "", methodObject ),
new Among ( "zio", -1, 1, "", methodObject ),
new Among ( "ko", -1, 1, "", methodObject ),
new Among ( "trako", 234, 5, "", methodObject ),
new Among ( "tako", 234, 1, "", methodObject ),
new Among ( "etako", 236, 1, "", methodObject ),
new Among ( "eko", 234, 1, "", methodObject ),
new Among ( "tariko", 234, 1, "", methodObject ),
new Among ( "sko", 234, 1, "", methodObject ),
new Among ( "tuko", 234, 1, "", methodObject ),
new Among ( "minutuko", 241, 6, "", methodObject ),
new Among ( "zko", 234, 1, "", methodObject ),
new Among ( "no", -1, 1, "", methodObject ),
new Among ( "zino", 244, 1, "", methodObject ),
new Among ( "ro", -1, 1, "", methodObject ),
new Among ( "aro", 246, 1, "", methodObject ),
new Among ( "igaro", 247, 9, "", methodObject ),
new Among ( "taro", 247, 1, "", methodObject ),
new Among ( "zaro", 247, 1, "", methodObject ),
new Among ( "ero", 246, 1, "", methodObject ),
new Among ( "giro", 246, 1, "", methodObject ),
new Among ( "oro", 246, 1, "", methodObject ),
new Among ( "oso", -1, 1, "", methodObject ),
new Among ( "to", -1, 1, "", methodObject ),
new Among ( "tto", 255, 1, "", methodObject ),
new Among ( "zto", 255, 1, "", methodObject ),
new Among ( "txo", -1, 1, "", methodObject ),
new Among ( "tzo", -1, 1, "", methodObject ),
new Among ( "gintzo", 259, 1, "", methodObject ),
new Among ( "\u00F1o", -1, 1, "", methodObject ),
new Among ( "zp", -1, 1, "", methodObject ),
new Among ( "ar", -1, 1, "", methodObject ),
new Among ( "dar", 263, 1, "", methodObject ),
new Among ( "behar", 263, 1, "", methodObject ),
new Among ( "zehar", 263, 7, "", methodObject ),
new Among ( "liar", 263, 1, "", methodObject ),
new Among ( "tiar", 263, 1, "", methodObject ),
new Among ( "tar", 263, 1, "", methodObject ),
new Among ( "tzar", 263, 1, "", methodObject ),
new Among ( "or", -1, 2, "", methodObject ),
new Among ( "kor", 271, 1, "", methodObject ),
new Among ( "os", -1, 1, "", methodObject ),
new Among ( "ket", -1, 1, "", methodObject ),
new Among ( "du", -1, 1, "", methodObject ),
new Among ( "mendu", 275, 1, "", methodObject ),
new Among ( "ordu", 275, 1, "", methodObject ),
new Among ( "leku", -1, 1, "", methodObject ),
new Among ( "buru", -1, 2, "", methodObject ),
new Among ( "duru", -1, 1, "", methodObject ),
new Among ( "tsu", -1, 1, "", methodObject ),
new Among ( "tu", -1, 1, "", methodObject ),
new Among ( "tatu", 282, 4, "", methodObject ),
new Among ( "mentu", 282, 1, "", methodObject ),
new Among ( "estu", 282, 1, "", methodObject ),
new Among ( "txu", -1, 1, "", methodObject ),
new Among ( "zu", -1, 1, "", methodObject ),
new Among ( "tzu", 287, 1, "", methodObject ),
new Among ( "gintzu", 288, 1, "", methodObject ),
new Among ( "z", -1, 1, "", methodObject ),
new Among ( "ez", 290, 1, "", methodObject ),
new Among ( "eroz", 290, 1, "", methodObject ),
new Among ( "tz", 290, 1, "", methodObject ),
new Among ( "koitz", 293, 1, "", methodObject )
};
private final static Among a_2[] = {
new Among ( "zlea", -1, 2, "", methodObject ),
new Among ( "keria", -1, 1, "", methodObject ),
new Among ( "la", -1, 1, "", methodObject ),
new Among ( "era", -1, 1, "", methodObject ),
new Among ( "dade", -1, 1, "", methodObject ),
new Among ( "tade", -1, 1, "", methodObject ),
new Among ( "date", -1, 1, "", methodObject ),
new Among ( "tate", -1, 1, "", methodObject ),
new Among ( "gi", -1, 1, "", methodObject ),
new Among ( "ki", -1, 1, "", methodObject ),
new Among ( "ik", -1, 1, "", methodObject ),
new Among ( "lanik", 10, 1, "", methodObject ),
new Among ( "rik", 10, 1, "", methodObject ),
new Among ( "larik", 12, 1, "", methodObject ),
new Among ( "ztik", 10, 1, "", methodObject ),
new Among ( "go", -1, 1, "", methodObject ),
new Among ( "ro", -1, 1, "", methodObject ),
new Among ( "ero", 16, 1, "", methodObject ),
new Among ( "to", -1, 1, "", methodObject )
};
private static final char g_v[] = {17, 65, 16 };
private int I_p2;
private int I_p1;
private int I_pV;
private void copy_from(BasqueStemmer other) {
I_p2 = other.I_p2;
I_p1 = other.I_p1;
I_pV = other.I_pV;
super.copy_from(other);
}
private boolean r_mark_regions() {
int v_1;
int v_2;
int v_3;
int v_6;
int v_8;
// (, line 25
I_pV = limit;
I_p1 = limit;
I_p2 = limit;
// do, line 31
v_1 = cursor;
lab0: do {
// (, line 31
// or, line 33
lab1: do {
v_2 = cursor;
lab2: do {
// (, line 32
if (!(in_grouping(g_v, 97, 117)))
{
break lab2;
}
// or, line 32
lab3: do {
v_3 = cursor;
lab4: do {
// (, line 32
if (!(out_grouping(g_v, 97, 117)))
{
break lab4;
}
// gopast, line 32
golab5: while(true)
{
lab6: do {
if (!(in_grouping(g_v, 97, 117)))
{
break lab6;
}
break golab5;
} while (false);
if (cursor >= limit)
{
break lab4;
}
cursor++;
}
break lab3;
} while (false);
cursor = v_3;
// (, line 32
if (!(in_grouping(g_v, 97, 117)))
{
break lab2;
}
// gopast, line 32
golab7: while(true)
{
lab8: do {
if (!(out_grouping(g_v, 97, 117)))
{
break lab8;
}
break golab7;
} while (false);
if (cursor >= limit)
{
break lab2;
}
cursor++;
}
} while (false);
break lab1;
} while (false);
cursor = v_2;
// (, line 34
if (!(out_grouping(g_v, 97, 117)))
{
break lab0;
}
// or, line 34
lab9: do {
v_6 = cursor;
lab10: do {
// (, line 34
if (!(out_grouping(g_v, 97, 117)))
{
break lab10;
}
// gopast, line 34
golab11: while(true)
{
lab12: do {
if (!(in_grouping(g_v, 97, 117)))
{
break lab12;
}
break golab11;
} while (false);
if (cursor >= limit)
{
break lab10;
}
cursor++;
}
break lab9;
} while (false);
cursor = v_6;
// (, line 34
if (!(in_grouping(g_v, 97, 117)))
{
break lab0;
}
// next, line 34
if (cursor >= limit)
{
break lab0;
}
cursor++;
} while (false);
} while (false);
// setmark pV, line 35
I_pV = cursor;
} while (false);
cursor = v_1;
// do, line 37
v_8 = cursor;
lab13: do {
// (, line 37
// gopast, line 38
golab14: while(true)
{
lab15: do {
if (!(in_grouping(g_v, 97, 117)))
{
break lab15;
}
break golab14;
} while (false);
if (cursor >= limit)
{
break lab13;
}
cursor++;
}
// gopast, line 38
golab16: while(true)
{
lab17: do {
if (!(out_grouping(g_v, 97, 117)))
{
break lab17;
}
break golab16;
} while (false);
if (cursor >= limit)
{
break lab13;
}
cursor++;
}
// setmark p1, line 38
I_p1 = cursor;
// gopast, line 39
golab18: while(true)
{
lab19: do {
if (!(in_grouping(g_v, 97, 117)))
{
break lab19;
}
break golab18;
} while (false);
if (cursor >= limit)
{
break lab13;
}
cursor++;
}
// gopast, line 39
golab20: while(true)
{
lab21: do {
if (!(out_grouping(g_v, 97, 117)))
{
break lab21;
}
break golab20;
} while (false);
if (cursor >= limit)
{
break lab13;
}
cursor++;
}
// setmark p2, line 39
I_p2 = cursor;
} while (false);
cursor = v_8;
return true;
}
private boolean r_RV() {
if (!(I_pV <= cursor))
{
return false;
}
return true;
}
private boolean r_R2() {
if (!(I_p2 <= cursor))
{
return false;
}
return true;
}
private boolean r_R1() {
if (!(I_p1 <= cursor))
{
return false;
}
return true;
}
private boolean r_aditzak() {
int among_var;
// (, line 49
// [, line 50
ket = cursor;
// substring, line 50
among_var = find_among_b(a_0, 109);
if (among_var == 0)
{
return false;
}
// ], line 50
bra = cursor;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 61
// call RV, line 61
if (!r_RV())
{
return false;
}
// delete, line 61
slice_del();
break;
case 2:
// (, line 63
// call R2, line 63
if (!r_R2())
{
return false;
}
// delete, line 63
slice_del();
break;
case 3:
// (, line 65
// <-, line 65
slice_from("atseden");
break;
case 4:
// (, line 67
// <-, line 67
slice_from("arabera");
break;
case 5:
// (, line 69
// <-, line 69
slice_from("baditu");
break;
}
return true;
}
private boolean r_izenak() {
int among_var;
// (, line 74
// [, line 75
ket = cursor;
// substring, line 75
among_var = find_among_b(a_1, 295);
if (among_var == 0)
{
return false;
}
// ], line 75
bra = cursor;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 105
// call RV, line 105
if (!r_RV())
{
return false;
}
// delete, line 105
slice_del();
break;
case 2:
// (, line 107
// call R2, line 107
if (!r_R2())
{
return false;
}
// delete, line 107
slice_del();
break;
case 3:
// (, line 109
// <-, line 109
slice_from("jok");
break;
case 4:
// (, line 111
// call R1, line 111
if (!r_R1())
{
return false;
}
// delete, line 111
slice_del();
break;
case 5:
// (, line 113
// <-, line 113
slice_from("tra");
break;
case 6:
// (, line 115
// <-, line 115
slice_from("minutu");
break;
case 7:
// (, line 117
// <-, line 117
slice_from("zehar");
break;
case 8:
// (, line 119
// <-, line 119
slice_from("geldi");
break;
case 9:
// (, line 121
// <-, line 121
slice_from("igaro");
break;
case 10:
// (, line 123
// <-, line 123
slice_from("aurka");
break;
}
return true;
}
private boolean r_adjetiboak() {
int among_var;
// (, line 127
// [, line 128
ket = cursor;
// substring, line 128
among_var = find_among_b(a_2, 19);
if (among_var == 0)
{
return false;
}
// ], line 128
bra = cursor;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 131
// call RV, line 131
if (!r_RV())
{
return false;
}
// delete, line 131
slice_del();
break;
case 2:
// (, line 133
// <-, line 133
slice_from("z");
break;
}
return true;
}
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
// (, line 139
// do, line 140
v_1 = cursor;
lab0: do {
// call mark_regions, line 140
if (!r_mark_regions())
{
break lab0;
}
} while (false);
cursor = v_1;
// backwards, line 141
limit_backward = cursor; cursor = limit;
// (, line 141
// repeat, line 142
replab1: while(true)
{
v_2 = limit - cursor;
lab2: do {
// call aditzak, line 142
if (!r_aditzak())
{
break lab2;
}
continue replab1;
} while (false);
cursor = limit - v_2;
break replab1;
}
// repeat, line 143
replab3: while(true)
{
v_3 = limit - cursor;
lab4: do {
// call izenak, line 143
if (!r_izenak())
{
break lab4;
}
continue replab3;
} while (false);
cursor = limit - v_3;
break replab3;
}
// do, line 144
v_4 = limit - cursor;
lab5: do {
// call adjetiboak, line 144
if (!r_adjetiboak())
{
break lab5;
}
} while (false);
cursor = limit - v_4;
cursor = limit_backward; return true;
}
public boolean equals( Object o ) {
return o instanceof BasqueStemmer;
}
public int hashCode() {
return BasqueStemmer.class.getName().hashCode();
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,220 @@
# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
a
abans
ací
ah
així
això
al
als
aleshores
algun
alguna
algunes
alguns
alhora
allà
allí
allò
altra
altre
altres
amb
ambdós
ambdues
apa
aquell
aquella
aquelles
aquells
aquest
aquesta
aquestes
aquests
aquí
baix
cada
cadascú
cadascuna
cadascunes
cadascuns
com
contra
d'un
d'una
d'unes
d'uns
dalt
de
del
dels
des
després
dins
dintre
donat
doncs
durant
e
eh
el
els
em
en
encara
ens
entre
érem
eren
éreu
es
és
esta
està
estàvem
estaven
estàveu
esteu
et
etc
ets
fins
fora
gairebé
ha
han
has
havia
he
hem
heu
hi
ho
i
igual
iguals
ja
l'hi
la
les
li
li'n
llavors
m'he
ma
mal
malgrat
mateix
mateixa
mateixes
mateixos
me
mentre
més
meu
meus
meva
meves
molt
molta
moltes
molts
mon
mons
n'he
n'hi
ne
ni
no
nogensmenys
només
nosaltres
nostra
nostre
nostres
o
oh
oi
on
pas
pel
pels
per
però
perquè
poc
poca
pocs
poques
potser
propi
qual
quals
quan
quant
que
què
quelcom
qui
quin
quina
quines
quins
s'ha
s'han
sa
semblant
semblants
ses
seu
seus
seva
seves
si
sobre
sobretot
sóc
solament
sols
son
són
sons
sota
sou
t'ha
t'han
t'he
ta
tal
també
tampoc
tan
tant
tanta
tantes
teu
teus
teva
teves
ton
tons
tot
tota
totes
tots
un
una
unes
uns
us
va
vaig
vam
van
vas
veu
vosaltres
vostra
vostre
vostres

View File

@ -0,0 +1,99 @@
# example set of Basque stopwords
al
anitz
arabera
asko
baina
bat
batean
batek
bati
batzuei
batzuek
batzuetan
batzuk
bera
beraiek
berau
berauek
bere
berori
beroriek
beste
bezala
da
dago
dira
ditu
du
dute
edo
egin
ere
eta
eurak
ez
gainera
gu
gutxi
guzti
haiei
haiek
haietan
hainbeste
hala
han
handik
hango
hara
hari
hark
hartan
hau
hauei
hauek
hauetan
hemen
hemendik
hemengo
hi
hona
honek
honela
honetan
honi
hor
hori
horiei
horiek
horietan
horko
horra
horrek
horrela
horretan
horri
hortik
hura
izan
ni
noiz
nola
non
nondik
nongo
nor
nora
ze
zein
zen
zenbait
zenbat
zer
zergatik
ziren
zituen
zu
zuek
zuen
zuten

View File

@ -0,0 +1,46 @@
# example set of Armenian stopwords.
այդ
այլ
այն
այս
դու
դուք
եմ
են
ենք
ես
եք
է
էի
էին
էինք
էիր
էիք
էր
ըստ
թ
ի
ին
իսկ
իր
կամ
համար
հետ
հետո
մենք
մեջ
մի
ն
նա
նաև
նրա
նրանք
որ
որը
որոնք
որպես
ու
ում
պիտի
վրա
և

View File

@ -0,0 +1,53 @@
package org.apache.lucene.analysis.ca;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
  /** Constructing the analyzer throws NPE if the bundled
   *  stopwords resource is missing from the classpath. */
  public void testResourcesAvailable() {
    new CatalanAnalyzer(TEST_VERSION_CURRENT);
  }

  /** Checks Snowball stemming of inflected forms and removal of stopwords. */
  public void testBasics() throws IOException {
    Analyzer analyzer = new CatalanAnalyzer(TEST_VERSION_CURRENT);
    // singular and plural collapse to the same stem
    checkOneTermReuse(analyzer, "llengües", "llengu");
    checkOneTermReuse(analyzer, "llengua", "llengu");
    // a stopword yields no tokens at all
    assertAnalyzesTo(analyzer, "un", new String[] { });
  }

  /** Checks that terms in the stem-exclusion set bypass the stemmer. */
  public void testExclude() throws IOException {
    Set<String> exclusions = new HashSet<String>();
    exclusions.add("llengües");
    Analyzer analyzer = new CatalanAnalyzer(
        TEST_VERSION_CURRENT, CatalanAnalyzer.getDefaultStopSet(), exclusions);
    checkOneTermReuse(analyzer, "llengües", "llengües"); // excluded: kept verbatim
    checkOneTermReuse(analyzer, "llengua", "llengu");    // not excluded: stemmed
  }
}

View File

@ -0,0 +1,53 @@
package org.apache.lucene.analysis.eu;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
  /** Constructing the analyzer throws NPE if the bundled
   *  stopwords resource is missing from the classpath. */
  public void testResourcesAvailable() {
    new BasqueAnalyzer(TEST_VERSION_CURRENT);
  }

  /** Checks Snowball stemming of inflected forms and removal of stopwords. */
  public void testBasics() throws IOException {
    Analyzer analyzer = new BasqueAnalyzer(TEST_VERSION_CURRENT);
    // base form and inflected form collapse to the same stem
    checkOneTermReuse(analyzer, "zaldi", "zaldi");
    checkOneTermReuse(analyzer, "zaldiak", "zaldi");
    // a stopword yields no tokens at all
    assertAnalyzesTo(analyzer, "izan", new String[] { });
  }

  /** Checks that terms in the stem-exclusion set bypass the stemmer. */
  public void testExclude() throws IOException {
    Set<String> exclusions = new HashSet<String>();
    exclusions.add("zaldiak");
    Analyzer analyzer = new BasqueAnalyzer(
        TEST_VERSION_CURRENT, BasqueAnalyzer.getDefaultStopSet(), exclusions);
    checkOneTermReuse(analyzer, "zaldiak", "zaldiak"); // excluded: kept verbatim
    checkOneTermReuse(analyzer, "mendiari", "mendi");  // not excluded: stemmed
  }
}

View File

@ -0,0 +1,53 @@
package org.apache.lucene.analysis.hy;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
  /** Constructing the analyzer throws NPE if the bundled
   *  stopwords resource is missing from the classpath. */
  public void testResourcesAvailable() {
    new ArmenianAnalyzer(TEST_VERSION_CURRENT);
  }

  /** Checks Snowball stemming of inflected forms and removal of stopwords. */
  public void testBasics() throws IOException {
    Analyzer analyzer = new ArmenianAnalyzer(TEST_VERSION_CURRENT);
    // singular and plural collapse to the same stem
    checkOneTermReuse(analyzer, "արծիվ", "արծ");
    checkOneTermReuse(analyzer, "արծիվներ", "արծ");
    // a stopword yields no tokens at all
    assertAnalyzesTo(analyzer, "է", new String[] { });
  }

  /** Checks that terms in the stem-exclusion set bypass the stemmer. */
  public void testExclude() throws IOException {
    Set<String> exclusions = new HashSet<String>();
    exclusions.add("արծիվներ");
    Analyzer analyzer = new ArmenianAnalyzer(
        TEST_VERSION_CURRENT, ArmenianAnalyzer.getDefaultStopSet(), exclusions);
    checkOneTermReuse(analyzer, "արծիվներ", "արծիվներ"); // excluded: kept verbatim
    checkOneTermReuse(analyzer, "արծիվ", "արծ");         // not excluded: stemmed
  }
}