mirror of https://github.com/apache/lucene.git
LUCENE-2624: add armenian, basque, catalan analyzers from snowball
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@990459 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
33cc5a041e
commit
13fd70521a
|
@ -220,6 +220,9 @@ New features
|
|||
* LUCENE-2581: FastVectorHighlighter: add Encoder to FragmentsBuilder.
|
||||
(Koji Sekiguchi)
|
||||
|
||||
* LUCENE-2624: Add Analyzers for Armenian, Basque, and Catalan, from snowball.
|
||||
(Robert Muir)
|
||||
|
||||
Build
|
||||
|
||||
* LUCENE-2124: Moved the JDK-based collation support from contrib/collation
|
||||
|
|
|
@ -0,0 +1,130 @@
|
|||
package org.apache.lucene.analysis.ca;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.tartarus.snowball.ext.CatalanStemmer;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} for Catalan.
|
||||
*/
|
||||
public final class CatalanAnalyzer extends StopwordAnalyzerBase {
|
||||
private final Set<?> stemExclusionSet;
|
||||
|
||||
/** File containing default Catalan stopwords. */
|
||||
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
|
||||
|
||||
/**
|
||||
* Returns an unmodifiable instance of the default stop words set.
|
||||
* @return default stop words set.
|
||||
*/
|
||||
public static Set<?> getDefaultStopSet(){
|
||||
return DefaultSetHolder.DEFAULT_STOP_SET;
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
|
||||
* accesses the static final set the first time.;
|
||||
*/
|
||||
private static class DefaultSetHolder {
|
||||
static final Set<?> DEFAULT_STOP_SET;
|
||||
|
||||
static {
|
||||
try {
|
||||
DEFAULT_STOP_SET = loadStopwordSet(false,
|
||||
CatalanAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
|
||||
} catch (IOException ex) {
|
||||
// default set should always be present as it is part of the
|
||||
// distribution (JAR)
|
||||
throw new RuntimeException("Unable to load default stopword set");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
|
||||
*/
|
||||
public CatalanAnalyzer(Version matchVersion) {
|
||||
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
* @param stopwords a stopword set
|
||||
*/
|
||||
public CatalanAnalyzer(Version matchVersion, Set<?> stopwords) {
|
||||
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* stemming.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
* @param stopwords a stopword set
|
||||
* @param stemExclusionSet a set of terms not to be stemmed
|
||||
*/
|
||||
public CatalanAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
|
||||
super(matchVersion, stopwords);
|
||||
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
|
||||
matchVersion, stemExclusionSet));
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a
|
||||
* {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* which tokenizes all the text in the provided {@link Reader}.
|
||||
*
|
||||
* @return A
|
||||
* {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from an {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided and {@link SnowballFilter}.
|
||||
*/
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
|
||||
TokenStream result = new StandardFilter(source);
|
||||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new CatalanStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html><head></head>
|
||||
<body>
|
||||
Analyzer for Catalan.
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,130 @@
|
|||
package org.apache.lucene.analysis.eu;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.tartarus.snowball.ext.BasqueStemmer;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} for Basque.
|
||||
*/
|
||||
public final class BasqueAnalyzer extends StopwordAnalyzerBase {
|
||||
private final Set<?> stemExclusionSet;
|
||||
|
||||
/** File containing default Basque stopwords. */
|
||||
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
|
||||
|
||||
/**
|
||||
* Returns an unmodifiable instance of the default stop words set.
|
||||
* @return default stop words set.
|
||||
*/
|
||||
public static Set<?> getDefaultStopSet(){
|
||||
return DefaultSetHolder.DEFAULT_STOP_SET;
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
|
||||
* accesses the static final set the first time.;
|
||||
*/
|
||||
private static class DefaultSetHolder {
|
||||
static final Set<?> DEFAULT_STOP_SET;
|
||||
|
||||
static {
|
||||
try {
|
||||
DEFAULT_STOP_SET = loadStopwordSet(false,
|
||||
BasqueAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
|
||||
} catch (IOException ex) {
|
||||
// default set should always be present as it is part of the
|
||||
// distribution (JAR)
|
||||
throw new RuntimeException("Unable to load default stopword set");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
|
||||
*/
|
||||
public BasqueAnalyzer(Version matchVersion) {
|
||||
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
* @param stopwords a stopword set
|
||||
*/
|
||||
public BasqueAnalyzer(Version matchVersion, Set<?> stopwords) {
|
||||
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* stemming.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
* @param stopwords a stopword set
|
||||
* @param stemExclusionSet a set of terms not to be stemmed
|
||||
*/
|
||||
public BasqueAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
|
||||
super(matchVersion, stopwords);
|
||||
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
|
||||
matchVersion, stemExclusionSet));
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a
|
||||
* {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* which tokenizes all the text in the provided {@link Reader}.
|
||||
*
|
||||
* @return A
|
||||
* {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from an {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided and {@link SnowballFilter}.
|
||||
*/
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
|
||||
TokenStream result = new StandardFilter(source);
|
||||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new BasqueStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html><head></head>
|
||||
<body>
|
||||
Analyzer for Basque.
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,130 @@
|
|||
package org.apache.lucene.analysis.hy;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.tartarus.snowball.ext.ArmenianStemmer;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} for Armenian.
|
||||
*/
|
||||
public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
|
||||
private final Set<?> stemExclusionSet;
|
||||
|
||||
/** File containing default Armenian stopwords. */
|
||||
public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
|
||||
|
||||
/**
|
||||
* Returns an unmodifiable instance of the default stop words set.
|
||||
* @return default stop words set.
|
||||
*/
|
||||
public static Set<?> getDefaultStopSet(){
|
||||
return DefaultSetHolder.DEFAULT_STOP_SET;
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
|
||||
* accesses the static final set the first time.;
|
||||
*/
|
||||
private static class DefaultSetHolder {
|
||||
static final Set<?> DEFAULT_STOP_SET;
|
||||
|
||||
static {
|
||||
try {
|
||||
DEFAULT_STOP_SET = loadStopwordSet(false,
|
||||
ArmenianAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
|
||||
} catch (IOException ex) {
|
||||
// default set should always be present as it is part of the
|
||||
// distribution (JAR)
|
||||
throw new RuntimeException("Unable to load default stopword set");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
|
||||
*/
|
||||
public ArmenianAnalyzer(Version matchVersion) {
|
||||
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
* @param stopwords a stopword set
|
||||
*/
|
||||
public ArmenianAnalyzer(Version matchVersion, Set<?> stopwords) {
|
||||
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
|
||||
* provided this analyzer will add a {@link KeywordMarkerFilter} before
|
||||
* stemming.
|
||||
*
|
||||
* @param matchVersion lucene compatibility version
|
||||
* @param stopwords a stopword set
|
||||
* @param stemExclusionSet a set of terms not to be stemmed
|
||||
*/
|
||||
public ArmenianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
|
||||
super(matchVersion, stopwords);
|
||||
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
|
||||
matchVersion, stemExclusionSet));
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a
|
||||
* {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* which tokenizes all the text in the provided {@link Reader}.
|
||||
*
|
||||
* @return A
|
||||
* {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
|
||||
* built from an {@link StandardTokenizer} filtered with
|
||||
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
|
||||
* , {@link KeywordMarkerFilter} if a stem exclusion set is
|
||||
* provided and {@link SnowballFilter}.
|
||||
*/
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
|
||||
TokenStream result = new StandardFilter(source);
|
||||
result = new LowerCaseFilter(matchVersion, result);
|
||||
result = new StopFilter(matchVersion, result, stopwords);
|
||||
if(!stemExclusionSet.isEmpty())
|
||||
result = new KeywordMarkerFilter(result, stemExclusionSet);
|
||||
result = new SnowballFilter(result, new ArmenianStemmer());
|
||||
return new TokenStreamComponents(source, result);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html><head></head>
|
||||
<body>
|
||||
Analyzer for Armenian.
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,516 @@
|
|||
// This file was generated automatically by the Snowball to Java compiler
|
||||
|
||||
package org.tartarus.snowball.ext;
|
||||
|
||||
import org.tartarus.snowball.Among;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
|
||||
/**
|
||||
* This class was automatically generated by a Snowball to Java compiler
|
||||
* It implements the stemming algorithm defined by a snowball script.
|
||||
*/
|
||||
|
||||
public class ArmenianStemmer extends SnowballProgram {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
private final static ArmenianStemmer methodObject = new ArmenianStemmer ();
|
||||
|
||||
private final static Among a_0[] = {
|
||||
new Among ( "\u0580\u0578\u0580\u0564", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0580\u0578\u0580\u0564", 0, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056C\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056F\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0580\u0561\u056F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0572", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056F\u0561\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0580\u0561\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u056F\u0565\u0576", 8, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0580\u0565\u0576", 8, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0580\u0567\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0563\u056B\u0576", 12, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u057E\u056B\u0576", 12, 1, "", methodObject ),
|
||||
new Among ( "\u056C\u0561\u0575\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0578\u0582\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057A\u0565\u057D", -1, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u057E", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u057F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u057E\u0565\u057F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u056F\u0578\u057F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0562\u0561\u0580", -1, 1, "", methodObject )
|
||||
};
|
||||
|
||||
private final static Among a_1[] = {
|
||||
new Among ( "\u0561", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0561", 0, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u0561", 0, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0565", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0580\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0565\u0581\u056B", 6, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056C", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0568\u0561\u056C", 8, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576\u0561\u056C", 8, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0576\u0561\u056C", 8, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0576\u0561\u056C", 8, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u056C", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0568\u0565\u056C", 13, 1, "", methodObject ),
|
||||
new Among ( "\u0576\u0565\u056C", 13, 1, "", methodObject ),
|
||||
new Among ( "\u0581\u0576\u0565\u056C", 15, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u0576\u0565\u056C", 16, 1, "", methodObject ),
|
||||
new Among ( "\u0579\u0565\u056C", 13, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0565\u056C", 13, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u057E\u0565\u056C", 19, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u057E\u0565\u056C", 19, 1, "", methodObject ),
|
||||
new Among ( "\u057F\u0565\u056C", 13, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u057F\u0565\u056C", 22, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u057F\u0565\u056C", 22, 1, "", methodObject ),
|
||||
new Among ( "\u056F\u0578\u057F\u0565\u056C", 24, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0561\u056E", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0574", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0578\u0582\u0574", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0581\u0561\u0576", 29, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0561\u0576", 30, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0580\u056B\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u056B\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u056B\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0565\u0581\u056B\u0576", 34, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056C\u056B\u057D", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u056C\u056B\u057D", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u057E", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0561\u057E", 38, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u0561\u057E", 38, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056C\u0578\u057E", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u056C\u0578\u057E", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0580", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0561\u0580", 43, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u0561\u0580", 43, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0580\u056B\u0580", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u056B\u0580", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u056B\u0580", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0565\u0581\u056B\u0580", 48, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0580\u0565\u0581", 51, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056C\u0578\u0582\u0581", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u056C\u0578\u0582\u0581", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056C\u0578\u0582", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u056C\u0578\u0582", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0581\u0561\u0584", 57, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0561\u0584", 58, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0580\u056B\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u056B\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u056B\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0565\u0581\u056B\u0584", 62, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0581\u0561\u0576\u0584", 64, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0561\u0576\u0584", 65, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u0580\u056B\u0576\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0581\u056B\u0576\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0581\u056B\u0576\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0565\u0581\u056B\u0576\u0584", 69, 1, "", methodObject )
|
||||
};
|
||||
|
||||
private final static Among a_2[] = {
|
||||
new Among ( "\u0578\u0580\u0564", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0575\u0569", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0570\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0581\u056B", -1, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u056C", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0575\u0561\u056F", 5, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576\u0561\u056F", 5, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u056F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u056F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057A\u0561\u0576", 10, 1, "", methodObject ),
|
||||
new Among ( "\u057D\u057F\u0561\u0576", 10, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0580\u0561\u0576", 10, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0572\u0567\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0575\u0578\u0582\u0576", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0569\u0575\u0578\u0582\u0576", 15, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056E\u0578", -1, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u0579", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u057D", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u057D\u057F", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0563\u0561\u0580", -1, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0578\u0580", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u057E\u0578\u0580", 22, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0581", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576\u0585\u0581", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0584", -1, 1, "", methodObject ),
|
||||
new Among ( "\u0579\u0565\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u056C\u056B\u0584", 29, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0576\u056B\u0584", 29, 1, "", methodObject ),
|
||||
new Among ( "\u057E\u0561\u056E\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0575\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0565\u0576\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0576\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0578\u0582\u0576\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0574\u0578\u0582\u0576\u0584", 36, 1, "", methodObject ),
|
||||
new Among ( "\u056B\u0579\u0584", 27, 1, "", methodObject ),
|
||||
new Among ( "\u0561\u0580\u0584", 27, 1, "", methodObject )
|
||||
};
|
||||
|
||||
    // Suffix table for r_ending (inflectional endings; case/number/article
    // markers).  57 entries — must stay in sync with find_among_b(a_3, 57).
    // Among(form, substring-link-index, result, ...): every entry here maps
    // to result 1 (delete the suffix).
    private final static Among a_3[] = {
        new Among ( "\u057D\u0561", -1, 1, "", methodObject ),
        new Among ( "\u057E\u0561", -1, 1, "", methodObject ),
        new Among ( "\u0561\u0574\u0562", -1, 1, "", methodObject ),
        new Among ( "\u0564", -1, 1, "", methodObject ),
        new Among ( "\u0561\u0576\u0564", 3, 1, "", methodObject ),
        new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u0564", 4, 1, "", methodObject ),
        new Among ( "\u057E\u0561\u0576\u0564", 4, 1, "", methodObject ),
        new Among ( "\u0578\u057B\u0564", 3, 1, "", methodObject ),
        new Among ( "\u0565\u0580\u0564", 3, 1, "", methodObject ),
        new Among ( "\u0576\u0565\u0580\u0564", 8, 1, "", methodObject ),
        new Among ( "\u0578\u0582\u0564", 3, 1, "", methodObject ),
        new Among ( "\u0568", -1, 1, "", methodObject ),
        new Among ( "\u0561\u0576\u0568", 11, 1, "", methodObject ),
        new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u0568", 12, 1, "", methodObject ),
        new Among ( "\u057E\u0561\u0576\u0568", 12, 1, "", methodObject ),
        new Among ( "\u0578\u057B\u0568", 11, 1, "", methodObject ),
        new Among ( "\u0565\u0580\u0568", 11, 1, "", methodObject ),
        new Among ( "\u0576\u0565\u0580\u0568", 16, 1, "", methodObject ),
        new Among ( "\u056B", -1, 1, "", methodObject ),
        new Among ( "\u057E\u056B", 18, 1, "", methodObject ),
        new Among ( "\u0565\u0580\u056B", 18, 1, "", methodObject ),
        new Among ( "\u0576\u0565\u0580\u056B", 20, 1, "", methodObject ),
        new Among ( "\u0561\u0576\u0578\u0582\u0574", -1, 1, "", methodObject ),
        new Among ( "\u0565\u0580\u0578\u0582\u0574", -1, 1, "", methodObject ),
        new Among ( "\u0576\u0565\u0580\u0578\u0582\u0574", 23, 1, "", methodObject ),
        new Among ( "\u0576", -1, 1, "", methodObject ),
        new Among ( "\u0561\u0576", 25, 1, "", methodObject ),
        new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576", 26, 1, "", methodObject ),
        new Among ( "\u057E\u0561\u0576", 26, 1, "", methodObject ),
        new Among ( "\u056B\u0576", 25, 1, "", methodObject ),
        new Among ( "\u0565\u0580\u056B\u0576", 29, 1, "", methodObject ),
        new Among ( "\u0576\u0565\u0580\u056B\u0576", 30, 1, "", methodObject ),
        new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u0576", 25, 1, "", methodObject ),
        new Among ( "\u0565\u0580\u0576", 25, 1, "", methodObject ),
        new Among ( "\u0576\u0565\u0580\u0576", 33, 1, "", methodObject ),
        new Among ( "\u0578\u0582\u0576", 25, 1, "", methodObject ),
        new Among ( "\u0578\u057B", -1, 1, "", methodObject ),
        new Among ( "\u0578\u0582\u0569\u0575\u0561\u0576\u057D", -1, 1, "", methodObject ),
        new Among ( "\u057E\u0561\u0576\u057D", -1, 1, "", methodObject ),
        new Among ( "\u0578\u057B\u057D", -1, 1, "", methodObject ),
        new Among ( "\u0578\u057E", -1, 1, "", methodObject ),
        new Among ( "\u0561\u0576\u0578\u057E", 40, 1, "", methodObject ),
        new Among ( "\u057E\u0578\u057E", 40, 1, "", methodObject ),
        new Among ( "\u0565\u0580\u0578\u057E", 40, 1, "", methodObject ),
        new Among ( "\u0576\u0565\u0580\u0578\u057E", 43, 1, "", methodObject ),
        new Among ( "\u0565\u0580", -1, 1, "", methodObject ),
        new Among ( "\u0576\u0565\u0580", 45, 1, "", methodObject ),
        new Among ( "\u0581", -1, 1, "", methodObject ),
        new Among ( "\u056B\u0581", 47, 1, "", methodObject ),
        new Among ( "\u057E\u0561\u0576\u056B\u0581", 48, 1, "", methodObject ),
        new Among ( "\u0578\u057B\u056B\u0581", 48, 1, "", methodObject ),
        new Among ( "\u057E\u056B\u0581", 48, 1, "", methodObject ),
        new Among ( "\u0565\u0580\u056B\u0581", 48, 1, "", methodObject ),
        new Among ( "\u0576\u0565\u0580\u056B\u0581", 52, 1, "", methodObject ),
        new Among ( "\u0581\u056B\u0581", 48, 1, "", methodObject ),
        new Among ( "\u0578\u0581", 47, 1, "", methodObject ),
        new Among ( "\u0578\u0582\u0581", 47, 1, "", methodObject )
    };
|
||||
|
||||
    // Bit-set of Armenian vowels, consumed by in_grouping/out_grouping with
    // the code-point bounds 1377..1413 (U+0561..U+0585) used in r_mark_regions.
    private static final char g_v[] = {209, 4, 128, 0, 18 };

    private int I_p2;   // start of region R2, set by r_mark_regions
    private int I_pV;   // start of region pV, set by r_mark_regions
|
||||
|
||||
private void copy_from(ArmenianStemmer other) {
|
||||
I_p2 = other.I_p2;
|
||||
I_pV = other.I_pV;
|
||||
super.copy_from(other);
|
||||
}
|
||||
|
||||
    /**
     * Computes the standard Snowball regions for the current word:
     * pV is set just after the first vowel, p2 just after the second
     * vowel–non-vowel sequence.  Vowels are the code points covered by
     * the g_v bitmap over the range 1377..1413 (U+0561..U+0585).
     * Always returns true; if the word is too short the marks simply
     * stay at {@code limit}.
     */
    private boolean r_mark_regions() {
        int v_1;
        // (, line 58
        I_pV = limit;
        I_p2 = limit;
        // do, line 62 — scan with a throwaway cursor, restore it afterwards
        v_1 = cursor;
        lab0: do {
            // (, line 62
            // gopast, line 63 — advance to the first vowel
            golab1: while(true)
            {
                lab2: do {
                    if (!(in_grouping(g_v, 1377, 1413)))
                    {
                        break lab2;
                    }
                    break golab1;
                } while (false);
                if (cursor >= limit)
                {
                    // ran off the end: leave the marks at limit
                    break lab0;
                }
                cursor++;
            }
            // setmark pV, line 63
            I_pV = cursor;
            // gopast, line 63 — advance past the vowel run (first non-vowel)
            golab3: while(true)
            {
                lab4: do {
                    if (!(out_grouping(g_v, 1377, 1413)))
                    {
                        break lab4;
                    }
                    break golab3;
                } while (false);
                if (cursor >= limit)
                {
                    break lab0;
                }
                cursor++;
            }
            // gopast, line 64 — advance to the second vowel
            golab5: while(true)
            {
                lab6: do {
                    if (!(in_grouping(g_v, 1377, 1413)))
                    {
                        break lab6;
                    }
                    break golab5;
                } while (false);
                if (cursor >= limit)
                {
                    break lab0;
                }
                cursor++;
            }
            // gopast, line 64 — advance past the second vowel run
            golab7: while(true)
            {
                lab8: do {
                    if (!(out_grouping(g_v, 1377, 1413)))
                    {
                        break lab8;
                    }
                    break golab7;
                } while (false);
                if (cursor >= limit)
                {
                    break lab0;
                }
                cursor++;
            }
            // setmark p2, line 64
            I_p2 = cursor;
        } while (false);
        cursor = v_1;
        return true;
    }
|
||||
|
||||
private boolean r_R2() {
|
||||
if (!(I_p2 <= cursor))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_adjective() {
|
||||
int among_var;
|
||||
// (, line 72
|
||||
// [, line 73
|
||||
ket = cursor;
|
||||
// substring, line 73
|
||||
among_var = find_among_b(a_0, 23);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 73
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 98
|
||||
// delete, line 98
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_verb() {
|
||||
int among_var;
|
||||
// (, line 102
|
||||
// [, line 103
|
||||
ket = cursor;
|
||||
// substring, line 103
|
||||
among_var = find_among_b(a_1, 71);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 103
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 176
|
||||
// delete, line 176
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_noun() {
|
||||
int among_var;
|
||||
// (, line 180
|
||||
// [, line 181
|
||||
ket = cursor;
|
||||
// substring, line 181
|
||||
among_var = find_among_b(a_2, 40);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 181
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 223
|
||||
// delete, line 223
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_ending() {
|
||||
int among_var;
|
||||
// (, line 227
|
||||
// [, line 228
|
||||
ket = cursor;
|
||||
// substring, line 228
|
||||
among_var = find_among_b(a_3, 57);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 228
|
||||
bra = cursor;
|
||||
// call R2, line 228
|
||||
if (!r_R2())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 287
|
||||
// delete, line 287
|
||||
slice_del();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
    /**
     * Generated entry point of the Armenian stemmer.  First computes the
     * pV/p2 regions, then switches to backward scanning and attempts, in
     * order, the ending, verb, adjective and noun rules.  Each rule runs
     * with "do" semantics: a failure is ignored and the cursor restored
     * before the next rule is tried.  Fails (returns false) only when the
     * backward cursor cannot reach the pV mark.
     */
    public boolean stem() {
        int v_1;
        int v_2;
        int v_3;
        int v_4;
        int v_5;
        int v_6;
        int v_7;
        // (, line 292
        // do, line 294 — compute pV / p2 without disturbing the cursor
        v_1 = cursor;
        lab0: do {
            // call mark_regions, line 294
            if (!r_mark_regions())
            {
                break lab0;
            }
        } while (false);
        cursor = v_1;
        // backwards, line 295 — switch to right-to-left matching
        limit_backward = cursor; cursor = limit;
        // setlimit, line 295 — confine backward matching behind the pV mark
        v_2 = limit - cursor;
        // tomark, line 295
        if (cursor < I_pV)
        {
            return false;
        }
        cursor = I_pV;
        v_3 = limit_backward;
        limit_backward = cursor;
        cursor = limit - v_2;
        // (, line 295
        // do, line 296 — try to strip an inflectional ending (R2-gated)
        v_4 = limit - cursor;
        lab1: do {
            // call ending, line 296
            if (!r_ending())
            {
                break lab1;
            }
        } while (false);
        cursor = limit - v_4;
        // do, line 297 — try to strip a verb suffix
        v_5 = limit - cursor;
        lab2: do {
            // call verb, line 297
            if (!r_verb())
            {
                break lab2;
            }
        } while (false);
        cursor = limit - v_5;
        // do, line 298 — try to strip an adjective suffix
        v_6 = limit - cursor;
        lab3: do {
            // call adjective, line 298
            if (!r_adjective())
            {
                break lab3;
            }
        } while (false);
        cursor = limit - v_6;
        // do, line 299 — try to strip a noun suffix
        v_7 = limit - cursor;
        lab4: do {
            // call noun, line 299
            if (!r_noun())
            {
                break lab4;
            }
        } while (false);
        cursor = limit - v_7;
        // restore the original backward limit and rewind the cursor
        limit_backward = v_3;
        cursor = limit_backward; return true;
    }
|
||||
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof ArmenianStemmer;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return ArmenianStemmer.class.getName().hashCode();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,939 @@
|
|||
// This file was generated automatically by the Snowball to Java compiler
|
||||
|
||||
package org.tartarus.snowball.ext;
|
||||
|
||||
import org.tartarus.snowball.Among;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
|
||||
/**
|
||||
* This class was automatically generated by a Snowball to Java compiler
|
||||
* It implements the stemming algorithm defined by a snowball script.
|
||||
*/
|
||||
|
||||
public class BasqueStemmer extends SnowballProgram {
|
||||
|
||||
    // Serialization id for this generated class.
    private static final long serialVersionUID = 1L;

    // Shared instance handed to every Among entry; Among can use it to
    // dispatch optional per-entry callback methods.
    private final static BasqueStemmer methodObject = new BasqueStemmer ();
|
||||
|
||||
    // First Basque suffix table (109 entries).
    // Among(form, substring-link-index, result, ...): the result code selects
    // the action taken in the rule that consults this table.
    private final static Among a_0[] = {
        new Among ( "idea", -1, 1, "", methodObject ),
        new Among ( "bidea", 0, 1, "", methodObject ),
        new Among ( "kidea", 0, 1, "", methodObject ),
        new Among ( "pidea", 0, 1, "", methodObject ),
        new Among ( "kundea", -1, 1, "", methodObject ),
        new Among ( "galea", -1, 1, "", methodObject ),
        new Among ( "tailea", -1, 1, "", methodObject ),
        new Among ( "tzailea", -1, 1, "", methodObject ),
        new Among ( "gunea", -1, 1, "", methodObject ),
        new Among ( "kunea", -1, 1, "", methodObject ),
        new Among ( "tzaga", -1, 1, "", methodObject ),
        new Among ( "gaia", -1, 1, "", methodObject ),
        new Among ( "aldia", -1, 1, "", methodObject ),
        new Among ( "taldia", 12, 1, "", methodObject ),
        new Among ( "karia", -1, 1, "", methodObject ),
        new Among ( "garria", -1, 2, "", methodObject ),
        new Among ( "karria", -1, 1, "", methodObject ),
        new Among ( "ka", -1, 1, "", methodObject ),
        new Among ( "tzaka", 17, 1, "", methodObject ),
        new Among ( "la", -1, 1, "", methodObject ),
        new Among ( "mena", -1, 1, "", methodObject ),
        new Among ( "pena", -1, 1, "", methodObject ),
        new Among ( "kina", -1, 1, "", methodObject ),
        new Among ( "ezina", -1, 1, "", methodObject ),
        new Among ( "tezina", 23, 1, "", methodObject ),
        new Among ( "kuna", -1, 1, "", methodObject ),
        new Among ( "tuna", -1, 1, "", methodObject ),
        new Among ( "kizuna", -1, 1, "", methodObject ),
        new Among ( "era", -1, 1, "", methodObject ),
        new Among ( "bera", 28, 1, "", methodObject ),
        new Among ( "arabera", 29, 4, "", methodObject ),
        new Among ( "kera", 28, 1, "", methodObject ),
        new Among ( "pera", 28, 1, "", methodObject ),
        new Among ( "orra", -1, 1, "", methodObject ),
        new Among ( "korra", 33, 1, "", methodObject ),
        new Among ( "dura", -1, 1, "", methodObject ),
        new Among ( "gura", -1, 1, "", methodObject ),
        new Among ( "kura", -1, 1, "", methodObject ),
        new Among ( "tura", -1, 1, "", methodObject ),
        new Among ( "eta", -1, 1, "", methodObject ),
        new Among ( "keta", 39, 1, "", methodObject ),
        new Among ( "gailua", -1, 1, "", methodObject ),
        new Among ( "eza", -1, 1, "", methodObject ),
        new Among ( "erreza", 42, 1, "", methodObject ),
        new Among ( "tza", -1, 2, "", methodObject ),
        new Among ( "gaitza", 44, 1, "", methodObject ),
        new Among ( "kaitza", 44, 1, "", methodObject ),
        new Among ( "kuntza", 44, 1, "", methodObject ),
        new Among ( "ide", -1, 1, "", methodObject ),
        new Among ( "bide", 48, 1, "", methodObject ),
        new Among ( "kide", 48, 1, "", methodObject ),
        new Among ( "pide", 48, 1, "", methodObject ),
        new Among ( "kunde", -1, 1, "", methodObject ),
        new Among ( "tzake", -1, 1, "", methodObject ),
        new Among ( "tzeke", -1, 1, "", methodObject ),
        new Among ( "le", -1, 1, "", methodObject ),
        new Among ( "gale", 55, 1, "", methodObject ),
        new Among ( "taile", 55, 1, "", methodObject ),
        new Among ( "tzaile", 55, 1, "", methodObject ),
        new Among ( "gune", -1, 1, "", methodObject ),
        new Among ( "kune", -1, 1, "", methodObject ),
        new Among ( "tze", -1, 1, "", methodObject ),
        new Among ( "atze", 61, 1, "", methodObject ),
        new Among ( "gai", -1, 1, "", methodObject ),
        new Among ( "aldi", -1, 1, "", methodObject ),
        new Among ( "taldi", 64, 1, "", methodObject ),
        new Among ( "ki", -1, 1, "", methodObject ),
        new Among ( "ari", -1, 1, "", methodObject ),
        new Among ( "kari", 67, 1, "", methodObject ),
        new Among ( "lari", 67, 1, "", methodObject ),
        new Among ( "tari", 67, 1, "", methodObject ),
        new Among ( "etari", 70, 1, "", methodObject ),
        new Among ( "garri", -1, 2, "", methodObject ),
        new Among ( "karri", -1, 1, "", methodObject ),
        new Among ( "arazi", -1, 1, "", methodObject ),
        new Among ( "tarazi", 74, 1, "", methodObject ),
        new Among ( "an", -1, 1, "", methodObject ),
        new Among ( "ean", 76, 1, "", methodObject ),
        new Among ( "rean", 77, 1, "", methodObject ),
        new Among ( "kan", 76, 1, "", methodObject ),
        new Among ( "etan", 76, 1, "", methodObject ),
        new Among ( "atseden", -1, 3, "", methodObject ),
        new Among ( "men", -1, 1, "", methodObject ),
        new Among ( "pen", -1, 1, "", methodObject ),
        new Among ( "kin", -1, 1, "", methodObject ),
        new Among ( "rekin", 84, 1, "", methodObject ),
        new Among ( "ezin", -1, 1, "", methodObject ),
        new Among ( "tezin", 86, 1, "", methodObject ),
        new Among ( "tun", -1, 1, "", methodObject ),
        new Among ( "kizun", -1, 1, "", methodObject ),
        new Among ( "go", -1, 1, "", methodObject ),
        new Among ( "ago", 90, 1, "", methodObject ),
        new Among ( "tio", -1, 1, "", methodObject ),
        new Among ( "dako", -1, 1, "", methodObject ),
        new Among ( "or", -1, 1, "", methodObject ),
        new Among ( "kor", 94, 1, "", methodObject ),
        new Among ( "tzat", -1, 1, "", methodObject ),
        new Among ( "du", -1, 1, "", methodObject ),
        new Among ( "gailu", -1, 1, "", methodObject ),
        new Among ( "tu", -1, 1, "", methodObject ),
        new Among ( "atu", 99, 1, "", methodObject ),
        new Among ( "aldatu", 100, 1, "", methodObject ),
        new Among ( "tatu", 100, 1, "", methodObject ),
        new Among ( "baditu", 99, 5, "", methodObject ),
        new Among ( "ez", -1, 1, "", methodObject ),
        new Among ( "errez", 104, 1, "", methodObject ),
        new Among ( "tzez", 104, 1, "", methodObject ),
        new Among ( "gaitz", -1, 1, "", methodObject ),
        new Among ( "kaitz", -1, 1, "", methodObject )
    };
|
||||
|
||||
    // Second Basque suffix table (295 entries).  Generated data — do not
    // edit by hand; regenerate from the Snowball source instead.
    private final static Among a_1[] = {
        new Among ( "ada", -1, 1, "", methodObject ),
        new Among ( "kada", 0, 1, "", methodObject ),
        new Among ( "anda", -1, 1, "", methodObject ),
        new Among ( "denda", -1, 1, "", methodObject ),
        new Among ( "gabea", -1, 1, "", methodObject ),
        new Among ( "kabea", -1, 1, "", methodObject ),
        new Among ( "aldea", -1, 1, "", methodObject ),
        new Among ( "kaldea", 6, 1, "", methodObject ),
        new Among ( "taldea", 6, 1, "", methodObject ),
        new Among ( "ordea", -1, 1, "", methodObject ),
        new Among ( "zalea", -1, 1, "", methodObject ),
        new Among ( "tzalea", 10, 1, "", methodObject ),
        new Among ( "gilea", -1, 1, "", methodObject ),
        new Among ( "emea", -1, 1, "", methodObject ),
        new Among ( "kumea", -1, 1, "", methodObject ),
        new Among ( "nea", -1, 1, "", methodObject ),
        new Among ( "enea", 15, 1, "", methodObject ),
        new Among ( "zionea", 15, 1, "", methodObject ),
        new Among ( "unea", 15, 1, "", methodObject ),
        new Among ( "gunea", 18, 1, "", methodObject ),
        new Among ( "pea", -1, 1, "", methodObject ),
        new Among ( "aurrea", -1, 1, "", methodObject ),
        new Among ( "tea", -1, 1, "", methodObject ),
        new Among ( "kotea", 22, 1, "", methodObject ),
        new Among ( "artea", 22, 1, "", methodObject ),
        new Among ( "ostea", 22, 1, "", methodObject ),
        new Among ( "etxea", -1, 1, "", methodObject ),
        new Among ( "ga", -1, 1, "", methodObject ),
        new Among ( "anga", 27, 1, "", methodObject ),
        new Among ( "gaia", -1, 1, "", methodObject ),
        new Among ( "aldia", -1, 1, "", methodObject ),
        new Among ( "taldia", 30, 1, "", methodObject ),
        new Among ( "handia", -1, 1, "", methodObject ),
        new Among ( "mendia", -1, 1, "", methodObject ),
        new Among ( "geia", -1, 1, "", methodObject ),
        new Among ( "egia", -1, 1, "", methodObject ),
        new Among ( "degia", 35, 1, "", methodObject ),
        new Among ( "tegia", 35, 1, "", methodObject ),
        new Among ( "nahia", -1, 1, "", methodObject ),
        new Among ( "ohia", -1, 1, "", methodObject ),
        new Among ( "kia", -1, 1, "", methodObject ),
        new Among ( "tokia", 40, 1, "", methodObject ),
        new Among ( "oia", -1, 1, "", methodObject ),
        new Among ( "koia", 42, 1, "", methodObject ),
        new Among ( "aria", -1, 1, "", methodObject ),
        new Among ( "karia", 44, 1, "", methodObject ),
        new Among ( "laria", 44, 1, "", methodObject ),
        new Among ( "taria", 44, 1, "", methodObject ),
        new Among ( "eria", -1, 1, "", methodObject ),
        new Among ( "keria", 48, 1, "", methodObject ),
        new Among ( "teria", 48, 1, "", methodObject ),
        new Among ( "garria", -1, 2, "", methodObject ),
        new Among ( "larria", -1, 1, "", methodObject ),
        new Among ( "kirria", -1, 1, "", methodObject ),
        new Among ( "duria", -1, 1, "", methodObject ),
        new Among ( "asia", -1, 1, "", methodObject ),
        new Among ( "tia", -1, 1, "", methodObject ),
        new Among ( "ezia", -1, 1, "", methodObject ),
        new Among ( "bizia", -1, 1, "", methodObject ),
        new Among ( "ontzia", -1, 1, "", methodObject ),
        new Among ( "ka", -1, 1, "", methodObject ),
        new Among ( "joka", 60, 3, "", methodObject ),
        new Among ( "aurka", 60, 10, "", methodObject ),
        new Among ( "ska", 60, 1, "", methodObject ),
        new Among ( "xka", 60, 1, "", methodObject ),
        new Among ( "zka", 60, 1, "", methodObject ),
        new Among ( "gibela", -1, 1, "", methodObject ),
        new Among ( "gela", -1, 1, "", methodObject ),
        new Among ( "kaila", -1, 1, "", methodObject ),
        new Among ( "skila", -1, 1, "", methodObject ),
        new Among ( "tila", -1, 1, "", methodObject ),
        new Among ( "ola", -1, 1, "", methodObject ),
        new Among ( "na", -1, 1, "", methodObject ),
        new Among ( "kana", 72, 1, "", methodObject ),
        new Among ( "ena", 72, 1, "", methodObject ),
        new Among ( "garrena", 74, 1, "", methodObject ),
        new Among ( "gerrena", 74, 1, "", methodObject ),
        new Among ( "urrena", 74, 1, "", methodObject ),
        new Among ( "zaina", 72, 1, "", methodObject ),
        new Among ( "tzaina", 78, 1, "", methodObject ),
        new Among ( "kina", 72, 1, "", methodObject ),
        new Among ( "mina", 72, 1, "", methodObject ),
        new Among ( "garna", 72, 1, "", methodObject ),
        new Among ( "una", 72, 1, "", methodObject ),
        new Among ( "duna", 83, 1, "", methodObject ),
        new Among ( "asuna", 83, 1, "", methodObject ),
        new Among ( "tasuna", 85, 1, "", methodObject ),
        new Among ( "ondoa", -1, 1, "", methodObject ),
        new Among ( "kondoa", 87, 1, "", methodObject ),
        new Among ( "ngoa", -1, 1, "", methodObject ),
        new Among ( "zioa", -1, 1, "", methodObject ),
        new Among ( "koa", -1, 1, "", methodObject ),
        new Among ( "takoa", 91, 1, "", methodObject ),
        new Among ( "zkoa", 91, 1, "", methodObject ),
        new Among ( "noa", -1, 1, "", methodObject ),
        new Among ( "zinoa", 94, 1, "", methodObject ),
        new Among ( "aroa", -1, 1, "", methodObject ),
        new Among ( "taroa", 96, 1, "", methodObject ),
        new Among ( "zaroa", 96, 1, "", methodObject ),
        new Among ( "eroa", -1, 1, "", methodObject ),
        new Among ( "oroa", -1, 1, "", methodObject ),
        new Among ( "osoa", -1, 1, "", methodObject ),
        new Among ( "toa", -1, 1, "", methodObject ),
        new Among ( "ttoa", 102, 1, "", methodObject ),
        new Among ( "ztoa", 102, 1, "", methodObject ),
        new Among ( "txoa", -1, 1, "", methodObject ),
        new Among ( "tzoa", -1, 1, "", methodObject ),
        new Among ( "\u00F1oa", -1, 1, "", methodObject ),
        new Among ( "ra", -1, 1, "", methodObject ),
        new Among ( "ara", 108, 1, "", methodObject ),
        new Among ( "dara", 109, 1, "", methodObject ),
        new Among ( "liara", 109, 1, "", methodObject ),
        new Among ( "tiara", 109, 1, "", methodObject ),
        new Among ( "tara", 109, 1, "", methodObject ),
        new Among ( "etara", 113, 1, "", methodObject ),
        new Among ( "tzara", 109, 1, "", methodObject ),
        new Among ( "bera", 108, 1, "", methodObject ),
        new Among ( "kera", 108, 1, "", methodObject ),
        new Among ( "pera", 108, 1, "", methodObject ),
        new Among ( "ora", 108, 2, "", methodObject ),
        new Among ( "tzarra", 108, 1, "", methodObject ),
        new Among ( "korra", 108, 1, "", methodObject ),
        new Among ( "tra", 108, 1, "", methodObject ),
        new Among ( "sa", -1, 1, "", methodObject ),
        new Among ( "osa", 123, 1, "", methodObject ),
        new Among ( "ta", -1, 1, "", methodObject ),
        new Among ( "eta", 125, 1, "", methodObject ),
        new Among ( "keta", 126, 1, "", methodObject ),
        new Among ( "sta", 125, 1, "", methodObject ),
        new Among ( "dua", -1, 1, "", methodObject ),
        new Among ( "mendua", 129, 1, "", methodObject ),
        new Among ( "ordua", 129, 1, "", methodObject ),
        new Among ( "lekua", -1, 1, "", methodObject ),
        new Among ( "burua", -1, 1, "", methodObject ),
        new Among ( "durua", -1, 1, "", methodObject ),
        new Among ( "tsua", -1, 1, "", methodObject ),
        new Among ( "tua", -1, 1, "", methodObject ),
        new Among ( "mentua", 136, 1, "", methodObject ),
        new Among ( "estua", 136, 1, "", methodObject ),
        new Among ( "txua", -1, 1, "", methodObject ),
        new Among ( "zua", -1, 1, "", methodObject ),
        new Among ( "tzua", 140, 1, "", methodObject ),
        new Among ( "za", -1, 1, "", methodObject ),
        new Among ( "eza", 142, 1, "", methodObject ),
        new Among ( "eroza", 142, 1, "", methodObject ),
        new Among ( "tza", 142, 2, "", methodObject ),
        new Among ( "koitza", 145, 1, "", methodObject ),
        new Among ( "antza", 145, 1, "", methodObject ),
        new Among ( "gintza", 145, 1, "", methodObject ),
        new Among ( "kintza", 145, 1, "", methodObject ),
        new Among ( "kuntza", 145, 1, "", methodObject ),
        new Among ( "gabe", -1, 1, "", methodObject ),
        new Among ( "kabe", -1, 1, "", methodObject ),
        new Among ( "kide", -1, 1, "", methodObject ),
        new Among ( "alde", -1, 1, "", methodObject ),
        new Among ( "kalde", 154, 1, "", methodObject ),
        new Among ( "talde", 154, 1, "", methodObject ),
        new Among ( "orde", -1, 1, "", methodObject ),
        new Among ( "ge", -1, 1, "", methodObject ),
        new Among ( "zale", -1, 1, "", methodObject ),
        new Among ( "tzale", 159, 1, "", methodObject ),
        new Among ( "gile", -1, 1, "", methodObject ),
        new Among ( "eme", -1, 1, "", methodObject ),
        new Among ( "kume", -1, 1, "", methodObject ),
        new Among ( "ne", -1, 1, "", methodObject ),
        new Among ( "zione", 164, 1, "", methodObject ),
        new Among ( "une", 164, 1, "", methodObject ),
        new Among ( "gune", 166, 1, "", methodObject ),
        new Among ( "pe", -1, 1, "", methodObject ),
        new Among ( "aurre", -1, 1, "", methodObject ),
        new Among ( "te", -1, 1, "", methodObject ),
        new Among ( "kote", 170, 1, "", methodObject ),
        new Among ( "arte", 170, 1, "", methodObject ),
        new Among ( "oste", 170, 1, "", methodObject ),
        new Among ( "etxe", -1, 1, "", methodObject ),
        new Among ( "gai", -1, 1, "", methodObject ),
        new Among ( "di", -1, 1, "", methodObject ),
        new Among ( "aldi", 176, 1, "", methodObject ),
        new Among ( "taldi", 177, 1, "", methodObject ),
        new Among ( "geldi", 176, 8, "", methodObject ),
        new Among ( "handi", 176, 1, "", methodObject ),
        new Among ( "mendi", 176, 1, "", methodObject ),
        new Among ( "gei", -1, 1, "", methodObject ),
        new Among ( "egi", -1, 1, "", methodObject ),
        new Among ( "degi", 183, 1, "", methodObject ),
        new Among ( "tegi", 183, 1, "", methodObject ),
        new Among ( "nahi", -1, 1, "", methodObject ),
        new Among ( "ohi", -1, 1, "", methodObject ),
        new Among ( "ki", -1, 1, "", methodObject ),
        new Among ( "toki", 188, 1, "", methodObject ),
        new Among ( "oi", -1, 1, "", methodObject ),
        new Among ( "goi", 190, 1, "", methodObject ),
        new Among ( "koi", 190, 1, "", methodObject ),
        new Among ( "ari", -1, 1, "", methodObject ),
        new Among ( "kari", 193, 1, "", methodObject ),
        new Among ( "lari", 193, 1, "", methodObject ),
        new Among ( "tari", 193, 1, "", methodObject ),
        new Among ( "garri", -1, 2, "", methodObject ),
        new Among ( "larri", -1, 1, "", methodObject ),
        new Among ( "kirri", -1, 1, "", methodObject ),
        new Among ( "duri", -1, 1, "", methodObject ),
        new Among ( "asi", -1, 1, "", methodObject ),
        new Among ( "ti", -1, 1, "", methodObject ),
        new Among ( "ontzi", -1, 1, "", methodObject ),
        new Among ( "\u00F1i", -1, 1, "", methodObject ),
        new Among ( "ak", -1, 1, "", methodObject ),
        new Among ( "ek", -1, 1, "", methodObject ),
        new Among ( "tarik", -1, 1, "", methodObject ),
        new Among ( "gibel", -1, 1, "", methodObject ),
        new Among ( "ail", -1, 1, "", methodObject ),
        new Among ( "kail", 209, 1, "", methodObject ),
        new Among ( "kan", -1, 1, "", methodObject ),
        new Among ( "tan", -1, 1, "", methodObject ),
        new Among ( "etan", 212, 1, "", methodObject ),
        new Among ( "en", -1, 4, "", methodObject ),
        new Among ( "ren", 214, 2, "", methodObject ),
        new Among ( "garren", 215, 1, "", methodObject ),
        new Among ( "gerren", 215, 1, "", methodObject ),
        new Among ( "urren", 215, 1, "", methodObject ),
        new Among ( "ten", 214, 4, "", methodObject ),
        new Among ( "tzen", 214, 4, "", methodObject ),
        new Among ( "zain", -1, 1, "", methodObject ),
        new Among ( "tzain", 221, 1, "", methodObject ),
        new Among ( "kin", -1, 1, "", methodObject ),
        new Among ( "min", -1, 1, "", methodObject ),
        new Among ( "dun", -1, 1, "", methodObject ),
        new Among ( "asun", -1, 1, "", methodObject ),
        new Among ( "tasun", 226, 1, "", methodObject ),
        new Among ( "aizun", -1, 1, "", methodObject ),
        new Among ( "ondo", -1, 1, "", methodObject ),
        new Among ( "kondo", 229, 1, "", methodObject ),
        new Among ( "go", -1, 1, "", methodObject ),
        new Among ( "ngo", 231, 1, "", methodObject ),
        new Among ( "zio", -1, 1, "", methodObject ),
        new Among ( "ko", -1, 1, "", methodObject ),
        new Among ( "trako", 234, 5, "", methodObject ),
        new Among ( "tako", 234, 1, "", methodObject ),
        new Among ( "etako", 236, 1, "", methodObject ),
        new Among ( "eko", 234, 1, "", methodObject ),
        new Among ( "tariko", 234, 1, "", methodObject ),
        new Among ( "sko", 234, 1, "", methodObject ),
        new Among ( "tuko", 234, 1, "", methodObject ),
        new Among ( "minutuko", 241, 6, "", methodObject ),
        new Among ( "zko", 234, 1, "", methodObject ),
        new Among ( "no", -1, 1, "", methodObject ),
        new Among ( "zino", 244, 1, "", methodObject ),
        new Among ( "ro", -1, 1, "", methodObject ),
        new Among ( "aro", 246, 1, "", methodObject ),
        new Among ( "igaro", 247, 9, "", methodObject ),
        new Among ( "taro", 247, 1, "", methodObject ),
        new Among ( "zaro", 247, 1, "", methodObject ),
        new Among ( "ero", 246, 1, "", methodObject ),
        new Among ( "giro", 246, 1, "", methodObject ),
        new Among ( "oro", 246, 1, "", methodObject ),
        new Among ( "oso", -1, 1, "", methodObject ),
        new Among ( "to", -1, 1, "", methodObject ),
        new Among ( "tto", 255, 1, "", methodObject ),
        new Among ( "zto", 255, 1, "", methodObject ),
        new Among ( "txo", -1, 1, "", methodObject ),
        new Among ( "tzo", -1, 1, "", methodObject ),
        new Among ( "gintzo", 259, 1, "", methodObject ),
        new Among ( "\u00F1o", -1, 1, "", methodObject ),
        new Among ( "zp", -1, 1, "", methodObject ),
        new Among ( "ar", -1, 1, "", methodObject ),
        new Among ( "dar", 263, 1, "", methodObject ),
        new Among ( "behar", 263, 1, "", methodObject ),
        new Among ( "zehar", 263, 7, "", methodObject ),
        new Among ( "liar", 263, 1, "", methodObject ),
        new Among ( "tiar", 263, 1, "", methodObject ),
        new Among ( "tar", 263, 1, "", methodObject ),
        new Among ( "tzar", 263, 1, "", methodObject ),
        new Among ( "or", -1, 2, "", methodObject ),
        new Among ( "kor", 271, 1, "", methodObject ),
        new Among ( "os", -1, 1, "", methodObject ),
        new Among ( "ket", -1, 1, "", methodObject ),
        new Among ( "du", -1, 1, "", methodObject ),
        new Among ( "mendu", 275, 1, "", methodObject ),
        new Among ( "ordu", 275, 1, "", methodObject ),
        new Among ( "leku", -1, 1, "", methodObject ),
        new Among ( "buru", -1, 2, "", methodObject ),
        new Among ( "duru", -1, 1, "", methodObject ),
        new Among ( "tsu", -1, 1, "", methodObject ),
        new Among ( "tu", -1, 1, "", methodObject ),
        new Among ( "tatu", 282, 4, "", methodObject ),
        new Among ( "mentu", 282, 1, "", methodObject ),
        new Among ( "estu", 282, 1, "", methodObject ),
        new Among ( "txu", -1, 1, "", methodObject ),
        new Among ( "zu", -1, 1, "", methodObject ),
        new Among ( "tzu", 287, 1, "", methodObject ),
        new Among ( "gintzu", 288, 1, "", methodObject ),
        new Among ( "z", -1, 1, "", methodObject ),
        new Among ( "ez", 290, 1, "", methodObject ),
        new Among ( "eroz", 290, 1, "", methodObject ),
        new Among ( "tz", 290, 1, "", methodObject ),
        new Among ( "koitz", 293, 1, "", methodObject )
    };
|
||||
|
||||
private final static Among a_2[] = {
|
||||
new Among ( "zlea", -1, 2, "", methodObject ),
|
||||
new Among ( "keria", -1, 1, "", methodObject ),
|
||||
new Among ( "la", -1, 1, "", methodObject ),
|
||||
new Among ( "era", -1, 1, "", methodObject ),
|
||||
new Among ( "dade", -1, 1, "", methodObject ),
|
||||
new Among ( "tade", -1, 1, "", methodObject ),
|
||||
new Among ( "date", -1, 1, "", methodObject ),
|
||||
new Among ( "tate", -1, 1, "", methodObject ),
|
||||
new Among ( "gi", -1, 1, "", methodObject ),
|
||||
new Among ( "ki", -1, 1, "", methodObject ),
|
||||
new Among ( "ik", -1, 1, "", methodObject ),
|
||||
new Among ( "lanik", 10, 1, "", methodObject ),
|
||||
new Among ( "rik", 10, 1, "", methodObject ),
|
||||
new Among ( "larik", 12, 1, "", methodObject ),
|
||||
new Among ( "ztik", 10, 1, "", methodObject ),
|
||||
new Among ( "go", -1, 1, "", methodObject ),
|
||||
new Among ( "ro", -1, 1, "", methodObject ),
|
||||
new Among ( "ero", 16, 1, "", methodObject ),
|
||||
new Among ( "to", -1, 1, "", methodObject )
|
||||
};
|
||||
|
||||
private static final char g_v[] = {17, 65, 16 };
|
||||
|
||||
private int I_p2;
|
||||
private int I_p1;
|
||||
private int I_pV;
|
||||
|
||||
private void copy_from(BasqueStemmer other) {
|
||||
I_p2 = other.I_p2;
|
||||
I_p1 = other.I_p1;
|
||||
I_pV = other.I_pV;
|
||||
super.copy_from(other);
|
||||
}
|
||||
|
||||
private boolean r_mark_regions() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
int v_6;
|
||||
int v_8;
|
||||
// (, line 25
|
||||
I_pV = limit;
|
||||
I_p1 = limit;
|
||||
I_p2 = limit;
|
||||
// do, line 31
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// (, line 31
|
||||
// or, line 33
|
||||
lab1: do {
|
||||
v_2 = cursor;
|
||||
lab2: do {
|
||||
// (, line 32
|
||||
if (!(in_grouping(g_v, 97, 117)))
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
// or, line 32
|
||||
lab3: do {
|
||||
v_3 = cursor;
|
||||
lab4: do {
|
||||
// (, line 32
|
||||
if (!(out_grouping(g_v, 97, 117)))
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
// gopast, line 32
|
||||
golab5: while(true)
|
||||
{
|
||||
lab6: do {
|
||||
if (!(in_grouping(g_v, 97, 117)))
|
||||
{
|
||||
break lab6;
|
||||
}
|
||||
break golab5;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
break lab3;
|
||||
} while (false);
|
||||
cursor = v_3;
|
||||
// (, line 32
|
||||
if (!(in_grouping(g_v, 97, 117)))
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
// gopast, line 32
|
||||
golab7: while(true)
|
||||
{
|
||||
lab8: do {
|
||||
if (!(out_grouping(g_v, 97, 117)))
|
||||
{
|
||||
break lab8;
|
||||
}
|
||||
break golab7;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
} while (false);
|
||||
break lab1;
|
||||
} while (false);
|
||||
cursor = v_2;
|
||||
// (, line 34
|
||||
if (!(out_grouping(g_v, 97, 117)))
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
// or, line 34
|
||||
lab9: do {
|
||||
v_6 = cursor;
|
||||
lab10: do {
|
||||
// (, line 34
|
||||
if (!(out_grouping(g_v, 97, 117)))
|
||||
{
|
||||
break lab10;
|
||||
}
|
||||
// gopast, line 34
|
||||
golab11: while(true)
|
||||
{
|
||||
lab12: do {
|
||||
if (!(in_grouping(g_v, 97, 117)))
|
||||
{
|
||||
break lab12;
|
||||
}
|
||||
break golab11;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab10;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
break lab9;
|
||||
} while (false);
|
||||
cursor = v_6;
|
||||
// (, line 34
|
||||
if (!(in_grouping(g_v, 97, 117)))
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
// next, line 34
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
cursor++;
|
||||
} while (false);
|
||||
} while (false);
|
||||
// setmark pV, line 35
|
||||
I_pV = cursor;
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// do, line 37
|
||||
v_8 = cursor;
|
||||
lab13: do {
|
||||
// (, line 37
|
||||
// gopast, line 38
|
||||
golab14: while(true)
|
||||
{
|
||||
lab15: do {
|
||||
if (!(in_grouping(g_v, 97, 117)))
|
||||
{
|
||||
break lab15;
|
||||
}
|
||||
break golab14;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab13;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// gopast, line 38
|
||||
golab16: while(true)
|
||||
{
|
||||
lab17: do {
|
||||
if (!(out_grouping(g_v, 97, 117)))
|
||||
{
|
||||
break lab17;
|
||||
}
|
||||
break golab16;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab13;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark p1, line 38
|
||||
I_p1 = cursor;
|
||||
// gopast, line 39
|
||||
golab18: while(true)
|
||||
{
|
||||
lab19: do {
|
||||
if (!(in_grouping(g_v, 97, 117)))
|
||||
{
|
||||
break lab19;
|
||||
}
|
||||
break golab18;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab13;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// gopast, line 39
|
||||
golab20: while(true)
|
||||
{
|
||||
lab21: do {
|
||||
if (!(out_grouping(g_v, 97, 117)))
|
||||
{
|
||||
break lab21;
|
||||
}
|
||||
break golab20;
|
||||
} while (false);
|
||||
if (cursor >= limit)
|
||||
{
|
||||
break lab13;
|
||||
}
|
||||
cursor++;
|
||||
}
|
||||
// setmark p2, line 39
|
||||
I_p2 = cursor;
|
||||
} while (false);
|
||||
cursor = v_8;
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_RV() {
|
||||
if (!(I_pV <= cursor))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_R2() {
|
||||
if (!(I_p2 <= cursor))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_R1() {
|
||||
if (!(I_p1 <= cursor))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_aditzak() {
|
||||
int among_var;
|
||||
// (, line 49
|
||||
// [, line 50
|
||||
ket = cursor;
|
||||
// substring, line 50
|
||||
among_var = find_among_b(a_0, 109);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 50
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 61
|
||||
// call RV, line 61
|
||||
if (!r_RV())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 61
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 63
|
||||
// call R2, line 63
|
||||
if (!r_R2())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 63
|
||||
slice_del();
|
||||
break;
|
||||
case 3:
|
||||
// (, line 65
|
||||
// <-, line 65
|
||||
slice_from("atseden");
|
||||
break;
|
||||
case 4:
|
||||
// (, line 67
|
||||
// <-, line 67
|
||||
slice_from("arabera");
|
||||
break;
|
||||
case 5:
|
||||
// (, line 69
|
||||
// <-, line 69
|
||||
slice_from("baditu");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_izenak() {
|
||||
int among_var;
|
||||
// (, line 74
|
||||
// [, line 75
|
||||
ket = cursor;
|
||||
// substring, line 75
|
||||
among_var = find_among_b(a_1, 295);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 75
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 105
|
||||
// call RV, line 105
|
||||
if (!r_RV())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 105
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 107
|
||||
// call R2, line 107
|
||||
if (!r_R2())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 107
|
||||
slice_del();
|
||||
break;
|
||||
case 3:
|
||||
// (, line 109
|
||||
// <-, line 109
|
||||
slice_from("jok");
|
||||
break;
|
||||
case 4:
|
||||
// (, line 111
|
||||
// call R1, line 111
|
||||
if (!r_R1())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 111
|
||||
slice_del();
|
||||
break;
|
||||
case 5:
|
||||
// (, line 113
|
||||
// <-, line 113
|
||||
slice_from("tra");
|
||||
break;
|
||||
case 6:
|
||||
// (, line 115
|
||||
// <-, line 115
|
||||
slice_from("minutu");
|
||||
break;
|
||||
case 7:
|
||||
// (, line 117
|
||||
// <-, line 117
|
||||
slice_from("zehar");
|
||||
break;
|
||||
case 8:
|
||||
// (, line 119
|
||||
// <-, line 119
|
||||
slice_from("geldi");
|
||||
break;
|
||||
case 9:
|
||||
// (, line 121
|
||||
// <-, line 121
|
||||
slice_from("igaro");
|
||||
break;
|
||||
case 10:
|
||||
// (, line 123
|
||||
// <-, line 123
|
||||
slice_from("aurka");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean r_adjetiboak() {
|
||||
int among_var;
|
||||
// (, line 127
|
||||
// [, line 128
|
||||
ket = cursor;
|
||||
// substring, line 128
|
||||
among_var = find_among_b(a_2, 19);
|
||||
if (among_var == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// ], line 128
|
||||
bra = cursor;
|
||||
switch(among_var) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
// (, line 131
|
||||
// call RV, line 131
|
||||
if (!r_RV())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// delete, line 131
|
||||
slice_del();
|
||||
break;
|
||||
case 2:
|
||||
// (, line 133
|
||||
// <-, line 133
|
||||
slice_from("z");
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean stem() {
|
||||
int v_1;
|
||||
int v_2;
|
||||
int v_3;
|
||||
int v_4;
|
||||
// (, line 139
|
||||
// do, line 140
|
||||
v_1 = cursor;
|
||||
lab0: do {
|
||||
// call mark_regions, line 140
|
||||
if (!r_mark_regions())
|
||||
{
|
||||
break lab0;
|
||||
}
|
||||
} while (false);
|
||||
cursor = v_1;
|
||||
// backwards, line 141
|
||||
limit_backward = cursor; cursor = limit;
|
||||
// (, line 141
|
||||
// repeat, line 142
|
||||
replab1: while(true)
|
||||
{
|
||||
v_2 = limit - cursor;
|
||||
lab2: do {
|
||||
// call aditzak, line 142
|
||||
if (!r_aditzak())
|
||||
{
|
||||
break lab2;
|
||||
}
|
||||
continue replab1;
|
||||
} while (false);
|
||||
cursor = limit - v_2;
|
||||
break replab1;
|
||||
}
|
||||
// repeat, line 143
|
||||
replab3: while(true)
|
||||
{
|
||||
v_3 = limit - cursor;
|
||||
lab4: do {
|
||||
// call izenak, line 143
|
||||
if (!r_izenak())
|
||||
{
|
||||
break lab4;
|
||||
}
|
||||
continue replab3;
|
||||
} while (false);
|
||||
cursor = limit - v_3;
|
||||
break replab3;
|
||||
}
|
||||
// do, line 144
|
||||
v_4 = limit - cursor;
|
||||
lab5: do {
|
||||
// call adjetiboak, line 144
|
||||
if (!r_adjetiboak())
|
||||
{
|
||||
break lab5;
|
||||
}
|
||||
} while (false);
|
||||
cursor = limit - v_4;
|
||||
cursor = limit_backward; return true;
|
||||
}
|
||||
|
||||
public boolean equals( Object o ) {
|
||||
return o instanceof BasqueStemmer;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return BasqueStemmer.class.getName().hashCode();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,220 @@
|
|||
# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
|
||||
a
|
||||
abans
|
||||
ací
|
||||
ah
|
||||
així
|
||||
això
|
||||
al
|
||||
als
|
||||
aleshores
|
||||
algun
|
||||
alguna
|
||||
algunes
|
||||
alguns
|
||||
alhora
|
||||
allà
|
||||
allí
|
||||
allò
|
||||
altra
|
||||
altre
|
||||
altres
|
||||
amb
|
||||
ambdós
|
||||
ambdues
|
||||
apa
|
||||
aquell
|
||||
aquella
|
||||
aquelles
|
||||
aquells
|
||||
aquest
|
||||
aquesta
|
||||
aquestes
|
||||
aquests
|
||||
aquí
|
||||
baix
|
||||
cada
|
||||
cadascú
|
||||
cadascuna
|
||||
cadascunes
|
||||
cadascuns
|
||||
com
|
||||
contra
|
||||
d'un
|
||||
d'una
|
||||
d'unes
|
||||
d'uns
|
||||
dalt
|
||||
de
|
||||
del
|
||||
dels
|
||||
des
|
||||
després
|
||||
dins
|
||||
dintre
|
||||
donat
|
||||
doncs
|
||||
durant
|
||||
e
|
||||
eh
|
||||
el
|
||||
els
|
||||
em
|
||||
en
|
||||
encara
|
||||
ens
|
||||
entre
|
||||
érem
|
||||
eren
|
||||
éreu
|
||||
es
|
||||
és
|
||||
esta
|
||||
està
|
||||
estàvem
|
||||
estaven
|
||||
estàveu
|
||||
esteu
|
||||
et
|
||||
etc
|
||||
ets
|
||||
fins
|
||||
fora
|
||||
gairebé
|
||||
ha
|
||||
han
|
||||
has
|
||||
havia
|
||||
he
|
||||
hem
|
||||
heu
|
||||
hi
|
||||
ho
|
||||
i
|
||||
igual
|
||||
iguals
|
||||
ja
|
||||
l'hi
|
||||
la
|
||||
les
|
||||
li
|
||||
li'n
|
||||
llavors
|
||||
m'he
|
||||
ma
|
||||
mal
|
||||
malgrat
|
||||
mateix
|
||||
mateixa
|
||||
mateixes
|
||||
mateixos
|
||||
me
|
||||
mentre
|
||||
més
|
||||
meu
|
||||
meus
|
||||
meva
|
||||
meves
|
||||
molt
|
||||
molta
|
||||
moltes
|
||||
molts
|
||||
mon
|
||||
mons
|
||||
n'he
|
||||
n'hi
|
||||
ne
|
||||
ni
|
||||
no
|
||||
nogensmenys
|
||||
només
|
||||
nosaltres
|
||||
nostra
|
||||
nostre
|
||||
nostres
|
||||
o
|
||||
oh
|
||||
oi
|
||||
on
|
||||
pas
|
||||
pel
|
||||
pels
|
||||
per
|
||||
però
|
||||
perquè
|
||||
poc
|
||||
poca
|
||||
pocs
|
||||
poques
|
||||
potser
|
||||
propi
|
||||
qual
|
||||
quals
|
||||
quan
|
||||
quant
|
||||
que
|
||||
què
|
||||
quelcom
|
||||
qui
|
||||
quin
|
||||
quina
|
||||
quines
|
||||
quins
|
||||
s'ha
|
||||
s'han
|
||||
sa
|
||||
semblant
|
||||
semblants
|
||||
ses
|
||||
seu
|
||||
seus
|
||||
seva
|
||||
seva
|
||||
seves
|
||||
si
|
||||
sobre
|
||||
sobretot
|
||||
sóc
|
||||
solament
|
||||
sols
|
||||
son
|
||||
són
|
||||
sons
|
||||
sota
|
||||
sou
|
||||
t'ha
|
||||
t'han
|
||||
t'he
|
||||
ta
|
||||
tal
|
||||
també
|
||||
tampoc
|
||||
tan
|
||||
tant
|
||||
tanta
|
||||
tantes
|
||||
teu
|
||||
teus
|
||||
teva
|
||||
teves
|
||||
ton
|
||||
tons
|
||||
tot
|
||||
tota
|
||||
totes
|
||||
tots
|
||||
un
|
||||
una
|
||||
unes
|
||||
uns
|
||||
us
|
||||
va
|
||||
vaig
|
||||
vam
|
||||
van
|
||||
vas
|
||||
veu
|
||||
vosaltres
|
||||
vostra
|
||||
vostre
|
||||
vostres
|
|
@ -0,0 +1,99 @@
|
|||
# example set of basque stopwords
|
||||
al
|
||||
anitz
|
||||
arabera
|
||||
asko
|
||||
baina
|
||||
bat
|
||||
batean
|
||||
batek
|
||||
bati
|
||||
batzuei
|
||||
batzuek
|
||||
batzuetan
|
||||
batzuk
|
||||
bera
|
||||
beraiek
|
||||
berau
|
||||
berauek
|
||||
bere
|
||||
berori
|
||||
beroriek
|
||||
beste
|
||||
bezala
|
||||
da
|
||||
dago
|
||||
dira
|
||||
ditu
|
||||
du
|
||||
dute
|
||||
edo
|
||||
egin
|
||||
ere
|
||||
eta
|
||||
eurak
|
||||
ez
|
||||
gainera
|
||||
gu
|
||||
gutxi
|
||||
guzti
|
||||
haiei
|
||||
haiek
|
||||
haietan
|
||||
hainbeste
|
||||
hala
|
||||
han
|
||||
handik
|
||||
hango
|
||||
hara
|
||||
hari
|
||||
hark
|
||||
hartan
|
||||
hau
|
||||
hauei
|
||||
hauek
|
||||
hauetan
|
||||
hemen
|
||||
hemendik
|
||||
hemengo
|
||||
hi
|
||||
hona
|
||||
honek
|
||||
honela
|
||||
honetan
|
||||
honi
|
||||
hor
|
||||
hori
|
||||
horiei
|
||||
horiek
|
||||
horietan
|
||||
horko
|
||||
horra
|
||||
horrek
|
||||
horrela
|
||||
horretan
|
||||
horri
|
||||
hortik
|
||||
hura
|
||||
izan
|
||||
ni
|
||||
noiz
|
||||
nola
|
||||
non
|
||||
nondik
|
||||
nongo
|
||||
nor
|
||||
nora
|
||||
ze
|
||||
zein
|
||||
zen
|
||||
zenbait
|
||||
zenbat
|
||||
zer
|
||||
zergatik
|
||||
ziren
|
||||
zituen
|
||||
zu
|
||||
zuek
|
||||
zuen
|
||||
zuten
|
|
@ -0,0 +1,46 @@
|
|||
# example set of Armenian stopwords.
|
||||
այդ
|
||||
այլ
|
||||
այն
|
||||
այս
|
||||
դու
|
||||
դուք
|
||||
եմ
|
||||
են
|
||||
ենք
|
||||
ես
|
||||
եք
|
||||
է
|
||||
էի
|
||||
էին
|
||||
էինք
|
||||
էիր
|
||||
էիք
|
||||
էր
|
||||
ըստ
|
||||
թ
|
||||
ի
|
||||
ին
|
||||
իսկ
|
||||
իր
|
||||
կամ
|
||||
համար
|
||||
հետ
|
||||
հետո
|
||||
մենք
|
||||
մեջ
|
||||
մի
|
||||
ն
|
||||
նա
|
||||
նաև
|
||||
նրա
|
||||
նրանք
|
||||
որ
|
||||
որը
|
||||
որոնք
|
||||
որպես
|
||||
ու
|
||||
ում
|
||||
պիտի
|
||||
վրա
|
||||
և
|
|
@ -0,0 +1,53 @@
|
|||
package org.apache.lucene.analysis.ca;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
|
||||
public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
|
||||
/** This test fails with NPE when the
|
||||
* stopwords file is missing in classpath */
|
||||
public void testResourcesAvailable() {
|
||||
new CatalanAnalyzer(TEST_VERSION_CURRENT);
|
||||
}
|
||||
|
||||
/** test stopwords and stemming */
|
||||
public void testBasics() throws IOException {
|
||||
Analyzer a = new CatalanAnalyzer(TEST_VERSION_CURRENT);
|
||||
// stemming
|
||||
checkOneTermReuse(a, "llengües", "llengu");
|
||||
checkOneTermReuse(a, "llengua", "llengu");
|
||||
// stopword
|
||||
assertAnalyzesTo(a, "un", new String[] { });
|
||||
}
|
||||
|
||||
/** test use of exclusion set */
|
||||
public void testExclude() throws IOException {
|
||||
Set<String> exclusionSet = new HashSet<String>();
|
||||
exclusionSet.add("llengües");
|
||||
Analyzer a = new CatalanAnalyzer(TEST_VERSION_CURRENT,
|
||||
CatalanAnalyzer.getDefaultStopSet(), exclusionSet);
|
||||
checkOneTermReuse(a, "llengües", "llengües");
|
||||
checkOneTermReuse(a, "llengua", "llengu");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
package org.apache.lucene.analysis.eu;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
|
||||
public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
|
||||
/** This test fails with NPE when the
|
||||
* stopwords file is missing in classpath */
|
||||
public void testResourcesAvailable() {
|
||||
new BasqueAnalyzer(TEST_VERSION_CURRENT);
|
||||
}
|
||||
|
||||
/** test stopwords and stemming */
|
||||
public void testBasics() throws IOException {
|
||||
Analyzer a = new BasqueAnalyzer(TEST_VERSION_CURRENT);
|
||||
// stemming
|
||||
checkOneTermReuse(a, "zaldi", "zaldi");
|
||||
checkOneTermReuse(a, "zaldiak", "zaldi");
|
||||
// stopword
|
||||
assertAnalyzesTo(a, "izan", new String[] { });
|
||||
}
|
||||
|
||||
/** test use of exclusion set */
|
||||
public void testExclude() throws IOException {
|
||||
Set<String> exclusionSet = new HashSet<String>();
|
||||
exclusionSet.add("zaldiak");
|
||||
Analyzer a = new BasqueAnalyzer(TEST_VERSION_CURRENT,
|
||||
BasqueAnalyzer.getDefaultStopSet(), exclusionSet);
|
||||
checkOneTermReuse(a, "zaldiak", "zaldiak");
|
||||
checkOneTermReuse(a, "mendiari", "mendi");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
package org.apache.lucene.analysis.hy;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
|
||||
public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
|
||||
/** This test fails with NPE when the
|
||||
* stopwords file is missing in classpath */
|
||||
public void testResourcesAvailable() {
|
||||
new ArmenianAnalyzer(TEST_VERSION_CURRENT);
|
||||
}
|
||||
|
||||
/** test stopwords and stemming */
|
||||
public void testBasics() throws IOException {
|
||||
Analyzer a = new ArmenianAnalyzer(TEST_VERSION_CURRENT);
|
||||
// stemming
|
||||
checkOneTermReuse(a, "արծիվ", "արծ");
|
||||
checkOneTermReuse(a, "արծիվներ", "արծ");
|
||||
// stopword
|
||||
assertAnalyzesTo(a, "է", new String[] { });
|
||||
}
|
||||
|
||||
/** test use of exclusion set */
|
||||
public void testExclude() throws IOException {
|
||||
Set<String> exclusionSet = new HashSet<String>();
|
||||
exclusionSet.add("արծիվներ");
|
||||
Analyzer a = new ArmenianAnalyzer(TEST_VERSION_CURRENT,
|
||||
ArmenianAnalyzer.getDefaultStopSet(), exclusionSet);
|
||||
checkOneTermReuse(a, "արծիվներ", "արծիվներ");
|
||||
checkOneTermReuse(a, "արծիվ", "արծ");
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue