From ad20d99b3571181504d956a5056c449de5968afd Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 24 Feb 2014 14:53:21 +0000 Subject: [PATCH] break out this class git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5468@1571305 13f79535-47bb-0310-9956-ffa450edef68 --- .../hunspell2/Hunspell2StemFilter.java | 1 - .../lucene/analysis/hunspell2/Stem.java | 98 +++++++++++++++++++ .../lucene/analysis/hunspell2/Stemmer.java | 78 --------------- .../hunspell2/TestAllDictionaries.java | 3 +- .../analysis/hunspell2/TestStemmer.java | 1 - 5 files changed, 100 insertions(+), 81 deletions(-) create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell2/Stem.java diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell2/Hunspell2StemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell2/Hunspell2StemFilter.java index f9dfb770ab2..45941345342 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell2/Hunspell2StemFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell2/Hunspell2StemFilter.java @@ -22,7 +22,6 @@ import java.util.List; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.hunspell2.Stemmer.Stem; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell2/Stem.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell2/Stem.java new file mode 100644 index 00000000000..d3c8d4c86ab --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell2/Stem.java @@ -0,0 +1,98 @@ +package org.apache.lucene.analysis.hunspell2; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.List; + +/** + * Stem represents all information known about a stem of a word. This includes the stem, and the prefixes and suffixes + * that were used to change the word into the stem. + */ +final class Stem { + final List prefixes = new ArrayList(); + final List suffixes = new ArrayList(); + final char stem[]; + final int stemLength; + + /** + * Creates a new Stem wrapping the given word stem + * + * @param stem Stem of a word + */ + public Stem(char stem[], int stemLength) { + this.stem = stem; + this.stemLength = stemLength; + } + + /** + * Adds a prefix to the list of prefixes used to generate this stem. Because it is assumed that prefixes are added + * depth first, the prefix is added to the front of the list + * + * @param prefix Prefix to add to the list of prefixes for this stem + */ + public void addPrefix(Affix prefix) { + prefixes.add(0, prefix); + } + + /** + * Adds a suffix to the list of suffixes used to generate this stem. Because it is assumed that suffixes are added + * depth first, the suffix is added to the end of the list + * + * @param suffix Suffix to add to the list of suffixes for this stem + */ + public void addSuffix(Affix suffix) { + suffixes.add(suffix); + } + + /** + * Returns the list of prefixes used to generate the stem + * + * @return List of prefixes used to generate the stem or an empty list if no prefixes were required + */ + public List getPrefixes() { + return prefixes; + } + + /** + * Returns the list of suffixes used to generate the stem + * + * @return List of suffixes used to generate the stem or an empty list if no suffixes were required + */ + public List getSuffixes() { + return suffixes; + } + + /** + * Returns the text of the word's stem. + * @see #getStemLength() + */ + public char[] getStem() { + return stem; + } + + /** Returns the valid length of the text in {@link #getStem()} */ + public int getStemLength() { + return stemLength; + } + + /** Only use this if you really need a string (e.g. for testing) */ + public String getStemString() { + return new String(stem, 0, stemLength); + } +} \ No newline at end of file diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell2/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell2/Stemmer.java index 7d36c81e4ae..aa00836d6fe 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell2/Stemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell2/Stemmer.java @@ -207,82 +207,4 @@ final class Stemmer { private boolean hasCrossCheckedFlag(char flag, char[] flags) { return flags == null || Arrays.binarySearch(flags, flag) >= 0; } - - /** - * Stem represents all information known about a stem of a word. This includes the stem, and the prefixes and suffixes - * that were used to change the word into the stem. - */ - public static class Stem { - - private final List prefixes = new ArrayList(); - private final List suffixes = new ArrayList(); - private final char stem[]; - private final int stemLength; - - /** - * Creates a new Stem wrapping the given word stem - * - * @param stem Stem of a word - */ - public Stem(char stem[], int stemLength) { - this.stem = stem; - this.stemLength = stemLength; - } - - /** - * Adds a prefix to the list of prefixes used to generate this stem. Because it is assumed that prefixes are added - * depth first, the prefix is added to the front of the list - * - * @param prefix Prefix to add to the list of prefixes for this stem - */ - public void addPrefix(Affix prefix) { - prefixes.add(0, prefix); - } - - /** - * Adds a suffix to the list of suffixes used to generate this stem. Because it is assumed that suffixes are added - * depth first, the suffix is added to the end of the list - * - * @param suffix Suffix to add to the list of suffixes for this stem - */ - public void addSuffix(Affix suffix) { - suffixes.add(suffix); - } - - /** - * Returns the list of prefixes used to generate the stem - * - * @return List of prefixes used to generate the stem or an empty list if no prefixes were required - */ - public List getPrefixes() { - return prefixes; - } - - /** - * Returns the list of suffixes used to generate the stem - * - * @return List of suffixes used to generate the stem or an empty list if no suffixes were required - */ - public List getSuffixes() { - return suffixes; - } - - /** - * Returns the text of the word's stem. - * @see #getStemLength() - */ - public char[] getStem() { - return stem; - } - - /** Returns the valid length of the text in {@link #getStem()} */ - public int getStemLength() { - return stemLength; - } - - /** Only use this if you really need a string (e.g. for testing) */ - public String getStemString() { - return new String(stem, 0, stemLength); - } - } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell2/TestAllDictionaries.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell2/TestAllDictionaries.java index 02ccedb9be7..ecb21b97a7c 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell2/TestAllDictionaries.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell2/TestAllDictionaries.java @@ -26,13 +26,14 @@ import org.apache.lucene.analysis.hunspell.HunspellDictionary; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.RamUsageEstimator; +import org.junit.Ignore; /** * Can be retrieved via: * wget --mirror -np http://archive.services.openoffice.org/pub/mirror/OpenOffice.org/contrib/dictionaries/ * Note some of the files differ only in case. This may be a problem on your operating system! */ -//@Ignore("enable manually") +@Ignore("enable manually") public class TestAllDictionaries extends LuceneTestCase { // set this to the location of where you downloaded all the files diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell2/TestStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell2/TestStemmer.java index ea98f65256f..a8ac2a83fa9 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell2/TestStemmer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell2/TestStemmer.java @@ -17,7 +17,6 @@ package org.apache.lucene.analysis.hunspell2; * limitations under the License. */ -import org.apache.lucene.analysis.hunspell2.Stemmer.Stem; import org.apache.lucene.util.LuceneTestCase; import org.junit.AfterClass; import org.junit.BeforeClass;