mirror of https://github.com/apache/lucene.git
break out this class
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5468@1571305 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e541984b62
commit
ad20d99b35
|
@ -22,7 +22,6 @@ import java.util.List;
|
|||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.hunspell2.Stemmer.Stem;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
package org.apache.lucene.analysis.hunspell2;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Stem represents all information known about a stem of a word. This includes the stem, and the prefixes and suffixes
|
||||
* that were used to change the word into the stem.
|
||||
*/
|
||||
final class Stem {
|
||||
final List<Affix> prefixes = new ArrayList<Affix>();
|
||||
final List<Affix> suffixes = new ArrayList<Affix>();
|
||||
final char stem[];
|
||||
final int stemLength;
|
||||
|
||||
/**
|
||||
* Creates a new Stem wrapping the given word stem
|
||||
*
|
||||
* @param stem Stem of a word
|
||||
*/
|
||||
public Stem(char stem[], int stemLength) {
|
||||
this.stem = stem;
|
||||
this.stemLength = stemLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a prefix to the list of prefixes used to generate this stem. Because it is assumed that prefixes are added
|
||||
* depth first, the prefix is added to the front of the list
|
||||
*
|
||||
* @param prefix Prefix to add to the list of prefixes for this stem
|
||||
*/
|
||||
public void addPrefix(Affix prefix) {
|
||||
prefixes.add(0, prefix);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a suffix to the list of suffixes used to generate this stem. Because it is assumed that suffixes are added
|
||||
* depth first, the suffix is added to the end of the list
|
||||
*
|
||||
* @param suffix Suffix to add to the list of suffixes for this stem
|
||||
*/
|
||||
public void addSuffix(Affix suffix) {
|
||||
suffixes.add(suffix);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the list of prefixes used to generate the stem
|
||||
*
|
||||
* @return List of prefixes used to generate the stem or an empty list if no prefixes were required
|
||||
*/
|
||||
public List<Affix> getPrefixes() {
|
||||
return prefixes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the list of suffixes used to generate the stem
|
||||
*
|
||||
* @return List of suffixes used to generate the stem or an empty list if no suffixes were required
|
||||
*/
|
||||
public List<Affix> getSuffixes() {
|
||||
return suffixes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the text of the word's stem.
|
||||
* @see #getStemLength()
|
||||
*/
|
||||
public char[] getStem() {
|
||||
return stem;
|
||||
}
|
||||
|
||||
/** Returns the valid length of the text in {@link #getStem()} */
|
||||
public int getStemLength() {
|
||||
return stemLength;
|
||||
}
|
||||
|
||||
/** Only use this if you really need a string (e.g. for testing) */
|
||||
public String getStemString() {
|
||||
return new String(stem, 0, stemLength);
|
||||
}
|
||||
}
|
|
@ -207,82 +207,4 @@ final class Stemmer {
|
|||
private boolean hasCrossCheckedFlag(char flag, char[] flags) {
|
||||
return flags == null || Arrays.binarySearch(flags, flag) >= 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stem represents all information known about a stem of a word. This includes the stem, and the prefixes and suffixes
|
||||
* that were used to change the word into the stem.
|
||||
*/
|
||||
public static class Stem {
|
||||
|
||||
private final List<Affix> prefixes = new ArrayList<Affix>();
|
||||
private final List<Affix> suffixes = new ArrayList<Affix>();
|
||||
private final char stem[];
|
||||
private final int stemLength;
|
||||
|
||||
/**
|
||||
* Creates a new Stem wrapping the given word stem
|
||||
*
|
||||
* @param stem Stem of a word
|
||||
*/
|
||||
public Stem(char stem[], int stemLength) {
|
||||
this.stem = stem;
|
||||
this.stemLength = stemLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a prefix to the list of prefixes used to generate this stem. Because it is assumed that prefixes are added
|
||||
* depth first, the prefix is added to the front of the list
|
||||
*
|
||||
* @param prefix Prefix to add to the list of prefixes for this stem
|
||||
*/
|
||||
public void addPrefix(Affix prefix) {
|
||||
prefixes.add(0, prefix);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a suffix to the list of suffixes used to generate this stem. Because it is assumed that suffixes are added
|
||||
* depth first, the suffix is added to the end of the list
|
||||
*
|
||||
* @param suffix Suffix to add to the list of suffixes for this stem
|
||||
*/
|
||||
public void addSuffix(Affix suffix) {
|
||||
suffixes.add(suffix);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the list of prefixes used to generate the stem
|
||||
*
|
||||
* @return List of prefixes used to generate the stem or an empty list if no prefixes were required
|
||||
*/
|
||||
public List<Affix> getPrefixes() {
|
||||
return prefixes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the list of suffixes used to generate the stem
|
||||
*
|
||||
* @return List of suffixes used to generate the stem or an empty list if no suffixes were required
|
||||
*/
|
||||
public List<Affix> getSuffixes() {
|
||||
return suffixes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the text of the word's stem.
|
||||
* @see #getStemLength()
|
||||
*/
|
||||
public char[] getStem() {
|
||||
return stem;
|
||||
}
|
||||
|
||||
/** Returns the valid length of the text in {@link #getStem()} */
|
||||
public int getStemLength() {
|
||||
return stemLength;
|
||||
}
|
||||
|
||||
/** Only use this if you really need a string (e.g. for testing) */
|
||||
public String getStemString() {
|
||||
return new String(stem, 0, stemLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,13 +26,14 @@ import org.apache.lucene.analysis.hunspell.HunspellDictionary;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.junit.Ignore;
|
||||
|
||||
/**
|
||||
* Can be retrieved via:
|
||||
* wget --mirror -np http://archive.services.openoffice.org/pub/mirror/OpenOffice.org/contrib/dictionaries/
|
||||
* Note some of the files differ only in case. This may be a problem on your operating system!
|
||||
*/
|
||||
//@Ignore("enable manually")
|
||||
@Ignore("enable manually")
|
||||
public class TestAllDictionaries extends LuceneTestCase {
|
||||
|
||||
// set this to the location of where you downloaded all the files
|
||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.lucene.analysis.hunspell2;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.hunspell2.Stemmer.Stem;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
|
Loading…
Reference in New Issue