SOLR-3737: fix Stempel factory resource loading, use singleton instance

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1374115 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-08-17 00:40:52 +00:00
parent 5a940c12d8
commit 04f3188bf9
5 changed files with 15 additions and 17 deletions

View File

@ -75,6 +75,10 @@ Bug Fixes
encoders / stemmers via the ResourceLoader now instead of Class.forName().
Solr users should no longer have to embed these in the Solr war. (David Smiley)
* SOLR-3737: StempelPolishStemFilterFactory loaded its stemmer table incorrectly.
Also, ensure immutability and use only one instance of this table in RAM (lazy
loaded) since it's quite large. (sausarkar, Steven Rowe, Robert Muir)
Build
* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for

View File

@ -58,6 +58,13 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
return DefaultsHolder.DEFAULT_STOP_SET;
}
/**
 * Exposes the default stemmer table kept in {@code DefaultsHolder.DEFAULT_TABLE}.
 * The returned instance is unmodifiable.
 *
 * @return the default stemmer table
 */
public static Trie getDefaultTable() {
  final Trie defaultTable = DefaultsHolder.DEFAULT_TABLE;
  return defaultTable;
}
/**
* Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
* accesses the static final set the first time.

View File

@ -17,28 +17,17 @@ package org.apache.lucene.analysis.stempel;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.egothor.stemmer.Trie;
/**
* Factory for {@link StempelFilter} using a Polish stemming table.
*/
public class StempelPolishStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private Trie stemmer = null;
private static final String STEMTABLE = "/org/apache/lucene/analysis/pl/stemmer_20000.tbl";
public class StempelPolishStemFilterFactory extends TokenFilterFactory {
public TokenStream create(TokenStream input) {
return new StempelFilter(input, new StempelStemmer(stemmer));
}
public void inform(ResourceLoader loader) throws IOException {
stemmer = StempelStemmer.load(loader.openResource(STEMTABLE));
return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
}
}

View File

@ -332,7 +332,7 @@ public class Trie {
* @param key the key
* @param cmd the patch command
*/
public void add(CharSequence key, CharSequence cmd) {
void add(CharSequence key, CharSequence cmd) {
if (key == null || cmd == null) {
return;
}

View File

@ -22,7 +22,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
/**
* Tests for {@link StempelPolishStemFilterFactory}
@ -31,7 +30,6 @@ public class TestStempelPolishStemFilterFactory extends BaseTokenStreamTestCase
public void testBasics() throws Exception {
StringReader document = new StringReader("studenta studenci");
StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory();
factory.inform(new ClasspathResourceLoader(getClass()));
TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, document));
assertTokenStreamContents(ts,
new String[] { "student", "student" });