SOLR-3737: fix Stempel factory resource loading, use singleton instance

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1374115 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-08-17 00:40:52 +00:00
parent 5a940c12d8
commit 04f3188bf9
5 changed files with 15 additions and 17 deletions

View File

@ -75,6 +75,10 @@ Bug Fixes
encoders / stemmers via the ResourceLoader now instead of Class.forName(). encoders / stemmers via the ResourceLoader now instead of Class.forName().
Solr users should now no longer have to embed these in its war. (David Smiley) Solr users should now no longer have to embed these in its war. (David Smiley)
* SOLR-3737: StempelPolishStemFilterFactory loaded its stemmer table incorrectly.
Also, ensure immutability and use only one instance of this table in RAM (lazy
loaded) since it's quite large. (sausarkar, Steven Rowe, Robert Muir)
Build Build
* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for * LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for

View File

@ -58,6 +58,13 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
return DefaultsHolder.DEFAULT_STOP_SET; return DefaultsHolder.DEFAULT_STOP_SET;
} }
/**
* Returns an unmodifiable instance of the default stemmer table.
* <p>
* The table is read from {@code DefaultsHolder.DEFAULT_TABLE}; callers are
* handed that object directly rather than a copy.
* NOTE(review): the accompanying change notes describe this table as a single,
* lazily loaded instance shared in RAM -- confirm against DefaultsHolder.
*
* @return the default stemmer table held by {@code DefaultsHolder}
*/
public static Trie getDefaultTable() {
return DefaultsHolder.DEFAULT_TABLE;
}
/** /**
* Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
* accesses the static final set the first time.; * accesses the static final set the first time.;

View File

@ -17,28 +17,17 @@ package org.apache.lucene.analysis.stempel;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.apache.lucene.analysis.stempel.StempelFilter; import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.analysis.stempel.StempelStemmer; import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.egothor.stemmer.Trie;
/** /**
* Factory for {@link StempelFilter} using a Polish stemming table. * Factory for {@link StempelFilter} using a Polish stemming table.
*/ */
public class StempelPolishStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { public class StempelPolishStemFilterFactory extends TokenFilterFactory {
private Trie stemmer = null;
private static final String STEMTABLE = "/org/apache/lucene/analysis/pl/stemmer_20000.tbl";
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new StempelFilter(input, new StempelStemmer(stemmer)); return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
}
public void inform(ResourceLoader loader) throws IOException {
stemmer = StempelStemmer.load(loader.openResource(STEMTABLE));
} }
} }

View File

@ -332,7 +332,7 @@ public class Trie {
* @param key the key * @param key the key
* @param cmd the patch command * @param cmd the patch command
*/ */
public void add(CharSequence key, CharSequence cmd) { void add(CharSequence key, CharSequence cmd) {
if (key == null || cmd == null) { if (key == null || cmd == null) {
return; return;
} }

View File

@ -22,7 +22,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
/** /**
* Tests for {@link StempelPolishStemFilterFactory} * Tests for {@link StempelPolishStemFilterFactory}
@ -31,7 +30,6 @@ public class TestStempelPolishStemFilterFactory extends BaseTokenStreamTestCase
public void testBasics() throws Exception { public void testBasics() throws Exception {
StringReader document = new StringReader("studenta studenci"); StringReader document = new StringReader("studenta studenci");
StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory(); StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory();
factory.inform(new ClasspathResourceLoader(getClass()));
TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, document)); TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, document));
assertTokenStreamContents(ts, assertTokenStreamContents(ts,
new String[] { "student", "student" }); new String[] { "student", "student" });