mirror of https://github.com/apache/lucene.git
SOLR-3737: fix Stempel factory resource loading, use singleton instance
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1374115 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5a940c12d8
commit
04f3188bf9
|
@ -75,6 +75,10 @@ Bug Fixes
|
||||||
encoders / stemmers via the ResourceLoader now instead of Class.forName().
|
encoders / stemmers via the ResourceLoader now instead of Class.forName().
|
||||||
Solr users should no longer have to embed these in the Solr war. (David Smiley)
|
Solr users should no longer have to embed these in the Solr war. (David Smiley)
|
||||||
|
|
||||||
|
* SOLR-3737: StempelPolishStemFilterFactory loaded its stemmer table incorrectly.
|
||||||
|
Also, ensure immutability and use only one instance of this table in RAM (lazy
|
||||||
|
loaded) since it's quite large. (sausarkar, Steven Rowe, Robert Muir)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
|
|
||||||
* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for
|
* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for
|
||||||
|
|
|
@ -58,6 +58,13 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
|
||||||
return DefaultsHolder.DEFAULT_STOP_SET;
|
return DefaultsHolder.DEFAULT_STOP_SET;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an unmodifiable instance of the default stemmer table.
|
||||||
|
*/
|
||||||
|
public static Trie getDefaultTable() {
|
||||||
|
return DefaultsHolder.DEFAULT_TABLE;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
|
* Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
|
||||||
* accesses the static final set the first time.
|
* accesses the static final set the first time.
|
||||||
|
|
|
@ -17,28 +17,17 @@ package org.apache.lucene.analysis.stempel;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.pl.PolishAnalyzer;
|
||||||
import org.apache.lucene.analysis.stempel.StempelFilter;
|
import org.apache.lucene.analysis.stempel.StempelFilter;
|
||||||
import org.apache.lucene.analysis.stempel.StempelStemmer;
|
import org.apache.lucene.analysis.stempel.StempelStemmer;
|
||||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
|
||||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
|
||||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||||
import org.egothor.stemmer.Trie;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for {@link StempelFilter} using a Polish stemming table.
|
* Factory for {@link StempelFilter} using a Polish stemming table.
|
||||||
*/
|
*/
|
||||||
public class StempelPolishStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
public class StempelPolishStemFilterFactory extends TokenFilterFactory {
|
||||||
private Trie stemmer = null;
|
|
||||||
private static final String STEMTABLE = "/org/apache/lucene/analysis/pl/stemmer_20000.tbl";
|
|
||||||
|
|
||||||
public TokenStream create(TokenStream input) {
|
public TokenStream create(TokenStream input) {
|
||||||
return new StempelFilter(input, new StempelStemmer(stemmer));
|
return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
|
||||||
}
|
|
||||||
|
|
||||||
public void inform(ResourceLoader loader) throws IOException {
|
|
||||||
stemmer = StempelStemmer.load(loader.openResource(STEMTABLE));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -332,7 +332,7 @@ public class Trie {
|
||||||
* @param key the key
|
* @param key the key
|
||||||
* @param cmd the patch command
|
* @param cmd the patch command
|
||||||
*/
|
*/
|
||||||
public void add(CharSequence key, CharSequence cmd) {
|
void add(CharSequence key, CharSequence cmd) {
|
||||||
if (key == null || cmd == null) {
|
if (key == null || cmd == null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,7 +22,6 @@ import java.io.StringReader;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||||
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests for {@link StempelPolishStemFilterFactory}
|
* Tests for {@link StempelPolishStemFilterFactory}
|
||||||
|
@ -31,7 +30,6 @@ public class TestStempelPolishStemFilterFactory extends BaseTokenStreamTestCase
|
||||||
public void testBasics() throws Exception {
|
public void testBasics() throws Exception {
|
||||||
StringReader document = new StringReader("studenta studenci");
|
StringReader document = new StringReader("studenta studenci");
|
||||||
StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory();
|
StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory();
|
||||||
factory.inform(new ClasspathResourceLoader(getClass()));
|
|
||||||
TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, document));
|
TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, document));
|
||||||
assertTokenStreamContents(ts,
|
assertTokenStreamContents(ts,
|
||||||
new String[] { "student", "student" });
|
new String[] { "student", "student" });
|
||||||
|
|
Loading…
Reference in New Issue