lucene 4: upgrade analyzers
This commit is contained in:
parent
7aacc8d448
commit
05138bb2fb
2
pom.xml
2
pom.xml
|
@ -51,7 +51,7 @@
|
|||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-analyzers</artifactId>
|
||||
<artifactId>lucene-analyzers-common</artifactId>
|
||||
<version>${lucene.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
|
|
|
@ -19,11 +19,11 @@
|
|||
|
||||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import java.io.IOException;
|
||||
|
|
|
@ -19,13 +19,13 @@
|
|||
|
||||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
package org.elasticsearch.common.lucene;
|
||||
|
||||
import org.apache.lucene.analysis.KeywordAnalyzer;
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.search.*;
|
||||
|
|
|
@ -19,10 +19,14 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import java.io.IOException;
|
||||
|
|
|
@ -19,7 +19,9 @@
|
|||
|
||||
package org.elasticsearch.indices.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.apache.lucene.analysis.ar.ArabicStemFilter;
|
||||
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
|
||||
|
@ -29,6 +31,7 @@ import org.apache.lucene.analysis.ca.CatalanAnalyzer;
|
|||
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
|
||||
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
|
||||
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
|
||||
import org.apache.lucene.analysis.core.*;
|
||||
import org.apache.lucene.analysis.cz.CzechAnalyzer;
|
||||
import org.apache.lucene.analysis.cz.CzechStemFilter;
|
||||
import org.apache.lucene.analysis.da.DanishAnalyzer;
|
||||
|
@ -37,11 +40,11 @@ import org.apache.lucene.analysis.de.GermanStemFilter;
|
|||
import org.apache.lucene.analysis.el.GreekAnalyzer;
|
||||
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
||||
import org.apache.lucene.analysis.en.KStemFilter;
|
||||
import org.apache.lucene.analysis.en.PorterStemFilter;
|
||||
import org.apache.lucene.analysis.es.SpanishAnalyzer;
|
||||
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
|
||||
import org.apache.lucene.analysis.fa.PersianAnalyzer;
|
||||
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
|
||||
import org.apache.lucene.analysis.fr.ElisionFilter;
|
||||
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
||||
import org.apache.lucene.analysis.fr.FrenchStemFilter;
|
||||
import org.apache.lucene.analysis.ga.IrishAnalyzer;
|
||||
|
@ -66,7 +69,6 @@ import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
|
|||
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
||||
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
|
||||
import org.apache.lucene.analysis.ru.RussianAnalyzer;
|
||||
import org.apache.lucene.analysis.ru.RussianStemFilter;
|
||||
import org.apache.lucene.analysis.shingle.ShingleFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
|
@ -74,6 +76,7 @@ import org.apache.lucene.analysis.standard.*;
|
|||
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
|
||||
import org.apache.lucene.analysis.th.ThaiAnalyzer;
|
||||
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
||||
import org.apache.lucene.analysis.util.ElisionFilter;
|
||||
import org.elasticsearch.ElasticSearchIllegalStateException;
|
||||
import org.elasticsearch.common.component.AbstractComponent;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
|
@ -565,7 +568,8 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
return new ElisionFilter(Lucene.ANALYZER_VERSION, tokenStream);
|
||||
// LUCENE 4 UPGRADE: French default for now, make set of articles configurable
|
||||
return new ElisionFilter(tokenStream, FrenchAnalyzer.DEFAULT_ARTICLES);
|
||||
}
|
||||
}));
|
||||
tokenFilterFactories.put("arabic_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
|
@ -642,7 +646,7 @@ public class IndicesAnalysisService extends AbstractComponent {
|
|||
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
return new RussianStemFilter(tokenStream);
|
||||
return new SnowballFilter(tokenStream, "Russian");
|
||||
}
|
||||
}));
|
||||
|
||||
|
|
Loading…
Reference in New Issue