From 05138bb2fbbf38a1c1e14469dcf33f8e857f5adf Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Thu, 25 Oct 2012 15:51:34 -0400 Subject: [PATCH] lucene 4: upgrade analyzers --- pom.xml | 2 +- .../analysis/miscellaneous/UniqueTokenFilter.java | 2 +- .../miscellaneous/WordDelimiterFilter.java | 2 +- .../org/elasticsearch/common/lucene/Lucene.java | 2 +- .../index/analysis/StandardHtmlStripAnalyzer.java | 6 +++++- .../indices/analysis/IndicesAnalysisService.java | 14 +++++++++----- 6 files changed, 18 insertions(+), 10 deletions(-) diff --git a/pom.xml b/pom.xml index 74a6e5d4614..e41b516d3fd 100644 --- a/pom.xml +++ b/pom.xml @@ -51,7 +51,7 @@ org.apache.lucene - lucene-analyzers + lucene-analyzers-common ${lucene.version} compile diff --git a/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java b/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java index 020c26b0d2f..0c85ea9fd4c 100644 --- a/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java +++ b/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java @@ -19,11 +19,11 @@ package org.apache.lucene.analysis.miscellaneous; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; import java.io.IOException; diff --git a/src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java b/src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java index e5d05e99482..930a09a1100 100644 --- a/src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java +++ b/src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java @@ -19,13 +19,13 @@ package org.apache.lucene.analysis.miscellaneous; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; diff --git a/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/src/main/java/org/elasticsearch/common/lucene/Lucene.java index 67c6f52c2f9..6929b163224 100644 --- a/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -19,7 +19,7 @@ package org.elasticsearch.common.lucene; -import org.apache.lucene.analysis.KeywordAnalyzer; +import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.*; import org.apache.lucene.search.*; diff --git a/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java b/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java index b77861c551b..2b03fc82999 100644 --- a/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java +++ b/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java @@ -19,10 +19,14 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.core.StopAnalyzer; +import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.util.Version; import java.io.IOException; diff --git a/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java b/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java index 57ed7cfc46a..49d7a2f3d9f 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java +++ b/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java @@ -19,7 +19,9 @@ package org.elasticsearch.indices.analysis; -import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.apache.lucene.analysis.ar.ArabicStemFilter; import org.apache.lucene.analysis.bg.BulgarianAnalyzer; @@ -29,6 +31,7 @@ import org.apache.lucene.analysis.ca.CatalanAnalyzer; import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; import org.apache.lucene.analysis.cjk.CJKAnalyzer; import org.apache.lucene.analysis.cn.ChineseAnalyzer; +import org.apache.lucene.analysis.core.*; import org.apache.lucene.analysis.cz.CzechAnalyzer; import org.apache.lucene.analysis.cz.CzechStemFilter; import org.apache.lucene.analysis.da.DanishAnalyzer; @@ -37,11 +40,11 @@ import org.apache.lucene.analysis.de.GermanStemFilter; import org.apache.lucene.analysis.el.GreekAnalyzer; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.en.KStemFilter; +import org.apache.lucene.analysis.en.PorterStemFilter; import org.apache.lucene.analysis.es.SpanishAnalyzer; import org.apache.lucene.analysis.eu.BasqueAnalyzer; import org.apache.lucene.analysis.fa.PersianAnalyzer; import org.apache.lucene.analysis.fi.FinnishAnalyzer; -import org.apache.lucene.analysis.fr.ElisionFilter; import org.apache.lucene.analysis.fr.FrenchAnalyzer; import org.apache.lucene.analysis.fr.FrenchStemFilter; import org.apache.lucene.analysis.ga.IrishAnalyzer; @@ -66,7 +69,6 @@ import org.apache.lucene.analysis.pt.PortugueseAnalyzer; import org.apache.lucene.analysis.reverse.ReverseStringFilter; import org.apache.lucene.analysis.ro.RomanianAnalyzer; import org.apache.lucene.analysis.ru.RussianAnalyzer; -import org.apache.lucene.analysis.ru.RussianStemFilter; import org.apache.lucene.analysis.shingle.ShingleFilter; import org.apache.lucene.analysis.snowball.SnowballAnalyzer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -74,6 +76,7 @@ import org.apache.lucene.analysis.standard.*; import org.apache.lucene.analysis.sv.SwedishAnalyzer; import org.apache.lucene.analysis.th.ThaiAnalyzer; import org.apache.lucene.analysis.tr.TurkishAnalyzer; +import org.apache.lucene.analysis.util.ElisionFilter; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.inject.Inject; @@ -565,7 +568,8 @@ public class IndicesAnalysisService extends AbstractComponent { @Override public TokenStream create(TokenStream tokenStream) { - return new ElisionFilter(Lucene.ANALYZER_VERSION, tokenStream); + // LUCENE 4 UPGRADE: French default for now, make set of articles configurable + return new ElisionFilter(tokenStream, FrenchAnalyzer.DEFAULT_ARTICLES); } })); tokenFilterFactories.put("arabic_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @@ -642,7 +646,7 @@ public class IndicesAnalysisService extends AbstractComponent { @Override public TokenStream create(TokenStream tokenStream) { - return new RussianStemFilter(tokenStream); + return new SnowballFilter(tokenStream, "Russian"); } }));