diff --git a/server/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArabicAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArabicAnalyzerProvider.java index 8dcc6cc9075..f5e1d882d2b 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArabicAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final ArabicAnalyzer arabicAnalyzer; - public ArabicAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + ArabicAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); arabicAnalyzer = new ArabicAnalyzer( Analysis.parseStopWords(env, settings, ArabicAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArmenianAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java rename to 
modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArmenianAnalyzerProvider.java index ba9f55f331f..d066aed14ee 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/ArmenianAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ArmenianAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.hy.ArmenianAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final ArmenianAnalyzer analyzer; - public ArmenianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + ArmenianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new ArmenianAnalyzer( Analysis.parseStopWords(env, settings, ArmenianAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BasqueAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BasqueAnalyzerProvider.java index 45ff947c61e..8fe32a697f7 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/BasqueAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BasqueAnalyzerProvider.java @@ -17,19 +17,21 @@ * 
under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.eu.BasqueAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final BasqueAnalyzer analyzer; - public BasqueAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + BasqueAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new BasqueAnalyzer( Analysis.parseStopWords(env, settings, BasqueAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/BengaliAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BengaliAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/BengaliAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BengaliAnalyzerProvider.java index 41931833301..5696d0a6045 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/BengaliAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BengaliAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.bn.BengaliAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class BengaliAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final BengaliAnalyzer analyzer; - public BengaliAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + BengaliAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new BengaliAnalyzer( Analysis.parseStopWords(env, settings, BengaliAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BrazilianAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BrazilianAnalyzerProvider.java index 36b13e67bf4..07399a2263d 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/BrazilianAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BrazilianAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.br.BrazilianAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final BrazilianAnalyzer analyzer; - public BrazilianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + BrazilianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new BrazilianAnalyzer( Analysis.parseStopWords(env, settings, BrazilianAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BulgarianAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BulgarianAnalyzerProvider.java index 26e82cbfb2f..a6b1cb97a1b 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/BulgarianAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.bg.BulgarianAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final BulgarianAnalyzer analyzer; - public BulgarianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + BulgarianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new BulgarianAnalyzer( Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CatalanAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CatalanAnalyzerProvider.java index 94babaa52f8..db229ffb492 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/CatalanAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CatalanAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.ca.CatalanAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final CatalanAnalyzer analyzer; - public CatalanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + CatalanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new CatalanAnalyzer( Analysis.parseStopWords(env, settings, CatalanAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/ChineseAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ChineseAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/ChineseAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ChineseAnalyzerProvider.java index 10e6f0dc42f..01b529188c6 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/ChineseAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ChineseAnalyzerProvider.java @@ -17,12 +17,13 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; /** * Only for old indexes @@ -31,16 +32,16 @@ public class ChineseAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final CJKAnalyzer analyzer; - public CjkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + CjkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); CharArraySet stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet()); diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index 433bef902c1..24dce7abcf3 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -24,11 +24,17 @@ import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.apache.lucene.analysis.ar.ArabicNormalizationFilter; import org.apache.lucene.analysis.ar.ArabicStemFilter; +import org.apache.lucene.analysis.bg.BulgarianAnalyzer; +import org.apache.lucene.analysis.bn.BengaliAnalyzer; import org.apache.lucene.analysis.bn.BengaliNormalizationFilter; +import org.apache.lucene.analysis.br.BrazilianAnalyzer; import 
org.apache.lucene.analysis.br.BrazilianStemFilter; +import org.apache.lucene.analysis.ca.CatalanAnalyzer; import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; +import org.apache.lucene.analysis.cjk.CJKAnalyzer; import org.apache.lucene.analysis.cjk.CJKBigramFilter; import org.apache.lucene.analysis.cjk.CJKWidthFilter; import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter; @@ -40,14 +46,22 @@ import org.apache.lucene.analysis.core.LowerCaseTokenizer; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.UpperCaseFilter; import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.cz.CzechAnalyzer; import org.apache.lucene.analysis.cz.CzechStemFilter; +import org.apache.lucene.analysis.da.DanishAnalyzer; +import org.apache.lucene.analysis.de.GermanAnalyzer; import org.apache.lucene.analysis.de.GermanNormalizationFilter; import org.apache.lucene.analysis.de.GermanStemFilter; +import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.en.KStemFilter; import org.apache.lucene.analysis.en.PorterStemFilter; +import org.apache.lucene.analysis.eu.BasqueAnalyzer; import org.apache.lucene.analysis.fa.PersianNormalizationFilter; +import org.apache.lucene.analysis.fi.FinnishAnalyzer; import org.apache.lucene.analysis.fr.FrenchAnalyzer; +import org.apache.lucene.analysis.gl.GalicianAnalyzer; import org.apache.lucene.analysis.hi.HindiNormalizationFilter; +import org.apache.lucene.analysis.hy.ArmenianAnalyzer; import org.apache.lucene.analysis.in.IndicNormalizationFilter; import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute; @@ -64,6 +78,7 @@ import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer; import org.apache.lucene.analysis.ngram.NGramTokenFilter; import org.apache.lucene.analysis.ngram.NGramTokenizer; 
+import org.apache.lucene.analysis.nl.DutchAnalyzer; import org.apache.lucene.analysis.path.PathHierarchyTokenizer; import org.apache.lucene.analysis.pattern.PatternTokenizer; import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter; @@ -73,6 +88,7 @@ import org.apache.lucene.analysis.shingle.ShingleFilter; import org.apache.lucene.analysis.snowball.SnowballFilter; import org.apache.lucene.analysis.standard.ClassicFilter; import org.apache.lucene.analysis.standard.ClassicTokenizer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; import org.apache.lucene.analysis.th.ThaiTokenizer; import org.apache.lucene.analysis.tr.ApostropheFilter; @@ -113,6 +129,24 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin { analyzers.put("fingerprint", FingerprintAnalyzerProvider::new); analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new); analyzers.put("pattern", PatternAnalyzerProvider::new); + analyzers.put("snowball", SnowballAnalyzerProvider::new); + analyzers.put("arabic", ArabicAnalyzerProvider::new); + analyzers.put("armenian", ArmenianAnalyzerProvider::new); + analyzers.put("basque", BasqueAnalyzerProvider::new); + analyzers.put("bengali", BengaliAnalyzerProvider::new); + analyzers.put("brazilian", BrazilianAnalyzerProvider::new); + analyzers.put("bulgarian", BulgarianAnalyzerProvider::new); + analyzers.put("catalan", CatalanAnalyzerProvider::new); + analyzers.put("chinese", ChineseAnalyzerProvider::new); + analyzers.put("cjk", CjkAnalyzerProvider::new); + analyzers.put("czech", CzechAnalyzerProvider::new); + analyzers.put("danish", DanishAnalyzerProvider::new); + analyzers.put("dutch", DutchAnalyzerProvider::new); + analyzers.put("english", EnglishAnalyzerProvider::new); + analyzers.put("finnish", FinnishAnalyzerProvider::new); + analyzers.put("french", FrenchAnalyzerProvider::new); + analyzers.put("galician", 
GalicianAnalyzerProvider::new); + analyzers.put("german", GermanAnalyzerProvider::new); return analyzers; } @@ -213,10 +247,108 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin { @Override public List getPreBuiltAnalyzerProviderFactories() { List analyzers = new ArrayList<>(); - analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.LUCENE, - version -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET))); - analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, version -> - new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, CharArraySet.EMPTY_SET))); + analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.LUCENE, version -> { + Analyzer a = new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, version -> { + Analyzer a = new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, + CharArraySet.EMPTY_SET); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("snowball", CachingStrategy.LUCENE, version -> { + Analyzer a = new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("arabic", CachingStrategy.LUCENE, version -> { + Analyzer a = new ArabicAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("armenian", CachingStrategy.LUCENE, version -> { + Analyzer a = new ArmenianAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("basque", CachingStrategy.LUCENE, version -> { + Analyzer a = new 
BasqueAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("bengali", CachingStrategy.LUCENE, version -> { + Analyzer a = new BengaliAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("brazilian", CachingStrategy.LUCENE, version -> { + Analyzer a = new BrazilianAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("bulgarian", CachingStrategy.LUCENE, version -> { + Analyzer a = new BulgarianAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("catalan", CachingStrategy.LUCENE, version -> { + Analyzer a = new CatalanAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("chinese", CachingStrategy.LUCENE, version -> { + // only for old indices, best effort + Analyzer a = new StandardAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("cjk", CachingStrategy.LUCENE, version -> { + Analyzer a = new CJKAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("czech", CachingStrategy.LUCENE, version -> { + Analyzer a = new CzechAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("danish", CachingStrategy.LUCENE, version -> { + Analyzer a = new DanishAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("dutch", CachingStrategy.LUCENE, version -> { + Analyzer a = new DutchAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("english", CachingStrategy.LUCENE, version -> { + Analyzer a = new 
EnglishAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("finnish", CachingStrategy.LUCENE, version -> { + Analyzer a = new FinnishAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("french", CachingStrategy.LUCENE, version -> { + Analyzer a = new FrenchAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("galician", CachingStrategy.LUCENE, version -> { + Analyzer a = new GalicianAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); + analyzers.add(new PreBuiltAnalyzerProviderFactory("german", CachingStrategy.LUCENE, version -> { + Analyzer a = new GermanAnalyzer(); + a.setVersion(version.luceneVersion); + return a; + })); return analyzers; } diff --git a/server/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CzechAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CzechAnalyzerProvider.java index 12d2349d9ba..c14c2d57a3a 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CzechAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.cz.CzechAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final CzechAnalyzer analyzer; - public CzechAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + CzechAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new CzechAnalyzer( Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DanishAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DanishAnalyzerProvider.java index 01021cbfd8e..e3f024693c2 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/DanishAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DanishAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.da.DanishAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final DanishAnalyzer analyzer; - public DanishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + DanishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new DanishAnalyzer( Analysis.parseStopWords(env, settings, DanishAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DutchAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DutchAnalyzerProvider.java index e215a89241e..70ab2a5ea62 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/DutchAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DutchAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.nl.DutchAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final DutchAnalyzer analyzer; - public DutchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + DutchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new DutchAnalyzer( Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EnglishAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EnglishAnalyzerProvider.java index 300381ef147..b14a83dbf7c 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EnglishAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final EnglishAnalyzer analyzer; - public EnglishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + EnglishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new EnglishAnalyzer( Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FinnishAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FinnishAnalyzerProvider.java index 95f0819293a..ec18a71a125 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FinnishAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.fi.FinnishAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final FinnishAnalyzer analyzer; - public FinnishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + FinnishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new FinnishAnalyzer( Analysis.parseStopWords(env, settings, FinnishAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FrenchAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FrenchAnalyzerProvider.java index eac40c375ca..caee3618afb 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/FrenchAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FrenchAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.fr.FrenchAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final FrenchAnalyzer analyzer; - public FrenchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + FrenchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new FrenchAnalyzer( Analysis.parseStopWords(env, settings, FrenchAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GalicianAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GalicianAnalyzerProvider.java index 57550594dc0..23f8da7b8dd 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GalicianAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.gl.GalicianAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final GalicianAnalyzer analyzer; - public GalicianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + GalicianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new GalicianAnalyzer( Analysis.parseStopWords(env, settings, GalicianAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GermanAnalyzerProvider.java similarity index 85% rename from server/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GermanAnalyzerProvider.java index 7951f17b79d..cf96f50845f 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/GermanAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/GermanAnalyzerProvider.java @@ -17,19 +17,21 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.de.GermanAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final GermanAnalyzer analyzer; - public GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { + GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new GermanAnalyzer( Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet()), diff --git a/server/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzer.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzer.java similarity index 95% rename from server/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzer.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzer.java index 1a096b8fa4b..5dbe902fe15 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzer.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzer.java @@ -1,4 +1,4 @@ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; /* * Licensed to Elasticsearch under one or more contributor @@ -48,12 +48,12 @@ public final class SnowballAnalyzer extends Analyzer { private CharArraySet stopSet; /** Builds the named analyzer with no stop words. 
*/ - public SnowballAnalyzer(String name) { + SnowballAnalyzer(String name) { this.name = name; } /** Builds the named analyzer with the given stop words. */ - public SnowballAnalyzer(String name, CharArraySet stopWords) { + SnowballAnalyzer(String name, CharArraySet stopWords) { this(name); stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopWords)); } diff --git a/server/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzerProvider.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzerProvider.java similarity index 92% rename from server/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzerProvider.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzerProvider.java index 84f19316331..0f213df9ad7 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/SnowballAnalyzerProvider.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballAnalyzerProvider.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.core.StopAnalyzer; @@ -26,6 +26,8 @@ import org.apache.lucene.analysis.nl.DutchAnalyzer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; +import org.elasticsearch.index.analysis.Analysis; import java.util.HashMap; import java.util.Map; @@ -60,7 +62,7 @@ public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider 0); Query query = new QueryStringQueryBuilder("the quick fox") .field(STRING_FIELD_NAME) - .analyzer("english") + .analyzer("stop") .toQuery(createShardContext()); BooleanQuery expected = new BooleanQuery.Builder() .add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), Occur.SHOULD) @@ -1313,7 +1313,7 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase 0); Query query = new QueryStringQueryBuilder("the* quick fox") .field(STRING_FIELD_NAME) - .analyzer("english") + .analyzer("stop") .toQuery(createShardContext()); BooleanQuery expected = new BooleanQuery.Builder() .add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), Occur.SHOULD) diff --git a/server/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java index 7ff2b7ec122..0cd5e7fe330 100644 --- a/server/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java @@ -629,7 +629,7 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase 0); Query query = new SimpleQueryStringBuilder("the quick fox") .field(STRING_FIELD_NAME) - .analyzer("english") + .analyzer("stop") 
.toQuery(createShardContext()); BooleanQuery expected = new BooleanQuery.Builder() .add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), BooleanClause.Occur.SHOULD) @@ -642,7 +642,7 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase 0); Query query = new SimpleQueryStringBuilder("the* quick fox") .field(STRING_FIELD_NAME) - .analyzer("english") + .analyzer("stop") .toQuery(createShardContext()); BooleanQuery expected = new BooleanQuery.Builder() .add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), BooleanClause.Occur.SHOULD) diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index 717bab12ea5..35c5a19cc2e 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -22,6 +22,9 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.search.join.ScoreMode; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchRequestBuilder; @@ -36,6 +39,7 @@ import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider; import org.elasticsearch.index.analysis.AnalyzerProvider; +import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; import org.elasticsearch.index.query.AbstractQueryBuilder; import 
org.elasticsearch.index.query.IdsQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; @@ -66,9 +70,11 @@ import java.io.IOException; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; +import java.util.List; import java.util.Locale; import java.util.Map; +import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; import static org.elasticsearch.client.Requests.searchRequest; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; @@ -113,7 +119,7 @@ public class HighlighterSearchIT extends ESIntegTestCase { @Override protected Collection> nodePlugins() { - return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class, MockWhitespacePlugin.class); + return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class, MockAnalysisPlugin.class); } public void testHighlightingWithStoredKeyword() throws IOException { @@ -765,14 +771,19 @@ public class HighlighterSearchIT extends ESIntegTestCase { } private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception { + Settings.Builder settings = Settings.builder(); + settings.put(indexSettings()); + settings.put("index.analysis.analyzer.mock_english.tokenizer", "standard"); + settings.put("index.analysis.analyzer.mock_english.filter", "mock_snowball"); assertAcked(prepareCreate("test") + .setSettings(settings) .addMapping("type1", XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("properties") .startObject("foo") .field("type", "text") .field("term_vector", "with_positions_offsets") .field("store", true) - .field("analyzer", "english") + .field("analyzer", "mock_english") .startObject("fields") .startObject("plain") .field("type", "text") @@ -785,7 +796,7 @@ public class HighlighterSearchIT extends ESIntegTestCase { .field("type", "text") .field("term_vector", "with_positions_offsets") .field("store", true) - .field("analyzer", "english") + 
.field("analyzer", "mock_english") .startObject("fields") .startObject("plain") .field("type", "text") @@ -2819,7 +2830,7 @@ public class HighlighterSearchIT extends ESIntegTestCase { assertAcked(prepareCreate("test").setSettings(builder.build()) .addMapping("type1", "field1", "type=text,term_vector=with_positions_offsets,search_analyzer=synonym," + - "analyzer=english,index_options=offsets")); + "analyzer=standard,index_options=offsets")); ensureGreen(); client().prepareIndex("test", "type1", "0").setSource( @@ -2983,7 +2994,39 @@ public class HighlighterSearchIT extends ESIntegTestCase { } } - public static class MockWhitespacePlugin extends Plugin implements AnalysisPlugin { + public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin { + + /** Test-only stand-in for a stemmer: drops a trailing 's' and, for terms longer than three chars, a trailing "ing". */ public static final class MockSnowBall extends TokenFilter { + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + + /** Sole constructor. */ + MockSnowBall(TokenStream in) { + super(in); + } + + /** Advances the wrapped stream and trims the current term in place; returns false once the input is exhausted. */ @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + final char[] buffer = termAtt.buffer(); + final int length = termAtt.length(); + /* length > 0: a zero-length term would otherwise read buffer[-1] */ if (length > 0 && buffer[length - 1] == 's') { + termAtt.setLength(length - 1); + } + if (length > 3) { + if (buffer[length - 1] == 'g' && buffer[length - 2] == 'n' && buffer[length - 3] == 'i') { + termAtt.setLength(length - 3); + } + } + return true; + } else + return false; + } + } + + /** Registers the filter under the name "mock_snowball". */ @Override + public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() { + return singletonList(PreConfiguredTokenFilter.singleton("mock_snowball", false, MockSnowBall::new)); + } @Override public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() { diff --git a/server/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java b/server/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java index 7aef2d208ec..147caa4c1c1 100644 --- a/server/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java +++ 
b/server/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java @@ -19,6 +19,12 @@ package org.elasticsearch.search.query; +import org.apache.lucene.analysis.CharacterUtils; +import org.apache.lucene.analysis.MockLowerCaseFilter; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.index.IndexRequestBuilder; @@ -28,12 +34,19 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.analysis.CharFilterFactory; +import org.elasticsearch.index.analysis.MultiTermAwareComponent; +import org.elasticsearch.index.analysis.PreConfiguredCharFilter; +import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.Operator; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.SimpleQueryStringBuilder; import org.elasticsearch.index.query.SimpleQueryStringFlag; +import org.elasticsearch.indices.analysis.AnalysisModule; +import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; @@ -42,14 +55,19 @@ import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.InternalSettingsPlugin; import java.io.IOException; +import java.io.Reader; import java.util.ArrayList; import java.util.Arrays; 
import java.util.Collection; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutionException; +import java.util.function.Function; +import static java.util.Collections.singletonList; +import static java.util.Collections.singletonMap; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import static org.elasticsearch.index.query.QueryBuilders.boolQuery; import static org.elasticsearch.index.query.QueryBuilders.simpleQueryStringQuery; @@ -72,11 +90,15 @@ import static org.hamcrest.Matchers.equalTo; public class SimpleQueryStringIT extends ESIntegTestCase { @Override protected Collection> nodePlugins() { - return Arrays.asList(InternalSettingsPlugin.class); // uses index.version.created + return Arrays.asList(MockAnalysisPlugin.class, InternalSettingsPlugin.class); // uses index.version.created } public void testSimpleQueryString() throws ExecutionException, InterruptedException { - createIndex("test"); + Settings.Builder settings = Settings.builder(); + settings.put(indexSettings()); + settings.put("index.analysis.analyzer.mock_snowball.tokenizer", "standard"); + settings.put("index.analysis.analyzer.mock_snowball.filter", "mock_snowball"); + createIndex("test", settings.build()); indexRandom(true, false, client().prepareIndex("test", "type1", "1").setSource("body", "foo"), client().prepareIndex("test", "type1", "2").setSource("body", "bar"), @@ -108,7 +130,7 @@ public class SimpleQueryStringIT extends ESIntegTestCase { assertSearchHits(searchResponse, "4", "5"); searchResponse = client().prepareSearch().setQuery( - simpleQueryStringQuery("eggplants").analyzer("snowball")).get(); + simpleQueryStringQuery("eggplants").analyzer("mock_snowball")).get(); assertHitCount(searchResponse, 1L); assertFirstHit(searchResponse, hasId("4")); @@ -312,7 +334,7 @@ public class SimpleQueryStringIT extends ESIntegTestCase { .startObject("properties") .startObject("location") 
.field("type", "text") - .field("analyzer", "german") + .field("analyzer", "standard") .endObject() .endObject() .endObject() @@ -583,4 +605,33 @@ public class SimpleQueryStringIT extends ESIntegTestCase { } assertThat(hitIds, containsInAnyOrder(ids)); } + + /** Test plugin that registers the "mock_snowball" token filter used by this test class. */ public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin { + + /** Test-only stand-in for a stemmer: drops a single trailing 's' from each term. */ public static final class MockSnowBall extends TokenFilter { + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + + /** Sole constructor. */ + MockSnowBall(TokenStream in) { + super(in); + } + + /** Advances the wrapped stream and trims the current term in place; returns false once the input is exhausted. */ @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + char[] buffer = termAtt.buffer(); + /* length > 0: a zero-length term would otherwise read buffer[-1] */ if (termAtt.length() > 0 && buffer[termAtt.length() - 1] == 's') { + termAtt.setLength(termAtt.length() - 1); + } + return true; + } else + return false; + } + } + + /** Registers the filter under the name "mock_snowball". */ @Override + public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() { + return singletonList(PreConfiguredTokenFilter.singleton("mock_snowball", false, MockSnowBall::new)); + } + } } diff --git a/server/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java b/server/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java index 34501ba8a1b..8b3aff90e8d 100644 --- a/server/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java +++ b/server/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java @@ -87,7 +87,7 @@ public class SimpleValidateQueryIT extends ESIntegTestCase { .setSource(XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("properties") .startObject("foo").field("type", "text").endObject() .startObject("bar").field("type", "integer").endObject() - .startObject("baz").field("type", "text").field("analyzer", "snowball").endObject() + .startObject("baz").field("type", "text").field("analyzer", "standard").endObject() .startObject("pin").startObject("properties").startObject("location").field("type", "geo_point").endObject().endObject().endObject() 
.endObject().endObject().endObject()) .execute().actionGet();