From 258be2b135e49df263a546a67bcea9aa2c5ef283 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 30 May 2017 00:54:46 +0200 Subject: [PATCH] Moved `keyword_marker`, `trim`, `snowball` and `porter_stem` token filter factories from core to the analysis-common module. Relates to #23658 --- .../indices/analysis/AnalysisModule.java | 8 --- .../indices/analyze/AnalyzeActionIT.java | 29 ++------- .../analysis/common/CommonAnalysisPlugin.java | 4 ++ .../KeywordMarkerTokenFilterFactory.java | 6 +- .../common}/PorterStemTokenFilterFactory.java | 5 +- .../common}/SnowballTokenFilterFactory.java | 5 +- .../common}/TrimTokenFilterFactory.java | 5 +- .../common/CommonAnalysisFactoryTests.java | 6 +- .../KeywordMarkerFilterFactoryTests.java | 11 ++-- .../test/analysis-common/40_token_filters.yml | 64 +++++++++++++++++++ .../analysis/AnalysisFactoryTestCase.java | 12 ++-- 11 files changed, 101 insertions(+), 54 deletions(-) rename {core/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/KeywordMarkerTokenFilterFactory.java (93%) rename {core/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/PorterStemTokenFilterFactory.java (84%) rename {core/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/SnowballTokenFilterFactory.java (87%) rename {core/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/TrimTokenFilterFactory.java (87%) rename {core/src/test/java/org/elasticsearch/index/analysis => modules/analysis-common/src/test/java/org/elasticsearch/analysis/common}/KeywordMarkerFilterFactoryTests.java (93%) diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java index 
4dd146599c9..9e378f66793 100644 --- a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java +++ b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java @@ -81,7 +81,6 @@ import org.elasticsearch.index.analysis.KStemTokenFilterFactory; import org.elasticsearch.index.analysis.KeepTypesFilterFactory; import org.elasticsearch.index.analysis.KeepWordFilterFactory; import org.elasticsearch.index.analysis.KeywordAnalyzerProvider; -import org.elasticsearch.index.analysis.KeywordMarkerTokenFilterFactory; import org.elasticsearch.index.analysis.KeywordTokenizerFactory; import org.elasticsearch.index.analysis.LatvianAnalyzerProvider; import org.elasticsearch.index.analysis.LengthTokenFilterFactory; @@ -101,7 +100,6 @@ import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory; import org.elasticsearch.index.analysis.PatternTokenizerFactory; import org.elasticsearch.index.analysis.PersianAnalyzerProvider; import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory; -import org.elasticsearch.index.analysis.PorterStemTokenFilterFactory; import org.elasticsearch.index.analysis.PortugueseAnalyzerProvider; import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; import org.elasticsearch.index.analysis.PreConfiguredTokenizer; @@ -115,7 +113,6 @@ import org.elasticsearch.index.analysis.SerbianNormalizationFilterFactory; import org.elasticsearch.index.analysis.ShingleTokenFilterFactory; import org.elasticsearch.index.analysis.SimpleAnalyzerProvider; import org.elasticsearch.index.analysis.SnowballAnalyzerProvider; -import org.elasticsearch.index.analysis.SnowballTokenFilterFactory; import org.elasticsearch.index.analysis.SoraniAnalyzerProvider; import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory; import org.elasticsearch.index.analysis.SpanishAnalyzerProvider; @@ -132,7 +129,6 @@ import org.elasticsearch.index.analysis.ThaiAnalyzerProvider; import 
org.elasticsearch.index.analysis.ThaiTokenizerFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.TokenizerFactory; -import org.elasticsearch.index.analysis.TrimTokenFilterFactory; import org.elasticsearch.index.analysis.TruncateTokenFilterFactory; import org.elasticsearch.index.analysis.TurkishAnalyzerProvider; import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory; @@ -212,7 +208,6 @@ public final class AnalysisModule { tokenFilters.register("length", LengthTokenFilterFactory::new); tokenFilters.register("lowercase", LowerCaseTokenFilterFactory::new); tokenFilters.register("uppercase", UpperCaseTokenFilterFactory::new); - tokenFilters.register("porter_stem", PorterStemTokenFilterFactory::new); tokenFilters.register("kstem", KStemTokenFilterFactory::new); tokenFilters.register("standard", StandardTokenFilterFactory::new); tokenFilters.register("nGram", NGramTokenFilterFactory::new); @@ -223,10 +218,8 @@ public final class AnalysisModule { tokenFilters.register("min_hash", MinHashTokenFilterFactory::new); tokenFilters.register("unique", UniqueTokenFilterFactory::new); tokenFilters.register("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new)); - tokenFilters.register("trim", TrimTokenFilterFactory::new); tokenFilters.register("limit", LimitTokenCountFilterFactory::new); tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new)); - tokenFilters.register("snowball", SnowballTokenFilterFactory::new); tokenFilters.register("stemmer", StemmerTokenFilterFactory::new); tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new); tokenFilters.register("elision", ElisionTokenFilterFactory::new); @@ -244,7 +237,6 @@ public final class AnalysisModule { tokenFilters.register("french_stem", FrenchStemTokenFilterFactory::new); tokenFilters.register("german_stem", GermanStemTokenFilterFactory::new); 
tokenFilters.register("russian_stem", RussianStemTokenFilterFactory::new); - tokenFilters.register("keyword_marker", requriesAnalysisSettings(KeywordMarkerTokenFilterFactory::new)); tokenFilters.register("stemmer_override", requriesAnalysisSettings(StemmerOverrideTokenFilterFactory::new)); tokenFilters.register("arabic_normalization", ArabicNormalizationFilterFactory::new); tokenFilters.register("german_normalization", GermanNormalizationFilterFactory::new); diff --git a/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java b/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java index ebfeb5f92d1..9ff301791ae 100644 --- a/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java +++ b/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java @@ -275,38 +275,17 @@ public class AnalyzeActionIT extends ESIntegTestCase { assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1)); assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("\nthis is a test\n")); - //check other attributes analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled") - .setExplain(true).setTokenizer("standard").addTokenFilter("snowball").get(); + .setExplain(true).setTokenizer("standard").addTokenFilter("lowercase").get(); assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1)); - assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball")); + assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase")); assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3)); - assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubl")); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubled")); String[] expectedAttributesKey = { "bytes", - "positionLength", - "keyword"}; - 
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length)); - Object extendedAttribute; - - for (String key : expectedAttributesKey) { - extendedAttribute = analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().get(key); - assertThat(extendedAttribute, notNullValue()); - } - } - - public void testDetailAnalyzeSpecifyAttributes() throws Exception { - AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled") - .setExplain(true).setTokenizer("standard").addTokenFilter("snowball").setAttributes("keyword").get(); - - assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1)); - assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball")); - assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3)); - assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubl")); - String[] expectedAttributesKey = { - "keyword"}; + "positionLength"}; assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length)); Object extendedAttribute; diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index fcca4f7eddf..689803f323d 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -89,6 +89,10 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin { public Map> getTokenFilters() { Map> filters = new TreeMap<>(); filters.put("asciifolding", ASCIIFoldingTokenFilterFactory::new); + filters.put("keyword_marker", 
requriesAnalysisSettings(KeywordMarkerTokenFilterFactory::new)); + filters.put("porter_stem", PorterStemTokenFilterFactory::new); + filters.put("snowball", SnowballTokenFilterFactory::new); + filters.put("trim", TrimTokenFilterFactory::new); filters.put("word_delimiter", WordDelimiterTokenFilterFactory::new); filters.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new); return filters; diff --git a/core/src/main/java/org/elasticsearch/index/analysis/KeywordMarkerTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/KeywordMarkerTokenFilterFactory.java similarity index 93% rename from core/src/main/java/org/elasticsearch/index/analysis/KeywordMarkerTokenFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/KeywordMarkerTokenFilterFactory.java index a4cd4c41c97..a57e322ff02 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/KeywordMarkerTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/KeywordMarkerTokenFilterFactory.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.TokenStream; @@ -26,6 +26,8 @@ import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; +import org.elasticsearch.index.analysis.Analysis; import java.util.Set; import java.util.regex.Pattern; @@ -50,7 +52,7 @@ public class KeywordMarkerTokenFilterFactory extends AbstractTokenFilterFactory private final CharArraySet keywordLookup; private final Pattern keywordPattern; - public KeywordMarkerTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { + KeywordMarkerTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); boolean ignoreCase = diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PorterStemTokenFilterFactory.java similarity index 84% rename from core/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PorterStemTokenFilterFactory.java index 82d3d7633a8..94f7271116b 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PorterStemTokenFilterFactory.java @@ -17,17 +17,18 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.en.PorterStemFilter; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; public class PorterStemTokenFilterFactory extends AbstractTokenFilterFactory { - public PorterStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { + PorterStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { super(indexSettings, name, settings); } diff --git a/core/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballTokenFilterFactory.java similarity index 87% rename from core/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballTokenFilterFactory.java index ba1c3a2a886..5aff07e8fed 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/SnowballTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SnowballTokenFilterFactory.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -24,6 +24,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; /** * Real work actually done here by Sebastian on the Elasticsearch mailing list @@ -33,7 +34,7 @@ public class SnowballTokenFilterFactory extends AbstractTokenFilterFactory { private String language; - public SnowballTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { + SnowballTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { super(indexSettings, name, settings); this.language = Strings.capitalize(settings.get("language", settings.get("name", "English"))); } diff --git a/core/src/main/java/org/elasticsearch/index/analysis/TrimTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/TrimTokenFilterFactory.java similarity index 87% rename from core/src/main/java/org/elasticsearch/index/analysis/TrimTokenFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/TrimTokenFilterFactory.java index 4239f2444bc..ab82ba0f7eb 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/TrimTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/TrimTokenFilterFactory.java @@ -17,19 +17,20 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.miscellaneous.TrimFilter; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; public class TrimTokenFilterFactory extends AbstractTokenFilterFactory { private static final String UPDATE_OFFSETS_KEY = "update_offsets"; - public TrimTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { + TrimTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); if (settings.get(UPDATE_OFFSETS_KEY) != null) { throw new IllegalArgumentException(UPDATE_OFFSETS_KEY + " is not supported anymore. Please fix your analysis chain"); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java index 3ce7fd1d301..3282a023f69 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java @@ -49,6 +49,10 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase { protected Map> getTokenFilters() { Map> filters = new TreeMap<>(super.getTokenFilters()); filters.put("asciifolding", ASCIIFoldingTokenFilterFactory.class); + filters.put("keywordmarker", KeywordMarkerTokenFilterFactory.class); + filters.put("porterstem", PorterStemTokenFilterFactory.class); + filters.put("snowballporter", SnowballTokenFilterFactory.class); + filters.put("trim", TrimTokenFilterFactory.class); filters.put("worddelimiter", 
WordDelimiterTokenFilterFactory.class); filters.put("worddelimitergraph", WordDelimiterGraphTokenFilterFactory.class); return filters; @@ -120,7 +124,7 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase { @Override protected Map> getPreConfiguredTokenizers() { Map> filters = new TreeMap<>(super.getPreConfiguredTokenFilters()); - + return filters; } diff --git a/core/src/test/java/org/elasticsearch/index/analysis/KeywordMarkerFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeywordMarkerFilterFactoryTests.java similarity index 93% rename from core/src/test/java/org/elasticsearch/index/analysis/KeywordMarkerFilterFactoryTests.java rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeywordMarkerFilterFactoryTests.java index 3298537af97..d0b72c50c42 100644 --- a/core/src/test/java/org/elasticsearch/index/analysis/KeywordMarkerFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/KeywordMarkerFilterFactoryTests.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceTokenizer; @@ -25,6 +25,9 @@ import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter; import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; +import org.elasticsearch.index.analysis.AnalysisTestsHelper; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.test.ESTestCase.TestAnalysis; import org.elasticsearch.test.ESTokenStreamTestCase; @@ -49,7 +52,7 @@ public class KeywordMarkerFilterFactoryTests extends ESTokenStreamTestCase { .put("index.analysis.analyzer.my_keyword.filter", "my_keyword, porter_stem") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); - TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin()); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_keyword"); assertThat(tokenFilter, instanceOf(KeywordMarkerTokenFilterFactory.class)); TokenStream filter = tokenFilter.create(new WhitespaceTokenizer()); @@ -72,7 +75,7 @@ public class KeywordMarkerFilterFactoryTests extends ESTokenStreamTestCase { .put("index.analysis.analyzer.my_keyword.filter", "my_keyword, porter_stem") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); - TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin()); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_keyword"); assertThat(tokenFilter, 
instanceOf(KeywordMarkerTokenFilterFactory.class)); TokenStream filter = tokenFilter.create(new WhitespaceTokenizer()); @@ -96,7 +99,7 @@ public class KeywordMarkerFilterFactoryTests extends ESTokenStreamTestCase { .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, - () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings)); + () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin())); assertEquals("cannot specify both `keywords_pattern` and `keywords` or `keywords_path`", e.getMessage()); } diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml index 39d55c15ace..eb9dec65542 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml @@ -33,6 +33,70 @@ - length: { tokens: 1 } - match: { tokens.0.token: foo bar! 
} +--- +"porterstem": + - do: + indices.analyze: + body: + text: This is troubled + tokenizer: standard + filter: [porter_stem] + - length: { tokens: 3 } + - match: { tokens.2.token: troubl } + - match: { tokens.2.position: 2 } + +--- +"keywordmarker": + - do: + indices.analyze: + body: + text: This is troubled + tokenizer: standard + filter: + - type: keyword_marker + keywords: troubled + - type: porter_stem + - length: { tokens: 3 } + - match: { tokens.2.token: troubled } + - match: { tokens.2.position: 2 } + +--- +"snowball": + - do: + indices.analyze: + body: + text: This is troubled + tokenizer: standard + filter: [snowball] + - length: { tokens: 3 } + - match: { tokens.2.token: troubl } + - match: { tokens.2.position: 2 } + + - do: + indices.analyze: + body: + explain: true + text: This is troubled + tokenizer: standard + filter: [snowball] + - length: { detail.tokenfilters.0.tokens: 3 } + - match: { detail.tokenfilters.0.tokens.2.token: troubl } + - match: { detail.tokenfilters.0.tokens.2.position: 2 } + - is_true: detail.tokenfilters.0.tokens.2.bytes + - match: { detail.tokenfilters.0.tokens.2.positionLength: 1 } + - match: { detail.tokenfilters.0.tokens.2.keyword: false } + +--- +"trim": + - do: + indices.analyze: + body: + text: Foo Bar ! + tokenizer: keyword + filter: [trim] + - length: { tokens: 1 } + - match: { tokens.0.token: Foo Bar ! 
} + --- "word_delimiter": - do: diff --git a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java index f70a3a5ef99..35f79a26ac8 100644 --- a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java @@ -48,7 +48,6 @@ import org.elasticsearch.index.analysis.IndicNormalizationFilterFactory; import org.elasticsearch.index.analysis.KStemTokenFilterFactory; import org.elasticsearch.index.analysis.KeepTypesFilterFactory; import org.elasticsearch.index.analysis.KeepWordFilterFactory; -import org.elasticsearch.index.analysis.KeywordMarkerTokenFilterFactory; import org.elasticsearch.index.analysis.KeywordTokenizerFactory; import org.elasticsearch.index.analysis.LengthTokenFilterFactory; import org.elasticsearch.index.analysis.LetterTokenizerFactory; @@ -64,7 +63,6 @@ import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory; import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory; import org.elasticsearch.index.analysis.PatternTokenizerFactory; import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory; -import org.elasticsearch.index.analysis.PorterStemTokenFilterFactory; import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; import org.elasticsearch.index.analysis.PreConfiguredTokenizer; import org.elasticsearch.index.analysis.ReverseTokenFilterFactory; @@ -72,7 +70,6 @@ import org.elasticsearch.index.analysis.ScandinavianFoldingFilterFactory; import org.elasticsearch.index.analysis.ScandinavianNormalizationFilterFactory; import org.elasticsearch.index.analysis.SerbianNormalizationFilterFactory; import org.elasticsearch.index.analysis.ShingleTokenFilterFactory; -import org.elasticsearch.index.analysis.SnowballTokenFilterFactory; import 
org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory; import org.elasticsearch.index.analysis.StandardTokenFilterFactory; import org.elasticsearch.index.analysis.StandardTokenizerFactory; @@ -82,7 +79,6 @@ import org.elasticsearch.index.analysis.StopTokenFilterFactory; import org.elasticsearch.index.analysis.SynonymGraphTokenFilterFactory; import org.elasticsearch.index.analysis.SynonymTokenFilterFactory; import org.elasticsearch.index.analysis.ThaiTokenizerFactory; -import org.elasticsearch.index.analysis.TrimTokenFilterFactory; import org.elasticsearch.index.analysis.TruncateTokenFilterFactory; import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory; import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory; @@ -193,7 +189,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase { .put("indonesianstem", StemmerTokenFilterFactory.class) .put("italianlightstem", StemmerTokenFilterFactory.class) .put("keepword", KeepWordFilterFactory.class) - .put("keywordmarker", KeywordMarkerTokenFilterFactory.class) + .put("keywordmarker", MovedToAnalysisCommon.class) .put("kstem", KStemTokenFilterFactory.class) .put("latvianstem", StemmerTokenFilterFactory.class) .put("length", LengthTokenFilterFactory.class) @@ -205,7 +201,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase { .put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class) .put("patternreplace", PatternReplaceTokenFilterFactory.class) .put("persiannormalization", PersianNormalizationFilterFactory.class) - .put("porterstem", PorterStemTokenFilterFactory.class) + .put("porterstem", MovedToAnalysisCommon.class) .put("portuguesestem", StemmerTokenFilterFactory.class) .put("portugueselightstem", StemmerTokenFilterFactory.class) .put("portugueseminimalstem", StemmerTokenFilterFactory.class) @@ -216,7 +212,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase { .put("serbiannormalization", 
SerbianNormalizationFilterFactory.class) .put("shingle", ShingleTokenFilterFactory.class) .put("minhash", MinHashTokenFilterFactory.class) - .put("snowballporter", SnowballTokenFilterFactory.class) + .put("snowballporter", MovedToAnalysisCommon.class) .put("soraninormalization", SoraniNormalizationFilterFactory.class) .put("soranistem", StemmerTokenFilterFactory.class) .put("spanishlightstem", StemmerTokenFilterFactory.class) @@ -226,7 +222,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase { .put("swedishlightstem", StemmerTokenFilterFactory.class) .put("synonym", SynonymTokenFilterFactory.class) .put("synonymgraph", SynonymGraphTokenFilterFactory.class) - .put("trim", TrimTokenFilterFactory.class) + .put("trim", MovedToAnalysisCommon.class) .put("truncate", TruncateTokenFilterFactory.class) .put("turkishlowercase", LowerCaseTokenFilterFactory.class) .put("type", KeepTypesFilterFactory.class)