From 6db708ef75bf72c17722df8a5b828825ee33fa57 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 6 Jul 2017 14:06:20 +0200 Subject: [PATCH] Move more token filters to analysis-common module The following token filters were moved: common grams, limit token, pattern capture and pattern replace. Relates to #23658 --- .../resources/checkstyle_suppressions.xml | 3 - .../indices/analysis/AnalysisModule.java | 8 -- ...bstractCompoundWordTokenFilterFactory.java | 0 .../analysis/common/CommonAnalysisPlugin.java | 5 +- .../CommonGramsTokenFilterFactory.java | 15 ++- .../common}/LimitTokenCountFilterFactory.java | 13 +-- ...PatternCaptureGroupTokenFilterFactory.java | 5 +- .../PatternReplaceCharFilterFactory.java | 2 +- .../PatternReplaceTokenFilterFactory.java | 3 +- .../common/CommonAnalysisFactoryTests.java | 5 + .../CommonGramsTokenFilterFactoryTests.java | 51 ++++++---- .../LimitTokenCountFilterFactoryTests.java | 16 +++- .../PatternCaptureTokenFilterTests.java | 11 ++- .../analysis/common}/common_words.txt | 0 .../analysis/common}/commongrams.json | 0 .../common}/commongrams_query_mode.json | 0 .../analysis/common}/pattern_capture.json | 0 .../test/analysis-common/40_token_filters.yml | 95 +++++++++++++++++++ .../analysis/AnalysisFactoryTestCase.java | 14 +-- 19 files changed, 180 insertions(+), 66 deletions(-) rename {core => modules/analysis-common}/src/main/java/org/elasticsearch/analysis/common/AbstractCompoundWordTokenFilterFactory.java (100%) rename {core/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/CommonGramsTokenFilterFactory.java (74%) rename {core/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/LimitTokenCountFilterFactory.java (79%) rename {core/src/main/java/org/elasticsearch/index/analysis => 
modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/PatternCaptureGroupTokenFilterFactory.java (90%) rename {core/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/PatternReplaceTokenFilterFactory.java (95%) rename {core/src/test/java/org/elasticsearch/index/analysis/commongrams => modules/analysis-common/src/test/java/org/elasticsearch/analysis/common}/CommonGramsTokenFilterFactoryTests.java (86%) rename {core/src/test/java/org/elasticsearch/index/analysis => modules/analysis-common/src/test/java/org/elasticsearch/analysis/common}/LimitTokenCountFilterFactoryTests.java (86%) rename {core/src/test/java/org/elasticsearch/index/analysis => modules/analysis-common/src/test/java/org/elasticsearch/analysis/common}/PatternCaptureTokenFilterTests.java (85%) rename {core/src/test/resources/org/elasticsearch/index/analysis/commongrams => modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common}/common_words.txt (100%) rename {core/src/test/resources/org/elasticsearch/index/analysis/commongrams => modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common}/commongrams.json (100%) rename {core/src/test/resources/org/elasticsearch/index/analysis/commongrams => modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common}/commongrams_query_mode.json (100%) rename {core/src/test/resources/org/elasticsearch/index/analysis => modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common}/pattern_capture.json (100%) diff --git a/buildSrc/src/main/resources/checkstyle_suppressions.xml b/buildSrc/src/main/resources/checkstyle_suppressions.xml index 9b86a207af5..be73b64b0b3 100644 --- a/buildSrc/src/main/resources/checkstyle_suppressions.xml +++ b/buildSrc/src/main/resources/checkstyle_suppressions.xml @@ -266,7 +266,6 @@ - @@ -564,9 +563,7 @@ - - diff --git 
a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java index 2657c9f7981..bcdebc737a5 100644 --- a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java +++ b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java @@ -46,7 +46,6 @@ import org.elasticsearch.index.analysis.ChineseAnalyzerProvider; import org.elasticsearch.index.analysis.CjkAnalyzerProvider; import org.elasticsearch.index.analysis.ClassicFilterFactory; import org.elasticsearch.index.analysis.ClassicTokenizerFactory; -import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory; import org.elasticsearch.index.analysis.CzechAnalyzerProvider; import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory; import org.elasticsearch.index.analysis.DanishAnalyzerProvider; @@ -80,7 +79,6 @@ import org.elasticsearch.index.analysis.KeywordAnalyzerProvider; import org.elasticsearch.index.analysis.KeywordTokenizerFactory; import org.elasticsearch.index.analysis.LatvianAnalyzerProvider; import org.elasticsearch.index.analysis.LetterTokenizerFactory; -import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory; import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider; import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory; import org.elasticsearch.index.analysis.MinHashTokenFilterFactory; @@ -88,8 +86,6 @@ import org.elasticsearch.index.analysis.NGramTokenizerFactory; import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider; import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory; import org.elasticsearch.index.analysis.PatternAnalyzerProvider; -import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory; -import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory; import org.elasticsearch.index.analysis.PatternTokenizerFactory; import 
org.elasticsearch.index.analysis.PersianAnalyzerProvider; import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory; @@ -196,13 +192,9 @@ public final class AnalysisModule { tokenFilters.register("standard", StandardTokenFilterFactory::new); tokenFilters.register("shingle", ShingleTokenFilterFactory::new); tokenFilters.register("min_hash", MinHashTokenFilterFactory::new); - tokenFilters.register("limit", LimitTokenCountFilterFactory::new); - tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new)); tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new); tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new)); tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new)); - tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new)); - tokenFilters.register("pattern_replace", requriesAnalysisSettings(PatternReplaceTokenFilterFactory::new)); tokenFilters.register("arabic_stem", ArabicStemTokenFilterFactory::new); tokenFilters.register("brazilian_stem", BrazilianStemTokenFilterFactory::new); tokenFilters.register("czech_stem", CzechStemTokenFilterFactory::new); diff --git a/core/src/main/java/org/elasticsearch/analysis/common/AbstractCompoundWordTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/AbstractCompoundWordTokenFilterFactory.java similarity index 100% rename from core/src/main/java/org/elasticsearch/analysis/common/AbstractCompoundWordTokenFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/AbstractCompoundWordTokenFilterFactory.java diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index 
18e34d381a1..d6a8ee2a006 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -68,7 +68,6 @@ import org.apache.lucene.analysis.util.ElisionFilter; import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory; import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory; -import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory; import org.elasticsearch.index.analysis.PreConfiguredCharFilter; import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; import org.elasticsearch.index.analysis.PreConfiguredTokenizer; @@ -115,6 +114,10 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin { filters.put("reverse", ReverseTokenFilterFactory::new); filters.put("elision", ElisionTokenFilterFactory::new); filters.put("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new)); + filters.put("limit", LimitTokenCountFilterFactory::new); + filters.put("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new)); + filters.put("pattern_replace", requriesAnalysisSettings(PatternReplaceTokenFilterFactory::new)); + filters.put("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new)); return filters; } diff --git a/core/src/main/java/org/elasticsearch/index/analysis/CommonGramsTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactory.java similarity index 74% rename from core/src/main/java/org/elasticsearch/index/analysis/CommonGramsTokenFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactory.java index b64eb917df8..a6e9baeab8d 100644 --- 
a/core/src/main/java/org/elasticsearch/index/analysis/CommonGramsTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactory.java @@ -17,7 +17,7 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.TokenStream; @@ -26,6 +26,8 @@ import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; +import org.elasticsearch.index.analysis.Analysis; public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory { @@ -35,14 +37,17 @@ public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory { private final boolean queryMode; - public CommonGramsTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { + CommonGramsTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); - this.ignoreCase = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "ignore_case", false, deprecationLogger); - this.queryMode = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "query_mode", false, deprecationLogger); + this.ignoreCase = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), + "ignore_case", false, deprecationLogger); + this.queryMode = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), + "query_mode", false, deprecationLogger); this.words = Analysis.parseCommonWords(env, settings, null, ignoreCase); if (this.words == null) { - throw new IllegalArgumentException("missing or empty 
[common_words] or [common_words_path] configuration for common_grams token filter"); + throw new IllegalArgumentException( + "missing or empty [common_words] or [common_words_path] configuration for common_grams token filter"); } } diff --git a/core/src/main/java/org/elasticsearch/index/analysis/LimitTokenCountFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactory.java similarity index 79% rename from core/src/main/java/org/elasticsearch/index/analysis/LimitTokenCountFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactory.java index 459f3b94229..862c2e67261 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/LimitTokenCountFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactory.java @@ -17,23 +17,24 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; public class LimitTokenCountFilterFactory extends AbstractTokenFilterFactory { - public static final int DEFAULT_MAX_TOKEN_COUNT = 1; - public static final boolean DEFAULT_CONSUME_ALL_TOKENS = false; + static final int DEFAULT_MAX_TOKEN_COUNT = 1; + static final boolean DEFAULT_CONSUME_ALL_TOKENS = false; - final int maxTokenCount; - final boolean consumeAllTokens; + private final int maxTokenCount; + private final boolean consumeAllTokens; - public LimitTokenCountFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { + LimitTokenCountFilterFactory(IndexSettings indexSettings, 
Environment env, String name, Settings settings) { super(indexSettings, name, settings); this.maxTokenCount = settings.getAsInt("max_token_count", DEFAULT_MAX_TOKEN_COUNT); this.consumeAllTokens = settings.getAsBooleanLenientForPreEs6Indices( diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PatternCaptureGroupTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternCaptureGroupTokenFilterFactory.java similarity index 90% rename from core/src/main/java/org/elasticsearch/index/analysis/PatternCaptureGroupTokenFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternCaptureGroupTokenFilterFactory.java index d129f77ecb7..7c58bc1491a 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/PatternCaptureGroupTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternCaptureGroupTokenFilterFactory.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.TokenFilter; @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.pattern.PatternCaptureGroupTokenFilter; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; import java.util.regex.Pattern; @@ -34,7 +35,7 @@ public class PatternCaptureGroupTokenFilterFactory extends AbstractTokenFilterFa private static final String PATTERNS_KEY = "patterns"; private static final String PRESERVE_ORIG_KEY = "preserve_original"; - public PatternCaptureGroupTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { + PatternCaptureGroupTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { super(indexSettings, name, settings); String[] regexes = settings.getAsArray(PATTERNS_KEY, null, false); if (regexes == null) { diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceCharFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceCharFilterFactory.java index b243618b53f..9d3985cc604 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceCharFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceCharFilterFactory.java @@ -35,7 +35,7 @@ public class PatternReplaceCharFilterFactory extends AbstractCharFilterFactory i private final Pattern pattern; private final String replacement; - public PatternReplaceCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { + PatternReplaceCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { 
super(indexSettings, name); String sPattern = settings.get("pattern"); diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PatternReplaceTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterFactory.java similarity index 95% rename from core/src/main/java/org/elasticsearch/index/analysis/PatternReplaceTokenFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterFactory.java index a47f0d72d81..8af85861875 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/PatternReplaceTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterFactory.java @@ -17,7 +17,7 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.pattern.PatternReplaceFilter; @@ -25,6 +25,7 @@ import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; import java.util.regex.Pattern; diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java index 37bf407df03..e016219826b 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java @@ -101,6 +101,11 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase { filters.put("reversestring", ReverseTokenFilterFactory.class); filters.put("elision", 
ElisionTokenFilterFactory.class); filters.put("truncate", TruncateTokenFilterFactory.class); + filters.put("limittokencount", LimitTokenCountFilterFactory.class); + filters.put("commongrams", CommonGramsTokenFilterFactory.class); + filters.put("commongramsquery", CommonGramsTokenFilterFactory.class); + filters.put("patternreplace", PatternReplaceTokenFilterFactory.class); + filters.put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class); return filters; } diff --git a/core/src/test/java/org/elasticsearch/index/analysis/commongrams/CommonGramsTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactoryTests.java similarity index 86% rename from core/src/test/java/org/elasticsearch/index/analysis/commongrams/CommonGramsTokenFilterFactoryTests.java rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactoryTests.java index 19c6bf64dcc..e8578fde60d 100644 --- a/core/src/test/java/org/elasticsearch/index/analysis/commongrams/CommonGramsTokenFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactoryTests.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis.commongrams; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Tokenizer; @@ -60,7 +60,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings); { TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_default"); String source = "the quick brown is a fox Or noT"; @@ -77,7 +77,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .putArray("index.analysis.filter.common_grams_default.common_words", "chromosome", "protein") .build(); - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings); { TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_default"); String source = "the quick brown is a fox Or noT"; @@ -96,10 +96,11 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .putArray("index.analysis.filter.common_grams_1.common_words", "the", "Or", "Not", "a", "is", "an", "they", "are") .build(); - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_1"); String source = "the quick brown is a fox or noT"; - String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", 
"fox", "fox_or", "or", "or_noT", "noT" }; + String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", + "a_fox", "fox", "fox_or", "or", "or_noT", "noT" }; Tokenizer tokenizer = new WhitespaceTokenizer(); tokenizer.setReader(new StringReader(source)); assertTokenStreamContents(tokenFilter.create(tokenizer), expected); @@ -110,10 +111,11 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .putArray("index.analysis.filter.common_grams_2.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are") .build(); - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_2"); String source = "the quick brown is a fox or why noT"; - String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", "fox", "or", "why", "why_noT", "noT" }; + String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "" + + "a_fox", "fox", "or", "why", "why_noT", "noT" }; Tokenizer tokenizer = new WhitespaceTokenizer(); tokenizer.setReader(new StringReader(source)); assertTokenStreamContents(tokenFilter.create(tokenizer), expected); @@ -123,10 +125,11 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { .putArray("index.analysis.filter.common_grams_3.common_words", "the", "or", "not", "a", "is", "an", "they", "are") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings); TokenFilterFactory tokenFilter = 
analysis.tokenFilter.get("common_grams_3"); String source = "the quick brown is a fox Or noT"; - String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", "fox", "Or", "noT" }; + String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", + "a_fox", "fox", "Or", "noT" }; Tokenizer tokenizer = new WhitespaceTokenizer(); tokenizer.setReader(new StringReader(source)); assertTokenStreamContents(tokenFilter.create(tokenizer), expected); @@ -134,25 +137,27 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { } public void testCommonGramsAnalysis() throws IOException { - String json = "/org/elasticsearch/index/analysis/commongrams/commongrams.json"; + String json = "/org/elasticsearch/analysis/common/commongrams.json"; Settings settings = Settings.builder() .loadFromStream(json, getClass().getResourceAsStream(json)) .put(Environment.PATH_HOME_SETTING.getKey(), createHome()) .build(); { - IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings) + IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings) .indexAnalyzers; Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer").analyzer(); String source = "the quick brown is a fox or not"; - String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox", "fox", "fox_or", "or", "not" }; + String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox", + "fox", "fox_or", "or", "not" }; assertTokenStreamContents(analyzer.tokenStream("test", source), expected); } { - IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings) + IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings) .indexAnalyzers; Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer_file").analyzer(); String source = "the quick brown is a 
fox or not"; - String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox", "fox", "fox_or", "or", "not" }; + String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox", + "fox", "fox_or", "or", "not" }; assertTokenStreamContents(analyzer.tokenStream("test", source), expected); } } @@ -165,7 +170,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { .put("index.analysis.filter.common_grams_1.ignore_case", true) .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_1"); String source = "the quick brown is a fox or noT"; String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox_or", "or_noT" }; @@ -180,7 +185,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { .put("index.analysis.filter.common_grams_2.ignore_case", false) .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_2"); String source = "the quick brown is a fox or why noT"; String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "or", "why_noT" }; @@ -194,7 +199,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { .putArray("index.analysis.filter.common_grams_3.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) 
.build(); - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_3"); String source = "the quick brown is a fox or why noT"; String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "or", "why_noT" }; @@ -208,7 +213,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { .putArray("index.analysis.filter.common_grams_4.common_words", "the", "or", "not", "a", "is", "an", "they", "are") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_4"); String source = "the quick brown is a fox Or noT"; String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "Or", "noT" }; @@ -219,13 +224,13 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { } public void testQueryModeCommonGramsAnalysis() throws IOException { - String json = "/org/elasticsearch/index/analysis/commongrams/commongrams_query_mode.json"; + String json = "/org/elasticsearch/analysis/common/commongrams_query_mode.json"; Settings settings = Settings.builder() .loadFromStream(json, getClass().getResourceAsStream(json)) .put(Environment.PATH_HOME_SETTING.getKey(), createHome()) .build(); { - IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings) + IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings) .indexAnalyzers; Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer").analyzer(); String source = "the quick brown is a fox or not"; @@ -233,7 +238,7 @@ public 
class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { assertTokenStreamContents(analyzer.tokenStream("test", source), expected); } { - IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings) + IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings) .indexAnalyzers; Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer_file").analyzer(); String source = "the quick brown is a fox or not"; @@ -251,4 +256,8 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase { return home; } + private static ESTestCase.TestAnalysis createTestAnalysisFromSettings(Settings settings) throws IOException { + return AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin()); + } + } diff --git a/core/src/test/java/org/elasticsearch/index/analysis/LimitTokenCountFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactoryTests.java similarity index 86% rename from core/src/test/java/org/elasticsearch/index/analysis/LimitTokenCountFilterFactoryTests.java rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactoryTests.java index f1d810505bf..93ae41f04ee 100644 --- a/core/src/test/java/org/elasticsearch/index/analysis/LimitTokenCountFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactoryTests.java @@ -17,12 +17,14 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; +import org.elasticsearch.index.analysis.AnalysisTestsHelper; +import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.ESTokenStreamTestCase; @@ -35,7 +37,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase { .put("index.analysis.filter.limit_default.type", "limit") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings); { TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_default"); String source = "the quick brown fox"; @@ -62,7 +64,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase { .put("index.analysis.filter.limit_1.consume_all_tokens", true) .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1"); String source = "the quick brown fox"; String[] expected = new String[] { "the", "quick", "brown" }; @@ -77,7 +79,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase { .put("index.analysis.filter.limit_1.consume_all_tokens", false) .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + ESTestCase.TestAnalysis 
analysis = createTestAnalysisFromSettings(settings); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1"); String source = "the quick brown fox"; String[] expected = new String[] { "the", "quick", "brown" }; @@ -93,7 +95,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase { .put("index.analysis.filter.limit_1.consume_all_tokens", true) .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); - ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); + ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1"); String source = "the quick brown fox"; String[] expected = new String[] { "the", "quick", "brown", "fox" }; @@ -103,4 +105,8 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase { } } + private static ESTestCase.TestAnalysis createTestAnalysisFromSettings(Settings settings) throws IOException { + return AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin()); + } + } diff --git a/core/src/test/java/org/elasticsearch/index/analysis/PatternCaptureTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternCaptureTokenFilterTests.java similarity index 85% rename from core/src/test/java/org/elasticsearch/index/analysis/PatternCaptureTokenFilterTests.java rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternCaptureTokenFilterTests.java index 126bbe2ab93..34bc7d9e026 100644 --- a/core/src/test/java/org/elasticsearch/index/analysis/PatternCaptureTokenFilterTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternCaptureTokenFilterTests.java @@ -17,13 +17,15 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.test.ESTokenStreamTestCase; import org.elasticsearch.test.IndexSettingsModule; @@ -32,7 +34,7 @@ import static org.hamcrest.Matchers.containsString; public class PatternCaptureTokenFilterTests extends ESTokenStreamTestCase { public void testPatternCaptureTokenFilter() throws Exception { - String json = "/org/elasticsearch/index/analysis/pattern_capture.json"; + String json = "/org/elasticsearch/analysis/common/pattern_capture.json"; Settings settings = Settings.builder() .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) .loadFromStream(json, getClass().getResourceAsStream(json)) @@ -40,7 +42,7 @@ public class PatternCaptureTokenFilterTests extends ESTokenStreamTestCase { .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); - IndexAnalyzers indexAnalyzers = createTestAnalysis(idxSettings, settings).indexAnalyzers; + IndexAnalyzers indexAnalyzers = createTestAnalysis(idxSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers; NamedAnalyzer analyzer1 = indexAnalyzers.get("single"); assertTokenStreamContents(analyzer1.tokenStream("test", "foobarbaz"), new String[]{"foobarbaz","foobar","foo"}); @@ -56,7 +58,8 @@ public class PatternCaptureTokenFilterTests extends ESTokenStreamTestCase { public void testNoPatterns() { try { - new PatternCaptureGroupTokenFilterFactory(IndexSettingsModule.newIndexSettings("test", Settings.EMPTY), null, "pattern_capture", Settings.builder().put("pattern", "foobar").build()); + new 
PatternCaptureGroupTokenFilterFactory(IndexSettingsModule.newIndexSettings("test", Settings.EMPTY), null, + "pattern_capture", Settings.builder().put("pattern", "foobar").build()); fail ("Expected IllegalArgumentException"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("required setting 'patterns' is missing")); diff --git a/core/src/test/resources/org/elasticsearch/index/analysis/commongrams/common_words.txt b/modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/common_words.txt similarity index 100% rename from core/src/test/resources/org/elasticsearch/index/analysis/commongrams/common_words.txt rename to modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/common_words.txt diff --git a/core/src/test/resources/org/elasticsearch/index/analysis/commongrams/commongrams.json b/modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/commongrams.json similarity index 100% rename from core/src/test/resources/org/elasticsearch/index/analysis/commongrams/commongrams.json rename to modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/commongrams.json diff --git a/core/src/test/resources/org/elasticsearch/index/analysis/commongrams/commongrams_query_mode.json b/modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/commongrams_query_mode.json similarity index 100% rename from core/src/test/resources/org/elasticsearch/index/analysis/commongrams/commongrams_query_mode.json rename to modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/commongrams_query_mode.json diff --git a/core/src/test/resources/org/elasticsearch/index/analysis/pattern_capture.json b/modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/pattern_capture.json similarity index 100% rename from core/src/test/resources/org/elasticsearch/index/analysis/pattern_capture.json rename to 
modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/pattern_capture.json diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml index 2283634a80a..16c644338cb 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml @@ -568,3 +568,98 @@ filter: [my_truncate] - length: { tokens: 1 } - match: { tokens.0.token: foo } + +--- +"pattern_capture": + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + my_pattern_capture: + type: pattern_capture + preserve_original: false + patterns: ["([^@]+)"] + - do: + indices.analyze: + index: test + body: + text: foo@bar.baz + tokenizer: keyword + filter: [my_pattern_capture] + - length: { tokens: 2 } + - match: { tokens.0.token: foo } + - match: { tokens.1.token: bar.baz } + +--- +"pattern_replace": + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + my_pattern_replace: + type: pattern_replace + pattern: a + replacement: b + - do: + indices.analyze: + index: test + body: + text: a + tokenizer: keyword + filter: [my_pattern_replace] + - length: { tokens: 1 } + - match: { tokens.0.token: b } + +--- +"limit_count": + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + my_limit: + type: limit + max_token_count: 2 + - do: + indices.analyze: + index: test + body: + text: a b c + tokenizer: whitespace + filter: [my_limit] + - length: { tokens: 2 } + - match: { tokens.0.token: a } + - match: { tokens.1.token: b } + +--- +"common_grams": + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + my_common_grams: + type: common_grams + common_words: [a] + - do: + indices.analyze: + index: test +
body: + text: a b c + tokenizer: whitespace + filter: [my_common_grams] + - length: { tokens: 4 } + - match: { tokens.0.token: a } + - match: { tokens.1.token: a_b } + - match: { tokens.2.token: b } + - match: { tokens.3.token: c } diff --git a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java index 97035623a6c..3d4d39795da 100644 --- a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java @@ -31,7 +31,6 @@ import org.elasticsearch.index.analysis.CJKBigramFilterFactory; import org.elasticsearch.index.analysis.CJKWidthFilterFactory; import org.elasticsearch.index.analysis.ClassicFilterFactory; import org.elasticsearch.index.analysis.ClassicTokenizerFactory; -import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory; import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory; import org.elasticsearch.index.analysis.DecimalDigitFilterFactory; import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory; @@ -45,14 +44,11 @@ import org.elasticsearch.index.analysis.KeepTypesFilterFactory; import org.elasticsearch.index.analysis.KeepWordFilterFactory; import org.elasticsearch.index.analysis.KeywordTokenizerFactory; import org.elasticsearch.index.analysis.LetterTokenizerFactory; -import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory; import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory; import org.elasticsearch.index.analysis.MinHashTokenFilterFactory; import org.elasticsearch.index.analysis.MultiTermAwareComponent; import org.elasticsearch.index.analysis.NGramTokenizerFactory; import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory; -import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory; -import
org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory; import org.elasticsearch.index.analysis.PatternTokenizerFactory; import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory; import org.elasticsearch.index.analysis.PreConfiguredCharFilter; @@ -143,8 +139,8 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase { .put("cjkbigram", CJKBigramFilterFactory.class) .put("cjkwidth", CJKWidthFilterFactory.class) .put("classic", ClassicFilterFactory.class) - .put("commongrams", CommonGramsTokenFilterFactory.class) - .put("commongramsquery", CommonGramsTokenFilterFactory.class) + .put("commongrams", MovedToAnalysisCommon.class) + .put("commongramsquery", MovedToAnalysisCommon.class) .put("czechstem", CzechStemTokenFilterFactory.class) .put("decimaldigit", DecimalDigitFilterFactory.class) .put("delimitedpayload", DelimitedPayloadTokenFilterFactory.class) @@ -178,13 +174,13 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase { .put("kstem", MovedToAnalysisCommon.class) .put("latvianstem", MovedToAnalysisCommon.class) .put("length", MovedToAnalysisCommon.class) - .put("limittokencount", LimitTokenCountFilterFactory.class) + .put("limittokencount", MovedToAnalysisCommon.class) .put("lowercase", MovedToAnalysisCommon.class) .put("ngram", MovedToAnalysisCommon.class) .put("norwegianlightstem", MovedToAnalysisCommon.class) .put("norwegianminimalstem", MovedToAnalysisCommon.class) - .put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class) - .put("patternreplace", PatternReplaceTokenFilterFactory.class) + .put("patterncapturegroup", MovedToAnalysisCommon.class) + .put("patternreplace", MovedToAnalysisCommon.class) .put("persiannormalization", PersianNormalizationFilterFactory.class) .put("porterstem", MovedToAnalysisCommon.class) .put("portuguesestem", MovedToAnalysisCommon.class)