Move more token filters to analysis-common module

The following token filters were moved: common grams, limit token, pattern capture and pattern replace.

Relates to #23658
This commit is contained in:
Martijn van Groningen 2017-07-06 14:06:20 +02:00
parent d71feceb23
commit 6db708ef75
No known key found for this signature in database
GPG Key ID: AB236F4FCF2AF12A
19 changed files with 180 additions and 66 deletions

View File

@@ -266,7 +266,6 @@
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]MergePolicyConfig.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]SearchSlowLog.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]AnalysisRegistry.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]CommonGramsTokenFilterFactory.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]CustomAnalyzerProvider.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]ShingleTokenFilterFactory.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]StemmerOverrideTokenFilterFactory.java" checks="LineLength" />
@@ -564,9 +563,7 @@
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]IndexingSlowLogTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]MergePolicySettingsTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]SearchSlowLogTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PatternCaptureTokenFilterTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PreBuiltAnalyzerTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]commongrams[/\\]CommonGramsTokenFilterFactoryTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]engine[/\\]InternalEngineMergeIT.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]engine[/\\]InternalEngineTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]fielddata[/\\]AbstractFieldDataTestCase.java" checks="LineLength" />

View File

@@ -46,7 +46,6 @@ import org.elasticsearch.index.analysis.ChineseAnalyzerProvider;
import org.elasticsearch.index.analysis.CjkAnalyzerProvider;
import org.elasticsearch.index.analysis.ClassicFilterFactory;
import org.elasticsearch.index.analysis.ClassicTokenizerFactory;
import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory;
import org.elasticsearch.index.analysis.CzechAnalyzerProvider;
import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
import org.elasticsearch.index.analysis.DanishAnalyzerProvider;
@@ -80,7 +79,6 @@ import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
import org.elasticsearch.index.analysis.LetterTokenizerFactory;
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
@@ -88,8 +86,6 @@ import org.elasticsearch.index.analysis.NGramTokenizerFactory;
import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
import org.elasticsearch.index.analysis.PatternAnalyzerProvider;
import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
import org.elasticsearch.index.analysis.PatternTokenizerFactory;
import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
@@ -196,13 +192,9 @@ public final class AnalysisModule {
tokenFilters.register("standard", StandardTokenFilterFactory::new);
tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
tokenFilters.register("pattern_replace", requriesAnalysisSettings(PatternReplaceTokenFilterFactory::new));
tokenFilters.register("arabic_stem", ArabicStemTokenFilterFactory::new);
tokenFilters.register("brazilian_stem", BrazilianStemTokenFilterFactory::new);
tokenFilters.register("czech_stem", CzechStemTokenFilterFactory::new);

View File

@@ -68,7 +68,6 @@ import org.apache.lucene.analysis.util.ElisionFilter;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory;
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
@@ -115,6 +114,10 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
filters.put("reverse", ReverseTokenFilterFactory::new);
filters.put("elision", ElisionTokenFilterFactory::new);
filters.put("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
filters.put("limit", LimitTokenCountFilterFactory::new);
filters.put("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
filters.put("pattern_replace", requriesAnalysisSettings(PatternReplaceTokenFilterFactory::new));
filters.put("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
return filters;
}

View File

@@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
@@ -26,6 +26,8 @@ import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.Analysis;
public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
@@ -35,14 +37,17 @@ public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
private final boolean queryMode;
public CommonGramsTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
CommonGramsTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
this.ignoreCase = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "ignore_case", false, deprecationLogger);
this.queryMode = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "query_mode", false, deprecationLogger);
this.ignoreCase = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(),
"ignore_case", false, deprecationLogger);
this.queryMode = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(),
"query_mode", false, deprecationLogger);
this.words = Analysis.parseCommonWords(env, settings, null, ignoreCase);
if (this.words == null) {
throw new IllegalArgumentException("missing or empty [common_words] or [common_words_path] configuration for common_grams token filter");
throw new IllegalArgumentException(
"missing or empty [common_words] or [common_words_path] configuration for common_grams token filter");
}
}

View File

@@ -17,23 +17,24 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class LimitTokenCountFilterFactory extends AbstractTokenFilterFactory {
public static final int DEFAULT_MAX_TOKEN_COUNT = 1;
public static final boolean DEFAULT_CONSUME_ALL_TOKENS = false;
static final int DEFAULT_MAX_TOKEN_COUNT = 1;
static final boolean DEFAULT_CONSUME_ALL_TOKENS = false;
final int maxTokenCount;
final boolean consumeAllTokens;
private final int maxTokenCount;
private final boolean consumeAllTokens;
public LimitTokenCountFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
LimitTokenCountFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
this.maxTokenCount = settings.getAsInt("max_token_count", DEFAULT_MAX_TOKEN_COUNT);
this.consumeAllTokens = settings.getAsBooleanLenientForPreEs6Indices(

View File

@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenFilter;
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.pattern.PatternCaptureGroupTokenFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import java.util.regex.Pattern;
@@ -34,7 +35,7 @@ public class PatternCaptureGroupTokenFilterFactory extends AbstractTokenFilterFa
private static final String PATTERNS_KEY = "patterns";
private static final String PRESERVE_ORIG_KEY = "preserve_original";
public PatternCaptureGroupTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
PatternCaptureGroupTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
String[] regexes = settings.getAsArray(PATTERNS_KEY, null, false);
if (regexes == null) {

View File

@@ -35,7 +35,7 @@ public class PatternReplaceCharFilterFactory extends AbstractCharFilterFactory i
private final Pattern pattern;
private final String replacement;
public PatternReplaceCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
PatternReplaceCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name);
String sPattern = settings.get("pattern");

View File

@@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
@@ -25,6 +25,7 @@ import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import java.util.regex.Pattern;

View File

@@ -101,6 +101,11 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
filters.put("reversestring", ReverseTokenFilterFactory.class);
filters.put("elision", ElisionTokenFilterFactory.class);
filters.put("truncate", TruncateTokenFilterFactory.class);
filters.put("limittokencount", LimitTokenCountFilterFactory.class);
filters.put("commongrams", CommonGramsTokenFilterFactory.class);
filters.put("commongramsquery", CommonGramsTokenFilterFactory.class);
filters.put("patternreplace", PatternReplaceTokenFilterFactory.class);
filters.put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class);
return filters;
}

View File

@@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis.commongrams;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
@@ -60,7 +60,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
{
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_default");
String source = "the quick brown is a fox Or noT";
@@ -77,7 +77,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.putArray("index.analysis.filter.common_grams_default.common_words", "chromosome", "protein")
.build();
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
{
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_default");
String source = "the quick brown is a fox Or noT";
@@ -96,10 +96,11 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.putArray("index.analysis.filter.common_grams_1.common_words", "the", "Or", "Not", "a", "is", "an", "they", "are")
.build();
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_1");
String source = "the quick brown is a fox or noT";
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", "fox", "fox_or", "or", "or_noT", "noT" };
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a",
"a_fox", "fox", "fox_or", "or", "or_noT", "noT" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@@ -110,10 +111,11 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.putArray("index.analysis.filter.common_grams_2.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
.build();
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_2");
String source = "the quick brown is a fox or why noT";
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", "fox", "or", "why", "why_noT", "noT" };
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "" +
"a_fox", "fox", "or", "why", "why_noT", "noT" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@@ -123,10 +125,11 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
.putArray("index.analysis.filter.common_grams_3.common_words", "the", "or", "not", "a", "is", "an", "they", "are")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_3");
String source = "the quick brown is a fox Or noT";
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", "fox", "Or", "noT" };
String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a",
"a_fox", "fox", "Or", "noT" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
@@ -134,25 +137,27 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
}
public void testCommonGramsAnalysis() throws IOException {
String json = "/org/elasticsearch/index/analysis/commongrams/commongrams.json";
String json = "/org/elasticsearch/analysis/common/commongrams.json";
Settings settings = Settings.builder()
.loadFromStream(json, getClass().getResourceAsStream(json))
.put(Environment.PATH_HOME_SETTING.getKey(), createHome())
.build();
{
IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings)
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
.indexAnalyzers;
Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer").analyzer();
String source = "the quick brown is a fox or not";
String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox", "fox", "fox_or", "or", "not" };
String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox",
"fox", "fox_or", "or", "not" };
assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
}
{
IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings)
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
.indexAnalyzers;
Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer_file").analyzer();
String source = "the quick brown is a fox or not";
String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox", "fox", "fox_or", "or", "not" };
String[] expected = new String[] { "the", "quick", "quick_brown", "brown", "brown_is", "is", "a", "a_fox",
"fox", "fox_or", "or", "not" };
assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
}
}
@@ -165,7 +170,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
.put("index.analysis.filter.common_grams_1.ignore_case", true)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_1");
String source = "the quick brown is a fox or noT";
String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox_or", "or_noT" };
@@ -180,7 +185,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
.put("index.analysis.filter.common_grams_2.ignore_case", false)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_2");
String source = "the quick brown is a fox or why noT";
String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "or", "why_noT" };
@@ -194,7 +199,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
.putArray("index.analysis.filter.common_grams_3.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_3");
String source = "the quick brown is a fox or why noT";
String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "or", "why_noT" };
@@ -208,7 +213,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
.putArray("index.analysis.filter.common_grams_4.common_words", "the", "or", "not", "a", "is", "an", "they", "are")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_4");
String source = "the quick brown is a fox Or noT";
String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "Or", "noT" };
@@ -219,13 +224,13 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
}
public void testQueryModeCommonGramsAnalysis() throws IOException {
String json = "/org/elasticsearch/index/analysis/commongrams/commongrams_query_mode.json";
String json = "/org/elasticsearch/analysis/common/commongrams_query_mode.json";
Settings settings = Settings.builder()
.loadFromStream(json, getClass().getResourceAsStream(json))
.put(Environment.PATH_HOME_SETTING.getKey(), createHome())
.build();
{
IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings)
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
.indexAnalyzers;
Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer").analyzer();
String source = "the quick brown is a fox or not";
@@ -233,7 +238,7 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
}
{
IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings)
IndexAnalyzers indexAnalyzers = createTestAnalysisFromSettings(settings)
.indexAnalyzers;
Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer_file").analyzer();
String source = "the quick brown is a fox or not";
@@ -251,4 +256,8 @@ public class CommonGramsTokenFilterFactoryTests extends ESTokenStreamTestCase {
return home;
}
private static ESTestCase.TestAnalysis createTestAnalysisFromSettings(Settings settings) throws IOException {
return AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
}
}

View File

@@ -17,12 +17,14 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.analysis.AnalysisTestsHelper;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.ESTokenStreamTestCase;
@@ -35,7 +37,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
.put("index.analysis.filter.limit_default.type", "limit")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
{
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_default");
String source = "the quick brown fox";
@@ -62,7 +64,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
.put("index.analysis.filter.limit_1.consume_all_tokens", true)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1");
String source = "the quick brown fox";
String[] expected = new String[] { "the", "quick", "brown" };
@@ -77,7 +79,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
.put("index.analysis.filter.limit_1.consume_all_tokens", false)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1");
String source = "the quick brown fox";
String[] expected = new String[] { "the", "quick", "brown" };
@@ -93,7 +95,7 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
.put("index.analysis.filter.limit_1.consume_all_tokens", true)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings);
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("limit_1");
String source = "the quick brown fox";
String[] expected = new String[] { "the", "quick", "brown", "fox" };
@@ -103,4 +105,8 @@ public class LimitTokenCountFilterFactoryTests extends ESTokenStreamTestCase {
}
}
private static ESTestCase.TestAnalysis createTestAnalysisFromSettings(Settings settings) throws IOException {
return AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
}
}

View File

@@ -17,13 +17,15 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.test.ESTokenStreamTestCase;
import org.elasticsearch.test.IndexSettingsModule;
@@ -32,7 +34,7 @@ import static org.hamcrest.Matchers.containsString;
public class PatternCaptureTokenFilterTests extends ESTokenStreamTestCase {
public void testPatternCaptureTokenFilter() throws Exception {
String json = "/org/elasticsearch/index/analysis/pattern_capture.json";
String json = "/org/elasticsearch/analysis/common/pattern_capture.json";
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.loadFromStream(json, getClass().getResourceAsStream(json))
@@ -40,7 +42,7 @@ public class PatternCaptureTokenFilterTests extends ESTokenStreamTestCase {
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
IndexAnalyzers indexAnalyzers = createTestAnalysis(idxSettings, settings).indexAnalyzers;
IndexAnalyzers indexAnalyzers = createTestAnalysis(idxSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;
NamedAnalyzer analyzer1 = indexAnalyzers.get("single");
assertTokenStreamContents(analyzer1.tokenStream("test", "foobarbaz"), new String[]{"foobarbaz","foobar","foo"});
@@ -56,7 +58,8 @@ public class PatternCaptureTokenFilterTests extends ESTokenStreamTestCase {
public void testNoPatterns() {
try {
new PatternCaptureGroupTokenFilterFactory(IndexSettingsModule.newIndexSettings("test", Settings.EMPTY), null, "pattern_capture", Settings.builder().put("pattern", "foobar").build());
new PatternCaptureGroupTokenFilterFactory(IndexSettingsModule.newIndexSettings("test", Settings.EMPTY), null,
"pattern_capture", Settings.builder().put("pattern", "foobar").build());
fail ("Expected IllegalArgumentException");
} catch (IllegalArgumentException e) {
assertThat(e.getMessage(), containsString("required setting 'patterns' is missing"));

View File

@@ -568,3 +568,98 @@
filter: [my_truncate]
- length: { tokens: 1 }
- match: { tokens.0.token: foo }
---
"pattern_capture":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
my_pattern_capture:
type: pattern_capture
preserve_original: false
patterns: ["([^@]+)"]
- do:
indices.analyze:
index: test
body:
text: foo@bar.baz
tokenizer: keyword
filter: [my_pattern_capture]
- length: { tokens: 2 }
- match: { tokens.0.token: foo }
- match: { tokens.1.token: bar.baz }
---
"pattern_replace":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
my_pattern_replace:
type: pattern_replace
pattern: a
replacement: b
- do:
indices.analyze:
index: test
body:
text: a
tokenizer: keyword
filter: [my_pattern_replace]
- length: { tokens: 1 }
- match: { tokens.0.token: b }
---
"limit_count":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
my_limit:
type: limit
max_token_count: 2
- do:
indices.analyze:
index: test
body:
text: a b c
tokenizer: whitespace
filter: [my_limit]
- length: { tokens: 2 }
- match: { tokens.0.token: a }
- match: { tokens.1.token: b }
---
"common_grams":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
my_limit:
type: common_grams
common_words: [a]
- do:
indices.analyze:
index: test
body:
text: a b c
tokenizer: whitespace
filter: [my_limit]
- length: { tokens: 4 }
- match: { tokens.0.token: a }
- match: { tokens.1.token: a_b }
- match: { tokens.2.token: b }
- match: { tokens.3.token: c }

View File

@@ -31,7 +31,6 @@ import org.elasticsearch.index.analysis.CJKBigramFilterFactory;
import org.elasticsearch.index.analysis.CJKWidthFilterFactory;
import org.elasticsearch.index.analysis.ClassicFilterFactory;
import org.elasticsearch.index.analysis.ClassicTokenizerFactory;
import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory;
import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
@@ -45,14 +44,11 @@ import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
import org.elasticsearch.index.analysis.KeepWordFilterFactory;
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
import org.elasticsearch.index.analysis.LetterTokenizerFactory;
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
import org.elasticsearch.index.analysis.PatternTokenizerFactory;
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
@@ -143,8 +139,8 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("cjkbigram", CJKBigramFilterFactory.class)
.put("cjkwidth", CJKWidthFilterFactory.class)
.put("classic", ClassicFilterFactory.class)
.put("commongrams", CommonGramsTokenFilterFactory.class)
.put("commongramsquery", CommonGramsTokenFilterFactory.class)
.put("commongrams", MovedToAnalysisCommon.class)
.put("commongramsquery", MovedToAnalysisCommon.class)
.put("czechstem", CzechStemTokenFilterFactory.class)
.put("decimaldigit", DecimalDigitFilterFactory.class)
.put("delimitedpayload", DelimitedPayloadTokenFilterFactory.class)
@@ -178,13 +174,13 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("kstem", MovedToAnalysisCommon.class)
.put("latvianstem", MovedToAnalysisCommon.class)
.put("length", MovedToAnalysisCommon.class)
.put("limittokencount", LimitTokenCountFilterFactory.class)
.put("limittokencount", MovedToAnalysisCommon.class)
.put("lowercase", MovedToAnalysisCommon.class)
.put("ngram", MovedToAnalysisCommon.class)
.put("norwegianlightstem", MovedToAnalysisCommon.class)
.put("norwegianminimalstem", MovedToAnalysisCommon.class)
.put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class)
.put("patternreplace", PatternReplaceTokenFilterFactory.class)
.put("patterncapturegroup", MovedToAnalysisCommon.class)
.put("patternreplace", MovedToAnalysisCommon.class)
.put("persiannormalization", PersianNormalizationFilterFactory.class)
.put("porterstem", MovedToAnalysisCommon.class)
.put("portuguesestem", MovedToAnalysisCommon.class)