Moved `keyword_marker`, `trim`, `snowball` and `porter_stem` token filter factories from core to the common-analysis module.
Relates to #23658
This commit is contained in:
parent
a089dc9dcd
commit
258be2b135
|
@ -81,7 +81,6 @@ import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
|
import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.KeepWordFilterFactory;
|
import org.elasticsearch.index.analysis.KeepWordFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
|
import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.KeywordMarkerTokenFilterFactory;
|
|
||||||
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
|
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
|
||||||
import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
|
import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
|
import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
|
||||||
|
@ -101,7 +100,6 @@ import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.PatternTokenizerFactory;
|
import org.elasticsearch.index.analysis.PatternTokenizerFactory;
|
||||||
import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
|
import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
|
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.PorterStemTokenFilterFactory;
|
|
||||||
import org.elasticsearch.index.analysis.PortugueseAnalyzerProvider;
|
import org.elasticsearch.index.analysis.PortugueseAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
|
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
|
||||||
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
|
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
|
||||||
|
@ -115,7 +113,6 @@ import org.elasticsearch.index.analysis.SerbianNormalizationFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
|
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.SimpleAnalyzerProvider;
|
import org.elasticsearch.index.analysis.SimpleAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.SnowballAnalyzerProvider;
|
import org.elasticsearch.index.analysis.SnowballAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.SnowballTokenFilterFactory;
|
|
||||||
import org.elasticsearch.index.analysis.SoraniAnalyzerProvider;
|
import org.elasticsearch.index.analysis.SoraniAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
|
import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.SpanishAnalyzerProvider;
|
import org.elasticsearch.index.analysis.SpanishAnalyzerProvider;
|
||||||
|
@ -132,7 +129,6 @@ import org.elasticsearch.index.analysis.ThaiAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
|
import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
|
||||||
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.TokenizerFactory;
|
import org.elasticsearch.index.analysis.TokenizerFactory;
|
||||||
import org.elasticsearch.index.analysis.TrimTokenFilterFactory;
|
|
||||||
import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
|
import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.TurkishAnalyzerProvider;
|
import org.elasticsearch.index.analysis.TurkishAnalyzerProvider;
|
||||||
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
|
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
|
||||||
|
@ -212,7 +208,6 @@ public final class AnalysisModule {
|
||||||
tokenFilters.register("length", LengthTokenFilterFactory::new);
|
tokenFilters.register("length", LengthTokenFilterFactory::new);
|
||||||
tokenFilters.register("lowercase", LowerCaseTokenFilterFactory::new);
|
tokenFilters.register("lowercase", LowerCaseTokenFilterFactory::new);
|
||||||
tokenFilters.register("uppercase", UpperCaseTokenFilterFactory::new);
|
tokenFilters.register("uppercase", UpperCaseTokenFilterFactory::new);
|
||||||
tokenFilters.register("porter_stem", PorterStemTokenFilterFactory::new);
|
|
||||||
tokenFilters.register("kstem", KStemTokenFilterFactory::new);
|
tokenFilters.register("kstem", KStemTokenFilterFactory::new);
|
||||||
tokenFilters.register("standard", StandardTokenFilterFactory::new);
|
tokenFilters.register("standard", StandardTokenFilterFactory::new);
|
||||||
tokenFilters.register("nGram", NGramTokenFilterFactory::new);
|
tokenFilters.register("nGram", NGramTokenFilterFactory::new);
|
||||||
|
@ -223,10 +218,8 @@ public final class AnalysisModule {
|
||||||
tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
|
tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
|
||||||
tokenFilters.register("unique", UniqueTokenFilterFactory::new);
|
tokenFilters.register("unique", UniqueTokenFilterFactory::new);
|
||||||
tokenFilters.register("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
|
tokenFilters.register("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
|
||||||
tokenFilters.register("trim", TrimTokenFilterFactory::new);
|
|
||||||
tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
|
tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
|
||||||
tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
|
tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
|
||||||
tokenFilters.register("snowball", SnowballTokenFilterFactory::new);
|
|
||||||
tokenFilters.register("stemmer", StemmerTokenFilterFactory::new);
|
tokenFilters.register("stemmer", StemmerTokenFilterFactory::new);
|
||||||
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
|
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
|
||||||
tokenFilters.register("elision", ElisionTokenFilterFactory::new);
|
tokenFilters.register("elision", ElisionTokenFilterFactory::new);
|
||||||
|
@ -244,7 +237,6 @@ public final class AnalysisModule {
|
||||||
tokenFilters.register("french_stem", FrenchStemTokenFilterFactory::new);
|
tokenFilters.register("french_stem", FrenchStemTokenFilterFactory::new);
|
||||||
tokenFilters.register("german_stem", GermanStemTokenFilterFactory::new);
|
tokenFilters.register("german_stem", GermanStemTokenFilterFactory::new);
|
||||||
tokenFilters.register("russian_stem", RussianStemTokenFilterFactory::new);
|
tokenFilters.register("russian_stem", RussianStemTokenFilterFactory::new);
|
||||||
tokenFilters.register("keyword_marker", requriesAnalysisSettings(KeywordMarkerTokenFilterFactory::new));
|
|
||||||
tokenFilters.register("stemmer_override", requriesAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
|
tokenFilters.register("stemmer_override", requriesAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
|
||||||
tokenFilters.register("arabic_normalization", ArabicNormalizationFilterFactory::new);
|
tokenFilters.register("arabic_normalization", ArabicNormalizationFilterFactory::new);
|
||||||
tokenFilters.register("german_normalization", GermanNormalizationFilterFactory::new);
|
tokenFilters.register("german_normalization", GermanNormalizationFilterFactory::new);
|
||||||
|
|
|
@ -275,38 +275,17 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
||||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1));
|
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1));
|
||||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("\nthis is a test\n"));
|
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("\nthis is a test\n"));
|
||||||
|
|
||||||
|
|
||||||
//check other attributes
|
//check other attributes
|
||||||
analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled")
|
analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled")
|
||||||
.setExplain(true).setTokenizer("standard").addTokenFilter("snowball").get();
|
.setExplain(true).setTokenizer("standard").addTokenFilter("lowercase").get();
|
||||||
|
|
||||||
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
|
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
|
||||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
|
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase"));
|
||||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3));
|
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3));
|
||||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubl"));
|
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubled"));
|
||||||
String[] expectedAttributesKey = {
|
String[] expectedAttributesKey = {
|
||||||
"bytes",
|
"bytes",
|
||||||
"positionLength",
|
"positionLength"};
|
||||||
"keyword"};
|
|
||||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length));
|
|
||||||
Object extendedAttribute;
|
|
||||||
|
|
||||||
for (String key : expectedAttributesKey) {
|
|
||||||
extendedAttribute = analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().get(key);
|
|
||||||
assertThat(extendedAttribute, notNullValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testDetailAnalyzeSpecifyAttributes() throws Exception {
|
|
||||||
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled")
|
|
||||||
.setExplain(true).setTokenizer("standard").addTokenFilter("snowball").setAttributes("keyword").get();
|
|
||||||
|
|
||||||
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
|
|
||||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
|
|
||||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3));
|
|
||||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubl"));
|
|
||||||
String[] expectedAttributesKey = {
|
|
||||||
"keyword"};
|
|
||||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length));
|
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length));
|
||||||
Object extendedAttribute;
|
Object extendedAttribute;
|
||||||
|
|
||||||
|
|
|
@ -89,6 +89,10 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
|
||||||
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
|
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
|
||||||
Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
|
Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
|
||||||
filters.put("asciifolding", ASCIIFoldingTokenFilterFactory::new);
|
filters.put("asciifolding", ASCIIFoldingTokenFilterFactory::new);
|
||||||
|
filters.put("keyword_marker", requriesAnalysisSettings(KeywordMarkerTokenFilterFactory::new));
|
||||||
|
filters.put("porter_stem", PorterStemTokenFilterFactory::new);
|
||||||
|
filters.put("snowball", SnowballTokenFilterFactory::new);
|
||||||
|
filters.put("trim", TrimTokenFilterFactory::new);
|
||||||
filters.put("word_delimiter", WordDelimiterTokenFilterFactory::new);
|
filters.put("word_delimiter", WordDelimiterTokenFilterFactory::new);
|
||||||
filters.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
|
filters.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
|
||||||
return filters;
|
return filters;
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -26,6 +26,8 @@ import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.IndexSettings;
|
import org.elasticsearch.index.IndexSettings;
|
||||||
|
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||||
|
import org.elasticsearch.index.analysis.Analysis;
|
||||||
|
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
@ -50,7 +52,7 @@ public class KeywordMarkerTokenFilterFactory extends AbstractTokenFilterFactory
|
||||||
private final CharArraySet keywordLookup;
|
private final CharArraySet keywordLookup;
|
||||||
private final Pattern keywordPattern;
|
private final Pattern keywordPattern;
|
||||||
|
|
||||||
public KeywordMarkerTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
KeywordMarkerTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, name, settings);
|
||||||
|
|
||||||
boolean ignoreCase =
|
boolean ignoreCase =
|
|
@ -17,17 +17,18 @@
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.en.PorterStemFilter;
|
import org.apache.lucene.analysis.en.PorterStemFilter;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.IndexSettings;
|
import org.elasticsearch.index.IndexSettings;
|
||||||
|
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||||
|
|
||||||
public class PorterStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
public class PorterStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
public PorterStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
PorterStemTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, name, settings);
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
* specific language governing permissions and limitations
|
* specific language governing permissions and limitations
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||||
|
@ -24,6 +24,7 @@ import org.elasticsearch.common.Strings;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.IndexSettings;
|
import org.elasticsearch.index.IndexSettings;
|
||||||
|
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Real work actually done here by Sebastian on the Elasticsearch mailing list
|
* Real work actually done here by Sebastian on the Elasticsearch mailing list
|
||||||
|
@ -33,7 +34,7 @@ public class SnowballTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
private String language;
|
private String language;
|
||||||
|
|
||||||
public SnowballTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
SnowballTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, name, settings);
|
||||||
this.language = Strings.capitalize(settings.get("language", settings.get("name", "English")));
|
this.language = Strings.capitalize(settings.get("language", settings.get("name", "English")));
|
||||||
}
|
}
|
|
@ -17,19 +17,20 @@
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
|
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
import org.elasticsearch.index.IndexSettings;
|
import org.elasticsearch.index.IndexSettings;
|
||||||
|
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||||
|
|
||||||
public class TrimTokenFilterFactory extends AbstractTokenFilterFactory {
|
public class TrimTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
private static final String UPDATE_OFFSETS_KEY = "update_offsets";
|
private static final String UPDATE_OFFSETS_KEY = "update_offsets";
|
||||||
|
|
||||||
public TrimTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
TrimTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||||
super(indexSettings, name, settings);
|
super(indexSettings, name, settings);
|
||||||
if (settings.get(UPDATE_OFFSETS_KEY) != null) {
|
if (settings.get(UPDATE_OFFSETS_KEY) != null) {
|
||||||
throw new IllegalArgumentException(UPDATE_OFFSETS_KEY + " is not supported anymore. Please fix your analysis chain");
|
throw new IllegalArgumentException(UPDATE_OFFSETS_KEY + " is not supported anymore. Please fix your analysis chain");
|
|
@ -49,6 +49,10 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
|
||||||
protected Map<String, Class<?>> getTokenFilters() {
|
protected Map<String, Class<?>> getTokenFilters() {
|
||||||
Map<String, Class<?>> filters = new TreeMap<>(super.getTokenFilters());
|
Map<String, Class<?>> filters = new TreeMap<>(super.getTokenFilters());
|
||||||
filters.put("asciifolding", ASCIIFoldingTokenFilterFactory.class);
|
filters.put("asciifolding", ASCIIFoldingTokenFilterFactory.class);
|
||||||
|
filters.put("keywordmarker", KeywordMarkerTokenFilterFactory.class);
|
||||||
|
filters.put("porterstem", PorterStemTokenFilterFactory.class);
|
||||||
|
filters.put("snowballporter", SnowballTokenFilterFactory.class);
|
||||||
|
filters.put("trim", TrimTokenFilterFactory.class);
|
||||||
filters.put("worddelimiter", WordDelimiterTokenFilterFactory.class);
|
filters.put("worddelimiter", WordDelimiterTokenFilterFactory.class);
|
||||||
filters.put("worddelimitergraph", WordDelimiterGraphTokenFilterFactory.class);
|
filters.put("worddelimitergraph", WordDelimiterGraphTokenFilterFactory.class);
|
||||||
return filters;
|
return filters;
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.analysis.common;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||||
|
@ -25,6 +25,9 @@ import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.env.Environment;
|
import org.elasticsearch.env.Environment;
|
||||||
|
import org.elasticsearch.index.analysis.AnalysisTestsHelper;
|
||||||
|
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||||
|
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
||||||
import org.elasticsearch.test.ESTestCase.TestAnalysis;
|
import org.elasticsearch.test.ESTestCase.TestAnalysis;
|
||||||
import org.elasticsearch.test.ESTokenStreamTestCase;
|
import org.elasticsearch.test.ESTokenStreamTestCase;
|
||||||
|
|
||||||
|
@ -49,7 +52,7 @@ public class KeywordMarkerFilterFactoryTests extends ESTokenStreamTestCase {
|
||||||
.put("index.analysis.analyzer.my_keyword.filter", "my_keyword, porter_stem")
|
.put("index.analysis.analyzer.my_keyword.filter", "my_keyword, porter_stem")
|
||||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
.build();
|
.build();
|
||||||
TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
|
||||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_keyword");
|
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_keyword");
|
||||||
assertThat(tokenFilter, instanceOf(KeywordMarkerTokenFilterFactory.class));
|
assertThat(tokenFilter, instanceOf(KeywordMarkerTokenFilterFactory.class));
|
||||||
TokenStream filter = tokenFilter.create(new WhitespaceTokenizer());
|
TokenStream filter = tokenFilter.create(new WhitespaceTokenizer());
|
||||||
|
@ -72,7 +75,7 @@ public class KeywordMarkerFilterFactoryTests extends ESTokenStreamTestCase {
|
||||||
.put("index.analysis.analyzer.my_keyword.filter", "my_keyword, porter_stem")
|
.put("index.analysis.analyzer.my_keyword.filter", "my_keyword, porter_stem")
|
||||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
.build();
|
.build();
|
||||||
TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
|
TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
|
||||||
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_keyword");
|
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_keyword");
|
||||||
assertThat(tokenFilter, instanceOf(KeywordMarkerTokenFilterFactory.class));
|
assertThat(tokenFilter, instanceOf(KeywordMarkerTokenFilterFactory.class));
|
||||||
TokenStream filter = tokenFilter.create(new WhitespaceTokenizer());
|
TokenStream filter = tokenFilter.create(new WhitespaceTokenizer());
|
||||||
|
@ -96,7 +99,7 @@ public class KeywordMarkerFilterFactoryTests extends ESTokenStreamTestCase {
|
||||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||||
.build();
|
.build();
|
||||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings));
|
() -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin()));
|
||||||
assertEquals("cannot specify both `keywords_pattern` and `keywords` or `keywords_path`",
|
assertEquals("cannot specify both `keywords_pattern` and `keywords` or `keywords_path`",
|
||||||
e.getMessage());
|
e.getMessage());
|
||||||
}
|
}
|
|
@ -33,6 +33,70 @@
|
||||||
- length: { tokens: 1 }
|
- length: { tokens: 1 }
|
||||||
- match: { tokens.0.token: foo bar! }
|
- match: { tokens.0.token: foo bar! }
|
||||||
|
|
||||||
|
---
|
||||||
|
"porterstem":
|
||||||
|
- do:
|
||||||
|
indices.analyze:
|
||||||
|
body:
|
||||||
|
text: This is troubled
|
||||||
|
tokenizer: standard
|
||||||
|
filter: [porter_stem]
|
||||||
|
- length: { tokens: 3 }
|
||||||
|
- match: { tokens.2.token: troubl }
|
||||||
|
- match: { tokens.2.position: 2 }
|
||||||
|
|
||||||
|
---
|
||||||
|
"keywordmarker":
|
||||||
|
- do:
|
||||||
|
indices.analyze:
|
||||||
|
body:
|
||||||
|
text: This is troubled
|
||||||
|
tokenizer: standard
|
||||||
|
filter:
|
||||||
|
- type: keyword_marker
|
||||||
|
keywords: troubled
|
||||||
|
- type: porter_stem
|
||||||
|
- length: { tokens: 3 }
|
||||||
|
- match: { tokens.2.token: troubled }
|
||||||
|
- match: { tokens.2.position: 2 }
|
||||||
|
|
||||||
|
---
|
||||||
|
"snowball":
|
||||||
|
- do:
|
||||||
|
indices.analyze:
|
||||||
|
body:
|
||||||
|
text: This is troubled
|
||||||
|
tokenizer: standard
|
||||||
|
filter: [snowball]
|
||||||
|
- length: { tokens: 3 }
|
||||||
|
- match: { tokens.2.token: troubl }
|
||||||
|
- match: { tokens.2.position: 2 }
|
||||||
|
|
||||||
|
- do:
|
||||||
|
indices.analyze:
|
||||||
|
body:
|
||||||
|
explain: true
|
||||||
|
text: This is troubled
|
||||||
|
tokenizer: standard
|
||||||
|
filter: [snowball]
|
||||||
|
- length: { detail.tokenfilters.0.tokens: 3 }
|
||||||
|
- match: { detail.tokenfilters.0.tokens.2.token: troubl }
|
||||||
|
- match: { detail.tokenfilters.0.tokens.2.position: 2 }
|
||||||
|
- is_true: detail.tokenfilters.0.tokens.2.bytes
|
||||||
|
- match: { detail.tokenfilters.0.tokens.2.positionLength: 1 }
|
||||||
|
- match: { detail.tokenfilters.0.tokens.2.keyword: false }
|
||||||
|
|
||||||
|
---
|
||||||
|
"trim":
|
||||||
|
- do:
|
||||||
|
indices.analyze:
|
||||||
|
body:
|
||||||
|
text: Foo Bar !
|
||||||
|
tokenizer: keyword
|
||||||
|
filter: [trim]
|
||||||
|
- length: { tokens: 1 }
|
||||||
|
- match: { tokens.0.token: Foo Bar ! }
|
||||||
|
|
||||||
---
|
---
|
||||||
"word_delimiter":
|
"word_delimiter":
|
||||||
- do:
|
- do:
|
||||||
|
|
|
@ -48,7 +48,6 @@ import org.elasticsearch.index.analysis.IndicNormalizationFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
|
import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
|
import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.KeepWordFilterFactory;
|
import org.elasticsearch.index.analysis.KeepWordFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.KeywordMarkerTokenFilterFactory;
|
|
||||||
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
|
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
|
||||||
import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
|
import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.LetterTokenizerFactory;
|
import org.elasticsearch.index.analysis.LetterTokenizerFactory;
|
||||||
|
@ -64,7 +63,6 @@ import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
|
import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.PatternTokenizerFactory;
|
import org.elasticsearch.index.analysis.PatternTokenizerFactory;
|
||||||
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
|
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.PorterStemTokenFilterFactory;
|
|
||||||
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
|
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
|
||||||
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
|
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
|
||||||
import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;
|
import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;
|
||||||
|
@ -72,7 +70,6 @@ import org.elasticsearch.index.analysis.ScandinavianFoldingFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.ScandinavianNormalizationFilterFactory;
|
import org.elasticsearch.index.analysis.ScandinavianNormalizationFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.SerbianNormalizationFilterFactory;
|
import org.elasticsearch.index.analysis.SerbianNormalizationFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
|
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.SnowballTokenFilterFactory;
|
|
||||||
import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
|
import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.StandardTokenFilterFactory;
|
import org.elasticsearch.index.analysis.StandardTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
|
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
|
||||||
|
@ -82,7 +79,6 @@ import org.elasticsearch.index.analysis.StopTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.SynonymGraphTokenFilterFactory;
|
import org.elasticsearch.index.analysis.SynonymGraphTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
|
import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
|
import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
|
||||||
import org.elasticsearch.index.analysis.TrimTokenFilterFactory;
|
|
||||||
import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
|
import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
|
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
|
||||||
import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
|
import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
|
||||||
|
@ -193,7 +189,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
|
||||||
.put("indonesianstem", StemmerTokenFilterFactory.class)
|
.put("indonesianstem", StemmerTokenFilterFactory.class)
|
||||||
.put("italianlightstem", StemmerTokenFilterFactory.class)
|
.put("italianlightstem", StemmerTokenFilterFactory.class)
|
||||||
.put("keepword", KeepWordFilterFactory.class)
|
.put("keepword", KeepWordFilterFactory.class)
|
||||||
.put("keywordmarker", KeywordMarkerTokenFilterFactory.class)
|
.put("keywordmarker", MovedToAnalysisCommon.class)
|
||||||
.put("kstem", KStemTokenFilterFactory.class)
|
.put("kstem", KStemTokenFilterFactory.class)
|
||||||
.put("latvianstem", StemmerTokenFilterFactory.class)
|
.put("latvianstem", StemmerTokenFilterFactory.class)
|
||||||
.put("length", LengthTokenFilterFactory.class)
|
.put("length", LengthTokenFilterFactory.class)
|
||||||
|
@ -205,7 +201,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
|
||||||
.put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class)
|
.put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class)
|
||||||
.put("patternreplace", PatternReplaceTokenFilterFactory.class)
|
.put("patternreplace", PatternReplaceTokenFilterFactory.class)
|
||||||
.put("persiannormalization", PersianNormalizationFilterFactory.class)
|
.put("persiannormalization", PersianNormalizationFilterFactory.class)
|
||||||
.put("porterstem", PorterStemTokenFilterFactory.class)
|
.put("porterstem", MovedToAnalysisCommon.class)
|
||||||
.put("portuguesestem", StemmerTokenFilterFactory.class)
|
.put("portuguesestem", StemmerTokenFilterFactory.class)
|
||||||
.put("portugueselightstem", StemmerTokenFilterFactory.class)
|
.put("portugueselightstem", StemmerTokenFilterFactory.class)
|
||||||
.put("portugueseminimalstem", StemmerTokenFilterFactory.class)
|
.put("portugueseminimalstem", StemmerTokenFilterFactory.class)
|
||||||
|
@ -216,7 +212,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
|
||||||
.put("serbiannormalization", SerbianNormalizationFilterFactory.class)
|
.put("serbiannormalization", SerbianNormalizationFilterFactory.class)
|
||||||
.put("shingle", ShingleTokenFilterFactory.class)
|
.put("shingle", ShingleTokenFilterFactory.class)
|
||||||
.put("minhash", MinHashTokenFilterFactory.class)
|
.put("minhash", MinHashTokenFilterFactory.class)
|
||||||
.put("snowballporter", SnowballTokenFilterFactory.class)
|
.put("snowballporter", MovedToAnalysisCommon.class)
|
||||||
.put("soraninormalization", SoraniNormalizationFilterFactory.class)
|
.put("soraninormalization", SoraniNormalizationFilterFactory.class)
|
||||||
.put("soranistem", StemmerTokenFilterFactory.class)
|
.put("soranistem", StemmerTokenFilterFactory.class)
|
||||||
.put("spanishlightstem", StemmerTokenFilterFactory.class)
|
.put("spanishlightstem", StemmerTokenFilterFactory.class)
|
||||||
|
@ -226,7 +222,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
|
||||||
.put("swedishlightstem", StemmerTokenFilterFactory.class)
|
.put("swedishlightstem", StemmerTokenFilterFactory.class)
|
||||||
.put("synonym", SynonymTokenFilterFactory.class)
|
.put("synonym", SynonymTokenFilterFactory.class)
|
||||||
.put("synonymgraph", SynonymGraphTokenFilterFactory.class)
|
.put("synonymgraph", SynonymGraphTokenFilterFactory.class)
|
||||||
.put("trim", TrimTokenFilterFactory.class)
|
.put("trim", MovedToAnalysisCommon.class)
|
||||||
.put("truncate", TruncateTokenFilterFactory.class)
|
.put("truncate", TruncateTokenFilterFactory.class)
|
||||||
.put("turkishlowercase", LowerCaseTokenFilterFactory.class)
|
.put("turkishlowercase", LowerCaseTokenFilterFactory.class)
|
||||||
.put("type", KeepTypesFilterFactory.class)
|
.put("type", KeepTypesFilterFactory.class)
|
||||||
|
|
Loading…
Reference in New Issue