Moved more token filters to the analysis-common module.

The following token filters were moved: `edge_ngram`, `ngram`, `uppercase`, `lowercase`, `length`, `flatten_graph` and `unique`.

Relates to #23658
This commit is contained in:
Martijn van Groningen 2017-06-14 01:26:36 +02:00
parent 2a78b0a19f
commit 428e70758a
No known key found for this signature in database
GPG Key ID: AB236F4FCF2AF12A
24 changed files with 470 additions and 196 deletions

View File

@ -571,7 +571,6 @@
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]IndexingSlowLogTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]MergePolicySettingsTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]SearchSlowLogTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]NGramTokenizerFactoryTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PatternCaptureTokenFilterTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PreBuiltAnalyzerTests.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]commongrams[/\\]CommonGramsTokenFilterFactoryTests.java" checks="LineLength" />

View File

@ -54,14 +54,12 @@ import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
import org.elasticsearch.index.analysis.DutchStemTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
import org.elasticsearch.index.analysis.ElisionTokenFilterFactory;
import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintTokenFilterFactory;
import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
import org.elasticsearch.index.analysis.FrenchStemTokenFilterFactory;
import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
@ -83,14 +81,11 @@ import org.elasticsearch.index.analysis.KeepWordFilterFactory;
import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
import org.elasticsearch.index.analysis.LetterTokenizerFactory;
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
@ -133,8 +128,6 @@ import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
import org.elasticsearch.index.analysis.TurkishAnalyzerProvider;
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
import org.elasticsearch.index.analysis.UniqueTokenFilterFactory;
import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.WhitespaceAnalyzerProvider;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
@ -209,25 +202,16 @@ public final class AnalysisModule {
NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = new NamedRegistry<>("token_filter");
tokenFilters.register("stop", StopTokenFilterFactory::new);
tokenFilters.register("reverse", ReverseTokenFilterFactory::new);
tokenFilters.register("length", LengthTokenFilterFactory::new);
tokenFilters.register("lowercase", LowerCaseTokenFilterFactory::new);
tokenFilters.register("uppercase", UpperCaseTokenFilterFactory::new);
tokenFilters.register("kstem", KStemTokenFilterFactory::new);
tokenFilters.register("standard", StandardTokenFilterFactory::new);
tokenFilters.register("nGram", NGramTokenFilterFactory::new);
tokenFilters.register("ngram", NGramTokenFilterFactory::new);
tokenFilters.register("edgeNGram", EdgeNGramTokenFilterFactory::new);
tokenFilters.register("edge_ngram", EdgeNGramTokenFilterFactory::new);
tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
tokenFilters.register("unique", UniqueTokenFilterFactory::new);
tokenFilters.register("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
tokenFilters.register("stemmer", StemmerTokenFilterFactory::new);
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
tokenFilters.register("elision", ElisionTokenFilterFactory::new);
tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new);
tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));

View File

@ -19,7 +19,6 @@
package org.elasticsearch.search.fetch.subphase.highlight;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
@ -214,54 +213,6 @@ public class HighlighterSearchIT extends ESIntegTestCase {
assertHighlight(search, 0, "name", 0, startsWith("<em>abc</em> <em>abc</em> <em>abc</em> <em>abc</em>"));
}
/**
 * Checks highlighting on fields analyzed with n-grams.
 *
 * Two text fields are indexed with the same content and term vectors
 * ({@code with_positions_offsets}):
 * - {@code name} uses the {@code my_ngramt} n-gram tokenizer (min_gram 1, max_gram 20,
 *   token_chars letter,digit);
 * - {@code name2} uses the whitespace tokenizer followed by the {@code my_ngram}
 *   n-gram token filter (min_gram 1, max_gram 20).
 * Both fields are searched with a whitespace-tokenizer analyzer.
 */
public void testNgramHighlighting() throws IOException {
// Create the index with both field mappings and the custom n-gram analysis chain.
assertAcked(prepareCreate("test")
.addMapping("test",
"name", "type=text,analyzer=name_index_analyzer,search_analyzer=name_search_analyzer,"
+ "term_vector=with_positions_offsets",
"name2", "type=text,analyzer=name2_index_analyzer,search_analyzer=name_search_analyzer,"
+ "term_vector=with_positions_offsets")
.setSettings(Settings.builder()
.put(indexSettings())
// n-gram token filter, used by name2_index_analyzer
.put("analysis.filter.my_ngram.max_gram", 20)
.put("analysis.filter.my_ngram.min_gram", 1)
.put("analysis.filter.my_ngram.type", "ngram")
// n-gram tokenizer, used by name_index_analyzer
.put("analysis.tokenizer.my_ngramt.max_gram", 20)
.put("analysis.tokenizer.my_ngramt.min_gram", 1)
.put("analysis.tokenizer.my_ngramt.token_chars", "letter,digit")
.put("analysis.tokenizer.my_ngramt.type", "ngram")
.put("analysis.analyzer.name_index_analyzer.tokenizer", "my_ngramt")
.put("analysis.analyzer.name2_index_analyzer.tokenizer", "whitespace")
.put("analysis.analyzer.name2_index_analyzer.filter", "my_ngram")
.put("analysis.analyzer.name_search_analyzer.tokenizer", "whitespace")));
// Index a single document carrying the same text in both fields.
client().prepareIndex("test", "test", "1")
.setSource("name", "logicacmg ehemals avinci - the know how company",
"name2", "logicacmg ehemals avinci - the know how company").get();
refresh();
ensureGreen();
// Field "name" (n-gram tokenizer): the expected fragments highlight only the matching
// character ranges inside words.
SearchResponse search = client().prepareSearch().setQuery(matchQuery("name", "logica m"))
.highlighter(new HighlightBuilder().field("name")).get();
assertHighlight(search, 0, "name", 0,
equalTo("<em>logica</em>c<em>m</em>g ehe<em>m</em>als avinci - the know how co<em>m</em>pany"));
search = client().prepareSearch().setQuery(matchQuery("name", "logica ma")).highlighter(new HighlightBuilder().field("name")).get();
assertHighlight(search, 0, "name", 0, equalTo("<em>logica</em>cmg ehe<em>ma</em>ls avinci - the know how company"));
search = client().prepareSearch().setQuery(matchQuery("name", "logica")).highlighter(new HighlightBuilder().field("name")).get();
assertHighlight(search, 0, "name", 0, equalTo("<em>logica</em>cmg ehemals avinci - the know how company"));
// Field "name2" (whitespace tokenizer + n-gram filter): the expected fragments highlight
// whole whitespace-separated words that contain a match.
// NOTE(review): presumably because the n-gram filter keeps the offsets of the original token - confirm.
search = client().prepareSearch().setQuery(matchQuery("name2", "logica m")).highlighter(new HighlightBuilder().field("name2"))
.get();
assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> <em>ehemals</em> avinci - the know how <em>company</em>"));
search = client().prepareSearch().setQuery(matchQuery("name2", "logica ma")).highlighter(new HighlightBuilder().field("name2"))
.get();
assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> <em>ehemals</em> avinci - the know how company"));
search = client().prepareSearch().setQuery(matchQuery("name2", "logica")).highlighter(new HighlightBuilder().field("name2")).get();
assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> ehemals avinci - the know how company"));
}
public void testEnsureNoNegativeOffsets() throws Exception {
assertAcked(prepareCreate("test")
.addMapping("type1",

View File

@ -19,16 +19,6 @@
package org.elasticsearch.search.query;
import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import org.apache.lucene.util.LuceneTestCase;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
@ -56,6 +46,16 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
public class QueryStringIT extends ESIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
@ -91,10 +91,6 @@ public class QueryStringIT extends ESIntegTestCase {
resp = client().prepareSearch("test").setQuery(queryStringQuery("Bar")).get();
assertHitCount(resp, 3L);
assertHits(resp.getHits(), "1", "2", "3");
resp = client().prepareSearch("test").setQuery(queryStringQuery("foa")).get();
assertHitCount(resp, 1L);
assertHits(resp.getHits(), "3");
}
public void testWithDate() throws Exception {
@ -161,8 +157,6 @@ public class QueryStringIT extends ESIntegTestCase {
assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(queryStringQuery("Baz")).get();
assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(queryStringQuery("sbaz")).get();
assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(queryStringQuery("19")).get();
assertHits(resp.getHits(), "1");
// nested doesn't match because it's hidden
@ -223,11 +217,11 @@ public class QueryStringIT extends ESIntegTestCase {
indexRandom(true, false, reqs);
SearchResponse resp = client().prepareSearch("test2").setQuery(
queryStringQuery("foo eggplent").defaultOperator(Operator.AND)).get();
queryStringQuery("foo eggplant").defaultOperator(Operator.AND)).get();
assertHitCount(resp, 0L);
resp = client().prepareSearch("test2").setQuery(
queryStringQuery("foo eggplent").defaultOperator(Operator.AND).useAllFields(true)).get();
queryStringQuery("foo eggplant").defaultOperator(Operator.AND).useAllFields(true)).get();
assertHits(resp.getHits(), "1");
assertHitCount(resp, 1L);

View File

@ -398,10 +398,6 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("Bar")).get();
assertHitCount(resp, 3L);
assertHits(resp.getHits(), "1", "2", "3");
resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("foa")).get();
assertHitCount(resp, 1L);
assertHits(resp.getHits(), "3");
}
public void testWithDate() throws Exception {
@ -480,8 +476,6 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("Baz")).get();
assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("sbaz")).get();
assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("19")).get();
assertHits(resp.getHits(), "1");
// nested doesn't match because it's hidden
@ -547,11 +541,11 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
indexRandom(true, false, reqs);
SearchResponse resp = client().prepareSearch("test").setQuery(
simpleQueryStringQuery("foo eggplent").defaultOperator(Operator.AND)).get();
simpleQueryStringQuery("foo eggplant").defaultOperator(Operator.AND)).get();
assertHitCount(resp, 0L);
resp = client().prepareSearch("test").setQuery(
simpleQueryStringQuery("foo eggplent").defaultOperator(Operator.AND).useAllFields(true)).get();
simpleQueryStringQuery("foo eggplant").defaultOperator(Operator.AND).useAllFields(true)).get();
assertHits(resp.getHits(), "1");
assertHitCount(resp, 1L);

View File

@ -6,22 +6,7 @@
"version": {
"created": "5000099"
},
"analysis": {
"analyzer": {
"my_ngrams": {
"type": "custom",
"tokenizer": "standard",
"filter": ["my_ngrams"]
}
},
"filter": {
"my_ngrams": {
"type": "ngram",
"min_gram": 2,
"max_gram": 2
}
}
}
"query.default_field": "f1"
}
},
"mappings": {
@ -31,7 +16,7 @@
},
"properties": {
"f1": {"type": "text"},
"f2": {"type": "text", "analyzer": "my_ngrams"}
"f2": {"type": "text"}
}
}
}

View File

@ -2,23 +2,7 @@
"settings": {
"index": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"analyzer": {
"my_ngrams": {
"type": "custom",
"tokenizer": "standard",
"filter": ["my_ngrams"]
}
},
"filter": {
"my_ngrams": {
"type": "ngram",
"min_gram": 2,
"max_gram": 2
}
}
}
"number_of_replicas": 0
}
},
"mappings": {
@ -26,7 +10,7 @@
"properties": {
"f1": {"type": "text"},
"f2": {"type": "keyword"},
"f3": {"type": "text", "analyzer": "my_ngrams"},
"f3": {"type": "text"},
"f4": {
"type": "text",
"index_options": "docs"

View File

@ -52,7 +52,6 @@ import org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilter;
import org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilter;
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter;
import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
@ -98,6 +97,15 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
filters.put("trim", TrimTokenFilterFactory::new);
filters.put("word_delimiter", WordDelimiterTokenFilterFactory::new);
filters.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
filters.put("unique", UniqueTokenFilterFactory::new);
filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
filters.put("length", LengthTokenFilterFactory::new);
filters.put("lowercase", LowerCaseTokenFilterFactory::new);
filters.put("uppercase", UpperCaseTokenFilterFactory::new);
filters.put("nGram", NGramTokenFilterFactory::new);
filters.put("ngram", NGramTokenFilterFactory::new);
filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);
filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
return filters;
}
@ -172,7 +180,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
filters.add(PreConfiguredTokenFilter.singleton("nGram", false, NGramTokenFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("persian_normalization", true, PersianNormalizationFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("porter_stem", false, PorterStemFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("reverse", false, input -> new ReverseStringFilter(input)));
filters.add(PreConfiguredTokenFilter.singleton("reverse", false, ReverseStringFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("russian_stem", false, input -> new SnowballFilter(input, "Russian")));
filters.add(PreConfiguredTokenFilter.singleton("scandinavian_folding", true, ScandinavianFoldingFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("scandinavian_normalization", true, ScandinavianNormalizationFilter::new));
@ -185,7 +193,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
filters.add(PreConfiguredTokenFilter.singleton("trim", false, TrimFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("truncate", false, input -> new TruncateTokenFilter(input, 10)));
filters.add(PreConfiguredTokenFilter.singleton("type_as_payload", false, TypeAsPayloadTokenFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("unique", false, input -> new UniqueTokenFilter(input)));
filters.add(PreConfiguredTokenFilter.singleton("unique", false, UniqueTokenFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("uppercase", true, UpperCaseFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("word_delimiter", false, input ->
new WordDelimiterFilter(input,

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
@ -26,6 +26,7 @@ import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
@ -38,7 +39,7 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
public static final int SIDE_BACK = 2;
private final int side;
public EdgeNGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
EdgeNGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);

View File

@ -17,17 +17,18 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.FlattenGraphFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class FlattenGraphTokenFilterFactory extends AbstractTokenFilterFactory {
public FlattenGraphTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
FlattenGraphTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
}

View File

@ -17,13 +17,14 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class LengthTokenFilterFactory extends AbstractTokenFilterFactory {
@ -33,7 +34,7 @@ public class LengthTokenFilterFactory extends AbstractTokenFilterFactory {
// ancient unsupported option
private static final String ENABLE_POS_INC_KEY = "enable_position_increments";
public LengthTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
LengthTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
min = settings.getAsInt("min", 0);
max = settings.getAsInt("max", Integer.MAX_VALUE);

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
@ -27,6 +27,8 @@ import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
/**
* Factory for {@link LowerCaseFilter} and some language-specific variants
@ -41,7 +43,7 @@ public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory impl
private final String lang;
public LowerCaseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
LowerCaseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
this.lang = settings.get("language", null);
}

View File

@ -17,13 +17,14 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
@ -33,7 +34,7 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
private final int maxGram;
public NGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
NGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.apache.lucene.analysis.miscellaneous;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
@ -31,7 +31,7 @@ import java.io.IOException;
* A token filter that removes duplicate tokens. Can optionally remove duplicates only when they
* occur at the same position.
*/
public class UniqueTokenFilter extends TokenFilter {
class UniqueTokenFilter extends TokenFilter {
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
@ -39,11 +39,11 @@ public class UniqueTokenFilter extends TokenFilter {
private final CharArraySet previous = new CharArraySet(8, false);
private final boolean onlyOnSamePosition;
public UniqueTokenFilter(TokenStream in) {
UniqueTokenFilter(TokenStream in) {
this(in, false);
}
public UniqueTokenFilter(TokenStream in, boolean onlyOnSamePosition) {
UniqueTokenFilter(TokenStream in, boolean onlyOnSamePosition) {
super(in);
this.onlyOnSamePosition = onlyOnSamePosition;
}

View File

@ -17,19 +17,19 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class UniqueTokenFilterFactory extends AbstractTokenFilterFactory {
private final boolean onlyOnSamePosition;
public UniqueTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
UniqueTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
this.onlyOnSamePosition = settings.getAsBooleanLenientForPreEs6Indices(
indexSettings.getIndexVersionCreated(), "only_on_same_position", false, deprecationLogger);

View File

@ -17,13 +17,15 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.UpperCaseFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

View File

@ -58,6 +58,15 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
filters.put("trim", TrimTokenFilterFactory.class);
filters.put("worddelimiter", WordDelimiterTokenFilterFactory.class);
filters.put("worddelimitergraph", WordDelimiterGraphTokenFilterFactory.class);
filters.put("flattengraph", FlattenGraphTokenFilterFactory.class);
filters.put("length", LengthTokenFilterFactory.class);
filters.put("greeklowercase", LowerCaseTokenFilterFactory.class);
filters.put("irishlowercase", LowerCaseTokenFilterFactory.class);
filters.put("lowercase", LowerCaseTokenFilterFactory.class);
filters.put("turkishlowercase", LowerCaseTokenFilterFactory.class);
filters.put("uppercase", UpperCaseTokenFilterFactory.class);
filters.put("ngram", NGramTokenFilterFactory.class);
filters.put("edgengram", EdgeNGramTokenFilterFactory.class);
return filters;
}

View File

@ -17,9 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
import java.io.IOException;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
@ -30,6 +28,8 @@ import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.test.ESTokenStreamTestCase;
import org.elasticsearch.test.IndexSettingsModule;
import java.io.IOException;
public class FlattenGraphTokenFilterFactoryTests extends ESTokenStreamTestCase {
public void testBasic() throws IOException {

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
@ -30,6 +30,8 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.Settings.Builder;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
import org.elasticsearch.test.ESTokenStreamTestCase;
import org.elasticsearch.test.IndexSettingsModule;
@ -52,7 +54,8 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
final Settings indexSettings = newAnalysisSettingsBuilder().build();
IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
for (String tokenChars : Arrays.asList("letters", "number", "DIRECTIONALITY_UNDEFINED")) {
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", tokenChars).build();
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
.put("token_chars", tokenChars).build();
try {
new NGramTokenizerFactory(indexProperties, null, name, settings).create();
fail();
@ -61,7 +64,8 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
}
}
for (String tokenChars : Arrays.asList("letter", " digit ", "punctuation", "DIGIT", "CoNtRoL", "dash_punctuation")) {
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", tokenChars).build();
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
.put("token_chars", tokenChars).build();
indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
new NGramTokenizerFactory(indexProperties, null, name, settings).create();
@ -73,8 +77,10 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
final Index index = new Index("test", "_na_");
final String name = "ngr";
final Settings indexSettings = newAnalysisSettingsBuilder().build();
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4).putArray("token_chars", new String[0]).build();
Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4)
.putArray("token_chars", new String[0]).build();
Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
.create();
tokenizer.setReader(new StringReader("1.34"));
assertTokenStreamContents(tokenizer, new String[] {"1.", "1.3", "1.34", ".3", ".34", "34"});
}
@ -84,12 +90,15 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
final Index index = new Index("test", "_na_");
final String name = "ngr";
final Settings indexSettings = newAnalysisSettingsBuilder().build();
Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
.put("token_chars", "letter,digit").build();
Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
.create();
tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f "));
assertTokenStreamContents(tokenizer,
new String[] {"Åb", "Åbc", "bc", "", "déf", "éf", "g\uD801\uDC00", "g\uD801\uDC00f", "\uD801\uDC00f"});
settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
.put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
tokenizer.setReader(new StringReader(" a!$ 9"));
assertTokenStreamContents(tokenizer,
@ -102,12 +111,15 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
final String name = "ngr";
final Settings indexSettings = newAnalysisSettingsBuilder().build();
Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
Tokenizer tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
Tokenizer tokenizer =
new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f "));
assertTokenStreamContents(tokenizer,
new String[] {"Åb", "Åbc", "", "déf", "g\uD801\uDC00", "g\uD801\uDC00f"});
settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
.put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
.create();
tokenizer.setReader(new StringReader(" a!$ 9"));
assertTokenStreamContents(tokenizer,
new String[] {" a", " a!"});
@ -128,7 +140,9 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
Settings indexSettings = newAnalysisSettingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
Tokenizer tokenizer = new MockTokenizer();
tokenizer.setReader(new StringReader("foo bar"));
TokenStream edgeNGramTokenFilter = new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(tokenizer);
TokenStream edgeNGramTokenFilter =
new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
.create(tokenizer);
if (reverse) {
assertThat(edgeNGramTokenFilter, instanceOf(ReverseStringFilter.class));
} else {

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.apache.lucene.analysis.miscellaneous;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;

View File

@ -210,3 +210,185 @@
- match: { detail.tokenfilters.0.tokens.5.start_offset: 16 }
- match: { detail.tokenfilters.0.tokens.5.end_offset: 19 }
- match: { detail.tokenfilters.0.tokens.5.position: 5 }
---
# Analyzes "Foo Foo Bar!" with the whitespace tokenizer followed by the `unique` filter.
# Only two tokens are expected, i.e. the repeated "Foo" must be emitted once.
"unique":
- do:
indices.analyze:
body:
text: Foo Foo Bar!
tokenizer: whitespace
filter: [unique]
# Three whitespace-separated words go in, two distinct tokens come out.
- length: { tokens: 2 }
- match: { tokens.0.token: Foo }
- match: { tokens.1.token: Bar! }
---
# Creates an index with a custom `synonym_graph` filter and analyzes the same sentence twice:
# first with the synonym graph filter alone, then with `flatten_graph` appended.
# Both runs expect the same nine tokens in the same order; what differs between them
# are the expected `position` and `positionLength` values.
"synonym_graph and flatten_graph":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
my_synonym_graph:
type: synonym_graph
# One synonym set containing a three-word, a one-word and a two-word phrase.
synonyms: ["automatic teller machine,atm,cash point"]
# Pass 1: synonym graph only.
- do:
indices.analyze:
index: test
body:
text: this automatic teller machine is down
tokenizer: whitespace
filter: [my_synonym_graph]
# 6 input words + "atm" + "cash" + "point" = 9 tokens.
- length: { tokens: 9 }
- match: { tokens.0.token: this }
- match: { tokens.0.position: 0 }
# `is_false` is used for tokens where no positionLength is expected in the response.
- is_false: tokens.0.positionLength
- match: { tokens.1.token: atm }
- match: { tokens.1.position: 1 }
- match: { tokens.1.positionLength: 4 }
- match: { tokens.2.token: cash }
- match: { tokens.2.position: 1 }
- is_false: tokens.2.positionLength
- match: { tokens.3.token: automatic }
- match: { tokens.3.position: 1 }
- match: { tokens.3.positionLength: 2 }
- match: { tokens.4.token: point }
- match: { tokens.4.position: 2 }
- match: { tokens.4.positionLength: 3 }
- match: { tokens.5.token: teller }
- match: { tokens.5.position: 3 }
- is_false: tokens.5.positionLength
- match: { tokens.6.token: machine }
- match: { tokens.6.position: 4 }
- is_false: tokens.6.positionLength
- match: { tokens.7.token: is }
- match: { tokens.7.position: 5 }
- is_false: tokens.7.positionLength
- match: { tokens.8.token: down }
- match: { tokens.8.position: 6 }
- is_false: tokens.8.positionLength
# Pass 2: same input, with flatten_graph applied after the synonym graph filter.
- do:
indices.analyze:
index: test
body:
text: this automatic teller machine is down
tokenizer: whitespace
filter: [my_synonym_graph,flatten_graph]
# Token count and order are unchanged; the last expected position drops from 6 to 5.
- length: { tokens: 9 }
- match: { tokens.0.token: this }
- match: { tokens.0.position: 0 }
- is_false: tokens.0.positionLength
- match: { tokens.1.token: atm }
- match: { tokens.1.position: 1 }
# Expected positionLength is 3 here (it was 4 without flatten_graph).
- match: { tokens.1.positionLength: 3 }
- match: { tokens.2.token: cash }
- match: { tokens.2.position: 1 }
- is_false: tokens.2.positionLength
- match: { tokens.3.token: automatic }
- match: { tokens.3.position: 1 }
# No positionLength expected for "automatic" any more (it was 2 without flatten_graph).
- is_false: tokens.3.positionLength
- match: { tokens.4.token: point }
- match: { tokens.4.position: 2 }
- match: { tokens.4.positionLength: 2 }
- match: { tokens.5.token: teller }
- match: { tokens.5.position: 2 }
- is_false: tokens.5.positionLength
- match: { tokens.6.token: machine }
- match: { tokens.6.position: 3 }
- is_false: tokens.6.positionLength
- match: { tokens.7.token: is }
- match: { tokens.7.position: 4 }
- is_false: tokens.7.positionLength
- match: { tokens.8.token: down }
- match: { tokens.8.position: 5 }
- is_false: tokens.8.positionLength
---
# Creates an index with a custom `length` filter (min: 6) and checks that, of the
# three whitespace-separated words analyzed, only the six-character one is returned.
"length":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
my_length:
type: length
min: 6
- do:
indices.analyze:
index: test
body:
text: foo bar foobar
tokenizer: whitespace
filter: [my_length]
# "foo" and "bar" (3 characters each) are expected to be dropped.
- length: { tokens: 1 }
- match: { tokens.0.token: foobar }
---
# Analyzes "foobar" as a single keyword token through the built-in `uppercase` filter
# and expects the upper-cased form back.
"uppercase":
- do:
indices.analyze:
body:
text: foobar
tokenizer: keyword
filter: [uppercase]
- length: { tokens: 1 }
- match: { tokens.0.token: FOOBAR }
---
# Creates an index with a custom `ngram` filter restricted to 3-character grams
# (min_gram == max_gram == 3) and checks the grams produced for the keyword "foobar".
"ngram":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
my_ngram:
type: ngram
min_gram: 3
max_gram: 3
- do:
indices.analyze:
index: test
body:
text: foobar
tokenizer: keyword
filter: [my_ngram]
# Every 3-character window of "foobar", in order of start offset.
- length: { tokens: 4 }
- match: { tokens.0.token: foo }
- match: { tokens.1.token: oob }
- match: { tokens.2.token: oba }
- match: { tokens.3.token: bar }
---
# Creates an index with a custom `edge_ngram` filter (min_gram 3, max_gram 6) and checks
# that the keyword "foobar" yields its prefixes of length 3 through 6.
"edge_ngram":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
my_edge_ngram:
type: edge_ngram
min_gram: 3
max_gram: 6
- do:
indices.analyze:
index: test
body:
text: foobar
tokenizer: keyword
filter: [my_edge_ngram]
# All expected tokens start at the beginning of the input and grow by one character.
- length: { tokens: 4 }
- match: { tokens.0.token: foo }
- match: { tokens.1.token: foob }
- match: { tokens.2.token: fooba }
- match: { tokens.3.token: foobar }

View File

@ -0,0 +1,41 @@
# Indexes one document into a text field analyzed with a 2-gram `ngram` filter and
# checks that a match query for a term that is not in the document ("foa") still
# finds it through a shared 2-character gram.
"ngram search":
- do:
indices.create:
index: test
body:
settings:
number_of_shards: 1
number_of_replicas: 0
analysis:
analyzer:
my_analyzer:
tokenizer: standard
filter: [my_ngram]
filter:
my_ngram:
type: ngram
# The ngram filter is configured through min_gram / max_gram (as in the other
# ngram filter definitions of this test suite); both are 2 so that only
# 2-character grams are produced.
min_gram: 2
max_gram: 2
mappings:
doc:
properties:
text:
type: text
analyzer: my_analyzer
- do:
index:
index: test
type: doc
id: 1
body: { "text": "foo bar baz" }
# Refresh so the document is visible to the search below.
refresh: true
- do:
search:
body:
query:
match:
text:
# "foa" is not an indexed word, but it shares the gram "fo" with "foo".
query: foa
- match: {hits.total: 1}

View File

@ -0,0 +1,129 @@
# Highlighting test for ngram-analyzed fields. The same sentence is indexed into two
# fields that differ only in how the index-time grams are produced:
#   - `name`  uses an ngram *tokenizer* (my_ngramt),
#   - `name2` uses the whitespace tokenizer followed by an ngram *token filter* (my_ngram).
# Both fields are searched with a plain whitespace analyzer. The expected highlights show
# that `name` marks only the matched fragments while `name2` marks whole words.
"ngram highlighting":
- do:
indices.create:
index: test
body:
settings:
number_of_shards: 1
number_of_replicas: 0
analysis:
tokenizer:
my_ngramt:
type: ngram
min_gram: 1
max_gram: 20
token_chars: letter,digit
filter:
my_ngram:
type: ngram
min_gram: 1
max_gram: 20
analyzer:
# Index analyzer for `name2`: whitespace tokens expanded by the ngram filter.
name2_index_analyzer:
tokenizer: whitespace
filter: [my_ngram]
# Index analyzer for `name`: grams produced directly by the ngram tokenizer.
name_index_analyzer:
tokenizer: my_ngramt
# Shared search analyzer: query text is only split on whitespace.
name_search_analyzer:
tokenizer: whitespace
mappings:
doc:
properties:
name:
type: text
term_vector: with_positions_offsets
analyzer: name_index_analyzer
search_analyzer: name_search_analyzer
name2:
type: text
term_vector: with_positions_offsets
analyzer: name2_index_analyzer
search_analyzer: name_search_analyzer
- do:
index:
index: test
type: doc
id: 1
refresh: true
body:
name: logicacmg ehemals avinci - the know how company
name2: logicacmg ehemals avinci - the know how company
# --- Field `name` (ngram tokenizer): highlights cover exactly the matched fragments. ---
- do:
search:
body:
query:
match:
name:
query: logica m
highlight:
fields:
- name: {}
- match: {hits.total: 1}
# "logica" plus every single "m" in the text is expected to be wrapped.
- match: {hits.hits.0.highlight.name.0: "<em>logica</em>c<em>m</em>g ehe<em>m</em>als avinci - the know how co<em>m</em>pany"}
- do:
search:
body:
query:
match:
name:
query: logica ma
highlight:
fields:
- name: {}
- match: {hits.total: 1}
# Only the one occurrence of "ma" (inside "ehemals") is expected besides "logica".
- match: {hits.hits.0.highlight.name.0: "<em>logica</em>cmg ehe<em>ma</em>ls avinci - the know how company"}
- do:
search:
body:
query:
match:
name:
query: logica
highlight:
fields:
- name: {}
- match: {hits.total: 1}
- match: {hits.hits.0.highlight.name.0: "<em>logica</em>cmg ehemals avinci - the know how company"}
# --- Field `name2` (whitespace tokenizer + ngram filter): highlights cover whole words. ---
- do:
search:
body:
query:
match:
name2:
query: logica m
highlight:
fields:
- name2: {}
- match: {hits.total: 1}
# Every word containing "logica" or "m" is expected to be wrapped in full.
- match: {hits.hits.0.highlight.name2.0: "<em>logicacmg</em> <em>ehemals</em> avinci - the know how <em>company</em>"}
- do:
search:
body:
query:
match:
name2:
query: logica ma
highlight:
fields:
- name2: {}
- match: {hits.total: 1}
# "company" contains no "ma", so it is not expected to be highlighted here.
- match: {hits.hits.0.highlight.name2.0: "<em>logicacmg</em> <em>ehemals</em> avinci - the know how company"}
- do:
search:
body:
query:
match:
name2:
query: logica
highlight:
fields:
- name2: {}
- match: {hits.total: 1}
- match: {hits.hits.0.highlight.name2.0: "<em>logicacmg</em> ehemals avinci - the know how company"}

View File

@ -22,7 +22,6 @@ package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.elasticsearch.Version;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.index.analysis.ApostropheFilterFactory;
import org.elasticsearch.index.analysis.ArabicNormalizationFilterFactory;
@ -36,10 +35,8 @@ import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory;
import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
import org.elasticsearch.index.analysis.ElisionTokenFilterFactory;
import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
import org.elasticsearch.index.analysis.GermanNormalizationFilterFactory;
import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory;
import org.elasticsearch.index.analysis.HindiNormalizationFilterFactory;
@ -49,14 +46,11 @@ import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
import org.elasticsearch.index.analysis.KeepWordFilterFactory;
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
import org.elasticsearch.index.analysis.LetterTokenizerFactory;
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
@ -82,7 +76,6 @@ import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
@ -90,7 +83,6 @@ import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.ESTestCase;
import java.util.Collection;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
@ -165,7 +157,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("decimaldigit", DecimalDigitFilterFactory.class)
.put("delimitedpayload", DelimitedPayloadTokenFilterFactory.class)
.put("dictionarycompoundword", DictionaryCompoundWordTokenFilterFactory.class)
.put("edgengram", EdgeNGramTokenFilterFactory.class)
.put("edgengram", MovedToAnalysisCommon.class)
.put("elision", ElisionTokenFilterFactory.class)
.put("englishminimalstem", StemmerTokenFilterFactory.class)
.put("englishpossessive", StemmerTokenFilterFactory.class)
@ -178,7 +170,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("germanlightstem", StemmerTokenFilterFactory.class)
.put("germanminimalstem", StemmerTokenFilterFactory.class)
.put("germannormalization", GermanNormalizationFilterFactory.class)
.put("greeklowercase", LowerCaseTokenFilterFactory.class)
.put("greeklowercase", MovedToAnalysisCommon.class)
.put("greekstem", StemmerTokenFilterFactory.class)
.put("hindinormalization", HindiNormalizationFilterFactory.class)
.put("hindistem", StemmerTokenFilterFactory.class)
@ -186,17 +178,17 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("hunspellstem", HunspellTokenFilterFactory.class)
.put("hyphenationcompoundword", HyphenationCompoundWordTokenFilterFactory.class)
.put("indicnormalization", IndicNormalizationFilterFactory.class)
.put("irishlowercase", LowerCaseTokenFilterFactory.class)
.put("irishlowercase", MovedToAnalysisCommon.class)
.put("indonesianstem", StemmerTokenFilterFactory.class)
.put("italianlightstem", StemmerTokenFilterFactory.class)
.put("keepword", KeepWordFilterFactory.class)
.put("keywordmarker", MovedToAnalysisCommon.class)
.put("kstem", KStemTokenFilterFactory.class)
.put("latvianstem", StemmerTokenFilterFactory.class)
.put("length", LengthTokenFilterFactory.class)
.put("length", MovedToAnalysisCommon.class)
.put("limittokencount", LimitTokenCountFilterFactory.class)
.put("lowercase", LowerCaseTokenFilterFactory.class)
.put("ngram", NGramTokenFilterFactory.class)
.put("lowercase", MovedToAnalysisCommon.class)
.put("ngram", MovedToAnalysisCommon.class)
.put("norwegianlightstem", StemmerTokenFilterFactory.class)
.put("norwegianminimalstem", StemmerTokenFilterFactory.class)
.put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class)
@ -225,12 +217,12 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("synonymgraph", SynonymGraphTokenFilterFactory.class)
.put("trim", MovedToAnalysisCommon.class)
.put("truncate", TruncateTokenFilterFactory.class)
.put("turkishlowercase", LowerCaseTokenFilterFactory.class)
.put("turkishlowercase", MovedToAnalysisCommon.class)
.put("type", KeepTypesFilterFactory.class)
.put("uppercase", UpperCaseTokenFilterFactory.class)
.put("uppercase", MovedToAnalysisCommon.class)
.put("worddelimiter", MovedToAnalysisCommon.class)
.put("worddelimitergraph", MovedToAnalysisCommon.class)
.put("flattengraph", FlattenGraphTokenFilterFactory.class)
.put("flattengraph", MovedToAnalysisCommon.class)
// TODO: these tokenfilters are not yet exposed: useful?