diff --git a/buildSrc/src/main/resources/checkstyle_suppressions.xml b/buildSrc/src/main/resources/checkstyle_suppressions.xml
index 678155c6561..caa4d6dec38 100644
--- a/buildSrc/src/main/resources/checkstyle_suppressions.xml
+++ b/buildSrc/src/main/resources/checkstyle_suppressions.xml
@@ -571,7 +571,6 @@
-
diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
index 3f26b722f41..9220c063715 100644
--- a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
+++ b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java
@@ -54,14 +54,12 @@ import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
import org.elasticsearch.index.analysis.DutchStemTokenFilterFactory;
-import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
import org.elasticsearch.index.analysis.ElisionTokenFilterFactory;
import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintTokenFilterFactory;
import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
-import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
import org.elasticsearch.index.analysis.FrenchStemTokenFilterFactory;
import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
@@ -83,14 +81,11 @@ import org.elasticsearch.index.analysis.KeepWordFilterFactory;
import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
-import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
import org.elasticsearch.index.analysis.LetterTokenizerFactory;
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
-import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
-import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
@@ -133,8 +128,6 @@ import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
import org.elasticsearch.index.analysis.TurkishAnalyzerProvider;
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
-import org.elasticsearch.index.analysis.UniqueTokenFilterFactory;
-import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.WhitespaceAnalyzerProvider;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
@@ -209,25 +202,16 @@ public final class AnalysisModule {
NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = new NamedRegistry<>("token_filter");
tokenFilters.register("stop", StopTokenFilterFactory::new);
tokenFilters.register("reverse", ReverseTokenFilterFactory::new);
- tokenFilters.register("length", LengthTokenFilterFactory::new);
- tokenFilters.register("lowercase", LowerCaseTokenFilterFactory::new);
- tokenFilters.register("uppercase", UpperCaseTokenFilterFactory::new);
tokenFilters.register("kstem", KStemTokenFilterFactory::new);
tokenFilters.register("standard", StandardTokenFilterFactory::new);
- tokenFilters.register("nGram", NGramTokenFilterFactory::new);
- tokenFilters.register("ngram", NGramTokenFilterFactory::new);
- tokenFilters.register("edgeNGram", EdgeNGramTokenFilterFactory::new);
- tokenFilters.register("edge_ngram", EdgeNGramTokenFilterFactory::new);
tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
- tokenFilters.register("unique", UniqueTokenFilterFactory::new);
tokenFilters.register("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
tokenFilters.register("stemmer", StemmerTokenFilterFactory::new);
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
tokenFilters.register("elision", ElisionTokenFilterFactory::new);
- tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new);
tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
diff --git a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java
index 9cbd9fc5d75..2bc98b39dc2 100644
--- a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java
+++ b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java
@@ -19,7 +19,6 @@
package org.elasticsearch.search.fetch.subphase.highlight;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
-
import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
@@ -214,54 +213,6 @@ public class HighlighterSearchIT extends ESIntegTestCase {
assertHighlight(search, 0, "name", 0, startsWith("abc abc abc abc"));
}
- public void testNgramHighlighting() throws IOException {
- assertAcked(prepareCreate("test")
- .addMapping("test",
- "name", "type=text,analyzer=name_index_analyzer,search_analyzer=name_search_analyzer,"
- + "term_vector=with_positions_offsets",
- "name2", "type=text,analyzer=name2_index_analyzer,search_analyzer=name_search_analyzer,"
- + "term_vector=with_positions_offsets")
- .setSettings(Settings.builder()
- .put(indexSettings())
- .put("analysis.filter.my_ngram.max_gram", 20)
- .put("analysis.filter.my_ngram.min_gram", 1)
- .put("analysis.filter.my_ngram.type", "ngram")
- .put("analysis.tokenizer.my_ngramt.max_gram", 20)
- .put("analysis.tokenizer.my_ngramt.min_gram", 1)
- .put("analysis.tokenizer.my_ngramt.token_chars", "letter,digit")
- .put("analysis.tokenizer.my_ngramt.type", "ngram")
- .put("analysis.analyzer.name_index_analyzer.tokenizer", "my_ngramt")
- .put("analysis.analyzer.name2_index_analyzer.tokenizer", "whitespace")
- .put("analysis.analyzer.name2_index_analyzer.filter", "my_ngram")
- .put("analysis.analyzer.name_search_analyzer.tokenizer", "whitespace")));
- client().prepareIndex("test", "test", "1")
- .setSource("name", "logicacmg ehemals avinci - the know how company",
- "name2", "logicacmg ehemals avinci - the know how company").get();
- refresh();
- ensureGreen();
- SearchResponse search = client().prepareSearch().setQuery(matchQuery("name", "logica m"))
- .highlighter(new HighlightBuilder().field("name")).get();
- assertHighlight(search, 0, "name", 0,
- equalTo("logicacmg ehemals avinci - the know how company"));
-
- search = client().prepareSearch().setQuery(matchQuery("name", "logica ma")).highlighter(new HighlightBuilder().field("name")).get();
- assertHighlight(search, 0, "name", 0, equalTo("logicacmg ehemals avinci - the know how company"));
-
- search = client().prepareSearch().setQuery(matchQuery("name", "logica")).highlighter(new HighlightBuilder().field("name")).get();
- assertHighlight(search, 0, "name", 0, equalTo("logicacmg ehemals avinci - the know how company"));
-
- search = client().prepareSearch().setQuery(matchQuery("name2", "logica m")).highlighter(new HighlightBuilder().field("name2"))
- .get();
- assertHighlight(search, 0, "name2", 0, equalTo("logicacmg ehemals avinci - the know how company"));
-
- search = client().prepareSearch().setQuery(matchQuery("name2", "logica ma")).highlighter(new HighlightBuilder().field("name2"))
- .get();
- assertHighlight(search, 0, "name2", 0, equalTo("logicacmg ehemals avinci - the know how company"));
-
- search = client().prepareSearch().setQuery(matchQuery("name2", "logica")).highlighter(new HighlightBuilder().field("name2")).get();
- assertHighlight(search, 0, "name2", 0, equalTo("logicacmg ehemals avinci - the know how company"));
- }
-
public void testEnsureNoNegativeOffsets() throws Exception {
assertAcked(prepareCreate("test")
.addMapping("type1",
diff --git a/core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java b/core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java
index 05a72276362..bd8cfbcaa5a 100644
--- a/core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java
+++ b/core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java
@@ -19,16 +19,6 @@
package org.elasticsearch.search.query;
-import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
-import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
-import static org.hamcrest.Matchers.containsInAnyOrder;
-import static org.hamcrest.Matchers.containsString;
-import static org.hamcrest.Matchers.equalTo;
-
import org.apache.lucene.util.LuceneTestCase;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
@@ -56,6 +46,16 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
+import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+
public class QueryStringIT extends ESIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
@@ -91,10 +91,6 @@ public class QueryStringIT extends ESIntegTestCase {
resp = client().prepareSearch("test").setQuery(queryStringQuery("Bar")).get();
assertHitCount(resp, 3L);
assertHits(resp.getHits(), "1", "2", "3");
-
- resp = client().prepareSearch("test").setQuery(queryStringQuery("foa")).get();
- assertHitCount(resp, 1L);
- assertHits(resp.getHits(), "3");
}
public void testWithDate() throws Exception {
@@ -161,8 +157,6 @@ public class QueryStringIT extends ESIntegTestCase {
assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(queryStringQuery("Baz")).get();
assertHits(resp.getHits(), "1");
- resp = client().prepareSearch("test").setQuery(queryStringQuery("sbaz")).get();
- assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(queryStringQuery("19")).get();
assertHits(resp.getHits(), "1");
// nested doesn't match because it's hidden
@@ -223,11 +217,11 @@ public class QueryStringIT extends ESIntegTestCase {
indexRandom(true, false, reqs);
SearchResponse resp = client().prepareSearch("test2").setQuery(
- queryStringQuery("foo eggplent").defaultOperator(Operator.AND)).get();
+ queryStringQuery("foo eggplant").defaultOperator(Operator.AND)).get();
assertHitCount(resp, 0L);
resp = client().prepareSearch("test2").setQuery(
- queryStringQuery("foo eggplent").defaultOperator(Operator.AND).useAllFields(true)).get();
+ queryStringQuery("foo eggplant").defaultOperator(Operator.AND).useAllFields(true)).get();
assertHits(resp.getHits(), "1");
assertHitCount(resp, 1L);
diff --git a/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java b/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java
index f22ec392b99..a32a8060379 100644
--- a/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java
+++ b/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java
@@ -398,10 +398,6 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("Bar")).get();
assertHitCount(resp, 3L);
assertHits(resp.getHits(), "1", "2", "3");
-
- resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("foa")).get();
- assertHitCount(resp, 1L);
- assertHits(resp.getHits(), "3");
}
public void testWithDate() throws Exception {
@@ -480,8 +476,6 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("Baz")).get();
assertHits(resp.getHits(), "1");
- resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("sbaz")).get();
- assertHits(resp.getHits(), "1");
resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("19")).get();
assertHits(resp.getHits(), "1");
// nested doesn't match because it's hidden
@@ -547,11 +541,11 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
indexRandom(true, false, reqs);
SearchResponse resp = client().prepareSearch("test").setQuery(
- simpleQueryStringQuery("foo eggplent").defaultOperator(Operator.AND)).get();
+ simpleQueryStringQuery("foo eggplant").defaultOperator(Operator.AND)).get();
assertHitCount(resp, 0L);
resp = client().prepareSearch("test").setQuery(
- simpleQueryStringQuery("foo eggplent").defaultOperator(Operator.AND).useAllFields(true)).get();
+ simpleQueryStringQuery("foo eggplant").defaultOperator(Operator.AND).useAllFields(true)).get();
assertHits(resp.getHits(), "1");
assertHitCount(resp, 1L);
diff --git a/core/src/test/resources/org/elasticsearch/search/query/all-query-index-with-all.json b/core/src/test/resources/org/elasticsearch/search/query/all-query-index-with-all.json
index 1a96fd71333..d9cbb485d13 100644
--- a/core/src/test/resources/org/elasticsearch/search/query/all-query-index-with-all.json
+++ b/core/src/test/resources/org/elasticsearch/search/query/all-query-index-with-all.json
@@ -6,22 +6,7 @@
"version": {
"created": "5000099"
},
- "analysis": {
- "analyzer": {
- "my_ngrams": {
- "type": "custom",
- "tokenizer": "standard",
- "filter": ["my_ngrams"]
- }
- },
- "filter": {
- "my_ngrams": {
- "type": "ngram",
- "min_gram": 2,
- "max_gram": 2
- }
- }
- }
+ "query.default_field": "f1"
}
},
"mappings": {
@@ -31,7 +16,7 @@
},
"properties": {
"f1": {"type": "text"},
- "f2": {"type": "text", "analyzer": "my_ngrams"}
+ "f2": {"type": "text"}
}
}
}
diff --git a/core/src/test/resources/org/elasticsearch/search/query/all-query-index.json b/core/src/test/resources/org/elasticsearch/search/query/all-query-index.json
index 86dde5aaf88..89c41217125 100644
--- a/core/src/test/resources/org/elasticsearch/search/query/all-query-index.json
+++ b/core/src/test/resources/org/elasticsearch/search/query/all-query-index.json
@@ -2,23 +2,7 @@
"settings": {
"index": {
"number_of_shards": 1,
- "number_of_replicas": 0,
- "analysis": {
- "analyzer": {
- "my_ngrams": {
- "type": "custom",
- "tokenizer": "standard",
- "filter": ["my_ngrams"]
- }
- },
- "filter": {
- "my_ngrams": {
- "type": "ngram",
- "min_gram": 2,
- "max_gram": 2
- }
- }
- }
+ "number_of_replicas": 0
}
},
"mappings": {
@@ -26,7 +10,7 @@
"properties": {
"f1": {"type": "text"},
"f2": {"type": "keyword"},
- "f3": {"type": "text", "analyzer": "my_ngrams"},
+ "f3": {"type": "text"},
"f4": {
"type": "text",
"index_options": "docs"
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
index 2f8f1d7405a..6cf78044569 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
@@ -52,7 +52,6 @@ import org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilter;
import org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilter;
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter;
-import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
@@ -98,6 +97,15 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
filters.put("trim", TrimTokenFilterFactory::new);
filters.put("word_delimiter", WordDelimiterTokenFilterFactory::new);
filters.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
+ filters.put("unique", UniqueTokenFilterFactory::new);
+ filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
+ filters.put("length", LengthTokenFilterFactory::new);
+ filters.put("lowercase", LowerCaseTokenFilterFactory::new);
+ filters.put("uppercase", UpperCaseTokenFilterFactory::new);
+ filters.put("nGram", NGramTokenFilterFactory::new);
+ filters.put("ngram", NGramTokenFilterFactory::new);
+ filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);
+ filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
return filters;
}
@@ -172,7 +180,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
filters.add(PreConfiguredTokenFilter.singleton("nGram", false, NGramTokenFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("persian_normalization", true, PersianNormalizationFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("porter_stem", false, PorterStemFilter::new));
- filters.add(PreConfiguredTokenFilter.singleton("reverse", false, input -> new ReverseStringFilter(input)));
+ filters.add(PreConfiguredTokenFilter.singleton("reverse", false, ReverseStringFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("russian_stem", false, input -> new SnowballFilter(input, "Russian")));
filters.add(PreConfiguredTokenFilter.singleton("scandinavian_folding", true, ScandinavianFoldingFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("scandinavian_normalization", true, ScandinavianNormalizationFilter::new));
@@ -185,7 +193,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
filters.add(PreConfiguredTokenFilter.singleton("trim", false, TrimFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("truncate", false, input -> new TruncateTokenFilter(input, 10)));
filters.add(PreConfiguredTokenFilter.singleton("type_as_payload", false, TypeAsPayloadTokenFilter::new));
- filters.add(PreConfiguredTokenFilter.singleton("unique", false, input -> new UniqueTokenFilter(input)));
+ filters.add(PreConfiguredTokenFilter.singleton("unique", false, UniqueTokenFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("uppercase", true, UpperCaseFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("word_delimiter", false, input ->
new WordDelimiterFilter(input,
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/EdgeNGramTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EdgeNGramTokenFilterFactory.java
similarity index 92%
rename from core/src/main/java/org/elasticsearch/index/analysis/EdgeNGramTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EdgeNGramTokenFilterFactory.java
index 1d3b8e296ec..af6d30a0354 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/EdgeNGramTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EdgeNGramTokenFilterFactory.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
@@ -38,13 +39,13 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
public static final int SIDE_BACK = 2;
private final int side;
- public EdgeNGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+ EdgeNGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
this.side = parseSide(settings.get("side", "front"));
}
-
+
static int parseSide(String side) {
switch(side) {
case "front": return SIDE_FRONT;
@@ -56,19 +57,19 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenStream create(TokenStream tokenStream) {
TokenStream result = tokenStream;
-
+
// side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect
if (side == SIDE_BACK) {
result = new ReverseStringFilter(result);
}
-
+
result = new EdgeNGramTokenFilter(result, minGram, maxGram);
-
+
// side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect
if (side == SIDE_BACK) {
result = new ReverseStringFilter(result);
}
-
+
return result;
}
@@ -76,4 +77,4 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
public boolean breaksFastVectorHighlighter() {
return true;
}
-}
\ No newline at end of file
+}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactory.java
similarity index 84%
rename from core/src/main/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactory.java
index 6c9487a2cb3..e59c23e4a6c 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactory.java
@@ -17,17 +17,18 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.FlattenGraphFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class FlattenGraphTokenFilterFactory extends AbstractTokenFilterFactory {
- public FlattenGraphTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+ FlattenGraphTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/LengthTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LengthTokenFilterFactory.java
similarity index 88%
rename from core/src/main/java/org/elasticsearch/index/analysis/LengthTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LengthTokenFilterFactory.java
index 8a03802a7dd..477886d702b 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/LengthTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LengthTokenFilterFactory.java
@@ -17,23 +17,24 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class LengthTokenFilterFactory extends AbstractTokenFilterFactory {
private final int min;
private final int max;
-
+
// ancient unsupported option
private static final String ENABLE_POS_INC_KEY = "enable_position_increments";
- public LengthTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+ LengthTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
min = settings.getAsInt("min", 0);
max = settings.getAsInt("max", Integer.MAX_VALUE);
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/LowerCaseTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LowerCaseTokenFilterFactory.java
similarity index 89%
rename from core/src/main/java/org/elasticsearch/index/analysis/LowerCaseTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LowerCaseTokenFilterFactory.java
index 1d9ca2272b8..f85db0dae68 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/LowerCaseTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LowerCaseTokenFilterFactory.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -27,6 +27,8 @@ import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.MultiTermAwareComponent;
/**
* Factory for {@link LowerCaseFilter} and some language-specific variants
@@ -41,7 +43,7 @@ public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory impl
private final String lang;
- public LowerCaseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+ LowerCaseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
this.lang = settings.get("language", null);
}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/NGramTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenFilterFactory.java
similarity index 87%
rename from core/src/main/java/org/elasticsearch/index/analysis/NGramTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenFilterFactory.java
index 7926f585bc3..2d7a8c52fd6 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/NGramTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenFilterFactory.java
@@ -17,13 +17,14 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
@@ -33,7 +34,7 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
private final int maxGram;
- public NGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+ NGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
@@ -43,4 +44,4 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
public TokenStream create(TokenStream tokenStream) {
return new NGramTokenFilter(tokenStream, minGram, maxGram);
}
-}
\ No newline at end of file
+}
diff --git a/core/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UniqueTokenFilter.java
similarity index 92%
rename from core/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UniqueTokenFilter.java
index cc853932efc..ae2b03f5329 100644
--- a/core/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UniqueTokenFilter.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.lucene.analysis.miscellaneous;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
@@ -31,7 +31,7 @@ import java.io.IOException;
* A token filter that generates unique tokens. Can remove unique tokens only on the same
* position increments as well.
*/
-public class UniqueTokenFilter extends TokenFilter {
+class UniqueTokenFilter extends TokenFilter {
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
@@ -39,11 +39,11 @@ public class UniqueTokenFilter extends TokenFilter {
private final CharArraySet previous = new CharArraySet(8, false);
private final boolean onlyOnSamePosition;
- public UniqueTokenFilter(TokenStream in) {
+ UniqueTokenFilter(TokenStream in) {
this(in, false);
}
- public UniqueTokenFilter(TokenStream in, boolean onlyOnSamePosition) {
+ UniqueTokenFilter(TokenStream in, boolean onlyOnSamePosition) {
super(in);
this.onlyOnSamePosition = onlyOnSamePosition;
}
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/UniqueTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UniqueTokenFilterFactory.java
similarity index 86%
rename from core/src/main/java/org/elasticsearch/index/analysis/UniqueTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UniqueTokenFilterFactory.java
index 8606a60292c..256e3dad5c0 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/UniqueTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UniqueTokenFilterFactory.java
@@ -17,19 +17,19 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class UniqueTokenFilterFactory extends AbstractTokenFilterFactory {
private final boolean onlyOnSamePosition;
- public UniqueTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+ UniqueTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
this.onlyOnSamePosition = settings.getAsBooleanLenientForPreEs6Indices(
indexSettings.getIndexVersionCreated(), "only_on_same_position", false, deprecationLogger);
diff --git a/core/src/main/java/org/elasticsearch/index/analysis/UpperCaseTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UpperCaseTokenFilterFactory.java
similarity index 89%
rename from core/src/main/java/org/elasticsearch/index/analysis/UpperCaseTokenFilterFactory.java
rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UpperCaseTokenFilterFactory.java
index 551345fc2e1..7923026d3da 100644
--- a/core/src/main/java/org/elasticsearch/index/analysis/UpperCaseTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UpperCaseTokenFilterFactory.java
@@ -17,13 +17,15 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.UpperCaseFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.MultiTermAwareComponent;
public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java
index f7313572e13..f7c2a411fe1 100644
--- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java
@@ -51,13 +51,22 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
@Override
protected Map<String, Class<?>> getTokenFilters() {
Map<String, Class<?>> filters = new TreeMap<>(super.getTokenFilters());
- filters.put("asciifolding", ASCIIFoldingTokenFilterFactory.class);
- filters.put("keywordmarker", KeywordMarkerTokenFilterFactory.class);
- filters.put("porterstem", PorterStemTokenFilterFactory.class);
- filters.put("snowballporter", SnowballTokenFilterFactory.class);
- filters.put("trim", TrimTokenFilterFactory.class);
- filters.put("worddelimiter", WordDelimiterTokenFilterFactory.class);
- filters.put("worddelimitergraph", WordDelimiterGraphTokenFilterFactory.class);
+ filters.put("asciifolding", ASCIIFoldingTokenFilterFactory.class);
+ filters.put("keywordmarker", KeywordMarkerTokenFilterFactory.class);
+ filters.put("porterstem", PorterStemTokenFilterFactory.class);
+ filters.put("snowballporter", SnowballTokenFilterFactory.class);
+ filters.put("trim", TrimTokenFilterFactory.class);
+ filters.put("worddelimiter", WordDelimiterTokenFilterFactory.class);
+ filters.put("worddelimitergraph", WordDelimiterGraphTokenFilterFactory.class);
+ filters.put("flattengraph", FlattenGraphTokenFilterFactory.class);
+ filters.put("length", LengthTokenFilterFactory.class);
+ filters.put("greeklowercase", LowerCaseTokenFilterFactory.class);
+ filters.put("irishlowercase", LowerCaseTokenFilterFactory.class);
+ filters.put("lowercase", LowerCaseTokenFilterFactory.class);
+ filters.put("turkishlowercase", LowerCaseTokenFilterFactory.class);
+ filters.put("uppercase", UpperCaseTokenFilterFactory.class);
+ filters.put("ngram", NGramTokenFilterFactory.class);
+ filters.put("edgengram", EdgeNGramTokenFilterFactory.class);
return filters;
}
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactoryTests.java
similarity index 98%
rename from core/src/test/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactoryTests.java
rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactoryTests.java
index 259da010daa..fec7f73a697 100644
--- a/core/src/test/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/FlattenGraphTokenFilterFactoryTests.java
@@ -17,9 +17,7 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
-
-import java.io.IOException;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
@@ -30,6 +28,8 @@ import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.test.ESTokenStreamTestCase;
import org.elasticsearch.test.IndexSettingsModule;
+import java.io.IOException;
+
public class FlattenGraphTokenFilterFactoryTests extends ESTokenStreamTestCase {
public void testBasic() throws IOException {
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/NGramTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java
similarity index 85%
rename from core/src/test/java/org/elasticsearch/index/analysis/NGramTokenizerFactoryTests.java
rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java
index 5e1cf2e8179..24efd89b7e0 100644
--- a/core/src/test/java/org/elasticsearch/index/analysis/NGramTokenizerFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
@@ -30,6 +30,8 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.Settings.Builder;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
+import org.elasticsearch.index.analysis.NGramTokenizerFactory;
import org.elasticsearch.test.ESTokenStreamTestCase;
import org.elasticsearch.test.IndexSettingsModule;
@@ -52,7 +54,8 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
final Settings indexSettings = newAnalysisSettingsBuilder().build();
IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
for (String tokenChars : Arrays.asList("letters", "number", "DIRECTIONALITY_UNDEFINED")) {
- final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", tokenChars).build();
+ final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+ .put("token_chars", tokenChars).build();
try {
new NGramTokenizerFactory(indexProperties, null, name, settings).create();
fail();
@@ -61,7 +64,8 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
}
}
for (String tokenChars : Arrays.asList("letter", " digit ", "punctuation", "DIGIT", "CoNtRoL", "dash_punctuation")) {
- final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", tokenChars).build();
+ final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+ .put("token_chars", tokenChars).build();
indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
new NGramTokenizerFactory(indexProperties, null, name, settings).create();
@@ -73,8 +77,10 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
final Index index = new Index("test", "_na_");
final String name = "ngr";
final Settings indexSettings = newAnalysisSettingsBuilder().build();
- final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4).putArray("token_chars", new String[0]).build();
- Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
+ final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4)
+ .putArray("token_chars", new String[0]).build();
+ Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
+ .create();
tokenizer.setReader(new StringReader("1.34"));
assertTokenStreamContents(tokenizer, new String[] {"1.", "1.3", "1.34", ".3", ".34", "34"});
}
@@ -84,12 +90,15 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
final Index index = new Index("test", "_na_");
final String name = "ngr";
final Settings indexSettings = newAnalysisSettingsBuilder().build();
- Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
- Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
+ Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+ .put("token_chars", "letter,digit").build();
+ Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
+ .create();
tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f "));
assertTokenStreamContents(tokenizer,
new String[] {"Åb", "Åbc", "bc", "dé", "déf", "éf", "g\uD801\uDC00", "g\uD801\uDC00f", "\uD801\uDC00f"});
- settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
+ settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+ .put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
tokenizer.setReader(new StringReader(" a!$ 9"));
assertTokenStreamContents(tokenizer,
@@ -102,12 +111,15 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
final String name = "ngr";
final Settings indexSettings = newAnalysisSettingsBuilder().build();
Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
- Tokenizer tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
+ Tokenizer tokenizer =
+ new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f "));
assertTokenStreamContents(tokenizer,
new String[] {"Åb", "Åbc", "dé", "déf", "g\uD801\uDC00", "g\uD801\uDC00f"});
- settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
- tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
+ settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+ .put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
+ tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
+ .create();
tokenizer.setReader(new StringReader(" a!$ 9"));
assertTokenStreamContents(tokenizer,
new String[] {" a", " a!"});
@@ -128,7 +140,9 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
Settings indexSettings = newAnalysisSettingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
Tokenizer tokenizer = new MockTokenizer();
tokenizer.setReader(new StringReader("foo bar"));
- TokenStream edgeNGramTokenFilter = new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(tokenizer);
+ TokenStream edgeNGramTokenFilter =
+ new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
+ .create(tokenizer);
if (reverse) {
assertThat(edgeNGramTokenFilter, instanceOf(ReverseStringFilter.class));
} else {
diff --git a/core/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/UniqueTokenFilterTests.java
similarity index 97%
rename from core/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java
rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/UniqueTokenFilterTests.java
index 324e422531b..f75822a13c4 100644
--- a/core/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/UniqueTokenFilterTests.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.lucene.analysis.miscellaneous;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml
index eb9dec65542..1d3075e28f8 100644
--- a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml
+++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml
@@ -210,3 +210,185 @@
- match: { detail.tokenfilters.0.tokens.5.start_offset: 16 }
- match: { detail.tokenfilters.0.tokens.5.end_offset: 19 }
- match: { detail.tokenfilters.0.tokens.5.position: 5 }
+
+---
+"unique":
+ - do:
+ indices.analyze:
+ body:
+ text: Foo Foo Bar!
+ tokenizer: whitespace
+ filter: [unique]
+ - length: { tokens: 2 }
+ - match: { tokens.0.token: Foo }
+ - match: { tokens.1.token: Bar! }
+
+---
+"synonym_graph and flatten_graph":
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ analysis:
+ filter:
+ my_synonym_graph:
+ type: synonym_graph
+ synonyms: ["automatic teller machine,atm,cash point"]
+
+ - do:
+ indices.analyze:
+ index: test
+ body:
+ text: this automatic teller machine is down
+ tokenizer: whitespace
+ filter: [my_synonym_graph]
+ - length: { tokens: 9 }
+ - match: { tokens.0.token: this }
+ - match: { tokens.0.position: 0 }
+ - is_false: tokens.0.positionLength
+ - match: { tokens.1.token: atm }
+ - match: { tokens.1.position: 1 }
+ - match: { tokens.1.positionLength: 4 }
+ - match: { tokens.2.token: cash }
+ - match: { tokens.2.position: 1 }
+ - is_false: tokens.2.positionLength
+ - match: { tokens.3.token: automatic }
+ - match: { tokens.3.position: 1 }
+ - match: { tokens.3.positionLength: 2 }
+ - match: { tokens.4.token: point }
+ - match: { tokens.4.position: 2 }
+ - match: { tokens.4.positionLength: 3 }
+ - match: { tokens.5.token: teller }
+ - match: { tokens.5.position: 3 }
+ - is_false: tokens.5.positionLength
+ - match: { tokens.6.token: machine }
+ - match: { tokens.6.position: 4 }
+ - is_false: tokens.6.positionLength
+ - match: { tokens.7.token: is }
+ - match: { tokens.7.position: 5 }
+ - is_false: tokens.7.positionLength
+ - match: { tokens.8.token: down }
+ - match: { tokens.8.position: 6 }
+ - is_false: tokens.8.positionLength
+
+ - do:
+ indices.analyze:
+ index: test
+ body:
+ text: this automatic teller machine is down
+ tokenizer: whitespace
+ filter: [my_synonym_graph,flatten_graph]
+ - length: { tokens: 9 }
+ - match: { tokens.0.token: this }
+ - match: { tokens.0.position: 0 }
+ - is_false: tokens.0.positionLength
+ - match: { tokens.1.token: atm }
+ - match: { tokens.1.position: 1 }
+ - match: { tokens.1.positionLength: 3 }
+ - match: { tokens.2.token: cash }
+ - match: { tokens.2.position: 1 }
+ - is_false: tokens.2.positionLength
+ - match: { tokens.3.token: automatic }
+ - match: { tokens.3.position: 1 }
+ - is_false: tokens.3.positionLength
+ - match: { tokens.4.token: point }
+ - match: { tokens.4.position: 2 }
+ - match: { tokens.4.positionLength: 2 }
+ - match: { tokens.5.token: teller }
+ - match: { tokens.5.position: 2 }
+ - is_false: tokens.5.positionLength
+ - match: { tokens.6.token: machine }
+ - match: { tokens.6.position: 3 }
+ - is_false: tokens.6.positionLength
+ - match: { tokens.7.token: is }
+ - match: { tokens.7.position: 4 }
+ - is_false: tokens.7.positionLength
+ - match: { tokens.8.token: down }
+ - match: { tokens.8.position: 5 }
+ - is_false: tokens.8.positionLength
+
+---
+"length":
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ analysis:
+ filter:
+ my_length:
+ type: length
+ min: 6
+ - do:
+ indices.analyze:
+ index: test
+ body:
+ text: foo bar foobar
+ tokenizer: whitespace
+ filter: [my_length]
+ - length: { tokens: 1 }
+ - match: { tokens.0.token: foobar }
+
+---
+"uppercase":
+ - do:
+ indices.analyze:
+ body:
+ text: foobar
+ tokenizer: keyword
+ filter: [uppercase]
+ - length: { tokens: 1 }
+ - match: { tokens.0.token: FOOBAR }
+
+---
+"ngram":
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ analysis:
+ filter:
+ my_ngram:
+ type: ngram
+ min_gram: 3
+ max_gram: 3
+ - do:
+ indices.analyze:
+ index: test
+ body:
+ text: foobar
+ tokenizer: keyword
+ filter: [my_ngram]
+ - length: { tokens: 4 }
+ - match: { tokens.0.token: foo }
+ - match: { tokens.1.token: oob }
+ - match: { tokens.2.token: oba }
+ - match: { tokens.3.token: bar }
+
+---
+"edge_ngram":
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ analysis:
+ filter:
+ my_edge_ngram:
+ type: edge_ngram
+ min_gram: 3
+ max_gram: 6
+ - do:
+ indices.analyze:
+ index: test
+ body:
+ text: foobar
+ tokenizer: keyword
+ filter: [my_edge_ngram]
+ - length: { tokens: 4 }
+ - match: { tokens.0.token: foo }
+ - match: { tokens.1.token: foob }
+ - match: { tokens.2.token: fooba }
+ - match: { tokens.3.token: foobar }
diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/20_ngram_search.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/20_ngram_search.yml
new file mode 100644
index 00000000000..eb8c9789a63
--- /dev/null
+++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/20_ngram_search.yml
@@ -0,0 +1,41 @@
+"ngram search":
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ number_of_shards: 1
+ number_of_replicas: 0
+ analysis:
+ analyzer:
+ my_analyzer:
+ tokenizer: standard
+ filter: [my_ngram]
+ filter:
+ my_ngram:
+ type: ngram
+                  min_gram: 2
+                  max_gram: 2
+ mappings:
+ doc:
+ properties:
+ text:
+ type: text
+ analyzer: my_analyzer
+
+ - do:
+ index:
+ index: test
+ type: doc
+ id: 1
+ body: { "text": "foo bar baz" }
+ refresh: true
+
+ - do:
+ search:
+ body:
+ query:
+ match:
+ text:
+ query: foa
+ - match: {hits.total: 1}
diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/30_ngram_highligthing.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/30_ngram_highligthing.yml
new file mode 100644
index 00000000000..b04496965eb
--- /dev/null
+++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/30_ngram_highligthing.yml
@@ -0,0 +1,129 @@
+"ngram highlighting":
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ number_of_shards: 1
+ number_of_replicas: 0
+ analysis:
+ tokenizer:
+ my_ngramt:
+ type: ngram
+ min_gram: 1
+ max_gram: 20
+ token_chars: letter,digit
+ filter:
+ my_ngram:
+ type: ngram
+ min_gram: 1
+ max_gram: 20
+ analyzer:
+ name2_index_analyzer:
+ tokenizer: whitespace
+ filter: [my_ngram]
+ name_index_analyzer:
+ tokenizer: my_ngramt
+ name_search_analyzer:
+ tokenizer: whitespace
+ mappings:
+ doc:
+ properties:
+ name:
+ type: text
+ term_vector: with_positions_offsets
+ analyzer: name_index_analyzer
+ search_analyzer: name_search_analyzer
+ name2:
+ type: text
+ term_vector: with_positions_offsets
+ analyzer: name2_index_analyzer
+ search_analyzer: name_search_analyzer
+
+ - do:
+ index:
+ index: test
+ type: doc
+ id: 1
+ refresh: true
+ body:
+ name: logicacmg ehemals avinci - the know how company
+ name2: logicacmg ehemals avinci - the know how company
+
+ - do:
+ search:
+ body:
+ query:
+ match:
+ name:
+ query: logica m
+ highlight:
+ fields:
+ - name: {}
+ - match: {hits.total: 1}
+ - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"}
+
+ - do:
+ search:
+ body:
+ query:
+ match:
+ name:
+ query: logica ma
+ highlight:
+ fields:
+ - name: {}
+ - match: {hits.total: 1}
+ - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"}
+
+ - do:
+ search:
+ body:
+ query:
+ match:
+ name:
+ query: logica
+ highlight:
+ fields:
+ - name: {}
+ - match: {hits.total: 1}
+ - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"}
+
+ - do:
+ search:
+ body:
+ query:
+ match:
+ name2:
+ query: logica m
+ highlight:
+ fields:
+ - name2: {}
+ - match: {hits.total: 1}
+ - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"}
+
+ - do:
+ search:
+ body:
+ query:
+ match:
+ name2:
+ query: logica ma
+ highlight:
+ fields:
+ - name2: {}
+ - match: {hits.total: 1}
+ - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"}
+
+ - do:
+ search:
+ body:
+ query:
+ match:
+ name2:
+ query: logica
+ highlight:
+ fields:
+ - name2: {}
+ - match: {hits.total: 1}
+ - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"}
diff --git a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
index 0c2a29224f8..76d170f7c2c 100644
--- a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java
@@ -22,7 +22,6 @@ package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.elasticsearch.Version;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.index.analysis.ApostropheFilterFactory;
import org.elasticsearch.index.analysis.ArabicNormalizationFilterFactory;
@@ -36,10 +35,8 @@ import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory;
import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
-import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
import org.elasticsearch.index.analysis.ElisionTokenFilterFactory;
-import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
import org.elasticsearch.index.analysis.GermanNormalizationFilterFactory;
import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory;
import org.elasticsearch.index.analysis.HindiNormalizationFilterFactory;
@@ -49,14 +46,11 @@ import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
import org.elasticsearch.index.analysis.KeepWordFilterFactory;
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
-import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
import org.elasticsearch.index.analysis.LetterTokenizerFactory;
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
-import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
-import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
@@ -82,7 +76,6 @@ import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
-import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
@@ -90,7 +83,6 @@ import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.ESTestCase;
import java.util.Collection;
-import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
@@ -165,7 +157,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("decimaldigit", DecimalDigitFilterFactory.class)
.put("delimitedpayload", DelimitedPayloadTokenFilterFactory.class)
.put("dictionarycompoundword", DictionaryCompoundWordTokenFilterFactory.class)
- .put("edgengram", EdgeNGramTokenFilterFactory.class)
+ .put("edgengram", MovedToAnalysisCommon.class)
.put("elision", ElisionTokenFilterFactory.class)
.put("englishminimalstem", StemmerTokenFilterFactory.class)
.put("englishpossessive", StemmerTokenFilterFactory.class)
@@ -178,7 +170,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("germanlightstem", StemmerTokenFilterFactory.class)
.put("germanminimalstem", StemmerTokenFilterFactory.class)
.put("germannormalization", GermanNormalizationFilterFactory.class)
- .put("greeklowercase", LowerCaseTokenFilterFactory.class)
+ .put("greeklowercase", MovedToAnalysisCommon.class)
.put("greekstem", StemmerTokenFilterFactory.class)
.put("hindinormalization", HindiNormalizationFilterFactory.class)
.put("hindistem", StemmerTokenFilterFactory.class)
@@ -186,17 +178,17 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("hunspellstem", HunspellTokenFilterFactory.class)
.put("hyphenationcompoundword", HyphenationCompoundWordTokenFilterFactory.class)
.put("indicnormalization", IndicNormalizationFilterFactory.class)
- .put("irishlowercase", LowerCaseTokenFilterFactory.class)
+ .put("irishlowercase", MovedToAnalysisCommon.class)
.put("indonesianstem", StemmerTokenFilterFactory.class)
.put("italianlightstem", StemmerTokenFilterFactory.class)
.put("keepword", KeepWordFilterFactory.class)
.put("keywordmarker", MovedToAnalysisCommon.class)
.put("kstem", KStemTokenFilterFactory.class)
.put("latvianstem", StemmerTokenFilterFactory.class)
- .put("length", LengthTokenFilterFactory.class)
+ .put("length", MovedToAnalysisCommon.class)
.put("limittokencount", LimitTokenCountFilterFactory.class)
- .put("lowercase", LowerCaseTokenFilterFactory.class)
- .put("ngram", NGramTokenFilterFactory.class)
+ .put("lowercase", MovedToAnalysisCommon.class)
+ .put("ngram", MovedToAnalysisCommon.class)
.put("norwegianlightstem", StemmerTokenFilterFactory.class)
.put("norwegianminimalstem", StemmerTokenFilterFactory.class)
.put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class)
@@ -225,12 +217,12 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("synonymgraph", SynonymGraphTokenFilterFactory.class)
.put("trim", MovedToAnalysisCommon.class)
.put("truncate", TruncateTokenFilterFactory.class)
- .put("turkishlowercase", LowerCaseTokenFilterFactory.class)
+ .put("turkishlowercase", MovedToAnalysisCommon.class)
.put("type", KeepTypesFilterFactory.class)
- .put("uppercase", UpperCaseTokenFilterFactory.class)
+ .put("uppercase", MovedToAnalysisCommon.class)
.put("worddelimiter", MovedToAnalysisCommon.class)
.put("worddelimitergraph", MovedToAnalysisCommon.class)
- .put("flattengraph", FlattenGraphTokenFilterFactory.class)
+ .put("flattengraph", MovedToAnalysisCommon.class)
// TODO: these tokenfilters are not yet exposed: useful?