+ protected Collection<Class<? extends Plugin>> nodePlugins() {
+ return Arrays.asList(CommonAnalysisPlugin.class);
+ }
+
+ /**
+ * Validates that we properly split fields using the word delimiter filter in query_string.
+ */
+ public void testCustomWordDelimiterQueryString() {
+ assertAcked(client().admin().indices().prepareCreate("test")
+ .setSettings("analysis.analyzer.my_analyzer.type", "custom",
+ "analysis.analyzer.my_analyzer.tokenizer", "whitespace",
+ "analysis.analyzer.my_analyzer.filter", "custom_word_delimiter",
+ "analysis.filter.custom_word_delimiter.type", "word_delimiter",
+ "analysis.filter.custom_word_delimiter.generate_word_parts", "true",
+ "analysis.filter.custom_word_delimiter.generate_number_parts", "false",
+ "analysis.filter.custom_word_delimiter.catenate_numbers", "true",
+ "analysis.filter.custom_word_delimiter.catenate_words", "false",
+ "analysis.filter.custom_word_delimiter.split_on_case_change", "false",
+ "analysis.filter.custom_word_delimiter.split_on_numerics", "false",
+ "analysis.filter.custom_word_delimiter.stem_english_possessive", "false")
+ .addMapping("type1",
+ "field1", "type=text,analyzer=my_analyzer",
+ "field2", "type=text,analyzer=my_analyzer"));
+
+ client().prepareIndex("test", "type1", "1").setSource(
+ "field1", "foo bar baz",
+ "field2", "not needed").get();
+ refresh();
+
+ SearchResponse response = client()
+ .prepareSearch("test")
+ .setQuery(
+ queryStringQuery("foo.baz").useDisMax(false).defaultOperator(Operator.AND)
+ .field("field1").field("field2")).get();
+ assertHitCount(response, 1L);
+ }
+}
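
The assertion above hinges on one thing: the custom word_delimiter filter splits "foo.baz" on the period, so the AND query requires both "foo" and "baz" in a single field, which field1 ("foo bar baz") satisfies. A minimal sketch of just that tokenization, using the Lucene filter the word_delimiter factory wraps (plain Lucene outside the test framework; only generate_word_parts matters for this input):

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import java.io.StringReader;

    public class WordDelimiterSketch {
        public static void main(String[] args) throws Exception {
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader("foo.baz"));
            // generate_word_parts is the only flag this input exercises
            TokenStream stream = new WordDelimiterFilter(
                    tokenizer, WordDelimiterFilter.GENERATE_WORD_PARTS, null);
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(term); // prints "foo", then "baz"
            }
            stream.end();
            stream.close();
        }
    }
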
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/WordDelimiterGraphTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterGraphTokenFilterFactoryTests.java
similarity index 55%
rename from core/src/test/java/org/elasticsearch/index/analysis/WordDelimiterGraphTokenFilterFactoryTests.java
rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterGraphTokenFilterFactoryTests.java
index 2ae4267104a..bd7ff2f0c01 100644
--- a/core/src/test/java/org/elasticsearch/index/analysis/WordDelimiterGraphTokenFilterFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterGraphTokenFilterFactoryTests.java
@@ -16,52 +16,62 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.elasticsearch.index.analysis;
-
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.analysis.AnalysisTestsHelper;
+import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
import java.io.StringReader;
-public class WordDelimiterGraphTokenFilterFactoryTests extends BaseWordDelimiterTokenFilterFactoryTestCase {
+public class WordDelimiterGraphTokenFilterFactoryTests
+ extends BaseWordDelimiterTokenFilterFactoryTestCase {
public WordDelimiterGraphTokenFilterFactoryTests() {
super("word_delimiter_graph");
}
public void testMultiTerms() throws IOException {
- ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
- .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
- .put("index.analysis.filter.my_word_delimiter.type", type)
- .put("index.analysis.filter.my_word_delimiter.catenate_all", "true")
- .put("index.analysis.filter.my_word_delimiter.preserve_original", "true")
- .build());
+ ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
+ Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+ .put("index.analysis.filter.my_word_delimiter.type", type)
+ .put("index.analysis.filter.my_word_delimiter.catenate_all", "true")
+ .put("index.analysis.filter.my_word_delimiter.preserve_original", "true")
+ .build(),
+ new CommonAnalysisPlugin());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot 500-42 wi-fi wi-fi-4000 j2se O'Neil's";
- String[] expected = new String[]{"PowerShot", "PowerShot", "Power", "Shot", "50042", "500-42", "500", "42",
- "wifi", "wi-fi", "wi", "fi", "wifi4000", "wi-fi-4000", "wi", "fi", "4000", "j2se", "j2se", "j", "2", "se",
- "ONeil", "O'Neil's", "O", "Neil" };
+ String[] expected = new String[] { "PowerShot", "PowerShot", "Power", "Shot", "50042",
+ "500-42", "500", "42", "wifi", "wi-fi", "wi", "fi", "wifi4000", "wi-fi-4000", "wi",
+ "fi", "4000", "j2se", "j2se", "j", "2", "se", "ONeil", "O'Neil's", "O", "Neil" };
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
- int[] expectedIncr = new int[]{1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1};
- int[] expectedPosLen = new int[]{2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 3, 3, 1, 1, 1, 3, 3, 1, 1, 1, 2, 2, 1, 1};
+ int[] expectedIncr = new int[] { 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0,
+ 1, 1, 1, 0, 0, 1 };
+ int[] expectedPosLen = new int[] { 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 3, 3, 1, 1, 1, 3, 3,
+ 1, 1, 1, 2, 2, 1, 1 };
assertTokenStreamContents(tokenFilter.create(tokenizer), expected, null, null, null,
expectedIncr, expectedPosLen, null);
}
- /** Correct offset order when doing both parts and concatenation: PowerShot is a synonym of Power */
+ /**
+ * Correct offset order when doing both parts and concatenation: PowerShot is a synonym of Power
+ */
public void testPartsAndCatenate() throws IOException {
- ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
- .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
- .put("index.analysis.filter.my_word_delimiter.type", type)
- .put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
- .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "true")
- .build());
+ ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
+ Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+ .put("index.analysis.filter.my_word_delimiter.type", type)
+ .put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
+ .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "true")
+ .build(),
+ new CommonAnalysisPlugin());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot";
int[] expectedIncr = new int[]{1, 0, 1};
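
The two int arrays are the interesting part of these assertions: a position increment of 0 stacks a token on the previous position (PowerShot and Power start together), while a position length greater than 1 says a token spans several positions, which is exactly what the graph filter adds over the plain word_delimiter. A minimal sketch reading those attributes off the Lucene filter directly (note the Elasticsearch factory enables split_on_case_change by default, so it is set explicitly here):

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
    import java.io.StringReader;

    public class GraphAttributesSketch {
        public static void main(String[] args) throws Exception {
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader("PowerShot"));
            int flags = WordDelimiterGraphFilter.GENERATE_WORD_PARTS
                    | WordDelimiterGraphFilter.CATENATE_WORDS
                    | WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE;
            TokenStream stream = new WordDelimiterGraphFilter(tokenizer, flags, null);
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute incr = stream.addAttribute(PositionIncrementAttribute.class);
            PositionLengthAttribute len = stream.addAttribute(PositionLengthAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                // expected: PowerShot incr=1 len=2, Power incr=0 len=1, Shot incr=1 len=1
                System.out.println(term + " incr=" + incr.getPositionIncrement()
                        + " len=" + len.getPositionLength());
            }
            stream.end();
            stream.close();
        }
    }
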
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterTokenFilterFactoryTests.java
similarity index 65%
rename from core/src/test/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactoryTests.java
rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterTokenFilterFactoryTests.java
index 1e919e00bbb..78c4f1485aa 100644
--- a/core/src/test/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WordDelimiterTokenFilterFactoryTests.java
@@ -16,31 +16,38 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.analysis.AnalysisTestsHelper;
+import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
import java.io.StringReader;
-public class WordDelimiterTokenFilterFactoryTests extends BaseWordDelimiterTokenFilterFactoryTestCase {
+public class WordDelimiterTokenFilterFactoryTests
+ extends BaseWordDelimiterTokenFilterFactoryTestCase {
public WordDelimiterTokenFilterFactoryTests() {
super("word_delimiter");
}
- /** Correct offset order when doing both parts and concatenation: PowerShot is a synonym of Power */
+ /**
+ * Correct offset order when doing both parts and concatenation: PowerShot is a synonym of Power
+ */
public void testPartsAndCatenate() throws IOException {
- ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(Settings.builder()
- .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
- .put("index.analysis.filter.my_word_delimiter.type", type)
- .put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
- .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "true")
- .build());
+ ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
+ Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+ .put("index.analysis.filter.my_word_delimiter.type", type)
+ .put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
+ .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "true")
+ .build(),
+ new CommonAnalysisPlugin());
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_word_delimiter");
String source = "PowerShot";
String[] expected = new String[]{"Power", "PowerShot", "Shot" };
diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/10_basic.yaml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/10_basic.yaml
new file mode 100644
index 00000000000..d27a0861b2e
--- /dev/null
+++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/10_basic.yaml
@@ -0,0 +1,11 @@
+"Module loaded":
+ - do:
+ cluster.state: {}
+
+ # Get master node id
+ - set: { master_node: master }
+
+ - do:
+ nodes.info: {}
+
+ - match: { nodes.$master.modules.0.name: analysis-common }
diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/20_analyzers.yaml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/20_analyzers.yaml
new file mode 100644
index 00000000000..9fb34e7a821
--- /dev/null
+++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/20_analyzers.yaml
@@ -0,0 +1,11 @@
+## Smoke tests for analyzers included in the analysis-common module
+
+"whitespace":
+ - do:
+ indices.analyze:
+ body:
+ text: Foo Bar!
+ analyzer: whitespace
+ - length: { tokens: 2 }
+ - match: { tokens.0.token: Foo }
+ - match: { tokens.1.token: Bar! }
diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yaml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yaml
new file mode 100644
index 00000000000..174a15f772b
--- /dev/null
+++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yaml
@@ -0,0 +1,27 @@
+## Smoke tests for tokenizers included in the analysis-common module
+
+"keyword":
+ - do:
+ indices.analyze:
+ body:
+ text: Foo Bar!
+ tokenizer: keyword
+ - length: { tokens: 1 }
+ - match: { tokens.0.token: Foo Bar! }
+
+---
+"nGram":
+ - do:
+ indices.analyze:
+ body:
+ text: good
+ explain: true
+ tokenizer:
+ type: nGram
+ min_gram: 2
+ max_gram: 2
+ - length: { detail.tokenizer.tokens: 3 }
+ - match: { detail.tokenizer.name: _anonymous_tokenizer }
+ - match: { detail.tokenizer.tokens.0.token: go }
+ - match: { detail.tokenizer.tokens.1.token: oo }
+ - match: { detail.tokenizer.tokens.2.token: od }
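
The anonymous tokenizer in the request above maps to Lucene's NGramTokenizer, and the three expected bigrams can be reproduced with it directly. A standalone sketch (plain Lucene, no Elasticsearch involved):

    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.ngram.NGramTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import java.io.StringReader;

    public class NGramSketch {
        public static void main(String[] args) throws Exception {
            Tokenizer tokenizer = new NGramTokenizer(2, 2); // min_gram=2, max_gram=2
            tokenizer.setReader(new StringReader("good"));
            CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
            tokenizer.reset();
            while (tokenizer.incrementToken()) {
                System.out.println(term); // go, oo, od
            }
            tokenizer.end();
            tokenizer.close();
        }
    }
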
diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yaml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yaml
new file mode 100644
index 00000000000..ac5bcb82e57
--- /dev/null
+++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yaml
@@ -0,0 +1,82 @@
+## Smoke tests for token filters included in the analysis-common module
+
+"asciifolding":
+ - do:
+ indices.analyze:
+ body:
+ text: Musée d'Orsay
+ tokenizer: keyword
+ filter: [asciifolding]
+ - length: { tokens: 1 }
+ - match: { tokens.0.token: Musee d'Orsay }
+
+---
+"lowercase":
+ - do:
+ indices.analyze:
+ body:
+ text: Foo Bar!
+ tokenizer: keyword
+ filter: [lowercase]
+ - length: { tokens: 1 }
+ - match: { tokens.0.token: foo bar! }
+
+---
+"word_delimiter":
+ - do:
+ indices.analyze:
+ body:
+ text: the qu1ck brown fox
+ tokenizer: standard
+ filter: [word_delimiter]
+ - length: { tokens: 6 }
+ - match: { tokens.0.token: the }
+ - match: { tokens.1.token: qu }
+ - match: { tokens.2.token: "1" }
+ - match: { tokens.3.token: ck }
+ - match: { tokens.4.token: brown }
+ - match: { tokens.5.token: fox }
+
+ - do:
+ indices.analyze:
+ body:
+ text: the qu1ck brown fox
+ tokenizer: standard
+ filter:
+ - type: word_delimiter
+ split_on_numerics: false
+ - length: { tokens: 4 }
+ - match: { tokens.0.token: the }
+ - match: { tokens.1.token: qu1ck }
+ - match: { tokens.2.token: brown }
+ - match: { tokens.3.token: fox }
+
+---
+"word_delimiter_graph":
+ - do:
+ indices.analyze:
+ body:
+ text: the qu1ck brown fox
+ tokenizer: standard
+ filter: [word_delimiter_graph]
+ - length: { tokens: 6 }
+ - match: { tokens.0.token: the }
+ - match: { tokens.1.token: qu }
+ - match: { tokens.2.token: "1" }
+ - match: { tokens.3.token: ck }
+ - match: { tokens.4.token: brown }
+ - match: { tokens.5.token: fox }
+
+ - do:
+ indices.analyze:
+ body:
+ text: the qu1ck brown fox
+ tokenizer: standard
+ filter:
+ - type: word_delimiter_graph
+ split_on_numerics: false
+ - length: { tokens: 4 }
+ - match: { tokens.0.token: the }
+ - match: { tokens.1.token: qu1ck }
+ - match: { tokens.2.token: brown }
+ - match: { tokens.3.token: fox }
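
Both filter blocks above exercise the same toggle: with default flags "qu1ck" splits at the letter/digit boundaries into qu, 1, ck, while with split_on_numerics: false it passes through whole. A minimal sketch of that flag on the underlying Lucene filter (only the two generate_*_parts flags plus the toggle; the real word_delimiter defaults enable a few more):

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import java.io.StringReader;

    public class SplitOnNumericsSketch {
        static void analyze(String text, int flags) throws Exception {
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader(text));
            TokenStream stream = new WordDelimiterFilter(tokenizer, flags, null);
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.print(term + " ");
            }
            stream.end();
            stream.close();
            System.out.println();
        }

        public static void main(String[] args) throws Exception {
            int base = WordDelimiterFilter.GENERATE_WORD_PARTS
                    | WordDelimiterFilter.GENERATE_NUMBER_PARTS;
            analyze("qu1ck", base | WordDelimiterFilter.SPLIT_ON_NUMERICS); // qu 1 ck
            analyze("qu1ck", base);                                         // qu1ck
        }
    }
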
diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/50_char_filters.yaml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/50_char_filters.yaml
new file mode 100644
index 00000000000..06775a2a722
--- /dev/null
+++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/50_char_filters.yaml
@@ -0,0 +1,13 @@
+## Smoke tests for char filters included in the analysis-common module
+
+"mapping":
+ - do:
+ indices.analyze:
+ body:
+ text: jeff quit phish
+ tokenizer: keyword
+ char_filter:
+ - type: mapping
+ mappings: ["ph => f", "qu => q"]
+ - length: { tokens: 1 }
+ - match: { tokens.0.token: "jeff qit fish" }
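
The mapping char filter rewrites the raw character stream before the tokenizer ever sees it, which is why a keyword tokenizer still yields a single, already-rewritten token. A minimal sketch with the Lucene classes the filter is built on (assumes Lucene's analysis-common jar):

    import org.apache.lucene.analysis.charfilter.MappingCharFilter;
    import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
    import java.io.Reader;
    import java.io.StringReader;

    public class MappingCharFilterSketch {
        public static void main(String[] args) throws Exception {
            NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
            builder.add("ph", "f");
            builder.add("qu", "q");
            Reader reader = new MappingCharFilter(builder.build(),
                    new StringReader("jeff quit phish"));
            int c;
            while ((c = reader.read()) != -1) {
                System.out.print((char) c); // prints "jeff qit fish"
            }
            reader.close();
        }
    }
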
diff --git a/plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishFactoryTests.java b/plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishFactoryTests.java
index e68cb260b0b..83015296276 100644
--- a/plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishFactoryTests.java
+++ b/plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishFactoryTests.java
@@ -19,14 +19,9 @@
package org.elasticsearch.index.analysis;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.AnalysisFactoryTestCase;
import org.elasticsearch.Version;
@@ -37,6 +32,10 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.pl.PolishStemTokenFilterFactory;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
public class AnalysisPolishFactoryTests extends AnalysisFactoryTestCase {
@Override
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yaml
index 268cd781289..93ce5c8c807 100644
--- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yaml
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yaml
@@ -1,29 +1,11 @@
-# Will be performed before each test as a part of the test setup
-#
-setup:
- - do:
- ping: {}
-
----
"Basic test":
- do:
indices.analyze:
body:
text: Foo Bar
- length: { tokens: 2 }
- - match: { tokens.0.token: foo }
- - match: { tokens.1.token: bar }
-
----
-"Tokenizer and filter":
- - do:
- indices.analyze:
- body:
- filter: [lowercase]
- text: Foo Bar
- tokenizer: keyword
- - length: { tokens: 1 }
- - match: { tokens.0.token: foo bar }
+ - match: { tokens.0.token: foo }
+ - match: { tokens.1.token: bar }
---
"Index and field":
@@ -36,7 +18,7 @@ setup:
properties:
text:
type: text
- analyzer: whitespace
+ analyzer: standard
- do:
indices.analyze:
@@ -45,84 +27,51 @@ setup:
field: text
text: Foo Bar!
- length: { tokens: 2 }
- - match: { tokens.0.token: Foo }
- - match: { tokens.1.token: Bar! }
----
-"JSON in Body":
- - do:
- indices.analyze:
- body: { "text": "Foo Bar", "filter": ["lowercase"], "tokenizer": keyword }
- - length: {tokens: 1 }
- - match: { tokens.0.token: foo bar }
+ - match: { tokens.0.token: foo }
+ - match: { tokens.1.token: bar }
+
---
"Array text":
- do:
indices.analyze:
- body: { "text": ["Foo Bar", "Baz"], "filter": ["lowercase"], "tokenizer": keyword }
- - length: {tokens: 2 }
- - match: { tokens.0.token: foo bar }
- - match: { tokens.1.token: baz }
+ body:
+ text: ["Foo Bar", "Baz"]
+ tokenizer: standard
+ - length: { tokens: 3 }
+ - match: { tokens.0.token: Foo }
+ - match: { tokens.1.token: Bar }
+ - match: { tokens.2.token: Baz }
+
---
"Detail response with Analyzer":
- do:
indices.analyze:
- body: {"text": "This is troubled", "analyzer": standard, "explain": "true"}
+ body:
+ text: This is troubled
+ analyzer: standard
+ explain: true
- length: { detail.analyzer.tokens: 3 }
- - match: { detail.analyzer.name: standard }
- - match: { detail.analyzer.tokens.0.token: this }
- - match: { detail.analyzer.tokens.1.token: is }
- - match: { detail.analyzer.tokens.2.token: troubled }
----
-"Detail output spcified attribute":
- - do:
- indices.analyze:
- body: {"text": "This is troubled", "char_filter": ["html_strip"], "filter": ["snowball"], "tokenizer": standard, "explain": true, "attributes": ["keyword"]}
- - length: { detail.charfilters: 1 }
- - length: { detail.tokenizer.tokens: 3 }
- - length: { detail.tokenfilters.0.tokens: 3 }
- - match: { detail.tokenizer.name: standard }
- - match: { detail.tokenizer.tokens.0.token: This }
- - match: { detail.tokenizer.tokens.1.token: is }
- - match: { detail.tokenizer.tokens.2.token: troubled }
- - match: { detail.tokenfilters.0.name: snowball }
- - match: { detail.tokenfilters.0.tokens.0.token: This }
- - match: { detail.tokenfilters.0.tokens.1.token: is }
- - match: { detail.tokenfilters.0.tokens.2.token: troubl }
- - match: { detail.tokenfilters.0.tokens.2.keyword: false }
+ - match: { detail.analyzer.name: standard }
+ - match: { detail.analyzer.tokens.0.token: this }
+ - match: { detail.analyzer.tokens.1.token: is }
+ - match: { detail.analyzer.tokens.2.token: troubled }
---
"Custom filter in request":
- do:
indices.analyze:
- body: { "text": "Foo Bar Buzz", "filter": ["lowercase", { "type": "stop", "stopwords": ["foo", "buzz"]}], "tokenizer": whitespace, "explain": true }
- - length: {detail.tokenizer.tokens: 3 }
- - length: {detail.tokenfilters.0.tokens: 3 }
- - length: {detail.tokenfilters.1.tokens: 1 }
- - match: { detail.tokenizer.name: whitespace }
- - match: { detail.tokenizer.tokens.0.token: Foo }
- - match: { detail.tokenizer.tokens.1.token: Bar }
- - match: { detail.tokenizer.tokens.2.token: Buzz }
- - match: { detail.tokenfilters.0.name: lowercase }
- - match: { detail.tokenfilters.0.tokens.0.token: foo }
- - match: { detail.tokenfilters.0.tokens.1.token: bar }
- - match: { detail.tokenfilters.0.tokens.2.token: buzz }
- - match: { detail.tokenfilters.1.name: "_anonymous_tokenfilter_[1]" }
- - match: { detail.tokenfilters.1.tokens.0.token: bar }
----
-"Custom char_filter in request":
- - do:
- indices.analyze:
- body: { "text": "jeff quit phish", "char_filter": [{"type": "mapping", "mappings": ["ph => f", "qu => q"]}], "tokenizer": keyword }
- - length: {tokens: 1 }
- - match: { tokens.0.token: "jeff qit fish" }
-
----
-"Custom tokenizer in request":
- - do:
- indices.analyze:
- body: { "text": "good", "tokenizer": {"type": "nGram", "min_gram": 2, "max_gram": 2}, "explain": true }
- - length: {detail.tokenizer.tokens: 3 }
- - match: { detail.tokenizer.name: _anonymous_tokenizer }
- - match: { detail.tokenizer.tokens.0.token: go }
- - match: { detail.tokenizer.tokens.1.token: oo }
- - match: { detail.tokenizer.tokens.2.token: od }
+ body:
+ text: foo bar buzz
+ tokenizer: standard
+ explain: true
+ filter:
+ - type: stop
+ stopwords: ["foo", "buzz"]
+ - length: { detail.tokenizer.tokens: 3 }
+ - length: { detail.tokenfilters.0.tokens: 1 }
+ - match: { detail.tokenizer.name: standard }
+ - match: { detail.tokenizer.tokens.0.token: foo }
+ - match: { detail.tokenizer.tokens.1.token: bar }
+ - match: { detail.tokenizer.tokens.2.token: buzz }
+ - match: { detail.tokenfilters.0.name: "_anonymous_tokenfilter_[0]" }
+ - match: { detail.tokenfilters.0.tokens.0.token: bar }
diff --git a/settings.gradle b/settings.gradle
index 8e6d3d80a0e..36f9c23e7c5 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -26,14 +26,15 @@ List projects = [
'test:fixtures:hdfs-fixture',
'test:logger-usage',
'modules:aggs-matrix-stats',
+ 'modules:analysis-common',
'modules:ingest-common',
'modules:lang-expression',
'modules:lang-mustache',
'modules:lang-painless',
- 'modules:transport-netty4',
- 'modules:reindex',
'modules:percolator',
+ 'modules:reindex',
'modules:repository-url',
+ 'modules:transport-netty4',
'plugins:analysis-icu',
'plugins:analysis-kuromoji',
'plugins:analysis-phonetic',
diff --git a/test/framework/src/main/java/org/elasticsearch/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/AnalysisFactoryTestCase.java
index 83f955296b7..7f60058788a 100644
--- a/test/framework/src/main/java/org/elasticsearch/AnalysisFactoryTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/AnalysisFactoryTestCase.java
@@ -20,14 +20,12 @@
package org.elasticsearch;
import org.apache.lucene.analysis.en.PorterStemFilterFactory;
-import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilterFactory;
import org.apache.lucene.analysis.reverse.ReverseStringFilterFactory;
import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.elasticsearch.common.collect.MapBuilder;
-import org.elasticsearch.index.analysis.ASCIIFoldingTokenFilterFactory;
import org.elasticsearch.index.analysis.ApostropheFilterFactory;
import org.elasticsearch.index.analysis.ArabicNormalizationFilterFactory;
import org.elasticsearch.index.analysis.ArabicStemTokenFilterFactory;
@@ -92,7 +90,6 @@ import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
-import org.elasticsearch.index.analysis.WordDelimiterTokenFilterFactory;
import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
import org.elasticsearch.indices.analysis.PreBuiltCharFilters;
@@ -110,7 +107,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
- * Alerts us if new analyzers are added to lucene, so we don't miss them.
+ * Alerts us if new analysis components are added to Lucene, so we don't miss them.
*
* If we don't want to expose one for a specific reason, just map it to Void.
* The deprecated ones can be mapped to Deprecated.class.
@@ -178,7 +175,7 @@ public class AnalysisFactoryTestCase extends ESTestCase {
.put("apostrophe", ApostropheFilterFactory.class)
.put("arabicnormalization", ArabicNormalizationFilterFactory.class)
.put("arabicstem", ArabicStemTokenFilterFactory.class)
- .put("asciifolding", ASCIIFoldingTokenFilterFactory.class)
+ .put("asciifolding", MovedToAnalysisCommon.class)
.put("brazilianstem", BrazilianStemTokenFilterFactory.class)
.put("bulgarianstem", StemmerTokenFilterFactory.class)
.put("cjkbigram", CJKBigramFilterFactory.class)
@@ -253,8 +250,8 @@ public class AnalysisFactoryTestCase extends ESTestCase {
.put("turkishlowercase", LowerCaseTokenFilterFactory.class)
.put("type", KeepTypesFilterFactory.class)
.put("uppercase", UpperCaseTokenFilterFactory.class)
- .put("worddelimiter", WordDelimiterTokenFilterFactory.class)
- .put("worddelimitergraph", WordDelimiterGraphFilterFactory.class)
+ .put("worddelimiter", MovedToAnalysisCommon.class)
+ .put("worddelimitergraph", MovedToAnalysisCommon.class)
.put("flattengraph", FlattenGraphTokenFilterFactory.class)
// TODO: these tokenfilters are not yet exposed: useful?
@@ -401,6 +398,7 @@ public class AnalysisFactoryTestCase extends ESTestCase {
}
}
expected.remove(Void.class);
+ expected.remove(MovedToAnalysisCommon.class);
expected.remove(Deprecated.class);
Collection<Class<?>> actual = new HashSet<>();
@@ -489,4 +487,11 @@ public class AnalysisFactoryTestCase extends ESTestCase {
classesThatShouldNotHaveMultiTermSupport.isEmpty());
}
+ /**
+ * Marker class for components that have moved to the analysis-common module. This will be
+ * removed when the module is complete and these analysis components aren't available to core.
+ */
+ protected static final class MovedToAnalysisCommon {
+ private MovedToAnalysisCommon() {}
+ }
}
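
For anyone extending this test: the marker class works purely by identity in the expected-set bookkeeping; anything mapped to it is stripped before the comparison against the classes Lucene actually exposes, the same as Void.class and Deprecated.class. A toy sketch of that filtering step in isolation (every name other than MovedToAnalysisCommon is invented for illustration):

    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;

    public class MarkerFilteringSketch {
        // stand-in for the factory classes the real test maps names to
        static final class SomeFactory {}
        static final class MovedToAnalysisCommon { private MovedToAnalysisCommon() {} }

        public static void main(String[] args) {
            Map<String, Class<?>> filters = new HashMap<>();
            filters.put("porterstem", SomeFactory.class);
            filters.put("worddelimiter", MovedToAnalysisCommon.class);

            Set<Class<?>> expected = new HashSet<>(filters.values());
            expected.remove(MovedToAnalysisCommon.class); // tested in the module instead
            System.out.println(expected); // only SomeFactory remains
        }
    }
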
diff --git a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisTestsHelper.java b/test/framework/src/main/java/org/elasticsearch/index/analysis/AnalysisTestsHelper.java
similarity index 80%
rename from core/src/test/java/org/elasticsearch/index/analysis/AnalysisTestsHelper.java
rename to test/framework/src/main/java/org/elasticsearch/index/analysis/AnalysisTestsHelper.java
index a60c21c1a7e..d75a894d073 100644
--- a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisTestsHelper.java
+++ b/test/framework/src/main/java/org/elasticsearch/index/analysis/AnalysisTestsHelper.java
@@ -25,17 +25,18 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.indices.analysis.AnalysisModule;
+import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule;
import java.io.IOException;
import java.nio.file.Path;
-
-import static java.util.Collections.emptyList;
+import java.util.Arrays;
public class AnalysisTestsHelper {
- public static ESTestCase.TestAnalysis createTestAnalysisFromClassPath(Path baseDir, String resource) throws IOException {
+ public static ESTestCase.TestAnalysis createTestAnalysisFromClassPath(Path baseDir,
+ String resource) throws IOException {
Settings settings = Settings.builder()
.loadFromStream(resource, AnalysisTestsHelper.class.getResourceAsStream(resource))
.put(Environment.PATH_HOME_SETTING.getKey(), baseDir.toString())
@@ -45,12 +46,15 @@ public class AnalysisTestsHelper {
}
public static ESTestCase.TestAnalysis createTestAnalysisFromSettings(
- Settings settings) throws IOException {
+ Settings settings, AnalysisPlugin... plugins) throws IOException {
if (settings.get(IndexMetaData.SETTING_VERSION_CREATED) == null) {
- settings = Settings.builder().put(settings).put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
+ settings = Settings.builder().put(settings)
+ .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
}
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
- AnalysisRegistry analysisRegistry = new AnalysisModule(new Environment(settings), emptyList()).getAnalysisRegistry();
+ AnalysisRegistry analysisRegistry =
+ new AnalysisModule(new Environment(settings), Arrays.asList(plugins))
+ .getAnalysisRegistry();
return new ESTestCase.TestAnalysis(analysisRegistry.build(indexSettings),
analysisRegistry.buildTokenFilterFactories(indexSettings),
analysisRegistry.buildTokenizerFactories(indexSettings),
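
One nice property of the new signature: the plugins parameter is a varargs, so existing core callers that pass only settings keep compiling and get an empty plugin list, which is exactly what the old emptyList() argument produced. A minimal sketch of a module-side caller, mirroring the word-delimiter tests above (assumes it runs in an ESTestCase subclass inside analysis-common, so CommonAnalysisPlugin is on the classpath):

    ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
        Settings.builder()
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .put("index.analysis.filter.my_filter.type", "word_delimiter")
            .build(),
        new CommonAnalysisPlugin());
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_filter");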