Merge pull request #12070 from rmuir/analysis_rest_tests

add integration tests for analysis plugins
2025-02-28 16:09:10 +00:00 · 2015-07-07 01:41:10 -04:00 · 2015-07-07 01:41:10 -04:00 · 69cd933ea3
commit 69cd933ea3
parent 9e196c3a0b 0736f97cfb
14 changed files with 313 additions and 3 deletions
--- a/plugins/analysis-icu/pom.xml
+++ b/plugins/analysis-icu/pom.xml
@@ -17,6 +17,8 @@
    <properties>
        <tests.jvms>1</tests.jvms>
        <es.logger.level>INFO</es.logger.level>
+        <tests.rest.suite>analysis_icu</tests.rest.suite>
+        <tests.rest.load_packaged>false</tests.rest.load_packaged>
    </properties>

    <dependencies>
--- a/plugins/analysis-icu/rest-api-spec/test/analysis_icu/10_basic.yaml
+++ b/plugins/analysis-icu/rest-api-spec/test/analysis_icu/10_basic.yaml
@@ -0,0 +1,37 @@
+# Integration tests for ICU analysis components
+#
+"Tokenizer":
+    - do:
+        indices.analyze:
+          text:         Foo Bar
+          tokenizer:    icu_tokenizer
+    - length: { tokens: 2 }
+    - match:  { tokens.0.token: Foo }
+    - match:  { tokens.1.token: Bar }
+---
+"Normalization filter":
+    - do:
+        indices.analyze:
+          filters:      icu_normalizer
+          text:         Foo Bar Ruß
+          tokenizer:    keyword
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: foo bar russ }
+---
+"Normalization charfilter":
+    - do:
+        indices.analyze:
+          char_filters: icu_normalizer
+          text:         Foo Bar Ruß
+          tokenizer:    keyword
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: foo bar russ }
+---
+"Folding filter":
+    - do:
+        indices.analyze:
+          filters:      icu_folding
+          text:         Foo Bar résumé
+          tokenizer:    keyword
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: foo bar resume }
--- a/plugins/analysis-icu/src/main/java/org/elasticsearch/indices/analysis/IcuIndicesAnalysis.java
+++ b/plugins/analysis-icu/src/main/java/org/elasticsearch/indices/analysis/IcuIndicesAnalysis.java
@@ -22,20 +22,26 @@ package org.elasticsearch.indices.analysis;
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.Normalizer2;
 import com.ibm.icu.text.Transliterator;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.icu.ICUFoldingFilter;
+import org.apache.lucene.analysis.icu.ICUNormalizer2CharFilter;
 import org.apache.lucene.analysis.icu.ICUTransformFilter;
 import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
 import org.elasticsearch.common.component.AbstractComponent;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.analysis.ICUCollationKeyFilter;
+import org.elasticsearch.index.analysis.PreBuiltCharFilterFactoryFactory;
 import org.elasticsearch.index.analysis.PreBuiltTokenFilterFactoryFactory;
 import org.elasticsearch.index.analysis.PreBuiltTokenizerFactoryFactory;
+import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
 import org.elasticsearch.index.analysis.TokenizerFactory;

+import java.io.Reader;
+
 /**
 * Registers indices level analysis components so, if not explicitly configured, will be shared
 * among all indices.
@@ -106,5 +112,17 @@ public class IcuIndicesAnalysis extends AbstractComponent {
                return new ICUTransformFilter(tokenStream, Transliterator.getInstance("Null", Transliterator.FORWARD));
            }
        }));
+        
+        indicesAnalysisService.charFilterFactories().put("icu_normalizer", new PreBuiltCharFilterFactoryFactory(new CharFilterFactory() {
+            @Override
+            public String name() {
+                return "icu_normalizer";
+            }
+
+            @Override
+            public Reader create(Reader reader) {
+                return new ICUNormalizer2CharFilter(reader);
+            }
+        }));
    }
 }
--- a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/AnalysisICURestIT.java
+++ b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/AnalysisICURestIT.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
+import org.elasticsearch.test.rest.RestTestCandidate;
+import org.elasticsearch.test.rest.parser.RestTestParseException;
+
+import java.io.IOException;
+
+public class AnalysisICURestIT extends ElasticsearchRestTestCase {
+
+    public AnalysisICURestIT(@Name("yaml") RestTestCandidate testCandidate) {
+        super(testCandidate);
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
+        return ElasticsearchRestTestCase.createParameters(0, 1);
+    }
+}
+
--- a/plugins/analysis-kuromoji/pom.xml
+++ b/plugins/analysis-kuromoji/pom.xml
@@ -16,7 +16,8 @@
    <description>The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch.</description>

    <properties>
-        <!-- You can add any specific project property here -->
+        <tests.rest.suite>analysis_kuromoji</tests.rest.suite>
+        <tests.rest.load_packaged>false</tests.rest.load_packaged>
    </properties>

    <dependencies>
--- a/plugins/analysis-kuromoji/rest-api-spec/test/analysis_kuromoji/10_basic.yaml
+++ b/plugins/analysis-kuromoji/rest-api-spec/test/analysis_kuromoji/10_basic.yaml
@@ -0,0 +1,39 @@
+# Integration tests for Kuromoji analysis components
+#
+"Tokenizer":
+    - do:
+        indices.analyze:
+          text:         関西国際空港
+          tokenizer:    kuromoji_tokenizer
+    - length: { tokens: 4 }
+    - match:  { tokens.0.token: 関西 }
+    - match:  { tokens.1.token: 関西国際空港 }
+    - match:  { tokens.2.token: 国際 }
+    - match:  { tokens.3.token: 空港 }
+---
+"Baseform filter":
+    - do:
+        indices.analyze:
+          text:         飲み
+          tokenizer:    kuromoji_tokenizer
+          filters:      kuromoji_baseform
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: 飲む }
+---
+"Reading filter":
+    - do:
+        indices.analyze:
+          text:         寿司
+          tokenizer:    kuromoji_tokenizer
+          filters:      kuromoji_readingform
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: sushi }
+---
+"Stemming filter":
+    - do:
+        indices.analyze:
+          text:         サーバー
+          tokenizer:    kuromoji_tokenizer
+          filters:      kuromoji_stemmer
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: サーバ }
--- a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/AnalysisKuromojiRestIT.java
+++ b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/AnalysisKuromojiRestIT.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
+import org.elasticsearch.test.rest.RestTestCandidate;
+import org.elasticsearch.test.rest.parser.RestTestParseException;
+
+import java.io.IOException;
+
+public class AnalysisKuromojiRestIT extends ElasticsearchRestTestCase {
+
+    public AnalysisKuromojiRestIT(@Name("yaml") RestTestCandidate testCandidate) {
+        super(testCandidate);
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
+        return ElasticsearchRestTestCase.createParameters(0, 1);
+    }
+}
+
--- a/plugins/analysis-smartcn/pom.xml
+++ b/plugins/analysis-smartcn/pom.xml
@@ -15,7 +15,8 @@
    <description>Smart Chinese Analysis plugin integrates Lucene Smart Chinese analysis module into elasticsearch.</description>

    <properties>
-        <!-- You can add any specific project property here -->
+        <tests.rest.suite>analysis_smartcn</tests.rest.suite>
+        <tests.rest.load_packaged>false</tests.rest.load_packaged>
    </properties>

    <dependencies>
--- a/plugins/analysis-smartcn/rest-api-spec/test/analysis_smartcn/10_basic.yaml
+++ b/plugins/analysis-smartcn/rest-api-spec/test/analysis_smartcn/10_basic.yaml
@@ -0,0 +1,28 @@
+# Integration tests for Smart Chinese analysis components
+#
+"Tokenizer":
+    - do:
+        indices.analyze:
+          text:         我购买了道具和服装。
+          tokenizer:    smartcn_tokenizer
+    - length: { tokens: 7 }
+    - match:  { tokens.0.token: 我  }
+    - match:  { tokens.1.token: 购买 }
+    - match:  { tokens.2.token: 了 }
+    - match:  { tokens.3.token: 道具 }
+    - match:  { tokens.4.token: 和 }
+    - match:  { tokens.5.token: 服装 }
+    - match:  { tokens.6.token: "," }
+---
+"Analyzer":
+    - do:
+        indices.analyze:
+          text:         我购买了道具和服装。
+          analyzer:     smartcn
+    - length: { tokens: 6 }
+    - match:  { tokens.0.token: 我  }
+    - match:  { tokens.1.token: 购买 }
+    - match:  { tokens.2.token: 了 }
+    - match:  { tokens.3.token: 道具 }
+    - match:  { tokens.4.token: 和 }
+    - match:  { tokens.5.token: 服装 }
--- a/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/AnalysisSmartChineseRestIT.java
+++ b/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/AnalysisSmartChineseRestIT.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
+import org.elasticsearch.test.rest.RestTestCandidate;
+import org.elasticsearch.test.rest.parser.RestTestParseException;
+
+import java.io.IOException;
+
+public class AnalysisSmartChineseRestIT extends ElasticsearchRestTestCase {
+
+    public AnalysisSmartChineseRestIT(@Name("yaml") RestTestCandidate testCandidate) {
+        super(testCandidate);
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
+        return ElasticsearchRestTestCase.createParameters(0, 1);
+    }
+}
+
--- a/plugins/analysis-stempel/pom.xml
+++ b/plugins/analysis-stempel/pom.xml
@@ -15,7 +15,8 @@
    <description>The Stempel (Polish) Analysis plugin integrates Lucene stempel (polish) analysis module into elasticsearch.</description>

    <properties>
-        <!-- You can add any specific project property here -->
+        <tests.rest.suite>analysis_stempel</tests.rest.suite>
+        <tests.rest.load_packaged>false</tests.rest.load_packaged>
    </properties>

    <dependencies>
--- a/plugins/analysis-stempel/rest-api-spec/test/analysis_stempel/10_basic.yaml
+++ b/plugins/analysis-stempel/rest-api-spec/test/analysis_stempel/10_basic.yaml
@@ -0,0 +1,18 @@
+# Integration tests for Polish analysis components
+#
+"Stemmer":
+    - do:
+        indices.analyze:
+          text:         studenci
+          tokenizer:    keyword
+          filters:      polish_stem
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: student  }
+---
+"Analyzer":
+    - do:
+        indices.analyze:
+          text:         studenta był
+          analyzer:     polish
+    - length: { tokens: 1 }
+    - match:  { tokens.0.token: student  }
--- a/plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishRestIT.java
+++ b/plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishRestIT.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
+import org.elasticsearch.test.rest.RestTestCandidate;
+import org.elasticsearch.test.rest.parser.RestTestParseException;
+
+import java.io.IOException;
+
+public class AnalysisPolishRestIT extends ElasticsearchRestTestCase {
+
+    public AnalysisPolishRestIT(@Name("yaml") RestTestCandidate testCandidate) {
+        super(testCandidate);
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
+        return ElasticsearchRestTestCase.createParameters(0, 1);
+    }
+}
+
--- a/plugins/pom.xml
+++ b/plugins/pom.xml
@@ -277,6 +277,7 @@
                    <include>api/cluster.health.json</include>
                    <!-- used in plugin REST tests -->
                    <include>api/index.json</include>
+                    <include>api/indices.analyze.json</include>
                    <include>api/indices.refresh.json</include>
                    <include>api/count.json</include>
                </includes>