Merge pull request #12070 from rmuir/analysis_rest_tests

add integration tests for analysis plugins
2015-07-07 01:41:10 -04:00 · 2015-07-07 01:41:10 -04:00 · 69cd933ea3
parent 9e196c3a0b 0736f97cfb
commit 69cd933ea3
14 changed files with 313 additions and 3 deletions
--- a/plugins/analysis-icu/pom.xml
+++ b/plugins/analysis-icu/pom.xml
@ -17,6 +17,8 @@
    <properties>
        <tests.jvms>1</tests.jvms>
        <es.logger.level>INFO</es.logger.level>
        <tests.rest.suite>analysis_icu</tests.rest.suite>
        <tests.rest.load_packaged>false</tests.rest.load_packaged>
    </properties>
    <dependencies>
--- a/plugins/analysis-icu/rest-api-spec/test/analysis_icu/10_basic.yaml
+++ b/plugins/analysis-icu/rest-api-spec/test/analysis_icu/10_basic.yaml
@ -0,0 +1,37 @@
 # Integration tests for ICU analysis components
 #
 # Analyzes "Foo Bar" with only the icu_tokenizer configured and expects the
 # two words back as separate tokens, in order, with their casing untouched.
 "Tokenizer":
    - do:
        indices.analyze:
          text:         Foo Bar
          tokenizer:    icu_tokenizer
    - length: { tokens: 2 }
    - match:  { tokens.0.token: Foo }
    - match:  { tokens.1.token: Bar }
 ---
 # Applies icu_normalizer as a token filter. The keyword tokenizer keeps the
 # whole text as one token, so the single expected token shows the complete
 # effect: everything lowercased and the sharp s (ß) expanded to "ss".
 "Normalization filter":
    - do:
        indices.analyze:
          filters:      icu_normalizer
          text:         Foo Bar Ruß
          tokenizer:    keyword
    - length: { tokens: 1 }
    - match:  { tokens.0.token: foo bar russ }
 ---
 # Same input and expected output as the token-filter variant of
 # icu_normalizer, but here the component is requested through char_filters,
 # i.e. it is exercised as a char filter rather than as a token filter.
 "Normalization charfilter":
    - do:
        indices.analyze:
          char_filters: icu_normalizer
          text:         Foo Bar Ruß
          tokenizer:    keyword
    - length: { tokens: 1 }
    - match:  { tokens.0.token: foo bar russ }
 ---
 # Applies icu_folding to a single keyword token and expects the text
 # lowercased with the accents removed (résumé -> resume).
 "Folding filter":
    - do:
        indices.analyze:
          filters:      icu_folding
          text:         Foo Bar résumé
          tokenizer:    keyword
    - length: { tokens: 1 }
    - match:  { tokens.0.token: foo bar resume }
--- a/plugins/analysis-icu/src/main/java/org/elasticsearch/indices/analysis/IcuIndicesAnalysis.java
+++ b/plugins/analysis-icu/src/main/java/org/elasticsearch/indices/analysis/IcuIndicesAnalysis.java
@ -22,20 +22,26 @@ package org.elasticsearch.indices.analysis;
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.Normalizer2;
 import com.ibm.icu.text.Transliterator;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.icu.ICUFoldingFilter;
 import org.apache.lucene.analysis.icu.ICUNormalizer2CharFilter;
 import org.apache.lucene.analysis.icu.ICUTransformFilter;
 import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
 import org.elasticsearch.common.component.AbstractComponent;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.analysis.ICUCollationKeyFilter;
 import org.elasticsearch.index.analysis.PreBuiltCharFilterFactoryFactory;
 import org.elasticsearch.index.analysis.PreBuiltTokenFilterFactoryFactory;
 import org.elasticsearch.index.analysis.PreBuiltTokenizerFactoryFactory;
 import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
 import org.elasticsearch.index.analysis.TokenizerFactory;
 import java.io.Reader;
 /**
 * Registers indices-level analysis components which, unless explicitly configured,
 * are shared among all indices.
@ -106,5 +112,17 @@ public class IcuIndicesAnalysis extends AbstractComponent {
                return new ICUTransformFilter(tokenStream, Transliterator.getInstance("Null", Transliterator.FORWARD));
            }
        }));
        // Register a pre-built char filter under the name "icu_normalizer" so it
        // can be referenced without being explicitly configured on an index.
        indicesAnalysisService.charFilterFactories().put("icu_normalizer", new PreBuiltCharFilterFactoryFactory(new CharFilterFactory() {
            @Override
            public String name() {
                return "icu_normalizer";
            }
            @Override
            public Reader create(Reader reader) {
                // NOTE(review): the one-argument constructor is used, so the
                // normalization form is whatever ICUNormalizer2CharFilter
                // defaults to -- confirm against the Lucene javadoc.
                return new ICUNormalizer2CharFilter(reader);
            }
        }));
    }
 }
--- a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/AnalysisICURestIT.java
+++ b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/AnalysisICURestIT.java
@ -0,0 +1,41 @@
 /*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
 package org.elasticsearch.index.analysis;
 import com.carrotsearch.randomizedtesting.annotations.Name;
 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
 import org.elasticsearch.test.rest.RestTestCandidate;
 import org.elasticsearch.test.rest.parser.RestTestParseException;
 import java.io.IOException;
 /**
  * REST integration test entry point for the ICU analysis plugin.
  * <p>
  * All behaviour comes from {@link ElasticsearchRestTestCase}; this class only
  * supplies the test candidates and forwards each one to the superclass.
  * NOTE(review): which suites are picked up is presumably driven by the
  * {@code tests.rest.suite} build property -- confirm.
  */
 public class AnalysisICURestIT extends ElasticsearchRestTestCase {
    /**
     * Parameter source for the randomized runner.
     * NOTE(review): the meaning of the {@code (0, 1)} arguments is defined by
     * {@code ElasticsearchRestTestCase.createParameters} and is not visible here.
     *
     * @return the candidates produced by the base class
     */
    @ParametersFactory
    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
        final Iterable<Object[]> candidates = ElasticsearchRestTestCase.createParameters(0, 1);
        return candidates;
    }
    /**
     * @param testCandidate the candidate this instance runs; handed straight to the superclass
     */
    public AnalysisICURestIT(@Name("yaml") RestTestCandidate testCandidate) {
        super(testCandidate);
    }
 }
--- a/plugins/analysis-kuromoji/pom.xml
+++ b/plugins/analysis-kuromoji/pom.xml
@ -16,7 +16,8 @@
    <description>The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch.</description>
    <properties>
-        <!-- You can add any specific project property here -->
+        <tests.rest.suite>analysis_kuromoji</tests.rest.suite>
        <tests.rest.load_packaged>false</tests.rest.load_packaged>
    </properties>
    <dependencies>
--- a/plugins/analysis-kuromoji/rest-api-spec/test/analysis_kuromoji/10_basic.yaml
+++ b/plugins/analysis-kuromoji/rest-api-spec/test/analysis_kuromoji/10_basic.yaml
@ -0,0 +1,39 @@
 # Integration tests for Kuromoji analysis components
 #
 # Tokenizes the compound 関西国際空港 with kuromoji_tokenizer. Four tokens are
 # expected: the three parts (関西, 国際, 空港) plus the full compound itself
 # in second position.
 # NOTE(review): presumably the tokenizer's default mode emits the compound
 # alongside its parts -- confirm.
 "Tokenizer":
    - do:
        indices.analyze:
          text:         関西国際空港
          tokenizer:    kuromoji_tokenizer
    - length: { tokens: 4 }
    - match:  { tokens.0.token: 関西 }
    - match:  { tokens.1.token: 関西国際空港 }
    - match:  { tokens.2.token: 国際 }
    - match:  { tokens.3.token: 空港 }
 ---
 # Runs kuromoji_baseform after the tokenizer and expects the inflected
 # input 飲み to come back as 飲む.
 "Baseform filter":
    - do:
        indices.analyze:
          text:         飲み
          tokenizer:    kuromoji_tokenizer
          filters:      kuromoji_baseform
    - length: { tokens: 1 }
    - match:  { tokens.0.token: 飲む }
 ---
 # Runs kuromoji_readingform after the tokenizer. The filter is used with its
 # defaults, which emit the reading in katakana (romaji output is opt-in via
 # use_romaji), so 寿司 is expected back as スシ rather than "sushi".
 "Reading filter":
    - do:
        indices.analyze:
          text:         寿司
          tokenizer:    kuromoji_tokenizer
          filters:      kuromoji_readingform
    - length: { tokens: 1 }
    - match:  { tokens.0.token: スシ }
 ---
 # Runs kuromoji_stemmer after the tokenizer and expects the trailing
 # prolonged sound mark to be dropped (サーバー -> サーバ).
 "Stemming filter":
    - do:
        indices.analyze:
          text:         サーバー
          tokenizer:    kuromoji_tokenizer
          filters:      kuromoji_stemmer
    - length: { tokens: 1 }
    - match:  { tokens.0.token: サーバ }
--- a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/AnalysisKuromojiRestIT.java
+++ b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/AnalysisKuromojiRestIT.java
@ -0,0 +1,41 @@
 /*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
 package org.elasticsearch.index.analysis;
 import com.carrotsearch.randomizedtesting.annotations.Name;
 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
 import org.elasticsearch.test.rest.RestTestCandidate;
 import org.elasticsearch.test.rest.parser.RestTestParseException;
 import java.io.IOException;
 /**
  * REST integration test entry point for the Kuromoji (Japanese) analysis plugin.
  * <p>
  * All behaviour comes from {@link ElasticsearchRestTestCase}; this class only
  * supplies the test candidates and forwards each one to the superclass.
  * NOTE(review): which suites are picked up is presumably driven by the
  * {@code tests.rest.suite} build property -- confirm.
  */
 public class AnalysisKuromojiRestIT extends ElasticsearchRestTestCase {
    /**
     * Parameter source for the randomized runner.
     * NOTE(review): the meaning of the {@code (0, 1)} arguments is defined by
     * {@code ElasticsearchRestTestCase.createParameters} and is not visible here.
     *
     * @return the candidates produced by the base class
     */
    @ParametersFactory
    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
        final Iterable<Object[]> candidates = ElasticsearchRestTestCase.createParameters(0, 1);
        return candidates;
    }
    /**
     * @param testCandidate the candidate this instance runs; handed straight to the superclass
     */
    public AnalysisKuromojiRestIT(@Name("yaml") RestTestCandidate testCandidate) {
        super(testCandidate);
    }
 }
--- a/plugins/analysis-smartcn/pom.xml
+++ b/plugins/analysis-smartcn/pom.xml
@ -15,7 +15,8 @@
    <description>Smart Chinese Analysis plugin integrates Lucene Smart Chinese analysis module into elasticsearch.</description>
    <properties>
-        <!-- You can add any specific project property here -->
+        <tests.rest.suite>analysis_smartcn</tests.rest.suite>
        <tests.rest.load_packaged>false</tests.rest.load_packaged>
    </properties>
    <dependencies>
--- a/plugins/analysis-smartcn/rest-api-spec/test/analysis_smartcn/10_basic.yaml
+++ b/plugins/analysis-smartcn/rest-api-spec/test/analysis_smartcn/10_basic.yaml
@ -0,0 +1,28 @@
 # Integration tests for Smart Chinese analysis components
 #
 # Tokenizes a sentence with smartcn_tokenizer alone. Seven tokens are
 # expected: six words plus one token for the sentence-final punctuation.
 # The input ends with the ideographic full stop (。), yet the last expected
 # token is an ASCII comma.
 # NOTE(review): presumably the tokenizer normalizes punctuation to "," --
 # confirm; this is not a typo for "。".
 "Tokenizer":
    - do:
        indices.analyze:
          text:         我购买了道具和服装。
          tokenizer:    smartcn_tokenizer
    - length: { tokens: 7 }
    - match:  { tokens.0.token: 我  }
    - match:  { tokens.1.token: 购买 }
    - match:  { tokens.2.token: 了 }
    - match:  { tokens.3.token: 道具 }
    - match:  { tokens.4.token: 和 }
    - match:  { tokens.5.token: 服装 }
    - match:  { tokens.6.token: "," }
 ---
 # Runs the same sentence through the full smartcn analyzer. Only the six
 # word tokens are expected; no token is expected for the trailing 。.
 "Analyzer":
    - do:
        indices.analyze:
          text:         我购买了道具和服装。
          analyzer:     smartcn
    - length: { tokens: 6 }
    - match:  { tokens.0.token: 我  }
    - match:  { tokens.1.token: 购买 }
    - match:  { tokens.2.token: 了 }
    - match:  { tokens.3.token: 道具 }
    - match:  { tokens.4.token: 和 }
    - match:  { tokens.5.token: 服装 }
--- a/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/AnalysisSmartChineseRestIT.java
+++ b/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/AnalysisSmartChineseRestIT.java
@ -0,0 +1,41 @@
 /*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
 package org.elasticsearch.index.analysis;
 import com.carrotsearch.randomizedtesting.annotations.Name;
 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
 import org.elasticsearch.test.rest.RestTestCandidate;
 import org.elasticsearch.test.rest.parser.RestTestParseException;
 import java.io.IOException;
 /**
  * REST integration test entry point for the Smart Chinese analysis plugin.
  * <p>
  * All behaviour comes from {@link ElasticsearchRestTestCase}; this class only
  * supplies the test candidates and forwards each one to the superclass.
  * NOTE(review): which suites are picked up is presumably driven by the
  * {@code tests.rest.suite} build property -- confirm.
  */
 public class AnalysisSmartChineseRestIT extends ElasticsearchRestTestCase {
    /**
     * Parameter source for the randomized runner.
     * NOTE(review): the meaning of the {@code (0, 1)} arguments is defined by
     * {@code ElasticsearchRestTestCase.createParameters} and is not visible here.
     *
     * @return the candidates produced by the base class
     */
    @ParametersFactory
    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
        final Iterable<Object[]> candidates = ElasticsearchRestTestCase.createParameters(0, 1);
        return candidates;
    }
    /**
     * @param testCandidate the candidate this instance runs; handed straight to the superclass
     */
    public AnalysisSmartChineseRestIT(@Name("yaml") RestTestCandidate testCandidate) {
        super(testCandidate);
    }
 }
--- a/plugins/analysis-stempel/pom.xml
+++ b/plugins/analysis-stempel/pom.xml
@ -15,7 +15,8 @@
    <description>The Stempel (Polish) Analysis plugin integrates Lucene stempel (polish) analysis module into elasticsearch.</description>
    <properties>
-        <!-- You can add any specific project property here -->
+        <tests.rest.suite>analysis_stempel</tests.rest.suite>
        <tests.rest.load_packaged>false</tests.rest.load_packaged>
    </properties>
    <dependencies>
--- a/plugins/analysis-stempel/rest-api-spec/test/analysis_stempel/10_basic.yaml
+++ b/plugins/analysis-stempel/rest-api-spec/test/analysis_stempel/10_basic.yaml
@ -0,0 +1,18 @@
 # Integration tests for Polish analysis components
 #
 # Applies the polish_stem token filter to a single keyword token and
 # expects the plural "studenci" reduced to the stem "student".
 "Stemmer":
    - do:
        indices.analyze:
          text:         studenci
          tokenizer:    keyword
          filters:      polish_stem
    - length: { tokens: 1 }
    - match:  { tokens.0.token: student  }
 ---
 # Runs two words through the full polish analyzer and expects a single
 # token, the stem "student".
 # NOTE(review): presumably "był" is dropped as a stop word -- confirm.
 "Analyzer":
    - do:
        indices.analyze:
          text:         studenta był
          analyzer:     polish
    - length: { tokens: 1 }
    - match:  { tokens.0.token: student  }
--- a/plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishRestIT.java
+++ b/plugins/analysis-stempel/src/test/java/org/elasticsearch/index/analysis/AnalysisPolishRestIT.java
@ -0,0 +1,41 @@
 /*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
 package org.elasticsearch.index.analysis;
 import com.carrotsearch.randomizedtesting.annotations.Name;
 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
 import org.elasticsearch.test.rest.RestTestCandidate;
 import org.elasticsearch.test.rest.parser.RestTestParseException;
 import java.io.IOException;
 /**
  * REST integration test entry point for the Stempel (Polish) analysis plugin.
  * <p>
  * All behaviour comes from {@link ElasticsearchRestTestCase}; this class only
  * supplies the test candidates and forwards each one to the superclass.
  * NOTE(review): which suites are picked up is presumably driven by the
  * {@code tests.rest.suite} build property -- confirm.
  */
 public class AnalysisPolishRestIT extends ElasticsearchRestTestCase {
    /**
     * Parameter source for the randomized runner.
     * NOTE(review): the meaning of the {@code (0, 1)} arguments is defined by
     * {@code ElasticsearchRestTestCase.createParameters} and is not visible here.
     *
     * @return the candidates produced by the base class
     */
    @ParametersFactory
    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
        final Iterable<Object[]> candidates = ElasticsearchRestTestCase.createParameters(0, 1);
        return candidates;
    }
    /**
     * @param testCandidate the candidate this instance runs; handed straight to the superclass
     */
    public AnalysisPolishRestIT(@Name("yaml") RestTestCandidate testCandidate) {
        super(testCandidate);
    }
 }
--- a/plugins/pom.xml
+++ b/plugins/pom.xml
@ -277,6 +277,7 @@
                    <include>api/cluster.health.json</include>
                    <!-- used in plugin REST tests -->
                    <include>api/index.json</include>
                    <include>api/indices.analyze.json</include>
                    <include>api/indices.refresh.json</include>
                    <include>api/count.json</include>
                </includes>