Merge pull request #12070 from rmuir/analysis_rest_tests

add integration tests for analysis plugins
This commit is contained in:
Robert Muir 2015-07-07 01:41:10 -04:00
commit 69cd933ea3
14 changed files with 313 additions and 3 deletions

View File

@ -17,6 +17,8 @@
<properties>
<tests.jvms>1</tests.jvms>
<es.logger.level>INFO</es.logger.level>
<tests.rest.suite>analysis_icu</tests.rest.suite>
<tests.rest.load_packaged>false</tests.rest.load_packaged>
</properties>
<dependencies>

View File

@ -0,0 +1,37 @@
# Integration tests for ICU analysis components
#
# Each suite calls the indices.analyze API with one ICU component and
# asserts on the number and text of the tokens produced.
"Tokenizer":
- do:
indices.analyze:
text: Foo Bar
tokenizer: icu_tokenizer
# two whitespace-separated words -> two tokens, case preserved
- length: { tokens: 2 }
- match: { tokens.0.token: Foo }
- match: { tokens.1.token: Bar }
---
"Normalization filter":
- do:
indices.analyze:
filters: icu_normalizer
text: Foo Bar Ruß
tokenizer: keyword
# keyword tokenizer keeps the input as a single token; the normalizer
# lowercases it and folds "ß" to "ss" (see expected token below)
- length: { tokens: 1 }
- match: { tokens.0.token: foo bar russ }
---
"Normalization charfilter":
- do:
indices.analyze:
char_filters: icu_normalizer
text: Foo Bar Ruß
tokenizer: keyword
# same normalization applied as a char filter, before tokenization
- length: { tokens: 1 }
- match: { tokens.0.token: foo bar russ }
---
"Folding filter":
- do:
indices.analyze:
filters: icu_folding
text: Foo Bar résumé
tokenizer: keyword
# folding lowercases and strips diacritics ("résumé" -> "resume")
- length: { tokens: 1 }
- match: { tokens.0.token: foo bar resume }

View File

@ -22,20 +22,26 @@ package org.elasticsearch.indices.analysis;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.text.Transliterator;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.icu.ICUFoldingFilter;
import org.apache.lucene.analysis.icu.ICUNormalizer2CharFilter;
import org.apache.lucene.analysis.icu.ICUTransformFilter;
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.analysis.ICUCollationKeyFilter;
import org.elasticsearch.index.analysis.PreBuiltCharFilterFactoryFactory;
import org.elasticsearch.index.analysis.PreBuiltTokenFilterFactoryFactory;
import org.elasticsearch.index.analysis.PreBuiltTokenizerFactoryFactory;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import java.io.Reader;
/**
* Registers indices level analysis components so, if not explicitly configured, will be shared
* among all indices.
@ -106,5 +112,17 @@ public class IcuIndicesAnalysis extends AbstractComponent {
return new ICUTransformFilter(tokenStream, Transliterator.getInstance("Null", Transliterator.FORWARD));
}
}));
indicesAnalysisService.charFilterFactories().put("icu_normalizer", new PreBuiltCharFilterFactoryFactory(new CharFilterFactory() {
@Override
public String name() {
return "icu_normalizer";
}
@Override
public Reader create(Reader reader) {
return new ICUNormalizer2CharFilter(reader);
}
}));
}
}

View File

@ -0,0 +1,41 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.carrotsearch.randomizedtesting.annotations.Name;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
import org.elasticsearch.test.rest.RestTestCandidate;
import org.elasticsearch.test.rest.parser.RestTestParseException;
import java.io.IOException;
/**
 * Runs the packaged YAML REST test suite for the ICU analysis plugin.
 * Test candidates are discovered and parsed by the REST test framework;
 * each candidate becomes one parameterized test instance.
 */
public class AnalysisICURestIT extends ElasticsearchRestTestCase {

    /** Wraps a single parsed YAML REST test candidate. */
    public AnalysisICURestIT(@Name("yaml") RestTestCandidate testCandidate) {
        super(testCandidate);
    }

    /** Loads all REST tests as a single (unsharded) parameter group. */
    @ParametersFactory
    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
        // shard 0 of 1: run every discovered test in this JVM
        return createParameters(0, 1);
    }
}

View File

@ -16,7 +16,8 @@
<description>The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch.</description>
<properties>
<!-- You can add any specific project property here -->
<tests.rest.suite>analysis_kuromoji</tests.rest.suite>
<tests.rest.load_packaged>false</tests.rest.load_packaged>
</properties>
<dependencies>

View File

@ -0,0 +1,39 @@
# Integration tests for Kuromoji analysis components
#
# Each suite calls the indices.analyze API with one Kuromoji component and
# asserts on the number and text of the tokens produced.
"Tokenizer":
- do:
indices.analyze:
text: 関西国際空港
tokenizer: kuromoji_tokenizer
# the compound is emitted alongside its parts: 関西, the full compound,
# 国際, 空港 (4 tokens total, per the expected matches below)
- length: { tokens: 4 }
- match: { tokens.0.token: 関西 }
- match: { tokens.1.token: 関西国際空港 }
- match: { tokens.2.token: 国際 }
- match: { tokens.3.token: 空港 }
---
"Baseform filter":
- do:
indices.analyze:
text: 飲み
tokenizer: kuromoji_tokenizer
filters: kuromoji_baseform
# inflected form 飲み is replaced by its base (dictionary) form 飲む
- length: { tokens: 1 }
- match: { tokens.0.token: 飲む }
---
"Reading filter":
- do:
indices.analyze:
text: 寿司
tokenizer: kuromoji_tokenizer
filters: kuromoji_readingform
# token is replaced by its reading, romanized here ("sushi")
- length: { tokens: 1 }
- match: { tokens.0.token: sushi }
---
"Stemming filter":
- do:
indices.analyze:
text: サーバー
tokenizer: kuromoji_tokenizer
filters: kuromoji_stemmer
# trailing prolonged-sound mark is stemmed away (サーバー -> サーバ)
- length: { tokens: 1 }
- match: { tokens.0.token: サーバ }

View File

@ -0,0 +1,41 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.carrotsearch.randomizedtesting.annotations.Name;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
import org.elasticsearch.test.rest.RestTestCandidate;
import org.elasticsearch.test.rest.parser.RestTestParseException;
import java.io.IOException;
/**
 * Runs the packaged YAML REST test suite for the Kuromoji (Japanese)
 * analysis plugin. Each parsed YAML test becomes one parameterized
 * test instance.
 */
public class AnalysisKuromojiRestIT extends ElasticsearchRestTestCase {

    /** Wraps a single parsed YAML REST test candidate. */
    public AnalysisKuromojiRestIT(@Name("yaml") RestTestCandidate testCandidate) {
        super(testCandidate);
    }

    /** Loads all REST tests as a single (unsharded) parameter group. */
    @ParametersFactory
    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
        // shard 0 of 1: run every discovered test in this JVM
        return createParameters(0, 1);
    }
}

View File

@ -15,7 +15,8 @@
<description>Smart Chinese Analysis plugin integrates Lucene Smart Chinese analysis module into elasticsearch.</description>
<properties>
<!-- You can add any specific project property here -->
<tests.rest.suite>analysis_smartcn</tests.rest.suite>
<tests.rest.load_packaged>false</tests.rest.load_packaged>
</properties>
<dependencies>

View File

@ -0,0 +1,28 @@
# Integration tests for Smart Chinese analysis components
#
# Each suite calls the indices.analyze API and asserts on the number and
# text of the tokens produced.
"Tokenizer":
- do:
indices.analyze:
text: 我购买了道具和服装。
tokenizer: smartcn_tokenizer
# bare tokenizer keeps punctuation: the sentence-final 。 surfaces as a
# "," token (7 tokens total, per the matches below)
- length: { tokens: 7 }
- match: { tokens.0.token: 我 }
- match: { tokens.1.token: 购买 }
- match: { tokens.2.token: 了 }
- match: { tokens.3.token: 道具 }
- match: { tokens.4.token: 和 }
- match: { tokens.5.token: 服装 }
- match: { tokens.6.token: "," }
---
"Analyzer":
- do:
indices.analyze:
text: 我购买了道具和服装。
analyzer: smartcn
# full analyzer drops the punctuation token, leaving 6 word tokens
- length: { tokens: 6 }
- match: { tokens.0.token: 我 }
- match: { tokens.1.token: 购买 }
- match: { tokens.2.token: 了 }
- match: { tokens.3.token: 道具 }
- match: { tokens.4.token: 和 }
- match: { tokens.5.token: 服装 }

View File

@ -0,0 +1,41 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.carrotsearch.randomizedtesting.annotations.Name;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
import org.elasticsearch.test.rest.RestTestCandidate;
import org.elasticsearch.test.rest.parser.RestTestParseException;
import java.io.IOException;
/**
 * Runs the packaged YAML REST test suite for the Smart Chinese analysis
 * plugin. Each parsed YAML test becomes one parameterized test instance.
 */
public class AnalysisSmartChineseRestIT extends ElasticsearchRestTestCase {

    /** Wraps a single parsed YAML REST test candidate. */
    public AnalysisSmartChineseRestIT(@Name("yaml") RestTestCandidate testCandidate) {
        super(testCandidate);
    }

    /** Loads all REST tests as a single (unsharded) parameter group. */
    @ParametersFactory
    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
        // shard 0 of 1: run every discovered test in this JVM
        return createParameters(0, 1);
    }
}

View File

@ -15,7 +15,8 @@
<description>The Stempel (Polish) Analysis plugin integrates Lucene stempel (polish) analysis module into elasticsearch.</description>
<properties>
<!-- You can add any specific project property here -->
<tests.rest.suite>analysis_stempel</tests.rest.suite>
<tests.rest.load_packaged>false</tests.rest.load_packaged>
</properties>
<dependencies>

View File

@ -0,0 +1,18 @@
# Integration tests for Polish analysis components
#
# Each suite calls the indices.analyze API and asserts on the number and
# text of the tokens produced.
"Stemmer":
- do:
indices.analyze:
text: studenci
tokenizer: keyword
filters: polish_stem
# plural "studenci" is stemmed to the singular stem "student"
- length: { tokens: 1 }
- match: { tokens.0.token: student }
---
"Analyzer":
- do:
indices.analyze:
text: studenta był
analyzer: polish
# only one token survives: "studenta" stems to "student", and "był" is
# removed — presumably as a Polish stopword (TODO confirm stopword list)
- length: { tokens: 1 }
- match: { tokens.0.token: student }

View File

@ -0,0 +1,41 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.carrotsearch.randomizedtesting.annotations.Name;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.elasticsearch.test.rest.ElasticsearchRestTestCase;
import org.elasticsearch.test.rest.RestTestCandidate;
import org.elasticsearch.test.rest.parser.RestTestParseException;
import java.io.IOException;
/**
 * Runs the packaged YAML REST test suite for the Stempel (Polish)
 * analysis plugin. Each parsed YAML test becomes one parameterized
 * test instance.
 */
public class AnalysisPolishRestIT extends ElasticsearchRestTestCase {

    /** Wraps a single parsed YAML REST test candidate. */
    public AnalysisPolishRestIT(@Name("yaml") RestTestCandidate testCandidate) {
        super(testCandidate);
    }

    /** Loads all REST tests as a single (unsharded) parameter group. */
    @ParametersFactory
    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
        // shard 0 of 1: run every discovered test in this JVM
        return createParameters(0, 1);
    }
}

View File

@ -277,6 +277,7 @@
<include>api/cluster.health.json</include>
<!-- used in plugin REST tests -->
<include>api/index.json</include>
<include>api/indices.analyze.json</include>
<include>api/indices.refresh.json</include>
<include>api/count.json</include>
</includes>